From 48d0dcbd5b7f80810ce259bc9ed6f57f99e27ca9 Mon Sep 17 00:00:00 2001
From: marha
Date: Wed, 16 Feb 2011 16:53:37 +0000
Subject: pixman mesa git update 16 Feb 2011

---
 mesalib/src/glsl/ir_constant_expression.cpp        |   58 +-
 mesalib/src/glsl/linker.cpp                        |    2 +-
 mesalib/src/mesa/main/mtypes.h                     | 6739 ++++++++++----------
 mesalib/src/mesa/main/state.c                      | 1466 ++---
 mesalib/src/mesa/state_tracker/st_atom_blend.c     |  601 +-
 mesalib/src/mesa/state_tracker/st_cb_bitmap.c      | 1783 +++---
 .../src/mesa/state_tracker/st_cb_bufferobjects.c   |  915 +--
 mesalib/src/mesa/state_tracker/st_cb_clear.c       | 1122 ++--
 mesalib/src/mesa/state_tracker/st_cb_drawpixels.c  | 2739 ++++----
 mesalib/src/mesa/state_tracker/st_cb_drawtex.c     |  611 +-
 mesalib/src/mesa/state_tracker/st_context.c        |    5 +
 mesalib/src/mesa/state_tracker/st_context.h        |  535 +-
 mesalib/src/mesa/state_tracker/st_draw.c           | 1537 +++--
 mesalib/src/mesa/state_tracker/st_draw_feedback.c  |    1 -
 mesalib/src/mesa/state_tracker/st_gen_mipmap.c     |  846 ++-
 mesalib/src/mesa/tnl/t_draw.c                      | 1028 +--
 mesalib/src/mesa/vbo/vbo_exec_array.c              | 2559 ++++----
 mesalib/src/mesa/vbo/vbo_exec_draw.c               |  841 +--
 mesalib/src/mesa/vbo/vbo_save_draw.c               |  609 +-
 pixman/Makefile.am                                 |    2 +-
 pixman/configure.ac                                |    1 +
 pixman/demos/Makefile.am                           |   34 +
 pixman/demos/alpha-test.c                          |  117 +
 pixman/demos/clip-in.c                             |   50 +
 pixman/demos/clip-test.c                           |   97 +
 pixman/demos/composite-test.c                      |  191 +
 pixman/demos/convolution-test.c                    |   47 +
 pixman/demos/gradient-test.c                       |   89 +
 pixman/demos/gtk-utils.c                           |  115 +
 pixman/demos/gtk-utils.h                           |   13 +
 pixman/demos/radial-test.c                         |  198 +
 pixman/demos/screen-test.c                         |   44 +
 pixman/demos/trap-test.c                           |   49 +
 pixman/pixman/pixman-arm-common.h                  |   59 +-
 pixman/pixman/pixman-arm-neon-asm.S                | 4758 +++++++-------
 pixman/pixman/pixman-arm-neon.c                    |   11 +
 pixman/pixman/pixman-fast-path.c                   | 4455 ++++++-------
 pixman/pixman/pixman-fast-path.h                   | 1039 +--
 pixman/pixman/pixman-sse2.c                        |  124 +-
 pixman/test/Makefile.am                            |  104 +-
 pixman/test/alpha-test.c                           |  117 -
 pixman/test/clip-in.c                              |   50 -
 pixman/test/clip-test.c                            |   97 -
 pixman/test/composite-test.c                       |  191 -
 pixman/test/convolution-test.c                     |   47 -
 pixman/test/gradient-test.c                        |   89 -
 pixman/test/gtk-utils.c                            |  115 -
 pixman/test/gtk-utils.h                            |   13 -
 pixman/test/radial-test.c                          |  198 -
 pixman/test/scaling-test.c                         |  618 +-
 pixman/test/screen-test.c                          |   44 -
 pixman/test/trap-test.c                            |   49 -
 52 files changed, 18838 insertions(+), 18384 deletions(-)
 create mode 100644 pixman/demos/Makefile.am
 create mode 100644 pixman/demos/alpha-test.c
 create mode 100644 pixman/demos/clip-in.c
 create mode 100644 pixman/demos/clip-test.c
 create mode 100644 pixman/demos/composite-test.c
 create mode 100644 pixman/demos/convolution-test.c
 create mode 100644 pixman/demos/gradient-test.c
 create mode 100644 pixman/demos/gtk-utils.c
 create mode 100644 pixman/demos/gtk-utils.h
 create mode 100644 pixman/demos/radial-test.c
 create mode 100644 pixman/demos/screen-test.c
 create mode 100644 pixman/demos/trap-test.c
 delete mode 100644 pixman/test/alpha-test.c
 delete mode 100644 pixman/test/clip-in.c
 delete mode 100644 pixman/test/clip-test.c
 delete mode 100644 pixman/test/composite-test.c
 delete mode 100644 pixman/test/convolution-test.c
 delete mode 100644 pixman/test/gradient-test.c
 delete mode 100644 pixman/test/gtk-utils.c
 delete mode 100644 pixman/test/gtk-utils.h
 delete mode 100644 pixman/test/radial-test.c
 delete mode 100644 pixman/test/screen-test.c
 delete mode 100644 pixman/test/trap-test.c

diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 2841fb350..2a3084896 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -288,24 +288,20 @@ ir_expression::constant_expression_value()
       break;
    case ir_unop_rcp:
-      /* FINISHME: Emit warning when division-by-zero is detected. */
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
         switch (this->type->base_type) {
         case GLSL_TYPE_UINT:
-           if (op[0]->value.u[c] == 0.0)
-              return NULL;
-           data.u[c] = 1 / op[0]->value.u[c];
+           if (op[0]->value.u[c] != 0.0)
+              data.u[c] = 1 / op[0]->value.u[c];
            break;
         case GLSL_TYPE_INT:
-           if (op[0]->value.i[c] == 0.0)
-              return NULL;
-           data.i[c] = 1 / op[0]->value.i[c];
+           if (op[0]->value.i[c] != 0.0)
+              data.i[c] = 1 / op[0]->value.i[c];
            break;
         case GLSL_TYPE_FLOAT:
-           if (op[0]->value.f[c] == 0.0)
-              return NULL;
-           data.f[c] = 1.0F / op[0]->value.f[c];
+           if (op[0]->value.f[c] != 0.0)
+              data.f[c] = 1.0F / op[0]->value.f[c];
            break;
         default:
            assert(0);
@@ -314,13 +310,9 @@ ir_expression::constant_expression_value()
       break;
    case ir_unop_rsq:
-      /* FINISHME: Emit warning when division-by-zero is detected. */
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-        float s = sqrtf(op[0]->value.f[c]);
-        if (s == 0)
-           return NULL;
-        data.f[c] = 1.0F / s;
+        data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
       }
       break;
@@ -523,18 +515,20 @@ ir_expression::constant_expression_value()
         switch (op[0]->type->base_type) {
         case GLSL_TYPE_UINT:
-           if (op[1]->value.u[c1] == 0)
-              return NULL;
-           data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
+           if (op[1]->value.u[c1] == 0) {
+              data.u[c] = 0;
+           } else {
+              data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
+           }
            break;
         case GLSL_TYPE_INT:
-           if (op[1]->value.i[c1] == 0)
-              return NULL;
-           data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
+           if (op[1]->value.i[c1] == 0) {
+              data.i[c] = 0;
+           } else {
+              data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
+           }
            break;
         case GLSL_TYPE_FLOAT:
-           if (op[1]->value.f[c1] == 0)
-              return NULL;
            data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
            break;
         default:
@@ -552,18 +546,20 @@ ir_expression::constant_expression_value()
         switch (op[0]->type->base_type) {
         case GLSL_TYPE_UINT:
-           if (op[1]->value.u[c1] == 0)
-              return NULL;
-           data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
+           if (op[1]->value.u[c1] == 0) {
+              data.u[c] = 0;
+           } else {
+              data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
+           }
            break;
         case GLSL_TYPE_INT:
-           if (op[1]->value.i[c1] == 0)
-              return NULL;
-           data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
+           if (op[1]->value.i[c1] == 0) {
+              data.i[c] = 0;
+           } else {
+              data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
+           }
            break;
         case GLSL_TYPE_FLOAT:
-           if (op[1]->value.f[c1] == 0)
-              return NULL;
            /* We don't use fmod because it rounds toward zero; GLSL specifies
            * the use of floor.
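
The hunks above change GLSL constant folding so that a constant division by zero no longer aborts folding (the old code returned NULL): integer division and modulus by zero now fold to 0, and float division simply yields the IEEE result. GLSL leaves division by zero undefined, so any defined value is legal as long as the compiler does not crash. A minimal standalone sketch of that folding rule; the fold_* helpers are invented for illustration and are not Mesa's API:

// Hypothetical helpers mirroring the folding rule introduced above.
#include <cassert>
#include <cstdio>

static unsigned fold_udiv(unsigned a, unsigned b) { return b == 0 ? 0 : a / b; } // u / 0 folds to 0
static int      fold_idiv(int a, int b)           { return b == 0 ? 0 : a / b; } // i / 0 folds to 0
static int      fold_imod(int a, int b)           { return b == 0 ? 0 : a % b; } // i % 0 folds to 0
static float    fold_fdiv(float a, float b)       { return a / b; }              // 1.0/0.0 -> +inf (IEEE)

int main()
{
   assert(fold_udiv(7u, 0u) == 0u);
   assert(fold_idiv(-7, 0) == 0);
   assert(fold_imod(5, 0) == 0);
   std::printf("1.0f / 0.0f folds to %f\n", fold_fdiv(1.0f, 0.0f));
   return 0;
}
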
*/ diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 46cd1950c..6c003bb02 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -926,7 +926,7 @@ link_intrastage_shaders(void *mem_ctx, if (var->type->is_array() && (var->type->length == 0)) { const glsl_type *type = glsl_type::get_array_instance(var->type->fields.array, - var->max_array_access); + var->max_array_access + 1); assert(type != NULL); var->type = type; diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index 020595bd0..b5966dffe 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -1,3369 +1,3370 @@ -/* - * Mesa 3-D graphics library - * Version: 7.7 - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file mtypes.h - * Main Mesa data structures. - * - * Please try to mark derived values with a leading underscore ('_'). - */ - -#ifndef MTYPES_H -#define MTYPES_H - - -#include "main/glheader.h" -#include "main/config.h" -#include "main/mfeatures.h" -#include "glapi/glapi.h" -#include "math/m_matrix.h" /* GLmatrix */ -#include "main/simple_list.h" /* struct simple_node */ -#include "main/formats.h" /* MESA_FORMAT_COUNT */ - - -/** - * Color channel data type. - */ -#if CHAN_BITS == 8 - typedef GLubyte GLchan; -#define CHAN_MAX 255 -#define CHAN_MAXF 255.0F -#define CHAN_TYPE GL_UNSIGNED_BYTE -#elif CHAN_BITS == 16 - typedef GLushort GLchan; -#define CHAN_MAX 65535 -#define CHAN_MAXF 65535.0F -#define CHAN_TYPE GL_UNSIGNED_SHORT -#elif CHAN_BITS == 32 - typedef GLfloat GLchan; -#define CHAN_MAX 1.0 -#define CHAN_MAXF 1.0F -#define CHAN_TYPE GL_FLOAT -#else -#error "illegal number of color channel bits" -#endif - - -/** - * Stencil buffer data type. - */ -#if STENCIL_BITS==8 - typedef GLubyte GLstencil; -#elif STENCIL_BITS==16 - typedef GLushort GLstencil; -#else -# error "illegal number of stencil bits" -#endif - - -/** - * \name 64-bit extension of GLbitfield. 
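
The linker.cpp hunk earlier in this patch fixes implicit sizing of unsized arrays at link time: if the highest constant index accessed is max_array_access, the linked array needs max_array_access + 1 elements, not max_array_access. A small sketch of that arithmetic under assumed, simplified types; unsized_array_var and implicit_array_length are inventions for illustration, not Mesa's ir_variable API:

#include <cassert>

// Stand-in for the relevant part of a shader IR variable (assumption).
struct unsized_array_var {
   unsigned max_array_access;   // highest constant index used, e.g. foo[3] -> 3
};

// Implicit array length per the fixed linker logic: one more than the
// highest index accessed.
static unsigned implicit_array_length(const unsized_array_var &v)
{
   return v.max_array_access + 1;
}

int main()
{
   unsized_array_var v = { 3 };              // shader only ever reads foo[3]
   assert(implicit_array_length(v) == 4);    // linked array must have 4 elements
   return 0;
}
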
- */ -/*@{*/ -typedef GLuint64 GLbitfield64; - -/** Set a single bit */ -#define BITFIELD64_BIT(b) (1ULL << (b)) - - -/** - * \name Some forward type declarations - */ -/*@{*/ -struct _mesa_HashTable; -struct gl_attrib_node; -struct gl_list_extensions; -struct gl_meta_state; -struct gl_pixelstore_attrib; -struct gl_program_cache; -struct gl_texture_format; -struct gl_texture_image; -struct gl_texture_object; -struct gl_context; -struct st_context; -/*@}*/ - - -/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */ -#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1) -#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2) -#define PRIM_UNKNOWN (GL_POLYGON+3) - - -/** - * Shader stages. Note that these will become 5 with tessellation. - * These MUST have the same values as gallium's PIPE_SHADER_* - */ -typedef enum -{ - MESA_SHADER_VERTEX = 0, - MESA_SHADER_FRAGMENT = 1, - MESA_SHADER_GEOMETRY = 2, - MESA_SHADER_TYPES = 3 -} gl_shader_type; - - - -/** - * Indexes for vertex program attributes. - * GL_NV_vertex_program aliases generic attributes over the conventional - * attributes. In GL_ARB_vertex_program shader the aliasing is optional. - * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the - * generic attributes are distinct/separate). - */ -typedef enum -{ - VERT_ATTRIB_POS = 0, - VERT_ATTRIB_WEIGHT = 1, - VERT_ATTRIB_NORMAL = 2, - VERT_ATTRIB_COLOR0 = 3, - VERT_ATTRIB_COLOR1 = 4, - VERT_ATTRIB_FOG = 5, - VERT_ATTRIB_COLOR_INDEX = 6, - VERT_ATTRIB_POINT_SIZE = 6, /*alias*/ - VERT_ATTRIB_EDGEFLAG = 7, - VERT_ATTRIB_TEX0 = 8, - VERT_ATTRIB_TEX1 = 9, - VERT_ATTRIB_TEX2 = 10, - VERT_ATTRIB_TEX3 = 11, - VERT_ATTRIB_TEX4 = 12, - VERT_ATTRIB_TEX5 = 13, - VERT_ATTRIB_TEX6 = 14, - VERT_ATTRIB_TEX7 = 15, - VERT_ATTRIB_GENERIC0 = 16, - VERT_ATTRIB_GENERIC1 = 17, - VERT_ATTRIB_GENERIC2 = 18, - VERT_ATTRIB_GENERIC3 = 19, - VERT_ATTRIB_GENERIC4 = 20, - VERT_ATTRIB_GENERIC5 = 21, - VERT_ATTRIB_GENERIC6 = 22, - VERT_ATTRIB_GENERIC7 = 23, - VERT_ATTRIB_GENERIC8 = 24, - VERT_ATTRIB_GENERIC9 = 25, - VERT_ATTRIB_GENERIC10 = 26, - VERT_ATTRIB_GENERIC11 = 27, - VERT_ATTRIB_GENERIC12 = 28, - VERT_ATTRIB_GENERIC13 = 29, - VERT_ATTRIB_GENERIC14 = 30, - VERT_ATTRIB_GENERIC15 = 31, - VERT_ATTRIB_MAX = 32 -} gl_vert_attrib; - -/** - * Bitflags for vertex attributes. - * These are used in bitfields in many places. 
- */ -/*@{*/ -#define VERT_BIT_POS (1 << VERT_ATTRIB_POS) -#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT) -#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL) -#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0) -#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1) -#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG) -#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX) -#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG) -#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0) -#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1) -#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2) -#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3) -#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4) -#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5) -#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6) -#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7) -#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0) -#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1) -#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2) -#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3) -#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4) -#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5) -#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6) -#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7) -#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8) -#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9) -#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10) -#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11) -#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12) -#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13) -#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14) -#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15) - -#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u))) -#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g))) -/*@}*/ - - -/** - * Indexes for vertex program result attributes - */ -typedef enum -{ - VERT_RESULT_HPOS = 0, - VERT_RESULT_COL0 = 1, - VERT_RESULT_COL1 = 2, - VERT_RESULT_FOGC = 3, - VERT_RESULT_TEX0 = 4, - VERT_RESULT_TEX1 = 5, - VERT_RESULT_TEX2 = 6, - VERT_RESULT_TEX3 = 7, - VERT_RESULT_TEX4 = 8, - VERT_RESULT_TEX5 = 9, - VERT_RESULT_TEX6 = 10, - VERT_RESULT_TEX7 = 11, - VERT_RESULT_PSIZ = 12, - VERT_RESULT_BFC0 = 13, - VERT_RESULT_BFC1 = 14, - VERT_RESULT_EDGE = 15, - VERT_RESULT_VAR0 = 16, /**< shader varying */ - VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING) -} gl_vert_result; - - -/*********************************************/ - -/** - * Indexes for geometry program attributes. - */ -typedef enum -{ - GEOM_ATTRIB_POSITION = 0, - GEOM_ATTRIB_COLOR0 = 1, - GEOM_ATTRIB_COLOR1 = 2, - GEOM_ATTRIB_SECONDARY_COLOR0 = 3, - GEOM_ATTRIB_SECONDARY_COLOR1 = 4, - GEOM_ATTRIB_FOG_FRAG_COORD = 5, - GEOM_ATTRIB_POINT_SIZE = 6, - GEOM_ATTRIB_CLIP_VERTEX = 7, - GEOM_ATTRIB_PRIMITIVE_ID = 8, - GEOM_ATTRIB_TEX_COORD = 9, - - GEOM_ATTRIB_VAR0 = 16, - GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING) -} gl_geom_attrib; - -/** - * Bitflags for geometry attributes. - * These are used in bitfields in many places. 
- */ -/*@{*/ -#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0) -#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1) -#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0) -#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1) -#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD) -#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD) -#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION) -#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE) -#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX) -#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID) -#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0) - -#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g))) -/*@}*/ - - -/** - * Indexes for geometry program result attributes - */ -typedef enum -{ - GEOM_RESULT_POS = 0, - GEOM_RESULT_COL0 = 1, - GEOM_RESULT_COL1 = 2, - GEOM_RESULT_SCOL0 = 3, - GEOM_RESULT_SCOL1 = 4, - GEOM_RESULT_FOGC = 5, - GEOM_RESULT_TEX0 = 6, - GEOM_RESULT_TEX1 = 7, - GEOM_RESULT_TEX2 = 8, - GEOM_RESULT_TEX3 = 9, - GEOM_RESULT_TEX4 = 10, - GEOM_RESULT_TEX5 = 11, - GEOM_RESULT_TEX6 = 12, - GEOM_RESULT_TEX7 = 13, - GEOM_RESULT_PSIZ = 14, - GEOM_RESULT_CLPV = 15, - GEOM_RESULT_PRID = 16, - GEOM_RESULT_LAYR = 17, - GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */ - /* ### we need to -2 because var0 is 18 instead 16 like in the others */ - GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2) -} gl_geom_result; - - -/** - * Indexes for fragment program input attributes. - */ -typedef enum -{ - FRAG_ATTRIB_WPOS = 0, - FRAG_ATTRIB_COL0 = 1, - FRAG_ATTRIB_COL1 = 2, - FRAG_ATTRIB_FOGC = 3, - FRAG_ATTRIB_TEX0 = 4, - FRAG_ATTRIB_TEX1 = 5, - FRAG_ATTRIB_TEX2 = 6, - FRAG_ATTRIB_TEX3 = 7, - FRAG_ATTRIB_TEX4 = 8, - FRAG_ATTRIB_TEX5 = 9, - FRAG_ATTRIB_TEX6 = 10, - FRAG_ATTRIB_TEX7 = 11, - FRAG_ATTRIB_FACE = 12, /**< front/back face */ - FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */ - FRAG_ATTRIB_VAR0 = 14, /**< shader varying */ - FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING) -} gl_frag_attrib; - -/** - * Bitflags for fragment program input attributes. 
- */ -/*@{*/ -#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS) -#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0) -#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1) -#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC) -#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE) -#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC) -#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0) -#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1) -#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2) -#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3) -#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4) -#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5) -#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6) -#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7) -#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0) - -#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U)) -#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V)) - -#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \ - FRAG_BIT_TEX1| \ - FRAG_BIT_TEX2| \ - FRAG_BIT_TEX3| \ - FRAG_BIT_TEX4| \ - FRAG_BIT_TEX5| \ - FRAG_BIT_TEX6| \ - FRAG_BIT_TEX7) -/*@}*/ - - -/** - * Fragment program results - */ -typedef enum -{ - FRAG_RESULT_DEPTH = 0, - FRAG_RESULT_STENCIL = 1, - FRAG_RESULT_COLOR = 2, - FRAG_RESULT_DATA0 = 3, - FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) -} gl_frag_result; - - -/** - * Indexes for all renderbuffers - */ -typedef enum -{ - /* the four standard color buffers */ - BUFFER_FRONT_LEFT, - BUFFER_BACK_LEFT, - BUFFER_FRONT_RIGHT, - BUFFER_BACK_RIGHT, - BUFFER_DEPTH, - BUFFER_STENCIL, - BUFFER_ACCUM, - /* optional aux buffer */ - BUFFER_AUX0, - /* generic renderbuffers */ - BUFFER_COLOR0, - BUFFER_COLOR1, - BUFFER_COLOR2, - BUFFER_COLOR3, - BUFFER_COLOR4, - BUFFER_COLOR5, - BUFFER_COLOR6, - BUFFER_COLOR7, - BUFFER_COUNT -} gl_buffer_index; - -/** - * Bit flags for all renderbuffers - */ -#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT) -#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT) -#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT) -#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT) -#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0) -#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1) -#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2) -#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3) -#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH) -#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL) -#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM) -#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0) -#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1) -#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2) -#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3) -#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4) -#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5) -#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6) -#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7) - -/** - * Mask of all the color buffer bits (but not accum). - */ -#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \ - BUFFER_BIT_BACK_LEFT | \ - BUFFER_BIT_FRONT_RIGHT | \ - BUFFER_BIT_BACK_RIGHT | \ - BUFFER_BIT_AUX0 | \ - BUFFER_BIT_COLOR0 | \ - BUFFER_BIT_COLOR1 | \ - BUFFER_BIT_COLOR2 | \ - BUFFER_BIT_COLOR3 | \ - BUFFER_BIT_COLOR4 | \ - BUFFER_BIT_COLOR5 | \ - BUFFER_BIT_COLOR6 | \ - BUFFER_BIT_COLOR7) - - -/** - * Framebuffer configuration (aka visual / pixelformat) - * Note: some of these fields should be boolean, but it appears that - * code in drivers/dri/common/util.c requires int-sized fields. - */ -struct gl_config -{ - GLboolean rgbMode; - GLboolean floatMode; - GLboolean colorIndexMode; /* XXX is this used anywhere? 
*/ - GLuint doubleBufferMode; - GLuint stereoMode; - - GLboolean haveAccumBuffer; - GLboolean haveDepthBuffer; - GLboolean haveStencilBuffer; - - GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ - GLuint redMask, greenMask, blueMask, alphaMask; - GLint rgbBits; /* total bits for rgb */ - GLint indexBits; /* total bits for colorindex */ - - GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits; - GLint depthBits; - GLint stencilBits; - - GLint numAuxBuffers; - - GLint level; - - /* EXT_visual_rating / GLX 1.2 */ - GLint visualRating; - - /* EXT_visual_info / GLX 1.2 */ - GLint transparentPixel; - /* colors are floats scaled to ints */ - GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha; - GLint transparentIndex; - - /* ARB_multisample / SGIS_multisample */ - GLint sampleBuffers; - GLint samples; - - /* SGIX_pbuffer / GLX 1.3 */ - GLint maxPbufferWidth; - GLint maxPbufferHeight; - GLint maxPbufferPixels; - GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */ - GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */ - - /* OML_swap_method */ - GLint swapMethod; - - /* EXT_texture_from_pixmap */ - GLint bindToTextureRgb; - GLint bindToTextureRgba; - GLint bindToMipmapTexture; - GLint bindToTextureTargets; - GLint yInverted; - - /* EXT_framebuffer_sRGB */ - GLint sRGBCapable; -}; - - -/** - * Data structure for color tables - */ -struct gl_color_table -{ - GLenum InternalFormat; /**< The user-specified format */ - GLenum _BaseFormat; /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */ - GLuint Size; /**< number of entries in table */ - GLfloat *TableF; /**< Color table, floating point values */ - GLubyte *TableUB; /**< Color table, ubyte values */ - GLubyte RedSize; - GLubyte GreenSize; - GLubyte BlueSize; - GLubyte AlphaSize; - GLubyte LuminanceSize; - GLubyte IntensitySize; -}; - - -/** - * \name Bit flags used for updating material values. - */ -/*@{*/ -#define MAT_ATTRIB_FRONT_AMBIENT 0 -#define MAT_ATTRIB_BACK_AMBIENT 1 -#define MAT_ATTRIB_FRONT_DIFFUSE 2 -#define MAT_ATTRIB_BACK_DIFFUSE 3 -#define MAT_ATTRIB_FRONT_SPECULAR 4 -#define MAT_ATTRIB_BACK_SPECULAR 5 -#define MAT_ATTRIB_FRONT_EMISSION 6 -#define MAT_ATTRIB_BACK_EMISSION 7 -#define MAT_ATTRIB_FRONT_SHININESS 8 -#define MAT_ATTRIB_BACK_SHININESS 9 -#define MAT_ATTRIB_FRONT_INDEXES 10 -#define MAT_ATTRIB_BACK_INDEXES 11 -#define MAT_ATTRIB_MAX 12 - -#define MAT_ATTRIB_AMBIENT(f) (MAT_ATTRIB_FRONT_AMBIENT+(f)) -#define MAT_ATTRIB_DIFFUSE(f) (MAT_ATTRIB_FRONT_DIFFUSE+(f)) -#define MAT_ATTRIB_SPECULAR(f) (MAT_ATTRIB_FRONT_SPECULAR+(f)) -#define MAT_ATTRIB_EMISSION(f) (MAT_ATTRIB_FRONT_EMISSION+(f)) -#define MAT_ATTRIB_SHININESS(f)(MAT_ATTRIB_FRONT_SHININESS+(f)) -#define MAT_ATTRIB_INDEXES(f) (MAT_ATTRIB_FRONT_INDEXES+(f)) - -#define MAT_INDEX_AMBIENT 0 -#define MAT_INDEX_DIFFUSE 1 -#define MAT_INDEX_SPECULAR 2 - -#define MAT_BIT_FRONT_AMBIENT (1< ) */ - GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */ - GLfloat _VP_inf_spot_attenuation; - - GLfloat _SpotExpTable[EXP_TABLE_SIZE][2]; /**< to replace a pow() call */ - GLfloat _MatAmbient[2][3]; /**< material ambient * light ambient */ - GLfloat _MatDiffuse[2][3]; /**< material diffuse * light diffuse */ - GLfloat _MatSpecular[2][3]; /**< material spec * light specular */ - GLfloat _dli; /**< CI diffuse light intensity */ - GLfloat _sli; /**< CI specular light intensity */ - /*@}*/ -}; - - -/** - * Light model state. 
- */ -struct gl_lightmodel -{ - GLfloat Ambient[4]; /**< ambient color */ - GLboolean LocalViewer; /**< Local (or infinite) view point? */ - GLboolean TwoSide; /**< Two (or one) sided lighting? */ - GLenum ColorControl; /**< either GL_SINGLE_COLOR - * or GL_SEPARATE_SPECULAR_COLOR */ -}; - - -/** - * Material state. - */ -struct gl_material -{ - GLfloat Attrib[MAT_ATTRIB_MAX][4]; -}; - - -/** - * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT) - */ -struct gl_accum_attrib -{ - GLfloat ClearColor[4]; /**< Accumulation buffer clear color */ -}; - - -/** - * Color buffer attribute group (GL_COLOR_BUFFER_BIT). - */ -struct gl_colorbuffer_attrib -{ - GLuint ClearIndex; /**< Index to use for glClear */ - GLclampf ClearColor[4]; /**< Color to use for glClear */ - - GLuint IndexMask; /**< Color index write mask */ - GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */ - - GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */ - - /** - * \name alpha testing - */ - /*@{*/ - GLboolean AlphaEnabled; /**< Alpha test enabled flag */ - GLenum AlphaFunc; /**< Alpha test function */ - GLclampf AlphaRef; /**< Alpha reference value */ - /*@}*/ - - /** - * \name Blending - */ - /*@{*/ - GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */ - GLfloat BlendColor[4]; /**< Blending color */ - struct - { - GLenum SrcRGB; /**< RGB blend source term */ - GLenum DstRGB; /**< RGB blend dest term */ - GLenum SrcA; /**< Alpha blend source term */ - GLenum DstA; /**< Alpha blend dest term */ - GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */ - GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */ - } Blend[MAX_DRAW_BUFFERS]; - /** Are the blend func terms currently different for each buffer/target? */ - GLboolean _BlendFuncPerBuffer; - /** Are the blend equations currently different for each buffer/target? */ - GLboolean _BlendEquationPerBuffer; - /*@}*/ - - /** - * \name Logic op - */ - /*@{*/ - GLenum LogicOp; /**< Logic operator */ - GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */ - GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */ - GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */ - /*@}*/ - - GLboolean DitherFlag; /**< Dither enable flag */ - - GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ - GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ - - GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */ -}; - - -/** - * Current attribute group (GL_CURRENT_BIT). - */ -struct gl_current_attrib -{ - /** - * \name Current vertex attributes. - * \note Values are valid only after FLUSH_VERTICES has been called. - * \note Index and Edgeflag current values are stored as floats in the - * SIX and SEVEN attribute slots. - */ - GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */ - - /** - * \name Current raster position attributes (always valid). - * \note This set of attributes is very similar to the SWvertex struct. - */ - /*@{*/ - GLfloat RasterPos[4]; - GLfloat RasterDistance; - GLfloat RasterColor[4]; - GLfloat RasterSecondaryColor[4]; - GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4]; - GLboolean RasterPosValid; - /*@}*/ -}; - - -/** - * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT). 
- */ -struct gl_depthbuffer_attrib -{ - GLenum Func; /**< Function for depth buffer compare */ - GLclampd Clear; /**< Value to clear depth buffer to */ - GLboolean Test; /**< Depth buffering enabled flag */ - GLboolean Mask; /**< Depth buffer writable? */ - GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */ - GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */ -}; - - -/** - * Evaluator attribute group (GL_EVAL_BIT). - */ -struct gl_eval_attrib -{ - /** - * \name Enable bits - */ - /*@{*/ - GLboolean Map1Color4; - GLboolean Map1Index; - GLboolean Map1Normal; - GLboolean Map1TextureCoord1; - GLboolean Map1TextureCoord2; - GLboolean Map1TextureCoord3; - GLboolean Map1TextureCoord4; - GLboolean Map1Vertex3; - GLboolean Map1Vertex4; - GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */ - GLboolean Map2Color4; - GLboolean Map2Index; - GLboolean Map2Normal; - GLboolean Map2TextureCoord1; - GLboolean Map2TextureCoord2; - GLboolean Map2TextureCoord3; - GLboolean Map2TextureCoord4; - GLboolean Map2Vertex3; - GLboolean Map2Vertex4; - GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */ - GLboolean AutoNormal; - /*@}*/ - - /** - * \name Map Grid endpoints and divisions and calculated du values - */ - /*@{*/ - GLint MapGrid1un; - GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du; - GLint MapGrid2un, MapGrid2vn; - GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du; - GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv; - /*@}*/ -}; - - -/** - * Fog attribute group (GL_FOG_BIT). - */ -struct gl_fog_attrib -{ - GLboolean Enabled; /**< Fog enabled flag */ - GLfloat Color[4]; /**< Fog color */ - GLfloat Density; /**< Density >= 0.0 */ - GLfloat Start; /**< Start distance in eye coords */ - GLfloat End; /**< End distance in eye coords */ - GLfloat Index; /**< Fog index */ - GLenum Mode; /**< Fog mode */ - GLboolean ColorSumEnabled; - GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */ - GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */ -}; - - -/** - * \brief Layout qualifiers for gl_FragDepth. - * - * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with - * a layout qualifier. - * - * \see enum ir_depth_layout - */ -enum gl_frag_depth_layout { - FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ - FRAG_DEPTH_LAYOUT_ANY, - FRAG_DEPTH_LAYOUT_GREATER, - FRAG_DEPTH_LAYOUT_LESS, - FRAG_DEPTH_LAYOUT_UNCHANGED -}; - - -/** - * Hint attribute group (GL_HINT_BIT). - * - * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE. - */ -struct gl_hint_attrib -{ - GLenum PerspectiveCorrection; - GLenum PointSmooth; - GLenum LineSmooth; - GLenum PolygonSmooth; - GLenum Fog; - GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */ - GLenum TextureCompression; /**< GL_ARB_texture_compression */ - GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */ - GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */ -}; - -/** - * Light state flags. - */ -/*@{*/ -#define LIGHT_SPOT 0x1 -#define LIGHT_LOCAL_VIEWER 0x2 -#define LIGHT_POSITIONAL 0x4 -#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER) -/*@}*/ - - -/** - * Lighting attribute group (GL_LIGHT_BIT). 
- */ -struct gl_light_attrib -{ - struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */ - struct gl_lightmodel Model; /**< Lighting model */ - - /** - * Must flush FLUSH_VERTICES before referencing: - */ - /*@{*/ - struct gl_material Material; /**< Includes front & back values */ - /*@}*/ - - GLboolean Enabled; /**< Lighting enabled flag */ - GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */ - GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */ - GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */ - GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */ - GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */ - GLboolean ColorMaterialEnabled; - GLenum ClampVertexColor; - - struct gl_light EnabledList; /**< List sentinel */ - - /** - * Derived state for optimizations: - */ - /*@{*/ - GLboolean _NeedEyeCoords; - GLboolean _NeedVertices; /**< Use fast shader? */ - GLbitfield _Flags; /**< LIGHT_* flags, see above */ - GLfloat _BaseColor[2][3]; - /*@}*/ -}; - - -/** - * Line attribute group (GL_LINE_BIT). - */ -struct gl_line_attrib -{ - GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */ - GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */ - GLushort StipplePattern; /**< Stipple pattern */ - GLint StippleFactor; /**< Stipple repeat factor */ - GLfloat Width; /**< Line width */ -}; - - -/** - * Display list attribute group (GL_LIST_BIT). - */ -struct gl_list_attrib -{ - GLuint ListBase; -}; - - -/** - * Multisample attribute group (GL_MULTISAMPLE_BIT). - */ -struct gl_multisample_attrib -{ - GLboolean Enabled; - GLboolean _Enabled; /**< true if Enabled and multisample buffer */ - GLboolean SampleAlphaToCoverage; - GLboolean SampleAlphaToOne; - GLboolean SampleCoverage; - GLfloat SampleCoverageValue; - GLboolean SampleCoverageInvert; -}; - - -/** - * A pixelmap (see glPixelMap) - */ -struct gl_pixelmap -{ - GLint Size; - GLfloat Map[MAX_PIXEL_MAP_TABLE]; - GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */ -}; - - -/** - * Collection of all pixelmaps - */ -struct gl_pixelmaps -{ - struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */ - struct gl_pixelmap GtoG; - struct gl_pixelmap BtoB; - struct gl_pixelmap AtoA; - struct gl_pixelmap ItoR; - struct gl_pixelmap ItoG; - struct gl_pixelmap ItoB; - struct gl_pixelmap ItoA; - struct gl_pixelmap ItoI; - struct gl_pixelmap StoS; -}; - - -/** - * Pixel attribute group (GL_PIXEL_MODE_BIT). - */ -struct gl_pixel_attrib -{ - GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */ - - /*--- Begin Pixel Transfer State ---*/ - /* Fields are in the order in which they're applied... */ - - /** Scale & Bias (index shift, offset) */ - /*@{*/ - GLfloat RedBias, RedScale; - GLfloat GreenBias, GreenScale; - GLfloat BlueBias, BlueScale; - GLfloat AlphaBias, AlphaScale; - GLfloat DepthBias, DepthScale; - GLint IndexShift, IndexOffset; - /*@}*/ - - /* Pixel Maps */ - /* Note: actual pixel maps are not part of this attrib group */ - GLboolean MapColorFlag; - GLboolean MapStencilFlag; - - /*--- End Pixel Transfer State ---*/ - - /** glPixelZoom */ - GLfloat ZoomX, ZoomY; - - /** GL_SGI_texture_color_table */ - GLfloat TextureColorTableScale[4]; /**< RGBA */ - GLfloat TextureColorTableBias[4]; /**< RGBA */ -}; - - -/** - * Point attribute group (GL_POINT_BIT). 
- */ -struct gl_point_attrib -{ - GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */ - GLfloat Size; /**< User-specified point size */ - GLfloat Params[3]; /**< GL_EXT_point_parameters */ - GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */ - GLfloat Threshold; /**< GL_EXT_point_parameters */ - GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */ - GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */ - GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/ - GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */ - GLenum SpriteOrigin; /**< GL_ARB_point_sprite */ -}; - - -/** - * Polygon attribute group (GL_POLYGON_BIT). - */ -struct gl_polygon_attrib -{ - GLenum FrontFace; /**< Either GL_CW or GL_CCW */ - GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ - GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ - GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */ - GLboolean CullFlag; /**< Culling on/off flag */ - GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */ - GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */ - GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */ - GLfloat OffsetFactor; /**< Polygon offset factor, from user */ - GLfloat OffsetUnits; /**< Polygon offset units, from user */ - GLboolean OffsetPoint; /**< Offset in GL_POINT mode */ - GLboolean OffsetLine; /**< Offset in GL_LINE mode */ - GLboolean OffsetFill; /**< Offset in GL_FILL mode */ -}; - - -/** - * Scissor attributes (GL_SCISSOR_BIT). - */ -struct gl_scissor_attrib -{ - GLboolean Enabled; /**< Scissor test enabled? */ - GLint X, Y; /**< Lower left corner of box */ - GLsizei Width, Height; /**< Size of box */ -}; - - -/** - * Stencil attribute group (GL_STENCIL_BUFFER_BIT). - * - * Three sets of stencil data are tracked so that OpenGL 2.0, - * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported - * simultaneously. In each of the stencil state arrays, element 0 corresponds - * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 / - * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the - * GL_EXT_stencil_two_side GL_BACK state. - * - * The derived value \c _BackFace is either 1 or 2 depending on whether or - * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled. - * - * The derived value \c _TestTwoSide is set when the front-face and back-face - * stencil state are different. - */ -struct gl_stencil_attrib -{ - GLboolean Enabled; /**< Enabled flag */ - GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */ - GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */ - GLboolean _Enabled; /**< Enabled and stencil buffer present */ - GLboolean _TestTwoSide; - GLubyte _BackFace; /**< Current back stencil state (1 or 2) */ - GLenum Function[3]; /**< Stencil function */ - GLenum FailFunc[3]; /**< Fail function */ - GLenum ZPassFunc[3]; /**< Depth buffer pass function */ - GLenum ZFailFunc[3]; /**< Depth buffer fail function */ - GLint Ref[3]; /**< Reference value */ - GLuint ValueMask[3]; /**< Value mask */ - GLuint WriteMask[3]; /**< Write mask */ - GLuint Clear; /**< Clear value */ -}; - - -/** - * An index for each type of texture object. These correspond to the GL - * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc. - * Note: the order is from highest priority to lowest priority. 
- */ -typedef enum -{ - TEXTURE_2D_ARRAY_INDEX, - TEXTURE_1D_ARRAY_INDEX, - TEXTURE_CUBE_INDEX, - TEXTURE_3D_INDEX, - TEXTURE_RECT_INDEX, - TEXTURE_2D_INDEX, - TEXTURE_1D_INDEX, - NUM_TEXTURE_TARGETS -} gl_texture_index; - - -/** - * Bit flags for each type of texture object - * Used for Texture.Unit[]._ReallyEnabled flags. - */ -/*@{*/ -#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX) -#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX) -#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX) -#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX) -#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX) -#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX) -#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX) -/*@}*/ - - -/** - * TexGenEnabled flags. - */ -/*@{*/ -#define S_BIT 1 -#define T_BIT 2 -#define R_BIT 4 -#define Q_BIT 8 -#define STR_BITS (S_BIT | T_BIT | R_BIT) -/*@}*/ - - -/** - * Bit flag versions of the corresponding GL_ constants. - */ -/*@{*/ -#define TEXGEN_SPHERE_MAP 0x1 -#define TEXGEN_OBJ_LINEAR 0x2 -#define TEXGEN_EYE_LINEAR 0x4 -#define TEXGEN_REFLECTION_MAP_NV 0x8 -#define TEXGEN_NORMAL_MAP_NV 0x10 - -#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \ - TEXGEN_REFLECTION_MAP_NV | \ - TEXGEN_NORMAL_MAP_NV) -#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \ - TEXGEN_REFLECTION_MAP_NV | \ - TEXGEN_NORMAL_MAP_NV | \ - TEXGEN_EYE_LINEAR) -/*@}*/ - - - -/** Tex-gen enabled for texture unit? */ -#define ENABLE_TEXGEN(unit) (1 << (unit)) - -/** Non-identity texture matrix for texture unit? */ -#define ENABLE_TEXMAT(unit) (1 << (unit)) - - -/** - * Texel fetch function prototype. We use texel fetch functions to - * extract RGBA, color indexes and depth components out of 1D, 2D and 3D - * texture images. These functions help to isolate us from the gritty - * details of all the various texture image encodings. - * - * \param texImage texture image. - * \param col texel column. - * \param row texel row. - * \param img texel image level/layer. - * \param texelOut output texel (up to 4 GLchans) - */ -typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage, - GLint col, GLint row, GLint img, - GLchan *texelOut ); - -/** - * As above, but returns floats. - * Used for depth component images and for upcoming signed/float - * texture images. - */ -typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage, - GLint col, GLint row, GLint img, - GLfloat *texelOut ); - - -typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage, - GLint col, GLint row, GLint img, - const void *texel); - - -/** - * Texture image state. Describes the dimensions of a texture image, - * the texel format and pointers to Texel Fetch functions. - */ -struct gl_texture_image -{ - GLint InternalFormat; /**< Internal format as given by the user */ - GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA, - * GL_LUMINANCE, GL_LUMINANCE_ALPHA, - * GL_INTENSITY, GL_COLOR_INDEX, - * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT - * only. Used for choosing TexEnv arithmetic. 
- */ - GLuint TexFormat; /**< The actual format: MESA_FORMAT_x */ - - GLuint Border; /**< 0 or 1 */ - GLuint Width; /**< = 2^WidthLog2 + 2*Border */ - GLuint Height; /**< = 2^HeightLog2 + 2*Border */ - GLuint Depth; /**< = 2^DepthLog2 + 2*Border */ - GLuint Width2; /**< = Width - 2*Border */ - GLuint Height2; /**< = Height - 2*Border */ - GLuint Depth2; /**< = Depth - 2*Border */ - GLuint WidthLog2; /**< = log2(Width2) */ - GLuint HeightLog2; /**< = log2(Height2) */ - GLuint DepthLog2; /**< = log2(Depth2) */ - GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */ - GLfloat WidthScale; /**< used for mipmap LOD computation */ - GLfloat HeightScale; /**< used for mipmap LOD computation */ - GLfloat DepthScale; /**< used for mipmap LOD computation */ - GLboolean IsClientData; /**< Data owned by client? */ - GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */ - - struct gl_texture_object *TexObject; /**< Pointer back to parent object */ - - FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */ - FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */ - - GLuint RowStride; /**< Padded width in units of texels */ - GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to - each 2D slice in 'Data', in texels */ - GLvoid *Data; /**< Image data, accessed via FetchTexel() */ - - /** - * \name For device driver: - */ - /*@{*/ - void *DriverData; /**< Arbitrary device driver data */ - /*@}*/ -}; - - -/** - * Indexes for cube map faces. - */ -typedef enum -{ - FACE_POS_X = 0, - FACE_NEG_X = 1, - FACE_POS_Y = 2, - FACE_NEG_Y = 3, - FACE_POS_Z = 4, - FACE_NEG_Z = 5, - MAX_FACES = 6 -} gl_face_index; - - -/** - * Texture object state. Contains the array of mipmap images, border color, - * wrap modes, filter modes, shadow/texcompare state, and the per-texture - * color palette. - */ -struct gl_texture_object -{ - _glthread_Mutex Mutex; /**< for thread safety */ - GLint RefCount; /**< reference count */ - GLuint Name; /**< the user-visible texture object ID */ - GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */ - GLfloat Priority; /**< in [0,1] */ - union { - GLfloat f[4]; - GLuint ui[4]; - GLint i[4]; - } BorderColor; /**< Interpreted according to texture format */ - GLenum WrapS; /**< S-axis texture image wrap mode */ - GLenum WrapT; /**< T-axis texture image wrap mode */ - GLenum WrapR; /**< R-axis texture image wrap mode */ - GLenum MinFilter; /**< minification filter */ - GLenum MagFilter; /**< magnification filter */ - GLfloat MinLod; /**< min lambda, OpenGL 1.2 */ - GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */ - GLfloat LodBias; /**< OpenGL 1.4 */ - GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */ - GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */ - GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ - GLenum CompareMode; /**< GL_ARB_shadow */ - GLenum CompareFunc; /**< GL_ARB_shadow */ - GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */ - GLenum DepthMode; /**< GL_ARB_depth_texture */ - GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */ - GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */ - GLint CropRect[4]; /**< GL_OES_draw_texture */ - GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */ - GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */ - GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */ - GLboolean _Complete; /**< Is texture object complete? */ - GLboolean _RenderToTexture; /**< Any rendering to this texture? 
*/ - GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ - GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */ - - /** Actual texture images, indexed by [cube face] and [mipmap level] */ - struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS]; - - /** GL_EXT_paletted_texture */ - struct gl_color_table Palette; - - /** - * \name For device driver. - * Note: instead of attaching driver data to this pointer, it's preferable - * to instead use this struct as a base class for your own texture object - * class. Driver->NewTextureObject() can be used to implement the - * allocation. - */ - void *DriverData; /**< Arbitrary device driver data */ -}; - - -/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */ -#define MAX_COMBINER_TERMS 4 - - -/** - * Texture combine environment state. - */ -struct gl_tex_env_combine_state -{ - GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ - GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ - /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */ - GLenum SourceRGB[MAX_COMBINER_TERMS]; - GLenum SourceA[MAX_COMBINER_TERMS]; - /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */ - GLenum OperandRGB[MAX_COMBINER_TERMS]; - GLenum OperandA[MAX_COMBINER_TERMS]; - GLuint ScaleShiftRGB; /**< 0, 1 or 2 */ - GLuint ScaleShiftA; /**< 0, 1 or 2 */ - GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */ - GLuint _NumArgsA; /**< Number of inputs used for the A combiner */ -}; - - -/** - * Texture coord generation state. - */ -struct gl_texgen -{ - GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */ - GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */ - GLfloat ObjectPlane[4]; - GLfloat EyePlane[4]; -}; - - -/** - * Texture unit state. Contains enable flags, texture environment/function/ - * combiners, texgen state, pointers to current texture objects and - * post-filter color tables. - */ -struct gl_texture_unit -{ - GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */ - GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */ - - GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */ - GLfloat EnvColor[4]; - - struct gl_texgen GenS; - struct gl_texgen GenT; - struct gl_texgen GenR; - struct gl_texgen GenQ; - GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */ - GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */ - - GLfloat LodBias; /**< for biasing mipmap levels */ - GLenum BumpTarget; - GLfloat RotMatrix[4]; /* 2x2 matrix */ - - /** - * \name GL_EXT_texture_env_combine - */ - struct gl_tex_env_combine_state Combine; - - /** - * Derived state based on \c EnvMode and the \c BaseFormat of the - * currently enabled texture. - */ - struct gl_tex_env_combine_state _EnvMode; - - /** - * Currently enabled combiner state. This will point to either - * \c Combine or \c _EnvMode. - */ - struct gl_tex_env_combine_state *_CurrentCombine; - - /** Current texture object pointers */ - struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS]; - - /** Points to highest priority, complete and enabled texture object */ - struct gl_texture_object *_Current; - - /** GL_SGI_texture_color_table */ - /*@{*/ - struct gl_color_table ColorTable; - struct gl_color_table ProxyColorTable; - GLboolean ColorTableEnabled; - /*@}*/ -}; - - -/** - * Texture attribute group (GL_TEXTURE_BIT). 
- */ -struct gl_texture_attrib -{ - GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */ - struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; - - struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS]; - - /** GL_ARB_seamless_cubemap */ - GLboolean CubeMapSeamless; - - /** GL_EXT_shared_texture_palette */ - GLboolean SharedPalette; - struct gl_color_table Palette; - - /** Texture units/samplers used by vertex or fragment texturing */ - GLbitfield _EnabledUnits; - - /** Texture coord units/sets used for fragment texturing */ - GLbitfield _EnabledCoordUnits; - - /** Texture coord units that have texgen enabled */ - GLbitfield _TexGenEnabled; - - /** Texture coord units that have non-identity matrices */ - GLbitfield _TexMatEnabled; - - /** Bitwise-OR of all Texture.Unit[i]._GenFlags */ - GLbitfield _GenFlags; -}; - - -/** - * Transformation attribute group (GL_TRANSFORM_BIT). - */ -struct gl_transform_attrib -{ - GLenum MatrixMode; /**< Matrix mode */ - GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */ - GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */ - GLbitfield ClipPlanesEnabled; /**< on/off bitmask */ - GLboolean Normalize; /**< Normalize all normals? */ - GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */ - GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */ - GLboolean DepthClamp; /**< GL_ARB_depth_clamp */ - - GLfloat CullEyePos[4]; - GLfloat CullObjPos[4]; -}; - - -/** - * Viewport attribute group (GL_VIEWPORT_BIT). - */ -struct gl_viewport_attrib -{ - GLint X, Y; /**< position */ - GLsizei Width, Height; /**< size */ - GLfloat Near, Far; /**< Depth buffer range */ - GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */ -}; - - -/** - * GL_ARB_vertex/pixel_buffer_object buffer object - */ -struct gl_buffer_object -{ - _glthread_Mutex Mutex; - GLint RefCount; - GLuint Name; - GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */ - GLsizeiptrARB Size; /**< Size of buffer storage in bytes */ - GLubyte *Data; /**< Location of storage either in RAM or VRAM. */ - /** Fields describing a mapped buffer */ - /*@{*/ - GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */ - GLvoid *Pointer; /**< User-space address of mapping */ - GLintptr Offset; /**< Mapped offset */ - GLsizeiptr Length; /**< Mapped length */ - /*@}*/ - GLboolean Written; /**< Ever written to? (for debugging) */ - GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ -}; - - -/** - * Client pixel packing/unpacking attributes - */ -struct gl_pixelstore_attrib -{ - GLint Alignment; - GLint RowLength; - GLint SkipPixels; - GLint SkipRows; - GLint ImageHeight; - GLint SkipImages; - GLboolean SwapBytes; - GLboolean LsbFirst; - GLboolean ClientStorage; /**< GL_APPLE_client_storage */ - GLboolean Invert; /**< GL_MESA_pack_invert */ - struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */ -}; - - -/** - * Client vertex array attributes - */ -struct gl_client_array -{ - GLint Size; /**< components per element (1,2,3,4) */ - GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */ - GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */ - GLsizei Stride; /**< user-specified stride */ - GLsizei StrideB; /**< actual stride in bytes */ - const GLubyte *Ptr; /**< Points to array data */ - GLboolean Enabled; /**< Enabled flag is a boolean */ - GLboolean Normalized; /**< GL_ARB_vertex_program */ - GLboolean Integer; /**< Integer-valued? 
*/ - GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */ - GLuint _ElementSize; /**< size of each element in bytes */ - - struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */ - GLuint _MaxElement; /**< max element index into array buffer + 1 */ -}; - - -/** - * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object - * extension, but a nice encapsulation in any case. - */ -struct gl_array_object -{ - /** Name of the array object as received from glGenVertexArrayAPPLE. */ - GLuint Name; - - GLint RefCount; - _glthread_Mutex Mutex; - GLboolean VBOonly; /**< require all arrays to live in VBOs? */ - - /** Conventional vertex arrays */ - /*@{*/ - struct gl_client_array Vertex; - struct gl_client_array Weight; - struct gl_client_array Normal; - struct gl_client_array Color; - struct gl_client_array SecondaryColor; - struct gl_client_array FogCoord; - struct gl_client_array Index; - struct gl_client_array EdgeFlag; - struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS]; - struct gl_client_array PointSize; - /*@}*/ - - /** - * Generic arrays for vertex programs/shaders. - * For NV vertex programs, these attributes alias and take priority - * over the conventional attribs above. For ARB vertex programs and - * GLSL vertex shaders, these attributes are separate. - */ - struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS]; - - /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */ - GLbitfield _Enabled; - - /** - * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs - * we can determine the max legal (in bounds) glDrawElements array index. - */ - GLuint _MaxElement; -}; - - -/** - * Vertex array state - */ -struct gl_array_attrib -{ - /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */ - struct gl_array_object *ArrayObj; - - /** The default vertex array object */ - struct gl_array_object *DefaultArrayObj; - - /** Array objects (GL_ARB/APPLE_vertex_array_object) */ - struct _mesa_HashTable *Objects; - - GLint ActiveTexture; /**< Client Active Texture */ - GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */ - GLuint LockCount; /**< GL_EXT_compiled_vertex_array */ - - /** GL 3.1 (slightly different from GL_NV_primitive_restart) */ - GLboolean PrimitiveRestart; - GLuint RestartIndex; - - GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */ - - /* GL_ARB_vertex_buffer_object */ - struct gl_buffer_object *ArrayBufferObj; - struct gl_buffer_object *ElementArrayBufferObj; -}; - - -/** - * Feedback buffer state - */ -struct gl_feedback -{ - GLenum Type; - GLbitfield _Mask; /**< FB_* bits */ - GLfloat *Buffer; - GLuint BufferSize; - GLuint Count; -}; - - -/** - * Selection buffer state - */ -struct gl_selection -{ - GLuint *Buffer; /**< selection buffer */ - GLuint BufferSize; /**< size of the selection buffer */ - GLuint BufferCount; /**< number of values in the selection buffer */ - GLuint Hits; /**< number of records in the selection buffer */ - GLuint NameStackDepth; /**< name stack depth */ - GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */ - GLboolean HitFlag; /**< hit flag */ - GLfloat HitMinZ; /**< minimum hit depth */ - GLfloat HitMaxZ; /**< maximum hit depth */ -}; - - -/** - * 1-D Evaluator control points - */ -struct gl_1d_map -{ - GLuint Order; /**< Number of control points */ - GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */ - GLfloat *Points; /**< Points to contiguous control points */ -}; - - -/** - * 2-D Evaluator control points - */ -struct gl_2d_map -{ - GLuint Uorder; 
/**< Number of control points in U dimension */ - GLuint Vorder; /**< Number of control points in V dimension */ - GLfloat u1, u2, du; - GLfloat v1, v2, dv; - GLfloat *Points; /**< Points to contiguous control points */ -}; - - -/** - * All evaluator control point state - */ -struct gl_evaluators -{ - /** - * \name 1-D maps - */ - /*@{*/ - struct gl_1d_map Map1Vertex3; - struct gl_1d_map Map1Vertex4; - struct gl_1d_map Map1Index; - struct gl_1d_map Map1Color4; - struct gl_1d_map Map1Normal; - struct gl_1d_map Map1Texture1; - struct gl_1d_map Map1Texture2; - struct gl_1d_map Map1Texture3; - struct gl_1d_map Map1Texture4; - struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */ - /*@}*/ - - /** - * \name 2-D maps - */ - /*@{*/ - struct gl_2d_map Map2Vertex3; - struct gl_2d_map Map2Vertex4; - struct gl_2d_map Map2Index; - struct gl_2d_map Map2Color4; - struct gl_2d_map Map2Normal; - struct gl_2d_map Map2Texture1; - struct gl_2d_map Map2Texture2; - struct gl_2d_map Map2Texture3; - struct gl_2d_map Map2Texture4; - struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */ - /*@}*/ -}; - - -/** - * Names of the various vertex/fragment program register files, etc. - * - * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c) - * All values should fit in a 4-bit field. - * - * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, - * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to - * be "uniform" variables since they can only be set outside glBegin/End. - * They're also all stored in the same Parameters array. - */ -typedef enum -{ - PROGRAM_TEMPORARY, /**< machine->Temporary[] */ - PROGRAM_INPUT, /**< machine->Inputs[] */ - PROGRAM_OUTPUT, /**< machine->Outputs[] */ - PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */ - PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */ - PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */ - PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */ - PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */ - PROGRAM_CONSTANT, /**< gl_program->Parameters[] */ - PROGRAM_UNIFORM, /**< gl_program->Parameters[] */ - PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */ - PROGRAM_ADDRESS, /**< machine->AddressReg */ - PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */ - PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */ - PROGRAM_UNDEFINED, /**< Invalid/TBD value */ - PROGRAM_FILE_MAX -} gl_register_file; - - -/** - * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be - * one of these values. 
- */ -typedef enum -{ - SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */ - SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */ - SYSTEM_VALUE_MAX /**< Number of values */ -} gl_system_value; - - -/** Vertex and fragment instructions */ -struct prog_instruction; -struct gl_program_parameter_list; -struct gl_uniform_list; - - -/** - * Base class for any kind of program object - */ -struct gl_program -{ - GLuint Id; - GLubyte *String; /**< Null-terminated program text */ - GLint RefCount; - GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */ - GLenum Format; /**< String encoding format */ - GLboolean Resident; - - struct prog_instruction *Instructions; - - GLbitfield InputsRead; /**< Bitmask of which input regs are read */ - GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ - GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ - GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ - GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ - GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ - GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ - - - /** Named parameters, constants, etc. from program text */ - struct gl_program_parameter_list *Parameters; - /** Numbered local parameters */ - GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4]; - - /** Vertex/fragment shader varying vars */ - struct gl_program_parameter_list *Varying; - /** Vertex program user-defined attributes */ - struct gl_program_parameter_list *Attributes; - - /** Map from sampler unit to texture unit (set by glUniform1i()) */ - GLubyte SamplerUnits[MAX_SAMPLERS]; - /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */ - gl_texture_index SamplerTargets[MAX_SAMPLERS]; - - /** Bitmask of which register files are read/written with indirect - * addressing. Mask of (1 << PROGRAM_x) bits. - */ - GLbitfield IndirectRegisterFiles; - - /** Logical counts */ - /*@{*/ - GLuint NumInstructions; - GLuint NumTemporaries; - GLuint NumParameters; - GLuint NumAttributes; - GLuint NumAddressRegs; - GLuint NumAluInstructions; - GLuint NumTexInstructions; - GLuint NumTexIndirections; - /*@}*/ - /** Native, actual h/w counts */ - /*@{*/ - GLuint NumNativeInstructions; - GLuint NumNativeTemporaries; - GLuint NumNativeParameters; - GLuint NumNativeAttributes; - GLuint NumNativeAddressRegs; - GLuint NumNativeAluInstructions; - GLuint NumNativeTexInstructions; - GLuint NumNativeTexIndirections; - /*@}*/ -}; - - -/** Vertex program object */ -struct gl_vertex_program -{ - struct gl_program Base; /**< base class */ - GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? 
*/ - GLboolean IsPositionInvariant; -}; - - -/** Geometry program object */ -struct gl_geometry_program -{ - struct gl_program Base; /**< base class */ - - GLint VerticesOut; - GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, - GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ - GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ -}; - - -/** Fragment program object */ -struct gl_fragment_program -{ - struct gl_program Base; /**< base class */ - GLenum FogOption; - GLboolean UsesKill; /**< shader uses KIL instruction */ - GLboolean OriginUpperLeft; - GLboolean PixelCenterInteger; - enum gl_frag_depth_layout FragDepthLayout; -}; - - -/** - * State common to vertex and fragment programs. - */ -struct gl_program_state -{ - GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */ - const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */ -}; - - -/** - * Context state for vertex programs. - */ -struct gl_vertex_program_state -{ - GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */ - GLboolean _Enabled; /**< Enabled and _valid_ user program? */ - GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */ - GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */ - struct gl_vertex_program *Current; /**< User-bound vertex program */ - - /** Currently enabled and valid vertex program (including internal - * programs, user-defined vertex programs and GLSL vertex shaders). - * This is the program we must use when rendering. - */ - struct gl_vertex_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ - - /* For GL_NV_vertex_program only: */ - GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4]; - GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4]; - - /** Should fixed-function T&L be implemented with a vertex prog? */ - GLboolean _MaintainTnlProgram; - - /** Program to emulate fixed-function T&L (see above) */ - struct gl_vertex_program *_TnlProgram; - - /** Cache of fixed-function programs */ - struct gl_program_cache *Cache; - - GLboolean _Overriden; -}; - - -/** - * Context state for geometry programs. - */ -struct gl_geometry_program_state -{ - GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */ - GLboolean _Enabled; /**< Enabled and valid program? */ - struct gl_geometry_program *Current; /**< user-bound geometry program */ - - /** Currently enabled and valid program (including internal programs - * and compiled shader programs). - */ - struct gl_geometry_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ - - /** Cache of fixed-function programs */ - struct gl_program_cache *Cache; -}; - -/** - * Context state for fragment programs. - */ -struct gl_fragment_program_state -{ - GLboolean Enabled; /**< User-set fragment program enable flag */ - GLboolean _Enabled; /**< Enabled and _valid_ user program? */ - struct gl_fragment_program *Current; /**< User-bound fragment program */ - - /** Currently enabled and valid fragment program (including internal - * programs, user-defined fragment programs and GLSL fragment shaders). - * This is the program we must use when rendering. - */ - struct gl_fragment_program *_Current; - - GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ - - /** Should fixed-function texturing be implemented with a fragment prog? 
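/* Illustrative sketch of the Current/_Current distinction documented above:
 * _Current is the derived program actually used for rendering, which may be
 * the user-bound program or, when _MaintainTnlProgram is set, the generated
 * fixed-function replacement.  This is a simplified stand-alone model, not
 * Mesa's real state-validation code; the tiny struct below is a stand-in.
 */
#include <stdio.h>

struct vp_state {
   int Enabled;                /* user-set enable flag              */
   const char *Current;        /* user-bound vertex program (name)  */
   int _MaintainTnlProgram;    /* emulate fixed-function T&L?       */
   const char *_TnlProgram;    /* generated fixed-function program  */
};

/* Decide what _Current should point at. */
static const char *
choose_vertex_program(const struct vp_state *vp)
{
   if (vp->Enabled && vp->Current)
      return vp->Current;         /* valid user program wins          */
   if (vp->_MaintainTnlProgram)
      return vp->_TnlProgram;     /* fixed-function T&L replacement   */
   return "fixed-function";       /* no program-based path            */
}

int main(void)
{
   struct vp_state vp = { 1, "user ARB_vp", 1, "tnl program" };
   printf("%s\n", choose_vertex_program(&vp));   /* "user ARB_vp" */
   vp.Enabled = 0;
   printf("%s\n", choose_vertex_program(&vp));   /* "tnl program" */
   return 0;
}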
*/ - GLboolean _MaintainTexEnvProgram; - - /** Program to emulate fixed-function texture env/combine (see above) */ - struct gl_fragment_program *_TexEnvProgram; - - /** Cache of fixed-function programs */ - struct gl_program_cache *Cache; -}; - - -/** - * ATI_fragment_shader runtime state - */ -#define ATI_FS_INPUT_PRIMARY 0 -#define ATI_FS_INPUT_SECONDARY 1 - -struct atifs_instruction; -struct atifs_setupinst; - -/** - * ATI fragment shader - */ -struct ati_fragment_shader -{ - GLuint Id; - GLint RefCount; - struct atifs_instruction *Instructions[2]; - struct atifs_setupinst *SetupInst[2]; - GLfloat Constants[8][4]; - GLbitfield LocalConstDef; /**< Indicates which constants have been set */ - GLubyte numArithInstr[2]; - GLubyte regsAssigned[2]; - GLubyte NumPasses; /**< 1 or 2 */ - GLubyte cur_pass; - GLubyte last_optype; - GLboolean interpinp1; - GLboolean isValid; - GLuint swizzlerq; -}; - -/** - * Context state for GL_ATI_fragment_shader - */ -struct gl_ati_fragment_shader_state -{ - GLboolean Enabled; - GLboolean _Enabled; /**< enabled and valid shader? */ - GLboolean Compiling; - GLfloat GlobalConstants[8][4]; - struct ati_fragment_shader *Current; -}; - - -/** - * Occlusion/timer query object. - */ -struct gl_query_object -{ - GLenum Target; /**< The query target, when active */ - GLuint Id; /**< hash table ID/name */ - GLuint64EXT Result; /**< the counter */ - GLboolean Active; /**< inside Begin/EndQuery */ - GLboolean Ready; /**< result is ready? */ -}; - - -/** - * Context state for query objects. - */ -struct gl_query_state -{ - struct _mesa_HashTable *QueryObjects; - struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */ - struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */ - - /** GL_NV_conditional_render */ - struct gl_query_object *CondRenderQuery; - - /** GL_EXT_transform_feedback */ - struct gl_query_object *PrimitivesGenerated; - struct gl_query_object *PrimitivesWritten; - - /** GL_ARB_timer_query */ - struct gl_query_object *TimeElapsed; - - GLenum CondRenderMode; -}; - - -/** Sync object state */ -struct gl_sync_object { - struct simple_node link; - GLenum Type; /**< GL_SYNC_FENCE */ - GLuint Name; /**< Fence name */ - GLint RefCount; /**< Reference count */ - GLboolean DeletePending; /**< Object was deleted while there were still - * live references (e.g., sync not yet finished) - */ - GLenum SyncCondition; - GLbitfield Flags; /**< Flags passed to glFenceSync */ - GLuint StatusFlag:1; /**< Has the sync object been signaled? */ -}; - - -/** Set by #pragma directives */ -struct gl_sl_pragmas -{ - GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */ - GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */ - GLboolean Optimize; /**< defaults on */ - GLboolean Debug; /**< defaults off */ -}; - - -/** - * A GLSL vertex or fragment shader object. - */ -struct gl_shader -{ - GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) 
*/ - GLuint Name; /**< AKA the handle */ - GLint RefCount; /**< Reference count */ - GLboolean DeletePending; - GLboolean CompileStatus; - const GLchar *Source; /**< Source code string */ - GLuint SourceChecksum; /**< for debug/logging purposes */ - struct gl_program *Program; /**< Post-compile assembly code */ - GLchar *InfoLog; - struct gl_sl_pragmas Pragmas; - - unsigned Version; /**< GLSL version used for linking */ - - struct exec_list *ir; - struct glsl_symbol_table *symbols; - - /** Shaders containing built-in functions that are used for linking. */ - struct gl_shader *builtins_to_link[16]; - unsigned num_builtins_to_link; -}; - - -/** - * A GLSL program object. - * Basically a linked collection of vertex and fragment shaders. - */ -struct gl_shader_program -{ - GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */ - GLuint Name; /**< aka handle or ID */ - GLint RefCount; /**< Reference count */ - GLboolean DeletePending; - - GLuint NumShaders; /**< number of attached shaders */ - struct gl_shader **Shaders; /**< List of attached the shaders */ - - /** User-defined attribute bindings (glBindAttribLocation) */ - struct gl_program_parameter_list *Attributes; - - /** Transform feedback varyings */ - struct { - GLenum BufferMode; - GLuint NumVarying; - GLchar **VaryingNames; /**< Array [NumVarying] of char * */ - } TransformFeedback; - - /** Geometry shader state - copied into gl_geometry_program at link time */ - struct { - GLint VerticesOut; - GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, - GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ - GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ - } Geom; - - /* post-link info: */ - struct gl_vertex_program *VertexProgram; /**< Linked vertex program */ - struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */ - struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */ - struct gl_uniform_list *Uniforms; - struct gl_program_parameter_list *Varying; - GLboolean LinkStatus; /**< GL_LINK_STATUS */ - GLboolean Validated; - GLboolean _Used; /**< Ever used for drawing? */ - GLchar *InfoLog; - - unsigned Version; /**< GLSL version used for linking */ - - /** - * Per-stage shaders resulting from the first stage of linking. - * - * Set of linked shaders for this program. The array is accessed using the - * \c MESA_SHADER_* defines. Entries for non-existent stages will be - * \c NULL. - */ - struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES]; -}; - - -#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */ -#define GLSL_LOG 0x2 /**< Write shaders to files */ -#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */ -#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */ -#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */ -#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */ -#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */ -#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */ - - -/** - * Context state for GLSL vertex/fragment shaders. - */ -struct gl_shader_state -{ - /** - * Programs used for rendering - * - * There is a separate program set for each shader stage. If - * GL_EXT_separate_shader_objects is not supported, each of these must point - * to \c NULL or to the same program. - */ - struct gl_shader_program *CurrentVertexProgram; - struct gl_shader_program *CurrentGeometryProgram; - struct gl_shader_program *CurrentFragmentProgram; - - /** - * Program used by glUniform calls. 
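/* The GLSL_x defines above form a debug bitmask kept in
 * gl_shader_state::Flags.  Stand-alone sketch of testing it; the flag values
 * are copied from the defines above, and the local "flags" variable stands in
 * for ctx->Shader.Flags.
 */
#include <stdio.h>

#define GLSL_DUMP      0x1   /* Dump shaders to stdout  */
#define GLSL_LOG       0x2   /* Write shaders to files  */
#define GLSL_UNIFORMS  0x10  /* Print glUniform calls   */
#define GLSL_USE_PROG  0x80  /* Log glUseProgram calls  */

static void
log_use_program(unsigned flags, unsigned prog_name)
{
   if (flags & GLSL_USE_PROG)
      printf("glUseProgram(%u)\n", prog_name);
}

int main(void)
{
   unsigned flags = GLSL_DUMP | GLSL_USE_PROG;
   log_use_program(flags, 7);          /* prints "glUseProgram(7)" */
   log_use_program(GLSL_LOG, 7);       /* prints nothing           */
   return 0;
}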
- * - * Explicitly set by \c glUseProgram and \c glActiveProgramEXT. - */ - struct gl_shader_program *ActiveProgram; - - void *MemPool; - - GLbitfield Flags; /**< Mask of GLSL_x flags */ -}; - -/** - * Compiler options for a single GLSL shaders type - */ -struct gl_shader_compiler_options -{ - /** Driver-selectable options: */ - GLboolean EmitCondCodes; /**< Use condition codes? */ - GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */ - /** - * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't - * support control flow. - */ - GLboolean EmitNoIfs; - GLboolean EmitNoLoops; - GLboolean EmitNoFunctions; - GLboolean EmitNoCont; /**< Emit CONT opcode? */ - GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ - GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ - GLboolean EmitNoPow; /**< Emit POW opcodes? */ - - /** - * \name Forms of indirect addressing the driver cannot do. - */ - /*@{*/ - GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */ - GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */ - GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */ - GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */ - /*@}*/ - - GLuint MaxUnrollIterations; - - struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */ -}; - -/** - * Transform feedback object state - */ -struct gl_transform_feedback_object -{ - GLuint Name; /**< AKA the object ID */ - GLint RefCount; - GLboolean Active; /**< Is transform feedback enabled? */ - GLboolean Paused; /**< Is transform feedback paused? */ - - /** The feedback buffers */ - GLuint BufferNames[MAX_FEEDBACK_ATTRIBS]; - struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS]; - - /** Start of feedback data in dest buffer */ - GLintptr Offset[MAX_FEEDBACK_ATTRIBS]; - /** Max data to put into dest buffer (in bytes) */ - GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS]; -}; - - -/** - * Context state for transform feedback. - */ -struct gl_transform_feedback -{ - GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */ - - GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */ - - /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */ - struct gl_buffer_object *CurrentBuffer; - - /** The table of all transform feedback objects */ - struct _mesa_HashTable *Objects; - - /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */ - struct gl_transform_feedback_object *CurrentObject; - - /** The default xform-fb object (Name==0) */ - struct gl_transform_feedback_object *DefaultObject; -}; - - - -/** - * State which can be shared by multiple contexts: - */ -struct gl_shared_state -{ - _glthread_Mutex Mutex; /**< for thread safety */ - GLint RefCount; /**< Reference count */ - struct _mesa_HashTable *DisplayList; /**< Display lists hash table */ - struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */ - - /** Default texture objects (shared by all texture units) */ - struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS]; - - /** Fallback texture used when a bound texture is incomplete */ - struct gl_texture_object *FallbackTex; - - /** - * \name Thread safety and statechange notification for texture - * objects. - * - * \todo Improve the granularity of locking. 
- */ - /*@{*/ - _glthread_Mutex TexMutex; /**< texobj thread safety */ - GLuint TextureStateStamp; /**< state notification for shared tex */ - /*@}*/ - - /** Default buffer object for vertex arrays that aren't in VBOs */ - struct gl_buffer_object *NullBufferObj; - - /** - * \name Vertex/geometry/fragment programs - */ - /*@{*/ - struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ - struct gl_vertex_program *DefaultVertexProgram; - struct gl_fragment_program *DefaultFragmentProgram; - struct gl_geometry_program *DefaultGeometryProgram; - /*@}*/ - - /* GL_ATI_fragment_shader */ - struct _mesa_HashTable *ATIShaders; - struct ati_fragment_shader *DefaultFragmentShader; - - struct _mesa_HashTable *BufferObjects; - - /** Table of both gl_shader and gl_shader_program objects */ - struct _mesa_HashTable *ShaderObjects; - - /* GL_EXT_framebuffer_object */ - struct _mesa_HashTable *RenderBuffers; - struct _mesa_HashTable *FrameBuffers; - - /* GL_ARB_sync */ - struct simple_node SyncObjects; - - void *DriverData; /**< Device driver shared state */ -}; - - - - -/** - * A renderbuffer stores colors or depth values or stencil values. - * A framebuffer object will have a collection of these. - * Data are read/written to the buffer with a handful of Get/Put functions. - * - * Instances of this object are allocated with the Driver's NewRenderbuffer - * hook. Drivers will likely wrap this class inside a driver-specific - * class to simulate inheritance. - */ -struct gl_renderbuffer -{ -#define RB_MAGIC 0xaabbccdd - int Magic; /** XXX TEMPORARY DEBUG INFO */ - _glthread_Mutex Mutex; /**< for thread safety */ - GLuint ClassID; /**< Useful for drivers */ - GLuint Name; - GLint RefCount; - GLuint Width, Height; - GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ - - GLenum InternalFormat; /**< The user-specified format */ - GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or - GL_STENCIL_INDEX. */ - GLuint Format; /**< The actual format: MESA_FORMAT_x */ - - GLubyte NumSamples; - - GLenum DataType; /**< Type of values passed to the Get/Put functions */ - GLvoid *Data; /**< This may not be used by some kinds of RBs */ - - /* Used to wrap one renderbuffer around another: */ - struct gl_renderbuffer *Wrapped; - - /* Delete this renderbuffer */ - void (*Delete)(struct gl_renderbuffer *rb); - - /* Allocate new storage for this renderbuffer */ - GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLenum internalFormat, - GLuint width, GLuint height); - - /* Lock/Unlock are called before/after calling the Get/Put functions. - * Not sure this is the right place for these yet. - void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb); - void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb); - */ - - /* Return a pointer to the element/pixel at (x,y). - * Should return NULL if the buffer memory can't be directly addressed. - */ - void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLint x, GLint y); - - /* Get/Read a row of values. - * The values will be of format _BaseFormat and type DataType. - */ - void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - GLint x, GLint y, void *values); - - /* Get/Read values at arbitrary locations. - * The values will be of format _BaseFormat and type DataType. 
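/* Sketch of the span-based access pattern the gl_renderbuffer hooks above
 * describe: core code always goes through rb->GetRow / rb->PutRow, and each
 * renderbuffer implementation supplies its own storage behind them.  The
 * stand-in below uses a plain malloc'ed RGBA8 buffer; the names mirror the
 * hooks but this is not the driver-facing API itself.
 */
#include <stdlib.h>
#include <string.h>

struct sw_rb {
   unsigned Width, Height;
   unsigned char *Data;                       /* Width * Height * 4 bytes */
   void (*GetRow)(struct sw_rb *rb, unsigned count,
                  int x, int y, void *values);
};

static void
sw_get_row(struct sw_rb *rb, unsigned count, int x, int y, void *values)
{
   memcpy(values, rb->Data + (y * rb->Width + x) * 4, count * 4);
}

int main(void)
{
   struct sw_rb rb = { 4, 4, calloc(1, 4 * 4 * 4), sw_get_row };
   unsigned char row[4 * 4];

   if (!rb.Data)
      return 1;
   rb.GetRow(&rb, 4, 0, 1, row);              /* read one 4-pixel span */
   free(rb.Data);
   return 0;
}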
- */ - void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - const GLint x[], const GLint y[], void *values); - - /* Put/Write a row of values. - * The values will be of format _BaseFormat and type DataType. - */ - void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - GLint x, GLint y, const void *values, const GLubyte *mask); - - /* Put/Write a row of RGB values. This is a special-case routine that's - * only used for RGBA renderbuffers when the source data is GL_RGB. That's - * a common case for glDrawPixels and some triangle routines. - * The values will be of format GL_RGB and type DataType. - */ - void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - GLint x, GLint y, const void *values, const GLubyte *mask); - - - /* Put/Write a row of identical values. - * The values will be of format _BaseFormat and type DataType. - */ - void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - GLint x, GLint y, const void *value, const GLubyte *mask); - - /* Put/Write values at arbitrary locations. - * The values will be of format _BaseFormat and type DataType. - */ - void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, - const GLint x[], const GLint y[], const void *values, - const GLubyte *mask); - /* Put/Write identical values at arbitrary locations. - * The values will be of format _BaseFormat and type DataType. - */ - void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLuint count, const GLint x[], const GLint y[], - const void *value, const GLubyte *mask); -}; - - -/** - * A renderbuffer attachment points to either a texture object (and specifies - * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer. - */ -struct gl_renderbuffer_attachment -{ - GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */ - GLboolean Complete; - - /** - * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the - * application supplied renderbuffer object. - */ - struct gl_renderbuffer *Renderbuffer; - - /** - * If \c Type is \c GL_TEXTURE, this stores a pointer to the application - * supplied texture object. - */ - struct gl_texture_object *Texture; - GLuint TextureLevel; /**< Attached mipmap level. */ - GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */ - GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D - * and 2D array textures */ -}; - - -/** - * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc). - * In C++ terms, think of this as a base class from which device drivers - * will make derived classes. - */ -struct gl_framebuffer -{ - _glthread_Mutex Mutex; /**< for thread safety */ - /** - * If zero, this is a window system framebuffer. If non-zero, this - * is a FBO framebuffer; note that for some devices (i.e. those with - * a natural pixel coordinate system for FBOs that differs from the - * OpenGL/Mesa coordinate system), this means that the viewport, - * polygon face orientation, and polygon stipple will have to be inverted. - */ - GLuint Name; - - GLint RefCount; - GLboolean DeletePending; - - /** - * The framebuffer's visual. Immutable if this is a window system buffer. - * Computed from attachments if user-made FBO. 
- */ - struct gl_config Visual; - - GLboolean Initialized; - - GLuint Width, Height; /**< size of frame buffer in pixels */ - - /** \name Drawing bounds (Intersection of buffer size and scissor box) */ - /*@{*/ - GLint _Xmin, _Xmax; /**< inclusive */ - GLint _Ymin, _Ymax; /**< exclusive */ - /*@}*/ - - /** \name Derived Z buffer stuff */ - /*@{*/ - GLuint _DepthMax; /**< Max depth buffer value */ - GLfloat _DepthMaxF; /**< Float max depth buffer value */ - GLfloat _MRD; /**< minimum resolvable difference in Z values */ - /*@}*/ - - /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ - GLenum _Status; - - /** Integer color values */ - GLboolean _IntegerColor; - - /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */ - struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT]; - - /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER - * attribute group and GL_PIXEL attribute group, respectively. - */ - GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS]; - GLenum ColorReadBuffer; - - /** Computed from ColorDraw/ReadBuffer above */ - GLuint _NumColorDrawBuffers; - GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */ - GLint _ColorReadBufferIndex; /* -1 = None */ - struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS]; - struct gl_renderbuffer *_ColorReadBuffer; - - /** The Actual depth/stencil buffers to use. May be wrappers around the - * depth/stencil buffers attached above. */ - struct gl_renderbuffer *_DepthBuffer; - struct gl_renderbuffer *_StencilBuffer; - - /** Delete this framebuffer */ - void (*Delete)(struct gl_framebuffer *fb); -}; - - -/** - * Precision info for shader datatypes. See glGetShaderPrecisionFormat(). - */ -struct gl_precision -{ - GLushort RangeMin; /**< min value exponent */ - GLushort RangeMax; /**< max value exponent */ - GLushort Precision; /**< number of mantissa bits */ -}; - - -/** - * Limits for vertex and fragment programs/shaders. - */ -struct gl_program_constants -{ - /* logical limits */ - GLuint MaxInstructions; - GLuint MaxAluInstructions; - GLuint MaxTexInstructions; - GLuint MaxTexIndirections; - GLuint MaxAttribs; - GLuint MaxTemps; - GLuint MaxAddressRegs; - GLuint MaxParameters; - GLuint MaxLocalParams; - GLuint MaxEnvParams; - /* native/hardware limits */ - GLuint MaxNativeInstructions; - GLuint MaxNativeAluInstructions; - GLuint MaxNativeTexInstructions; - GLuint MaxNativeTexIndirections; - GLuint MaxNativeAttribs; - GLuint MaxNativeTemps; - GLuint MaxNativeAddressRegs; - GLuint MaxNativeParameters; - /* For shaders */ - GLuint MaxUniformComponents; - /* GL_ARB_geometry_shader4 */ - GLuint MaxGeometryTextureImageUnits; - GLuint MaxGeometryVaryingComponents; - GLuint MaxVertexVaryingComponents; - GLuint MaxGeometryUniformComponents; - GLuint MaxGeometryOutputVertices; - GLuint MaxGeometryTotalOutputComponents; - /* ES 2.0 and GL_ARB_ES2_compatibility */ - struct gl_precision LowFloat, MediumFloat, HighFloat; - struct gl_precision LowInt, MediumInt, HighInt; -}; - - -/** - * Constants which may be overridden by device driver during context creation - * but are never changed after that. - */ -struct gl_constants -{ - GLint MaxTextureMbytes; /**< Max memory per image, in MB */ - GLint MaxTextureLevels; /**< Max mipmap levels. 
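/* The _Xmin/_Xmax/_Ymin/_Ymax fields above are documented as the intersection
 * of the buffer size and the scissor box.  A stand-alone sketch of that
 * derivation, clamped so the result is never negative; the scissor parameters
 * here are hypothetical inputs, not the real gl_scissor_attrib.
 */
#include <stdio.h>

static void
compute_draw_bounds(int buf_w, int buf_h,
                    int scissor_enabled, int sx, int sy, int sw, int sh,
                    int *xmin, int *xmax, int *ymin, int *ymax)
{
   *xmin = 0;      *ymin = 0;
   *xmax = buf_w;  *ymax = buf_h;
   if (scissor_enabled) {
      if (sx > *xmin)        *xmin = sx;
      if (sy > *ymin)        *ymin = sy;
      if (sx + sw < *xmax)   *xmax = sx + sw;
      if (sy + sh < *ymax)   *ymax = sy + sh;
   }
   if (*xmax < *xmin) *xmax = *xmin;   /* empty intersection */
   if (*ymax < *ymin) *ymax = *ymin;
}

int main(void)
{
   int x0, x1, y0, y1;
   compute_draw_bounds(640, 480, 1, 100, 50, 200, 300, &x0, &x1, &y0, &y1);
   printf("%d..%d x %d..%d\n", x0, x1, y0, y1);   /* 100..300 x 50..350 */
   return 0;
}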
*/ - GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */ - GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */ - GLint MaxArrayTextureLayers; /**< Max layers in array textures */ - GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixes */ - GLuint MaxTextureCoordUnits; - GLuint MaxTextureImageUnits; - GLuint MaxVertexTextureImageUnits; - GLuint MaxCombinedTextureImageUnits; - GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */ - GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ - GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */ - - GLuint MaxArrayLockSize; - - GLint SubPixelBits; - - GLfloat MinPointSize, MaxPointSize; /**< aliased */ - GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */ - GLfloat PointSizeGranularity; - GLfloat MinLineWidth, MaxLineWidth; /**< aliased */ - GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */ - GLfloat LineWidthGranularity; - - GLuint MaxColorTableSize; - - GLuint MaxClipPlanes; - GLuint MaxLights; - GLfloat MaxShininess; /**< GL_NV_light_max_exponent */ - GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */ - - GLuint MaxViewportWidth, MaxViewportHeight; - - struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */ - struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */ - struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */ - GLuint MaxProgramMatrices; - GLuint MaxProgramMatrixStackDepth; - - /** vertex array / buffer object bounds checking */ - GLboolean CheckArrayBounds; - - GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */ - - GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */ - GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */ - GLuint MaxSamples; /**< GL_ARB_framebuffer_object */ - - GLuint MaxVarying; /**< Number of float[4] varying parameters */ - - GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */ - - /** Which texture units support GL_ATI_envmap_bumpmap as targets */ - GLbitfield SupportedBumpUnits; - - /** - * Maximum amount of time, measured in nanseconds, that the server can wait. - */ - GLuint64 MaxServerWaitTimeout; - - /** GL_EXT_provoking_vertex */ - GLboolean QuadsFollowProvokingVertexConvention; - - /** OpenGL version 3.0 */ - GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */ - - /** OpenGL version 3.2 */ - GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */ - - /** GL_EXT_transform_feedback */ - GLuint MaxTransformFeedbackSeparateAttribs; - GLuint MaxTransformFeedbackSeparateComponents; - GLuint MaxTransformFeedbackInterleavedComponents; - - /** GL_EXT_gpu_shader4 */ - GLint MinProgramTexelOffset, MaxProgramTexelOffset; - - /* GL_EXT_framebuffer_sRGB */ - GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */ -}; - - -/** - * Enable flag for each OpenGL extension. Different device drivers will - * enable different extensions at runtime. - */ -struct gl_extensions -{ - GLboolean dummy; /* don't remove this! */ - GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */ - GLboolean dummy_false; /* Set false by _mesa_init_extensions(). 
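/* gl_constants::MaxTextureLevels above bounds the mipmap chain rather than a
 * pixel size directly; the largest base-level dimension that still allows a
 * complete chain is 1 << (MaxTextureLevels - 1).  Stand-alone sketch of that
 * relation (the helper name is ours, not a Mesa function).
 */
#include <stdio.h>

static int
max_texture_size(int max_levels)
{
   return 1 << (max_levels - 1);      /* level-0 size for a full mip chain */
}

int main(void)
{
   /* e.g. 12 levels -> 2048x2048 base level (2048, 1024, ..., 1) */
   printf("%d\n", max_texture_size(12));
   return 0;
}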
*/ - GLboolean ARB_ES2_compatibility; - GLboolean ARB_blend_func_extended; - GLboolean ARB_copy_buffer; - GLboolean ARB_depth_buffer_float; - GLboolean ARB_depth_clamp; - GLboolean ARB_depth_texture; - GLboolean ARB_draw_buffers; - GLboolean ARB_draw_buffers_blend; - GLboolean ARB_draw_elements_base_vertex; - GLboolean ARB_draw_instanced; - GLboolean ARB_fragment_coord_conventions; - GLboolean ARB_fragment_program; - GLboolean ARB_fragment_program_shadow; - GLboolean ARB_fragment_shader; - GLboolean ARB_framebuffer_object; - GLboolean ARB_explicit_attrib_location; - GLboolean ARB_geometry_shader4; - GLboolean ARB_half_float_pixel; - GLboolean ARB_half_float_vertex; - GLboolean ARB_instanced_arrays; - GLboolean ARB_map_buffer_range; - GLboolean ARB_multisample; - GLboolean ARB_multitexture; - GLboolean ARB_occlusion_query; - GLboolean ARB_occlusion_query2; - GLboolean ARB_point_sprite; - GLboolean ARB_sampler_objects; - GLboolean ARB_seamless_cube_map; - GLboolean ARB_shader_objects; - GLboolean ARB_shader_stencil_export; - GLboolean ARB_shading_language_100; - GLboolean ARB_shadow; - GLboolean ARB_shadow_ambient; - GLboolean ARB_sync; - GLboolean ARB_texture_border_clamp; - GLboolean ARB_texture_buffer_object; - GLboolean ARB_texture_compression; - GLboolean ARB_texture_compression_rgtc; - GLboolean ARB_texture_cube_map; - GLboolean ARB_texture_env_combine; - GLboolean ARB_texture_env_crossbar; - GLboolean ARB_texture_env_dot3; - GLboolean ARB_texture_float; - GLboolean ARB_texture_mirrored_repeat; - GLboolean ARB_texture_multisample; - GLboolean ARB_texture_non_power_of_two; - GLboolean ARB_texture_rg; - GLboolean ARB_texture_rgb10_a2ui; - GLboolean ARB_timer_query; - GLboolean ARB_transform_feedback2; - GLboolean ARB_transpose_matrix; - GLboolean ARB_uniform_buffer_object; - GLboolean ARB_vertex_array_object; - GLboolean ARB_vertex_buffer_object; - GLboolean ARB_vertex_program; - GLboolean ARB_vertex_shader; - GLboolean ARB_vertex_type_2_10_10_10_rev; - GLboolean ARB_window_pos; - GLboolean EXT_abgr; - GLboolean EXT_bgra; - GLboolean EXT_blend_color; - GLboolean EXT_blend_equation_separate; - GLboolean EXT_blend_func_separate; - GLboolean EXT_blend_logic_op; - GLboolean EXT_blend_minmax; - GLboolean EXT_blend_subtract; - GLboolean EXT_clip_volume_hint; - GLboolean EXT_compiled_vertex_array; - GLboolean EXT_copy_texture; - GLboolean EXT_depth_bounds_test; - GLboolean EXT_draw_buffers2; - GLboolean EXT_draw_range_elements; - GLboolean EXT_fog_coord; - GLboolean EXT_framebuffer_blit; - GLboolean EXT_framebuffer_multisample; - GLboolean EXT_framebuffer_object; - GLboolean EXT_framebuffer_sRGB; - GLboolean EXT_gpu_program_parameters; - GLboolean EXT_gpu_shader4; - GLboolean EXT_multi_draw_arrays; - GLboolean EXT_paletted_texture; - GLboolean EXT_packed_depth_stencil; - GLboolean EXT_packed_float; - GLboolean EXT_packed_pixels; - GLboolean EXT_pixel_buffer_object; - GLboolean EXT_point_parameters; - GLboolean EXT_polygon_offset; - GLboolean EXT_provoking_vertex; - GLboolean EXT_rescale_normal; - GLboolean EXT_shadow_funcs; - GLboolean EXT_secondary_color; - GLboolean EXT_separate_shader_objects; - GLboolean EXT_separate_specular_color; - GLboolean EXT_shared_texture_palette; - GLboolean EXT_stencil_wrap; - GLboolean EXT_stencil_two_side; - GLboolean EXT_subtexture; - GLboolean EXT_texture; - GLboolean EXT_texture_object; - GLboolean EXT_texture3D; - GLboolean EXT_texture_array; - GLboolean EXT_texture_compression_s3tc; - GLboolean EXT_texture_env_add; - GLboolean EXT_texture_env_combine; - 
GLboolean EXT_texture_env_dot3; - GLboolean EXT_texture_filter_anisotropic; - GLboolean EXT_texture_integer; - GLboolean EXT_texture_lod_bias; - GLboolean EXT_texture_mirror_clamp; - GLboolean EXT_texture_shared_exponent; - GLboolean EXT_texture_sRGB; - GLboolean EXT_texture_sRGB_decode; - GLboolean EXT_texture_swizzle; - GLboolean EXT_transform_feedback; - GLboolean EXT_timer_query; - GLboolean EXT_vertex_array; - GLboolean EXT_vertex_array_bgra; - GLboolean EXT_vertex_array_set; - GLboolean OES_standard_derivatives; - /* vendor extensions */ - GLboolean AMD_conservative_depth; - GLboolean APPLE_client_storage; - GLboolean APPLE_packed_pixels; - GLboolean APPLE_vertex_array_object; - GLboolean APPLE_object_purgeable; - GLboolean ATI_envmap_bumpmap; - GLboolean ATI_texture_mirror_once; - GLboolean ATI_texture_env_combine3; - GLboolean ATI_fragment_shader; - GLboolean ATI_separate_stencil; - GLboolean IBM_rasterpos_clip; - GLboolean IBM_multimode_draw_arrays; - GLboolean MESA_pack_invert; - GLboolean MESA_resize_buffers; - GLboolean MESA_ycbcr_texture; - GLboolean MESA_texture_array; - GLboolean MESA_texture_signed_rgba; - GLboolean NV_blend_square; - GLboolean NV_conditional_render; - GLboolean NV_fragment_program; - GLboolean NV_fragment_program_option; - GLboolean NV_light_max_exponent; - GLboolean NV_point_sprite; - GLboolean NV_primitive_restart; - GLboolean NV_texgen_reflection; - GLboolean NV_texture_env_combine4; - GLboolean NV_texture_rectangle; - GLboolean NV_vertex_program; - GLboolean NV_vertex_program1_1; - GLboolean OES_read_format; - GLboolean SGI_texture_color_table; - GLboolean SGIS_generate_mipmap; - GLboolean SGIS_texture_edge_clamp; - GLboolean SGIS_texture_lod; - GLboolean TDFX_texture_compression_FXT1; - GLboolean S3_s3tc; - GLboolean OES_EGL_image; - GLboolean OES_draw_texture; - GLboolean EXT_texture_format_BGRA8888; - GLboolean extension_sentinel; - /** The extension string */ - const GLubyte *String; - /** Number of supported extensions */ - GLuint Count; -}; - - -/** - * A stack of matrices (projection, modelview, color, texture, etc). - */ -struct gl_matrix_stack -{ - GLmatrix *Top; /**< points into Stack */ - GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */ - GLuint Depth; /**< 0 <= Depth < MaxDepth */ - GLuint MaxDepth; /**< size of Stack[] array */ - GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */ -}; - - -/** - * \name Bits for image transfer operations - * \sa __struct gl_contextRec::ImageTransferState. - */ -/*@{*/ -#define IMAGE_SCALE_BIAS_BIT 0x1 -#define IMAGE_SHIFT_OFFSET_BIT 0x2 -#define IMAGE_MAP_COLOR_BIT 0x4 -#define IMAGE_CLAMP_BIT 0x800 - - -/** Pixel Transfer ops */ -#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \ - IMAGE_SHIFT_OFFSET_BIT | \ - IMAGE_MAP_COLOR_BIT) - -/** - * \name Bits to indicate what state has changed. 
- */ -/*@{*/ -#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */ -#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */ -#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */ -#define _NEW_COLOR (1 << 3) /**< gl_context::Color */ -#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */ -#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */ -#define _NEW_FOG (1 << 6) /**< gl_context::Fog */ -#define _NEW_HINT (1 << 7) /**< gl_context::Hint */ -#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */ -#define _NEW_LINE (1 << 9) /**< gl_context::Line */ -#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */ -#define _NEW_POINT (1 << 11) /**< gl_context::Point */ -#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */ -#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */ -#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */ -#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */ -#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */ -#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */ -#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */ -#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */ -#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */ -#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */ -#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */ -#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */ -#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */ -#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */ -#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */ -#define _NEW_PROGRAM_CONSTANTS (1 << 27) -#define _NEW_BUFFER_OBJECT (1 << 28) -#define _NEW_ALL ~0 -/*@}*/ - - -/** - * \name Bits to track array state changes - * - * Also used to summarize array enabled. - */ -/*@{*/ -#define _NEW_ARRAY_VERTEX VERT_BIT_POS -#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT -#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL -#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0 -#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1 -#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG -#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX -#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG -#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */ -#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0 -#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1 -#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2 -#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3 -#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4 -#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5 -#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6 -#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7 -#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */ -#define _NEW_ARRAY_ALL 0xffffffff - - -#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i)) -#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i)) -/*@}*/ - - - -/** - * \name A bunch of flags that we think might be useful to drivers. - * - * Set in the __struct gl_contextRec::_TriangleCaps bitfield. 
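/* The _NEW_x bits above accumulate in gl_context::NewState (documented as a
 * bitwise-or of these flags): mutators OR the relevant bit in, and validation
 * code tests and clears it.  Stand-alone sketch of that pattern with two flag
 * values copied from above; the helper functions are ours, not Mesa entry
 * points.
 */
#include <stdio.h>

#define _NEW_LIGHT    (1 << 8)
#define _NEW_VIEWPORT (1 << 18)

static unsigned new_state;            /* stands in for ctx->NewState */

static void
set_light_enabled(int on)
{
   (void) on;                         /* ... store the new value ... */
   new_state |= _NEW_LIGHT;           /* remember that lighting changed */
}

static void
validate_state(void)
{
   if (new_state & _NEW_LIGHT)
      printf("revalidating lighting\n");
   if (new_state & _NEW_VIEWPORT)
      printf("revalidating viewport\n");
   new_state = 0;                     /* everything is up to date again */
}

int main(void)
{
   set_light_enabled(1);
   validate_state();                  /* prints "revalidating lighting" */
   return 0;
}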
- */ -/*@{*/ -#define DD_FLATSHADE 0x1 -#define DD_SEPARATE_SPECULAR 0x2 -#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */ -#define DD_TRI_LIGHT_TWOSIDE 0x8 -#define DD_TRI_UNFILLED 0x10 -#define DD_TRI_SMOOTH 0x20 -#define DD_TRI_STIPPLE 0x40 -#define DD_TRI_OFFSET 0x80 -#define DD_LINE_SMOOTH 0x100 -#define DD_LINE_STIPPLE 0x200 -#define DD_POINT_SMOOTH 0x400 -#define DD_POINT_ATTEN 0x800 -#define DD_TRI_TWOSTENCIL 0x1000 -/*@}*/ - - -/** - * \name Define the state changes under which each of these bits might change - */ -/*@{*/ -#define _DD_NEW_FLATSHADE _NEW_LIGHT -#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM) -#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON -#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT -#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON -#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON -#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON -#define _DD_NEW_TRI_OFFSET _NEW_POLYGON -#define _DD_NEW_LINE_SMOOTH _NEW_LINE -#define _DD_NEW_LINE_STIPPLE _NEW_LINE -#define _DD_NEW_LINE_WIDTH _NEW_LINE -#define _DD_NEW_POINT_SMOOTH _NEW_POINT -#define _DD_NEW_POINT_SIZE _NEW_POINT -#define _DD_NEW_POINT_ATTEN _NEW_POINT -/*@}*/ - - -/** - * Composite state flags - */ -/*@{*/ -#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \ - _NEW_TEXTURE | \ - _NEW_POINT | \ - _NEW_PROGRAM | \ - _NEW_MODELVIEW) - -#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \ - _NEW_TEXTURE) - -#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL) -/*@}*/ - - - - -/* This has to be included here. */ -#include "dd.h" - - -/** - * Display list flags. - * Strictly this is a tnl-private concept, but it doesn't seem - * worthwhile adding a tnl private structure just to hold this one bit - * of information: - */ -#define DLIST_DANGLING_REFS 0x1 - - -/** Opaque declaration of display list payload data type */ -union gl_dlist_node; - - -/** - * Provide a location where information about a display list can be - * collected. Could be extended with driverPrivate structures, - * etc. in the future. - */ -struct gl_display_list -{ - GLuint Name; - GLbitfield Flags; /**< DLIST_x flags */ - /** The dlist commands are in a linked list of nodes */ - union gl_dlist_node *Head; -}; - - -/** - * State used during display list compilation and execution. - */ -struct gl_dlist_state -{ - GLuint CallDepth; /**< Current recursion calling depth */ - - struct gl_display_list *CurrentList; /**< List currently being compiled */ - union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */ - GLuint CurrentPos; /**< Index into current block of nodes */ - - GLvertexformat ListVtxfmt; - - GLubyte ActiveAttribSize[VERT_ATTRIB_MAX]; - GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4]; - - GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX]; - GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4]; - - GLubyte ActiveIndex; - GLfloat CurrentIndex; - - GLubyte ActiveEdgeFlag; - GLboolean CurrentEdgeFlag; - - struct { - /* State known to have been set by the currently-compiling display - * list. Used to eliminate some redundant state changes. - */ - GLenum ShadeModel; - } Current; -}; - - -/** - * Enum for the OpenGL APIs we know about and may support. - */ -typedef enum -{ - API_OPENGL, - API_OPENGLES, - API_OPENGLES2 -} gl_api; - - -/** - * Mesa rendering context. - * - * This is the central context data structure for Mesa. Almost all - * OpenGL state is contained in this structure. - * Think of this as a base class from which device drivers will derive - * sub classes. - * - * The struct gl_context typedef names this structure. 
- */ -struct gl_context -{ - /** State possibly shared with other contexts in the address space */ - struct gl_shared_state *Shared; - - /** \name API function pointer tables */ - /*@{*/ - gl_api API; - struct _glapi_table *Save; /**< Display list save functions */ - struct _glapi_table *Exec; /**< Execute functions */ - struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */ - /*@}*/ - - struct gl_config Visual; - struct gl_framebuffer *DrawBuffer; /**< buffer for writing */ - struct gl_framebuffer *ReadBuffer; /**< buffer for reading */ - struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */ - struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */ - - /** - * Device driver function pointer table - */ - struct dd_function_table Driver; - - void *DriverCtx; /**< Points to device driver context/state */ - - /** Core/Driver constants */ - struct gl_constants Const; - - /** \name The various 4x4 matrix stacks */ - /*@{*/ - struct gl_matrix_stack ModelviewMatrixStack; - struct gl_matrix_stack ProjectionMatrixStack; - struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS]; - struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES]; - struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */ - /*@}*/ - - /** Combined modelview and projection matrix */ - GLmatrix _ModelProjectMatrix; - - /** \name Display lists */ - struct gl_dlist_state ListState; - - GLboolean ExecuteFlag; /**< Execute GL commands? */ - GLboolean CompileFlag; /**< Compile GL commands into display list? */ - - /** Extension information */ - struct gl_extensions Extensions; - - /** Version info */ - GLuint VersionMajor, VersionMinor; - char *VersionString; - - /** \name State attribute stack (for glPush/PopAttrib) */ - /*@{*/ - GLuint AttribStackDepth; - struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH]; - /*@}*/ - - /** \name Renderer attribute groups - * - * We define a struct for each attribute group to make pushing and popping - * attributes easy. Also it's a good organization. 
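/* CurrentDispatch above is documented as pointing at either Save or Exec,
 * depending on whether GL calls are currently being compiled into a display
 * list.  Stand-alone sketch of that choice; the struct and helper are
 * illustrative only, not the real glNewList/glEndList machinery.
 */
#include <stdio.h>

struct dispatch { const char *name; };

static struct dispatch exec_table = { "Exec" };
static struct dispatch save_table = { "Save" };

/* Return the table GL entry points should be routed through. */
static struct dispatch *
pick_dispatch(int compile_flag)
{
   return compile_flag ? &save_table : &exec_table;
}

int main(void)
{
   printf("%s\n", pick_dispatch(0)->name);   /* outside glNewList: "Exec" */
   printf("%s\n", pick_dispatch(1)->name);   /* inside  glNewList: "Save" */
   return 0;
}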
- */ - /*@{*/ - struct gl_accum_attrib Accum; /**< Accum buffer attributes */ - struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */ - struct gl_current_attrib Current; /**< Current attributes */ - struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */ - struct gl_eval_attrib Eval; /**< Eval attributes */ - struct gl_fog_attrib Fog; /**< Fog attributes */ - struct gl_hint_attrib Hint; /**< Hint attributes */ - struct gl_light_attrib Light; /**< Light attributes */ - struct gl_line_attrib Line; /**< Line attributes */ - struct gl_list_attrib List; /**< List attributes */ - struct gl_multisample_attrib Multisample; - struct gl_pixel_attrib Pixel; /**< Pixel attributes */ - struct gl_point_attrib Point; /**< Point attributes */ - struct gl_polygon_attrib Polygon; /**< Polygon attributes */ - GLuint PolygonStipple[32]; /**< Polygon stipple */ - struct gl_scissor_attrib Scissor; /**< Scissor attributes */ - struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */ - struct gl_texture_attrib Texture; /**< Texture attributes */ - struct gl_transform_attrib Transform; /**< Transformation attributes */ - struct gl_viewport_attrib Viewport; /**< Viewport attributes */ - /*@}*/ - - /** \name Client attribute stack */ - /*@{*/ - GLuint ClientAttribStackDepth; - struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH]; - /*@}*/ - - /** \name Client attribute groups */ - /*@{*/ - struct gl_array_attrib Array; /**< Vertex arrays */ - struct gl_pixelstore_attrib Pack; /**< Pixel packing */ - struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */ - struct gl_pixelstore_attrib DefaultPacking; /**< Default params */ - /*@}*/ - - /** \name Other assorted state (not pushed/popped on attribute stack) */ - /*@{*/ - struct gl_pixelmaps PixelMaps; - - struct gl_evaluators EvalMap; /**< All evaluators */ - struct gl_feedback Feedback; /**< Feedback */ - struct gl_selection Select; /**< Selection */ - - struct gl_program_state Program; /**< general program state */ - struct gl_vertex_program_state VertexProgram; - struct gl_fragment_program_state FragmentProgram; - struct gl_geometry_program_state GeometryProgram; - struct gl_ati_fragment_shader_state ATIFragmentShader; - - struct gl_shader_state Shader; /**< GLSL shader object state */ - struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES]; - - struct gl_query_state Query; /**< occlusion, timer queries */ - - struct gl_transform_feedback TransformFeedback; - - struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */ - struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */ - /*@}*/ - - struct gl_meta_state *Meta; /**< for "meta" operations */ - - /* GL_EXT_framebuffer_object */ - struct gl_renderbuffer *CurrentRenderbuffer; - - GLenum ErrorValue; /**< Last error code */ - - /** - * Recognize and silence repeated error debug messages in buggy apps. - */ - const char *ErrorDebugFmtString; - GLuint ErrorDebugCount; - - GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ - GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ - - GLboolean ViewportInitialized; /**< has viewport size been initialized? */ - - GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ - - /** \name Derived state */ - /*@{*/ - /** Bitwise-or of DD_* flags. Note that this bitfield may be used before - * state validation so they need to always be current. 
- */ - GLbitfield _TriangleCaps; - GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */ - GLfloat _EyeZDir[3]; - GLfloat _ModelViewInvScale; - GLboolean _NeedEyeCoords; - GLboolean _ForceEyeCoords; - - GLuint TextureStateTimestamp; /**< detect changes to shared state */ - - struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */ - struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */ - /**@}*/ - - struct gl_list_extensions *ListExt; /**< driver dlist extensions */ - - /** \name For debugging/development only */ - /*@{*/ - GLboolean FirstTimeCurrent; - /*@}*/ - - /** Dither disable via MESA_NO_DITHER env var */ - GLboolean NoDither; - - /** software compression/decompression supported or not */ - GLboolean Mesa_DXTn; - - GLboolean TextureFormatSupported[MESA_FORMAT_COUNT]; - - /** - * Use dp4 (rather than mul/mad) instructions for position - * transformation? - */ - GLboolean mvp_with_dp4; - - /** - * \name Hooks for module contexts. - * - * These will eventually live in the driver or elsewhere. - */ - /*@{*/ - void *swrast_context; - void *swsetup_context; - void *swtnl_context; - void *swtnl_im; - struct st_context *st; - void *aelt_context; - /*@}*/ -}; - - -#ifdef DEBUG -extern int MESA_VERBOSE; -extern int MESA_DEBUG_FLAGS; -# define MESA_FUNCTION __FUNCTION__ -#else -# define MESA_VERBOSE 0 -# define MESA_DEBUG_FLAGS 0 -# define MESA_FUNCTION "a function" -# ifndef NDEBUG -# define NDEBUG -# endif -#endif - - -/** The MESA_VERBOSE var is a bitmask of these flags */ -enum _verbose -{ - VERBOSE_VARRAY = 0x0001, - VERBOSE_TEXTURE = 0x0002, - VERBOSE_MATERIAL = 0x0004, - VERBOSE_PIPELINE = 0x0008, - VERBOSE_DRIVER = 0x0010, - VERBOSE_STATE = 0x0020, - VERBOSE_API = 0x0040, - VERBOSE_DISPLAY_LIST = 0x0100, - VERBOSE_LIGHTING = 0x0200, - VERBOSE_PRIMS = 0x0400, - VERBOSE_VERTS = 0x0800, - VERBOSE_DISASSEM = 0x1000, - VERBOSE_DRAW = 0x2000, - VERBOSE_SWAPBUFFERS = 0x4000 -}; - - -/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */ -enum _debug -{ - DEBUG_ALWAYS_FLUSH = 0x1 -}; - - - -#endif /* MTYPES_H */ +/* + * Mesa 3-D graphics library + * Version: 7.7 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file mtypes.h + * Main Mesa data structures. + * + * Please try to mark derived values with a leading underscore ('_'). 
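/* MESA_VERBOSE above is a bitmask of the VERBOSE_x values, and collapses to
 * the constant 0 in non-DEBUG builds so the checks compile away.  Stand-alone
 * sketch of the logging pattern; the enum values are copied from above, the
 * variable and message are made up.
 */
#include <stdio.h>

enum { VERBOSE_API = 0x0040, VERBOSE_DRAW = 0x2000 };

static int mesa_verbose = VERBOSE_API;   /* stands in for MESA_VERBOSE */

static void
api_trace(const char *call)
{
   if (mesa_verbose & VERBOSE_API)
      fprintf(stderr, "api: %s\n", call);
}

int main(void)
{
   api_trace("glClear(GL_COLOR_BUFFER_BIT)");   /* printed          */
   mesa_verbose = 0;
   api_trace("glFlush()");                      /* silently skipped */
   return 0;
}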
+ */ + +#ifndef MTYPES_H +#define MTYPES_H + + +#include "main/glheader.h" +#include "main/config.h" +#include "main/mfeatures.h" +#include "glapi/glapi.h" +#include "math/m_matrix.h" /* GLmatrix */ +#include "main/simple_list.h" /* struct simple_node */ +#include "main/formats.h" /* MESA_FORMAT_COUNT */ + + +/** + * Color channel data type. + */ +#if CHAN_BITS == 8 + typedef GLubyte GLchan; +#define CHAN_MAX 255 +#define CHAN_MAXF 255.0F +#define CHAN_TYPE GL_UNSIGNED_BYTE +#elif CHAN_BITS == 16 + typedef GLushort GLchan; +#define CHAN_MAX 65535 +#define CHAN_MAXF 65535.0F +#define CHAN_TYPE GL_UNSIGNED_SHORT +#elif CHAN_BITS == 32 + typedef GLfloat GLchan; +#define CHAN_MAX 1.0 +#define CHAN_MAXF 1.0F +#define CHAN_TYPE GL_FLOAT +#else +#error "illegal number of color channel bits" +#endif + + +/** + * Stencil buffer data type. + */ +#if STENCIL_BITS==8 + typedef GLubyte GLstencil; +#elif STENCIL_BITS==16 + typedef GLushort GLstencil; +#else +# error "illegal number of stencil bits" +#endif + + +/** + * \name 64-bit extension of GLbitfield. + */ +/*@{*/ +typedef GLuint64 GLbitfield64; + +/** Set a single bit */ +#define BITFIELD64_BIT(b) (1ULL << (b)) + + +/** + * \name Some forward type declarations + */ +/*@{*/ +struct _mesa_HashTable; +struct gl_attrib_node; +struct gl_list_extensions; +struct gl_meta_state; +struct gl_pixelstore_attrib; +struct gl_program_cache; +struct gl_texture_format; +struct gl_texture_image; +struct gl_texture_object; +struct gl_context; +struct st_context; +/*@}*/ + + +/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */ +#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1) +#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2) +#define PRIM_UNKNOWN (GL_POLYGON+3) + + +/** + * Shader stages. Note that these will become 5 with tessellation. + * These MUST have the same values as gallium's PIPE_SHADER_* + */ +typedef enum +{ + MESA_SHADER_VERTEX = 0, + MESA_SHADER_FRAGMENT = 1, + MESA_SHADER_GEOMETRY = 2, + MESA_SHADER_TYPES = 3 +} gl_shader_type; + + + +/** + * Indexes for vertex program attributes. + * GL_NV_vertex_program aliases generic attributes over the conventional + * attributes. In GL_ARB_vertex_program shader the aliasing is optional. + * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the + * generic attributes are distinct/separate). + */ +typedef enum +{ + VERT_ATTRIB_POS = 0, + VERT_ATTRIB_WEIGHT = 1, + VERT_ATTRIB_NORMAL = 2, + VERT_ATTRIB_COLOR0 = 3, + VERT_ATTRIB_COLOR1 = 4, + VERT_ATTRIB_FOG = 5, + VERT_ATTRIB_COLOR_INDEX = 6, + VERT_ATTRIB_POINT_SIZE = 6, /*alias*/ + VERT_ATTRIB_EDGEFLAG = 7, + VERT_ATTRIB_TEX0 = 8, + VERT_ATTRIB_TEX1 = 9, + VERT_ATTRIB_TEX2 = 10, + VERT_ATTRIB_TEX3 = 11, + VERT_ATTRIB_TEX4 = 12, + VERT_ATTRIB_TEX5 = 13, + VERT_ATTRIB_TEX6 = 14, + VERT_ATTRIB_TEX7 = 15, + VERT_ATTRIB_GENERIC0 = 16, + VERT_ATTRIB_GENERIC1 = 17, + VERT_ATTRIB_GENERIC2 = 18, + VERT_ATTRIB_GENERIC3 = 19, + VERT_ATTRIB_GENERIC4 = 20, + VERT_ATTRIB_GENERIC5 = 21, + VERT_ATTRIB_GENERIC6 = 22, + VERT_ATTRIB_GENERIC7 = 23, + VERT_ATTRIB_GENERIC8 = 24, + VERT_ATTRIB_GENERIC9 = 25, + VERT_ATTRIB_GENERIC10 = 26, + VERT_ATTRIB_GENERIC11 = 27, + VERT_ATTRIB_GENERIC12 = 28, + VERT_ATTRIB_GENERIC13 = 29, + VERT_ATTRIB_GENERIC14 = 30, + VERT_ATTRIB_GENERIC15 = 31, + VERT_ATTRIB_MAX = 32 +} gl_vert_attrib; + +/** + * Bitflags for vertex attributes. + * These are used in bitfields in many places. 
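/* BITFIELD64_BIT above exists because masks such as
 * gl_program::OutputsWritten can need more than 32 bits; a plain (1 << b)
 * would overflow for b >= 32.  Stand-alone sketch of setting and testing a
 * high bit; uint64_t stands in for GLuint64/GLbitfield64 so the snippet
 * builds without the GL headers.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t GLbitfield64;
#define BITFIELD64_BIT(b) (1ULL << (b))

int main(void)
{
   GLbitfield64 outputs_written = 0;

   outputs_written |= BITFIELD64_BIT(40);        /* safe beyond bit 31 */

   printf("%d\n", (outputs_written & BITFIELD64_BIT(40)) != 0);  /* 1 */
   printf("%d\n", (outputs_written & BITFIELD64_BIT(3)) != 0);   /* 0 */
   return 0;
}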
+ */ +/*@{*/ +#define VERT_BIT_POS (1 << VERT_ATTRIB_POS) +#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT) +#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL) +#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0) +#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1) +#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG) +#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX) +#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG) +#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0) +#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1) +#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2) +#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3) +#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4) +#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5) +#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6) +#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7) +#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0) +#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1) +#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2) +#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3) +#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4) +#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5) +#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6) +#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7) +#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8) +#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9) +#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10) +#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11) +#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12) +#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13) +#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14) +#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15) + +#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u))) +#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g))) +/*@}*/ + + +/** + * Indexes for vertex program result attributes + */ +typedef enum +{ + VERT_RESULT_HPOS = 0, + VERT_RESULT_COL0 = 1, + VERT_RESULT_COL1 = 2, + VERT_RESULT_FOGC = 3, + VERT_RESULT_TEX0 = 4, + VERT_RESULT_TEX1 = 5, + VERT_RESULT_TEX2 = 6, + VERT_RESULT_TEX3 = 7, + VERT_RESULT_TEX4 = 8, + VERT_RESULT_TEX5 = 9, + VERT_RESULT_TEX6 = 10, + VERT_RESULT_TEX7 = 11, + VERT_RESULT_PSIZ = 12, + VERT_RESULT_BFC0 = 13, + VERT_RESULT_BFC1 = 14, + VERT_RESULT_EDGE = 15, + VERT_RESULT_VAR0 = 16, /**< shader varying */ + VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING) +} gl_vert_result; + + +/*********************************************/ + +/** + * Indexes for geometry program attributes. + */ +typedef enum +{ + GEOM_ATTRIB_POSITION = 0, + GEOM_ATTRIB_COLOR0 = 1, + GEOM_ATTRIB_COLOR1 = 2, + GEOM_ATTRIB_SECONDARY_COLOR0 = 3, + GEOM_ATTRIB_SECONDARY_COLOR1 = 4, + GEOM_ATTRIB_FOG_FRAG_COORD = 5, + GEOM_ATTRIB_POINT_SIZE = 6, + GEOM_ATTRIB_CLIP_VERTEX = 7, + GEOM_ATTRIB_PRIMITIVE_ID = 8, + GEOM_ATTRIB_TEX_COORD = 9, + + GEOM_ATTRIB_VAR0 = 16, + GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING) +} gl_geom_attrib; + +/** + * Bitflags for geometry attributes. + * These are used in bitfields in many places. 
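/* The VERT_BIT_x values above are single-bit flags over gl_vert_attrib, with
 * VERT_BIT_TEX(u) and VERT_BIT_GENERIC(g) as indexed shorthands.  Stand-alone
 * sketch of building and testing an "arrays enabled" mask; the attribute
 * indices are copied from the enum above, everything else is local to the
 * example.
 */
#include <stdio.h>

#define VERT_ATTRIB_POS       0
#define VERT_ATTRIB_TEX0      8
#define VERT_ATTRIB_GENERIC0 16

#define VERT_BIT_POS        (1 << VERT_ATTRIB_POS)
#define VERT_BIT_TEX(u)     (1 << (VERT_ATTRIB_TEX0 + (u)))
#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g)))

int main(void)
{
   unsigned enabled = VERT_BIT_POS | VERT_BIT_TEX(1) | VERT_BIT_GENERIC(3);

   printf("%d\n", (enabled & VERT_BIT_TEX(1)) != 0);   /* 1 */
   printf("%d\n", (enabled & VERT_BIT_TEX(2)) != 0);   /* 0 */
   return 0;
}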
+ */ +/*@{*/ +#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0) +#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1) +#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0) +#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1) +#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD) +#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD) +#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION) +#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE) +#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX) +#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID) +#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0) + +#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g))) +/*@}*/ + + +/** + * Indexes for geometry program result attributes + */ +typedef enum +{ + GEOM_RESULT_POS = 0, + GEOM_RESULT_COL0 = 1, + GEOM_RESULT_COL1 = 2, + GEOM_RESULT_SCOL0 = 3, + GEOM_RESULT_SCOL1 = 4, + GEOM_RESULT_FOGC = 5, + GEOM_RESULT_TEX0 = 6, + GEOM_RESULT_TEX1 = 7, + GEOM_RESULT_TEX2 = 8, + GEOM_RESULT_TEX3 = 9, + GEOM_RESULT_TEX4 = 10, + GEOM_RESULT_TEX5 = 11, + GEOM_RESULT_TEX6 = 12, + GEOM_RESULT_TEX7 = 13, + GEOM_RESULT_PSIZ = 14, + GEOM_RESULT_CLPV = 15, + GEOM_RESULT_PRID = 16, + GEOM_RESULT_LAYR = 17, + GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */ + /* ### we need to -2 because var0 is 18 instead 16 like in the others */ + GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2) +} gl_geom_result; + + +/** + * Indexes for fragment program input attributes. + */ +typedef enum +{ + FRAG_ATTRIB_WPOS = 0, + FRAG_ATTRIB_COL0 = 1, + FRAG_ATTRIB_COL1 = 2, + FRAG_ATTRIB_FOGC = 3, + FRAG_ATTRIB_TEX0 = 4, + FRAG_ATTRIB_TEX1 = 5, + FRAG_ATTRIB_TEX2 = 6, + FRAG_ATTRIB_TEX3 = 7, + FRAG_ATTRIB_TEX4 = 8, + FRAG_ATTRIB_TEX5 = 9, + FRAG_ATTRIB_TEX6 = 10, + FRAG_ATTRIB_TEX7 = 11, + FRAG_ATTRIB_FACE = 12, /**< front/back face */ + FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */ + FRAG_ATTRIB_VAR0 = 14, /**< shader varying */ + FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING) +} gl_frag_attrib; + +/** + * Bitflags for fragment program input attributes. 
+ */ +/*@{*/ +#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS) +#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0) +#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1) +#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC) +#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE) +#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC) +#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0) +#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1) +#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2) +#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3) +#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4) +#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5) +#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6) +#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7) +#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0) + +#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U)) +#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V)) + +#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \ + FRAG_BIT_TEX1| \ + FRAG_BIT_TEX2| \ + FRAG_BIT_TEX3| \ + FRAG_BIT_TEX4| \ + FRAG_BIT_TEX5| \ + FRAG_BIT_TEX6| \ + FRAG_BIT_TEX7) +/*@}*/ + + +/** + * Fragment program results + */ +typedef enum +{ + FRAG_RESULT_DEPTH = 0, + FRAG_RESULT_STENCIL = 1, + FRAG_RESULT_COLOR = 2, + FRAG_RESULT_DATA0 = 3, + FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +} gl_frag_result; + + +/** + * Indexes for all renderbuffers + */ +typedef enum +{ + /* the four standard color buffers */ + BUFFER_FRONT_LEFT, + BUFFER_BACK_LEFT, + BUFFER_FRONT_RIGHT, + BUFFER_BACK_RIGHT, + BUFFER_DEPTH, + BUFFER_STENCIL, + BUFFER_ACCUM, + /* optional aux buffer */ + BUFFER_AUX0, + /* generic renderbuffers */ + BUFFER_COLOR0, + BUFFER_COLOR1, + BUFFER_COLOR2, + BUFFER_COLOR3, + BUFFER_COLOR4, + BUFFER_COLOR5, + BUFFER_COLOR6, + BUFFER_COLOR7, + BUFFER_COUNT +} gl_buffer_index; + +/** + * Bit flags for all renderbuffers + */ +#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT) +#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT) +#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT) +#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT) +#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0) +#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1) +#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2) +#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3) +#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH) +#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL) +#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM) +#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0) +#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1) +#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2) +#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3) +#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4) +#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5) +#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6) +#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7) + +/** + * Mask of all the color buffer bits (but not accum). + */ +#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \ + BUFFER_BIT_BACK_LEFT | \ + BUFFER_BIT_FRONT_RIGHT | \ + BUFFER_BIT_BACK_RIGHT | \ + BUFFER_BIT_AUX0 | \ + BUFFER_BIT_COLOR0 | \ + BUFFER_BIT_COLOR1 | \ + BUFFER_BIT_COLOR2 | \ + BUFFER_BIT_COLOR3 | \ + BUFFER_BIT_COLOR4 | \ + BUFFER_BIT_COLOR5 | \ + BUFFER_BIT_COLOR6 | \ + BUFFER_BIT_COLOR7) + + +/** + * Framebuffer configuration (aka visual / pixelformat) + * Note: some of these fields should be boolean, but it appears that + * code in drivers/dri/common/util.c requires int-sized fields. + */ +struct gl_config +{ + GLboolean rgbMode; + GLboolean floatMode; + GLboolean colorIndexMode; /* XXX is this used anywhere? 
 */
+   GLuint doubleBufferMode;
+   GLuint stereoMode;
+
+   GLboolean haveAccumBuffer;
+   GLboolean haveDepthBuffer;
+   GLboolean haveStencilBuffer;
+
+   GLint redBits, greenBits, blueBits, alphaBits;   /* bits per comp */
+   GLuint redMask, greenMask, blueMask, alphaMask;
+   GLint rgbBits;              /* total bits for rgb */
+   GLint indexBits;            /* total bits for colorindex */
+
+   GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits;
+   GLint depthBits;
+   GLint stencilBits;
+
+   GLint numAuxBuffers;
+
+   GLint level;
+
+   /* EXT_visual_rating / GLX 1.2 */
+   GLint visualRating;
+
+   /* EXT_visual_info / GLX 1.2 */
+   GLint transparentPixel;
+   /* colors are floats scaled to ints */
+   GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha;
+   GLint transparentIndex;
+
+   /* ARB_multisample / SGIS_multisample */
+   GLint sampleBuffers;
+   GLint samples;
+
+   /* SGIX_pbuffer / GLX 1.3 */
+   GLint maxPbufferWidth;
+   GLint maxPbufferHeight;
+   GLint maxPbufferPixels;
+   GLint optimalPbufferWidth;   /* Only for SGIX_pbuffer. */
+   GLint optimalPbufferHeight;  /* Only for SGIX_pbuffer. */
+
+   /* OML_swap_method */
+   GLint swapMethod;
+
+   /* EXT_texture_from_pixmap */
+   GLint bindToTextureRgb;
+   GLint bindToTextureRgba;
+   GLint bindToMipmapTexture;
+   GLint bindToTextureTargets;
+   GLint yInverted;
+
+   /* EXT_framebuffer_sRGB */
+   GLint sRGBCapable;
+};
+
+
+/**
+ * Data structure for color tables
+ */
+struct gl_color_table
+{
+   GLenum InternalFormat;      /**< The user-specified format */
+   GLenum _BaseFormat;         /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */
+   GLuint Size;                /**< number of entries in table */
+   GLfloat *TableF;            /**< Color table, floating point values */
+   GLubyte *TableUB;           /**< Color table, ubyte values */
+   GLubyte RedSize;
+   GLubyte GreenSize;
+   GLubyte BlueSize;
+   GLubyte AlphaSize;
+   GLubyte LuminanceSize;
+   GLubyte IntensitySize;
+};
+
+
+/**
+ * \name Bit flags used for updating material values.
+ */
+/*@{*/
+#define MAT_ATTRIB_FRONT_AMBIENT           0
+#define MAT_ATTRIB_BACK_AMBIENT            1
+#define MAT_ATTRIB_FRONT_DIFFUSE           2
+#define MAT_ATTRIB_BACK_DIFFUSE            3
+#define MAT_ATTRIB_FRONT_SPECULAR          4
+#define MAT_ATTRIB_BACK_SPECULAR           5
+#define MAT_ATTRIB_FRONT_EMISSION          6
+#define MAT_ATTRIB_BACK_EMISSION           7
+#define MAT_ATTRIB_FRONT_SHININESS         8
+#define MAT_ATTRIB_BACK_SHININESS          9
+#define MAT_ATTRIB_FRONT_INDEXES          10
+#define MAT_ATTRIB_BACK_INDEXES           11
+#define MAT_ATTRIB_MAX                    12
+
+#define MAT_ATTRIB_AMBIENT(f)   (MAT_ATTRIB_FRONT_AMBIENT+(f))
+#define MAT_ATTRIB_DIFFUSE(f)   (MAT_ATTRIB_FRONT_DIFFUSE+(f))
+#define MAT_ATTRIB_SPECULAR(f)  (MAT_ATTRIB_FRONT_SPECULAR+(f))
+#define MAT_ATTRIB_EMISSION(f)  (MAT_ATTRIB_FRONT_EMISSION+(f))
+#define MAT_ATTRIB_SHININESS(f) (MAT_ATTRIB_FRONT_SHININESS+(f))
+#define MAT_ATTRIB_INDEXES(f)   (MAT_ATTRIB_FRONT_INDEXES+(f))
+
+#define MAT_INDEX_AMBIENT  0
+#define MAT_INDEX_DIFFUSE  1
+#define MAT_INDEX_SPECULAR 2
+
+#define MAT_BIT_FRONT_AMBIENT         (1<<MAT_ATTRIB_FRONT_AMBIENT)
+#define MAT_BIT_BACK_AMBIENT          (1<<MAT_ATTRIB_BACK_AMBIENT)
+#define MAT_BIT_FRONT_DIFFUSE         (1<<MAT_ATTRIB_FRONT_DIFFUSE)
+#define MAT_BIT_BACK_DIFFUSE          (1<<MAT_ATTRIB_BACK_DIFFUSE)
+#define MAT_BIT_FRONT_SPECULAR        (1<<MAT_ATTRIB_FRONT_SPECULAR)
+#define MAT_BIT_BACK_SPECULAR         (1<<MAT_ATTRIB_BACK_SPECULAR)
+#define MAT_BIT_FRONT_EMISSION        (1<<MAT_ATTRIB_FRONT_EMISSION)
+#define MAT_BIT_BACK_EMISSION         (1<<MAT_ATTRIB_BACK_EMISSION)
+#define MAT_BIT_FRONT_SHININESS       (1<<MAT_ATTRIB_FRONT_SHININESS)
+#define MAT_BIT_BACK_SHININESS        (1<<MAT_ATTRIB_BACK_SHININESS)
+#define MAT_BIT_FRONT_INDEXES         (1<<MAT_ATTRIB_FRONT_INDEXES)
+#define MAT_BIT_BACK_INDEXES          (1<<MAT_ATTRIB_BACK_INDEXES)
+
+#define FRONT_MATERIAL_BITS   (MAT_BIT_FRONT_EMISSION |  \
+                               MAT_BIT_FRONT_AMBIENT |   \
+                               MAT_BIT_FRONT_DIFFUSE |   \
+                               MAT_BIT_FRONT_SPECULAR |  \
+                               MAT_BIT_FRONT_SHININESS | \
+                               MAT_BIT_FRONT_INDEXES)
+
+#define BACK_MATERIAL_BITS    (MAT_BIT_BACK_EMISSION |   \
+                               MAT_BIT_BACK_AMBIENT |    \
+                               MAT_BIT_BACK_DIFFUSE |    \
+                               MAT_BIT_BACK_SPECULAR |   \
+                               MAT_BIT_BACK_SHININESS |  \
+                               MAT_BIT_BACK_INDEXES)
+
+#define ALL_MATERIAL_BITS     (FRONT_MATERIAL_BITS | BACK_MATERIAL_BITS)
+/*@}*/
+
+
+#define EXP_TABLE_SIZE 512    /**< Specular exponent lookup table size */
+
+
+/**
+ * Light source state.
+ */
+struct gl_light
+{
+   struct gl_light *next;       /**< double linked list with sentinel */
+   struct gl_light *prev;
+
+   GLfloat Ambient[4];          /**< ambient color */
+   GLfloat Diffuse[4];          /**< diffuse color */
+   GLfloat Specular[4];         /**< specular color */
+   GLfloat EyePosition[4];      /**< position in eye coordinates */
+   GLfloat SpotDirection[4];    /**< spotlight direction in eye coordinates */
+   GLfloat SpotExponent;
+   GLfloat SpotCutoff;          /**< in degrees */
+   GLfloat _CosCutoffNeg;       /**< = cos(SpotCutoff) */
+   GLfloat _CosCutoff;          /**< = MAX(0, cos(SpotCutoff)) */
+   GLfloat ConstantAttenuation;
+   GLfloat LinearAttenuation;
+   GLfloat QuadraticAttenuation;
+   GLboolean Enabled;           /**< On/off flag */
+
+   /**
+    * \name Derived fields
+    */
+   /*@{*/
+   GLbitfield _Flags;           /**< Mask of LIGHT_* flags */
+
+   GLfloat _Position[4];        /**< position in eye/obj coordinates */
+   GLfloat _VP_inf_norm[3];     /**< Norm direction to infinite light */
+   GLfloat _h_inf_norm[3];      /**< Norm( _VP_inf_norm + <0,0,1> ) */
+   GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */
+   GLfloat _VP_inf_spot_attenuation;
+
+   GLfloat _SpotExpTable[EXP_TABLE_SIZE][2];  /**< to replace a pow() call */
+   GLfloat _MatAmbient[2][3];   /**< material ambient * light ambient */
+   GLfloat _MatDiffuse[2][3];   /**< material diffuse * light diffuse */
+   GLfloat _MatSpecular[2][3];  /**< material spec * light specular */
+   GLfloat _dli;                /**< CI diffuse light intensity */
+   GLfloat _sli;                /**< CI specular light intensity */
+   /*@}*/
+};
+
+
+/**
+ * Light model state.
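The MAT_ATTRIB_*(f) helpers above take a face index (0 = front, 1 = back), which lets two-sided material updates share one code path. A sketch, assuming the gl_material struct declared further below; the function itself is illustrative, not Mesa code.

/* Store an RGBA ambient value for the given face (0 = front, 1 = back). */
static void
example_set_ambient(struct gl_material *mat, GLuint face, const GLfloat rgba[4])
{
   GLuint i;
   for (i = 0; i < 4; i++)
      mat->Attrib[MAT_ATTRIB_AMBIENT(face)][i] = rgba[i];
   /* callers would also raise the dirty bit (1 << MAT_ATTRIB_AMBIENT(face)) */
}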
+ */ +struct gl_lightmodel +{ + GLfloat Ambient[4]; /**< ambient color */ + GLboolean LocalViewer; /**< Local (or infinite) view point? */ + GLboolean TwoSide; /**< Two (or one) sided lighting? */ + GLenum ColorControl; /**< either GL_SINGLE_COLOR + * or GL_SEPARATE_SPECULAR_COLOR */ +}; + + +/** + * Material state. + */ +struct gl_material +{ + GLfloat Attrib[MAT_ATTRIB_MAX][4]; +}; + + +/** + * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT) + */ +struct gl_accum_attrib +{ + GLfloat ClearColor[4]; /**< Accumulation buffer clear color */ +}; + + +/** + * Color buffer attribute group (GL_COLOR_BUFFER_BIT). + */ +struct gl_colorbuffer_attrib +{ + GLuint ClearIndex; /**< Index to use for glClear */ + GLclampf ClearColor[4]; /**< Color to use for glClear */ + + GLuint IndexMask; /**< Color index write mask */ + GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */ + + GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */ + + /** + * \name alpha testing + */ + /*@{*/ + GLboolean AlphaEnabled; /**< Alpha test enabled flag */ + GLenum AlphaFunc; /**< Alpha test function */ + GLclampf AlphaRef; /**< Alpha reference value */ + /*@}*/ + + /** + * \name Blending + */ + /*@{*/ + GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */ + GLfloat BlendColor[4]; /**< Blending color */ + struct + { + GLenum SrcRGB; /**< RGB blend source term */ + GLenum DstRGB; /**< RGB blend dest term */ + GLenum SrcA; /**< Alpha blend source term */ + GLenum DstA; /**< Alpha blend dest term */ + GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */ + GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */ + } Blend[MAX_DRAW_BUFFERS]; + /** Are the blend func terms currently different for each buffer/target? */ + GLboolean _BlendFuncPerBuffer; + /** Are the blend equations currently different for each buffer/target? */ + GLboolean _BlendEquationPerBuffer; + /*@}*/ + + /** + * \name Logic op + */ + /*@{*/ + GLenum LogicOp; /**< Logic operator */ + GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */ + GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */ + GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */ + /*@}*/ + + GLboolean DitherFlag; /**< Dither enable flag */ + + GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ + GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ + + GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */ +}; + + +/** + * Current attribute group (GL_CURRENT_BIT). + */ +struct gl_current_attrib +{ + /** + * \name Current vertex attributes. + * \note Values are valid only after FLUSH_VERTICES has been called. + * \note Index and Edgeflag current values are stored as floats in the + * SIX and SEVEN attribute slots. + */ + GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */ + + /** + * \name Current raster position attributes (always valid). + * \note This set of attributes is very similar to the SWvertex struct. + */ + /*@{*/ + GLfloat RasterPos[4]; + GLfloat RasterDistance; + GLfloat RasterColor[4]; + GLfloat RasterSecondaryColor[4]; + GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4]; + GLboolean RasterPosValid; + /*@}*/ +}; + + +/** + * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT). 
+ */ +struct gl_depthbuffer_attrib +{ + GLenum Func; /**< Function for depth buffer compare */ + GLclampd Clear; /**< Value to clear depth buffer to */ + GLboolean Test; /**< Depth buffering enabled flag */ + GLboolean Mask; /**< Depth buffer writable? */ + GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */ + GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */ +}; + + +/** + * Evaluator attribute group (GL_EVAL_BIT). + */ +struct gl_eval_attrib +{ + /** + * \name Enable bits + */ + /*@{*/ + GLboolean Map1Color4; + GLboolean Map1Index; + GLboolean Map1Normal; + GLboolean Map1TextureCoord1; + GLboolean Map1TextureCoord2; + GLboolean Map1TextureCoord3; + GLboolean Map1TextureCoord4; + GLboolean Map1Vertex3; + GLboolean Map1Vertex4; + GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */ + GLboolean Map2Color4; + GLboolean Map2Index; + GLboolean Map2Normal; + GLboolean Map2TextureCoord1; + GLboolean Map2TextureCoord2; + GLboolean Map2TextureCoord3; + GLboolean Map2TextureCoord4; + GLboolean Map2Vertex3; + GLboolean Map2Vertex4; + GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */ + GLboolean AutoNormal; + /*@}*/ + + /** + * \name Map Grid endpoints and divisions and calculated du values + */ + /*@{*/ + GLint MapGrid1un; + GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du; + GLint MapGrid2un, MapGrid2vn; + GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du; + GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv; + /*@}*/ +}; + + +/** + * Fog attribute group (GL_FOG_BIT). + */ +struct gl_fog_attrib +{ + GLboolean Enabled; /**< Fog enabled flag */ + GLfloat Color[4]; /**< Fog color */ + GLfloat Density; /**< Density >= 0.0 */ + GLfloat Start; /**< Start distance in eye coords */ + GLfloat End; /**< End distance in eye coords */ + GLfloat Index; /**< Fog index */ + GLenum Mode; /**< Fog mode */ + GLboolean ColorSumEnabled; + GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */ + GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */ +}; + + +/** + * \brief Layout qualifiers for gl_FragDepth. + * + * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with + * a layout qualifier. + * + * \see enum ir_depth_layout + */ +enum gl_frag_depth_layout { + FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ + FRAG_DEPTH_LAYOUT_ANY, + FRAG_DEPTH_LAYOUT_GREATER, + FRAG_DEPTH_LAYOUT_LESS, + FRAG_DEPTH_LAYOUT_UNCHANGED +}; + + +/** + * Hint attribute group (GL_HINT_BIT). + * + * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE. + */ +struct gl_hint_attrib +{ + GLenum PerspectiveCorrection; + GLenum PointSmooth; + GLenum LineSmooth; + GLenum PolygonSmooth; + GLenum Fog; + GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */ + GLenum TextureCompression; /**< GL_ARB_texture_compression */ + GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */ + GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */ +}; + +/** + * Light state flags. + */ +/*@{*/ +#define LIGHT_SPOT 0x1 +#define LIGHT_LOCAL_VIEWER 0x2 +#define LIGHT_POSITIONAL 0x4 +#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER) +/*@}*/ + + +/** + * Lighting attribute group (GL_LIGHT_BIT). 
+ */ +struct gl_light_attrib +{ + struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */ + struct gl_lightmodel Model; /**< Lighting model */ + + /** + * Must flush FLUSH_VERTICES before referencing: + */ + /*@{*/ + struct gl_material Material; /**< Includes front & back values */ + /*@}*/ + + GLboolean Enabled; /**< Lighting enabled flag */ + GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */ + GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */ + GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */ + GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */ + GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */ + GLboolean ColorMaterialEnabled; + GLenum ClampVertexColor; + + struct gl_light EnabledList; /**< List sentinel */ + + /** + * Derived state for optimizations: + */ + /*@{*/ + GLboolean _NeedEyeCoords; + GLboolean _NeedVertices; /**< Use fast shader? */ + GLbitfield _Flags; /**< LIGHT_* flags, see above */ + GLfloat _BaseColor[2][3]; + /*@}*/ +}; + + +/** + * Line attribute group (GL_LINE_BIT). + */ +struct gl_line_attrib +{ + GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */ + GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */ + GLushort StipplePattern; /**< Stipple pattern */ + GLint StippleFactor; /**< Stipple repeat factor */ + GLfloat Width; /**< Line width */ +}; + + +/** + * Display list attribute group (GL_LIST_BIT). + */ +struct gl_list_attrib +{ + GLuint ListBase; +}; + + +/** + * Multisample attribute group (GL_MULTISAMPLE_BIT). + */ +struct gl_multisample_attrib +{ + GLboolean Enabled; + GLboolean _Enabled; /**< true if Enabled and multisample buffer */ + GLboolean SampleAlphaToCoverage; + GLboolean SampleAlphaToOne; + GLboolean SampleCoverage; + GLfloat SampleCoverageValue; + GLboolean SampleCoverageInvert; +}; + + +/** + * A pixelmap (see glPixelMap) + */ +struct gl_pixelmap +{ + GLint Size; + GLfloat Map[MAX_PIXEL_MAP_TABLE]; + GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */ +}; + + +/** + * Collection of all pixelmaps + */ +struct gl_pixelmaps +{ + struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */ + struct gl_pixelmap GtoG; + struct gl_pixelmap BtoB; + struct gl_pixelmap AtoA; + struct gl_pixelmap ItoR; + struct gl_pixelmap ItoG; + struct gl_pixelmap ItoB; + struct gl_pixelmap ItoA; + struct gl_pixelmap ItoI; + struct gl_pixelmap StoS; +}; + + +/** + * Pixel attribute group (GL_PIXEL_MODE_BIT). + */ +struct gl_pixel_attrib +{ + GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */ + + /*--- Begin Pixel Transfer State ---*/ + /* Fields are in the order in which they're applied... */ + + /** Scale & Bias (index shift, offset) */ + /*@{*/ + GLfloat RedBias, RedScale; + GLfloat GreenBias, GreenScale; + GLfloat BlueBias, BlueScale; + GLfloat AlphaBias, AlphaScale; + GLfloat DepthBias, DepthScale; + GLint IndexShift, IndexOffset; + /*@}*/ + + /* Pixel Maps */ + /* Note: actual pixel maps are not part of this attrib group */ + GLboolean MapColorFlag; + GLboolean MapStencilFlag; + + /*--- End Pixel Transfer State ---*/ + + /** glPixelZoom */ + GLfloat ZoomX, ZoomY; + + /** GL_SGI_texture_color_table */ + GLfloat TextureColorTableScale[4]; /**< RGBA */ + GLfloat TextureColorTableBias[4]; /**< RGBA */ +}; + + +/** + * Point attribute group (GL_POINT_BIT). 
+ */ +struct gl_point_attrib +{ + GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */ + GLfloat Size; /**< User-specified point size */ + GLfloat Params[3]; /**< GL_EXT_point_parameters */ + GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */ + GLfloat Threshold; /**< GL_EXT_point_parameters */ + GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */ + GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */ + GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/ + GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */ + GLenum SpriteOrigin; /**< GL_ARB_point_sprite */ +}; + + +/** + * Polygon attribute group (GL_POLYGON_BIT). + */ +struct gl_polygon_attrib +{ + GLenum FrontFace; /**< Either GL_CW or GL_CCW */ + GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ + GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ + GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */ + GLboolean CullFlag; /**< Culling on/off flag */ + GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */ + GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */ + GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */ + GLfloat OffsetFactor; /**< Polygon offset factor, from user */ + GLfloat OffsetUnits; /**< Polygon offset units, from user */ + GLboolean OffsetPoint; /**< Offset in GL_POINT mode */ + GLboolean OffsetLine; /**< Offset in GL_LINE mode */ + GLboolean OffsetFill; /**< Offset in GL_FILL mode */ +}; + + +/** + * Scissor attributes (GL_SCISSOR_BIT). + */ +struct gl_scissor_attrib +{ + GLboolean Enabled; /**< Scissor test enabled? */ + GLint X, Y; /**< Lower left corner of box */ + GLsizei Width, Height; /**< Size of box */ +}; + + +/** + * Stencil attribute group (GL_STENCIL_BUFFER_BIT). + * + * Three sets of stencil data are tracked so that OpenGL 2.0, + * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported + * simultaneously. In each of the stencil state arrays, element 0 corresponds + * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 / + * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the + * GL_EXT_stencil_two_side GL_BACK state. + * + * The derived value \c _BackFace is either 1 or 2 depending on whether or + * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled. + * + * The derived value \c _TestTwoSide is set when the front-face and back-face + * stencil state are different. + */ +struct gl_stencil_attrib +{ + GLboolean Enabled; /**< Enabled flag */ + GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */ + GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */ + GLboolean _Enabled; /**< Enabled and stencil buffer present */ + GLboolean _TestTwoSide; + GLubyte _BackFace; /**< Current back stencil state (1 or 2) */ + GLenum Function[3]; /**< Stencil function */ + GLenum FailFunc[3]; /**< Fail function */ + GLenum ZPassFunc[3]; /**< Depth buffer pass function */ + GLenum ZFailFunc[3]; /**< Depth buffer fail function */ + GLint Ref[3]; /**< Reference value */ + GLuint ValueMask[3]; /**< Value mask */ + GLuint WriteMask[3]; /**< Write mask */ + GLuint Clear; /**< Clear value */ +}; + + +/** + * An index for each type of texture object. These correspond to the GL + * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc. + * Note: the order is from highest priority to lowest priority. 
+ */ +typedef enum +{ + TEXTURE_2D_ARRAY_INDEX, + TEXTURE_1D_ARRAY_INDEX, + TEXTURE_CUBE_INDEX, + TEXTURE_3D_INDEX, + TEXTURE_RECT_INDEX, + TEXTURE_2D_INDEX, + TEXTURE_1D_INDEX, + NUM_TEXTURE_TARGETS +} gl_texture_index; + + +/** + * Bit flags for each type of texture object + * Used for Texture.Unit[]._ReallyEnabled flags. + */ +/*@{*/ +#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX) +#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX) +#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX) +#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX) +#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX) +#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX) +#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX) +/*@}*/ + + +/** + * TexGenEnabled flags. + */ +/*@{*/ +#define S_BIT 1 +#define T_BIT 2 +#define R_BIT 4 +#define Q_BIT 8 +#define STR_BITS (S_BIT | T_BIT | R_BIT) +/*@}*/ + + +/** + * Bit flag versions of the corresponding GL_ constants. + */ +/*@{*/ +#define TEXGEN_SPHERE_MAP 0x1 +#define TEXGEN_OBJ_LINEAR 0x2 +#define TEXGEN_EYE_LINEAR 0x4 +#define TEXGEN_REFLECTION_MAP_NV 0x8 +#define TEXGEN_NORMAL_MAP_NV 0x10 + +#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \ + TEXGEN_REFLECTION_MAP_NV | \ + TEXGEN_NORMAL_MAP_NV) +#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \ + TEXGEN_REFLECTION_MAP_NV | \ + TEXGEN_NORMAL_MAP_NV | \ + TEXGEN_EYE_LINEAR) +/*@}*/ + + + +/** Tex-gen enabled for texture unit? */ +#define ENABLE_TEXGEN(unit) (1 << (unit)) + +/** Non-identity texture matrix for texture unit? */ +#define ENABLE_TEXMAT(unit) (1 << (unit)) + + +/** + * Texel fetch function prototype. We use texel fetch functions to + * extract RGBA, color indexes and depth components out of 1D, 2D and 3D + * texture images. These functions help to isolate us from the gritty + * details of all the various texture image encodings. + * + * \param texImage texture image. + * \param col texel column. + * \param row texel row. + * \param img texel image level/layer. + * \param texelOut output texel (up to 4 GLchans) + */ +typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + GLchan *texelOut ); + +/** + * As above, but returns floats. + * Used for depth component images and for upcoming signed/float + * texture images. + */ +typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + GLfloat *texelOut ); + + +typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + const void *texel); + + +/** + * Texture image state. Describes the dimensions of a texture image, + * the texel format and pointers to Texel Fetch functions. + */ +struct gl_texture_image +{ + GLint InternalFormat; /**< Internal format as given by the user */ + GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA, + * GL_LUMINANCE, GL_LUMINANCE_ALPHA, + * GL_INTENSITY, GL_COLOR_INDEX, + * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT + * only. Used for choosing TexEnv arithmetic. 
+ */ + gl_format TexFormat; /**< The actual texture memory format */ + + GLuint Border; /**< 0 or 1 */ + GLuint Width; /**< = 2^WidthLog2 + 2*Border */ + GLuint Height; /**< = 2^HeightLog2 + 2*Border */ + GLuint Depth; /**< = 2^DepthLog2 + 2*Border */ + GLuint Width2; /**< = Width - 2*Border */ + GLuint Height2; /**< = Height - 2*Border */ + GLuint Depth2; /**< = Depth - 2*Border */ + GLuint WidthLog2; /**< = log2(Width2) */ + GLuint HeightLog2; /**< = log2(Height2) */ + GLuint DepthLog2; /**< = log2(Depth2) */ + GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */ + GLfloat WidthScale; /**< used for mipmap LOD computation */ + GLfloat HeightScale; /**< used for mipmap LOD computation */ + GLfloat DepthScale; /**< used for mipmap LOD computation */ + GLboolean IsClientData; /**< Data owned by client? */ + GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */ + + struct gl_texture_object *TexObject; /**< Pointer back to parent object */ + + FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */ + FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */ + + GLuint RowStride; /**< Padded width in units of texels */ + GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to + each 2D slice in 'Data', in texels */ + GLvoid *Data; /**< Image data, accessed via FetchTexel() */ + + /** + * \name For device driver: + */ + /*@{*/ + void *DriverData; /**< Arbitrary device driver data */ + /*@}*/ +}; + + +/** + * Indexes for cube map faces. + */ +typedef enum +{ + FACE_POS_X = 0, + FACE_NEG_X = 1, + FACE_POS_Y = 2, + FACE_NEG_Y = 3, + FACE_POS_Z = 4, + FACE_NEG_Z = 5, + MAX_FACES = 6 +} gl_face_index; + + +/** + * Texture object state. Contains the array of mipmap images, border color, + * wrap modes, filter modes, shadow/texcompare state, and the per-texture + * color palette. + */ +struct gl_texture_object +{ + _glthread_Mutex Mutex; /**< for thread safety */ + GLint RefCount; /**< reference count */ + GLuint Name; /**< the user-visible texture object ID */ + GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */ + GLfloat Priority; /**< in [0,1] */ + union { + GLfloat f[4]; + GLuint ui[4]; + GLint i[4]; + } BorderColor; /**< Interpreted according to texture format */ + GLenum WrapS; /**< S-axis texture image wrap mode */ + GLenum WrapT; /**< T-axis texture image wrap mode */ + GLenum WrapR; /**< R-axis texture image wrap mode */ + GLenum MinFilter; /**< minification filter */ + GLenum MagFilter; /**< magnification filter */ + GLfloat MinLod; /**< min lambda, OpenGL 1.2 */ + GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */ + GLfloat LodBias; /**< OpenGL 1.4 */ + GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */ + GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */ + GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ + GLenum CompareMode; /**< GL_ARB_shadow */ + GLenum CompareFunc; /**< GL_ARB_shadow */ + GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */ + GLenum DepthMode; /**< GL_ARB_depth_texture */ + GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */ + GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */ + GLint CropRect[4]; /**< GL_OES_draw_texture */ + GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */ + GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */ + GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */ + GLboolean _Complete; /**< Is texture object complete? */ + GLboolean _RenderToTexture; /**< Any rendering to this texture? 
*/ + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ + GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */ + + /** Actual texture images, indexed by [cube face] and [mipmap level] */ + struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS]; + + /** GL_EXT_paletted_texture */ + struct gl_color_table Palette; + + /** + * \name For device driver. + * Note: instead of attaching driver data to this pointer, it's preferable + * to instead use this struct as a base class for your own texture object + * class. Driver->NewTextureObject() can be used to implement the + * allocation. + */ + void *DriverData; /**< Arbitrary device driver data */ +}; + + +/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */ +#define MAX_COMBINER_TERMS 4 + + +/** + * Texture combine environment state. + */ +struct gl_tex_env_combine_state +{ + GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ + GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ + /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */ + GLenum SourceRGB[MAX_COMBINER_TERMS]; + GLenum SourceA[MAX_COMBINER_TERMS]; + /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */ + GLenum OperandRGB[MAX_COMBINER_TERMS]; + GLenum OperandA[MAX_COMBINER_TERMS]; + GLuint ScaleShiftRGB; /**< 0, 1 or 2 */ + GLuint ScaleShiftA; /**< 0, 1 or 2 */ + GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */ + GLuint _NumArgsA; /**< Number of inputs used for the A combiner */ +}; + + +/** + * Texture coord generation state. + */ +struct gl_texgen +{ + GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */ + GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */ + GLfloat ObjectPlane[4]; + GLfloat EyePlane[4]; +}; + + +/** + * Texture unit state. Contains enable flags, texture environment/function/ + * combiners, texgen state, pointers to current texture objects and + * post-filter color tables. + */ +struct gl_texture_unit +{ + GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */ + GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */ + + GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */ + GLfloat EnvColor[4]; + + struct gl_texgen GenS; + struct gl_texgen GenT; + struct gl_texgen GenR; + struct gl_texgen GenQ; + GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */ + GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */ + + GLfloat LodBias; /**< for biasing mipmap levels */ + GLenum BumpTarget; + GLfloat RotMatrix[4]; /* 2x2 matrix */ + + /** + * \name GL_EXT_texture_env_combine + */ + struct gl_tex_env_combine_state Combine; + + /** + * Derived state based on \c EnvMode and the \c BaseFormat of the + * currently enabled texture. + */ + struct gl_tex_env_combine_state _EnvMode; + + /** + * Currently enabled combiner state. This will point to either + * \c Combine or \c _EnvMode. + */ + struct gl_tex_env_combine_state *_CurrentCombine; + + /** Current texture object pointers */ + struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS]; + + /** Points to highest priority, complete and enabled texture object */ + struct gl_texture_object *_Current; + + /** GL_SGI_texture_color_table */ + /*@{*/ + struct gl_color_table ColorTable; + struct gl_color_table ProxyColorTable; + GLboolean ColorTableEnabled; + /*@}*/ +}; + + +/** + * Texture attribute group (GL_TEXTURE_BIT). 
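The _CurrentCombine comment above says the pointer refers either to the user-specified Combine state or to the derived _EnvMode translation of the classic env modes. A sketch of that choice; the GL_COMBINE4_NV case and texture-completeness details are omitted, and the helper is illustrative only.

static void
example_choose_combiner(struct gl_texture_unit *unit)
{
   if (unit->EnvMode == GL_COMBINE)
      unit->_CurrentCombine = &unit->Combine;    /* user combiner state */
   else
      unit->_CurrentCombine = &unit->_EnvMode;   /* derived from EnvMode */
}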
+ */ +struct gl_texture_attrib +{ + GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */ + struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; + + struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS]; + + /** GL_ARB_seamless_cubemap */ + GLboolean CubeMapSeamless; + + /** GL_EXT_shared_texture_palette */ + GLboolean SharedPalette; + struct gl_color_table Palette; + + /** Texture units/samplers used by vertex or fragment texturing */ + GLbitfield _EnabledUnits; + + /** Texture coord units/sets used for fragment texturing */ + GLbitfield _EnabledCoordUnits; + + /** Texture coord units that have texgen enabled */ + GLbitfield _TexGenEnabled; + + /** Texture coord units that have non-identity matrices */ + GLbitfield _TexMatEnabled; + + /** Bitwise-OR of all Texture.Unit[i]._GenFlags */ + GLbitfield _GenFlags; +}; + + +/** + * Transformation attribute group (GL_TRANSFORM_BIT). + */ +struct gl_transform_attrib +{ + GLenum MatrixMode; /**< Matrix mode */ + GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */ + GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */ + GLbitfield ClipPlanesEnabled; /**< on/off bitmask */ + GLboolean Normalize; /**< Normalize all normals? */ + GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */ + GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */ + GLboolean DepthClamp; /**< GL_ARB_depth_clamp */ + + GLfloat CullEyePos[4]; + GLfloat CullObjPos[4]; +}; + + +/** + * Viewport attribute group (GL_VIEWPORT_BIT). + */ +struct gl_viewport_attrib +{ + GLint X, Y; /**< position */ + GLsizei Width, Height; /**< size */ + GLfloat Near, Far; /**< Depth buffer range */ + GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */ +}; + + +/** + * GL_ARB_vertex/pixel_buffer_object buffer object + */ +struct gl_buffer_object +{ + _glthread_Mutex Mutex; + GLint RefCount; + GLuint Name; + GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */ + GLsizeiptrARB Size; /**< Size of buffer storage in bytes */ + GLubyte *Data; /**< Location of storage either in RAM or VRAM. */ + /** Fields describing a mapped buffer */ + /*@{*/ + GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */ + GLvoid *Pointer; /**< User-space address of mapping */ + GLintptr Offset; /**< Mapped offset */ + GLsizeiptr Length; /**< Mapped length */ + /*@}*/ + GLboolean Written; /**< Ever written to? (for debugging) */ + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ +}; + + +/** + * Client pixel packing/unpacking attributes + */ +struct gl_pixelstore_attrib +{ + GLint Alignment; + GLint RowLength; + GLint SkipPixels; + GLint SkipRows; + GLint ImageHeight; + GLint SkipImages; + GLboolean SwapBytes; + GLboolean LsbFirst; + GLboolean ClientStorage; /**< GL_APPLE_client_storage */ + GLboolean Invert; /**< GL_MESA_pack_invert */ + struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */ +}; + + +/** + * Client vertex array attributes + */ +struct gl_client_array +{ + GLint Size; /**< components per element (1,2,3,4) */ + GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */ + GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */ + GLsizei Stride; /**< user-specified stride */ + GLsizei StrideB; /**< actual stride in bytes */ + const GLubyte *Ptr; /**< Points to array data */ + GLboolean Enabled; /**< Enabled flag is a boolean */ + GLboolean Normalized; /**< GL_ARB_vertex_program */ + GLboolean Integer; /**< Integer-valued? 
*/ + GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */ + GLuint _ElementSize; /**< size of each element in bytes */ + + struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */ + GLuint _MaxElement; /**< max element index into array buffer + 1 */ +}; + + +/** + * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object + * extension, but a nice encapsulation in any case. + */ +struct gl_array_object +{ + /** Name of the array object as received from glGenVertexArrayAPPLE. */ + GLuint Name; + + GLint RefCount; + _glthread_Mutex Mutex; + GLboolean VBOonly; /**< require all arrays to live in VBOs? */ + + /** Conventional vertex arrays */ + /*@{*/ + struct gl_client_array Vertex; + struct gl_client_array Weight; + struct gl_client_array Normal; + struct gl_client_array Color; + struct gl_client_array SecondaryColor; + struct gl_client_array FogCoord; + struct gl_client_array Index; + struct gl_client_array EdgeFlag; + struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS]; + struct gl_client_array PointSize; + /*@}*/ + + /** + * Generic arrays for vertex programs/shaders. + * For NV vertex programs, these attributes alias and take priority + * over the conventional attribs above. For ARB vertex programs and + * GLSL vertex shaders, these attributes are separate. + */ + struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS]; + + /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */ + GLbitfield _Enabled; + + /** + * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs + * we can determine the max legal (in bounds) glDrawElements array index. + */ + GLuint _MaxElement; +}; + + +/** + * Vertex array state + */ +struct gl_array_attrib +{ + /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */ + struct gl_array_object *ArrayObj; + + /** The default vertex array object */ + struct gl_array_object *DefaultArrayObj; + + /** Array objects (GL_ARB/APPLE_vertex_array_object) */ + struct _mesa_HashTable *Objects; + + GLint ActiveTexture; /**< Client Active Texture */ + GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */ + GLuint LockCount; /**< GL_EXT_compiled_vertex_array */ + + /** GL 3.1 (slightly different from GL_NV_primitive_restart) */ + GLboolean PrimitiveRestart; + GLuint RestartIndex; + + GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */ + GLboolean RebindArrays; /**< whether the VBO module should rebind arrays */ + + /* GL_ARB_vertex_buffer_object */ + struct gl_buffer_object *ArrayBufferObj; + struct gl_buffer_object *ElementArrayBufferObj; +}; + + +/** + * Feedback buffer state + */ +struct gl_feedback +{ + GLenum Type; + GLbitfield _Mask; /**< FB_* bits */ + GLfloat *Buffer; + GLuint BufferSize; + GLuint Count; +}; + + +/** + * Selection buffer state + */ +struct gl_selection +{ + GLuint *Buffer; /**< selection buffer */ + GLuint BufferSize; /**< size of the selection buffer */ + GLuint BufferCount; /**< number of values in the selection buffer */ + GLuint Hits; /**< number of records in the selection buffer */ + GLuint NameStackDepth; /**< name stack depth */ + GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */ + GLboolean HitFlag; /**< hit flag */ + GLfloat HitMinZ; /**< minimum hit depth */ + GLfloat HitMaxZ; /**< maximum hit depth */ +}; + + +/** + * 1-D Evaluator control points + */ +struct gl_1d_map +{ + GLuint Order; /**< Number of control points */ + GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */ + GLfloat *Points; /**< Points to contiguous control points */ +}; + + 
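gl_array_object::_MaxElement above is documented as the minimum of the enabled arrays' per-array _MaxElement values, which bounds the largest index glDrawElements may legally reference. A sketch of that reduction over two of the conventional arrays; the remaining arrays would be handled the same way, and the helper is illustrative only.

static GLuint
example_compute_max_element(const struct gl_array_object *arrayObj)
{
   GLuint max = ~0u;

   if (arrayObj->Vertex.Enabled && arrayObj->Vertex._MaxElement < max)
      max = arrayObj->Vertex._MaxElement;
   if (arrayObj->Normal.Enabled && arrayObj->Normal._MaxElement < max)
      max = arrayObj->Normal._MaxElement;
   /* ... likewise for Color, TexCoord[i], VertexAttrib[i], etc. */

   return max;
}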
+/** + * 2-D Evaluator control points + */ +struct gl_2d_map +{ + GLuint Uorder; /**< Number of control points in U dimension */ + GLuint Vorder; /**< Number of control points in V dimension */ + GLfloat u1, u2, du; + GLfloat v1, v2, dv; + GLfloat *Points; /**< Points to contiguous control points */ +}; + + +/** + * All evaluator control point state + */ +struct gl_evaluators +{ + /** + * \name 1-D maps + */ + /*@{*/ + struct gl_1d_map Map1Vertex3; + struct gl_1d_map Map1Vertex4; + struct gl_1d_map Map1Index; + struct gl_1d_map Map1Color4; + struct gl_1d_map Map1Normal; + struct gl_1d_map Map1Texture1; + struct gl_1d_map Map1Texture2; + struct gl_1d_map Map1Texture3; + struct gl_1d_map Map1Texture4; + struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */ + /*@}*/ + + /** + * \name 2-D maps + */ + /*@{*/ + struct gl_2d_map Map2Vertex3; + struct gl_2d_map Map2Vertex4; + struct gl_2d_map Map2Index; + struct gl_2d_map Map2Color4; + struct gl_2d_map Map2Normal; + struct gl_2d_map Map2Texture1; + struct gl_2d_map Map2Texture2; + struct gl_2d_map Map2Texture3; + struct gl_2d_map Map2Texture4; + struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */ + /*@}*/ +}; + + +/** + * Names of the various vertex/fragment program register files, etc. + * + * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c) + * All values should fit in a 4-bit field. + * + * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, + * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to + * be "uniform" variables since they can only be set outside glBegin/End. + * They're also all stored in the same Parameters array. + */ +typedef enum +{ + PROGRAM_TEMPORARY, /**< machine->Temporary[] */ + PROGRAM_INPUT, /**< machine->Inputs[] */ + PROGRAM_OUTPUT, /**< machine->Outputs[] */ + PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */ + PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */ + PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */ + PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */ + PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */ + PROGRAM_CONSTANT, /**< gl_program->Parameters[] */ + PROGRAM_UNIFORM, /**< gl_program->Parameters[] */ + PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */ + PROGRAM_ADDRESS, /**< machine->AddressReg */ + PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */ + PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */ + PROGRAM_UNDEFINED, /**< Invalid/TBD value */ + PROGRAM_FILE_MAX +} gl_register_file; + + +/** + * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. 
+ */ +typedef enum +{ + SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */ + SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */ + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + +/** Vertex and fragment instructions */ +struct prog_instruction; +struct gl_program_parameter_list; +struct gl_uniform_list; + + +/** + * Base class for any kind of program object + */ +struct gl_program +{ + GLuint Id; + GLubyte *String; /**< Null-terminated program text */ + GLint RefCount; + GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */ + GLenum Format; /**< String encoding format */ + GLboolean Resident; + + struct prog_instruction *Instructions; + + GLbitfield InputsRead; /**< Bitmask of which input regs are read */ + GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ + GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ + GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ + GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ + GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ + GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ + + + /** Named parameters, constants, etc. from program text */ + struct gl_program_parameter_list *Parameters; + /** Numbered local parameters */ + GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4]; + + /** Vertex/fragment shader varying vars */ + struct gl_program_parameter_list *Varying; + /** Vertex program user-defined attributes */ + struct gl_program_parameter_list *Attributes; + + /** Map from sampler unit to texture unit (set by glUniform1i()) */ + GLubyte SamplerUnits[MAX_SAMPLERS]; + /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */ + gl_texture_index SamplerTargets[MAX_SAMPLERS]; + + /** Bitmask of which register files are read/written with indirect + * addressing. Mask of (1 << PROGRAM_x) bits. + */ + GLbitfield IndirectRegisterFiles; + + /** Logical counts */ + /*@{*/ + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint NumAttributes; + GLuint NumAddressRegs; + GLuint NumAluInstructions; + GLuint NumTexInstructions; + GLuint NumTexIndirections; + /*@}*/ + /** Native, actual h/w counts */ + /*@{*/ + GLuint NumNativeInstructions; + GLuint NumNativeTemporaries; + GLuint NumNativeParameters; + GLuint NumNativeAttributes; + GLuint NumNativeAddressRegs; + GLuint NumNativeAluInstructions; + GLuint NumNativeTexInstructions; + GLuint NumNativeTexIndirections; + /*@}*/ +}; + + +/** Vertex program object */ +struct gl_vertex_program +{ + struct gl_program Base; /**< base class */ + GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? 
*/ + GLboolean IsPositionInvariant; +}; + + +/** Geometry program object */ +struct gl_geometry_program +{ + struct gl_program Base; /**< base class */ + + GLint VerticesOut; + GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, + GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ + GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ +}; + + +/** Fragment program object */ +struct gl_fragment_program +{ + struct gl_program Base; /**< base class */ + GLenum FogOption; + GLboolean UsesKill; /**< shader uses KIL instruction */ + GLboolean OriginUpperLeft; + GLboolean PixelCenterInteger; + enum gl_frag_depth_layout FragDepthLayout; +}; + + +/** + * State common to vertex and fragment programs. + */ +struct gl_program_state +{ + GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */ + const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */ +}; + + +/** + * Context state for vertex programs. + */ +struct gl_vertex_program_state +{ + GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */ + GLboolean _Enabled; /**< Enabled and _valid_ user program? */ + GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */ + GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */ + struct gl_vertex_program *Current; /**< User-bound vertex program */ + + /** Currently enabled and valid vertex program (including internal + * programs, user-defined vertex programs and GLSL vertex shaders). + * This is the program we must use when rendering. + */ + struct gl_vertex_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /* For GL_NV_vertex_program only: */ + GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4]; + GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4]; + + /** Should fixed-function T&L be implemented with a vertex prog? */ + GLboolean _MaintainTnlProgram; + + /** Program to emulate fixed-function T&L (see above) */ + struct gl_vertex_program *_TnlProgram; + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; + + GLboolean _Overriden; +}; + + +/** + * Context state for geometry programs. + */ +struct gl_geometry_program_state +{ + GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */ + GLboolean _Enabled; /**< Enabled and valid program? */ + struct gl_geometry_program *Current; /**< user-bound geometry program */ + + /** Currently enabled and valid program (including internal programs + * and compiled shader programs). + */ + struct gl_geometry_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; +}; + +/** + * Context state for fragment programs. + */ +struct gl_fragment_program_state +{ + GLboolean Enabled; /**< User-set fragment program enable flag */ + GLboolean _Enabled; /**< Enabled and _valid_ user program? */ + struct gl_fragment_program *Current; /**< User-bound fragment program */ + + /** Currently enabled and valid fragment program (including internal + * programs, user-defined fragment programs and GLSL fragment shaders). + * This is the program we must use when rendering. + */ + struct gl_fragment_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /** Should fixed-function texturing be implemented with a fragment prog? 
*/ + GLboolean _MaintainTexEnvProgram; + + /** Program to emulate fixed-function texture env/combine (see above) */ + struct gl_fragment_program *_TexEnvProgram; + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; +}; + + +/** + * ATI_fragment_shader runtime state + */ +#define ATI_FS_INPUT_PRIMARY 0 +#define ATI_FS_INPUT_SECONDARY 1 + +struct atifs_instruction; +struct atifs_setupinst; + +/** + * ATI fragment shader + */ +struct ati_fragment_shader +{ + GLuint Id; + GLint RefCount; + struct atifs_instruction *Instructions[2]; + struct atifs_setupinst *SetupInst[2]; + GLfloat Constants[8][4]; + GLbitfield LocalConstDef; /**< Indicates which constants have been set */ + GLubyte numArithInstr[2]; + GLubyte regsAssigned[2]; + GLubyte NumPasses; /**< 1 or 2 */ + GLubyte cur_pass; + GLubyte last_optype; + GLboolean interpinp1; + GLboolean isValid; + GLuint swizzlerq; +}; + +/** + * Context state for GL_ATI_fragment_shader + */ +struct gl_ati_fragment_shader_state +{ + GLboolean Enabled; + GLboolean _Enabled; /**< enabled and valid shader? */ + GLboolean Compiling; + GLfloat GlobalConstants[8][4]; + struct ati_fragment_shader *Current; +}; + + +/** + * Occlusion/timer query object. + */ +struct gl_query_object +{ + GLenum Target; /**< The query target, when active */ + GLuint Id; /**< hash table ID/name */ + GLuint64EXT Result; /**< the counter */ + GLboolean Active; /**< inside Begin/EndQuery */ + GLboolean Ready; /**< result is ready? */ +}; + + +/** + * Context state for query objects. + */ +struct gl_query_state +{ + struct _mesa_HashTable *QueryObjects; + struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */ + struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */ + + /** GL_NV_conditional_render */ + struct gl_query_object *CondRenderQuery; + + /** GL_EXT_transform_feedback */ + struct gl_query_object *PrimitivesGenerated; + struct gl_query_object *PrimitivesWritten; + + /** GL_ARB_timer_query */ + struct gl_query_object *TimeElapsed; + + GLenum CondRenderMode; +}; + + +/** Sync object state */ +struct gl_sync_object { + struct simple_node link; + GLenum Type; /**< GL_SYNC_FENCE */ + GLuint Name; /**< Fence name */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; /**< Object was deleted while there were still + * live references (e.g., sync not yet finished) + */ + GLenum SyncCondition; + GLbitfield Flags; /**< Flags passed to glFenceSync */ + GLuint StatusFlag:1; /**< Has the sync object been signaled? */ +}; + + +/** Set by #pragma directives */ +struct gl_sl_pragmas +{ + GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */ + GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */ + GLboolean Optimize; /**< defaults on */ + GLboolean Debug; /**< defaults off */ +}; + + +/** + * A GLSL vertex or fragment shader object. + */ +struct gl_shader +{ + GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) 
*/ + GLuint Name; /**< AKA the handle */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; + GLboolean CompileStatus; + const GLchar *Source; /**< Source code string */ + GLuint SourceChecksum; /**< for debug/logging purposes */ + struct gl_program *Program; /**< Post-compile assembly code */ + GLchar *InfoLog; + struct gl_sl_pragmas Pragmas; + + unsigned Version; /**< GLSL version used for linking */ + + struct exec_list *ir; + struct glsl_symbol_table *symbols; + + /** Shaders containing built-in functions that are used for linking. */ + struct gl_shader *builtins_to_link[16]; + unsigned num_builtins_to_link; +}; + + +/** + * A GLSL program object. + * Basically a linked collection of vertex and fragment shaders. + */ +struct gl_shader_program +{ + GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */ + GLuint Name; /**< aka handle or ID */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; + + GLuint NumShaders; /**< number of attached shaders */ + struct gl_shader **Shaders; /**< List of attached the shaders */ + + /** User-defined attribute bindings (glBindAttribLocation) */ + struct gl_program_parameter_list *Attributes; + + /** Transform feedback varyings */ + struct { + GLenum BufferMode; + GLuint NumVarying; + GLchar **VaryingNames; /**< Array [NumVarying] of char * */ + } TransformFeedback; + + /** Geometry shader state - copied into gl_geometry_program at link time */ + struct { + GLint VerticesOut; + GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, + GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ + GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ + } Geom; + + /* post-link info: */ + struct gl_vertex_program *VertexProgram; /**< Linked vertex program */ + struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */ + struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */ + struct gl_uniform_list *Uniforms; + struct gl_program_parameter_list *Varying; + GLboolean LinkStatus; /**< GL_LINK_STATUS */ + GLboolean Validated; + GLboolean _Used; /**< Ever used for drawing? */ + GLchar *InfoLog; + + unsigned Version; /**< GLSL version used for linking */ + + /** + * Per-stage shaders resulting from the first stage of linking. + * + * Set of linked shaders for this program. The array is accessed using the + * \c MESA_SHADER_* defines. Entries for non-existent stages will be + * \c NULL. + */ + struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES]; +}; + + +#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */ +#define GLSL_LOG 0x2 /**< Write shaders to files */ +#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */ +#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */ +#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */ +#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */ +#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */ +#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */ + + +/** + * Context state for GLSL vertex/fragment shaders. + */ +struct gl_shader_state +{ + /** + * Programs used for rendering + * + * There is a separate program set for each shader stage. If + * GL_EXT_separate_shader_objects is not supported, each of these must point + * to \c NULL or to the same program. + */ + struct gl_shader_program *CurrentVertexProgram; + struct gl_shader_program *CurrentGeometryProgram; + struct gl_shader_program *CurrentFragmentProgram; + + /** + * Program used by glUniform calls. 
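The GLSL_* flags above are debug switches kept in the Flags bitfield of gl_shader_state (just below). A sketch of how two of them might be honored at compile time; illustrative only, and it assumes <stdio.h>.

static void
example_debug_shader(GLbitfield flags, const struct gl_shader *sh)
{
   if (flags & GLSL_DUMP)
      printf("GLSL shader %u source:\n%s\n", sh->Name, sh->Source);

   if (flags & GLSL_LOG) {
      /* write sh->Source to a log file, e.g. named after sh->SourceChecksum */
   }
}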
+ * + * Explicitly set by \c glUseProgram and \c glActiveProgramEXT. + */ + struct gl_shader_program *ActiveProgram; + + void *MemPool; + + GLbitfield Flags; /**< Mask of GLSL_x flags */ +}; + +/** + * Compiler options for a single GLSL shaders type + */ +struct gl_shader_compiler_options +{ + /** Driver-selectable options: */ + GLboolean EmitCondCodes; /**< Use condition codes? */ + GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */ + /** + * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't + * support control flow. + */ + GLboolean EmitNoIfs; + GLboolean EmitNoLoops; + GLboolean EmitNoFunctions; + GLboolean EmitNoCont; /**< Emit CONT opcode? */ + GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ + GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ + GLboolean EmitNoPow; /**< Emit POW opcodes? */ + + /** + * \name Forms of indirect addressing the driver cannot do. + */ + /*@{*/ + GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */ + GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */ + GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */ + GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */ + /*@}*/ + + GLuint MaxUnrollIterations; + + struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */ +}; + +/** + * Transform feedback object state + */ +struct gl_transform_feedback_object +{ + GLuint Name; /**< AKA the object ID */ + GLint RefCount; + GLboolean Active; /**< Is transform feedback enabled? */ + GLboolean Paused; /**< Is transform feedback paused? */ + + /** The feedback buffers */ + GLuint BufferNames[MAX_FEEDBACK_ATTRIBS]; + struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS]; + + /** Start of feedback data in dest buffer */ + GLintptr Offset[MAX_FEEDBACK_ATTRIBS]; + /** Max data to put into dest buffer (in bytes) */ + GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS]; +}; + + +/** + * Context state for transform feedback. + */ +struct gl_transform_feedback +{ + GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */ + + GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */ + + /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */ + struct gl_buffer_object *CurrentBuffer; + + /** The table of all transform feedback objects */ + struct _mesa_HashTable *Objects; + + /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */ + struct gl_transform_feedback_object *CurrentObject; + + /** The default xform-fb object (Name==0) */ + struct gl_transform_feedback_object *DefaultObject; +}; + + + +/** + * State which can be shared by multiple contexts: + */ +struct gl_shared_state +{ + _glthread_Mutex Mutex; /**< for thread safety */ + GLint RefCount; /**< Reference count */ + struct _mesa_HashTable *DisplayList; /**< Display lists hash table */ + struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */ + + /** Default texture objects (shared by all texture units) */ + struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS]; + + /** Fallback texture used when a bound texture is incomplete */ + struct gl_texture_object *FallbackTex; + + /** + * \name Thread safety and statechange notification for texture + * objects. + * + * \todo Improve the granularity of locking. 
+ */ + /*@{*/ + _glthread_Mutex TexMutex; /**< texobj thread safety */ + GLuint TextureStateStamp; /**< state notification for shared tex */ + /*@}*/ + + /** Default buffer object for vertex arrays that aren't in VBOs */ + struct gl_buffer_object *NullBufferObj; + + /** + * \name Vertex/geometry/fragment programs + */ + /*@{*/ + struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ + struct gl_vertex_program *DefaultVertexProgram; + struct gl_fragment_program *DefaultFragmentProgram; + struct gl_geometry_program *DefaultGeometryProgram; + /*@}*/ + + /* GL_ATI_fragment_shader */ + struct _mesa_HashTable *ATIShaders; + struct ati_fragment_shader *DefaultFragmentShader; + + struct _mesa_HashTable *BufferObjects; + + /** Table of both gl_shader and gl_shader_program objects */ + struct _mesa_HashTable *ShaderObjects; + + /* GL_EXT_framebuffer_object */ + struct _mesa_HashTable *RenderBuffers; + struct _mesa_HashTable *FrameBuffers; + + /* GL_ARB_sync */ + struct simple_node SyncObjects; + + void *DriverData; /**< Device driver shared state */ +}; + + + + +/** + * A renderbuffer stores colors or depth values or stencil values. + * A framebuffer object will have a collection of these. + * Data are read/written to the buffer with a handful of Get/Put functions. + * + * Instances of this object are allocated with the Driver's NewRenderbuffer + * hook. Drivers will likely wrap this class inside a driver-specific + * class to simulate inheritance. + */ +struct gl_renderbuffer +{ +#define RB_MAGIC 0xaabbccdd + int Magic; /** XXX TEMPORARY DEBUG INFO */ + _glthread_Mutex Mutex; /**< for thread safety */ + GLuint ClassID; /**< Useful for drivers */ + GLuint Name; + GLint RefCount; + GLuint Width, Height; + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ + + GLenum InternalFormat; /**< The user-specified format */ + GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or + GL_STENCIL_INDEX. */ + gl_format Format; /**< The actual renderbuffer memory format */ + + GLubyte NumSamples; + + GLenum DataType; /**< Type of values passed to the Get/Put functions */ + GLvoid *Data; /**< This may not be used by some kinds of RBs */ + + /* Used to wrap one renderbuffer around another: */ + struct gl_renderbuffer *Wrapped; + + /* Delete this renderbuffer */ + void (*Delete)(struct gl_renderbuffer *rb); + + /* Allocate new storage for this renderbuffer */ + GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height); + + /* Lock/Unlock are called before/after calling the Get/Put functions. + * Not sure this is the right place for these yet. + void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb); + void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb); + */ + + /* Return a pointer to the element/pixel at (x,y). + * Should return NULL if the buffer memory can't be directly addressed. + */ + void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLint x, GLint y); + + /* Get/Read a row of values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, void *values); + + /* Get/Read values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. 
+ */ + void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + const GLint x[], const GLint y[], void *values); + + /* Put/Write a row of values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask); + + /* Put/Write a row of RGB values. This is a special-case routine that's + * only used for RGBA renderbuffers when the source data is GL_RGB. That's + * a common case for glDrawPixels and some triangle routines. + * The values will be of format GL_RGB and type DataType. + */ + void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask); + + + /* Put/Write a row of identical values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask); + + /* Put/Write values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + const GLint x[], const GLint y[], const void *values, + const GLubyte *mask); + /* Put/Write identical values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask); +}; + + +/** + * A renderbuffer attachment points to either a texture object (and specifies + * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer. + */ +struct gl_renderbuffer_attachment +{ + GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */ + GLboolean Complete; + + /** + * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the + * application supplied renderbuffer object. + */ + struct gl_renderbuffer *Renderbuffer; + + /** + * If \c Type is \c GL_TEXTURE, this stores a pointer to the application + * supplied texture object. + */ + struct gl_texture_object *Texture; + GLuint TextureLevel; /**< Attached mipmap level. */ + GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */ + GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D + * and 2D array textures */ +}; + + +/** + * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc). + * In C++ terms, think of this as a base class from which device drivers + * will make derived classes. + */ +struct gl_framebuffer +{ + _glthread_Mutex Mutex; /**< for thread safety */ + /** + * If zero, this is a window system framebuffer. If non-zero, this + * is a FBO framebuffer; note that for some devices (i.e. those with + * a natural pixel coordinate system for FBOs that differs from the + * OpenGL/Mesa coordinate system), this means that the viewport, + * polygon face orientation, and polygon stipple will have to be inverted. + */ + GLuint Name; + + GLint RefCount; + GLboolean DeletePending; + + /** + * The framebuffer's visual. Immutable if this is a window system buffer. + * Computed from attachments if user-made FBO. 
+ */ + struct gl_config Visual; + + GLboolean Initialized; + + GLuint Width, Height; /**< size of frame buffer in pixels */ + + /** \name Drawing bounds (Intersection of buffer size and scissor box) */ + /*@{*/ + GLint _Xmin, _Xmax; /**< inclusive */ + GLint _Ymin, _Ymax; /**< exclusive */ + /*@}*/ + + /** \name Derived Z buffer stuff */ + /*@{*/ + GLuint _DepthMax; /**< Max depth buffer value */ + GLfloat _DepthMaxF; /**< Float max depth buffer value */ + GLfloat _MRD; /**< minimum resolvable difference in Z values */ + /*@}*/ + + /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ + GLenum _Status; + + /** Integer color values */ + GLboolean _IntegerColor; + + /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */ + struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT]; + + /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER + * attribute group and GL_PIXEL attribute group, respectively. + */ + GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS]; + GLenum ColorReadBuffer; + + /** Computed from ColorDraw/ReadBuffer above */ + GLuint _NumColorDrawBuffers; + GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */ + GLint _ColorReadBufferIndex; /* -1 = None */ + struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS]; + struct gl_renderbuffer *_ColorReadBuffer; + + /** The Actual depth/stencil buffers to use. May be wrappers around the + * depth/stencil buffers attached above. */ + struct gl_renderbuffer *_DepthBuffer; + struct gl_renderbuffer *_StencilBuffer; + + /** Delete this framebuffer */ + void (*Delete)(struct gl_framebuffer *fb); +}; + + +/** + * Precision info for shader datatypes. See glGetShaderPrecisionFormat(). + */ +struct gl_precision +{ + GLushort RangeMin; /**< min value exponent */ + GLushort RangeMax; /**< max value exponent */ + GLushort Precision; /**< number of mantissa bits */ +}; + + +/** + * Limits for vertex and fragment programs/shaders. + */ +struct gl_program_constants +{ + /* logical limits */ + GLuint MaxInstructions; + GLuint MaxAluInstructions; + GLuint MaxTexInstructions; + GLuint MaxTexIndirections; + GLuint MaxAttribs; + GLuint MaxTemps; + GLuint MaxAddressRegs; + GLuint MaxParameters; + GLuint MaxLocalParams; + GLuint MaxEnvParams; + /* native/hardware limits */ + GLuint MaxNativeInstructions; + GLuint MaxNativeAluInstructions; + GLuint MaxNativeTexInstructions; + GLuint MaxNativeTexIndirections; + GLuint MaxNativeAttribs; + GLuint MaxNativeTemps; + GLuint MaxNativeAddressRegs; + GLuint MaxNativeParameters; + /* For shaders */ + GLuint MaxUniformComponents; + /* GL_ARB_geometry_shader4 */ + GLuint MaxGeometryTextureImageUnits; + GLuint MaxGeometryVaryingComponents; + GLuint MaxVertexVaryingComponents; + GLuint MaxGeometryUniformComponents; + GLuint MaxGeometryOutputVertices; + GLuint MaxGeometryTotalOutputComponents; + /* ES 2.0 and GL_ARB_ES2_compatibility */ + struct gl_precision LowFloat, MediumFloat, HighFloat; + struct gl_precision LowInt, MediumInt, HighInt; +}; + + +/** + * Constants which may be overridden by device driver during context creation + * but are never changed after that. + */ +struct gl_constants +{ + GLint MaxTextureMbytes; /**< Max memory per image, in MB */ + GLint MaxTextureLevels; /**< Max mipmap levels. 
 */ + GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */ + GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */ + GLint MaxArrayTextureLayers; /**< Max layers in array textures */ + GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixels */ + GLuint MaxTextureCoordUnits; + GLuint MaxTextureImageUnits; + GLuint MaxVertexTextureImageUnits; + GLuint MaxCombinedTextureImageUnits; + GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */ + GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ + GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */ + + GLuint MaxArrayLockSize; + + GLint SubPixelBits; + + GLfloat MinPointSize, MaxPointSize; /**< aliased */ + GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */ + GLfloat PointSizeGranularity; + GLfloat MinLineWidth, MaxLineWidth; /**< aliased */ + GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */ + GLfloat LineWidthGranularity; + + GLuint MaxColorTableSize; + + GLuint MaxClipPlanes; + GLuint MaxLights; + GLfloat MaxShininess; /**< GL_NV_light_max_exponent */ + GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */ + + GLuint MaxViewportWidth, MaxViewportHeight; + + struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */ + struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */ + struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */ + GLuint MaxProgramMatrices; + GLuint MaxProgramMatrixStackDepth; + + /** vertex array / buffer object bounds checking */ + GLboolean CheckArrayBounds; + + GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */ + + GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */ + GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */ + GLuint MaxSamples; /**< GL_ARB_framebuffer_object */ + + GLuint MaxVarying; /**< Number of float[4] varying parameters */ + + GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */ + + /** Which texture units support GL_ATI_envmap_bumpmap as targets */ + GLbitfield SupportedBumpUnits; + + /** + * Maximum amount of time, measured in nanoseconds, that the server can wait. + */ + GLuint64 MaxServerWaitTimeout; + + /** GL_EXT_provoking_vertex */ + GLboolean QuadsFollowProvokingVertexConvention; + + /** OpenGL version 3.0 */ + GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */ + + /** OpenGL version 3.2 */ + GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */ + + /** GL_EXT_transform_feedback */ + GLuint MaxTransformFeedbackSeparateAttribs; + GLuint MaxTransformFeedbackSeparateComponents; + GLuint MaxTransformFeedbackInterleavedComponents; + + /** GL_EXT_gpu_shader4 */ + GLint MinProgramTexelOffset, MaxProgramTexelOffset; + + /* GL_EXT_framebuffer_sRGB */ + GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */ +}; + + +/** + * Enable flag for each OpenGL extension. Different device drivers will + * enable different extensions at runtime. + */ +struct gl_extensions +{ + GLboolean dummy; /* don't remove this! */ + GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */ + GLboolean dummy_false; /* Set false by _mesa_init_extensions(). 
*/ + GLboolean ARB_ES2_compatibility; + GLboolean ARB_blend_func_extended; + GLboolean ARB_copy_buffer; + GLboolean ARB_depth_buffer_float; + GLboolean ARB_depth_clamp; + GLboolean ARB_depth_texture; + GLboolean ARB_draw_buffers; + GLboolean ARB_draw_buffers_blend; + GLboolean ARB_draw_elements_base_vertex; + GLboolean ARB_draw_instanced; + GLboolean ARB_fragment_coord_conventions; + GLboolean ARB_fragment_program; + GLboolean ARB_fragment_program_shadow; + GLboolean ARB_fragment_shader; + GLboolean ARB_framebuffer_object; + GLboolean ARB_explicit_attrib_location; + GLboolean ARB_geometry_shader4; + GLboolean ARB_half_float_pixel; + GLboolean ARB_half_float_vertex; + GLboolean ARB_instanced_arrays; + GLboolean ARB_map_buffer_range; + GLboolean ARB_multisample; + GLboolean ARB_multitexture; + GLboolean ARB_occlusion_query; + GLboolean ARB_occlusion_query2; + GLboolean ARB_point_sprite; + GLboolean ARB_sampler_objects; + GLboolean ARB_seamless_cube_map; + GLboolean ARB_shader_objects; + GLboolean ARB_shader_stencil_export; + GLboolean ARB_shading_language_100; + GLboolean ARB_shadow; + GLboolean ARB_shadow_ambient; + GLboolean ARB_sync; + GLboolean ARB_texture_border_clamp; + GLboolean ARB_texture_buffer_object; + GLboolean ARB_texture_compression; + GLboolean ARB_texture_compression_rgtc; + GLboolean ARB_texture_cube_map; + GLboolean ARB_texture_env_combine; + GLboolean ARB_texture_env_crossbar; + GLboolean ARB_texture_env_dot3; + GLboolean ARB_texture_float; + GLboolean ARB_texture_mirrored_repeat; + GLboolean ARB_texture_multisample; + GLboolean ARB_texture_non_power_of_two; + GLboolean ARB_texture_rg; + GLboolean ARB_texture_rgb10_a2ui; + GLboolean ARB_timer_query; + GLboolean ARB_transform_feedback2; + GLboolean ARB_transpose_matrix; + GLboolean ARB_uniform_buffer_object; + GLboolean ARB_vertex_array_object; + GLboolean ARB_vertex_buffer_object; + GLboolean ARB_vertex_program; + GLboolean ARB_vertex_shader; + GLboolean ARB_vertex_type_2_10_10_10_rev; + GLboolean ARB_window_pos; + GLboolean EXT_abgr; + GLboolean EXT_bgra; + GLboolean EXT_blend_color; + GLboolean EXT_blend_equation_separate; + GLboolean EXT_blend_func_separate; + GLboolean EXT_blend_logic_op; + GLboolean EXT_blend_minmax; + GLboolean EXT_blend_subtract; + GLboolean EXT_clip_volume_hint; + GLboolean EXT_compiled_vertex_array; + GLboolean EXT_copy_texture; + GLboolean EXT_depth_bounds_test; + GLboolean EXT_draw_buffers2; + GLboolean EXT_draw_range_elements; + GLboolean EXT_fog_coord; + GLboolean EXT_framebuffer_blit; + GLboolean EXT_framebuffer_multisample; + GLboolean EXT_framebuffer_object; + GLboolean EXT_framebuffer_sRGB; + GLboolean EXT_gpu_program_parameters; + GLboolean EXT_gpu_shader4; + GLboolean EXT_multi_draw_arrays; + GLboolean EXT_paletted_texture; + GLboolean EXT_packed_depth_stencil; + GLboolean EXT_packed_float; + GLboolean EXT_packed_pixels; + GLboolean EXT_pixel_buffer_object; + GLboolean EXT_point_parameters; + GLboolean EXT_polygon_offset; + GLboolean EXT_provoking_vertex; + GLboolean EXT_rescale_normal; + GLboolean EXT_shadow_funcs; + GLboolean EXT_secondary_color; + GLboolean EXT_separate_shader_objects; + GLboolean EXT_separate_specular_color; + GLboolean EXT_shared_texture_palette; + GLboolean EXT_stencil_wrap; + GLboolean EXT_stencil_two_side; + GLboolean EXT_subtexture; + GLboolean EXT_texture; + GLboolean EXT_texture_object; + GLboolean EXT_texture3D; + GLboolean EXT_texture_array; + GLboolean EXT_texture_compression_s3tc; + GLboolean EXT_texture_env_add; + GLboolean EXT_texture_env_combine; + 
GLboolean EXT_texture_env_dot3; + GLboolean EXT_texture_filter_anisotropic; + GLboolean EXT_texture_integer; + GLboolean EXT_texture_lod_bias; + GLboolean EXT_texture_mirror_clamp; + GLboolean EXT_texture_shared_exponent; + GLboolean EXT_texture_sRGB; + GLboolean EXT_texture_sRGB_decode; + GLboolean EXT_texture_swizzle; + GLboolean EXT_transform_feedback; + GLboolean EXT_timer_query; + GLboolean EXT_vertex_array; + GLboolean EXT_vertex_array_bgra; + GLboolean EXT_vertex_array_set; + GLboolean OES_standard_derivatives; + /* vendor extensions */ + GLboolean AMD_conservative_depth; + GLboolean APPLE_client_storage; + GLboolean APPLE_packed_pixels; + GLboolean APPLE_vertex_array_object; + GLboolean APPLE_object_purgeable; + GLboolean ATI_envmap_bumpmap; + GLboolean ATI_texture_mirror_once; + GLboolean ATI_texture_env_combine3; + GLboolean ATI_fragment_shader; + GLboolean ATI_separate_stencil; + GLboolean IBM_rasterpos_clip; + GLboolean IBM_multimode_draw_arrays; + GLboolean MESA_pack_invert; + GLboolean MESA_resize_buffers; + GLboolean MESA_ycbcr_texture; + GLboolean MESA_texture_array; + GLboolean MESA_texture_signed_rgba; + GLboolean NV_blend_square; + GLboolean NV_conditional_render; + GLboolean NV_fragment_program; + GLboolean NV_fragment_program_option; + GLboolean NV_light_max_exponent; + GLboolean NV_point_sprite; + GLboolean NV_primitive_restart; + GLboolean NV_texgen_reflection; + GLboolean NV_texture_env_combine4; + GLboolean NV_texture_rectangle; + GLboolean NV_vertex_program; + GLboolean NV_vertex_program1_1; + GLboolean OES_read_format; + GLboolean SGI_texture_color_table; + GLboolean SGIS_generate_mipmap; + GLboolean SGIS_texture_edge_clamp; + GLboolean SGIS_texture_lod; + GLboolean TDFX_texture_compression_FXT1; + GLboolean S3_s3tc; + GLboolean OES_EGL_image; + GLboolean OES_draw_texture; + GLboolean EXT_texture_format_BGRA8888; + GLboolean extension_sentinel; + /** The extension string */ + const GLubyte *String; + /** Number of supported extensions */ + GLuint Count; +}; + + +/** + * A stack of matrices (projection, modelview, color, texture, etc). + */ +struct gl_matrix_stack +{ + GLmatrix *Top; /**< points into Stack */ + GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */ + GLuint Depth; /**< 0 <= Depth < MaxDepth */ + GLuint MaxDepth; /**< size of Stack[] array */ + GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */ +}; + + +/** + * \name Bits for image transfer operations + * \sa __struct gl_contextRec::ImageTransferState. + */ +/*@{*/ +#define IMAGE_SCALE_BIAS_BIT 0x1 +#define IMAGE_SHIFT_OFFSET_BIT 0x2 +#define IMAGE_MAP_COLOR_BIT 0x4 +#define IMAGE_CLAMP_BIT 0x800 + + +/** Pixel Transfer ops */ +#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \ + IMAGE_SHIFT_OFFSET_BIT | \ + IMAGE_MAP_COLOR_BIT) + +/** + * \name Bits to indicate what state has changed. 
+ */ +/*@{*/ +#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */ +#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */ +#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */ +#define _NEW_COLOR (1 << 3) /**< gl_context::Color */ +#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */ +#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */ +#define _NEW_FOG (1 << 6) /**< gl_context::Fog */ +#define _NEW_HINT (1 << 7) /**< gl_context::Hint */ +#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */ +#define _NEW_LINE (1 << 9) /**< gl_context::Line */ +#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */ +#define _NEW_POINT (1 << 11) /**< gl_context::Point */ +#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */ +#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */ +#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */ +#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */ +#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */ +#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */ +#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */ +#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */ +#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */ +#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */ +#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */ +#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */ +#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */ +#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */ +#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */ +#define _NEW_PROGRAM_CONSTANTS (1 << 27) +#define _NEW_BUFFER_OBJECT (1 << 28) +#define _NEW_ALL ~0 +/*@}*/ + + +/** + * \name Bits to track array state changes + * + * Also used to summarize array enabled. + */ +/*@{*/ +#define _NEW_ARRAY_VERTEX VERT_BIT_POS +#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT +#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL +#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0 +#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1 +#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG +#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX +#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG +#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */ +#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0 +#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1 +#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2 +#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3 +#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4 +#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5 +#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6 +#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7 +#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */ +#define _NEW_ARRAY_ALL 0xffffffff + + +#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i)) +#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i)) +/*@}*/ + + + +/** + * \name A bunch of flags that we think might be useful to drivers. + * + * Set in the __struct gl_contextRec::_TriangleCaps bitfield. 
+ */ +/*@{*/ +#define DD_FLATSHADE 0x1 +#define DD_SEPARATE_SPECULAR 0x2 +#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */ +#define DD_TRI_LIGHT_TWOSIDE 0x8 +#define DD_TRI_UNFILLED 0x10 +#define DD_TRI_SMOOTH 0x20 +#define DD_TRI_STIPPLE 0x40 +#define DD_TRI_OFFSET 0x80 +#define DD_LINE_SMOOTH 0x100 +#define DD_LINE_STIPPLE 0x200 +#define DD_POINT_SMOOTH 0x400 +#define DD_POINT_ATTEN 0x800 +#define DD_TRI_TWOSTENCIL 0x1000 +/*@}*/ + + +/** + * \name Define the state changes under which each of these bits might change + */ +/*@{*/ +#define _DD_NEW_FLATSHADE _NEW_LIGHT +#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM) +#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON +#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT +#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON +#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON +#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON +#define _DD_NEW_TRI_OFFSET _NEW_POLYGON +#define _DD_NEW_LINE_SMOOTH _NEW_LINE +#define _DD_NEW_LINE_STIPPLE _NEW_LINE +#define _DD_NEW_LINE_WIDTH _NEW_LINE +#define _DD_NEW_POINT_SMOOTH _NEW_POINT +#define _DD_NEW_POINT_SIZE _NEW_POINT +#define _DD_NEW_POINT_ATTEN _NEW_POINT +/*@}*/ + + +/** + * Composite state flags + */ +/*@{*/ +#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \ + _NEW_TEXTURE | \ + _NEW_POINT | \ + _NEW_PROGRAM | \ + _NEW_MODELVIEW) + +#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \ + _NEW_TEXTURE) + +#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL) +/*@}*/ + + + + +/* This has to be included here. */ +#include "dd.h" + + +/** + * Display list flags. + * Strictly this is a tnl-private concept, but it doesn't seem + * worthwhile adding a tnl private structure just to hold this one bit + * of information: + */ +#define DLIST_DANGLING_REFS 0x1 + + +/** Opaque declaration of display list payload data type */ +union gl_dlist_node; + + +/** + * Provide a location where information about a display list can be + * collected. Could be extended with driverPrivate structures, + * etc. in the future. + */ +struct gl_display_list +{ + GLuint Name; + GLbitfield Flags; /**< DLIST_x flags */ + /** The dlist commands are in a linked list of nodes */ + union gl_dlist_node *Head; +}; + + +/** + * State used during display list compilation and execution. + */ +struct gl_dlist_state +{ + GLuint CallDepth; /**< Current recursion calling depth */ + + struct gl_display_list *CurrentList; /**< List currently being compiled */ + union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */ + GLuint CurrentPos; /**< Index into current block of nodes */ + + GLvertexformat ListVtxfmt; + + GLubyte ActiveAttribSize[VERT_ATTRIB_MAX]; + GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4]; + + GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX]; + GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4]; + + GLubyte ActiveIndex; + GLfloat CurrentIndex; + + GLubyte ActiveEdgeFlag; + GLboolean CurrentEdgeFlag; + + struct { + /* State known to have been set by the currently-compiling display + * list. Used to eliminate some redundant state changes. + */ + GLenum ShadeModel; + } Current; +}; + + +/** + * Enum for the OpenGL APIs we know about and may support. + */ +typedef enum +{ + API_OPENGL, + API_OPENGLES, + API_OPENGLES2 +} gl_api; + + +/** + * Mesa rendering context. + * + * This is the central context data structure for Mesa. Almost all + * OpenGL state is contained in this structure. + * Think of this as a base class from which device drivers will derive + * sub classes. + * + * The struct gl_context typedef names this structure. 
+ */ +struct gl_context +{ + /** State possibly shared with other contexts in the address space */ + struct gl_shared_state *Shared; + + /** \name API function pointer tables */ + /*@{*/ + gl_api API; + struct _glapi_table *Save; /**< Display list save functions */ + struct _glapi_table *Exec; /**< Execute functions */ + struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */ + /*@}*/ + + struct gl_config Visual; + struct gl_framebuffer *DrawBuffer; /**< buffer for writing */ + struct gl_framebuffer *ReadBuffer; /**< buffer for reading */ + struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */ + struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */ + + /** + * Device driver function pointer table + */ + struct dd_function_table Driver; + + void *DriverCtx; /**< Points to device driver context/state */ + + /** Core/Driver constants */ + struct gl_constants Const; + + /** \name The various 4x4 matrix stacks */ + /*@{*/ + struct gl_matrix_stack ModelviewMatrixStack; + struct gl_matrix_stack ProjectionMatrixStack; + struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS]; + struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES]; + struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */ + /*@}*/ + + /** Combined modelview and projection matrix */ + GLmatrix _ModelProjectMatrix; + + /** \name Display lists */ + struct gl_dlist_state ListState; + + GLboolean ExecuteFlag; /**< Execute GL commands? */ + GLboolean CompileFlag; /**< Compile GL commands into display list? */ + + /** Extension information */ + struct gl_extensions Extensions; + + /** Version info */ + GLuint VersionMajor, VersionMinor; + char *VersionString; + + /** \name State attribute stack (for glPush/PopAttrib) */ + /*@{*/ + GLuint AttribStackDepth; + struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH]; + /*@}*/ + + /** \name Renderer attribute groups + * + * We define a struct for each attribute group to make pushing and popping + * attributes easy. Also it's a good organization. 
+ */ + /*@{*/ + struct gl_accum_attrib Accum; /**< Accum buffer attributes */ + struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */ + struct gl_current_attrib Current; /**< Current attributes */ + struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */ + struct gl_eval_attrib Eval; /**< Eval attributes */ + struct gl_fog_attrib Fog; /**< Fog attributes */ + struct gl_hint_attrib Hint; /**< Hint attributes */ + struct gl_light_attrib Light; /**< Light attributes */ + struct gl_line_attrib Line; /**< Line attributes */ + struct gl_list_attrib List; /**< List attributes */ + struct gl_multisample_attrib Multisample; + struct gl_pixel_attrib Pixel; /**< Pixel attributes */ + struct gl_point_attrib Point; /**< Point attributes */ + struct gl_polygon_attrib Polygon; /**< Polygon attributes */ + GLuint PolygonStipple[32]; /**< Polygon stipple */ + struct gl_scissor_attrib Scissor; /**< Scissor attributes */ + struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */ + struct gl_texture_attrib Texture; /**< Texture attributes */ + struct gl_transform_attrib Transform; /**< Transformation attributes */ + struct gl_viewport_attrib Viewport; /**< Viewport attributes */ + /*@}*/ + + /** \name Client attribute stack */ + /*@{*/ + GLuint ClientAttribStackDepth; + struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH]; + /*@}*/ + + /** \name Client attribute groups */ + /*@{*/ + struct gl_array_attrib Array; /**< Vertex arrays */ + struct gl_pixelstore_attrib Pack; /**< Pixel packing */ + struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */ + struct gl_pixelstore_attrib DefaultPacking; /**< Default params */ + /*@}*/ + + /** \name Other assorted state (not pushed/popped on attribute stack) */ + /*@{*/ + struct gl_pixelmaps PixelMaps; + + struct gl_evaluators EvalMap; /**< All evaluators */ + struct gl_feedback Feedback; /**< Feedback */ + struct gl_selection Select; /**< Selection */ + + struct gl_program_state Program; /**< general program state */ + struct gl_vertex_program_state VertexProgram; + struct gl_fragment_program_state FragmentProgram; + struct gl_geometry_program_state GeometryProgram; + struct gl_ati_fragment_shader_state ATIFragmentShader; + + struct gl_shader_state Shader; /**< GLSL shader object state */ + struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES]; + + struct gl_query_state Query; /**< occlusion, timer queries */ + + struct gl_transform_feedback TransformFeedback; + + struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */ + struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */ + /*@}*/ + + struct gl_meta_state *Meta; /**< for "meta" operations */ + + /* GL_EXT_framebuffer_object */ + struct gl_renderbuffer *CurrentRenderbuffer; + + GLenum ErrorValue; /**< Last error code */ + + /** + * Recognize and silence repeated error debug messages in buggy apps. + */ + const char *ErrorDebugFmtString; + GLuint ErrorDebugCount; + + GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ + GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ + + GLboolean ViewportInitialized; /**< has viewport size been initialized? */ + + GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ + + /** \name Derived state */ + /*@{*/ + /** Bitwise-or of DD_* flags. Note that this bitfield may be used before + * state validation so they need to always be current. 
+ */ + GLbitfield _TriangleCaps; + GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */ + GLfloat _EyeZDir[3]; + GLfloat _ModelViewInvScale; + GLboolean _NeedEyeCoords; + GLboolean _ForceEyeCoords; + + GLuint TextureStateTimestamp; /**< detect changes to shared state */ + + struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */ + struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */ + /**@}*/ + + struct gl_list_extensions *ListExt; /**< driver dlist extensions */ + + /** \name For debugging/development only */ + /*@{*/ + GLboolean FirstTimeCurrent; + /*@}*/ + + /** Dither disable via MESA_NO_DITHER env var */ + GLboolean NoDither; + + /** software compression/decompression supported or not */ + GLboolean Mesa_DXTn; + + GLboolean TextureFormatSupported[MESA_FORMAT_COUNT]; + + /** + * Use dp4 (rather than mul/mad) instructions for position + * transformation? + */ + GLboolean mvp_with_dp4; + + /** + * \name Hooks for module contexts. + * + * These will eventually live in the driver or elsewhere. + */ + /*@{*/ + void *swrast_context; + void *swsetup_context; + void *swtnl_context; + void *swtnl_im; + struct st_context *st; + void *aelt_context; + /*@}*/ +}; + + +#ifdef DEBUG +extern int MESA_VERBOSE; +extern int MESA_DEBUG_FLAGS; +# define MESA_FUNCTION __FUNCTION__ +#else +# define MESA_VERBOSE 0 +# define MESA_DEBUG_FLAGS 0 +# define MESA_FUNCTION "a function" +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + + +/** The MESA_VERBOSE var is a bitmask of these flags */ +enum _verbose +{ + VERBOSE_VARRAY = 0x0001, + VERBOSE_TEXTURE = 0x0002, + VERBOSE_MATERIAL = 0x0004, + VERBOSE_PIPELINE = 0x0008, + VERBOSE_DRIVER = 0x0010, + VERBOSE_STATE = 0x0020, + VERBOSE_API = 0x0040, + VERBOSE_DISPLAY_LIST = 0x0100, + VERBOSE_LIGHTING = 0x0200, + VERBOSE_PRIMS = 0x0400, + VERBOSE_VERTS = 0x0800, + VERBOSE_DISASSEM = 0x1000, + VERBOSE_DRAW = 0x2000, + VERBOSE_SWAPBUFFERS = 0x4000 +}; + + +/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */ +enum _debug +{ + DEBUG_ALWAYS_FLUSH = 0x1 +}; + + + +#endif /* MTYPES_H */ diff --git a/mesalib/src/mesa/main/state.c b/mesalib/src/mesa/main/state.c index c07e2c380..502c42929 100644 --- a/mesalib/src/mesa/main/state.c +++ b/mesalib/src/mesa/main/state.c @@ -1,732 +1,734 @@ -/* - * Mesa 3-D graphics library - * Version: 7.3 - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * \file state.c - * State management. 
- * - * This file manages recalculation of derived values in struct gl_context. - */ - - -#include "glheader.h" -#include "mtypes.h" -#include "context.h" -#include "debug.h" -#include "macros.h" -#include "ffvertex_prog.h" -#include "framebuffer.h" -#include "light.h" -#include "matrix.h" -#include "pixel.h" -#include "program/program.h" -#include "program/prog_parameter.h" -#include "state.h" -#include "stencil.h" -#include "texenvprogram.h" -#include "texobj.h" -#include "texstate.h" - - -static void -update_separate_specular(struct gl_context *ctx) -{ - if (NEED_SECONDARY_COLOR(ctx)) - ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; - else - ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR; -} - - -/** - * Compute the index of the last array element that can be safely accessed - * in a vertex array. We can really only do this when the array lives in - * a VBO. - * The array->_MaxElement field will be updated. - * Later in glDrawArrays/Elements/etc we can do some bounds checking. - */ -static void -compute_max_element(struct gl_client_array *array) -{ - assert(array->Enabled); - if (array->BufferObj->Name) { - GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr; - GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size; - - if (offset < obj_size) { - array->_MaxElement = (obj_size - offset + - array->StrideB - - array->_ElementSize) / array->StrideB; - } else { - array->_MaxElement = 0; - } - } - else { - /* user-space array, no idea how big it is */ - array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ - } -} - - -/** - * Helper for update_arrays(). - * \return min(current min, array->_MaxElement). - */ -static GLuint -update_min(GLuint min, struct gl_client_array *array) -{ - compute_max_element(array); - return MIN2(min, array->_MaxElement); -} - - -/** - * Update ctx->Array._MaxElement (the max legal index into all enabled arrays). - * Need to do this upon new array state or new buffer object state. 
- */ -static void -update_arrays( struct gl_context *ctx ) -{ - struct gl_array_object *arrayObj = ctx->Array.ArrayObj; - GLuint i, min = ~0; - - /* find min of _MaxElement values for all enabled arrays */ - - /* 0 */ - if (ctx->VertexProgram._Current - && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]); - } - else if (arrayObj->Vertex.Enabled) { - min = update_min(min, &arrayObj->Vertex); - } - - /* 1 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]); - } - /* no conventional vertex weight array */ - - /* 2 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]); - } - else if (arrayObj->Normal.Enabled) { - min = update_min(min, &arrayObj->Normal); - } - - /* 3 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]); - } - else if (arrayObj->Color.Enabled) { - min = update_min(min, &arrayObj->Color); - } - - /* 4 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]); - } - else if (arrayObj->SecondaryColor.Enabled) { - min = update_min(min, &arrayObj->SecondaryColor); - } - - /* 5 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]); - } - else if (arrayObj->FogCoord.Enabled) { - min = update_min(min, &arrayObj->FogCoord); - } - - /* 6 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]); - } - else if (arrayObj->Index.Enabled) { - min = update_min(min, &arrayObj->Index); - } - - /* 7 */ - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]); - } - - /* 8..15 */ - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) { - if (ctx->VertexProgram._Enabled - && arrayObj->VertexAttrib[i].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[i]); - } - else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits - && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) { - min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]); - } - } - - /* 16..31 */ - if (ctx->VertexProgram._Current) { - for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { - if (arrayObj->VertexAttrib[i].Enabled) { - min = update_min(min, &arrayObj->VertexAttrib[i]); - } - } - } - - if (arrayObj->EdgeFlag.Enabled) { - min = update_min(min, &arrayObj->EdgeFlag); - } - - /* _MaxElement is one past the last legal array element */ - arrayObj->_MaxElement = min; -} - - -/** - * Update the following fields: - * ctx->VertexProgram._Enabled - * ctx->FragmentProgram._Enabled - * ctx->ATIFragmentShader._Enabled - * This needs to be done before texture state validation. - */ -static void -update_program_enables(struct gl_context *ctx) -{ - /* These _Enabled flags indicate if the program is enabled AND valid. 
*/ - ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled - && ctx->VertexProgram.Current->Base.Instructions; - ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled - && ctx->FragmentProgram.Current->Base.Instructions; - ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled - && ctx->ATIFragmentShader.Current->Instructions[0]; -} - - -/** - * Update vertex/fragment program state. In particular, update these fields: - * ctx->VertexProgram._Current - * ctx->VertexProgram._TnlProgram, - * These point to the highest priority enabled vertex/fragment program or are - * NULL if fixed-function processing is to be done. - * - * This function needs to be called after texture state validation in case - * we're generating a fragment program from fixed-function texture state. - * - * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex - * or fragment program is being used. - */ -static GLbitfield -update_program(struct gl_context *ctx) -{ - const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram; - const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram; - const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram; - const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current; - const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; - const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; - GLbitfield new_state = 0x0; - - /* - * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current - * pointers to the programs that should be used for rendering. If either - * is NULL, use fixed-function code paths. - * - * These programs may come from several sources. The priority is as - * follows: - * 1. OpenGL 2.0/ARB vertex/fragment shaders - * 2. ARB/NV vertex/fragment programs - * 3. Programs derived from fixed-function state. - * - * Note: it's possible for a vertex shader to get used with a fragment - * program (and vice versa) here, but in practice that shouldn't ever - * come up, or matter. - */ - - if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) { - /* Use shader programs */ - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, - fsProg->FragmentProgram); - } - else if (ctx->FragmentProgram._Enabled) { - /* use user-defined vertex program */ - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, - ctx->FragmentProgram.Current); - } - else if (ctx->FragmentProgram._MaintainTexEnvProgram) { - /* Use fragment program generated from fixed-function state. - */ - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, - _mesa_get_fixed_func_fragment_program(ctx)); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, - ctx->FragmentProgram._Current); - } - else { - /* no fragment program */ - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); - } - - if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { - /* Use shader programs */ - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, - gsProg->GeometryProgram); - } else { - /* no fragment program */ - _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); - } - - /* Examine vertex program after fragment program as - * _mesa_get_fixed_func_vertex_program() needs to know active - * fragprog inputs. 
- */ - if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) { - /* Use shader programs */ - _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, - vsProg->VertexProgram); - } - else if (ctx->VertexProgram._Enabled) { - /* use user-defined vertex program */ - _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, - ctx->VertexProgram.Current); - } - else if (ctx->VertexProgram._MaintainTnlProgram) { - /* Use vertex program generated from fixed-function state. - */ - _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, - _mesa_get_fixed_func_vertex_program(ctx)); - _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, - ctx->VertexProgram._Current); - } - else { - /* no vertex program */ - _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); - } - - /* Let the driver know what's happening: - */ - if (ctx->FragmentProgram._Current != prevFP) { - new_state |= _NEW_PROGRAM; - if (ctx->Driver.BindProgram) { - ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, - (struct gl_program *) ctx->FragmentProgram._Current); - } - } - - if (ctx->GeometryProgram._Current != prevGP) { - new_state |= _NEW_PROGRAM; - if (ctx->Driver.BindProgram) { - ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM, - (struct gl_program *) ctx->GeometryProgram._Current); - } - } - - if (ctx->VertexProgram._Current != prevVP) { - new_state |= _NEW_PROGRAM; - if (ctx->Driver.BindProgram) { - ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, - (struct gl_program *) ctx->VertexProgram._Current); - } - } - - return new_state; -} - - -/** - * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0. - */ -static GLbitfield -update_program_constants(struct gl_context *ctx) -{ - GLbitfield new_state = 0x0; - - if (ctx->FragmentProgram._Current) { - const struct gl_program_parameter_list *params = - ctx->FragmentProgram._Current->Base.Parameters; - if (params && params->StateFlags & ctx->NewState) { - new_state |= _NEW_PROGRAM_CONSTANTS; - } - } - - if (ctx->GeometryProgram._Current) { - const struct gl_program_parameter_list *params = - ctx->GeometryProgram._Current->Base.Parameters; - /*FIXME: StateFlags is always 0 because we have unnamed constant - * not state changes */ - if (params /*&& params->StateFlags & ctx->NewState*/) { - new_state |= _NEW_PROGRAM_CONSTANTS; - } - } - - if (ctx->VertexProgram._Current) { - const struct gl_program_parameter_list *params = - ctx->VertexProgram._Current->Base.Parameters; - if (params && params->StateFlags & ctx->NewState) { - new_state |= _NEW_PROGRAM_CONSTANTS; - } - } - - return new_state; -} - - - - -static void -update_viewport_matrix(struct gl_context *ctx) -{ - const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF; - - ASSERT(depthMax > 0); - - /* Compute scale and bias values. This is really driver-specific - * and should be maintained elsewhere if at all. - * NOTE: RasterPos uses this. - */ - _math_matrix_viewport(&ctx->Viewport._WindowMap, - ctx->Viewport.X, ctx->Viewport.Y, - ctx->Viewport.Width, ctx->Viewport.Height, - ctx->Viewport.Near, ctx->Viewport.Far, - depthMax); -} - - -/** - * Update derived multisample state. - */ -static void -update_multisample(struct gl_context *ctx) -{ - ctx->Multisample._Enabled = GL_FALSE; - if (ctx->Multisample.Enabled && - ctx->DrawBuffer && - ctx->DrawBuffer->Visual.sampleBuffers) - ctx->Multisample._Enabled = GL_TRUE; -} - - -/** - * Update derived color/blend/logicop state. 
- */ -static void -update_color(struct gl_context *ctx) -{ - /* This is needed to support 1.1's RGB logic ops AND - * 1.0's blending logicops. - */ - ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx); -} - - -/* - * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET - * in ctx->_TriangleCaps if needed. - */ -static void -update_polygon(struct gl_context *ctx) -{ - ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET); - - if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; - - if ( ctx->Polygon.OffsetPoint - || ctx->Polygon.OffsetLine - || ctx->Polygon.OffsetFill) - ctx->_TriangleCaps |= DD_TRI_OFFSET; -} - - -/** - * Update the ctx->_TriangleCaps bitfield. - * XXX that bitfield should really go away someday! - * This function must be called after other update_*() functions since - * there are dependencies on some other derived values. - */ -#if 0 -static void -update_tricaps(struct gl_context *ctx, GLbitfield new_state) -{ - ctx->_TriangleCaps = 0; - - /* - * Points - */ - if (1/*new_state & _NEW_POINT*/) { - if (ctx->Point.SmoothFlag) - ctx->_TriangleCaps |= DD_POINT_SMOOTH; - if (ctx->Point._Attenuated) - ctx->_TriangleCaps |= DD_POINT_ATTEN; - } - - /* - * Lines - */ - if (1/*new_state & _NEW_LINE*/) { - if (ctx->Line.SmoothFlag) - ctx->_TriangleCaps |= DD_LINE_SMOOTH; - if (ctx->Line.StippleFlag) - ctx->_TriangleCaps |= DD_LINE_STIPPLE; - } - - /* - * Polygons - */ - if (1/*new_state & _NEW_POLYGON*/) { - if (ctx->Polygon.SmoothFlag) - ctx->_TriangleCaps |= DD_TRI_SMOOTH; - if (ctx->Polygon.StippleFlag) - ctx->_TriangleCaps |= DD_TRI_STIPPLE; - if (ctx->Polygon.FrontMode != GL_FILL - || ctx->Polygon.BackMode != GL_FILL) - ctx->_TriangleCaps |= DD_TRI_UNFILLED; - if (ctx->Polygon.CullFlag - && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) - ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; - if (ctx->Polygon.OffsetPoint || - ctx->Polygon.OffsetLine || - ctx->Polygon.OffsetFill) - ctx->_TriangleCaps |= DD_TRI_OFFSET; - } - - /* - * Lighting and shading - */ - if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) - ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE; - if (ctx->Light.ShadeModel == GL_FLAT) - ctx->_TriangleCaps |= DD_FLATSHADE; - if (NEED_SECONDARY_COLOR(ctx)) - ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; - - /* - * Stencil - */ - if (ctx->Stencil._TestTwoSide) - ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL; -} -#endif - - -/** - * Compute derived GL state. - * If __struct gl_contextRec::NewState is non-zero then this function \b must - * be called before rendering anything. - * - * Calls dd_function_table::UpdateState to perform any internal state - * management necessary. - * - * \sa _mesa_update_modelview_project(), _mesa_update_texture(), - * _mesa_update_buffer_bounds(), - * _mesa_update_lighting() and _mesa_update_tnl_spaces(). 
- */ -void -_mesa_update_state_locked( struct gl_context *ctx ) -{ - GLbitfield new_state = ctx->NewState; - GLbitfield prog_flags = _NEW_PROGRAM; - GLbitfield new_prog_state = 0x0; - - if (new_state == _NEW_CURRENT_ATTRIB) - goto out; - - if (MESA_VERBOSE & VERBOSE_STATE) - _mesa_print_state("_mesa_update_state", new_state); - - /* Determine which state flags effect vertex/fragment program state */ - if (ctx->FragmentProgram._MaintainTexEnvProgram) { - prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG | - _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE | - _NEW_PROGRAM); - } - if (ctx->VertexProgram._MaintainTnlProgram) { - prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | - _NEW_TRANSFORM | _NEW_POINT | - _NEW_FOG | _NEW_LIGHT | - _MESA_NEW_NEED_EYE_COORDS); - } - - /* - * Now update derived state info - */ - - if (new_state & prog_flags) - update_program_enables( ctx ); - - if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) - _mesa_update_modelview_project( ctx, new_state ); - - if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) - _mesa_update_texture( ctx, new_state ); - - if (new_state & _NEW_BUFFERS) - _mesa_update_framebuffer(ctx); - - if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) - _mesa_update_draw_buffer_bounds( ctx ); - - if (new_state & _NEW_POLYGON) - update_polygon( ctx ); - - if (new_state & _NEW_LIGHT) - _mesa_update_lighting( ctx ); - - if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) - _mesa_update_stencil( ctx ); - - if (new_state & _MESA_NEW_TRANSFER_STATE) - _mesa_update_pixel( ctx, new_state ); - - if (new_state & _DD_NEW_SEPARATE_SPECULAR) - update_separate_specular( ctx ); - - if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT)) - update_viewport_matrix(ctx); - - if (new_state & _NEW_MULTISAMPLE) - update_multisample( ctx ); - - if (new_state & _NEW_COLOR) - update_color( ctx ); - -#if 0 - if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT - | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR)) - update_tricaps( ctx, new_state ); -#endif - - /* ctx->_NeedEyeCoords is now up to date. - * - * If the truth value of this variable has changed, update for the - * new lighting space and recompute the positions of lights and the - * normal transform. - * - * If the lighting space hasn't changed, may still need to recompute - * light positions & normal transforms for other reasons. - */ - if (new_state & _MESA_NEW_NEED_EYE_COORDS) - _mesa_update_tnl_spaces( ctx, new_state ); - - if (new_state & prog_flags) { - /* When we generate programs from fixed-function vertex/fragment state - * this call may generate/bind a new program. If so, we need to - * propogate the _NEW_PROGRAM flag to the driver. - */ - new_prog_state |= update_program( ctx ); - } - - if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) - update_arrays( ctx ); - - out: - new_prog_state |= update_program_constants(ctx); - - /* - * Give the driver a chance to act upon the new_state flags. - * The driver might plug in different span functions, for example. - * Also, this is where the driver can invalidate the state of any - * active modules (such as swrast_setup, swrast, tnl, etc). - * - * Set ctx->NewState to zero to avoid recursion if - * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?) 
- */ - new_state = ctx->NewState | new_prog_state; - ctx->NewState = 0; - ctx->Driver.UpdateState(ctx, new_state); - ctx->Array.NewState = 0; -} - - -/* This is the usual entrypoint for state updates: - */ -void -_mesa_update_state( struct gl_context *ctx ) -{ - _mesa_lock_context_textures(ctx); - _mesa_update_state_locked(ctx); - _mesa_unlock_context_textures(ctx); -} - - - - -/** - * Want to figure out which fragment program inputs are actually - * constant/current values from ctx->Current. These should be - * referenced as a tracked state variable rather than a fragment - * program input, to save the overhead of putting a constant value in - * every submitted vertex, transferring it to hardware, interpolating - * it across the triangle, etc... - * - * When there is a VP bound, just use vp->outputs. But when we're - * generating vp from fixed function state, basically want to - * calculate: - * - * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | - * potential_vp_outputs ) - * - * Where potential_vp_outputs is calculated by looking at enabled - * texgen, etc. - * - * The generated fragment program should then only declare inputs that - * may vary or otherwise differ from the ctx->Current values. - * Otherwise, the fp should track them as state values instead. - */ -void -_mesa_set_varying_vp_inputs( struct gl_context *ctx, - GLbitfield varying_inputs ) -{ - if (ctx->varying_vp_inputs != varying_inputs) { - ctx->varying_vp_inputs = varying_inputs; - ctx->NewState |= _NEW_ARRAY; - /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/ - } -} - - -/** - * Used by drivers to tell core Mesa that the driver is going to - * install/ use its own vertex program. In particular, this will - * prevent generated fragment programs from using state vars instead - * of ordinary varyings/inputs. - */ -void -_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag) -{ - if (ctx->VertexProgram._Overriden != flag) { - ctx->VertexProgram._Overriden = flag; - - /* Set one of the bits which will trigger fragment program - * regeneration: - */ - ctx->NewState |= _NEW_PROGRAM; - } -} +/* + * Mesa 3-D graphics library + * Version: 7.3 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file state.c + * State management. + * + * This file manages recalculation of derived values in struct gl_context. 
+ */ + + +#include "glheader.h" +#include "mtypes.h" +#include "context.h" +#include "debug.h" +#include "macros.h" +#include "ffvertex_prog.h" +#include "framebuffer.h" +#include "light.h" +#include "matrix.h" +#include "pixel.h" +#include "program/program.h" +#include "program/prog_parameter.h" +#include "state.h" +#include "stencil.h" +#include "texenvprogram.h" +#include "texobj.h" +#include "texstate.h" + + +static void +update_separate_specular(struct gl_context *ctx) +{ + if (NEED_SECONDARY_COLOR(ctx)) + ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; + else + ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR; +} + + +/** + * Compute the index of the last array element that can be safely accessed + * in a vertex array. We can really only do this when the array lives in + * a VBO. + * The array->_MaxElement field will be updated. + * Later in glDrawArrays/Elements/etc we can do some bounds checking. + */ +static void +compute_max_element(struct gl_client_array *array) +{ + assert(array->Enabled); + if (array->BufferObj->Name) { + GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr; + GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size; + + if (offset < obj_size) { + array->_MaxElement = (obj_size - offset + + array->StrideB - + array->_ElementSize) / array->StrideB; + } else { + array->_MaxElement = 0; + } + } + else { + /* user-space array, no idea how big it is */ + array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ + } +} + + +/** + * Helper for update_arrays(). + * \return min(current min, array->_MaxElement). + */ +static GLuint +update_min(GLuint min, struct gl_client_array *array) +{ + compute_max_element(array); + return MIN2(min, array->_MaxElement); +} + + +/** + * Update ctx->Array._MaxElement (the max legal index into all enabled arrays). + * Need to do this upon new array state or new buffer object state. 
+ */ +static void +update_arrays( struct gl_context *ctx ) +{ + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i, min = ~0; + + /* find min of _MaxElement values for all enabled arrays */ + + /* 0 */ + if (ctx->VertexProgram._Current + && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]); + } + else if (arrayObj->Vertex.Enabled) { + min = update_min(min, &arrayObj->Vertex); + } + + /* 1 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]); + } + /* no conventional vertex weight array */ + + /* 2 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]); + } + else if (arrayObj->Normal.Enabled) { + min = update_min(min, &arrayObj->Normal); + } + + /* 3 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]); + } + else if (arrayObj->Color.Enabled) { + min = update_min(min, &arrayObj->Color); + } + + /* 4 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]); + } + else if (arrayObj->SecondaryColor.Enabled) { + min = update_min(min, &arrayObj->SecondaryColor); + } + + /* 5 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]); + } + else if (arrayObj->FogCoord.Enabled) { + min = update_min(min, &arrayObj->FogCoord); + } + + /* 6 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]); + } + else if (arrayObj->Index.Enabled) { + min = update_min(min, &arrayObj->Index); + } + + /* 7 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]); + } + + /* 8..15 */ + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) { + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); + } + else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits + && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) { + min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]); + } + } + + /* 16..31 */ + if (ctx->VertexProgram._Current) { + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { + if (arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); + } + } + } + + if (arrayObj->EdgeFlag.Enabled) { + min = update_min(min, &arrayObj->EdgeFlag); + } + + /* _MaxElement is one past the last legal array element */ + arrayObj->_MaxElement = min; +} + + +/** + * Update the following fields: + * ctx->VertexProgram._Enabled + * ctx->FragmentProgram._Enabled + * ctx->ATIFragmentShader._Enabled + * This needs to be done before texture state validation. + */ +static void +update_program_enables(struct gl_context *ctx) +{ + /* These _Enabled flags indicate if the program is enabled AND valid. 
*/ + ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled + && ctx->VertexProgram.Current->Base.Instructions; + ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled + && ctx->FragmentProgram.Current->Base.Instructions; + ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled + && ctx->ATIFragmentShader.Current->Instructions[0]; +} + + +/** + * Update vertex/fragment program state. In particular, update these fields: + * ctx->VertexProgram._Current + * ctx->VertexProgram._TnlProgram, + * These point to the highest priority enabled vertex/fragment program or are + * NULL if fixed-function processing is to be done. + * + * This function needs to be called after texture state validation in case + * we're generating a fragment program from fixed-function texture state. + * + * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex + * or fragment program is being used. + */ +static GLbitfield +update_program(struct gl_context *ctx) +{ + const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram; + const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram; + const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram; + const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current; + const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; + const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; + GLbitfield new_state = 0x0; + + /* + * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current + * pointers to the programs that should be used for rendering. If either + * is NULL, use fixed-function code paths. + * + * These programs may come from several sources. The priority is as + * follows: + * 1. OpenGL 2.0/ARB vertex/fragment shaders + * 2. ARB/NV vertex/fragment programs + * 3. Programs derived from fixed-function state. + * + * Note: it's possible for a vertex shader to get used with a fragment + * program (and vice versa) here, but in practice that shouldn't ever + * come up, or matter. + */ + + if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) { + /* Use shader programs */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + fsProg->FragmentProgram); + } + else if (ctx->FragmentProgram._Enabled) { + /* use user-defined vertex program */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + ctx->FragmentProgram.Current); + } + else if (ctx->FragmentProgram._MaintainTexEnvProgram) { + /* Use fragment program generated from fixed-function state. + */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + _mesa_get_fixed_func_fragment_program(ctx)); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, + ctx->FragmentProgram._Current); + } + else { + /* no fragment program */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); + } + + if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { + /* Use shader programs */ + _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, + gsProg->GeometryProgram); + } else { + /* no fragment program */ + _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); + } + + /* Examine vertex program after fragment program as + * _mesa_get_fixed_func_vertex_program() needs to know active + * fragprog inputs. 
+ */ + if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) { + /* Use shader programs */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + vsProg->VertexProgram); + } + else if (ctx->VertexProgram._Enabled) { + /* use user-defined vertex program */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + ctx->VertexProgram.Current); + } + else if (ctx->VertexProgram._MaintainTnlProgram) { + /* Use vertex program generated from fixed-function state. + */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + _mesa_get_fixed_func_vertex_program(ctx)); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, + ctx->VertexProgram._Current); + } + else { + /* no vertex program */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); + } + + /* Let the driver know what's happening: + */ + if (ctx->FragmentProgram._Current != prevFP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, + (struct gl_program *) ctx->FragmentProgram._Current); + } + } + + if (ctx->GeometryProgram._Current != prevGP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM, + (struct gl_program *) ctx->GeometryProgram._Current); + } + } + + if (ctx->VertexProgram._Current != prevVP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, + (struct gl_program *) ctx->VertexProgram._Current); + } + } + + return new_state; +} + + +/** + * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0. + */ +static GLbitfield +update_program_constants(struct gl_context *ctx) +{ + GLbitfield new_state = 0x0; + + if (ctx->FragmentProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->FragmentProgram._Current->Base.Parameters; + if (params && params->StateFlags & ctx->NewState) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + if (ctx->GeometryProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->GeometryProgram._Current->Base.Parameters; + /*FIXME: StateFlags is always 0 because we have unnamed constant + * not state changes */ + if (params /*&& params->StateFlags & ctx->NewState*/) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + if (ctx->VertexProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->VertexProgram._Current->Base.Parameters; + if (params && params->StateFlags & ctx->NewState) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + return new_state; +} + + + + +static void +update_viewport_matrix(struct gl_context *ctx) +{ + const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF; + + ASSERT(depthMax > 0); + + /* Compute scale and bias values. This is really driver-specific + * and should be maintained elsewhere if at all. + * NOTE: RasterPos uses this. + */ + _math_matrix_viewport(&ctx->Viewport._WindowMap, + ctx->Viewport.X, ctx->Viewport.Y, + ctx->Viewport.Width, ctx->Viewport.Height, + ctx->Viewport.Near, ctx->Viewport.Far, + depthMax); +} + + +/** + * Update derived multisample state. + */ +static void +update_multisample(struct gl_context *ctx) +{ + ctx->Multisample._Enabled = GL_FALSE; + if (ctx->Multisample.Enabled && + ctx->DrawBuffer && + ctx->DrawBuffer->Visual.sampleBuffers) + ctx->Multisample._Enabled = GL_TRUE; +} + + +/** + * Update derived color/blend/logicop state. 
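update_viewport_matrix() above plugs the viewport and depth range into _math_matrix_viewport(); assuming the usual GL window transform, the scale/bias work out as in this made-up example (800x600 viewport at the origin, DepthRange 0..1, _DepthMaxF of 65535):

    /* Illustration of the scale/bias set up by update_viewport_matrix() above. */
    GLfloat vx = 0.0f, vy = 0.0f, vw = 800.0f, vh = 600.0f;
    GLfloat znear = 0.0f, zfar = 1.0f, depth_max = 65535.0f;
    GLfloat sx = vw * 0.5f,                         tx = vx + vw * 0.5f;
    GLfloat sy = vh * 0.5f,                         ty = vy + vh * 0.5f;
    GLfloat sz = depth_max * (zfar - znear) * 0.5f, tz = depth_max * (zfar + znear) * 0.5f;
    /* NDC point (0.5, -1.0, 0.0) -> window (600.0, 0.0, 32767.5) */
    GLfloat win_x =  0.5f * sx + tx;
    GLfloat win_y = -1.0f * sy + ty;
    GLfloat win_z =  0.0f * sz + tz;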
+ */ +static void +update_color(struct gl_context *ctx) +{ + /* This is needed to support 1.1's RGB logic ops AND + * 1.0's blending logicops. + */ + ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx); +} + + +/* + * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET + * in ctx->_TriangleCaps if needed. + */ +static void +update_polygon(struct gl_context *ctx) +{ + ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET); + + if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; + + if ( ctx->Polygon.OffsetPoint + || ctx->Polygon.OffsetLine + || ctx->Polygon.OffsetFill) + ctx->_TriangleCaps |= DD_TRI_OFFSET; +} + + +/** + * Update the ctx->_TriangleCaps bitfield. + * XXX that bitfield should really go away someday! + * This function must be called after other update_*() functions since + * there are dependencies on some other derived values. + */ +#if 0 +static void +update_tricaps(struct gl_context *ctx, GLbitfield new_state) +{ + ctx->_TriangleCaps = 0; + + /* + * Points + */ + if (1/*new_state & _NEW_POINT*/) { + if (ctx->Point.SmoothFlag) + ctx->_TriangleCaps |= DD_POINT_SMOOTH; + if (ctx->Point._Attenuated) + ctx->_TriangleCaps |= DD_POINT_ATTEN; + } + + /* + * Lines + */ + if (1/*new_state & _NEW_LINE*/) { + if (ctx->Line.SmoothFlag) + ctx->_TriangleCaps |= DD_LINE_SMOOTH; + if (ctx->Line.StippleFlag) + ctx->_TriangleCaps |= DD_LINE_STIPPLE; + } + + /* + * Polygons + */ + if (1/*new_state & _NEW_POLYGON*/) { + if (ctx->Polygon.SmoothFlag) + ctx->_TriangleCaps |= DD_TRI_SMOOTH; + if (ctx->Polygon.StippleFlag) + ctx->_TriangleCaps |= DD_TRI_STIPPLE; + if (ctx->Polygon.FrontMode != GL_FILL + || ctx->Polygon.BackMode != GL_FILL) + ctx->_TriangleCaps |= DD_TRI_UNFILLED; + if (ctx->Polygon.CullFlag + && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; + if (ctx->Polygon.OffsetPoint || + ctx->Polygon.OffsetLine || + ctx->Polygon.OffsetFill) + ctx->_TriangleCaps |= DD_TRI_OFFSET; + } + + /* + * Lighting and shading + */ + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) + ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE; + if (ctx->Light.ShadeModel == GL_FLAT) + ctx->_TriangleCaps |= DD_FLATSHADE; + if (NEED_SECONDARY_COLOR(ctx)) + ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; + + /* + * Stencil + */ + if (ctx->Stencil._TestTwoSide) + ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL; +} +#endif + + +/** + * Compute derived GL state. + * If __struct gl_contextRec::NewState is non-zero then this function \b must + * be called before rendering anything. + * + * Calls dd_function_table::UpdateState to perform any internal state + * management necessary. + * + * \sa _mesa_update_modelview_project(), _mesa_update_texture(), + * _mesa_update_buffer_bounds(), + * _mesa_update_lighting() and _mesa_update_tnl_spaces(). 
+ */ +void +_mesa_update_state_locked( struct gl_context *ctx ) +{ + GLbitfield new_state = ctx->NewState; + GLbitfield prog_flags = _NEW_PROGRAM; + GLbitfield new_prog_state = 0x0; + + if (new_state == _NEW_CURRENT_ATTRIB) + goto out; + + if (MESA_VERBOSE & VERBOSE_STATE) + _mesa_print_state("_mesa_update_state", new_state); + + /* Determine which state flags effect vertex/fragment program state */ + if (ctx->FragmentProgram._MaintainTexEnvProgram) { + prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG | + _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE | + _NEW_PROGRAM); + } + if (ctx->VertexProgram._MaintainTnlProgram) { + prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | + _NEW_TRANSFORM | _NEW_POINT | + _NEW_FOG | _NEW_LIGHT | + _MESA_NEW_NEED_EYE_COORDS); + } + + /* + * Now update derived state info + */ + + if (new_state & prog_flags) + update_program_enables( ctx ); + + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + _mesa_update_modelview_project( ctx, new_state ); + + if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) + _mesa_update_texture( ctx, new_state ); + + if (new_state & _NEW_BUFFERS) + _mesa_update_framebuffer(ctx); + + if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) + _mesa_update_draw_buffer_bounds( ctx ); + + if (new_state & _NEW_POLYGON) + update_polygon( ctx ); + + if (new_state & _NEW_LIGHT) + _mesa_update_lighting( ctx ); + + if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) + _mesa_update_stencil( ctx ); + + if (new_state & _MESA_NEW_TRANSFER_STATE) + _mesa_update_pixel( ctx, new_state ); + + if (new_state & _DD_NEW_SEPARATE_SPECULAR) + update_separate_specular( ctx ); + + if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT)) + update_viewport_matrix(ctx); + + if (new_state & _NEW_MULTISAMPLE) + update_multisample( ctx ); + + if (new_state & _NEW_COLOR) + update_color( ctx ); + +#if 0 + if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT + | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR)) + update_tricaps( ctx, new_state ); +#endif + + /* ctx->_NeedEyeCoords is now up to date. + * + * If the truth value of this variable has changed, update for the + * new lighting space and recompute the positions of lights and the + * normal transform. + * + * If the lighting space hasn't changed, may still need to recompute + * light positions & normal transforms for other reasons. + */ + if (new_state & _MESA_NEW_NEED_EYE_COORDS) + _mesa_update_tnl_spaces( ctx, new_state ); + + if (new_state & prog_flags) { + /* When we generate programs from fixed-function vertex/fragment state + * this call may generate/bind a new program. If so, we need to + * propogate the _NEW_PROGRAM flag to the driver. + */ + new_prog_state |= update_program( ctx ); + } + + if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) + update_arrays( ctx ); + + out: + new_prog_state |= update_program_constants(ctx); + + /* + * Give the driver a chance to act upon the new_state flags. + * The driver might plug in different span functions, for example. + * Also, this is where the driver can invalidate the state of any + * active modules (such as swrast_setup, swrast, tnl, etc). + * + * Set ctx->NewState to zero to avoid recursion if + * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?) 
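For context on the Driver.UpdateState() call made at the end of this function: drivers typically just fold the new_state bits into their own dirty flags. A hypothetical implementation (names invented, not taken from this patch) might look like:

    /* Hypothetical dd_function_table::UpdateState hook; types and flags are invented. */
    static void
    example_update_state(struct gl_context *ctx, GLbitfield new_state)
    {
       struct example_context *exctx = example_context(ctx);  /* driver's own downcast, assumed */

       if (new_state & _NEW_COLOR)
          exctx->dirty |= EXAMPLE_DIRTY_BLEND;
       if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT))
          exctx->dirty |= EXAMPLE_DIRTY_FRAMEBUFFER;
       if (new_state & _NEW_PROGRAM)
          exctx->dirty |= EXAMPLE_DIRTY_SHADERS;
    }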
+ */ + new_state = ctx->NewState | new_prog_state; + ctx->NewState = 0; + ctx->Driver.UpdateState(ctx, new_state); + ctx->Array.NewState = 0; + if (!ctx->Array.RebindArrays) + ctx->Array.RebindArrays = (new_state & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; +} + + +/* This is the usual entrypoint for state updates: + */ +void +_mesa_update_state( struct gl_context *ctx ) +{ + _mesa_lock_context_textures(ctx); + _mesa_update_state_locked(ctx); + _mesa_unlock_context_textures(ctx); +} + + + + +/** + * Want to figure out which fragment program inputs are actually + * constant/current values from ctx->Current. These should be + * referenced as a tracked state variable rather than a fragment + * program input, to save the overhead of putting a constant value in + * every submitted vertex, transferring it to hardware, interpolating + * it across the triangle, etc... + * + * When there is a VP bound, just use vp->outputs. But when we're + * generating vp from fixed function state, basically want to + * calculate: + * + * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | + * potential_vp_outputs ) + * + * Where potential_vp_outputs is calculated by looking at enabled + * texgen, etc. + * + * The generated fragment program should then only declare inputs that + * may vary or otherwise differ from the ctx->Current values. + * Otherwise, the fp should track them as state values instead. + */ +void +_mesa_set_varying_vp_inputs( struct gl_context *ctx, + GLbitfield varying_inputs ) +{ + if (ctx->varying_vp_inputs != varying_inputs) { + ctx->varying_vp_inputs = varying_inputs; + ctx->NewState |= _NEW_ARRAY; + /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/ + } +} + + +/** + * Used by drivers to tell core Mesa that the driver is going to + * install/ use its own vertex program. In particular, this will + * prevent generated fragment programs from using state vars instead + * of ordinary varyings/inputs. + */ +void +_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag) +{ + if (ctx->VertexProgram._Overriden != flag) { + ctx->VertexProgram._Overriden = flag; + + /* Set one of the bits which will trigger fragment program + * regeneration: + */ + ctx->NewState |= _NEW_PROGRAM; + } +} diff --git a/mesalib/src/mesa/state_tracker/st_atom_blend.c b/mesalib/src/mesa/state_tracker/st_atom_blend.c index 26bb3dab9..fb1c7a4ef 100644 --- a/mesalib/src/mesa/state_tracker/st_atom_blend.c +++ b/mesalib/src/mesa/state_tracker/st_atom_blend.c @@ -1,299 +1,302 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Brian Paul - */ - - -#include "st_context.h" -#include "st_atom.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "cso_cache/cso_context.h" - -#include "main/macros.h" - -/** - * Convert GLenum blend tokens to pipe tokens. - * Both blend factors and blend funcs are accepted. - */ -static GLuint -translate_blend(GLenum blend) -{ - switch (blend) { - /* blend functions */ - case GL_FUNC_ADD: - return PIPE_BLEND_ADD; - case GL_FUNC_SUBTRACT: - return PIPE_BLEND_SUBTRACT; - case GL_FUNC_REVERSE_SUBTRACT: - return PIPE_BLEND_REVERSE_SUBTRACT; - case GL_MIN: - return PIPE_BLEND_MIN; - case GL_MAX: - return PIPE_BLEND_MAX; - - /* blend factors */ - case GL_ONE: - return PIPE_BLENDFACTOR_ONE; - case GL_SRC_COLOR: - return PIPE_BLENDFACTOR_SRC_COLOR; - case GL_SRC_ALPHA: - return PIPE_BLENDFACTOR_SRC_ALPHA; - case GL_DST_ALPHA: - return PIPE_BLENDFACTOR_DST_ALPHA; - case GL_DST_COLOR: - return PIPE_BLENDFACTOR_DST_COLOR; - case GL_SRC_ALPHA_SATURATE: - return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return PIPE_BLENDFACTOR_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return PIPE_BLENDFACTOR_CONST_ALPHA; - /* - return PIPE_BLENDFACTOR_SRC1_COLOR; - return PIPE_BLENDFACTOR_SRC1_ALPHA; - */ - case GL_ZERO: - return PIPE_BLENDFACTOR_ZERO; - case GL_ONE_MINUS_SRC_COLOR: - return PIPE_BLENDFACTOR_INV_SRC_COLOR; - case GL_ONE_MINUS_SRC_ALPHA: - return PIPE_BLENDFACTOR_INV_SRC_ALPHA; - case GL_ONE_MINUS_DST_COLOR: - return PIPE_BLENDFACTOR_INV_DST_COLOR; - case GL_ONE_MINUS_DST_ALPHA: - return PIPE_BLENDFACTOR_INV_DST_ALPHA; - case GL_ONE_MINUS_CONSTANT_COLOR: - return PIPE_BLENDFACTOR_INV_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return PIPE_BLENDFACTOR_INV_CONST_ALPHA; - /* - return PIPE_BLENDFACTOR_INV_SRC1_COLOR; - return PIPE_BLENDFACTOR_INV_SRC1_ALPHA; - */ - default: - assert("invalid GL token in translate_blend()" == NULL); - return 0; - } -} - - -/** - * Convert GLenum logicop tokens to pipe tokens. - */ -static GLuint -translate_logicop(GLenum logicop) -{ - switch (logicop) { - case GL_CLEAR: - return PIPE_LOGICOP_CLEAR; - case GL_NOR: - return PIPE_LOGICOP_NOR; - case GL_AND_INVERTED: - return PIPE_LOGICOP_AND_INVERTED; - case GL_COPY_INVERTED: - return PIPE_LOGICOP_COPY_INVERTED; - case GL_AND_REVERSE: - return PIPE_LOGICOP_AND_REVERSE; - case GL_INVERT: - return PIPE_LOGICOP_INVERT; - case GL_XOR: - return PIPE_LOGICOP_XOR; - case GL_NAND: - return PIPE_LOGICOP_NAND; - case GL_AND: - return PIPE_LOGICOP_AND; - case GL_EQUIV: - return PIPE_LOGICOP_EQUIV; - case GL_NOOP: - return PIPE_LOGICOP_NOOP; - case GL_OR_INVERTED: - return PIPE_LOGICOP_OR_INVERTED; - case GL_COPY: - return PIPE_LOGICOP_COPY; - case GL_OR_REVERSE: - return PIPE_LOGICOP_OR_REVERSE; - case GL_OR: - return PIPE_LOGICOP_OR; - case GL_SET: - return PIPE_LOGICOP_SET; - default: - assert("invalid GL token in translate_logicop()" == NULL); - return 0; - } -} - -/** - * Figure out if colormasks are different per rt. 
- */ -static GLboolean -colormask_per_rt(struct gl_context *ctx) -{ - /* a bit suboptimal have to compare lots of values */ - unsigned i; - for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) { - if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) { - return GL_TRUE; - } - } - return GL_FALSE; -} - -/** - * Figure out if blend enables/state are different per rt. - */ -static GLboolean -blend_per_rt(struct gl_context *ctx) -{ - if (ctx->Color.BlendEnabled && - (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) { - /* This can only happen if GL_EXT_draw_buffers2 is enabled */ - return GL_TRUE; - } - if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) { - /* this can only happen if GL_ARB_draw_buffers_blend is enabled */ - return GL_TRUE; - } - return GL_FALSE; -} - -static void -update_blend( struct st_context *st ) -{ - struct pipe_blend_state *blend = &st->state.blend; - unsigned num_state = 1; - unsigned i; - - memset(blend, 0, sizeof(*blend)); - - if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) { - num_state = st->ctx->Const.MaxDrawBuffers; - blend->independent_blend_enable = 1; - } - /* Note it is impossible to correctly deal with EXT_blend_logic_op and - EXT_draw_buffers2/EXT_blend_equation_separate at the same time. - These combinations would require support for per-rt logicop enables - and separate alpha/rgb logicop/blend support respectively. Neither - possible in gallium nor most hardware. Assume these combinations - don't happen. */ - if (st->ctx->Color.ColorLogicOpEnabled || - (st->ctx->Color.BlendEnabled && - st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) { - /* logicop enabled */ - blend->logicop_enable = 1; - blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); - } - else if (st->ctx->Color.BlendEnabled) { - /* blending enabled */ - for (i = 0; i < num_state; i++) { - - blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1; - - blend->rt[i].rgb_func = - translate_blend(st->ctx->Color.Blend[i].EquationRGB); - - if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN || - st->ctx->Color.Blend[i].EquationRGB == GL_MAX) { - /* Min/max are special */ - blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; - } - else { - blend->rt[i].rgb_src_factor = - translate_blend(st->ctx->Color.Blend[i].SrcRGB); - blend->rt[i].rgb_dst_factor = - translate_blend(st->ctx->Color.Blend[i].DstRGB); - } - - blend->rt[i].alpha_func = - translate_blend(st->ctx->Color.Blend[i].EquationA); - - if (st->ctx->Color.Blend[i].EquationA == GL_MIN || - st->ctx->Color.Blend[i].EquationA == GL_MAX) { - /* Min/max are special */ - blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; - } - else { - blend->rt[i].alpha_src_factor = - translate_blend(st->ctx->Color.Blend[i].SrcA); - blend->rt[i].alpha_dst_factor = - translate_blend(st->ctx->Color.Blend[i].DstA); - } - } - } - else { - /* no blending / logicop */ - } - - /* Colormask - maybe reverse these bits? 
*/ - for (i = 0; i < num_state; i++) { - if (st->ctx->Color.ColorMask[i][0]) - blend->rt[i].colormask |= PIPE_MASK_R; - if (st->ctx->Color.ColorMask[i][1]) - blend->rt[i].colormask |= PIPE_MASK_G; - if (st->ctx->Color.ColorMask[i][2]) - blend->rt[i].colormask |= PIPE_MASK_B; - if (st->ctx->Color.ColorMask[i][3]) - blend->rt[i].colormask |= PIPE_MASK_A; - } - - if (st->ctx->Color.DitherFlag) - blend->dither = 1; - - if (st->ctx->Multisample.Enabled) { - /* unlike in gallium/d3d10 these operations are only performed - if msaa is enabled */ - if (st->ctx->Multisample.SampleAlphaToCoverage) - blend->alpha_to_coverage = 1; - if (st->ctx->Multisample.SampleAlphaToOne) - blend->alpha_to_one = 1; - } - - cso_set_blend(st->cso_context, blend); - - { - struct pipe_blend_color bc; - COPY_4FV(bc.color, st->ctx->Color.BlendColor); - cso_set_blend_color(st->cso_context, &bc); - } -} - - -const struct st_tracked_state st_update_blend = { - "st_update_blend", /* name */ - { /* dirty */ - (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */ - 0, /* st */ - }, - update_blend, /* update */ -}; +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "st_context.h" +#include "st_atom.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" + +#include "main/macros.h" + +/** + * Convert GLenum blend tokens to pipe tokens. + * Both blend factors and blend funcs are accepted. 
+ */ +static GLuint +translate_blend(GLenum blend) +{ + switch (blend) { + /* blend functions */ + case GL_FUNC_ADD: + return PIPE_BLEND_ADD; + case GL_FUNC_SUBTRACT: + return PIPE_BLEND_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return PIPE_BLEND_REVERSE_SUBTRACT; + case GL_MIN: + return PIPE_BLEND_MIN; + case GL_MAX: + return PIPE_BLEND_MAX; + + /* blend factors */ + case GL_ONE: + return PIPE_BLENDFACTOR_ONE; + case GL_SRC_COLOR: + return PIPE_BLENDFACTOR_SRC_COLOR; + case GL_SRC_ALPHA: + return PIPE_BLENDFACTOR_SRC_ALPHA; + case GL_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_ALPHA; + case GL_DST_COLOR: + return PIPE_BLENDFACTOR_DST_COLOR; + case GL_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_SRC1_COLOR; + return PIPE_BLENDFACTOR_SRC1_ALPHA; + */ + case GL_ZERO: + return PIPE_BLENDFACTOR_ZERO; + case GL_ONE_MINUS_SRC_COLOR: + return PIPE_BLENDFACTOR_INV_SRC_COLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return PIPE_BLENDFACTOR_INV_SRC_ALPHA; + case GL_ONE_MINUS_DST_COLOR: + return PIPE_BLENDFACTOR_INV_DST_COLOR; + case GL_ONE_MINUS_DST_ALPHA: + return PIPE_BLENDFACTOR_INV_DST_ALPHA; + case GL_ONE_MINUS_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_INV_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_INV_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_INV_SRC1_COLOR; + return PIPE_BLENDFACTOR_INV_SRC1_ALPHA; + */ + default: + assert("invalid GL token in translate_blend()" == NULL); + return 0; + } +} + + +/** + * Convert GLenum logicop tokens to pipe tokens. + */ +static GLuint +translate_logicop(GLenum logicop) +{ + switch (logicop) { + case GL_CLEAR: + return PIPE_LOGICOP_CLEAR; + case GL_NOR: + return PIPE_LOGICOP_NOR; + case GL_AND_INVERTED: + return PIPE_LOGICOP_AND_INVERTED; + case GL_COPY_INVERTED: + return PIPE_LOGICOP_COPY_INVERTED; + case GL_AND_REVERSE: + return PIPE_LOGICOP_AND_REVERSE; + case GL_INVERT: + return PIPE_LOGICOP_INVERT; + case GL_XOR: + return PIPE_LOGICOP_XOR; + case GL_NAND: + return PIPE_LOGICOP_NAND; + case GL_AND: + return PIPE_LOGICOP_AND; + case GL_EQUIV: + return PIPE_LOGICOP_EQUIV; + case GL_NOOP: + return PIPE_LOGICOP_NOOP; + case GL_OR_INVERTED: + return PIPE_LOGICOP_OR_INVERTED; + case GL_COPY: + return PIPE_LOGICOP_COPY; + case GL_OR_REVERSE: + return PIPE_LOGICOP_OR_REVERSE; + case GL_OR: + return PIPE_LOGICOP_OR; + case GL_SET: + return PIPE_LOGICOP_SET; + default: + assert("invalid GL token in translate_logicop()" == NULL); + return 0; + } +} + +/** + * Figure out if colormasks are different per rt. + */ +static GLboolean +colormask_per_rt(struct gl_context *ctx) +{ + /* a bit suboptimal have to compare lots of values */ + unsigned i; + for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) { + if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) { + return GL_TRUE; + } + } + return GL_FALSE; +} + +/** + * Figure out if blend enables/state are different per rt. 
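blend_per_rt() below decides this from the BlendEnabled bitmask and the per-buffer derived flags. A concrete illustration with invented numbers: with MaxDrawBuffers = 4, enabling blending only on buffers 0 and 2 is not the "all buffers" mask, so per-RT blend state is required.

    /* Illustration only -- mirrors the test in blend_per_rt(). */
    GLbitfield blend_enabled = (1 << 0) | (1 << 2);   /* buffers 0 and 2 -> 0x5      */
    GLbitfield all_enabled   = (1 << 4) - 1;          /* MaxDrawBuffers = 4 -> 0xf   */
    GLboolean  per_rt = blend_enabled && (blend_enabled != all_enabled);   /* GL_TRUE */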
+ */ +static GLboolean +blend_per_rt(struct gl_context *ctx) +{ + if (ctx->Color.BlendEnabled && + (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) { + /* This can only happen if GL_EXT_draw_buffers2 is enabled */ + return GL_TRUE; + } + if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) { + /* this can only happen if GL_ARB_draw_buffers_blend is enabled */ + return GL_TRUE; + } + return GL_FALSE; +} + +static void +update_blend( struct st_context *st ) +{ + struct pipe_blend_state *blend = &st->state.blend; + unsigned num_state = 1; + unsigned i, j; + + memset(blend, 0, sizeof(*blend)); + + if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) { + num_state = st->ctx->Const.MaxDrawBuffers; + blend->independent_blend_enable = 1; + } + /* Note it is impossible to correctly deal with EXT_blend_logic_op and + EXT_draw_buffers2/EXT_blend_equation_separate at the same time. + These combinations would require support for per-rt logicop enables + and separate alpha/rgb logicop/blend support respectively. Neither + possible in gallium nor most hardware. Assume these combinations + don't happen. */ + if (st->ctx->Color.ColorLogicOpEnabled || + (st->ctx->Color.BlendEnabled && + st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) { + /* logicop enabled */ + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); + } + else if (st->ctx->Color.BlendEnabled) { + /* blending enabled */ + for (i = 0, j = 0; i < num_state; i++) { + + blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1; + + if (st->ctx->Extensions.ARB_draw_buffers_blend) + j = i; + + blend->rt[i].rgb_func = + translate_blend(st->ctx->Color.Blend[j].EquationRGB); + + if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN || + st->ctx->Color.Blend[i].EquationRGB == GL_MAX) { + /* Min/max are special */ + blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->rt[i].rgb_src_factor = + translate_blend(st->ctx->Color.Blend[j].SrcRGB); + blend->rt[i].rgb_dst_factor = + translate_blend(st->ctx->Color.Blend[j].DstRGB); + } + + blend->rt[i].alpha_func = + translate_blend(st->ctx->Color.Blend[j].EquationA); + + if (st->ctx->Color.Blend[i].EquationA == GL_MIN || + st->ctx->Color.Blend[i].EquationA == GL_MAX) { + /* Min/max are special */ + blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->rt[i].alpha_src_factor = + translate_blend(st->ctx->Color.Blend[j].SrcA); + blend->rt[i].alpha_dst_factor = + translate_blend(st->ctx->Color.Blend[j].DstA); + } + } + } + else { + /* no blending / logicop */ + } + + /* Colormask - maybe reverse these bits? 
*/ + for (i = 0; i < num_state; i++) { + if (st->ctx->Color.ColorMask[i][0]) + blend->rt[i].colormask |= PIPE_MASK_R; + if (st->ctx->Color.ColorMask[i][1]) + blend->rt[i].colormask |= PIPE_MASK_G; + if (st->ctx->Color.ColorMask[i][2]) + blend->rt[i].colormask |= PIPE_MASK_B; + if (st->ctx->Color.ColorMask[i][3]) + blend->rt[i].colormask |= PIPE_MASK_A; + } + + if (st->ctx->Color.DitherFlag) + blend->dither = 1; + + if (st->ctx->Multisample.Enabled) { + /* unlike in gallium/d3d10 these operations are only performed + if msaa is enabled */ + if (st->ctx->Multisample.SampleAlphaToCoverage) + blend->alpha_to_coverage = 1; + if (st->ctx->Multisample.SampleAlphaToOne) + blend->alpha_to_one = 1; + } + + cso_set_blend(st->cso_context, blend); + + { + struct pipe_blend_color bc; + COPY_4FV(bc.color, st->ctx->Color.BlendColor); + cso_set_blend_color(st->cso_context, &bc); + } +} + + +const struct st_tracked_state st_update_blend = { + "st_update_blend", /* name */ + { /* dirty */ + (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */ + 0, /* st */ + }, + update_blend, /* update */ +}; diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c index ddd130a81..0ea567155 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c @@ -1,890 +1,893 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Brian Paul - */ - -#include "main/imports.h" -#include "main/image.h" -#include "main/bufferobj.h" -#include "main/macros.h" -#include "main/mfeatures.h" -#include "program/program.h" -#include "program/prog_print.h" - -#include "st_context.h" -#include "st_atom.h" -#include "st_atom_constbuf.h" -#include "st_program.h" -#include "st_cb_bitmap.h" -#include "st_texture.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_inlines.h" -#include "util/u_draw_quad.h" -#include "util/u_simple_shaders.h" -#include "program/prog_instruction.h" -#include "cso_cache/cso_context.h" - - -#if FEATURE_drawpix - -/** - * glBitmaps are drawn as textured quads. 
The user's bitmap pattern - * is stored in a texture image. An alpha8 texture format is used. - * The fragment shader samples a bit (texel) from the texture, then - * discards the fragment if the bit is off. - * - * Note that we actually store the inverse image of the bitmap to - * simplify the fragment program. An "on" bit gets stored as texel=0x0 - * and an "off" bit is stored as texel=0xff. Then we kill the - * fragment if the negated texel value is less than zero. - */ - - -/** - * The bitmap cache attempts to accumulate multiple glBitmap calls in a - * buffer which is then rendered en mass upon a flush, state change, etc. - * A wide, short buffer is used to target the common case of a series - * of glBitmap calls being used to draw text. - */ -static GLboolean UseBitmapCache = GL_TRUE; - - -#define BITMAP_CACHE_WIDTH 512 -#define BITMAP_CACHE_HEIGHT 32 - -struct bitmap_cache -{ - /** Window pos to render the cached image */ - GLint xpos, ypos; - /** Bounds of region used in window coords */ - GLint xmin, ymin, xmax, ymax; - - GLfloat color[4]; - - /** Bitmap's Z position */ - GLfloat zpos; - - struct pipe_resource *texture; - struct pipe_transfer *trans; - - GLboolean empty; - - /** An I8 texture image: */ - ubyte *buffer; -}; - - -/** Epsilon for Z comparisons */ -#define Z_EPSILON 1e-06 - - -/** - * Make fragment program for glBitmap: - * Sample the texture and kill the fragment if the bit is 0. - * This program will be combined with the user's fragment program. - */ -static struct st_fragment_program * -make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) -{ - struct st_context *st = st_context(ctx); - struct st_fragment_program *stfp; - struct gl_program *p; - GLuint ic = 0; - - p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!p) - return NULL; - - p->NumInstructions = 3; - - p->Instructions = _mesa_alloc_instructions(p->NumInstructions); - if (!p->Instructions) { - ctx->Driver.DeleteProgram(ctx, p); - return NULL; - } - _mesa_init_instructions(p->Instructions, p->NumInstructions); - - /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY; - p->Instructions[ic].DstReg.Index = 0; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; - p->Instructions[ic].TexSrcUnit = samplerIndex; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - - /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ - p->Instructions[ic].Opcode = OPCODE_KIL; - p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY; - - if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) - p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX; - - p->Instructions[ic].SrcReg[0].Index = 0; - p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW; - ic++; - - /* END; */ - p->Instructions[ic++].Opcode = OPCODE_END; - - assert(ic == p->NumInstructions); - - p->InputsRead = FRAG_BIT_TEX0; - p->OutputsWritten = 0x0; - p->SamplersUsed = (1 << samplerIndex); - - stfp = (struct st_fragment_program *) p; - stfp->Base.UsesKill = GL_TRUE; - - return stfp; -} - - -static int -find_free_bit(uint bitfield) -{ - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) { - return i; - } - } - return -1; -} - - -/** - * Combine basic bitmap fragment program with the user-defined program. 
- * \param st current context - * \param fpIn the incoming fragment program - * \param fpOut the new fragment program which does fragment culling - * \param bitmap_sampler sampler number for the bitmap texture - */ -void -st_make_bitmap_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut, - GLuint *bitmap_sampler) -{ - struct st_fragment_program *bitmap_prog; - struct gl_program *newProg; - uint sampler; - - /* - * Generate new program which is the user-defined program prefixed - * with the bitmap sampler/kill instructions. - */ - sampler = find_free_bit(fpIn->Base.SamplersUsed); - bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); - - newProg = _mesa_combine_programs(st->ctx, - &bitmap_prog->Base.Base, - &fpIn->Base); - /* done with this after combining */ - st_reference_fragprog(st, &bitmap_prog, NULL); - -#if 0 - { - printf("Combined bitmap program:\n"); - _mesa_print_program(newProg); - printf("InputsRead: 0x%x\n", newProg->InputsRead); - printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); - _mesa_print_parameter_list(newProg->Parameters); - } -#endif - - /* return results */ - *fpOut = (struct gl_fragment_program *) newProg; - *bitmap_sampler = sampler; -} - - -/** - * Copy user-provide bitmap bits into texture buffer, expanding - * bits into texels. - * "On" bits will set texels to 0x0. - * "Off" bits will not modify texels. - * Note that the image is actually going to be upside down in - * the texture. We deal with that with texcoords. - */ -static void -unpack_bitmap(struct st_context *st, - GLint px, GLint py, GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap, - ubyte *destBuffer, uint destStride) -{ - destBuffer += py * destStride + px; - - _mesa_expand_bitmap(width, height, unpack, bitmap, - destBuffer, destStride, 0x0); -} - - -/** - * Create a texture which represents a bitmap image. - */ -static struct pipe_resource * -make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_transfer *transfer; - ubyte *dest; - struct pipe_resource *pt; - - /* PBO source... */ - bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap); - if (!bitmap) { - return NULL; - } - - /** - * Create texture to hold bitmap pattern. 
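To spell out the bit-to-texel encoding described in the file comment and used by unpack_bitmap() above (illustrative arithmetic only): the texture is cleared to 0xff, expansion then writes 0x00 for every set bitmap bit, and the TEX+KIL program generated earlier keeps only the "on" texels.

    /* Illustration of the kill test performed by the generated fragment program. */
    static GLboolean
    bitmap_fragment_survives(ubyte texel)
    {
       GLfloat value = texel / 255.0f;   /* 0x00 for "on" bits, 0xff for "off" bits     */
       return !(-value < 0.0f);          /* KIL if -tmp0 < 0, so only "on" bits survive */
    }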
- */ - pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format, - 0, width, height, 1, 1, - PIPE_BIND_SAMPLER_VIEW); - if (!pt) { - _mesa_unmap_pbo_source(ctx, unpack); - return NULL; - } - - transfer = pipe_get_transfer(st->pipe, pt, 0, 0, - PIPE_TRANSFER_WRITE, - 0, 0, width, height); - - dest = pipe_transfer_map(pipe, transfer); - - /* Put image into texture transfer */ - memset(dest, 0xff, height * transfer->stride); - unpack_bitmap(st, 0, 0, width, height, unpack, bitmap, - dest, transfer->stride); - - _mesa_unmap_pbo_source(ctx, unpack); - - /* Release transfer */ - pipe_transfer_unmap(pipe, transfer); - pipe->transfer_destroy(pipe, transfer); - - return pt; -} - -static GLuint -setup_bitmap_vertex_data(struct st_context *st, bool normalized, - int x, int y, int width, int height, - float z, const float color[4]) -{ - struct pipe_context *pipe = st->pipe; - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat)fb->Width; - const GLfloat fb_height = (GLfloat)fb->Height; - const GLfloat x0 = (GLfloat)x; - const GLfloat x1 = (GLfloat)(x + width); - const GLfloat y0 = (GLfloat)y; - const GLfloat y1 = (GLfloat)(y + height); - GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0; - GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop; - const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); - const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); - const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); - const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); - const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */ - GLuint i; - - if(!normalized) - { - sRight = width; - tBot = height; - } - - /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as - * no_flush) updates to buffers where we know there is no conflict - * with previous data. Currently using max_slots > 1 will cause - * synchronous rendering if the driver flushes its command buffers - * between one bitmap and the next. Our flush hook below isn't - * sufficient to catch this as the driver doesn't tell us when it - * flushes its own command buffers. Until this gets fixed, pay the - * price of allocating a new buffer for each bitmap cache-flush to - * avoid synchronous rendering. - */ - if (st->bitmap.vbuf_slot >= max_slots) { - pipe_resource_reference(&st->bitmap.vbuf, NULL); - st->bitmap.vbuf_slot = 0; - } - - if (!st->bitmap.vbuf) { - st->bitmap.vbuf = pipe_buffer_create(pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - max_slots * - sizeof(st->bitmap.vertices)); - } - - /* Positions are in clip coords since we need to do clipping in case - * the bitmap quad goes beyond the window bounds. 
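A worked example of the window-to-clip conversion used here, with made-up numbers (an 800-pixel-wide framebuffer and a bitmap at x = 100, width = 256):

    /* Illustration only -- same arithmetic as the clip_x0/clip_x1 setup above. */
    GLfloat fb_width = 800.0f;
    GLfloat x0 = 100.0f, x1 = 100.0f + 256.0f;
    GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;   /* -0.75 */
    GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;   /* -0.11 */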
- */ - st->bitmap.vertices[0][0][0] = clip_x0; - st->bitmap.vertices[0][0][1] = clip_y0; - st->bitmap.vertices[0][2][0] = sLeft; - st->bitmap.vertices[0][2][1] = tTop; - - st->bitmap.vertices[1][0][0] = clip_x1; - st->bitmap.vertices[1][0][1] = clip_y0; - st->bitmap.vertices[1][2][0] = sRight; - st->bitmap.vertices[1][2][1] = tTop; - - st->bitmap.vertices[2][0][0] = clip_x1; - st->bitmap.vertices[2][0][1] = clip_y1; - st->bitmap.vertices[2][2][0] = sRight; - st->bitmap.vertices[2][2][1] = tBot; - - st->bitmap.vertices[3][0][0] = clip_x0; - st->bitmap.vertices[3][0][1] = clip_y1; - st->bitmap.vertices[3][2][0] = sLeft; - st->bitmap.vertices[3][2][1] = tBot; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - st->bitmap.vertices[i][0][2] = z; - st->bitmap.vertices[i][0][3] = 1.0; - st->bitmap.vertices[i][1][0] = color[0]; - st->bitmap.vertices[i][1][1] = color[1]; - st->bitmap.vertices[i][1][2] = color[2]; - st->bitmap.vertices[i][1][3] = color[3]; - st->bitmap.vertices[i][2][2] = 0.0; /*R*/ - st->bitmap.vertices[i][2][3] = 1.0; /*Q*/ - } - - /* put vertex data into vbuf */ - pipe_buffer_write_nooverlap(st->pipe, - st->bitmap.vbuf, - st->bitmap.vbuf_slot - * sizeof(st->bitmap.vertices), - sizeof st->bitmap.vertices, - st->bitmap.vertices); - - return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices; -} - - - -/** - * Render a glBitmap by drawing a textured quad - */ -static void -draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, - GLsizei width, GLsizei height, - struct pipe_sampler_view *sv, - const GLfloat *color) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct cso_context *cso = st->cso_context; - struct st_fp_variant *fpv; - struct st_fp_variant_key key; - GLuint maxSize; - GLuint offset; - - memset(&key, 0, sizeof(key)); - key.st = st; - key.bitmap = GL_TRUE; - - fpv = st_get_fp_variant(st, st->fp, &key); - - /* As an optimization, Mesa's fragment programs will sometimes get the - * primary color from a statevar/constant rather than a varying variable. - * when that's the case, we need to ensure that we use the 'color' - * parameter and not the current attribute color (which may have changed - * through glRasterPos and state validation. - * So, we force the proper color here. Not elegant, but it works. - */ - { - GLfloat colorSave[4]; - COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); - COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color); - st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); - COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave); - } - - - /* limit checks */ - /* XXX if the bitmap is larger than the max texture size, break - * it up into chunks. 
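The size limit checked right after this comment comes from PIPE_CAP_MAX_TEXTURE_2D_LEVELS; for example (the cap value here is invented), a screen reporting 13 mipmap levels allows bitmaps up to 1 << (13 - 1) = 4096 texels per side.

    /* Illustration of the maxSize computation that follows. */
    GLuint max_levels = 13;                  /* assumed PIPE_CAP_MAX_TEXTURE_2D_LEVELS */
    GLuint maxSize = 1 << (max_levels - 1);  /* 4096 */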
- */ - maxSize = 1 << (pipe->screen->get_param(pipe->screen, - PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); - assert(width <= (GLsizei)maxSize); - assert(height <= (GLsizei)maxSize); - - cso_save_rasterizer(cso); - cso_save_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_viewport(cso); - cso_save_fragment_shader(cso); - cso_save_vertex_shader(cso); - cso_save_vertex_elements(cso); - - /* rasterizer state: just scissor */ - st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled; - cso_set_rasterizer(cso, &st->bitmap.rasterizer); - - /* fragment shader state: TEX lookup program */ - cso_set_fragment_shader_handle(cso, fpv->driver_shader); - - /* vertex shader state: position + texcoord pass-through */ - cso_set_vertex_shader_handle(cso, st->bitmap.vs); - - /* user samplers, plus our bitmap sampler */ - { - struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; - uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers); - uint i; - for (i = 0; i < st->state.num_samplers; i++) { - samplers[i] = &st->state.samplers[i]; - } - samplers[fpv->bitmap_sampler] = - &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT]; - cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers); - } - - /* user textures, plus the bitmap texture */ - { - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; - uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures); - memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views)); - sampler_views[fpv->bitmap_sampler] = sv; - cso_set_fragment_sampler_views(cso, num, sampler_views); - } - - /* viewport state: viewport matching window dims */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - const GLfloat width = (GLfloat)fb->Width; - const GLfloat height = (GLfloat)fb->Height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * width; - vp.scale[1] = height * (invert ? 
-0.5f : 0.5f); - vp.scale[2] = 0.5f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * width; - vp.translate[1] = 0.5f * height; - vp.translate[2] = 0.5f; - vp.translate[3] = 0.0f; - cso_set_viewport(cso, &vp); - } - - cso_set_vertex_elements(cso, 3, st->velems_util_draw); - - /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; - - /* draw textured quad */ - offset = setup_bitmap_vertex_data(st, - sv->texture->target != PIPE_TEXTURE_RECT, - x, y, width, height, z, color); - - util_draw_vertex_buffer(pipe, st->bitmap.vbuf, offset, - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 3); /* attribs/vert */ - - - /* restore state */ - cso_restore_rasterizer(cso); - cso_restore_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_viewport(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_vertex_elements(cso); -} - - -static void -reset_cache(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - struct bitmap_cache *cache = st->bitmap.cache; - - /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/ - cache->empty = GL_TRUE; - - cache->xmin = 1000000; - cache->xmax = -1000000; - cache->ymin = 1000000; - cache->ymax = -1000000; - - if (cache->trans) { - pipe->transfer_destroy(pipe, cache->trans); - cache->trans = NULL; - } - - assert(!cache->texture); - - /* allocate a new texture */ - cache->texture = st_texture_create(st, PIPE_TEXTURE_2D, - st->bitmap.tex_format, 0, - BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, - 1, 1, - PIPE_BIND_SAMPLER_VIEW); -} - - -/** Print bitmap image to stdout (debug) */ -static void -print_cache(const struct bitmap_cache *cache) -{ - int i, j, k; - - for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) { - k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1); - for (j = 0; j < BITMAP_CACHE_WIDTH; j++) { - if (cache->buffer[k]) - printf("X"); - else - printf(" "); - k++; - } - printf("\n"); - } -} - - -/** - * Create gallium pipe_transfer object for the bitmap cache. - */ -static void -create_cache_trans(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - struct bitmap_cache *cache = st->bitmap.cache; - - if (cache->trans) - return; - - /* Map the texture transfer. - * Subsequent glBitmap calls will write into the texture image. - */ - cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, - BITMAP_CACHE_WIDTH, - BITMAP_CACHE_HEIGHT); - cache->buffer = pipe_transfer_map(pipe, cache->trans); - - /* init image to all 0xff */ - memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT); -} - - -/** - * If there's anything in the bitmap cache, draw/flush it now. - */ -void -st_flush_bitmap_cache(struct st_context *st) -{ - if (!st->bitmap.cache->empty) { - struct bitmap_cache *cache = st->bitmap.cache; - - if (st->ctx->DrawBuffer) { - struct pipe_context *pipe = st->pipe; - struct pipe_sampler_view *sv; - - assert(cache->xmin <= cache->xmax); - -/* printf("flush size %d x %d at %d, %d\n", - cache->xmax - cache->xmin, - cache->ymax - cache->ymin, - cache->xpos, cache->ypos); -*/ - - /* The texture transfer has been mapped until now. - * So unmap and release the texture transfer before drawing. 
- */ - if (cache->trans) { - if (0) - print_cache(cache); - pipe_transfer_unmap(pipe, cache->trans); - cache->buffer = NULL; - - pipe->transfer_destroy(pipe, cache->trans); - cache->trans = NULL; - } - - sv = st_create_texture_sampler_view(st->pipe, cache->texture); - if (sv) { - draw_bitmap_quad(st->ctx, - cache->xpos, - cache->ypos, - cache->zpos, - BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, - sv, - cache->color); - - pipe_sampler_view_reference(&sv, NULL); - } - } - - /* release/free the texture */ - pipe_resource_reference(&cache->texture, NULL); - - reset_cache(st); - } -} - - -/** - * Flush bitmap cache and release vertex buffer. - */ -void -st_flush_bitmap( struct st_context *st ) -{ - st_flush_bitmap_cache(st); - - /* Release vertex buffer to avoid synchronous rendering if we were - * to map it in the next frame. - */ - pipe_resource_reference(&st->bitmap.vbuf, NULL); - st->bitmap.vbuf_slot = 0; -} - - -/** - * Try to accumulate this glBitmap call in the bitmap cache. - * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. - */ -static GLboolean -accum_bitmap(struct st_context *st, - GLint x, GLint y, GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, - const GLubyte *bitmap ) -{ - struct bitmap_cache *cache = st->bitmap.cache; - int px = -999, py = -999; - const GLfloat z = st->ctx->Current.RasterPos[2]; - - if (width > BITMAP_CACHE_WIDTH || - height > BITMAP_CACHE_HEIGHT) - return GL_FALSE; /* too big to cache */ - - if (!cache->empty) { - px = x - cache->xpos; /* pos in buffer */ - py = y - cache->ypos; - if (px < 0 || px + width > BITMAP_CACHE_WIDTH || - py < 0 || py + height > BITMAP_CACHE_HEIGHT || - !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) || - ((fabs(z - cache->zpos) > Z_EPSILON))) { - /* This bitmap would extend beyond cache bounds, or the bitmap - * color is changing - * so flush and continue. - */ - st_flush_bitmap_cache(st); - } - } - - if (cache->empty) { - /* Initialize. Center bitmap vertically in the buffer. 
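As a concrete illustration of the accumulation policy in accum_bitmap() (values invented): with BITMAP_CACHE_WIDTH of 512 and a cache anchored at xpos = 40, a 32-pixel-wide glBitmap at x = 560 would land at px = 520 and overflow the cache, so the cache is flushed before a new one is started.

    /* Illustration only -- mirrors the overflow test in accum_bitmap(). */
    GLint cache_xpos = 40;
    GLint x = 560, width = 32;
    GLint px = x - cache_xpos;                                            /* 520     */
    GLboolean must_flush = (px < 0 || px + width > BITMAP_CACHE_WIDTH);   /* GL_TRUE */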
*/ - px = 0; - py = (BITMAP_CACHE_HEIGHT - height) / 2; - cache->xpos = x; - cache->ypos = y - py; - cache->zpos = z; - cache->empty = GL_FALSE; - COPY_4FV(cache->color, st->ctx->Current.RasterColor); - } - - assert(px != -999); - assert(py != -999); - - if (x < cache->xmin) - cache->xmin = x; - if (y < cache->ymin) - cache->ymin = y; - if (x + width > cache->xmax) - cache->xmax = x + width; - if (y + height > cache->ymax) - cache->ymax = y + height; - - /* create the transfer if needed */ - create_cache_trans(st); - - unpack_bitmap(st, px, py, width, height, unpack, bitmap, - cache->buffer, BITMAP_CACHE_WIDTH); - - return GL_TRUE; /* accumulated */ -} - - - -/** - * Called via ctx->Driver.Bitmap() - */ -static void -st_Bitmap(struct gl_context *ctx, GLint x, GLint y, - GLsizei width, GLsizei height, - const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ) -{ - struct st_context *st = st_context(ctx); - struct pipe_resource *pt; - - if (width == 0 || height == 0) - return; - - st_validate_state(st); - - if (!st->bitmap.vs) { - /* create pass-through vertex shader now */ - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR, - TGSI_SEMANTIC_GENERIC }; - const uint semantic_indexes[] = { 0, 0, 0 }; - st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, - semantic_names, - semantic_indexes); - } - - if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) - return; - - pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); - if (pt) { - struct pipe_sampler_view *sv = - st_create_texture_sampler_view(st->pipe, pt); - - assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT); - - if (sv) { - draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], - width, height, sv, - st->ctx->Current.RasterColor); - - pipe_sampler_view_reference(&sv, NULL); - } - - /* release/free the texture */ - pipe_resource_reference(&pt, NULL); - } -} - - -/** Per-context init */ -void -st_init_bitmap_functions(struct dd_function_table *functions) -{ - functions->Bitmap = st_Bitmap; -} - - -/** Per-context init */ -void -st_init_bitmap(struct st_context *st) -{ - struct pipe_sampler_state *sampler = &st->bitmap.samplers[0]; - struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; - - /* init sampler state once */ - memset(sampler, 0, sizeof(*sampler)); - sampler->wrap_s = PIPE_TEX_WRAP_CLAMP; - sampler->wrap_t = PIPE_TEX_WRAP_CLAMP; - sampler->wrap_r = PIPE_TEX_WRAP_CLAMP; - sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST; - st->bitmap.samplers[1] = *sampler; - st->bitmap.samplers[1].normalized_coords = 1; - - /* init baseline rasterizer state once */ - memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); - st->bitmap.rasterizer.gl_rasterization_rules = 1; - - /* find a usable texture format */ - if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, - PIPE_TEXTURE_2D, 0, - PIPE_BIND_SAMPLER_VIEW, 0)) { - st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; - } - else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM, - PIPE_TEXTURE_2D, 0, - PIPE_BIND_SAMPLER_VIEW, 0)) { - st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM; - } - else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM, - PIPE_TEXTURE_2D, 0, - PIPE_BIND_SAMPLER_VIEW, 0)) { - st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM; - } - else { - /* XXX support more formats */ - assert(0); - } - - /* alloc bitmap 
cache object */ - st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache); - - reset_cache(st); -} - - -/** Per-context tear-down */ -void -st_destroy_bitmap(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - struct bitmap_cache *cache = st->bitmap.cache; - - if (st->bitmap.vs) { - cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); - st->bitmap.vs = NULL; - } - - if (st->bitmap.vbuf) { - pipe_resource_reference(&st->bitmap.vbuf, NULL); - st->bitmap.vbuf = NULL; - } - - if (cache) { - if (cache->trans) { - pipe_transfer_unmap(pipe, cache->trans); - pipe->transfer_destroy(pipe, cache->trans); - } - pipe_resource_reference(&st->bitmap.cache->texture, NULL); - free(st->bitmap.cache); - st->bitmap.cache = NULL; - } -} - -#endif /* FEATURE_drawpix */ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/program.h" +#include "program/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_program.h" +#include "st_cb_bitmap.h" +#include "st_texture.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "util/u_draw_quad.h" +#include "util/u_simple_shaders.h" +#include "program/prog_instruction.h" +#include "cso_cache/cso_context.h" + + +#if FEATURE_drawpix + +/** + * glBitmaps are drawn as textured quads. The user's bitmap pattern + * is stored in a texture image. An alpha8 texture format is used. + * The fragment shader samples a bit (texel) from the texture, then + * discards the fragment if the bit is off. + * + * Note that we actually store the inverse image of the bitmap to + * simplify the fragment program. An "on" bit gets stored as texel=0x0 + * and an "off" bit is stored as texel=0xff. Then we kill the + * fragment if the negated texel value is less than zero. + */ + + +/** + * The bitmap cache attempts to accumulate multiple glBitmap calls in a + * buffer which is then rendered en mass upon a flush, state change, etc. 
+ * A wide, short buffer is used to target the common case of a series + * of glBitmap calls being used to draw text. + */ +static GLboolean UseBitmapCache = GL_TRUE; + + +#define BITMAP_CACHE_WIDTH 512 +#define BITMAP_CACHE_HEIGHT 32 + +struct bitmap_cache +{ + /** Window pos to render the cached image */ + GLint xpos, ypos; + /** Bounds of region used in window coords */ + GLint xmin, ymin, xmax, ymax; + + GLfloat color[4]; + + /** Bitmap's Z position */ + GLfloat zpos; + + struct pipe_resource *texture; + struct pipe_transfer *trans; + + GLboolean empty; + + /** An I8 texture image: */ + ubyte *buffer; +}; + + +/** Epsilon for Z comparisons */ +#define Z_EPSILON 1e-06 + + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + */ +static struct st_fragment_program * +make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) +{ + struct st_context *st = st_context(ctx); + struct st_fragment_program *stfp; + struct gl_program *p; + GLuint ic = 0; + + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = 3; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY; + p->Instructions[ic].DstReg.Index = 0; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = samplerIndex; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + p->Instructions[ic].Opcode = OPCODE_KIL; + p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX; + + p->Instructions[ic].SrcReg[0].Index = 0; + p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW; + ic++; + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0; + p->OutputsWritten = 0x0; + p->SamplersUsed = (1 << samplerIndex); + + stfp = (struct st_fragment_program *) p; + stfp->Base.UsesKill = GL_TRUE; + + return stfp; +} + + +static int +find_free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) { + return i; + } + } + return -1; +} + + +/** + * Combine basic bitmap fragment program with the user-defined program. + * \param st current context + * \param fpIn the incoming fragment program + * \param fpOut the new fragment program which does fragment culling + * \param bitmap_sampler sampler number for the bitmap texture + */ +void +st_make_bitmap_fragment_program(struct st_context *st, + struct gl_fragment_program *fpIn, + struct gl_fragment_program **fpOut, + GLuint *bitmap_sampler) +{ + struct st_fragment_program *bitmap_prog; + struct gl_program *newProg; + uint sampler; + + /* + * Generate new program which is the user-defined program prefixed + * with the bitmap sampler/kill instructions. 
+ */ + sampler = find_free_bit(fpIn->Base.SamplersUsed); + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + +#if 0 + { + printf("Combined bitmap program:\n"); + _mesa_print_program(newProg); + printf("InputsRead: 0x%x\n", newProg->InputsRead); + printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); + _mesa_print_parameter_list(newProg->Parameters); + } +#endif + + /* return results */ + *fpOut = (struct gl_fragment_program *) newProg; + *bitmap_sampler = sampler; +} + + +/** + * Copy user-provide bitmap bits into texture buffer, expanding + * bits into texels. + * "On" bits will set texels to 0x0. + * "Off" bits will not modify texels. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. + */ +static void +unpack_bitmap(struct st_context *st, + GLint px, GLint py, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + ubyte *destBuffer, uint destStride) +{ + destBuffer += py * destStride + px; + + _mesa_expand_bitmap(width, height, unpack, bitmap, + destBuffer, destStride, 0x0); +} + + +/** + * Create a texture which represents a bitmap image. + */ +static struct pipe_resource * +make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_transfer *transfer; + ubyte *dest; + struct pipe_resource *pt; + + /* PBO source... */ + bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap); + if (!bitmap) { + return NULL; + } + + /** + * Create texture to hold bitmap pattern. 
+ */ + pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format, + 0, width, height, 1, 1, + PIPE_BIND_SAMPLER_VIEW); + if (!pt) { + _mesa_unmap_pbo_source(ctx, unpack); + return NULL; + } + + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, + 0, 0, width, height); + + dest = pipe_transfer_map(pipe, transfer); + + /* Put image into texture transfer */ + memset(dest, 0xff, height * transfer->stride); + unpack_bitmap(st, 0, 0, width, height, unpack, bitmap, + dest, transfer->stride); + + _mesa_unmap_pbo_source(ctx, unpack); + + /* Release transfer */ + pipe_transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); + + return pt; +} + +static GLuint +setup_bitmap_vertex_data(struct st_context *st, bool normalized, + int x, int y, int width, int height, + float z, const float color[4]) +{ + struct pipe_context *pipe = st->pipe; + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat)fb->Width; + const GLfloat fb_height = (GLfloat)fb->Height; + const GLfloat x0 = (GLfloat)x; + const GLfloat x1 = (GLfloat)(x + width); + const GLfloat y0 = (GLfloat)y; + const GLfloat y1 = (GLfloat)(y + height); + GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0; + GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop; + const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); + const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); + const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); + const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */ + GLuint i; + + if(!normalized) + { + sRight = width; + tBot = height; + } + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + if (st->bitmap.vbuf_slot >= max_slots) { + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; + } + + if (!st->bitmap.vbuf) { + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + max_slots * + sizeof(st->bitmap.vertices)); + } + + /* Positions are in clip coords since we need to do clipping in case + * the bitmap quad goes beyond the window bounds. 
+ */ + st->bitmap.vertices[0][0][0] = clip_x0; + st->bitmap.vertices[0][0][1] = clip_y0; + st->bitmap.vertices[0][2][0] = sLeft; + st->bitmap.vertices[0][2][1] = tTop; + + st->bitmap.vertices[1][0][0] = clip_x1; + st->bitmap.vertices[1][0][1] = clip_y0; + st->bitmap.vertices[1][2][0] = sRight; + st->bitmap.vertices[1][2][1] = tTop; + + st->bitmap.vertices[2][0][0] = clip_x1; + st->bitmap.vertices[2][0][1] = clip_y1; + st->bitmap.vertices[2][2][0] = sRight; + st->bitmap.vertices[2][2][1] = tBot; + + st->bitmap.vertices[3][0][0] = clip_x0; + st->bitmap.vertices[3][0][1] = clip_y1; + st->bitmap.vertices[3][2][0] = sLeft; + st->bitmap.vertices[3][2][1] = tBot; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->bitmap.vertices[i][0][2] = z; + st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][1][0] = color[0]; + st->bitmap.vertices[i][1][1] = color[1]; + st->bitmap.vertices[i][1][2] = color[2]; + st->bitmap.vertices[i][1][3] = color[3]; + st->bitmap.vertices[i][2][2] = 0.0; /*R*/ + st->bitmap.vertices[i][2][3] = 1.0; /*Q*/ + } + + /* put vertex data into vbuf */ + pipe_buffer_write_nooverlap(st->pipe, + st->bitmap.vbuf, + st->bitmap.vbuf_slot + * sizeof(st->bitmap.vertices), + sizeof st->bitmap.vertices, + st->bitmap.vertices); + + return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices; +} + + + +/** + * Render a glBitmap by drawing a textured quad + */ +static void +draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + struct pipe_sampler_view *sv, + const GLfloat *color) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; + struct st_fp_variant *fpv; + struct st_fp_variant_key key; + GLuint maxSize; + GLuint offset; + + memset(&key, 0, sizeof(key)); + key.st = st; + key.bitmap = GL_TRUE; + + fpv = st_get_fp_variant(st, st->fp, &key); + + /* As an optimization, Mesa's fragment programs will sometimes get the + * primary color from a statevar/constant rather than a varying variable. + * when that's the case, we need to ensure that we use the 'color' + * parameter and not the current attribute color (which may have changed + * through glRasterPos and state validation. + * So, we force the proper color here. Not elegant, but it works. + */ + { + GLfloat colorSave[4]; + COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); + COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color); + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave); + } + + + /* limit checks */ + /* XXX if the bitmap is larger than the max texture size, break + * it up into chunks. 
+ */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= (GLsizei)maxSize); + assert(height <= (GLsizei)maxSize); + + cso_save_rasterizer(cso); + cso_save_samplers(cso); + cso_save_fragment_sampler_views(cso); + cso_save_viewport(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + + /* rasterizer state: just scissor */ + st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &st->bitmap.rasterizer); + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, fpv->driver_shader); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, st->bitmap.vs); + + /* user samplers, plus our bitmap sampler */ + { + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers); + uint i; + for (i = 0; i < st->state.num_samplers; i++) { + samplers[i] = &st->state.samplers[i]; + } + samplers[fpv->bitmap_sampler] = + &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT]; + cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers); + } + + /* user textures, plus the bitmap texture */ + { + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures); + memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views)); + sampler_views[fpv->bitmap_sampler] = sv; + cso_set_fragment_sampler_views(cso, num, sampler_views); + } + + /* viewport state: viewport matching window dims */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + const GLfloat width = (GLfloat)fb->Width; + const GLfloat height = (GLfloat)fb->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = height * (invert ? 
-0.5f : 0.5f); + vp.scale[2] = 0.5f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.5f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + cso_set_vertex_elements(cso, 3, st->velems_util_draw); + + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + z = z * 2.0 - 1.0; + + /* draw textured quad */ + offset = setup_bitmap_vertex_data(st, + sv->texture->target != PIPE_TEXTURE_RECT, + x, y, width, height, z, color); + + util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 3); /* attribs/vert */ + + + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_samplers(cso); + cso_restore_fragment_sampler_views(cso); + cso_restore_viewport(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); +} + + +static void +reset_cache(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/ + cache->empty = GL_TRUE; + + cache->xmin = 1000000; + cache->xmax = -1000000; + cache->ymin = 1000000; + cache->ymax = -1000000; + + if (cache->trans) { + pipe->transfer_destroy(pipe, cache->trans); + cache->trans = NULL; + } + + assert(!cache->texture); + + /* allocate a new texture */ + cache->texture = st_texture_create(st, PIPE_TEXTURE_2D, + st->bitmap.tex_format, 0, + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + 1, 1, + PIPE_BIND_SAMPLER_VIEW); +} + + +/** Print bitmap image to stdout (debug) */ +static void +print_cache(const struct bitmap_cache *cache) +{ + int i, j, k; + + for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) { + k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1); + for (j = 0; j < BITMAP_CACHE_WIDTH; j++) { + if (cache->buffer[k]) + printf("X"); + else + printf(" "); + k++; + } + printf("\n"); + } +} + + +/** + * Create gallium pipe_transfer object for the bitmap cache. + */ +static void +create_cache_trans(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + if (cache->trans) + return; + + /* Map the texture transfer. + * Subsequent glBitmap calls will write into the texture image. + */ + cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + BITMAP_CACHE_WIDTH, + BITMAP_CACHE_HEIGHT); + cache->buffer = pipe_transfer_map(pipe, cache->trans); + + /* init image to all 0xff */ + memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT); +} + + +/** + * If there's anything in the bitmap cache, draw/flush it now. + */ +void +st_flush_bitmap_cache(struct st_context *st) +{ + if (!st->bitmap.cache->empty) { + struct bitmap_cache *cache = st->bitmap.cache; + + if (st->ctx->DrawBuffer) { + struct pipe_context *pipe = st->pipe; + struct pipe_sampler_view *sv; + + assert(cache->xmin <= cache->xmax); + +/* printf("flush size %d x %d at %d, %d\n", + cache->xmax - cache->xmin, + cache->ymax - cache->ymin, + cache->xpos, cache->ypos); +*/ + + /* The texture transfer has been mapped until now. + * So unmap and release the texture transfer before drawing. 
+ */ + if (cache->trans) { + if (0) + print_cache(cache); + pipe_transfer_unmap(pipe, cache->trans); + cache->buffer = NULL; + + pipe->transfer_destroy(pipe, cache->trans); + cache->trans = NULL; + } + + sv = st_create_texture_sampler_view(st->pipe, cache->texture); + if (sv) { + draw_bitmap_quad(st->ctx, + cache->xpos, + cache->ypos, + cache->zpos, + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + sv, + cache->color); + + pipe_sampler_view_reference(&sv, NULL); + } + } + + /* release/free the texture */ + pipe_resource_reference(&cache->texture, NULL); + + reset_cache(st); + } +} + + +/** + * Flush bitmap cache and release vertex buffer. + */ +void +st_flush_bitmap( struct st_context *st ) +{ + st_flush_bitmap_cache(st); + + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; +} + + +/** + * Try to accumulate this glBitmap call in the bitmap cache. + * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. + */ +static GLboolean +accum_bitmap(struct st_context *st, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct bitmap_cache *cache = st->bitmap.cache; + int px = -999, py = -999; + const GLfloat z = st->ctx->Current.RasterPos[2]; + + if (width > BITMAP_CACHE_WIDTH || + height > BITMAP_CACHE_HEIGHT) + return GL_FALSE; /* too big to cache */ + + if (!cache->empty) { + px = x - cache->xpos; /* pos in buffer */ + py = y - cache->ypos; + if (px < 0 || px + width > BITMAP_CACHE_WIDTH || + py < 0 || py + height > BITMAP_CACHE_HEIGHT || + !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) || + ((fabs(z - cache->zpos) > Z_EPSILON))) { + /* This bitmap would extend beyond cache bounds, or the bitmap + * color is changing + * so flush and continue. + */ + st_flush_bitmap_cache(st); + } + } + + if (cache->empty) { + /* Initialize. Center bitmap vertically in the buffer. 
*/ + px = 0; + py = (BITMAP_CACHE_HEIGHT - height) / 2; + cache->xpos = x; + cache->ypos = y - py; + cache->zpos = z; + cache->empty = GL_FALSE; + COPY_4FV(cache->color, st->ctx->Current.RasterColor); + } + + assert(px != -999); + assert(py != -999); + + if (x < cache->xmin) + cache->xmin = x; + if (y < cache->ymin) + cache->ymin = y; + if (x + width > cache->xmax) + cache->xmax = x + width; + if (y + height > cache->ymax) + cache->ymax = y + height; + + /* create the transfer if needed */ + create_cache_trans(st); + + unpack_bitmap(st, px, py, width, height, unpack, bitmap, + cache->buffer, BITMAP_CACHE_WIDTH); + + return GL_TRUE; /* accumulated */ +} + + + +/** + * Called via ctx->Driver.Bitmap() + */ +static void +st_Bitmap(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ) +{ + struct st_context *st = st_context(ctx); + struct pipe_resource *pt; + + if (width == 0 || height == 0) + return; + + st_validate_state(st); + + if (!st->bitmap.vs) { + /* create pass-through vertex shader now */ + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0, 0 }; + st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, + semantic_names, + semantic_indexes); + } + + if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) + return; + + pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); + if (pt) { + struct pipe_sampler_view *sv = + st_create_texture_sampler_view(st->pipe, pt); + + assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT); + + if (sv) { + draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, sv, + st->ctx->Current.RasterColor); + + pipe_sampler_view_reference(&sv, NULL); + } + + /* release/free the texture */ + pipe_resource_reference(&pt, NULL); + } +} + + +/** Per-context init */ +void +st_init_bitmap_functions(struct dd_function_table *functions) +{ + functions->Bitmap = st_Bitmap; +} + + +/** Per-context init */ +void +st_init_bitmap(struct st_context *st) +{ + struct pipe_sampler_state *sampler = &st->bitmap.samplers[0]; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + + /* init sampler state once */ + memset(sampler, 0, sizeof(*sampler)); + sampler->wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST; + st->bitmap.samplers[1] = *sampler; + st->bitmap.samplers[1].normalized_coords = 1; + + /* init baseline rasterizer state once */ + memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); + st->bitmap.rasterizer.gl_rasterization_rules = 1; + + /* find a usable texture format */ + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; + } + else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM; + } + else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM; + } + else { + /* XXX support more formats */ + assert(0); + } + + /* alloc bitmap 
cache object */ + st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache); + + reset_cache(st); +} + + +/** Per-context tear-down */ +void +st_destroy_bitmap(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + if (st->bitmap.vs) { + cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); + st->bitmap.vs = NULL; + } + + if (st->bitmap.vbuf) { + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf = NULL; + } + + if (cache) { + if (cache->trans) { + pipe_transfer_unmap(pipe, cache->trans); + pipe->transfer_destroy(pipe, cache->trans); + } + pipe_resource_reference(&st->bitmap.cache->texture, NULL); + free(st->bitmap.cache); + st->bitmap.cache = NULL; + } +} + +#endif /* FEATURE_drawpix */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c index ba8a8cf89..12528f49f 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -1,447 +1,468 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Functions for pixel buffer objects and vertex/element buffer objects. - */ - - -#include "main/imports.h" -#include "main/mtypes.h" -#include "main/arrayobj.h" -#include "main/bufferobj.h" - -#include "st_context.h" -#include "st_cb_bufferobjects.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" - - -/** - * There is some duplication between mesa's bufferobjects and our - * bufmgr buffers. Both have an integer handle and a hashtable to - * lookup an opaque structure. It would be nice if the handles and - * internal structure where somehow shared. - */ -static struct gl_buffer_object * -st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target) -{ - struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object); - - if (!st_obj) - return NULL; - - _mesa_initialize_buffer_object(&st_obj->Base, name, target); - - return &st_obj->Base; -} - - - -/** - * Deallocate/free a vertex/pixel buffer object. - * Called via glDeleteBuffersARB(). 
- */ -static void -st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - - assert(obj->RefCount == 0); - assert(st_obj->transfer == NULL); - - if (st_obj->buffer) - pipe_resource_reference(&st_obj->buffer, NULL); - - free(st_obj); -} - - - -/** - * Replace data in a subrange of buffer object. If the data range - * specified by size + offset extends beyond the end of the buffer or - * if data is NULL, no copy is performed. - * Called via glBufferSubDataARB(). - */ -static void -st_bufferobj_subdata(struct gl_context *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - const GLvoid * data, struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - - /* we may be called from VBO code, so double-check params here */ - ASSERT(offset >= 0); - ASSERT(size >= 0); - ASSERT(offset + size <= obj->Size); - - if (!size) - return; - - /* - * According to ARB_vertex_buffer_object specification, if data is null, - * then the contents of the buffer object's data store is undefined. We just - * ignore, and leave it unchanged. - */ - if (!data) - return; - - /* Now that transfers are per-context, we don't have to figure out - * flushing here. Usually drivers won't need to flush in this case - * even if the buffer is currently referenced by hardware - they - * just queue the upload as dma rather than mapping the underlying - * buffer directly. - */ - pipe_buffer_write(st_context(ctx)->pipe, - st_obj->buffer, - offset, size, data); -} - - -/** - * Called via glGetBufferSubDataARB(). - */ -static void -st_bufferobj_get_subdata(struct gl_context *ctx, - GLenum target, - GLintptrARB offset, - GLsizeiptrARB size, - GLvoid * data, struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - - /* we may be called from VBO code, so double-check params here */ - ASSERT(offset >= 0); - ASSERT(size >= 0); - ASSERT(offset + size <= obj->Size); - - if (!size) - return; - - pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer, - offset, size, data); -} - - -/** - * Allocate space for and store data in a buffer object. Any data that was - * previously stored in the buffer object is lost. If data is NULL, - * memory will be allocated, but no copy will occur. - * Called via ctx->Driver.BufferData(). 
- * \return GL_TRUE for success, GL_FALSE if out of memory - */ -static GLboolean -st_bufferobj_data(struct gl_context *ctx, - GLenum target, - GLsizeiptrARB size, - const GLvoid * data, - GLenum usage, - struct gl_buffer_object *obj) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct st_buffer_object *st_obj = st_buffer_object(obj); - unsigned buffer_usage; - - st_obj->Base.Size = size; - st_obj->Base.Usage = usage; - - switch(target) { - case GL_PIXEL_PACK_BUFFER_ARB: - case GL_PIXEL_UNPACK_BUFFER_ARB: - buffer_usage = PIPE_BIND_RENDER_TARGET; - break; - case GL_ARRAY_BUFFER_ARB: - buffer_usage = PIPE_BIND_VERTEX_BUFFER; - break; - case GL_ELEMENT_ARRAY_BUFFER_ARB: - buffer_usage = PIPE_BIND_INDEX_BUFFER; - break; - default: - buffer_usage = 0; - } - - pipe_resource_reference( &st_obj->buffer, NULL ); - - if (size != 0) { - st_obj->buffer = pipe_buffer_create(pipe->screen, buffer_usage, size); - - if (!st_obj->buffer) { - return GL_FALSE; - } - - if (data) - pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0, - size, data); - return GL_TRUE; - } - - return GL_TRUE; -} - - -/** - * Dummy data whose's pointer is used for zero size buffers or ranges. - */ -static long st_bufferobj_zero_length = 0; - - - -/** - * Called via glMapBufferARB(). - */ -static void * -st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, - struct gl_buffer_object *obj) -{ - struct st_buffer_object *st_obj = st_buffer_object(obj); - uint flags; - - switch (access) { - case GL_WRITE_ONLY: - flags = PIPE_TRANSFER_WRITE; - break; - case GL_READ_ONLY: - flags = PIPE_TRANSFER_READ; - break; - case GL_READ_WRITE: - default: - flags = PIPE_TRANSFER_READ_WRITE; - break; - } - - /* Handle zero-size buffers here rather than in drivers */ - if (obj->Size == 0) { - obj->Pointer = &st_bufferobj_zero_length; - } - else { - obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, - st_obj->buffer, - flags, - &st_obj->transfer); - } - - if (obj->Pointer) { - obj->Offset = 0; - obj->Length = obj->Size; - } - return obj->Pointer; -} - - -/** - * Called via glMapBufferRange(). - */ -static void * -st_bufferobj_map_range(struct gl_context *ctx, GLenum target, - GLintptr offset, GLsizeiptr length, GLbitfield access, - struct gl_buffer_object *obj) -{ - struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_buffer_object *st_obj = st_buffer_object(obj); - enum pipe_transfer_usage flags = 0x0; - - if (access & GL_MAP_WRITE_BIT) - flags |= PIPE_TRANSFER_WRITE; - - if (access & GL_MAP_READ_BIT) - flags |= PIPE_TRANSFER_READ; - - if (access & GL_MAP_FLUSH_EXPLICIT_BIT) - flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; - - if (access & GL_MAP_INVALIDATE_RANGE_BIT) - flags |= PIPE_TRANSFER_DISCARD; - - if (access & GL_MAP_INVALIDATE_BUFFER_BIT) - flags |= PIPE_TRANSFER_DISCARD; - - if (access & GL_MAP_UNSYNCHRONIZED_BIT) - flags |= PIPE_TRANSFER_UNSYNCHRONIZED; - - /* ... other flags ... - */ - - if (access & MESA_MAP_NOWAIT_BIT) - flags |= PIPE_TRANSFER_DONTBLOCK; - - assert(offset >= 0); - assert(length >= 0); - assert(offset < obj->Size); - assert(offset + length <= obj->Size); - - /* - * We go out of way here to hide the degenerate yet valid case of zero - * length range from the pipe driver. 
- */ - if (!length) { - obj->Pointer = &st_bufferobj_zero_length; - } - else { - obj->Pointer = pipe_buffer_map_range(pipe, - st_obj->buffer, - offset, length, - flags, - &st_obj->transfer); - if (obj->Pointer) { - obj->Pointer = (ubyte *) obj->Pointer + offset; - } - } - - if (obj->Pointer) { - obj->Offset = offset; - obj->Length = length; - obj->AccessFlags = access; - } - - return obj->Pointer; -} - - -static void -st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, - GLintptr offset, GLsizeiptr length, - struct gl_buffer_object *obj) -{ - struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_buffer_object *st_obj = st_buffer_object(obj); - - /* Subrange is relative to mapped range */ - assert(offset >= 0); - assert(length >= 0); - assert(offset + length <= obj->Length); - assert(obj->Pointer); - - if (!length) - return; - - pipe_buffer_flush_mapped_range(pipe, st_obj->transfer, - obj->Offset + offset, length); -} - - -/** - * Called via glUnmapBufferARB(). - */ -static GLboolean -st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) -{ - struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_buffer_object *st_obj = st_buffer_object(obj); - - if (obj->Length) - pipe_buffer_unmap(pipe, st_obj->transfer); - - st_obj->transfer = NULL; - obj->Pointer = NULL; - obj->Offset = 0; - obj->Length = 0; - return GL_TRUE; -} - - -/** - * Called via glCopyBufferSubData(). - */ -static void -st_copy_buffer_subdata(struct gl_context *ctx, - struct gl_buffer_object *src, - struct gl_buffer_object *dst, - GLintptr readOffset, GLintptr writeOffset, - GLsizeiptr size) -{ - struct pipe_context *pipe = st_context(ctx)->pipe; - struct st_buffer_object *srcObj = st_buffer_object(src); - struct st_buffer_object *dstObj = st_buffer_object(dst); - struct pipe_transfer *src_transfer; - struct pipe_transfer *dst_transfer; - ubyte *srcPtr, *dstPtr; - - if(!size) - return; - - /* buffer should not already be mapped */ - assert(!src->Pointer); - assert(!dst->Pointer); - - srcPtr = (ubyte *) pipe_buffer_map_range(pipe, - srcObj->buffer, - readOffset, size, - PIPE_TRANSFER_READ, - &src_transfer); - - dstPtr = (ubyte *) pipe_buffer_map_range(pipe, - dstObj->buffer, - writeOffset, size, - PIPE_TRANSFER_WRITE, - &dst_transfer); - - if (srcPtr && dstPtr) - memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); - - pipe_buffer_unmap(pipe, src_transfer); - pipe_buffer_unmap(pipe, dst_transfer); -} - - -/* TODO: if buffer wasn't created with appropriate usage flags, need - * to recreate it now and copy contents -- or possibly create a - * gallium entrypoint to extend the usage flags and let the driver - * decide if a copy is necessary. 
- */ -void -st_bufferobj_validate_usage(struct st_context *st, - struct st_buffer_object *obj, - unsigned usage) -{ -} - - -void -st_init_bufferobject_functions(struct dd_function_table *functions) -{ - functions->NewBufferObject = st_bufferobj_alloc; - functions->DeleteBuffer = st_bufferobj_free; - functions->BufferData = st_bufferobj_data; - functions->BufferSubData = st_bufferobj_subdata; - functions->GetBufferSubData = st_bufferobj_get_subdata; - functions->MapBuffer = st_bufferobj_map; - functions->MapBufferRange = st_bufferobj_map_range; - functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; - functions->UnmapBuffer = st_bufferobj_unmap; - functions->CopyBufferSubData = st_copy_buffer_subdata; - - /* For GL_APPLE_vertex_array_object */ - functions->NewArrayObject = _mesa_new_array_object; - functions->DeleteArrayObject = _mesa_delete_array_object; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Functions for pixel buffer objects and vertex/element buffer objects. + */ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/arrayobj.h" +#include "main/bufferobj.h" + +#include "st_context.h" +#include "st_cb_bufferobjects.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target) +{ + struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object); + + if (!st_obj) + return NULL; + + _mesa_initialize_buffer_object(&st_obj->Base, name, target); + + return &st_obj->Base; +} + + + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). 
+ */ +static void +st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + assert(obj->RefCount == 0); + assert(st_obj->transfer == NULL); + + if (st_obj->buffer) + pipe_resource_reference(&st_obj->buffer, NULL); + + free(st_obj); +} + + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +st_bufferobj_subdata(struct gl_context *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* we may be called from VBO code, so double-check params here */ + ASSERT(offset >= 0); + ASSERT(size >= 0); + ASSERT(offset + size <= obj->Size); + + if (!size) + return; + + /* + * According to ARB_vertex_buffer_object specification, if data is null, + * then the contents of the buffer object's data store is undefined. We just + * ignore, and leave it unchanged. + */ + if (!data) + return; + + /* Now that transfers are per-context, we don't have to figure out + * flushing here. Usually drivers won't need to flush in this case + * even if the buffer is currently referenced by hardware - they + * just queue the upload as dma rather than mapping the underlying + * buffer directly. + */ + pipe_buffer_write(st_context(ctx)->pipe, + st_obj->buffer, + offset, size, data); +} + + +/** + * Called via glGetBufferSubDataARB(). + */ +static void +st_bufferobj_get_subdata(struct gl_context *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* we may be called from VBO code, so double-check params here */ + ASSERT(offset >= 0); + ASSERT(size >= 0); + ASSERT(offset + size <= obj->Size); + + if (!size) + return; + + pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer, + offset, size, data); +} + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via ctx->Driver.BufferData(). 
+ * \return GL_TRUE for success, GL_FALSE if out of memory + */ +static GLboolean +st_bufferobj_data(struct gl_context *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, + struct gl_buffer_object *obj) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + unsigned bind, pipe_usage; + + st_obj->Base.Size = size; + st_obj->Base.Usage = usage; + + switch(target) { + case GL_PIXEL_PACK_BUFFER_ARB: + case GL_PIXEL_UNPACK_BUFFER_ARB: + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case GL_ARRAY_BUFFER_ARB: + bind = PIPE_BIND_VERTEX_BUFFER; + break; + case GL_ELEMENT_ARRAY_BUFFER_ARB: + bind = PIPE_BIND_INDEX_BUFFER; + break; + default: + bind = 0; + } + + switch (usage) { + case GL_STATIC_DRAW: + case GL_STATIC_READ: + case GL_STATIC_COPY: + pipe_usage = PIPE_USAGE_STATIC; + break; + case GL_DYNAMIC_DRAW: + case GL_DYNAMIC_READ: + case GL_DYNAMIC_COPY: + pipe_usage = PIPE_USAGE_DYNAMIC; + break; + case GL_STREAM_DRAW: + case GL_STREAM_READ: + case GL_STREAM_COPY: + pipe_usage = PIPE_USAGE_STREAM; + break; + default: + pipe_usage = PIPE_USAGE_DEFAULT; + } + + pipe_resource_reference( &st_obj->buffer, NULL ); + + if (size != 0) { + st_obj->buffer = pipe_buffer_create(pipe->screen, bind, + pipe_usage, size); + + if (!st_obj->buffer) { + return GL_FALSE; + } + + if (data) + pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0, + size, data); + return GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Dummy data whose's pointer is used for zero size buffers or ranges. + */ +static long st_bufferobj_zero_length = 0; + + + +/** + * Called via glMapBufferARB(). + */ +static void * +st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, + struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + uint flags; + + switch (access) { + case GL_WRITE_ONLY: + flags = PIPE_TRANSFER_WRITE; + break; + case GL_READ_ONLY: + flags = PIPE_TRANSFER_READ; + break; + case GL_READ_WRITE: + default: + flags = PIPE_TRANSFER_READ_WRITE; + break; + } + + /* Handle zero-size buffers here rather than in drivers */ + if (obj->Size == 0) { + obj->Pointer = &st_bufferobj_zero_length; + } + else { + obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, + st_obj->buffer, + flags, + &st_obj->transfer); + } + + if (obj->Pointer) { + obj->Offset = 0; + obj->Length = obj->Size; + } + return obj->Pointer; +} + + +/** + * Called via glMapBufferRange(). + */ +static void * +st_bufferobj_map_range(struct gl_context *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, GLbitfield access, + struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + enum pipe_transfer_usage flags = 0x0; + + if (access & GL_MAP_WRITE_BIT) + flags |= PIPE_TRANSFER_WRITE; + + if (access & GL_MAP_READ_BIT) + flags |= PIPE_TRANSFER_READ; + + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) + flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; + + if (access & GL_MAP_INVALIDATE_RANGE_BIT) + flags |= PIPE_TRANSFER_DISCARD; + + if (access & GL_MAP_INVALIDATE_BUFFER_BIT) + flags |= PIPE_TRANSFER_DISCARD; + + if (access & GL_MAP_UNSYNCHRONIZED_BIT) + flags |= PIPE_TRANSFER_UNSYNCHRONIZED; + + /* ... other flags ... 
+ */ + + if (access & MESA_MAP_NOWAIT_BIT) + flags |= PIPE_TRANSFER_DONTBLOCK; + + assert(offset >= 0); + assert(length >= 0); + assert(offset < obj->Size); + assert(offset + length <= obj->Size); + + /* + * We go out of way here to hide the degenerate yet valid case of zero + * length range from the pipe driver. + */ + if (!length) { + obj->Pointer = &st_bufferobj_zero_length; + } + else { + obj->Pointer = pipe_buffer_map_range(pipe, + st_obj->buffer, + offset, length, + flags, + &st_obj->transfer); + if (obj->Pointer) { + obj->Pointer = (ubyte *) obj->Pointer + offset; + } + } + + if (obj->Pointer) { + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + } + + return obj->Pointer; +} + + +static void +st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* Subrange is relative to mapped range */ + assert(offset >= 0); + assert(length >= 0); + assert(offset + length <= obj->Length); + assert(obj->Pointer); + + if (!length) + return; + + pipe_buffer_flush_mapped_range(pipe, st_obj->transfer, + obj->Offset + offset, length); +} + + +/** + * Called via glUnmapBufferARB(). + */ +static GLboolean +st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + if (obj->Length) + pipe_buffer_unmap(pipe, st_obj->transfer); + + st_obj->transfer = NULL; + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; +} + + +/** + * Called via glCopyBufferSubData(). + */ +static void +st_copy_buffer_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *srcObj = st_buffer_object(src); + struct st_buffer_object *dstObj = st_buffer_object(dst); + struct pipe_transfer *src_transfer; + struct pipe_transfer *dst_transfer; + ubyte *srcPtr, *dstPtr; + + if(!size) + return; + + /* buffer should not already be mapped */ + assert(!src->Pointer); + assert(!dst->Pointer); + + srcPtr = (ubyte *) pipe_buffer_map_range(pipe, + srcObj->buffer, + readOffset, size, + PIPE_TRANSFER_READ, + &src_transfer); + + dstPtr = (ubyte *) pipe_buffer_map_range(pipe, + dstObj->buffer, + writeOffset, size, + PIPE_TRANSFER_WRITE, + &dst_transfer); + + if (srcPtr && dstPtr) + memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); + + pipe_buffer_unmap(pipe, src_transfer); + pipe_buffer_unmap(pipe, dst_transfer); +} + + +/* TODO: if buffer wasn't created with appropriate usage flags, need + * to recreate it now and copy contents -- or possibly create a + * gallium entrypoint to extend the usage flags and let the driver + * decide if a copy is necessary. 
+ */ +void +st_bufferobj_validate_usage(struct st_context *st, + struct st_buffer_object *obj, + unsigned usage) +{ +} + + +void +st_init_bufferobject_functions(struct dd_function_table *functions) +{ + functions->NewBufferObject = st_bufferobj_alloc; + functions->DeleteBuffer = st_bufferobj_free; + functions->BufferData = st_bufferobj_data; + functions->BufferSubData = st_bufferobj_subdata; + functions->GetBufferSubData = st_bufferobj_get_subdata; + functions->MapBuffer = st_bufferobj_map; + functions->MapBufferRange = st_bufferobj_map_range; + functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; + functions->UnmapBuffer = st_bufferobj_unmap; + functions->CopyBufferSubData = st_copy_buffer_subdata; + + /* For GL_APPLE_vertex_array_object */ + functions->NewArrayObject = _mesa_new_array_object; + functions->DeleteArrayObject = _mesa_delete_array_object; +} diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index 3e27be271..d2e0cd73c 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -1,559 +1,563 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Brian Paul - * Michel Dänzer - */ - -#include "main/glheader.h" -#include "main/formats.h" -#include "main/macros.h" -#include "program/prog_instruction.h" -#include "st_context.h" -#include "st_atom.h" -#include "st_cb_accum.h" -#include "st_cb_clear.h" -#include "st_cb_fbo.h" -#include "st_program.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_format.h" -#include "util/u_inlines.h" -#include "util/u_simple_shaders.h" -#include "util/u_draw_quad.h" - -#include "cso_cache/cso_context.h" - - -/** - * Do per-context initialization for glClear. 
- */ -void -st_init_clear(struct st_context *st) -{ - struct pipe_context *pipe = st->pipe; - struct pipe_screen *pscreen = st->pipe->screen; - - memset(&st->clear, 0, sizeof(st->clear)); - - st->clear.raster.gl_rasterization_rules = 1; - st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); - - /* fragment shader state: color pass-through program */ - st->clear.fs = util_make_fragment_passthrough_shader(pipe); - - /* vertex shader state: color/position pass-through */ - { - const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, - TGSI_SEMANTIC_COLOR }; - const uint semantic_indexes[] = { 0, 0 }; - st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, - semantic_names, - semantic_indexes); - } -} - - -/** - * Free per-context state for glClear. - */ -void -st_destroy_clear(struct st_context *st) -{ - if (st->clear.fs) { - cso_delete_fragment_shader(st->cso_context, st->clear.fs); - st->clear.fs = NULL; - } - if (st->clear.vs) { - cso_delete_vertex_shader(st->cso_context, st->clear.vs); - st->clear.vs = NULL; - } - if (st->clear.vbuf) { - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf = NULL; - } -} - - -/** - * Draw a screen-aligned quadrilateral. - * Coords are clip coords with y=0=bottom. - */ -static void -draw_quad(struct st_context *st, - float x0, float y0, float x1, float y1, GLfloat z, - const GLfloat color[4]) -{ - struct pipe_context *pipe = st->pipe; - - /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as - * no_flush) updates to buffers where we know there is no conflict - * with previous data. Currently using max_slots > 1 will cause - * synchronous rendering if the driver flushes its command buffers - * between one bitmap and the next. Our flush hook below isn't - * sufficient to catch this as the driver doesn't tell us when it - * flushes its own command buffers. Until this gets fixed, pay the - * price of allocating a new buffer for each bitmap cache-flush to - * avoid synchronous rendering. - */ - const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ - GLuint i; - - if (st->clear.vbuf_slot >= max_slots) { - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf_slot = 0; - } - - if (!st->clear.vbuf) { - st->clear.vbuf = pipe_buffer_create(pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - max_slots * sizeof(st->clear.vertices)); - } - - /* positions */ - st->clear.vertices[0][0][0] = x0; - st->clear.vertices[0][0][1] = y0; - - st->clear.vertices[1][0][0] = x1; - st->clear.vertices[1][0][1] = y0; - - st->clear.vertices[2][0][0] = x1; - st->clear.vertices[2][0][1] = y1; - - st->clear.vertices[3][0][0] = x0; - st->clear.vertices[3][0][1] = y1; - - /* same for all verts: */ - for (i = 0; i < 4; i++) { - st->clear.vertices[i][0][2] = z; - st->clear.vertices[i][0][3] = 1.0; - st->clear.vertices[i][1][0] = color[0]; - st->clear.vertices[i][1][1] = color[1]; - st->clear.vertices[i][1][2] = color[2]; - st->clear.vertices[i][1][3] = color[3]; - } - - /* put vertex data into vbuf */ - pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, - st->clear.vbuf_slot - * sizeof(st->clear.vertices), - sizeof(st->clear.vertices), - st->clear.vertices); - - /* draw */ - util_draw_vertex_buffer(pipe, - st->clear.vbuf, - st->clear.vbuf_slot * sizeof(st->clear.vertices), - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - 2); /* attribs/vert */ - - /* Increment slot */ - st->clear.vbuf_slot++; -} - - - -/** - * Do glClear by drawing a quadrilateral. 
- * The vertices of the quad will be computed from the - * ctx->DrawBuffer->_X/Ymin/max fields. - */ -static void -clear_with_quad(struct gl_context *ctx, - GLboolean color, GLboolean depth, GLboolean stencil) -{ - struct st_context *st = st_context(ctx); - const struct gl_framebuffer *fb = ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat) fb->Width; - const GLfloat fb_height = (GLfloat) fb->Height; - const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; - const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; - const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; - const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; - - /* - printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, - color ? "color, " : "", - depth ? "depth, " : "", - stencil ? "stencil" : "", - x0, y0, - x1, y1); - */ - - cso_save_blend(st->cso_context); - cso_save_stencil_ref(st->cso_context); - cso_save_depth_stencil_alpha(st->cso_context); - cso_save_rasterizer(st->cso_context); - cso_save_viewport(st->cso_context); - cso_save_clip(st->cso_context); - cso_save_fragment_shader(st->cso_context); - cso_save_vertex_shader(st->cso_context); - cso_save_vertex_elements(st->cso_context); - - /* blend state: RGBA masking */ - { - struct pipe_blend_state blend; - memset(&blend, 0, sizeof(blend)); - blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - if (color) { - if (ctx->Color.ColorMask[0][0]) - blend.rt[0].colormask |= PIPE_MASK_R; - if (ctx->Color.ColorMask[0][1]) - blend.rt[0].colormask |= PIPE_MASK_G; - if (ctx->Color.ColorMask[0][2]) - blend.rt[0].colormask |= PIPE_MASK_B; - if (ctx->Color.ColorMask[0][3]) - blend.rt[0].colormask |= PIPE_MASK_A; - if (st->ctx->Color.DitherFlag) - blend.dither = 1; - } - cso_set_blend(st->cso_context, &blend); - } - - /* depth_stencil state: always pass/set to ref value */ - { - struct pipe_depth_stencil_alpha_state depth_stencil; - memset(&depth_stencil, 0, sizeof(depth_stencil)); - if (depth) { - depth_stencil.depth.enabled = 1; - depth_stencil.depth.writemask = 1; - depth_stencil.depth.func = PIPE_FUNC_ALWAYS; - } - - if (stencil) { - struct pipe_stencil_ref stencil_ref; - memset(&stencil_ref, 0, sizeof(stencil_ref)); - depth_stencil.stencil[0].enabled = 1; - depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; - depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; - depth_stencil.stencil[0].valuemask = 0xff; - depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; - stencil_ref.ref_value[0] = ctx->Stencil.Clear; - cso_set_stencil_ref(st->cso_context, &stencil_ref); - } - - cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); - } - - cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); - - cso_set_rasterizer(st->cso_context, &st->clear.raster); - - /* viewport state: viewport matching window dims */ - { - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * fb_width; - vp.scale[1] = fb_height * (invert ? 
-0.5f : 0.5f); - vp.scale[2] = 1.0f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * fb_width; - vp.translate[1] = 0.5f * fb_height; - vp.translate[2] = 0.0f; - vp.translate[3] = 0.0f; - cso_set_viewport(st->cso_context, &vp); - } - - cso_set_clip(st->cso_context, &st->clear.clip); - cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); - cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); - - /* draw quad matching scissor rect (XXX verify coord round-off) */ - draw_quad(st, x0, y0, x1, y1, - (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor); - - /* Restore pipe state */ - cso_restore_blend(st->cso_context); - cso_restore_stencil_ref(st->cso_context); - cso_restore_depth_stencil_alpha(st->cso_context); - cso_restore_rasterizer(st->cso_context); - cso_restore_viewport(st->cso_context); - cso_restore_clip(st->cso_context); - cso_restore_fragment_shader(st->cso_context); - cso_restore_vertex_shader(st->cso_context); - cso_restore_vertex_elements(st->cso_context); -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (!ctx->Color.ColorMask[0][0] || - !ctx->Color.ColorMask[0][1] || - !ctx->Color.ColorMask[0][2] || - !ctx->Color.ColorMask[0][3]) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the combiend depth/stencil buffer by - * drawing a quad. - */ -static INLINE GLboolean -check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ - const GLuint stencilMax = 0xff; - GLboolean maskStencil - = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - - assert(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (maskStencil) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, - boolean ds_separate) -{ - const struct st_renderbuffer *strb = st_renderbuffer(rb); - const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) - return GL_TRUE; - - return GL_FALSE; -} - - -/** - * Determine if we need to clear the stencil buffer by drawing a quad. 
- */ -static INLINE GLboolean -check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, - boolean ds_separate) -{ - const struct st_renderbuffer *strb = st_renderbuffer(rb); - const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); - const GLuint stencilMax = 0xff; - const GLboolean maskStencil - = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - - assert(rb->Format == MESA_FORMAT_S8 || - rb->Format == MESA_FORMAT_Z24_S8 || - rb->Format == MESA_FORMAT_S8_Z24); - - if (maskStencil) - return GL_TRUE; - - if (ctx->Scissor.Enabled && - (ctx->Scissor.X != 0 || - ctx->Scissor.Y != 0 || - ctx->Scissor.Width < rb->Width || - ctx->Scissor.Height < rb->Height)) - return GL_TRUE; - - /* This is correct, but it is necessary to look at the depth clear - * value held in the surface when it comes time to issue the clear, - * rather than taking depth and stencil clear values from the - * current state. - */ - if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) - return GL_TRUE; - - return GL_FALSE; -} - - - -/** - * Called when we need to flush. - */ -void -st_flush_clear(struct st_context *st) -{ - /* Release vertex buffer to avoid synchronous rendering if we were - * to map it in the next frame. - */ - pipe_resource_reference(&st->clear.vbuf, NULL); - st->clear.vbuf_slot = 0; -} - - - -/** - * Called via ctx->Driver.Clear() - */ -static void -st_Clear(struct gl_context *ctx, GLbitfield mask) -{ - static const GLbitfield BUFFER_BITS_DS - = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); - struct st_context *st = st_context(ctx); - struct gl_renderbuffer *depthRb - = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; - struct gl_renderbuffer *stencilRb - = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; - GLbitfield quad_buffers = 0x0; - GLbitfield clear_buffers = 0x0; - GLuint i; - - /* This makes sure the pipe has the latest scissor, etc values */ - st_validate_state( st ); - - if (mask & BUFFER_BITS_COLOR) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; - - if (mask & (1 << b)) { - struct gl_renderbuffer *rb - = ctx->DrawBuffer->Attachment[b].Renderbuffer; - struct st_renderbuffer *strb = st_renderbuffer(rb); - - if (!strb || !strb->surface) - continue; - - if (check_clear_color_with_quad( ctx, rb )) - quad_buffers |= PIPE_CLEAR_COLOR; - else - clear_buffers |= PIPE_CLEAR_COLOR; - } - } - } - - if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { - /* clearing combined depth + stencil */ - struct st_renderbuffer *strb = st_renderbuffer(depthRb); - - if (strb->surface) { - if (check_clear_depth_stencil_with_quad(ctx, depthRb)) - quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - else - clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - } - } - else { - /* separate depth/stencil clears */ - /* I don't think truly separate buffers are actually possible in gallium or hw? 
*/ - if (mask & BUFFER_BIT_DEPTH) { - struct st_renderbuffer *strb = st_renderbuffer(depthRb); - - if (strb->surface) { - if (check_clear_depth_with_quad(ctx, depthRb, - st->clear.enable_ds_separate)) - quad_buffers |= PIPE_CLEAR_DEPTH; - else - clear_buffers |= PIPE_CLEAR_DEPTH; - } - } - if (mask & BUFFER_BIT_STENCIL) { - struct st_renderbuffer *strb = st_renderbuffer(stencilRb); - - if (strb->surface) { - if (check_clear_stencil_with_quad(ctx, stencilRb, - st->clear.enable_ds_separate)) - quad_buffers |= PIPE_CLEAR_STENCIL; - else - clear_buffers |= PIPE_CLEAR_STENCIL; - } - } - } - - /* - * If we're going to use clear_with_quad() for any reason, use it for - * everything possible. - */ - if (quad_buffers) { - quad_buffers |= clear_buffers; - clear_with_quad(ctx, - quad_buffers & PIPE_CLEAR_COLOR, - quad_buffers & PIPE_CLEAR_DEPTH, - quad_buffers & PIPE_CLEAR_STENCIL); - } else if (clear_buffers) { - /* driver cannot know it can clear everything if the buffer - * is a combined depth/stencil buffer but this wasn't actually - * required from the visual. Hence fix this up to avoid potential - * read-modify-write in the driver. - */ - if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && - ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && - (depthRb == stencilRb) && - (ctx->DrawBuffer->Visual.depthBits == 0 || - ctx->DrawBuffer->Visual.stencilBits == 0)) - clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, - ctx->Depth.Clear, ctx->Stencil.Clear); - } - if (mask & BUFFER_BIT_ACCUM) - st_clear_accum_buffer(ctx, - ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); -} - - -void -st_init_clear_functions(struct dd_function_table *functions) -{ - functions->Clear = st_Clear; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + * Michel Dänzer + */ + +#include "main/glheader.h" +#include "main/formats.h" +#include "main/macros.h" +#include "program/prog_instruction.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_accum.h" +#include "st_cb_clear.h" +#include "st_cb_fbo.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" +#include "util/u_draw_quad.h" + +#include "cso_cache/cso_context.h" + + +/** + * Do per-context initialization for glClear. + */ +void +st_init_clear(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *pscreen = st->pipe->screen; + + memset(&st->clear, 0, sizeof(st->clear)); + + st->clear.raster.gl_rasterization_rules = 1; + st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); + + /* fragment shader state: color pass-through program */ + st->clear.fs = util_make_fragment_passthrough_shader(pipe); + + /* vertex shader state: color/position pass-through */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR }; + const uint semantic_indexes[] = { 0, 0 }; + st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, + semantic_names, + semantic_indexes); + } +} + + +/** + * Free per-context state for glClear. + */ +void +st_destroy_clear(struct st_context *st) +{ + if (st->clear.fs) { + cso_delete_fragment_shader(st->cso_context, st->clear.fs); + st->clear.fs = NULL; + } + if (st->clear.vs) { + cso_delete_vertex_shader(st->cso_context, st->clear.vs); + st->clear.vs = NULL; + } + if (st->clear.vbuf) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf = NULL; + } +} + + +/** + * Draw a screen-aligned quadrilateral. + * Coords are clip coords with y=0=bottom. + */ +static void +draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, GLfloat z, + const GLfloat color[4]) +{ + struct pipe_context *pipe = st->pipe; + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. 
+ */ + const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ + GLuint i; + + if (st->clear.vbuf_slot >= max_slots) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; + } + + if (!st->clear.vbuf) { + st->clear.vbuf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + max_slots * sizeof(st->clear.vertices)); + } + + /* positions */ + st->clear.vertices[0][0][0] = x0; + st->clear.vertices[0][0][1] = y0; + + st->clear.vertices[1][0][0] = x1; + st->clear.vertices[1][0][1] = y0; + + st->clear.vertices[2][0][0] = x1; + st->clear.vertices[2][0][1] = y1; + + st->clear.vertices[3][0][0] = x0; + st->clear.vertices[3][0][1] = y1; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->clear.vertices[i][0][2] = z; + st->clear.vertices[i][0][3] = 1.0; + st->clear.vertices[i][1][0] = color[0]; + st->clear.vertices[i][1][1] = color[1]; + st->clear.vertices[i][1][2] = color[2]; + st->clear.vertices[i][1][3] = color[3]; + } + + /* put vertex data into vbuf */ + pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, + st->clear.vbuf_slot + * sizeof(st->clear.vertices), + sizeof(st->clear.vertices), + st->clear.vertices); + + /* draw */ + util_draw_vertex_buffer(pipe, + st->cso_context, + st->clear.vbuf, + st->clear.vbuf_slot * sizeof(st->clear.vertices), + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* Increment slot */ + st->clear.vbuf_slot++; +} + + + +/** + * Do glClear by drawing a quadrilateral. + * The vertices of the quad will be computed from the + * ctx->DrawBuffer->_X/Ymin/max fields. + */ +static void +clear_with_quad(struct gl_context *ctx, + GLboolean color, GLboolean depth, GLboolean stencil) +{ + struct st_context *st = st_context(ctx); + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; + const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; + const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; + + /* + printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, + color ? "color, " : "", + depth ? "depth, " : "", + stencil ? 
"stencil" : "", + x0, y0, + x1, y1); + */ + + cso_save_blend(st->cso_context); + cso_save_stencil_ref(st->cso_context); + cso_save_depth_stencil_alpha(st->cso_context); + cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + cso_save_clip(st->cso_context); + cso_save_fragment_shader(st->cso_context); + cso_save_vertex_shader(st->cso_context); + cso_save_vertex_elements(st->cso_context); + cso_save_vertex_buffers(st->cso_context); + + /* blend state: RGBA masking */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + if (color) { + if (ctx->Color.ColorMask[0][0]) + blend.rt[0].colormask |= PIPE_MASK_R; + if (ctx->Color.ColorMask[0][1]) + blend.rt[0].colormask |= PIPE_MASK_G; + if (ctx->Color.ColorMask[0][2]) + blend.rt[0].colormask |= PIPE_MASK_B; + if (ctx->Color.ColorMask[0][3]) + blend.rt[0].colormask |= PIPE_MASK_A; + if (st->ctx->Color.DitherFlag) + blend.dither = 1; + } + cso_set_blend(st->cso_context, &blend); + } + + /* depth_stencil state: always pass/set to ref value */ + { + struct pipe_depth_stencil_alpha_state depth_stencil; + memset(&depth_stencil, 0, sizeof(depth_stencil)); + if (depth) { + depth_stencil.depth.enabled = 1; + depth_stencil.depth.writemask = 1; + depth_stencil.depth.func = PIPE_FUNC_ALWAYS; + } + + if (stencil) { + struct pipe_stencil_ref stencil_ref; + memset(&stencil_ref, 0, sizeof(stencil_ref)); + depth_stencil.stencil[0].enabled = 1; + depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; + depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].valuemask = 0xff; + depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; + stencil_ref.ref_value[0] = ctx->Stencil.Clear; + cso_set_stencil_ref(st->cso_context, &stencil_ref); + } + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + } + + cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); + + cso_set_rasterizer(st->cso_context, &st->clear.raster); + + /* viewport state: viewport matching window dims */ + { + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * fb_width; + vp.scale[1] = fb_height * (invert ? 
-0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb_width; + vp.translate[1] = 0.5f * fb_height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(st->cso_context, &vp); + } + + cso_set_clip(st->cso_context, &st->clear.clip); + cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); + cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); + + /* draw quad matching scissor rect (XXX verify coord round-off) */ + draw_quad(st, x0, y0, x1, y1, + (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor); + + /* Restore pipe state */ + cso_restore_blend(st->cso_context); + cso_restore_stencil_ref(st->cso_context); + cso_restore_depth_stencil_alpha(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_clip(st->cso_context); + cso_restore_fragment_shader(st->cso_context); + cso_restore_vertex_shader(st->cso_context); + cso_restore_vertex_elements(st->cso_context); + cso_restore_vertex_buffers(st->cso_context); +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3]) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the combiend depth/stencil buffer by + * drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + const GLuint stencilMax = 0xff; + GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (maskStencil) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the stencil buffer by drawing a quad. 
+ */ +static INLINE GLboolean +check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + const GLuint stencilMax = 0xff; + const GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (maskStencil) + return GL_TRUE; + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + /* This is correct, but it is necessary to look at the depth clear + * value held in the surface when it comes time to issue the clear, + * rather than taking depth and stencil clear values from the + * current state. + */ + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + + +/** + * Called when we need to flush. + */ +void +st_flush_clear(struct st_context *st) +{ + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; +} + + + +/** + * Called via ctx->Driver.Clear() + */ +static void +st_Clear(struct gl_context *ctx, GLbitfield mask) +{ + static const GLbitfield BUFFER_BITS_DS + = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + struct st_context *st = st_context(ctx); + struct gl_renderbuffer *depthRb + = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + struct gl_renderbuffer *stencilRb + = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + GLbitfield quad_buffers = 0x0; + GLbitfield clear_buffers = 0x0; + GLuint i; + + /* This makes sure the pipe has the latest scissor, etc values */ + st_validate_state( st ); + + if (mask & BUFFER_BITS_COLOR) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; + + if (mask & (1 << b)) { + struct gl_renderbuffer *rb + = ctx->DrawBuffer->Attachment[b].Renderbuffer; + struct st_renderbuffer *strb = st_renderbuffer(rb); + + if (!strb || !strb->surface) + continue; + + if (check_clear_color_with_quad( ctx, rb )) + quad_buffers |= PIPE_CLEAR_COLOR; + else + clear_buffers |= PIPE_CLEAR_COLOR; + } + } + } + + if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { + /* clearing combined depth + stencil */ + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_stencil_with_quad(ctx, depthRb)) + quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + else + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + } + } + else { + /* separate depth/stencil clears */ + /* I don't think truly separate buffers are actually possible in gallium or hw? 
*/ + if (mask & BUFFER_BIT_DEPTH) { + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_with_quad(ctx, depthRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_DEPTH; + else + clear_buffers |= PIPE_CLEAR_DEPTH; + } + } + if (mask & BUFFER_BIT_STENCIL) { + struct st_renderbuffer *strb = st_renderbuffer(stencilRb); + + if (strb->surface) { + if (check_clear_stencil_with_quad(ctx, stencilRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_STENCIL; + else + clear_buffers |= PIPE_CLEAR_STENCIL; + } + } + } + + /* + * If we're going to use clear_with_quad() for any reason, use it for + * everything possible. + */ + if (quad_buffers) { + quad_buffers |= clear_buffers; + clear_with_quad(ctx, + quad_buffers & PIPE_CLEAR_COLOR, + quad_buffers & PIPE_CLEAR_DEPTH, + quad_buffers & PIPE_CLEAR_STENCIL); + } else if (clear_buffers) { + /* driver cannot know it can clear everything if the buffer + * is a combined depth/stencil buffer but this wasn't actually + * required from the visual. Hence fix this up to avoid potential + * read-modify-write in the driver. + */ + if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + (depthRb == stencilRb) && + (ctx->DrawBuffer->Visual.depthBits == 0 || + ctx->DrawBuffer->Visual.stencilBits == 0)) + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, + ctx->Depth.Clear, ctx->Stencil.Clear); + } + if (mask & BUFFER_BIT_ACCUM) + st_clear_accum_buffer(ctx, + ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); +} + + +void +st_init_clear_functions(struct dd_function_table *functions) +{ + functions->Clear = st_Clear; +} diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index 56c7e8581..07527002b 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1,1368 +1,1371 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Brian Paul - */ - -#include "main/imports.h" -#include "main/image.h" -#include "main/bufferobj.h" -#include "main/macros.h" -#include "main/mfeatures.h" -#include "main/mtypes.h" -#include "main/pack.h" -#include "main/texformat.h" -#include "main/texstore.h" -#include "program/program.h" -#include "program/prog_print.h" -#include "program/prog_instruction.h" - -#include "st_atom.h" -#include "st_atom_constbuf.h" -#include "st_cb_drawpixels.h" -#include "st_cb_readpixels.h" -#include "st_cb_fbo.h" -#include "st_context.h" -#include "st_debug.h" -#include "st_format.h" -#include "st_program.h" -#include "st_texture.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "tgsi/tgsi_ureg.h" -#include "util/u_draw_quad.h" -#include "util/u_format.h" -#include "util/u_inlines.h" -#include "util/u_math.h" -#include "util/u_tile.h" -#include "cso_cache/cso_context.h" - - -#if FEATURE_drawpix - -/** - * Check if the given program is: - * 0: MOVE result.color, fragment.color; - * 1: END; - */ -static GLboolean -is_passthrough_program(const struct gl_fragment_program *prog) -{ - if (prog->Base.NumInstructions == 2) { - const struct prog_instruction *inst = prog->Base.Instructions; - if (inst[0].Opcode == OPCODE_MOV && - inst[1].Opcode == OPCODE_END && - inst[0].DstReg.File == PROGRAM_OUTPUT && - inst[0].DstReg.Index == FRAG_RESULT_COLOR && - inst[0].DstReg.WriteMask == WRITEMASK_XYZW && - inst[0].SrcReg[0].File == PROGRAM_INPUT && - inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 && - inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) { - return GL_TRUE; - } - } - return GL_FALSE; -} - - - -/** - * Make fragment shader for glDraw/CopyPixels. This shader is made - * by combining the pixel transfer shader with the user-defined shader. - * \param fpIn the current/incoming fragment program - * \param fpOut returns the combined fragment program - */ -void -st_make_drawpix_fragment_program(struct st_context *st, - struct gl_fragment_program *fpIn, - struct gl_fragment_program **fpOut) -{ - struct gl_program *newProg; - - if (is_passthrough_program(fpIn)) { - newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, - &st->pixel_xfer.program->Base); - } - else { -#if 0 - /* debug */ - printf("Base program:\n"); - _mesa_print_program(&fpIn->Base); - printf("DrawPix program:\n"); - _mesa_print_program(&st->pixel_xfer.program->Base.Base); -#endif - newProg = _mesa_combine_programs(st->ctx, - &st->pixel_xfer.program->Base.Base, - &fpIn->Base); - } - -#if 0 - /* debug */ - printf("Combined DrawPixels program:\n"); - _mesa_print_program(newProg); - printf("InputsRead: 0x%x\n", newProg->InputsRead); - printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); - _mesa_print_parameter_list(newProg->Parameters); -#endif - - *fpOut = (struct gl_fragment_program *) newProg; -} - - -/** - * Create fragment program that does a TEX() instruction to get a Z and/or - * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. - * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). - * Pass fragment color through as-is. 
- * \return pointer to the gl_fragment program - */ -struct gl_fragment_program * -st_make_drawpix_z_stencil_program(struct st_context *st, - GLboolean write_depth, - GLboolean write_stencil) -{ - struct gl_context *ctx = st->ctx; - struct gl_program *p; - struct gl_fragment_program *fp; - GLuint ic = 0; - const GLuint shaderIndex = write_depth * 2 + write_stencil; - - assert(shaderIndex < Elements(st->drawpix.shaders)); - - if (st->drawpix.shaders[shaderIndex]) { - /* already have the proper shader */ - return st->drawpix.shaders[shaderIndex]; - } - - /* - * Create shader now - */ - p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); - if (!p) - return NULL; - - p->NumInstructions = write_depth ? 2 : 1; - p->NumInstructions += write_stencil ? 1 : 0; - - p->Instructions = _mesa_alloc_instructions(p->NumInstructions); - if (!p->Instructions) { - ctx->Driver.DeleteProgram(ctx, p); - return NULL; - } - _mesa_init_instructions(p->Instructions, p->NumInstructions); - - if (write_depth) { - /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH; - p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; - p->Instructions[ic].TexSrcUnit = 0; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - } - - if (write_stencil) { - /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */ - p->Instructions[ic].Opcode = OPCODE_TEX; - p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; - p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL; - p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y; - p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; - p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; - p->Instructions[ic].TexSrcUnit = 1; - p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; - ic++; - } - - /* END; */ - p->Instructions[ic++].Opcode = OPCODE_END; - - assert(ic == p->NumInstructions); - - p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0; - p->OutputsWritten = 0; - if (write_depth) - p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH); - if (write_stencil) - p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL); - - p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ - if (write_stencil) - p->SamplersUsed |= 1 << 1; - - fp = (struct gl_fragment_program *) p; - - /* save the new shader */ - st->drawpix.shaders[shaderIndex] = fp; - - return fp; -} - - -/** - * Create a simple vertex shader that just passes through the - * vertex position and texcoord (and optionally, color). 
- */ -static void * -make_passthrough_vertex_shader(struct st_context *st, - GLboolean passColor) -{ - if (!st->drawpix.vert_shaders[passColor]) { - struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); - - if (ureg == NULL) - return NULL; - - /* MOV result.pos, vertex.pos; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), - ureg_DECL_vs_input( ureg, 0 )); - - /* MOV result.texcoord0, vertex.attr[1]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ), - ureg_DECL_vs_input( ureg, 1 )); - - if (passColor) { - /* MOV result.color0, vertex.attr[2]; */ - ureg_MOV(ureg, - ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), - ureg_DECL_vs_input( ureg, 2 )); - } - - ureg_END( ureg ); - - st->drawpix.vert_shaders[passColor] = - ureg_create_shader_and_destroy( ureg, st->pipe ); - } - - return st->drawpix.vert_shaders[passColor]; -} - - -/** - * Return a texture base format for drawing/copying an image - * of the given format. - */ -static GLenum -base_format(GLenum format) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - return GL_RGBA; - } -} - - -/** - * Return a texture internalFormat for drawing/copying an image - * of the given format and type. - */ -static GLenum -internal_format(GLenum format, GLenum type) -{ - switch (format) { - case GL_DEPTH_COMPONENT: - return GL_DEPTH_COMPONENT; - case GL_DEPTH_STENCIL: - return GL_DEPTH_STENCIL; - case GL_STENCIL_INDEX: - return GL_STENCIL_INDEX; - default: - if (_mesa_is_integer_format(format)) { - switch (type) { - case GL_BYTE: - return GL_RGBA8I; - case GL_UNSIGNED_BYTE: - return GL_RGBA8UI; - case GL_SHORT: - return GL_RGBA16I; - case GL_UNSIGNED_SHORT: - return GL_RGBA16UI; - case GL_INT: - return GL_RGBA32I; - case GL_UNSIGNED_INT: - return GL_RGBA32UI; - default: - assert(0 && "Unexpected type in internal_format()"); - return GL_RGBA_INTEGER; - } - } - else { - return GL_RGBA; - } - } -} - - -/** - * Create a temporary texture to hold an image of the given size. - * If width, height are not POT and the driver only handles POT textures, - * allocate the next larger size of texture that is POT. - */ -static struct pipe_resource * -alloc_texture(struct st_context *st, GLsizei width, GLsizei height, - enum pipe_format texFormat) -{ - struct pipe_resource *pt; - - pt = st_texture_create(st, st->internal_target, texFormat, 0, - width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW); - - return pt; -} - - -/** - * Make texture containing an image for glDrawPixels image. - * If 'pixels' is NULL, leave the texture image data undefined. 
- */ -static struct pipe_resource * -make_texture(struct st_context *st, - GLsizei width, GLsizei height, GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct gl_context *ctx = st->ctx; - struct pipe_context *pipe = st->pipe; - gl_format mformat; - struct pipe_resource *pt; - enum pipe_format pipeFormat; - GLuint cpp; - GLenum baseFormat, intFormat; - - baseFormat = base_format(format); - intFormat = internal_format(format, type); - - mformat = st_ChooseTextureFormat_renderable(ctx, intFormat, - format, type, GL_FALSE); - assert(mformat); - - pipeFormat = st_mesa_format_to_pipe_format(mformat); - assert(pipeFormat); - cpp = util_format_get_blocksize(pipeFormat); - - pixels = _mesa_map_pbo_source(ctx, unpack, pixels); - if (!pixels) - return NULL; - - /* alloc temporary texture */ - pt = alloc_texture(st, width, height, pipeFormat); - if (!pt) { - _mesa_unmap_pbo_source(ctx, unpack); - return NULL; - } - - { - struct pipe_transfer *transfer; - static const GLuint dstImageOffsets = 0; - GLboolean success; - GLubyte *dest; - const GLbitfield imageTransferStateSave = ctx->_ImageTransferState; - - /* we'll do pixel transfer in a fragment shader */ - ctx->_ImageTransferState = 0x0; - - transfer = pipe_get_transfer(st->pipe, pt, 0, 0, - PIPE_TRANSFER_WRITE, 0, 0, - width, height); - - /* map texture transfer */ - dest = pipe_transfer_map(pipe, transfer); - - - /* Put image into texture transfer. - * Note that the image is actually going to be upside down in - * the texture. We deal with that with texcoords. - */ - success = _mesa_texstore(ctx, 2, /* dims */ - baseFormat, /* baseInternalFormat */ - mformat, /* gl_format */ - dest, /* dest */ - 0, 0, 0, /* dstX/Y/Zoffset */ - transfer->stride, /* dstRowStride, bytes */ - &dstImageOffsets, /* dstImageOffsets */ - width, height, 1, /* size */ - format, type, /* src format/type */ - pixels, /* data source */ - unpack); - - /* unmap */ - pipe_transfer_unmap(pipe, transfer); - pipe->transfer_destroy(pipe, transfer); - - assert(success); - - /* restore */ - ctx->_ImageTransferState = imageTransferStateSave; - } - - _mesa_unmap_pbo_source(ctx, unpack); - - return pt; -} - - -/** - * Draw quad with texcoords and optional color. - * Coords are gallium window coords with y=0=top. - * \param color may be null - * \param invertTex if true, flip texcoords vertically - */ -static void -draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, - GLfloat x1, GLfloat y1, const GLfloat *color, - GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ - - /* setup vertex data */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat) fb->Width; - const GLfloat fb_height = (GLfloat) fb->Height; - const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; - const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; - const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; - const GLfloat sLeft = 0.0f, sRight = maxXcoord; - const GLfloat tTop = invertTex ? maxYcoord : 0.0f; - const GLfloat tBot = invertTex ? 
0.0f : maxYcoord; - GLuint i; - - /* upper-left */ - verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ - verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ - - /* upper-right */ - verts[1][0][0] = clip_x1; - verts[1][0][1] = clip_y0; - - /* lower-right */ - verts[2][0][0] = clip_x1; - verts[2][0][1] = clip_y1; - - /* lower-left */ - verts[3][0][0] = clip_x0; - verts[3][0][1] = clip_y1; - - verts[0][1][0] = sLeft; /* v[0].attr[1].S */ - verts[0][1][1] = tTop; /* v[0].attr[1].T */ - verts[1][1][0] = sRight; - verts[1][1][1] = tTop; - verts[2][1][0] = sRight; - verts[2][1][1] = tBot; - verts[3][1][0] = sLeft; - verts[3][1][1] = tBot; - - /* same for all verts: */ - if (color) { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /* v[i].attr[0].z */ - verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ - verts[i][2][0] = color[0]; /* v[i].attr[2].r */ - verts[i][2][1] = color[1]; /* v[i].attr[2].g */ - verts[i][2][2] = color[2]; /* v[i].attr[2].b */ - verts[i][2][3] = color[3]; /* v[i].attr[2].a */ - verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ - verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ - } - } - else { - for (i = 0; i < 4; i++) { - verts[i][0][2] = z; /*Z*/ - verts[i][0][3] = 1.0f; /*W*/ - verts[i][1][2] = 0.0f; /*R*/ - verts[i][1][3] = 1.0f; /*Q*/ - } - } - } - - { - struct pipe_resource *buf; - - /* allocate/load buffer object with vertex data */ - buf = pipe_buffer_create(pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - sizeof(verts)); - pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts); - - util_draw_vertex_buffer(pipe, buf, 0, - PIPE_PRIM_QUADS, - 4, /* verts */ - 3); /* attribs/vert */ - pipe_resource_reference(&buf, NULL); - } -} - - - -static void -draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, - GLsizei width, GLsizei height, - GLfloat zoomX, GLfloat zoomY, - struct pipe_sampler_view **sv, - int num_sampler_view, - void *driver_vp, - void *driver_fp, - const GLfloat *color, - GLboolean invertTex, - GLboolean write_depth, GLboolean write_stencil) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct cso_context *cso = st->cso_context; - GLfloat x0, y0, x1, y1; - GLsizei maxSize; - boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT; - - /* limit checks */ - /* XXX if DrawPixels image is larger than max texture size, break - * it up into chunks. - */ - maxSize = 1 << (pipe->screen->get_param(pipe->screen, - PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); - assert(width <= maxSize); - assert(height <= maxSize); - - cso_save_rasterizer(cso); - cso_save_viewport(cso); - cso_save_samplers(cso); - cso_save_fragment_sampler_views(cso); - cso_save_fragment_shader(cso); - cso_save_vertex_shader(cso); - cso_save_vertex_elements(cso); - if (write_stencil) { - cso_save_depth_stencil_alpha(cso); - cso_save_blend(cso); - } - - /* rasterizer state: just scissor */ - { - struct pipe_rasterizer_state rasterizer; - memset(&rasterizer, 0, sizeof(rasterizer)); - rasterizer.gl_rasterization_rules = 1; - rasterizer.scissor = ctx->Scissor.Enabled; - cso_set_rasterizer(cso, &rasterizer); - } - - if (write_stencil) { - /* Stencil writing bypasses the normal fragment pipeline to - * disable color writing and set stencil test to always pass. 
- */ - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - - /* depth/stencil */ - memset(&dsa, 0, sizeof(dsa)); - dsa.stencil[0].enabled = 1; - dsa.stencil[0].func = PIPE_FUNC_ALWAYS; - dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; - dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; - if (write_depth) { - /* writing depth+stencil: depth test always passes */ - dsa.depth.enabled = 1; - dsa.depth.writemask = ctx->Depth.Mask; - dsa.depth.func = PIPE_FUNC_ALWAYS; - } - cso_set_depth_stencil_alpha(cso, &dsa); - - /* blend (colormask) */ - memset(&blend, 0, sizeof(blend)); - cso_set_blend(cso, &blend); - } - - /* fragment shader state: TEX lookup program */ - cso_set_fragment_shader_handle(cso, driver_fp); - - /* vertex shader state: position + texcoord pass-through */ - cso_set_vertex_shader_handle(cso, driver_vp); - - - /* texture sampling state: */ - { - struct pipe_sampler_state sampler; - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.normalized_coords = normalized; - - cso_single_sampler(cso, 0, &sampler); - if (num_sampler_view > 1) { - cso_single_sampler(cso, 1, &sampler); - } - cso_single_sampler_done(cso); - } - - /* viewport state: viewport matching window dims */ - { - const float w = (float) ctx->DrawBuffer->Width; - const float h = (float) ctx->DrawBuffer->Height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * w; - vp.scale[1] = -0.5f * h; - vp.scale[2] = 0.5f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * w; - vp.translate[1] = 0.5f * h; - vp.translate[2] = 0.5f; - vp.translate[3] = 0.0f; - cso_set_viewport(cso, &vp); - } - - cso_set_vertex_elements(cso, 3, st->velems_util_draw); - - /* texture state: */ - cso_set_fragment_sampler_views(cso, num_sampler_view, sv); - - /* Compute Gallium window coords (y=0=top) with pixel zoom. - * Recall that these coords are transformed by the current - * vertex shader and viewport transformation. - */ - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { - y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY); - invertTex = !invertTex; - } - - x0 = (GLfloat) x; - x1 = x + width * ctx->Pixel.ZoomX; - y0 = (GLfloat) y; - y1 = y + height * ctx->Pixel.ZoomY; - - /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ - z = z * 2.0 - 1.0; - - draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, - normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, - normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height); - - /* restore state */ - cso_restore_rasterizer(cso); - cso_restore_viewport(cso); - cso_restore_samplers(cso); - cso_restore_fragment_sampler_views(cso); - cso_restore_fragment_shader(cso); - cso_restore_vertex_shader(cso); - cso_restore_vertex_elements(cso); - if (write_stencil) { - cso_restore_depth_stencil_alpha(cso); - cso_restore_blend(cso); - } -} - - -/** - * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we - * can't use a fragment shader to write stencil values. 
- */ -static void -draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, - GLsizei width, GLsizei height, GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, - const GLvoid *pixels) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct st_renderbuffer *strb; - enum pipe_transfer_usage usage; - struct pipe_transfer *pt; - const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; - GLint skipPixels; - ubyte *stmap; - struct gl_pixelstore_attrib clippedUnpack = *unpack; - - if (!zoom) { - if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height, - &clippedUnpack)) { - /* totally clipped */ - return; - } - } - - strb = st_renderbuffer(ctx->DrawBuffer-> - Attachment[BUFFER_STENCIL].Renderbuffer); - - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { - y = ctx->DrawBuffer->Height - y - height; - } - - if(format != GL_DEPTH_STENCIL && - util_format_get_component_bits(strb->format, - UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) - usage = PIPE_TRANSFER_READ_WRITE; - else - usage = PIPE_TRANSFER_WRITE; - - pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0, - usage, x, y, - width, height); - - stmap = pipe_transfer_map(pipe, pt); - - pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels); - assert(pixels); - - /* if width > MAX_WIDTH, have to process image in chunks */ - skipPixels = 0; - while (skipPixels < width) { - const GLint spanX = skipPixels; - const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); - GLint row; - for (row = 0; row < height; row++) { - GLubyte sValues[MAX_WIDTH]; - GLuint zValues[MAX_WIDTH]; - GLenum destType = GL_UNSIGNED_BYTE; - const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, - width, height, - format, type, - row, skipPixels); - _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues, - type, source, &clippedUnpack, - ctx->_ImageTransferState); - - if (format == GL_DEPTH_STENCIL) { - _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, - (1 << 24) - 1, type, source, - &clippedUnpack); - } - - if (zoom) { - _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with " - "zoom not complete"); - } - - { - GLint spanY; - - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { - spanY = height - row - 1; - } - else { - spanY = row; - } - - /* now pack the stencil (and Z) values in the dest format */ - switch (pt->resource->format) { - case PIPE_FORMAT_S8_USCALED: - { - ubyte *dest = stmap + spanY * pt->stride + spanX; - assert(usage == PIPE_TRANSFER_WRITE); - memcpy(dest, sValues, spanWidth); - } - break; - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - if (format == GL_DEPTH_STENCIL) { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); - GLint k; - assert(usage == PIPE_TRANSFER_WRITE); - for (k = 0; k < spanWidth; k++) { - dest[k] = zValues[k] | (sValues[k] << 24); - } - } - else { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); - GLint k; - assert(usage == PIPE_TRANSFER_READ_WRITE); - for (k = 0; k < spanWidth; k++) { - dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24); - } - } - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - if (format == GL_DEPTH_STENCIL) { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); - GLint k; - assert(usage == PIPE_TRANSFER_WRITE); - for (k = 0; k < spanWidth; k++) { - dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff); - } - } - else { - uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); - GLint k; - assert(usage == PIPE_TRANSFER_READ_WRITE); - for (k = 0; k < 
spanWidth; k++) { - dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff); - } - } - break; - default: - assert(0); - } - } - } - skipPixels += spanWidth; - } - - _mesa_unmap_pbo_source(ctx, &clippedUnpack); - - /* unmap the stencil buffer */ - pipe_transfer_unmap(pipe, pt); - pipe->transfer_destroy(pipe, pt); -} - - -/** - * Get fragment program variant for a glDrawPixels or glCopyPixels - * command for RGBA data. - */ -static struct st_fp_variant * -get_color_fp_variant(struct st_context *st) -{ - struct gl_context *ctx = st->ctx; - struct st_fp_variant_key key; - struct st_fp_variant *fpv; - - memset(&key, 0, sizeof(key)); - - key.st = st; - key.drawpixels = 1; - key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 || - ctx->Pixel.RedScale != 1.0 || - ctx->Pixel.GreenBias != 0.0 || - ctx->Pixel.GreenScale != 1.0 || - ctx->Pixel.BlueBias != 0.0 || - ctx->Pixel.BlueScale != 1.0 || - ctx->Pixel.AlphaBias != 0.0 || - ctx->Pixel.AlphaScale != 1.0); - key.pixelMaps = ctx->Pixel.MapColorFlag; - - fpv = st_get_fp_variant(st, st->fp, &key); - - return fpv; -} - - -/** - * Get fragment program variant for a glDrawPixels or glCopyPixels - * command for depth/stencil data. - */ -static struct st_fp_variant * -get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth, - GLboolean write_stencil) -{ - struct st_fp_variant_key key; - struct st_fp_variant *fpv; - - memset(&key, 0, sizeof(key)); - - key.st = st; - key.drawpixels = 1; - key.drawpixels_z = write_depth; - key.drawpixels_stencil = write_stencil; - - fpv = st_get_fp_variant(st, st->fp, &key); - - return fpv; -} - - -/** - * Called via ctx->Driver.DrawPixels() - */ -static void -st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) -{ - void *driver_vp, *driver_fp; - struct st_context *st = st_context(ctx); - const GLfloat *color; - struct pipe_context *pipe = st->pipe; - GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; - struct pipe_sampler_view *sv[2]; - int num_sampler_view = 1; - enum pipe_format stencil_format = PIPE_FORMAT_NONE; - struct st_fp_variant *fpv; - - if (format == GL_DEPTH_STENCIL) - write_stencil = write_depth = GL_TRUE; - else if (format == GL_STENCIL_INDEX) - write_stencil = GL_TRUE; - else if (format == GL_DEPTH_COMPONENT) - write_depth = GL_TRUE; - - if (write_stencil) { - enum pipe_format tex_format; - /* can we write to stencil if not fallback */ - if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) - goto stencil_fallback; - - tex_format = st_choose_format(st->pipe->screen, base_format(format), - PIPE_TEXTURE_2D, - 0, PIPE_BIND_SAMPLER_VIEW); - if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) - stencil_format = PIPE_FORMAT_X24S8_USCALED; - else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) - stencil_format = PIPE_FORMAT_S8X24_USCALED; - else - stencil_format = PIPE_FORMAT_S8_USCALED; - if (stencil_format == PIPE_FORMAT_NONE) - goto stencil_fallback; - } - - /* Mesa state should be up to date by now */ - assert(ctx->NewState == 0x0); - - st_validate_state(st); - - /* - * Get vertex/fragment shaders - */ - if (write_depth || write_stencil) { - fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil); - - driver_fp = fpv->driver_shader; - - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); - - color = ctx->Current.RasterColor; - } - else { - fpv = get_color_fp_variant(st); - - driver_fp = fpv->driver_shader; - - driver_vp 
= make_passthrough_vertex_shader(st, GL_FALSE); - - color = NULL; - if (st->pixel_xfer.pixelmap_enabled) { - sv[1] = st->pixel_xfer.pixelmap_sampler_view; - num_sampler_view++; - } - } - - /* update fragment program constants */ - st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); - - /* draw with textured quad */ - { - struct pipe_resource *pt - = make_texture(st, width, height, format, type, unpack, pixels); - if (pt) { - sv[0] = st_create_texture_sampler_view(st->pipe, pt); - - if (sv[0]) { - if (write_stencil) { - sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, - stencil_format); - num_sampler_view++; - } - - draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], - width, height, - ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, - sv, - num_sampler_view, - driver_vp, - driver_fp, - color, GL_FALSE, write_depth, write_stencil); - pipe_sampler_view_reference(&sv[0], NULL); - if (num_sampler_view > 1) - pipe_sampler_view_reference(&sv[1], NULL); - } - pipe_resource_reference(&pt, NULL); - } - } - return; - -stencil_fallback: - draw_stencil_pixels(ctx, x, y, width, height, format, type, - unpack, pixels); -} - - - -/** - * Software fallback for glCopyPixels(GL_STENCIL). - */ -static void -copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty) -{ - struct st_renderbuffer *rbDraw; - struct pipe_context *pipe = st_context(ctx)->pipe; - enum pipe_transfer_usage usage; - struct pipe_transfer *ptDraw; - ubyte *drawMap; - ubyte *buffer; - int i; - - buffer = malloc(width * height * sizeof(ubyte)); - if (!buffer) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)"); - return; - } - - /* Get the dest renderbuffer. If there's a wrapper, use the - * underlying renderbuffer. 
- */ - rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); - if (rbDraw->Base.Wrapped) - rbDraw = st_renderbuffer(rbDraw->Base.Wrapped); - - /* this will do stencil pixel transfer ops */ - st_read_stencil_pixels(ctx, srcx, srcy, width, height, - GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, - &ctx->DefaultPacking, buffer); - - if (0) { - /* debug code: dump stencil values */ - GLint row, col; - for (row = 0; row < height; row++) { - printf("%3d: ", row); - for (col = 0; col < width; col++) { - printf("%02x ", buffer[col + row * width]); - } - printf("\n"); - } - } - - if (util_format_get_component_bits(rbDraw->format, - UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) - usage = PIPE_TRANSFER_READ_WRITE; - else - usage = PIPE_TRANSFER_WRITE; - - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { - dsty = rbDraw->Base.Height - dsty - height; - } - - ptDraw = pipe_get_transfer(st_context(ctx)->pipe, - rbDraw->texture, 0, 0, - usage, dstx, dsty, - width, height); - - assert(util_format_get_blockwidth(ptDraw->resource->format) == 1); - assert(util_format_get_blockheight(ptDraw->resource->format) == 1); - - /* map the stencil buffer */ - drawMap = pipe_transfer_map(pipe, ptDraw); - - /* draw */ - /* XXX PixelZoom not handled yet */ - for (i = 0; i < height; i++) { - ubyte *dst; - const ubyte *src; - int y; - - y = i; - - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { - y = height - y - 1; - } - - dst = drawMap + y * ptDraw->stride; - src = buffer + i * width; - - switch (ptDraw->resource->format) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - { - uint *dst4 = (uint *) dst; - int j; - assert(usage == PIPE_TRANSFER_READ_WRITE); - for (j = 0; j < width; j++) { - *dst4 = (*dst4 & 0xffffff) | (src[j] << 24); - dst4++; - } - } - break; - case PIPE_FORMAT_S8_USCALED_Z24_UNORM: - { - uint *dst4 = (uint *) dst; - int j; - assert(usage == PIPE_TRANSFER_READ_WRITE); - for (j = 0; j < width; j++) { - *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff); - dst4++; - } - } - break; - case PIPE_FORMAT_S8_USCALED: - assert(usage == PIPE_TRANSFER_WRITE); - memcpy(dst, src, width); - break; - default: - assert(0); - } - } - - free(buffer); - - /* unmap the stencil buffer */ - pipe_transfer_unmap(pipe, ptDraw); - pipe->transfer_destroy(pipe, ptDraw); -} - - -static void -st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, - GLsizei width, GLsizei height, - GLint dstx, GLint dsty, GLenum type) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; - struct st_renderbuffer *rbRead; - void *driver_vp, *driver_fp; - struct pipe_resource *pt; - struct pipe_sampler_view *sv[2]; - int num_sampler_view = 1; - GLfloat *color; - enum pipe_format srcFormat, texFormat; - GLboolean invertTex = GL_FALSE; - GLint readX, readY, readW, readH; - GLuint sample_count; - struct gl_pixelstore_attrib pack = ctx->DefaultPacking; - struct st_fp_variant *fpv; - - st_validate_state(st); - - if (type == GL_STENCIL) { - /* can't use texturing to do stencil */ - copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty); - return; - } - - /* - * Get vertex/fragment shaders - */ - if (type == GL_COLOR) { - rbRead = st_get_color_read_renderbuffer(ctx); - color = NULL; - - fpv = get_color_fp_variant(st); - driver_fp = fpv->driver_shader; - - driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); - - if (st->pixel_xfer.pixelmap_enabled) { - sv[1] = st->pixel_xfer.pixelmap_sampler_view; - num_sampler_view++; - } - } - else { - assert(type == GL_DEPTH); - rbRead = 
st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); - color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; - - fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE); - driver_fp = fpv->driver_shader; - - driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); - } - - /* update fragment program constants */ - st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); - - - if (rbRead->Base.Wrapped) - rbRead = st_renderbuffer(rbRead->Base.Wrapped); - - sample_count = rbRead->texture->nr_samples; - /* I believe this would be legal, presumably would need to do a resolve - for color, and for depth/stencil spec says to just use one of the - depth/stencil samples per pixel? Need some transfer clarifications. */ - assert(sample_count < 2); - - srcFormat = rbRead->texture->format; - - if (screen->is_format_supported(screen, srcFormat, st->internal_target, - sample_count, - PIPE_BIND_SAMPLER_VIEW, 0)) { - texFormat = srcFormat; - } - else { - /* srcFormat can't be used as a texture format */ - if (type == GL_DEPTH) { - texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, - st->internal_target, sample_count, - PIPE_BIND_DEPTH_STENCIL); - assert(texFormat != PIPE_FORMAT_NONE); - } - else { - /* default color format */ - texFormat = st_choose_format(screen, GL_RGBA, st->internal_target, - sample_count, PIPE_BIND_SAMPLER_VIEW); - assert(texFormat != PIPE_FORMAT_NONE); - } - } - - /* Invert src region if needed */ - if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { - srcy = ctx->ReadBuffer->Height - srcy - height; - invertTex = !invertTex; - } - - /* Clip the read region against the src buffer bounds. - * We'll still allocate a temporary buffer/texture for the original - * src region size but we'll only read the region which is on-screen. - * This may mean that we draw garbage pixels into the dest region, but - * that's expected. - */ - readX = srcx; - readY = srcy; - readW = width; - readH = height; - _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack); - readW = MAX2(0, readW); - readH = MAX2(0, readH); - - /* alloc temporary texture */ - pt = alloc_texture(st, width, height, texFormat); - if (!pt) - return; - - sv[0] = st_create_texture_sampler_view(st->pipe, pt); - if (!sv[0]) { - pipe_resource_reference(&pt, NULL); - return; - } - - /* Make temporary texture which is a copy of the src region. 
- */ - if (srcFormat == texFormat) { - struct pipe_box src_box; - u_box_2d(readX, readY, readW, readH, &src_box); - /* copy source framebuffer surface into mipmap/texture */ - pipe->resource_copy_region(pipe, - pt, /* dest tex */ - 0, - pack.SkipPixels, pack.SkipRows, 0, /* dest pos */ - rbRead->texture, /* src tex */ - 0, - &src_box); - - } - else { - /* CPU-based fallback/conversion */ - struct pipe_transfer *ptRead = - pipe_get_transfer(st->pipe, rbRead->texture, 0, 0, - PIPE_TRANSFER_READ, - readX, readY, readW, readH); - struct pipe_transfer *ptTex; - enum pipe_transfer_usage transfer_usage; - - if (ST_DEBUG & DEBUG_FALLBACK) - debug_printf("%s: fallback processing\n", __FUNCTION__); - - if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format)) - transfer_usage = PIPE_TRANSFER_READ_WRITE; - else - transfer_usage = PIPE_TRANSFER_WRITE; - - ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage, - 0, 0, width, height); - - /* copy image from ptRead surface to ptTex surface */ - if (type == GL_COLOR) { - /* alternate path using get/put_tile() */ - GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - enum pipe_format readFormat, drawFormat; - readFormat = util_format_linear(rbRead->texture->format); - drawFormat = util_format_linear(pt->format); - pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH, - readFormat, buf); - pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows, - readW, readH, drawFormat, buf); - free(buf); - } - else { - /* GL_DEPTH */ - GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf); - pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows, - readW, readH, buf); - free(buf); - } - - pipe->transfer_destroy(pipe, ptRead); - pipe->transfer_destroy(pipe, ptTex); - } - - /* OK, the texture 'pt' contains the src image/pixels. Now draw a - * textured quad with that texture. - */ - draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2], - width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, - sv, - num_sampler_view, - driver_vp, - driver_fp, - color, invertTex, GL_FALSE, GL_FALSE); - - pipe_resource_reference(&pt, NULL); - pipe_sampler_view_reference(&sv[0], NULL); -} - - - -void st_init_drawpixels_functions(struct dd_function_table *functions) -{ - functions->DrawPixels = st_DrawPixels; - functions->CopyPixels = st_CopyPixels; -} - - -void -st_destroy_drawpix(struct st_context *st) -{ - GLuint i; - - for (i = 0; i < Elements(st->drawpix.shaders); i++) { - if (st->drawpix.shaders[i]) - _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL); - } - - st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL); - if (st->drawpix.vert_shaders[0]) - ureg_free_tokens(st->drawpix.vert_shaders[0]); - if (st->drawpix.vert_shaders[1]) - ureg_free_tokens(st->drawpix.vert_shaders[1]); -} - -#endif /* FEATURE_drawpix */ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "main/mtypes.h" +#include "main/pack.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "program/program.h" +#include "program/prog_print.h" +#include "program/prog_instruction.h" + +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_cb_drawpixels.h" +#include "st_cb_readpixels.h" +#include "st_cb_fbo.h" +#include "st_context.h" +#include "st_debug.h" +#include "st_format.h" +#include "st_program.h" +#include "st_texture.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_ureg.h" +#include "util/u_draw_quad.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_tile.h" +#include "cso_cache/cso_context.h" + + +#if FEATURE_drawpix + +/** + * Check if the given program is: + * 0: MOVE result.color, fragment.color; + * 1: END; + */ +static GLboolean +is_passthrough_program(const struct gl_fragment_program *prog) +{ + if (prog->Base.NumInstructions == 2) { + const struct prog_instruction *inst = prog->Base.Instructions; + if (inst[0].Opcode == OPCODE_MOV && + inst[1].Opcode == OPCODE_END && + inst[0].DstReg.File == PROGRAM_OUTPUT && + inst[0].DstReg.Index == FRAG_RESULT_COLOR && + inst[0].DstReg.WriteMask == WRITEMASK_XYZW && + inst[0].SrcReg[0].File == PROGRAM_INPUT && + inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 && + inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) { + return GL_TRUE; + } + } + return GL_FALSE; +} + + + +/** + * Make fragment shader for glDraw/CopyPixels. This shader is made + * by combining the pixel transfer shader with the user-defined shader. 
+ * \param fpIn the current/incoming fragment program + * \param fpOut returns the combined fragment program + */ +void +st_make_drawpix_fragment_program(struct st_context *st, + struct gl_fragment_program *fpIn, + struct gl_fragment_program **fpOut) +{ + struct gl_program *newProg; + + if (is_passthrough_program(fpIn)) { + newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, + &st->pixel_xfer.program->Base); + } + else { +#if 0 + /* debug */ + printf("Base program:\n"); + _mesa_print_program(&fpIn->Base); + printf("DrawPix program:\n"); + _mesa_print_program(&st->pixel_xfer.program->Base.Base); +#endif + newProg = _mesa_combine_programs(st->ctx, + &st->pixel_xfer.program->Base.Base, + &fpIn->Base); + } + +#if 0 + /* debug */ + printf("Combined DrawPixels program:\n"); + _mesa_print_program(newProg); + printf("InputsRead: 0x%x\n", newProg->InputsRead); + printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); + _mesa_print_parameter_list(newProg->Parameters); +#endif + + *fpOut = (struct gl_fragment_program *) newProg; +} + + +/** + * Create fragment program that does a TEX() instruction to get a Z and/or + * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. + * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). + * Pass fragment color through as-is. + * \return pointer to the gl_fragment program + */ +struct gl_fragment_program * +st_make_drawpix_z_stencil_program(struct st_context *st, + GLboolean write_depth, + GLboolean write_stencil) +{ + struct gl_context *ctx = st->ctx; + struct gl_program *p; + struct gl_fragment_program *fp; + GLuint ic = 0; + const GLuint shaderIndex = write_depth * 2 + write_stencil; + + assert(shaderIndex < Elements(st->drawpix.shaders)); + + if (st->drawpix.shaders[shaderIndex]) { + /* already have the proper shader */ + return st->drawpix.shaders[shaderIndex]; + } + + /* + * Create shader now + */ + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = write_depth ? 2 : 1; + p->NumInstructions += write_stencil ? 
1 : 0; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + if (write_depth) { + /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH; + p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 0; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + } + + if (write_stencil) { + /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL; + p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 1; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + } + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0; + p->OutputsWritten = 0; + if (write_depth) + p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH); + if (write_stencil) + p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL); + + p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ + if (write_stencil) + p->SamplersUsed |= 1 << 1; + + fp = (struct gl_fragment_program *) p; + + /* save the new shader */ + st->drawpix.shaders[shaderIndex] = fp; + + return fp; +} + + +/** + * Create a simple vertex shader that just passes through the + * vertex position and texcoord (and optionally, color). + */ +static void * +make_passthrough_vertex_shader(struct st_context *st, + GLboolean passColor) +{ + if (!st->drawpix.vert_shaders[passColor]) { + struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + + if (ureg == NULL) + return NULL; + + /* MOV result.pos, vertex.pos; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), + ureg_DECL_vs_input( ureg, 0 )); + + /* MOV result.texcoord0, vertex.attr[1]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ), + ureg_DECL_vs_input( ureg, 1 )); + + if (passColor) { + /* MOV result.color0, vertex.attr[2]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), + ureg_DECL_vs_input( ureg, 2 )); + } + + ureg_END( ureg ); + + st->drawpix.vert_shaders[passColor] = + ureg_create_shader_and_destroy( ureg, st->pipe ); + } + + return st->drawpix.vert_shaders[passColor]; +} + + +/** + * Return a texture base format for drawing/copying an image + * of the given format. + */ +static GLenum +base_format(GLenum format) +{ + switch (format) { + case GL_DEPTH_COMPONENT: + return GL_DEPTH_COMPONENT; + case GL_DEPTH_STENCIL: + return GL_DEPTH_STENCIL; + case GL_STENCIL_INDEX: + return GL_STENCIL_INDEX; + default: + return GL_RGBA; + } +} + + +/** + * Return a texture internalFormat for drawing/copying an image + * of the given format and type. 
+ */ +static GLenum +internal_format(GLenum format, GLenum type) +{ + switch (format) { + case GL_DEPTH_COMPONENT: + return GL_DEPTH_COMPONENT; + case GL_DEPTH_STENCIL: + return GL_DEPTH_STENCIL; + case GL_STENCIL_INDEX: + return GL_STENCIL_INDEX; + default: + if (_mesa_is_integer_format(format)) { + switch (type) { + case GL_BYTE: + return GL_RGBA8I; + case GL_UNSIGNED_BYTE: + return GL_RGBA8UI; + case GL_SHORT: + return GL_RGBA16I; + case GL_UNSIGNED_SHORT: + return GL_RGBA16UI; + case GL_INT: + return GL_RGBA32I; + case GL_UNSIGNED_INT: + return GL_RGBA32UI; + default: + assert(0 && "Unexpected type in internal_format()"); + return GL_RGBA_INTEGER; + } + } + else { + return GL_RGBA; + } + } +} + + +/** + * Create a temporary texture to hold an image of the given size. + * If width, height are not POT and the driver only handles POT textures, + * allocate the next larger size of texture that is POT. + */ +static struct pipe_resource * +alloc_texture(struct st_context *st, GLsizei width, GLsizei height, + enum pipe_format texFormat) +{ + struct pipe_resource *pt; + + pt = st_texture_create(st, st->internal_target, texFormat, 0, + width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW); + + return pt; +} + + +/** + * Make texture containing an image for glDrawPixels image. + * If 'pixels' is NULL, leave the texture image data undefined. + */ +static struct pipe_resource * +make_texture(struct st_context *st, + GLsizei width, GLsizei height, GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct gl_context *ctx = st->ctx; + struct pipe_context *pipe = st->pipe; + gl_format mformat; + struct pipe_resource *pt; + enum pipe_format pipeFormat; + GLuint cpp; + GLenum baseFormat, intFormat; + + baseFormat = base_format(format); + intFormat = internal_format(format, type); + + mformat = st_ChooseTextureFormat_renderable(ctx, intFormat, + format, type, GL_FALSE); + assert(mformat); + + pipeFormat = st_mesa_format_to_pipe_format(mformat); + assert(pipeFormat); + cpp = util_format_get_blocksize(pipeFormat); + + pixels = _mesa_map_pbo_source(ctx, unpack, pixels); + if (!pixels) + return NULL; + + /* alloc temporary texture */ + pt = alloc_texture(st, width, height, pipeFormat); + if (!pt) { + _mesa_unmap_pbo_source(ctx, unpack); + return NULL; + } + + { + struct pipe_transfer *transfer; + static const GLuint dstImageOffsets = 0; + GLboolean success; + GLubyte *dest; + const GLbitfield imageTransferStateSave = ctx->_ImageTransferState; + + /* we'll do pixel transfer in a fragment shader */ + ctx->_ImageTransferState = 0x0; + + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + width, height); + + /* map texture transfer */ + dest = pipe_transfer_map(pipe, transfer); + + + /* Put image into texture transfer. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. 
+ */ + success = _mesa_texstore(ctx, 2, /* dims */ + baseFormat, /* baseInternalFormat */ + mformat, /* gl_format */ + dest, /* dest */ + 0, 0, 0, /* dstX/Y/Zoffset */ + transfer->stride, /* dstRowStride, bytes */ + &dstImageOffsets, /* dstImageOffsets */ + width, height, 1, /* size */ + format, type, /* src format/type */ + pixels, /* data source */ + unpack); + + /* unmap */ + pipe_transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); + + assert(success); + + /* restore */ + ctx->_ImageTransferState = imageTransferStateSave; + } + + _mesa_unmap_pbo_source(ctx, unpack); + + return pt; +} + + +/** + * Draw quad with texcoords and optional color. + * Coords are gallium window coords with y=0=top. + * \param color may be null + * \param invertTex if true, flip texcoords vertically + */ +static void +draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, + GLfloat x1, GLfloat y1, const GLfloat *color, + GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ + + /* setup vertex data */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; + const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; + const GLfloat sLeft = 0.0f, sRight = maxXcoord; + const GLfloat tTop = invertTex ? maxYcoord : 0.0f; + const GLfloat tBot = invertTex ? 0.0f : maxYcoord; + GLuint i; + + /* upper-left */ + verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ + verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ + + /* upper-right */ + verts[1][0][0] = clip_x1; + verts[1][0][1] = clip_y0; + + /* lower-right */ + verts[2][0][0] = clip_x1; + verts[2][0][1] = clip_y1; + + /* lower-left */ + verts[3][0][0] = clip_x0; + verts[3][0][1] = clip_y1; + + verts[0][1][0] = sLeft; /* v[0].attr[1].S */ + verts[0][1][1] = tTop; /* v[0].attr[1].T */ + verts[1][1][0] = sRight; + verts[1][1][1] = tTop; + verts[2][1][0] = sRight; + verts[2][1][1] = tBot; + verts[3][1][0] = sLeft; + verts[3][1][1] = tBot; + + /* same for all verts: */ + if (color) { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /* v[i].attr[0].z */ + verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ + verts[i][2][0] = color[0]; /* v[i].attr[2].r */ + verts[i][2][1] = color[1]; /* v[i].attr[2].g */ + verts[i][2][2] = color[2]; /* v[i].attr[2].b */ + verts[i][2][3] = color[3]; /* v[i].attr[2].a */ + verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ + verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ + } + } + else { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /*Z*/ + verts[i][0][3] = 1.0f; /*W*/ + verts[i][1][2] = 0.0f; /*R*/ + verts[i][1][3] = 1.0f; /*Q*/ + } + } + } + + { + struct pipe_resource *buf; + + /* allocate/load buffer object with vertex data */ + buf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(verts)); + pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts); + + util_draw_vertex_buffer(pipe, st->cso_context, buf, 0, + PIPE_PRIM_QUADS, + 4, /* verts */ + 3); /* attribs/vert */ + pipe_resource_reference(&buf, NULL); + } +} + + + +static void +draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + GLfloat zoomX, GLfloat zoomY, + 
struct pipe_sampler_view **sv, + int num_sampler_view, + void *driver_vp, + void *driver_fp, + const GLfloat *color, + GLboolean invertTex, + GLboolean write_depth, GLboolean write_stencil) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; + GLfloat x0, y0, x1, y1; + GLsizei maxSize; + boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT; + + /* limit checks */ + /* XXX if DrawPixels image is larger than max texture size, break + * it up into chunks. + */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= maxSize); + assert(height <= maxSize); + + cso_save_rasterizer(cso); + cso_save_viewport(cso); + cso_save_samplers(cso); + cso_save_fragment_sampler_views(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + if (write_stencil) { + cso_save_depth_stencil_alpha(cso); + cso_save_blend(cso); + } + + /* rasterizer state: just scissor */ + { + struct pipe_rasterizer_state rasterizer; + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.gl_rasterization_rules = 1; + rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &rasterizer); + } + + if (write_stencil) { + /* Stencil writing bypasses the normal fragment pipeline to + * disable color writing and set stencil test to always pass. + */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + + /* depth/stencil */ + memset(&dsa, 0, sizeof(dsa)); + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + if (write_depth) { + /* writing depth+stencil: depth test always passes */ + dsa.depth.enabled = 1; + dsa.depth.writemask = ctx->Depth.Mask; + dsa.depth.func = PIPE_FUNC_ALWAYS; + } + cso_set_depth_stencil_alpha(cso, &dsa); + + /* blend (colormask) */ + memset(&blend, 0, sizeof(blend)); + cso_set_blend(cso, &blend); + } + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, driver_fp); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, driver_vp); + + + /* texture sampling state: */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = normalized; + + cso_single_sampler(cso, 0, &sampler); + if (num_sampler_view > 1) { + cso_single_sampler(cso, 1, &sampler); + } + cso_single_sampler_done(cso); + } + + /* viewport state: viewport matching window dims */ + { + const float w = (float) ctx->DrawBuffer->Width; + const float h = (float) ctx->DrawBuffer->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * w; + vp.scale[1] = -0.5f * h; + vp.scale[2] = 0.5f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * w; + vp.translate[1] = 0.5f * h; + vp.translate[2] = 0.5f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + cso_set_vertex_elements(cso, 3, st->velems_util_draw); + + /* texture state: */ + cso_set_fragment_sampler_views(cso, num_sampler_view, sv); + + /* Compute Gallium window coords (y=0=top) with pixel 
zoom. + * Recall that these coords are transformed by the current + * vertex shader and viewport transformation. + */ + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { + y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY); + invertTex = !invertTex; + } + + x0 = (GLfloat) x; + x1 = x + width * ctx->Pixel.ZoomX; + y0 = (GLfloat) y; + y1 = y + height * ctx->Pixel.ZoomY; + + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + z = z * 2.0 - 1.0; + + draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, + normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, + normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height); + + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_viewport(cso); + cso_restore_samplers(cso); + cso_restore_fragment_sampler_views(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); + if (write_stencil) { + cso_restore_depth_stencil_alpha(cso); + cso_restore_blend(cso); + } +} + + +/** + * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we + * can't use a fragment shader to write stencil values. + */ +static void +draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *strb; + enum pipe_transfer_usage usage; + struct pipe_transfer *pt; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; + GLint skipPixels; + ubyte *stmap; + struct gl_pixelstore_attrib clippedUnpack = *unpack; + + if (!zoom) { + if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height, + &clippedUnpack)) { + /* totally clipped */ + return; + } + } + + strb = st_renderbuffer(ctx->DrawBuffer-> + Attachment[BUFFER_STENCIL].Renderbuffer); + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + y = ctx->DrawBuffer->Height - y - height; + } + + if(format != GL_DEPTH_STENCIL && + util_format_get_component_bits(strb->format, + UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) + usage = PIPE_TRANSFER_READ_WRITE; + else + usage = PIPE_TRANSFER_WRITE; + + pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0, + usage, x, y, + width, height); + + stmap = pipe_transfer_map(pipe, pt); + + pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels); + assert(pixels); + + /* if width > MAX_WIDTH, have to process image in chunks */ + skipPixels = 0; + while (skipPixels < width) { + const GLint spanX = skipPixels; + const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); + GLint row; + for (row = 0; row < height; row++) { + GLubyte sValues[MAX_WIDTH]; + GLuint zValues[MAX_WIDTH]; + GLenum destType = GL_UNSIGNED_BYTE; + const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, + width, height, + format, type, + row, skipPixels); + _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues, + type, source, &clippedUnpack, + ctx->_ImageTransferState); + + if (format == GL_DEPTH_STENCIL) { + _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + (1 << 24) - 1, type, source, + &clippedUnpack); + } + + if (zoom) { + _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with " + "zoom not complete"); + } + + { + GLint spanY; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + spanY = height - row - 1; + } + else { + spanY = row; + } + 
+ /* now pack the stencil (and Z) values in the dest format */ + switch (pt->resource->format) { + case PIPE_FORMAT_S8_USCALED: + { + ubyte *dest = stmap + spanY * pt->stride + spanX; + assert(usage == PIPE_TRANSFER_WRITE); + memcpy(dest, sValues, spanWidth); + } + break; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = zValues[k] | (sValues[k] << 24); + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24); + } + } + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff); + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff); + } + } + break; + default: + assert(0); + } + } + } + skipPixels += spanWidth; + } + + _mesa_unmap_pbo_source(ctx, &clippedUnpack); + + /* unmap the stencil buffer */ + pipe_transfer_unmap(pipe, pt); + pipe->transfer_destroy(pipe, pt); +} + + +/** + * Get fragment program variant for a glDrawPixels or glCopyPixels + * command for RGBA data. + */ +static struct st_fp_variant * +get_color_fp_variant(struct st_context *st) +{ + struct gl_context *ctx = st->ctx; + struct st_fp_variant_key key; + struct st_fp_variant *fpv; + + memset(&key, 0, sizeof(key)); + + key.st = st; + key.drawpixels = 1; + key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 || + ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || + ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || + ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || + ctx->Pixel.AlphaScale != 1.0); + key.pixelMaps = ctx->Pixel.MapColorFlag; + + fpv = st_get_fp_variant(st, st->fp, &key); + + return fpv; +} + + +/** + * Get fragment program variant for a glDrawPixels or glCopyPixels + * command for depth/stencil data. 
+ */ +static struct st_fp_variant * +get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth, + GLboolean write_stencil) +{ + struct st_fp_variant_key key; + struct st_fp_variant *fpv; + + memset(&key, 0, sizeof(key)); + + key.st = st; + key.drawpixels = 1; + key.drawpixels_z = write_depth; + key.drawpixels_stencil = write_stencil; + + fpv = st_get_fp_variant(st, st->fp, &key); + + return fpv; +} + + +/** + * Called via ctx->Driver.DrawPixels() + */ +static void +st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) +{ + void *driver_vp, *driver_fp; + struct st_context *st = st_context(ctx); + const GLfloat *color; + struct pipe_context *pipe = st->pipe; + GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; + struct pipe_sampler_view *sv[2]; + int num_sampler_view = 1; + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + struct st_fp_variant *fpv; + + if (format == GL_DEPTH_STENCIL) + write_stencil = write_depth = GL_TRUE; + else if (format == GL_STENCIL_INDEX) + write_stencil = GL_TRUE; + else if (format == GL_DEPTH_COMPONENT) + write_depth = GL_TRUE; + + if (write_stencil) { + enum pipe_format tex_format; + /* can we write to stencil if not fallback */ + if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) + goto stencil_fallback; + + tex_format = st_choose_format(st->pipe->screen, base_format(format), + PIPE_TEXTURE_2D, + 0, PIPE_BIND_SAMPLER_VIEW); + if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + stencil_format = PIPE_FORMAT_X24S8_USCALED; + else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) + stencil_format = PIPE_FORMAT_S8X24_USCALED; + else + stencil_format = PIPE_FORMAT_S8_USCALED; + if (stencil_format == PIPE_FORMAT_NONE) + goto stencil_fallback; + } + + /* Mesa state should be up to date by now */ + assert(ctx->NewState == 0x0); + + st_validate_state(st); + + /* + * Get vertex/fragment shaders + */ + if (write_depth || write_stencil) { + fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil); + + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); + + color = ctx->Current.RasterColor; + } + else { + fpv = get_color_fp_variant(st); + + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); + + color = NULL; + if (st->pixel_xfer.pixelmap_enabled) { + sv[1] = st->pixel_xfer.pixelmap_sampler_view; + num_sampler_view++; + } + } + + /* update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + + /* draw with textured quad */ + { + struct pipe_resource *pt + = make_texture(st, width, height, format, type, unpack, pixels); + if (pt) { + sv[0] = st_create_texture_sampler_view(st->pipe, pt); + + if (sv[0]) { + if (write_stencil) { + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, + stencil_format); + num_sampler_view++; + } + + draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, + ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, + sv, + num_sampler_view, + driver_vp, + driver_fp, + color, GL_FALSE, write_depth, write_stencil); + pipe_sampler_view_reference(&sv[0], NULL); + if (num_sampler_view > 1) + pipe_sampler_view_reference(&sv[1], NULL); + } + pipe_resource_reference(&pt, NULL); + } + } + return; + +stencil_fallback: + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); +} + + + +/** + * Software fallback for 
glCopyPixels(GL_STENCIL). + */ +static void +copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty) +{ + struct st_renderbuffer *rbDraw; + struct pipe_context *pipe = st_context(ctx)->pipe; + enum pipe_transfer_usage usage; + struct pipe_transfer *ptDraw; + ubyte *drawMap; + ubyte *buffer; + int i; + + buffer = malloc(width * height * sizeof(ubyte)); + if (!buffer) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)"); + return; + } + + /* Get the dest renderbuffer. If there's a wrapper, use the + * underlying renderbuffer. + */ + rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); + if (rbDraw->Base.Wrapped) + rbDraw = st_renderbuffer(rbDraw->Base.Wrapped); + + /* this will do stencil pixel transfer ops */ + st_read_stencil_pixels(ctx, srcx, srcy, width, height, + GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, + &ctx->DefaultPacking, buffer); + + if (0) { + /* debug code: dump stencil values */ + GLint row, col; + for (row = 0; row < height; row++) { + printf("%3d: ", row); + for (col = 0; col < width; col++) { + printf("%02x ", buffer[col + row * width]); + } + printf("\n"); + } + } + + if (util_format_get_component_bits(rbDraw->format, + UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) + usage = PIPE_TRANSFER_READ_WRITE; + else + usage = PIPE_TRANSFER_WRITE; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + dsty = rbDraw->Base.Height - dsty - height; + } + + ptDraw = pipe_get_transfer(st_context(ctx)->pipe, + rbDraw->texture, 0, 0, + usage, dstx, dsty, + width, height); + + assert(util_format_get_blockwidth(ptDraw->resource->format) == 1); + assert(util_format_get_blockheight(ptDraw->resource->format) == 1); + + /* map the stencil buffer */ + drawMap = pipe_transfer_map(pipe, ptDraw); + + /* draw */ + /* XXX PixelZoom not handled yet */ + for (i = 0; i < height; i++) { + ubyte *dst; + const ubyte *src; + int y; + + y = i; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + y = height - y - 1; + } + + dst = drawMap + y * ptDraw->stride; + src = buffer + i * width; + + switch (ptDraw->resource->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + { + uint *dst4 = (uint *) dst; + int j; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = (*dst4 & 0xffffff) | (src[j] << 24); + dst4++; + } + } + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + uint *dst4 = (uint *) dst; + int j; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff); + dst4++; + } + } + break; + case PIPE_FORMAT_S8_USCALED: + assert(usage == PIPE_TRANSFER_WRITE); + memcpy(dst, src, width); + break; + default: + assert(0); + } + } + + free(buffer); + + /* unmap the stencil buffer */ + pipe_transfer_unmap(pipe, ptDraw); + pipe->transfer_destroy(pipe, ptDraw); +} + + +static void +st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct st_renderbuffer *rbRead; + void *driver_vp, *driver_fp; + struct pipe_resource *pt; + struct pipe_sampler_view *sv[2]; + int num_sampler_view = 1; + GLfloat *color; + enum pipe_format srcFormat, texFormat; + GLboolean invertTex = GL_FALSE; + GLint readX, readY, readW, readH; + GLuint sample_count; + struct gl_pixelstore_attrib pack = ctx->DefaultPacking; + struct st_fp_variant *fpv; + + 
st_validate_state(st); + + if (type == GL_STENCIL) { + /* can't use texturing to do stencil */ + copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty); + return; + } + + /* + * Get vertex/fragment shaders + */ + if (type == GL_COLOR) { + rbRead = st_get_color_read_renderbuffer(ctx); + color = NULL; + + fpv = get_color_fp_variant(st); + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); + + if (st->pixel_xfer.pixelmap_enabled) { + sv[1] = st->pixel_xfer.pixelmap_sampler_view; + num_sampler_view++; + } + } + else { + assert(type == GL_DEPTH); + rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); + color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; + + fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE); + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); + } + + /* update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + + + if (rbRead->Base.Wrapped) + rbRead = st_renderbuffer(rbRead->Base.Wrapped); + + sample_count = rbRead->texture->nr_samples; + /* I believe this would be legal, presumably would need to do a resolve + for color, and for depth/stencil spec says to just use one of the + depth/stencil samples per pixel? Need some transfer clarifications. */ + assert(sample_count < 2); + + srcFormat = rbRead->texture->format; + + if (screen->is_format_supported(screen, srcFormat, st->internal_target, + sample_count, + PIPE_BIND_SAMPLER_VIEW, 0)) { + texFormat = srcFormat; + } + else { + /* srcFormat can't be used as a texture format */ + if (type == GL_DEPTH) { + texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, + st->internal_target, sample_count, + PIPE_BIND_DEPTH_STENCIL); + assert(texFormat != PIPE_FORMAT_NONE); + } + else { + /* default color format */ + texFormat = st_choose_format(screen, GL_RGBA, st->internal_target, + sample_count, PIPE_BIND_SAMPLER_VIEW); + assert(texFormat != PIPE_FORMAT_NONE); + } + } + + /* Invert src region if needed */ + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + srcy = ctx->ReadBuffer->Height - srcy - height; + invertTex = !invertTex; + } + + /* Clip the read region against the src buffer bounds. + * We'll still allocate a temporary buffer/texture for the original + * src region size but we'll only read the region which is on-screen. + * This may mean that we draw garbage pixels into the dest region, but + * that's expected. + */ + readX = srcx; + readY = srcy; + readW = width; + readH = height; + _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack); + readW = MAX2(0, readW); + readH = MAX2(0, readH); + + /* alloc temporary texture */ + pt = alloc_texture(st, width, height, texFormat); + if (!pt) + return; + + sv[0] = st_create_texture_sampler_view(st->pipe, pt); + if (!sv[0]) { + pipe_resource_reference(&pt, NULL); + return; + } + + /* Make temporary texture which is a copy of the src region. 
+ */ + if (srcFormat == texFormat) { + struct pipe_box src_box; + u_box_2d(readX, readY, readW, readH, &src_box); + /* copy source framebuffer surface into mipmap/texture */ + pipe->resource_copy_region(pipe, + pt, /* dest tex */ + 0, + pack.SkipPixels, pack.SkipRows, 0, /* dest pos */ + rbRead->texture, /* src tex */ + 0, + &src_box); + + } + else { + /* CPU-based fallback/conversion */ + struct pipe_transfer *ptRead = + pipe_get_transfer(st->pipe, rbRead->texture, 0, 0, + PIPE_TRANSFER_READ, + readX, readY, readW, readH); + struct pipe_transfer *ptTex; + enum pipe_transfer_usage transfer_usage; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + + if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format)) + transfer_usage = PIPE_TRANSFER_READ_WRITE; + else + transfer_usage = PIPE_TRANSFER_WRITE; + + ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage, + 0, 0, width, height); + + /* copy image from ptRead surface to ptTex surface */ + if (type == GL_COLOR) { + /* alternate path using get/put_tile() */ + GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + enum pipe_format readFormat, drawFormat; + readFormat = util_format_linear(rbRead->texture->format); + drawFormat = util_format_linear(pt->format); + pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH, + readFormat, buf); + pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows, + readW, readH, drawFormat, buf); + free(buf); + } + else { + /* GL_DEPTH */ + GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); + pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf); + pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows, + readW, readH, buf); + free(buf); + } + + pipe->transfer_destroy(pipe, ptRead); + pipe->transfer_destroy(pipe, ptTex); + } + + /* OK, the texture 'pt' contains the src image/pixels. Now draw a + * textured quad with that texture. + */ + draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2], + width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, + sv, + num_sampler_view, + driver_vp, + driver_fp, + color, invertTex, GL_FALSE, GL_FALSE); + + pipe_resource_reference(&pt, NULL); + pipe_sampler_view_reference(&sv[0], NULL); +} + + + +void st_init_drawpixels_functions(struct dd_function_table *functions) +{ + functions->DrawPixels = st_DrawPixels; + functions->CopyPixels = st_CopyPixels; +} + + +void +st_destroy_drawpix(struct st_context *st) +{ + GLuint i; + + for (i = 0; i < Elements(st->drawpix.shaders); i++) { + if (st->drawpix.shaders[i]) + _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL); + } + + st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL); + if (st->drawpix.vert_shaders[0]) + ureg_free_tokens(st->drawpix.vert_shaders[0]); + if (st->drawpix.vert_shaders[1]) + ureg_free_tokens(st->drawpix.vert_shaders[1]); +} + +#endif /* FEATURE_drawpix */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c index 5976f1048..86ceb9d78 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c @@ -1,304 +1,307 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - **************************************************************************/ - - -/** - * Implementation of glDrawTex() for GL_OES_draw_tex - */ - - - -#include "main/imports.h" -#include "main/image.h" -#include "main/macros.h" -#include "main/mfeatures.h" -#include "program/program.h" -#include "program/prog_print.h" - -#include "st_context.h" -#include "st_atom.h" -#include "st_cb_drawtex.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_draw_quad.h" -#include "util/u_simple_shaders.h" - -#include "cso_cache/cso_context.h" - - -#if FEATURE_OES_draw_texture - - -struct cached_shader -{ - void *handle; - - uint num_attribs; - uint semantic_names[2 + MAX_TEXTURE_UNITS]; - uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; -}; - -#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS) - -/** - * Simple linear list cache. - * Most of the time there'll only be one cached shader. - */ -static struct cached_shader CachedShaders[MAX_SHADERS]; -static GLuint NumCachedShaders = 0; - - -static void * -lookup_shader(struct pipe_context *pipe, - uint num_attribs, - const uint *semantic_names, - const uint *semantic_indexes) -{ - GLuint i, j; - - /* look for existing shader with same attributes */ - for (i = 0; i < NumCachedShaders; i++) { - if (CachedShaders[i].num_attribs == num_attribs) { - GLboolean match = GL_TRUE; - for (j = 0; j < num_attribs; j++) { - if (semantic_names[j] != CachedShaders[i].semantic_names[j] || - semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) { - match = GL_FALSE; - break; - } - } - if (match) - return CachedShaders[i].handle; - } - } - - /* not found - create new one now */ - if (NumCachedShaders >= MAX_SHADERS) { - return NULL; - } - - CachedShaders[i].num_attribs = num_attribs; - for (j = 0; j < num_attribs; j++) { - CachedShaders[i].semantic_names[j] = semantic_names[j]; - CachedShaders[i].semantic_indexes[j] = semantic_indexes[j]; - } - - CachedShaders[i].handle = - util_make_vertex_passthrough_shader(pipe, - num_attribs, - semantic_names, - semantic_indexes); - NumCachedShaders++; - - return CachedShaders[i].handle; -} - -static void -st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, - GLfloat width, GLfloat height) -{ - struct st_context *st = ctx->st; - struct pipe_context *pipe = st->pipe; - struct cso_context *cso = ctx->st->cso_context; - struct pipe_resource *vbuffer; - struct pipe_transfer *vbuffer_transfer; - GLuint i, numTexCoords, numAttribs; - GLboolean emitColor; - uint semantic_names[2 + MAX_TEXTURE_UNITS]; - uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; - struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; - GLbitfield inputs = VERT_BIT_POS; - - st_validate_state(st); - - /* determine if we need vertex color */ - if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0) - emitColor = GL_TRUE; - else - emitColor = GL_FALSE; - - /* determine how many enabled sets of texcoords */ - numTexCoords = 0; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { - inputs |= VERT_BIT_TEX(i); - numTexCoords++; - } - } - - /* total number of attributes per vertex */ - numAttribs = 1 + emitColor + numTexCoords; - - - /* create the vertex buffer */ - vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, - numAttribs * 4 * 4 * sizeof(GLfloat)); - - /* load vertex buffer */ - { -#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \ - do { \ - GLuint k = (((VERT) * numAttribs + 
(ATTR)) * 4); \ - assert(k < 4 * 4 * numAttribs); \ - vbuf[k + 0] = X; \ - vbuf[k + 1] = Y; \ - vbuf[k + 2] = Z; \ - vbuf[k + 3] = W; \ - } while (0) - - const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height; - GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer, - PIPE_TRANSFER_WRITE, - &vbuffer_transfer); - GLuint attr; - - z = CLAMP(z, 0.0f, 1.0f); - - /* positions (in clip coords) */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLfloat fb_width = (GLfloat)fb->Width; - const GLfloat fb_height = (GLfloat)fb->Height; - - const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); - const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); - const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); - const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); - - SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */ - SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */ - SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */ - SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */ - - semantic_names[0] = TGSI_SEMANTIC_POSITION; - semantic_indexes[0] = 0; - } - - /* colors */ - if (emitColor) { - const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; - SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]); - SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]); - SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]); - SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]); - semantic_names[1] = TGSI_SEMANTIC_COLOR; - semantic_indexes[1] = 0; - attr = 2; - } - else { - attr = 1; - } - - /* texcoords */ - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { - struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current; - struct gl_texture_image *img = obj->Image[0][obj->BaseLevel]; - const GLfloat wt = (GLfloat) img->Width; - const GLfloat ht = (GLfloat) img->Height; - const GLfloat s0 = obj->CropRect[0] / wt; - const GLfloat t0 = obj->CropRect[1] / ht; - const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt; - const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht; - - /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/ - SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */ - SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */ - SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */ - SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */ - - semantic_names[attr] = TGSI_SEMANTIC_GENERIC; - semantic_indexes[attr] = 0; - - attr++; - } - } - - pipe_buffer_unmap(pipe, vbuffer_transfer); - -#undef SET_ATTRIB - } - - - cso_save_viewport(cso); - cso_save_vertex_shader(cso); - cso_save_vertex_elements(cso); - - { - void *vs = lookup_shader(pipe, numAttribs, - semantic_names, semantic_indexes); - cso_set_vertex_shader_handle(cso, vs); - } - - for (i = 0; i < numAttribs; i++) { - velements[i].src_offset = i * 4 * sizeof(float); - velements[i].instance_divisor = 0; - velements[i].vertex_buffer_index = 0; - velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - } - cso_set_vertex_elements(cso, numAttribs, velements); - - /* viewport state: viewport matching window dims */ - { - const struct gl_framebuffer *fb = st->ctx->DrawBuffer; - const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); - const GLfloat width = (GLfloat)fb->Width; - const GLfloat height = (GLfloat)fb->Height; - struct pipe_viewport_state vp; - vp.scale[0] = 0.5f * width; - vp.scale[1] = height * (invert ? 
-0.5f : 0.5f); - vp.scale[2] = 1.0f; - vp.scale[3] = 1.0f; - vp.translate[0] = 0.5f * width; - vp.translate[1] = 0.5f * height; - vp.translate[2] = 0.0f; - vp.translate[3] = 0.0f; - cso_set_viewport(cso, &vp); - } - - - util_draw_vertex_buffer(pipe, vbuffer, - 0, /* offset */ - PIPE_PRIM_TRIANGLE_FAN, - 4, /* verts */ - numAttribs); /* attribs/vert */ - - - pipe_resource_reference(&vbuffer, NULL); - - /* restore state */ - cso_restore_viewport(cso); - cso_restore_vertex_shader(cso); - cso_restore_vertex_elements(cso); -} - - -void -st_init_drawtex_functions(struct dd_function_table *functions) -{ - functions->DrawTex = st_DrawTex; -} - - -/** - * Free any cached shaders - */ -void -st_destroy_drawtex(struct st_context *st) -{ - GLuint i; - for (i = 0; i < NumCachedShaders; i++) { - cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle); - } - NumCachedShaders = 0; -} - - -#endif /* FEATURE_OES_draw_texture */ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + **************************************************************************/ + + +/** + * Implementation of glDrawTex() for GL_OES_draw_tex + */ + + + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/program.h" +#include "program/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_drawtex.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_draw_quad.h" +#include "util/u_simple_shaders.h" + +#include "cso_cache/cso_context.h" + + +#if FEATURE_OES_draw_texture + + +struct cached_shader +{ + void *handle; + + uint num_attribs; + uint semantic_names[2 + MAX_TEXTURE_UNITS]; + uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; +}; + +#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS) + +/** + * Simple linear list cache. + * Most of the time there'll only be one cached shader. 
+ */ +static struct cached_shader CachedShaders[MAX_SHADERS]; +static GLuint NumCachedShaders = 0; + + +static void * +lookup_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes) +{ + GLuint i, j; + + /* look for existing shader with same attributes */ + for (i = 0; i < NumCachedShaders; i++) { + if (CachedShaders[i].num_attribs == num_attribs) { + GLboolean match = GL_TRUE; + for (j = 0; j < num_attribs; j++) { + if (semantic_names[j] != CachedShaders[i].semantic_names[j] || + semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) { + match = GL_FALSE; + break; + } + } + if (match) + return CachedShaders[i].handle; + } + } + + /* not found - create new one now */ + if (NumCachedShaders >= MAX_SHADERS) { + return NULL; + } + + CachedShaders[i].num_attribs = num_attribs; + for (j = 0; j < num_attribs; j++) { + CachedShaders[i].semantic_names[j] = semantic_names[j]; + CachedShaders[i].semantic_indexes[j] = semantic_indexes[j]; + } + + CachedShaders[i].handle = + util_make_vertex_passthrough_shader(pipe, + num_attribs, + semantic_names, + semantic_indexes); + NumCachedShaders++; + + return CachedShaders[i].handle; +} + +static void +st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, + GLfloat width, GLfloat height) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = ctx->st->cso_context; + struct pipe_resource *vbuffer; + struct pipe_transfer *vbuffer_transfer; + GLuint i, numTexCoords, numAttribs; + GLboolean emitColor; + uint semantic_names[2 + MAX_TEXTURE_UNITS]; + uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; + struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; + GLbitfield inputs = VERT_BIT_POS; + + st_validate_state(st); + + /* determine if we need vertex color */ + if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0) + emitColor = GL_TRUE; + else + emitColor = GL_FALSE; + + /* determine how many enabled sets of texcoords */ + numTexCoords = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { + inputs |= VERT_BIT_TEX(i); + numTexCoords++; + } + } + + /* total number of attributes per vertex */ + numAttribs = 1 + emitColor + numTexCoords; + + + /* create the vertex buffer */ + vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + numAttribs * 4 * 4 * sizeof(GLfloat)); + + /* load vertex buffer */ + { +#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \ + do { \ + GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \ + assert(k < 4 * 4 * numAttribs); \ + vbuf[k + 0] = X; \ + vbuf[k + 1] = Y; \ + vbuf[k + 2] = Z; \ + vbuf[k + 3] = W; \ + } while (0) + + const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height; + GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer, + PIPE_TRANSFER_WRITE, + &vbuffer_transfer); + GLuint attr; + + z = CLAMP(z, 0.0f, 1.0f); + + /* positions (in clip coords) */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat)fb->Width; + const GLfloat fb_height = (GLfloat)fb->Height; + + const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); + const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); + const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); + const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + + SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */ + SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */ 
+ SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */ + SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */ + + semantic_names[0] = TGSI_SEMANTIC_POSITION; + semantic_indexes[0] = 0; + } + + /* colors */ + if (emitColor) { + const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; + SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]); + semantic_names[1] = TGSI_SEMANTIC_COLOR; + semantic_indexes[1] = 0; + attr = 2; + } + else { + attr = 1; + } + + /* texcoords */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { + struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current; + struct gl_texture_image *img = obj->Image[0][obj->BaseLevel]; + const GLfloat wt = (GLfloat) img->Width; + const GLfloat ht = (GLfloat) img->Height; + const GLfloat s0 = obj->CropRect[0] / wt; + const GLfloat t0 = obj->CropRect[1] / ht; + const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt; + const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht; + + /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/ + SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */ + SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */ + SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */ + SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */ + + semantic_names[attr] = TGSI_SEMANTIC_GENERIC; + semantic_indexes[attr] = 0; + + attr++; + } + } + + pipe_buffer_unmap(pipe, vbuffer_transfer); + +#undef SET_ATTRIB + } + + + cso_save_viewport(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + + { + void *vs = lookup_shader(pipe, numAttribs, + semantic_names, semantic_indexes); + cso_set_vertex_shader_handle(cso, vs); + } + + for (i = 0; i < numAttribs; i++) { + velements[i].src_offset = i * 4 * sizeof(float); + velements[i].instance_divisor = 0; + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + cso_set_vertex_elements(cso, numAttribs, velements); + + /* viewport state: viewport matching window dims */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + const GLfloat width = (GLfloat)fb->Width; + const GLfloat height = (GLfloat)fb->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = height * (invert ? 
-0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + + util_draw_vertex_buffer(pipe, cso, vbuffer, + 0, /* offset */ + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + numAttribs); /* attribs/vert */ + + + pipe_resource_reference(&vbuffer, NULL); + + /* restore state */ + cso_restore_viewport(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); +} + + +void +st_init_drawtex_functions(struct dd_function_table *functions) +{ + functions->DrawTex = st_DrawTex; +} + + +/** + * Free any cached shaders + */ +void +st_destroy_drawtex(struct st_context *st) +{ + GLuint i; + for (i = 0; i < NumCachedShaders; i++) { + cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle); + } + NumCachedShaders = 0; +} + + +#endif /* FEATURE_OES_draw_texture */ diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c index dccbff3c1..7a19f35bb 100644 --- a/mesalib/src/mesa/state_tracker/st_context.c +++ b/mesalib/src/mesa/state_tracker/st_context.c @@ -203,6 +203,11 @@ static void st_destroy_context_priv( struct st_context *st ) st_destroy_drawpix(st); st_destroy_drawtex(st); + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + for (i = 0; i < Elements(st->state.sampler_views); i++) { pipe_sampler_view_reference(&st->state.sampler_views[i], NULL); } diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h index 492ee600e..77765f023 100644 --- a/mesalib/src/mesa/state_tracker/st_context.h +++ b/mesalib/src/mesa/state_tracker/st_context.h @@ -1,265 +1,270 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
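
The st_context.c hunk above is the teardown half of the user-vertex-buffer tracking this patch introduces: any pipe_resource wrappers still held in st->user_vb[] are unreferenced when the context is destroyed. A minimal sketch of the same idiom, assuming only the user_vb[]/num_user_vbs fields added by this patch; the helper name is illustrative and not part of the change:

static void
release_user_vertex_buffers(struct st_context *st)
{
   unsigned i;

   /* Illustrative only: pipe_resource_reference(&p, NULL) drops one
    * reference and clears the pointer; a wrapper created by
    * pipe_user_buffer_create() is freed once its count reaches zero. */
   for (i = 0; i < st->num_user_vbs; i++)
      pipe_resource_reference(&st->user_vb[i], NULL);

   st->num_user_vbs = 0;
}

st_validate_varrays() in st_draw.c below performs the same unreference before rebuilding the vertex-buffer bindings, so stale wrappers never outlive the client arrays they point into.
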
- * - **************************************************************************/ - -#ifndef ST_CONTEXT_H -#define ST_CONTEXT_H - -#include "main/mtypes.h" -#include "pipe/p_state.h" -#include "state_tracker/st_api.h" - -struct bitmap_cache; -struct blit_state; -struct dd_function_table; -struct draw_context; -struct draw_stage; -struct gen_mipmap_state; -struct st_context; -struct st_fragment_program; - - -#define ST_NEW_MESA 0x1 /* Mesa state has changed */ -#define ST_NEW_FRAGMENT_PROGRAM 0x2 -#define ST_NEW_VERTEX_PROGRAM 0x4 -#define ST_NEW_FRAMEBUFFER 0x8 -#define ST_NEW_EDGEFLAGS_DATA 0x10 -#define ST_NEW_GEOMETRY_PROGRAM 0x20 - - -struct st_state_flags { - GLuint mesa; - GLuint st; -}; - -struct st_tracked_state { - const char *name; - struct st_state_flags dirty; - void (*update)( struct st_context *st ); -}; - - - -struct st_context -{ - struct st_context_iface iface; - - struct gl_context *ctx; - - struct pipe_context *pipe; - - struct draw_context *draw; /**< For selection/feedback/rastpos only */ - struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */ - struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */ - struct draw_stage *rastpos_stage; /**< For glRasterPos */ - - - /* On old libGL's for linux we need to invalidate the drawables - * on glViewpport calls, this is set via a option. - */ - boolean invalidate_on_gl_viewport; - - /* Some state is contained in constant objects. - * Other state is just parameter values. - */ - struct { - struct pipe_blend_state blend; - struct pipe_depth_stencil_alpha_state depth_stencil; - struct pipe_rasterizer_state rasterizer; - struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; - struct pipe_clip_state clip; - struct { - void *ptr; - unsigned size; - } constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; - struct pipe_scissor_state scissor; - struct pipe_viewport_state viewport; - unsigned sample_mask; - - GLuint num_samplers; - GLuint num_textures; - - GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ - } state; - - char vendor[100]; - char renderer[100]; - - struct st_state_flags dirty; - - GLboolean missing_textures; - GLboolean vertdata_edgeflags; - - /** Mapping from VERT_RESULT_x to post-transformed vertex slot */ - const GLuint *vertex_result_to_slot; - - struct st_vertex_program *vp; /**< Currently bound vertex program */ - struct st_fragment_program *fp; /**< Currently bound fragment program */ - struct st_geometry_program *gp; /**< Currently bound geometry program */ - - struct st_vp_variant *vp_variant; - struct st_fp_variant *fp_variant; - struct st_gp_variant *gp_variant; - - struct gl_texture_object *default_texture; - - struct { - struct gl_program_cache *cache; - struct st_fragment_program *program; /**< cur pixel transfer prog */ - GLuint xfer_prog_sn; /**< pixel xfer program serial no. */ - GLuint user_prog_sn; /**< user fragment program serial no. */ - struct st_fragment_program *combined_prog; - GLuint combined_prog_sn; - struct pipe_resource *pixelmap_texture; - struct pipe_sampler_view *pixelmap_sampler_view; - boolean pixelmap_enabled; /**< use the pixelmap texture? 
*/ - } pixel_xfer; - - /** for glBitmap */ - struct { - struct pipe_rasterizer_state rasterizer; - struct pipe_sampler_state samplers[2]; - enum pipe_format tex_format; - void *vs; - float vertices[4][3][4]; /**< vertex pos + color + texcoord */ - struct pipe_resource *vbuf; - unsigned vbuf_slot; /* next free slot in vbuf */ - struct bitmap_cache *cache; - } bitmap; - - /** for glDraw/CopyPixels */ - struct { - struct gl_fragment_program *shaders[4]; - void *vert_shaders[2]; /**< ureg shaders */ - } drawpix; - - /** for glClear */ - struct { - struct pipe_rasterizer_state raster; - struct pipe_viewport_state viewport; - struct pipe_clip_state clip; - void *vs; - void *fs; - float vertices[4][2][4]; /**< vertex pos + color */ - struct pipe_resource *vbuf; - unsigned vbuf_slot; - boolean enable_ds_separate; - } clear; - - /** used for anything using util_draw_vertex_buffer */ - struct pipe_vertex_element velems_util_draw[3]; - - void *passthrough_fs; /**< simple pass-through frag shader */ - - enum pipe_texture_target internal_target; - struct gen_mipmap_state *gen_mipmap; - struct blit_state *blit; - - struct cso_context *cso_context; - - int force_msaa; - void *winsys_drawable_handle; -}; - - -/* Need this so that we can implement Mesa callbacks in this module. - */ -static INLINE struct st_context *st_context(struct gl_context *ctx) -{ - return ctx->st; -} - - -/** - * Wrapper for struct gl_framebuffer. - * This is an opaque type to the outside world. - */ -struct st_framebuffer -{ - struct gl_framebuffer Base; - void *Private; - - struct st_framebuffer_iface *iface; - enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; - unsigned num_statts; - int32_t revalidate; -}; - - -extern void st_init_driver_functions(struct dd_function_table *functions); - -void st_invalidate_state(struct gl_context * ctx, GLuint new_state); - - - -#define Y_0_TOP 1 -#define Y_0_BOTTOM 2 - -static INLINE GLuint -st_fb_orientation(const struct gl_framebuffer *fb) -{ - if (fb && fb->Name == 0) { - /* Drawing into a window (on-screen buffer). - * - * Negate Y scale to flip image vertically. - * The NDC Y coords prior to viewport transformation are in the range - * [y=-1=bottom, y=1=top] - * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where - * H is the window height. - * Use the viewport transformation to invert Y. - */ - return Y_0_TOP; - } - else { - /* Drawing into user-created FBO (very likely a texture). - * - * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering. - */ - return Y_0_BOTTOM; - } -} - - -/** clear-alloc a struct-sized object, with casting */ -#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) - - -extern int -st_get_msaa(void); - -extern struct st_context * -st_create_context(gl_api api, struct pipe_context *pipe, - const struct gl_config *visual, - struct st_context *share); - -extern void -st_destroy_context(struct st_context *st); - - -#endif +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CONTEXT_H +#define ST_CONTEXT_H + +#include "main/mtypes.h" +#include "pipe/p_state.h" +#include "state_tracker/st_api.h" + +struct bitmap_cache; +struct blit_state; +struct dd_function_table; +struct draw_context; +struct draw_stage; +struct gen_mipmap_state; +struct st_context; +struct st_fragment_program; + + +#define ST_NEW_MESA 0x1 /* Mesa state has changed */ +#define ST_NEW_FRAGMENT_PROGRAM 0x2 +#define ST_NEW_VERTEX_PROGRAM 0x4 +#define ST_NEW_FRAMEBUFFER 0x8 +#define ST_NEW_EDGEFLAGS_DATA 0x10 +#define ST_NEW_GEOMETRY_PROGRAM 0x20 + + +struct st_state_flags { + GLuint mesa; + GLuint st; +}; + +struct st_tracked_state { + const char *name; + struct st_state_flags dirty; + void (*update)( struct st_context *st ); +}; + + + +struct st_context +{ + struct st_context_iface iface; + + struct gl_context *ctx; + + struct pipe_context *pipe; + + struct draw_context *draw; /**< For selection/feedback/rastpos only */ + struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */ + struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */ + struct draw_stage *rastpos_stage; /**< For glRasterPos */ + + + /* On old libGL's for linux we need to invalidate the drawables + * on glViewpport calls, this is set via a option. + */ + boolean invalidate_on_gl_viewport; + + /* Some state is contained in constant objects. + * Other state is just parameter values. 
+ */ + struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; + struct pipe_clip_state clip; + struct { + void *ptr; + unsigned size; + } constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + unsigned sample_mask; + + GLuint num_samplers; + GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ + } state; + + char vendor[100]; + char renderer[100]; + + struct st_state_flags dirty; + + GLboolean missing_textures; + GLboolean vertdata_edgeflags; + + /** Mapping from VERT_RESULT_x to post-transformed vertex slot */ + const GLuint *vertex_result_to_slot; + + struct st_vertex_program *vp; /**< Currently bound vertex program */ + struct st_fragment_program *fp; /**< Currently bound fragment program */ + struct st_geometry_program *gp; /**< Currently bound geometry program */ + + struct st_vp_variant *vp_variant; + struct st_fp_variant *fp_variant; + struct st_gp_variant *gp_variant; + + struct gl_texture_object *default_texture; + + struct { + struct gl_program_cache *cache; + struct st_fragment_program *program; /**< cur pixel transfer prog */ + GLuint xfer_prog_sn; /**< pixel xfer program serial no. */ + GLuint user_prog_sn; /**< user fragment program serial no. */ + struct st_fragment_program *combined_prog; + GLuint combined_prog_sn; + struct pipe_resource *pixelmap_texture; + struct pipe_sampler_view *pixelmap_sampler_view; + boolean pixelmap_enabled; /**< use the pixelmap texture? */ + } pixel_xfer; + + /** for glBitmap */ + struct { + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[2]; + enum pipe_format tex_format; + void *vs; + float vertices[4][3][4]; /**< vertex pos + color + texcoord */ + struct pipe_resource *vbuf; + unsigned vbuf_slot; /* next free slot in vbuf */ + struct bitmap_cache *cache; + } bitmap; + + /** for glDraw/CopyPixels */ + struct { + struct gl_fragment_program *shaders[4]; + void *vert_shaders[2]; /**< ureg shaders */ + } drawpix; + + /** for glClear */ + struct { + struct pipe_rasterizer_state raster; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + void *vs; + void *fs; + float vertices[4][2][4]; /**< vertex pos + color */ + struct pipe_resource *vbuf; + unsigned vbuf_slot; + boolean enable_ds_separate; + } clear; + + /** used for anything using util_draw_vertex_buffer */ + struct pipe_vertex_element velems_util_draw[3]; + + void *passthrough_fs; /**< simple pass-through frag shader */ + + enum pipe_texture_target internal_target; + struct gen_mipmap_state *gen_mipmap; + struct blit_state *blit; + + struct cso_context *cso_context; + + int force_msaa; + void *winsys_drawable_handle; + + /* User vertex buffers. */ + struct pipe_resource *user_vb[PIPE_MAX_ATTRIBS]; + unsigned user_vb_stride[PIPE_MAX_ATTRIBS]; + unsigned num_user_vbs; +}; + + +/* Need this so that we can implement Mesa callbacks in this module. + */ +static INLINE struct st_context *st_context(struct gl_context *ctx) +{ + return ctx->st; +} + + +/** + * Wrapper for struct gl_framebuffer. + * This is an opaque type to the outside world. 
+ */ +struct st_framebuffer +{ + struct gl_framebuffer Base; + void *Private; + + struct st_framebuffer_iface *iface; + enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; + unsigned num_statts; + int32_t revalidate; +}; + + +extern void st_init_driver_functions(struct dd_function_table *functions); + +void st_invalidate_state(struct gl_context * ctx, GLuint new_state); + + + +#define Y_0_TOP 1 +#define Y_0_BOTTOM 2 + +static INLINE GLuint +st_fb_orientation(const struct gl_framebuffer *fb) +{ + if (fb && fb->Name == 0) { + /* Drawing into a window (on-screen buffer). + * + * Negate Y scale to flip image vertically. + * The NDC Y coords prior to viewport transformation are in the range + * [y=-1=bottom, y=1=top] + * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where + * H is the window height. + * Use the viewport transformation to invert Y. + */ + return Y_0_TOP; + } + else { + /* Drawing into user-created FBO (very likely a texture). + * + * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering. + */ + return Y_0_BOTTOM; + } +} + + +/** clear-alloc a struct-sized object, with casting */ +#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) + + +extern int +st_get_msaa(void); + +extern struct st_context * +st_create_context(gl_api api, struct pipe_context *pipe, + const struct gl_config *visual, + struct st_context *share); + +extern void +st_destroy_context(struct st_context *st); + + +#endif diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c index 19466ea44..830e3e3c1 100644 --- a/mesalib/src/mesa/state_tracker/st_draw.c +++ b/mesalib/src/mesa/state_tracker/st_draw.c @@ -1,788 +1,749 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* - * This file implements the st_draw_vbo() function which is called from - * Mesa's VBO module. All point/line/triangle rendering is done through - * this function whether the user called glBegin/End, glDrawArrays, - * glDrawElements, glEvalMesh, or glCalList, etc. 
- * - * We basically convert the VBO's vertex attribute/array information into - * Gallium vertex state, bind the vertex buffer objects and call - * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays(). - * - * Authors: - * Keith Whitwell - */ - - -#include "main/imports.h" -#include "main/image.h" -#include "main/macros.h" -#include "main/mfeatures.h" -#include "program/prog_uniform.h" - -#include "vbo/vbo.h" - -#include "st_context.h" -#include "st_atom.h" -#include "st_cb_bufferobjects.h" -#include "st_draw.h" -#include "st_program.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_prim.h" -#include "util/u_draw_quad.h" -#include "draw/draw_context.h" -#include "cso_cache/cso_context.h" - - -static GLuint double_types[4] = { - PIPE_FORMAT_R64_FLOAT, - PIPE_FORMAT_R64G64_FLOAT, - PIPE_FORMAT_R64G64B64_FLOAT, - PIPE_FORMAT_R64G64B64A64_FLOAT -}; - -static GLuint float_types[4] = { - PIPE_FORMAT_R32_FLOAT, - PIPE_FORMAT_R32G32_FLOAT, - PIPE_FORMAT_R32G32B32_FLOAT, - PIPE_FORMAT_R32G32B32A32_FLOAT -}; - -static GLuint half_float_types[4] = { - PIPE_FORMAT_R16_FLOAT, - PIPE_FORMAT_R16G16_FLOAT, - PIPE_FORMAT_R16G16B16_FLOAT, - PIPE_FORMAT_R16G16B16A16_FLOAT -}; - -static GLuint uint_types_norm[4] = { - PIPE_FORMAT_R32_UNORM, - PIPE_FORMAT_R32G32_UNORM, - PIPE_FORMAT_R32G32B32_UNORM, - PIPE_FORMAT_R32G32B32A32_UNORM -}; - -static GLuint uint_types_scale[4] = { - PIPE_FORMAT_R32_USCALED, - PIPE_FORMAT_R32G32_USCALED, - PIPE_FORMAT_R32G32B32_USCALED, - PIPE_FORMAT_R32G32B32A32_USCALED -}; - -static GLuint int_types_norm[4] = { - PIPE_FORMAT_R32_SNORM, - PIPE_FORMAT_R32G32_SNORM, - PIPE_FORMAT_R32G32B32_SNORM, - PIPE_FORMAT_R32G32B32A32_SNORM -}; - -static GLuint int_types_scale[4] = { - PIPE_FORMAT_R32_SSCALED, - PIPE_FORMAT_R32G32_SSCALED, - PIPE_FORMAT_R32G32B32_SSCALED, - PIPE_FORMAT_R32G32B32A32_SSCALED -}; - -static GLuint ushort_types_norm[4] = { - PIPE_FORMAT_R16_UNORM, - PIPE_FORMAT_R16G16_UNORM, - PIPE_FORMAT_R16G16B16_UNORM, - PIPE_FORMAT_R16G16B16A16_UNORM -}; - -static GLuint ushort_types_scale[4] = { - PIPE_FORMAT_R16_USCALED, - PIPE_FORMAT_R16G16_USCALED, - PIPE_FORMAT_R16G16B16_USCALED, - PIPE_FORMAT_R16G16B16A16_USCALED -}; - -static GLuint short_types_norm[4] = { - PIPE_FORMAT_R16_SNORM, - PIPE_FORMAT_R16G16_SNORM, - PIPE_FORMAT_R16G16B16_SNORM, - PIPE_FORMAT_R16G16B16A16_SNORM -}; - -static GLuint short_types_scale[4] = { - PIPE_FORMAT_R16_SSCALED, - PIPE_FORMAT_R16G16_SSCALED, - PIPE_FORMAT_R16G16B16_SSCALED, - PIPE_FORMAT_R16G16B16A16_SSCALED -}; - -static GLuint ubyte_types_norm[4] = { - PIPE_FORMAT_R8_UNORM, - PIPE_FORMAT_R8G8_UNORM, - PIPE_FORMAT_R8G8B8_UNORM, - PIPE_FORMAT_R8G8B8A8_UNORM -}; - -static GLuint ubyte_types_scale[4] = { - PIPE_FORMAT_R8_USCALED, - PIPE_FORMAT_R8G8_USCALED, - PIPE_FORMAT_R8G8B8_USCALED, - PIPE_FORMAT_R8G8B8A8_USCALED -}; - -static GLuint byte_types_norm[4] = { - PIPE_FORMAT_R8_SNORM, - PIPE_FORMAT_R8G8_SNORM, - PIPE_FORMAT_R8G8B8_SNORM, - PIPE_FORMAT_R8G8B8A8_SNORM -}; - -static GLuint byte_types_scale[4] = { - PIPE_FORMAT_R8_SSCALED, - PIPE_FORMAT_R8G8_SSCALED, - PIPE_FORMAT_R8G8B8_SSCALED, - PIPE_FORMAT_R8G8B8A8_SSCALED -}; - -static GLuint fixed_types[4] = { - PIPE_FORMAT_R32_FIXED, - PIPE_FORMAT_R32G32_FIXED, - PIPE_FORMAT_R32G32B32_FIXED, - PIPE_FORMAT_R32G32B32A32_FIXED -}; - - - -/** - * Return a PIPE_FORMAT_x for the given GL datatype and size. 
- */ -GLuint -st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, - GLboolean normalized) -{ - assert((type >= GL_BYTE && type <= GL_DOUBLE) || - type == GL_FIXED || type == GL_HALF_FLOAT); - assert(size >= 1); - assert(size <= 4); - assert(format == GL_RGBA || format == GL_BGRA); - - if (format == GL_BGRA) { - /* this is an odd-ball case */ - assert(type == GL_UNSIGNED_BYTE); - assert(normalized); - return PIPE_FORMAT_B8G8R8A8_UNORM; - } - - if (normalized) { - switch (type) { - case GL_DOUBLE: return double_types[size-1]; - case GL_FLOAT: return float_types[size-1]; - case GL_HALF_FLOAT: return half_float_types[size-1]; - case GL_INT: return int_types_norm[size-1]; - case GL_SHORT: return short_types_norm[size-1]; - case GL_BYTE: return byte_types_norm[size-1]; - case GL_UNSIGNED_INT: return uint_types_norm[size-1]; - case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1]; - case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1]; - case GL_FIXED: return fixed_types[size-1]; - default: assert(0); return 0; - } - } - else { - switch (type) { - case GL_DOUBLE: return double_types[size-1]; - case GL_FLOAT: return float_types[size-1]; - case GL_HALF_FLOAT: return half_float_types[size-1]; - case GL_INT: return int_types_scale[size-1]; - case GL_SHORT: return short_types_scale[size-1]; - case GL_BYTE: return byte_types_scale[size-1]; - case GL_UNSIGNED_INT: return uint_types_scale[size-1]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1]; - case GL_FIXED: return fixed_types[size-1]; - default: assert(0); return 0; - } - } - return 0; /* silence compiler warning */ -} - - - - - -/** - * Examine the active arrays to determine if we have interleaved - * vertex arrays all living in one VBO, or all living in user space. - * \param userSpace returns whether the arrays are in user space. - */ -static GLboolean -is_interleaved_arrays(const struct st_vertex_program *vp, - const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLboolean *userSpace) -{ - GLuint attr; - const struct gl_buffer_object *firstBufObj = NULL; - GLint firstStride = -1; - GLuint num_client_arrays = 0; - const GLubyte *client_addr = NULL; - - for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj; - const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */ - - if (firstStride < 0) { - firstStride = stride; - } - else if (firstStride != stride) { - return GL_FALSE; - } - - if (!bufObj || !bufObj->Name) { - num_client_arrays++; - /* Try to detect if the client-space arrays are - * "close" to each other. - */ - if (!client_addr) { - client_addr = arrays[mesaAttr]->Ptr; - } - else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) { - /* arrays start too far apart */ - return GL_FALSE; - } - } - else if (!firstBufObj) { - firstBufObj = bufObj; - } - else if (bufObj != firstBufObj) { - return GL_FALSE; - } - } - - *userSpace = (num_client_arrays == vpv->num_inputs); - /* debug_printf("user space: %s (%d arrays, %d inputs)\n", - (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */ - - return GL_TRUE; -} - - -/** - * Compute the memory range occupied by the arrays. 
- */ -static void -get_arrays_bounds(const struct st_vertex_program *vp, - const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLuint max_index, - const GLubyte **low, const GLubyte **high) -{ - const GLubyte *low_addr = NULL; - const GLubyte *high_addr = NULL; - GLuint attr; - - /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */ - - for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - const GLint stride = arrays[mesaAttr]->StrideB; - const GLubyte *start = arrays[mesaAttr]->Ptr; - const unsigned sz = (arrays[mesaAttr]->Size * - _mesa_sizeof_type(arrays[mesaAttr]->Type)); - const GLubyte *end = start + (max_index * stride) + sz; - - /* debug_printf("attr %u: stride %d size %u start %p end %p\n", - attr, stride, sz, start, end); */ - - if (attr == 0) { - low_addr = start; - high_addr = end; - } - else { - low_addr = MIN2(low_addr, start); - high_addr = MAX2(high_addr, end); - } - } - - *low = low_addr; - *high = high_addr; -} - - -/** - * Set up for drawing interleaved arrays that all live in one VBO - * or all live in user space. - * \param vbuffer returns vertex buffer info - * \param velements returns vertex element info - */ -static void -setup_interleaved_attribs(struct gl_context *ctx, - const struct st_vertex_program *vp, - const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLuint max_index, - GLboolean userSpace, - struct pipe_vertex_buffer *vbuffer, - struct pipe_vertex_element velements[]) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - GLuint attr; - const GLubyte *offset0 = NULL; - - for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - struct st_buffer_object *stobj = st_buffer_object(bufobj); - GLsizei stride = arrays[mesaAttr]->StrideB; - - /*printf("stobj %u = %p\n", attr, (void*)stobj);*/ - - if (attr == 0) { - const GLubyte *low, *high; - - get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high); - /* debug_printf("buffer range: %p %p range %d max index %u\n", - low, high, high - low, max_index); */ - - offset0 = low; - if (userSpace) { - vbuffer->buffer = - pipe_user_buffer_create(pipe->screen, (void *) low, high - low, - PIPE_BIND_VERTEX_BUFFER); - vbuffer->buffer_offset = 0; - } - else { - vbuffer->buffer = NULL; - pipe_resource_reference(&vbuffer->buffer, stobj->buffer); - vbuffer->buffer_offset = pointer_to_offset(low); - } - vbuffer->stride = stride; /* in bytes */ - vbuffer->max_index = max_index; - } - - /* - if (arrays[mesaAttr]->InstanceDivisor) - vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement; - else - vbuffer[attr].max_index = max_index; - */ - - velements[attr].src_offset = - (unsigned) (arrays[mesaAttr]->Ptr - offset0); - velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; - velements[attr].vertex_buffer_index = 0; - velements[attr].src_format = - st_pipe_vertex_format(arrays[mesaAttr]->Type, - arrays[mesaAttr]->Size, - arrays[mesaAttr]->Format, - arrays[mesaAttr]->Normalized); - assert(velements[attr].src_format); - } -} - - -/** - * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each - * vertex attribute. 
- * \param vbuffer returns vertex buffer info - * \param velements returns vertex element info - */ -static void -setup_non_interleaved_attribs(struct gl_context *ctx, - const struct st_vertex_program *vp, - const struct st_vp_variant *vpv, - const struct gl_client_array **arrays, - GLuint max_index, - GLboolean *userSpace, - struct pipe_vertex_buffer vbuffer[], - struct pipe_vertex_element velements[]) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - GLuint attr; - - for (attr = 0; attr < vpv->num_inputs; attr++) { - const GLuint mesaAttr = vp->index_to_input[attr]; - struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - GLsizei stride = arrays[mesaAttr]->StrideB; - - *userSpace = GL_FALSE; - - if (bufobj && bufobj->Name) { - /* Attribute data is in a VBO. - * Recall that for VBOs, the gl_client_array->Ptr field is - * really an offset from the start of the VBO, not a pointer. - */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - assert(stobj->buffer); - /*printf("stobj %u = %p\n", attr, (void*) stobj);*/ - - vbuffer[attr].buffer = NULL; - pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer); - vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr); - } - else { - /* attribute data is in user-space memory, not a VBO */ - uint bytes; - /*printf("user-space array %d stride %d\n", attr, stride);*/ - - *userSpace = GL_TRUE; - - /* wrap user data */ - if (arrays[mesaAttr]->Ptr) { - /* user's vertex array */ - if (arrays[mesaAttr]->StrideB) { - bytes = arrays[mesaAttr]->StrideB * (max_index + 1); - } - else { - bytes = arrays[mesaAttr]->Size - * _mesa_sizeof_type(arrays[mesaAttr]->Type); - } - vbuffer[attr].buffer = - pipe_user_buffer_create(pipe->screen, - (void *) arrays[mesaAttr]->Ptr, bytes, - PIPE_BIND_VERTEX_BUFFER); - } - else { - /* no array, use ctx->Current.Attrib[] value */ - bytes = sizeof(ctx->Current.Attrib[0]); - vbuffer[attr].buffer = - pipe_user_buffer_create(pipe->screen, - (void *) ctx->Current.Attrib[mesaAttr], - bytes, - PIPE_BIND_VERTEX_BUFFER); - stride = 0; - } - - vbuffer[attr].buffer_offset = 0; - } - - assert(velements[attr].src_offset <= 2048); /* 11-bit field */ - - /* common-case setup */ - vbuffer[attr].stride = stride; /* in bytes */ - if (arrays[mesaAttr]->InstanceDivisor) - vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement; - else - vbuffer[attr].max_index = max_index; - - velements[attr].src_offset = 0; - velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; - velements[attr].vertex_buffer_index = attr; - velements[attr].src_format - = st_pipe_vertex_format(arrays[mesaAttr]->Type, - arrays[mesaAttr]->Size, - arrays[mesaAttr]->Format, - arrays[mesaAttr]->Normalized); - assert(velements[attr].src_format); - } -} - - -static void -setup_index_buffer(struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - struct pipe_index_buffer *ibuffer) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - - memset(ibuffer, 0, sizeof(*ibuffer)); - if (ib) { - struct gl_buffer_object *bufobj = ib->obj; - - switch (ib->type) { - case GL_UNSIGNED_INT: - ibuffer->index_size = 4; - break; - case GL_UNSIGNED_SHORT: - ibuffer->index_size = 2; - break; - case GL_UNSIGNED_BYTE: - ibuffer->index_size = 1; - break; - default: - assert(0); - return; - } - - /* get/create the index buffer object */ - if (bufobj && bufobj->Name) { - /* elements/indexes are in a real VBO */ - struct st_buffer_object *stobj = st_buffer_object(bufobj); - 
pipe_resource_reference(&ibuffer->buffer, stobj->buffer); - ibuffer->offset = pointer_to_offset(ib->ptr); - } - else { - /* element/indicies are in user space memory */ - ibuffer->buffer = - pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, - ib->count * ibuffer->index_size, - PIPE_BIND_INDEX_BUFFER); - } - } -} - -/** - * Prior to drawing, check that any uniforms referenced by the - * current shader have been set. If a uniform has not been set, - * issue a warning. - */ -static void -check_uniforms(struct gl_context *ctx) -{ - struct gl_shader_program *shProg[3] = { - ctx->Shader.CurrentVertexProgram, - ctx->Shader.CurrentGeometryProgram, - ctx->Shader.CurrentFragmentProgram, - }; - unsigned j; - - for (j = 0; j < 3; j++) { - unsigned i; - - if (shProg[j] == NULL || !shProg[j]->LinkStatus) - continue; - - for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) { - const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i]; - if (!u->Initialized) { - _mesa_warning(ctx, - "Using shader with uninitialized uniform: %s", - u->Name); - } - } - } -} - - -/** - * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to - * the corresponding Gallium type. - */ -static unsigned -translate_prim(const struct gl_context *ctx, unsigned prim) -{ - /* GL prims should match Gallium prims, spot-check a few */ - assert(GL_POINTS == PIPE_PRIM_POINTS); - assert(GL_QUADS == PIPE_PRIM_QUADS); - assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); - - /* Avoid quadstrips if it's easy to do so: - * Note: it's imporant to do the correct trimming if we change the prim type! - * We do that wherever this function is called. - */ - if (prim == GL_QUAD_STRIP && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) - prim = GL_TRIANGLE_STRIP; - - return prim; -} - - - -/** - * This function gets plugged into the VBO module and is called when - * we have something to render. - * Basically, translate the information into the format expected by gallium. - */ -void -st_draw_vbo(struct gl_context *ctx, - const struct gl_client_array **arrays, - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - const struct st_vertex_program *vp; - const struct st_vp_variant *vpv; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; - GLuint attr; - struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; - unsigned num_vbuffers, num_velements; - struct pipe_index_buffer ibuffer; - GLboolean userSpace = GL_FALSE; - GLboolean vertDataEdgeFlags; - struct pipe_draw_info info; - unsigned i; - - /* Mesa core state should have been validated already */ - assert(ctx->NewState == 0x0); - - /* Gallium probably doesn't want this in some cases. */ - if (!index_bounds_valid) - if (!vbo_all_varyings_in_vbos(arrays)) - vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); - - /* sanity check for pointer arithmetic below */ - assert(sizeof(arrays[0]->Ptr[0]) == 1); - - vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj && - arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name; - if (vertDataEdgeFlags != st->vertdata_edgeflags) { - st->vertdata_edgeflags = vertDataEdgeFlags; - st->dirty.st |= ST_NEW_EDGEFLAGS_DATA; - } - - st_validate_state(st); - - /* must get these after state validation! 
*/ - vp = st->vp; - vpv = st->vp_variant; - -#if 0 - if (MESA_VERBOSE & VERBOSE_GLSL) { - check_uniforms(ctx); - } -#else - (void) check_uniforms; -#endif - - memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); - /* - * Setup the vbuffer[] and velements[] arrays. - */ - if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) { - /*printf("Draw interleaved\n");*/ - setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace, - vbuffer, velements); - num_vbuffers = 1; - num_velements = vpv->num_inputs; - if (num_velements == 0) - num_vbuffers = 0; - } - else { - /*printf("Draw non-interleaved\n");*/ - setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index, - &userSpace, vbuffer, velements); - num_vbuffers = vpv->num_inputs; - num_velements = vpv->num_inputs; - } - -#if 0 - { - GLuint i; - for (i = 0; i < num_vbuffers; i++) { - printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride); - printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index); - printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset); - printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer); - } - for (i = 0; i < num_velements; i++) { - printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index); - printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset); - printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format)); - } - } -#endif - - pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer); - cso_set_vertex_elements(st->cso_context, num_velements, velements); - - setup_index_buffer(ctx, ib, &ibuffer); - pipe->set_index_buffer(pipe, &ibuffer); - - util_draw_init_info(&info); - if (ib) { - info.indexed = TRUE; - if (min_index != ~0 && max_index != ~0) { - info.min_index = min_index; - info.max_index = max_index; - } - } - - info.primitive_restart = st->ctx->Array.PrimitiveRestart; - info.restart_index = st->ctx->Array.RestartIndex; - - /* do actual drawing */ - for (i = 0; i < nr_prims; i++) { - info.mode = translate_prim( ctx, prims[i].mode ); - info.start = prims[i].start; - info.count = prims[i].count; - info.instance_count = prims[i].num_instances; - info.index_bias = prims[i].basevertex; - if (!ib) { - info.min_index = info.start; - info.max_index = info.start + info.count - 1; - } - - if (u_trim_pipe_prim(info.mode, &info.count)) - pipe->draw_vbo(pipe, &info); - } - - pipe_resource_reference(&ibuffer.buffer, NULL); - - /* unreference buffers (frees wrapped user-space buffer objects) */ - for (attr = 0; attr < num_vbuffers; attr++) { - pipe_resource_reference(&vbuffer[attr].buffer, NULL); - assert(!vbuffer[attr].buffer); - } - - if (userSpace) - { - pipe->set_vertex_buffers(pipe, 0, NULL); - } -} - - -void st_init_draw( struct st_context *st ) -{ - struct gl_context *ctx = st->ctx; - - vbo_set_draw_func(ctx, st_draw_vbo); - -#if FEATURE_feedback || FEATURE_rastpos - st->draw = draw_create(st->pipe); /* for selection/feedback */ - - /* Disable draw options that might convert points/lines to tris, etc. - * as that would foul-up feedback/selection mode. 
- */ - draw_wide_line_threshold(st->draw, 1000.0f); - draw_wide_point_threshold(st->draw, 1000.0f); - draw_enable_line_stipple(st->draw, FALSE); - draw_enable_point_sprites(st->draw, FALSE); -#endif -} - - -void st_destroy_draw( struct st_context *st ) -{ -#if FEATURE_feedback || FEATURE_rastpos - draw_destroy(st->draw); -#endif -} - - +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * This file implements the st_draw_vbo() function which is called from + * Mesa's VBO module. All point/line/triangle rendering is done through + * this function whether the user called glBegin/End, glDrawArrays, + * glDrawElements, glEvalMesh, or glCalList, etc. + * + * We basically convert the VBO's vertex attribute/array information into + * Gallium vertex state, bind the vertex buffer objects and call + * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays(). 
+ * + * Authors: + * Keith Whitwell + */ + + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/prog_uniform.h" + +#include "vbo/vbo.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_bufferobjects.h" +#include "st_draw.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_prim.h" +#include "util/u_draw_quad.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_context.h" + + +static GLuint double_types[4] = { + PIPE_FORMAT_R64_FLOAT, + PIPE_FORMAT_R64G64_FLOAT, + PIPE_FORMAT_R64G64B64_FLOAT, + PIPE_FORMAT_R64G64B64A64_FLOAT +}; + +static GLuint float_types[4] = { + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R32G32B32A32_FLOAT +}; + +static GLuint half_float_types[4] = { + PIPE_FORMAT_R16_FLOAT, + PIPE_FORMAT_R16G16_FLOAT, + PIPE_FORMAT_R16G16B16_FLOAT, + PIPE_FORMAT_R16G16B16A16_FLOAT +}; + +static GLuint uint_types_norm[4] = { + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM +}; + +static GLuint uint_types_scale[4] = { + PIPE_FORMAT_R32_USCALED, + PIPE_FORMAT_R32G32_USCALED, + PIPE_FORMAT_R32G32B32_USCALED, + PIPE_FORMAT_R32G32B32A32_USCALED +}; + +static GLuint int_types_norm[4] = { + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM +}; + +static GLuint int_types_scale[4] = { + PIPE_FORMAT_R32_SSCALED, + PIPE_FORMAT_R32G32_SSCALED, + PIPE_FORMAT_R32G32B32_SSCALED, + PIPE_FORMAT_R32G32B32A32_SSCALED +}; + +static GLuint ushort_types_norm[4] = { + PIPE_FORMAT_R16_UNORM, + PIPE_FORMAT_R16G16_UNORM, + PIPE_FORMAT_R16G16B16_UNORM, + PIPE_FORMAT_R16G16B16A16_UNORM +}; + +static GLuint ushort_types_scale[4] = { + PIPE_FORMAT_R16_USCALED, + PIPE_FORMAT_R16G16_USCALED, + PIPE_FORMAT_R16G16B16_USCALED, + PIPE_FORMAT_R16G16B16A16_USCALED +}; + +static GLuint short_types_norm[4] = { + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16_SNORM, + PIPE_FORMAT_R16G16B16A16_SNORM +}; + +static GLuint short_types_scale[4] = { + PIPE_FORMAT_R16_SSCALED, + PIPE_FORMAT_R16G16_SSCALED, + PIPE_FORMAT_R16G16B16_SSCALED, + PIPE_FORMAT_R16G16B16A16_SSCALED +}; + +static GLuint ubyte_types_norm[4] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_R8G8B8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM +}; + +static GLuint ubyte_types_scale[4] = { + PIPE_FORMAT_R8_USCALED, + PIPE_FORMAT_R8G8_USCALED, + PIPE_FORMAT_R8G8B8_USCALED, + PIPE_FORMAT_R8G8B8A8_USCALED +}; + +static GLuint byte_types_norm[4] = { + PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM +}; + +static GLuint byte_types_scale[4] = { + PIPE_FORMAT_R8_SSCALED, + PIPE_FORMAT_R8G8_SSCALED, + PIPE_FORMAT_R8G8B8_SSCALED, + PIPE_FORMAT_R8G8B8A8_SSCALED +}; + +static GLuint fixed_types[4] = { + PIPE_FORMAT_R32_FIXED, + PIPE_FORMAT_R32G32_FIXED, + PIPE_FORMAT_R32G32B32_FIXED, + PIPE_FORMAT_R32G32B32A32_FIXED +}; + + + +/** + * Return a PIPE_FORMAT_x for the given GL datatype and size. 
+ */ +GLuint +st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, + GLboolean normalized) +{ + assert((type >= GL_BYTE && type <= GL_DOUBLE) || + type == GL_FIXED || type == GL_HALF_FLOAT); + assert(size >= 1); + assert(size <= 4); + assert(format == GL_RGBA || format == GL_BGRA); + + if (format == GL_BGRA) { + /* this is an odd-ball case */ + assert(type == GL_UNSIGNED_BYTE); + assert(normalized); + return PIPE_FORMAT_B8G8R8A8_UNORM; + } + + if (normalized) { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_HALF_FLOAT: return half_float_types[size-1]; + case GL_INT: return int_types_norm[size-1]; + case GL_SHORT: return short_types_norm[size-1]; + case GL_BYTE: return byte_types_norm[size-1]; + case GL_UNSIGNED_INT: return uint_types_norm[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + else { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_HALF_FLOAT: return half_float_types[size-1]; + case GL_INT: return int_types_scale[size-1]; + case GL_SHORT: return short_types_scale[size-1]; + case GL_BYTE: return byte_types_scale[size-1]; + case GL_UNSIGNED_INT: return uint_types_scale[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + return 0; /* silence compiler warning */ +} + + + + + +/** + * Examine the active arrays to determine if we have interleaved + * vertex arrays all living in one VBO, or all living in user space. + * \param userSpace returns whether the arrays are in user space. + */ +static GLboolean +is_interleaved_arrays(const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays) +{ + GLuint attr; + const struct gl_buffer_object *firstBufObj = NULL; + GLint firstStride = -1; + const GLubyte *client_addr = NULL; + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj; + const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */ + + if (firstStride < 0) { + firstStride = stride; + } + else if (firstStride != stride) { + return GL_FALSE; + } + + if (!bufObj || !bufObj->Name) { + /* Try to detect if the client-space arrays are + * "close" to each other. + */ + if (!client_addr) { + client_addr = arrays[mesaAttr]->Ptr; + } + else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) { + /* arrays start too far apart */ + return GL_FALSE; + } + } + else if (!firstBufObj) { + firstBufObj = bufObj; + } + else if (bufObj != firstBufObj) { + return GL_FALSE; + } + } + + return GL_TRUE; +} + + +/** + * Set up for drawing interleaved arrays that all live in one VBO + * or all live in user space. 
+ * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_interleaved_attribs(struct gl_context *ctx, + const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays, + struct pipe_vertex_buffer *vbuffer, + struct pipe_vertex_element velements[], + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLuint attr; + const GLubyte *low_addr = NULL; + + /* Find the lowest address. */ + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr; + + low_addr = !low_addr ? start : MIN2(low_addr, start); + } + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + struct st_buffer_object *stobj = st_buffer_object(bufobj); + GLsizei stride = arrays[mesaAttr]->StrideB; + + if (attr == 0) { + if (bufobj && bufobj->Name) { + vbuffer->buffer = NULL; + pipe_resource_reference(&vbuffer->buffer, stobj->buffer); + vbuffer->buffer_offset = pointer_to_offset(low_addr); + } else { + vbuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void*)low_addr, + stride * (max_index + 1), + PIPE_BIND_VERTEX_BUFFER); + vbuffer->buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[0], vbuffer->buffer); + st->user_vb_stride[0] = stride; + st->num_user_vbs = 1; + } + vbuffer->stride = stride; /* in bytes */ + } + + velements[attr].src_offset = + (unsigned) (arrays[mesaAttr]->Ptr - low_addr); + velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; + velements[attr].vertex_buffer_index = 0; + velements[attr].src_format = + st_pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + +/** + * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each + * vertex attribute. + * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_non_interleaved_attribs(struct gl_context *ctx, + const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays, + struct pipe_vertex_buffer vbuffer[], + struct pipe_vertex_element velements[], + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLuint attr; + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + GLsizei stride = arrays[mesaAttr]->StrideB; + + if (bufobj && bufobj->Name) { + /* Attribute data is in a VBO. + * Recall that for VBOs, the gl_client_array->Ptr field is + * really an offset from the start of the VBO, not a pointer. 
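
As the comment above notes, when attribute data lives in a VBO the gl_client_array::Ptr field carries a byte offset stored in pointer form; only for client-side arrays is it a real address. A minimal illustration of that convention, assuming the names used in this patch (pointer_to_offset() is the existing Mesa helper; client_array_offset() is a made-up name for the sketch):

static unsigned
client_array_offset(const struct gl_client_array *array)
{
   if (array->BufferObj && array->BufferObj->Name) {
      /* Bound VBO: Ptr encodes an offset from the start of the buffer. */
      return pointer_to_offset(array->Ptr);
   }
   /* Client array: Ptr is an ordinary pointer into application memory;
    * the data is wrapped zero-copy by pipe_user_buffer_create() and the
    * offset within that wrapper is 0. */
   return 0;
}

setup_interleaved_attribs() and setup_non_interleaved_attribs() apply exactly this split when filling in the buffer_offset fields.
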
+ */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + assert(stobj->buffer); + + vbuffer[attr].buffer = NULL; + pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer); + vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr); + } + else { + /* wrap user data */ + if (arrays[mesaAttr]->Ptr) { + vbuffer[attr].buffer = + pipe_user_buffer_create(pipe->screen, + (void *) arrays[mesaAttr]->Ptr, + stride * (max_index + 1), + PIPE_BIND_VERTEX_BUFFER); + } + else { + /* no array, use ctx->Current.Attrib[] value */ + uint bytes = sizeof(ctx->Current.Attrib[0]); + vbuffer[attr].buffer = + pipe_user_buffer_create(pipe->screen, + (void *) ctx->Current.Attrib[mesaAttr], + bytes, + PIPE_BIND_VERTEX_BUFFER); + stride = 0; + } + + vbuffer[attr].buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer); + st->user_vb_stride[attr] = stride; + st->num_user_vbs = MAX2(st->num_user_vbs, attr+1); + } + + /* common-case setup */ + vbuffer[attr].stride = stride; /* in bytes */ + + velements[attr].src_offset = 0; + velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; + velements[attr].vertex_buffer_index = attr; + velements[attr].src_format + = st_pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + +static void +setup_index_buffer(struct gl_context *ctx, + const struct _mesa_index_buffer *ib, + struct pipe_index_buffer *ibuffer) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + + memset(ibuffer, 0, sizeof(*ibuffer)); + if (ib) { + struct gl_buffer_object *bufobj = ib->obj; + + switch (ib->type) { + case GL_UNSIGNED_INT: + ibuffer->index_size = 4; + break; + case GL_UNSIGNED_SHORT: + ibuffer->index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer->index_size = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_resource_reference(&ibuffer->buffer, stobj->buffer); + ibuffer->offset = pointer_to_offset(ib->ptr); + } + else { + /* element/indicies are in user space memory */ + ibuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * ibuffer->index_size, + PIPE_BIND_INDEX_BUFFER); + } + } +} + +/** + * Prior to drawing, check that any uniforms referenced by the + * current shader have been set. If a uniform has not been set, + * issue a warning. + */ +static void +check_uniforms(struct gl_context *ctx) +{ + struct gl_shader_program *shProg[3] = { + ctx->Shader.CurrentVertexProgram, + ctx->Shader.CurrentGeometryProgram, + ctx->Shader.CurrentFragmentProgram, + }; + unsigned j; + + for (j = 0; j < 3; j++) { + unsigned i; + + if (shProg[j] == NULL || !shProg[j]->LinkStatus) + continue; + + for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) { + const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i]; + if (!u->Initialized) { + _mesa_warning(ctx, + "Using shader with uninitialized uniform: %s", + u->Name); + } + } + } +} + + +/** + * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to + * the corresponding Gallium type. 
+ */ +static unsigned +translate_prim(const struct gl_context *ctx, unsigned prim) +{ + /* GL prims should match Gallium prims, spot-check a few */ + assert(GL_POINTS == PIPE_PRIM_POINTS); + assert(GL_QUADS == PIPE_PRIM_QUADS); + assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); + + /* Avoid quadstrips if it's easy to do so: + * Note: it's imporant to do the correct trimming if we change the prim type! + * We do that wherever this function is called. + */ + if (prim == GL_QUAD_STRIP && + ctx->Light.ShadeModel != GL_FLAT && + ctx->Polygon.FrontMode == GL_FILL && + ctx->Polygon.BackMode == GL_FILL) + prim = GL_TRIANGLE_STRIP; + + return prim; +} + + +static void +st_validate_varrays(struct gl_context *ctx, + const struct gl_client_array **arrays, + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + const struct st_vertex_program *vp; + const struct st_vp_variant *vpv; + struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers, num_velements; + GLuint attr; + unsigned i; + + /* must get these after state validation! */ + vp = st->vp; + vpv = st->vp_variant; + + memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); + + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + st->num_user_vbs = 0; + + /* + * Setup the vbuffer[] and velements[] arrays. + */ + if (is_interleaved_arrays(vp, vpv, arrays)) { + setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements, + max_index); + num_vbuffers = 1; + num_velements = vpv->num_inputs; + if (num_velements == 0) + num_vbuffers = 0; + } + else { + setup_non_interleaved_attribs(ctx, vp, vpv, arrays, + vbuffer, velements, max_index); + num_vbuffers = vpv->num_inputs; + num_velements = vpv->num_inputs; + } + + cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer); + cso_set_vertex_elements(st->cso_context, num_velements, velements); + + /* unreference buffers (frees wrapped user-space buffer objects) + * This is OK, because the pipe driver should reference buffers by itself + * in set_vertex_buffers. */ + for (attr = 0; attr < num_vbuffers; attr++) { + pipe_resource_reference(&vbuffer[attr].buffer, NULL); + assert(!vbuffer[attr].buffer); + } +} + + +/** + * This function gets plugged into the VBO module and is called when + * we have something to render. + * Basically, translate the information into the format expected by gallium. + */ +void +st_draw_vbo(struct gl_context *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_index_buffer ibuffer; + struct pipe_draw_info info; + unsigned i; + GLboolean new_array = + st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; + + /* Mesa core state should have been validated already */ + assert(ctx->NewState == 0x0); + + if (ib) { + /* Gallium probably doesn't want this in some cases. */ + if (!index_bounds_valid) + if (!vbo_all_varyings_in_vbos(arrays)) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + } else { + /* Get min/max index for non-indexed drawing. 
*/ + min_index = ~0; + max_index = 0; + + for (i = 0; i < nr_prims; i++) { + min_index = MIN2(min_index, prims[i].start); + max_index = MAX2(max_index, prims[i].start + prims[i].count - 1); + } + } + + /* Validate state. */ + if (st->dirty.st) { + GLboolean vertDataEdgeFlags; + + /* sanity check for pointer arithmetic below */ + assert(sizeof(arrays[0]->Ptr[0]) == 1); + + vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj && + arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name; + if (vertDataEdgeFlags != st->vertdata_edgeflags) { + st->vertdata_edgeflags = vertDataEdgeFlags; + st->dirty.st |= ST_NEW_EDGEFLAGS_DATA; + } + + st_validate_state(st); + + if (new_array) { + st_validate_varrays(ctx, arrays, max_index); + } + +#if 0 + if (MESA_VERBOSE & VERBOSE_GLSL) { + check_uniforms(ctx); + } +#else + (void) check_uniforms; +#endif + } + + /* Notify the driver that the content of user buffers may have been + * changed. */ + if (!new_array && st->num_user_vbs) { + for (i = 0; i < st->num_user_vbs; i++) { + if (st->user_vb[i]) { + unsigned stride = st->user_vb_stride[i]; + + if (stride) { + pipe->redefine_user_buffer(pipe, st->user_vb[i], + min_index * stride, + (max_index + 1 - min_index) * stride); + } else { + /* stride == 0 */ + pipe->redefine_user_buffer(pipe, st->user_vb[i], + 0, st->user_vb[i]->width0); + } + } + } + } + + setup_index_buffer(ctx, ib, &ibuffer); + pipe->set_index_buffer(pipe, &ibuffer); + + util_draw_init_info(&info); + if (ib) { + info.indexed = TRUE; + if (min_index != ~0 && max_index != ~0) { + info.min_index = min_index; + info.max_index = max_index; + } + } + + info.primitive_restart = st->ctx->Array.PrimitiveRestart; + info.restart_index = st->ctx->Array.RestartIndex; + + /* do actual drawing */ + for (i = 0; i < nr_prims; i++) { + info.mode = translate_prim( ctx, prims[i].mode ); + info.start = prims[i].start; + info.count = prims[i].count; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; + } + + if (u_trim_pipe_prim(info.mode, &info.count)) + pipe->draw_vbo(pipe, &info); + } + + pipe_resource_reference(&ibuffer.buffer, NULL); +} + + +void st_init_draw( struct st_context *st ) +{ + struct gl_context *ctx = st->ctx; + + vbo_set_draw_func(ctx, st_draw_vbo); + +#if FEATURE_feedback || FEATURE_rastpos + st->draw = draw_create(st->pipe); /* for selection/feedback */ + + /* Disable draw options that might convert points/lines to tris, etc. + * as that would foul-up feedback/selection mode. 
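+ * (Editor's note) The 1000.0-pixel thresholds below effectively disable + * wide line/point expansion, and line stipple / point sprites are turned + * off explicitly.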
+ */ + draw_wide_line_threshold(st->draw, 1000.0f); + draw_wide_point_threshold(st->draw, 1000.0f); + draw_enable_line_stipple(st->draw, FALSE); + draw_enable_point_sprites(st->draw, FALSE); +#endif +} + + +void st_destroy_draw( struct st_context *st ) +{ +#if FEATURE_feedback || FEATURE_rastpos + draw_destroy(st->draw); +#endif +} + + diff --git a/mesalib/src/mesa/state_tracker/st_draw_feedback.c b/mesalib/src/mesa/state_tracker/st_draw_feedback.c index 545b32d75..1e1220bfe 100644 --- a/mesalib/src/mesa/state_tracker/st_draw_feedback.c +++ b/mesalib/src/mesa/state_tracker/st_draw_feedback.c @@ -179,7 +179,6 @@ st_feedback_draw_vbo(struct gl_context *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffers[attr].max_index = max_index; velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].src_format = diff --git a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c index 0be66a2c2..18eb3be68 100644 --- a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c +++ b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c @@ -1,424 +1,422 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "main/imports.h" -#include "main/mipmap.h" -#include "main/teximage.h" - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_format.h" -#include "util/u_gen_mipmap.h" - -#include "st_debug.h" -#include "st_context.h" -#include "st_texture.h" -#include "st_gen_mipmap.h" -#include "st_cb_texture.h" - - -/** - * one-time init for generate mipmap - * XXX Note: there may be other times we need no-op/simple state like this. - * In that case, some code refactoring would be good. - */ -void -st_init_generate_mipmap(struct st_context *st) -{ - st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context); -} - - -void -st_destroy_generate_mipmap(struct st_context *st) -{ - util_destroy_gen_mipmap(st->gen_mipmap); - st->gen_mipmap = NULL; -} - - -/** - * Generate mipmap levels using hardware rendering. 
- * \return TRUE if successful, FALSE if not possible - */ -static boolean -st_render_mipmap(struct st_context *st, - GLenum target, - struct st_texture_object *stObj, - uint baseLevel, uint lastLevel) -{ - struct pipe_context *pipe = st->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe); - const uint face = _mesa_tex_target_to_face(target); - - assert(psv->texture == stObj->pt); -#if 0 - assert(target != GL_TEXTURE_3D); /* implemented but untested */ -#endif - - /* check if we can render in the texture's format */ - /* XXX should probably kill this and always use util_gen_mipmap - since this implements a sw fallback as well */ - if (!screen->is_format_supported(screen, psv->format, psv->texture->target, - 0, PIPE_BIND_RENDER_TARGET, 0)) { - return FALSE; - } - - util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel, - PIPE_TEX_FILTER_LINEAR); - - return TRUE; -} - - -/** - * Helper function to decompress an image. The result is a 32-bpp RGBA - * image with stride==width. - */ -static void -decompress_image(enum pipe_format format, - const uint8_t *src, uint8_t *dst, - unsigned width, unsigned height) -{ - const struct util_format_description *desc = util_format_description(format); - const uint bw = util_format_get_blockwidth(format); - const uint bh = util_format_get_blockheight(format); - const uint dst_stride = 4 * MAX2(width, bw); - const uint src_stride = util_format_get_stride(format, width); - - desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); - - if (width < bw || height < bh) { - /* We're decompressing an image smaller than the compression - * block size. We don't want garbage pixel values in the region - * outside (width x height) so replicate pixels from the (width - * x height) region to fill out the (bw x bh) block size. - */ - uint x, y; - for (y = 0; y < bh; y++) { - for (x = 0; x < bw; x++) { - if (x >= width || y >= height) { - uint p = (y * bw + x) * 4; - dst[p + 0] = dst[0]; - dst[p + 1] = dst[1]; - dst[p + 2] = dst[2]; - dst[p + 3] = dst[3]; - } - } - } - } -} - - -/** - * Helper function to compress an image. The source is a 32-bpp RGBA image - * with stride==width. - */ -static void -compress_image(enum pipe_format format, - const uint8_t *src, uint8_t *dst, - unsigned width, unsigned height) -{ - const struct util_format_description *desc = util_format_description(format); - const uint dst_stride = util_format_get_stride(format, width); - const uint src_stride = 4 * width; - - desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); -} - - -/** - * Software fallback for generate mipmap levels. 
- */ -static void -fallback_generate_mipmap(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct pipe_context *pipe = st_context(ctx)->pipe; - struct pipe_resource *pt = st_get_texobj_resource(texObj); - const uint baseLevel = texObj->BaseLevel; - const uint lastLevel = pt->last_level; - const uint face = _mesa_tex_target_to_face(target); - uint dstLevel; - GLenum datatype; - GLuint comps; - GLboolean compressed; - - if (ST_DEBUG & DEBUG_FALLBACK) - debug_printf("%s: fallback processing\n", __FUNCTION__); - - assert(target != GL_TEXTURE_3D); /* not done yet */ - - compressed = - _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat); - - if (compressed) { - datatype = GL_UNSIGNED_BYTE; - comps = 4; - } - else { - _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat, - &datatype, &comps); - assert(comps > 0 && "bad texture format in fallback_generate_mipmap()"); - } - - for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { - const uint srcLevel = dstLevel - 1; - const uint srcWidth = u_minify(pt->width0, srcLevel); - const uint srcHeight = u_minify(pt->height0, srcLevel); - const uint srcDepth = u_minify(pt->depth0, srcLevel); - const uint dstWidth = u_minify(pt->width0, dstLevel); - const uint dstHeight = u_minify(pt->height0, dstLevel); - const uint dstDepth = u_minify(pt->depth0, dstLevel); - struct pipe_transfer *srcTrans, *dstTrans; - const ubyte *srcData; - ubyte *dstData; - int srcStride, dstStride; - - srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel, - face, - PIPE_TRANSFER_READ, 0, 0, - srcWidth, srcHeight); - - dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel, - face, - PIPE_TRANSFER_WRITE, 0, 0, - dstWidth, dstHeight); - - srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans); - dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans); - - srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format); - dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format); - - /* this cannot work correctly for 3d since it does - not respect layerStride. 
*/ - if (compressed) { - const enum pipe_format format = pt->format; - const uint bw = util_format_get_blockwidth(format); - const uint bh = util_format_get_blockheight(format); - const uint srcWidth2 = align(srcWidth, bw); - const uint srcHeight2 = align(srcHeight, bh); - const uint dstWidth2 = align(dstWidth, bw); - const uint dstHeight2 = align(dstHeight, bh); - uint8_t *srcTemp, *dstTemp; - - assert(comps == 4); - - srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000); - dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000); - - /* decompress the src image: srcData -> srcTemp */ - decompress_image(format, srcData, srcTemp, srcWidth, srcHeight); - - _mesa_generate_mipmap_level(target, datatype, comps, - 0 /*border*/, - srcWidth2, srcHeight2, srcDepth, - srcTemp, - srcWidth2, /* stride in texels */ - dstWidth2, dstHeight2, dstDepth, - dstTemp, - dstWidth2); /* stride in texels */ - - /* compress the new image: dstTemp -> dstData */ - compress_image(format, dstTemp, dstData, dstWidth, dstHeight); - - free(srcTemp); - free(dstTemp); - } - else { - _mesa_generate_mipmap_level(target, datatype, comps, - 0 /*border*/, - srcWidth, srcHeight, srcDepth, - srcData, - srcStride, /* stride in texels */ - dstWidth, dstHeight, dstDepth, - dstData, - dstStride); /* stride in texels */ - } - - pipe_transfer_unmap(pipe, srcTrans); - pipe_transfer_unmap(pipe, dstTrans); - - pipe->transfer_destroy(pipe, srcTrans); - pipe->transfer_destroy(pipe, dstTrans); - } -} - - -/** - * Compute the expected number of mipmap levels in the texture given - * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/ - * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap - * levels should be generated. - */ -static GLuint -compute_num_levels(struct gl_context *ctx, - struct gl_texture_object *texObj, - GLenum target) -{ - if (target == GL_TEXTURE_RECTANGLE_ARB) { - return 1; - } - else { - const struct gl_texture_image *baseImage = - _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel); - GLuint size, numLevels; - - size = MAX2(baseImage->Width2, baseImage->Height2); - size = MAX2(size, baseImage->Depth2); - - numLevels = texObj->BaseLevel; - - while (size > 0) { - numLevels++; - size >>= 1; - } - - numLevels = MIN2(numLevels, texObj->MaxLevel + 1); - - assert(numLevels >= 1); - - return numLevels; - } -} - - -/** - * Called via ctx->Driver.GenerateMipmap(). - */ -void -st_generate_mipmap(struct gl_context *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct st_context *st = st_context(ctx); - struct st_texture_object *stObj = st_texture_object(texObj); - struct pipe_resource *pt = st_get_texobj_resource(texObj); - const uint baseLevel = texObj->BaseLevel; - uint lastLevel; - uint dstLevel; - - if (!pt) - return; - - /* not sure if this ultimately actually should work, - but we're not supporting multisampled textures yet. */ - assert(pt->nr_samples < 2); - - /* find expected last mipmap level to generate*/ - lastLevel = compute_num_levels(ctx, texObj, target) - 1; - - if (lastLevel == 0) - return; - - if (pt->last_level < lastLevel) { - /* The current gallium texture doesn't have space for all the - * mipmap levels we need to generate. So allocate a new texture. 
- */ - struct pipe_resource *oldTex = stObj->pt; - - /* create new texture with space for more levels */ - stObj->pt = st_texture_create(st, - oldTex->target, - oldTex->format, - lastLevel, - oldTex->width0, - oldTex->height0, - oldTex->depth0, - oldTex->array_size, - oldTex->bind); - - /* The texture isn't in a "complete" state yet so set the expected - * lastLevel here, since it won't get done in st_finalize_texture(). - */ - stObj->lastLevel = lastLevel; - - /* This will copy the old texture's base image into the new texture - * which we just allocated. - */ - st_finalize_texture(ctx, st->pipe, texObj); - - /* release the old tex (will likely be freed too) */ - pipe_resource_reference(&oldTex, NULL); - pipe_sampler_view_reference(&stObj->sampler_view, NULL); - - pt = stObj->pt; - } - else { - /* Make sure that the base texture image data is present in the - * texture buffer. - */ - st_finalize_texture(ctx, st->pipe, texObj); - } - - assert(pt->last_level >= lastLevel); - - /* Try to generate the mipmap by rendering/texturing. If that fails, - * use the software fallback. - */ - if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) { - /* since the util code actually also has a fallback, should - probably make it never fail and kill this */ - fallback_generate_mipmap(ctx, target, texObj); - } - - /* Fill in the Mesa gl_texture_image fields */ - for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { - const uint srcLevel = dstLevel - 1; - const struct gl_texture_image *srcImage - = _mesa_get_tex_image(ctx, texObj, target, srcLevel); - struct gl_texture_image *dstImage; - struct st_texture_image *stImage; - uint dstWidth = u_minify(pt->width0, dstLevel); - uint dstHeight = u_minify(pt->height0, dstLevel); - uint dstDepth = u_minify(pt->depth0, dstLevel); - uint border = srcImage->Border; - - dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); - if (!dstImage) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); - return; - } - - /* Free old image data */ - if (dstImage->Data) - ctx->Driver.FreeTexImageData(ctx, dstImage); - - /* initialize new image */ - _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, - dstDepth, border, srcImage->InternalFormat, - srcImage->TexFormat); - - stImage = st_texture_image(dstImage); - stImage->level = dstLevel; - - pipe_resource_reference(&stImage->pt, pt); - } -} +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mipmap.h" +#include "main/teximage.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_gen_mipmap.h" + +#include "st_debug.h" +#include "st_context.h" +#include "st_texture.h" +#include "st_gen_mipmap.h" +#include "st_cb_texture.h" + + +/** + * one-time init for generate mipmap + * XXX Note: there may be other times we need no-op/simple state like this. + * In that case, some code refactoring would be good. + */ +void +st_init_generate_mipmap(struct st_context *st) +{ + st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context); +} + + +void +st_destroy_generate_mipmap(struct st_context *st) +{ + util_destroy_gen_mipmap(st->gen_mipmap); + st->gen_mipmap = NULL; +} + + +/** + * Generate mipmap levels using hardware rendering. + * \return TRUE if successful, FALSE if not possible + */ +static boolean +st_render_mipmap(struct st_context *st, + GLenum target, + struct st_texture_object *stObj, + uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe); + const uint face = _mesa_tex_target_to_face(target); + + assert(psv->texture == stObj->pt); +#if 0 + assert(target != GL_TEXTURE_3D); /* implemented but untested */ +#endif + + /* check if we can render in the texture's format */ + /* XXX should probably kill this and always use util_gen_mipmap + since this implements a sw fallback as well */ + if (!screen->is_format_supported(screen, psv->format, psv->texture->target, + 0, PIPE_BIND_RENDER_TARGET, 0)) { + return FALSE; + } + + util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel, + PIPE_TEX_FILTER_LINEAR); + + return TRUE; +} + + +/** + * Helper function to decompress an image. The result is a 32-bpp RGBA + * image with stride==width. + */ +static void +decompress_image(enum pipe_format format, + const uint8_t *src, uint8_t *dst, + unsigned width, unsigned height, unsigned src_stride) +{ + const struct util_format_description *desc = util_format_description(format); + const uint bw = util_format_get_blockwidth(format); + const uint bh = util_format_get_blockheight(format); + const uint dst_stride = 4 * MAX2(width, bw); + + desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); + + if (width < bw || height < bh) { + /* We're decompressing an image smaller than the compression + * block size. We don't want garbage pixel values in the region + * outside (width x height) so replicate pixels from the (width + * x height) region to fill out the (bw x bh) block size. + */ + uint x, y; + for (y = 0; y < bh; y++) { + for (x = 0; x < bw; x++) { + if (x >= width || y >= height) { + uint p = (y * bw + x) * 4; + dst[p + 0] = dst[0]; + dst[p + 1] = dst[1]; + dst[p + 2] = dst[2]; + dst[p + 3] = dst[3]; + } + } + } + } +} + + +/** + * Helper function to compress an image. The source is a 32-bpp RGBA image + * with stride==width. 
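+ * The destination row stride (in bytes) is supplied by the caller, since + * the transfer's stride may be larger than the minimal packed stride.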
+ */ +static void +compress_image(enum pipe_format format, + const uint8_t *src, uint8_t *dst, + unsigned width, unsigned height, unsigned dst_stride) +{ + const struct util_format_description *desc = util_format_description(format); + const uint src_stride = 4 * width; + + desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); +} + + +/** + * Software fallback for generate mipmap levels. + */ +static void +fallback_generate_mipmap(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct pipe_resource *pt = st_get_texobj_resource(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + const uint face = _mesa_tex_target_to_face(target); + uint dstLevel; + GLenum datatype; + GLuint comps; + GLboolean compressed; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + + assert(target != GL_TEXTURE_3D); /* not done yet */ + + compressed = + _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat); + + if (compressed) { + datatype = GL_UNSIGNED_BYTE; + comps = 4; + } + else { + _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat, + &datatype, &comps); + assert(comps > 0 && "bad texture format in fallback_generate_mipmap()"); + } + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const uint srcWidth = u_minify(pt->width0, srcLevel); + const uint srcHeight = u_minify(pt->height0, srcLevel); + const uint srcDepth = u_minify(pt->depth0, srcLevel); + const uint dstWidth = u_minify(pt->width0, dstLevel); + const uint dstHeight = u_minify(pt->height0, dstLevel); + const uint dstDepth = u_minify(pt->depth0, dstLevel); + struct pipe_transfer *srcTrans, *dstTrans; + const ubyte *srcData; + ubyte *dstData; + int srcStride, dstStride; + + srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel, + face, + PIPE_TRANSFER_READ, 0, 0, + srcWidth, srcHeight); + + dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel, + face, + PIPE_TRANSFER_WRITE, 0, 0, + dstWidth, dstHeight); + + srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans); + dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans); + + srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format); + dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format); + + /* this cannot work correctly for 3d since it does + not respect layerStride. 
*/ + if (compressed) { + const enum pipe_format format = pt->format; + const uint bw = util_format_get_blockwidth(format); + const uint bh = util_format_get_blockheight(format); + const uint srcWidth2 = align(srcWidth, bw); + const uint srcHeight2 = align(srcHeight, bh); + const uint dstWidth2 = align(dstWidth, bw); + const uint dstHeight2 = align(dstHeight, bh); + uint8_t *srcTemp, *dstTemp; + + assert(comps == 4); + + srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000); + dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000); + + /* decompress the src image: srcData -> srcTemp */ + decompress_image(format, srcData, srcTemp, srcWidth, srcHeight, srcTrans->stride); + + _mesa_generate_mipmap_level(target, datatype, comps, + 0 /*border*/, + srcWidth2, srcHeight2, srcDepth, + srcTemp, + srcWidth2, /* stride in texels */ + dstWidth2, dstHeight2, dstDepth, + dstTemp, + dstWidth2); /* stride in texels */ + + /* compress the new image: dstTemp -> dstData */ + compress_image(format, dstTemp, dstData, dstWidth, dstHeight, dstTrans->stride); + + free(srcTemp); + free(dstTemp); + } + else { + _mesa_generate_mipmap_level(target, datatype, comps, + 0 /*border*/, + srcWidth, srcHeight, srcDepth, + srcData, + srcStride, /* stride in texels */ + dstWidth, dstHeight, dstDepth, + dstData, + dstStride); /* stride in texels */ + } + + pipe_transfer_unmap(pipe, srcTrans); + pipe_transfer_unmap(pipe, dstTrans); + + pipe->transfer_destroy(pipe, srcTrans); + pipe->transfer_destroy(pipe, dstTrans); + } +} + + +/** + * Compute the expected number of mipmap levels in the texture given + * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/ + * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap + * levels should be generated. + */ +static GLuint +compute_num_levels(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target) +{ + if (target == GL_TEXTURE_RECTANGLE_ARB) { + return 1; + } + else { + const struct gl_texture_image *baseImage = + _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel); + GLuint size, numLevels; + + size = MAX2(baseImage->Width2, baseImage->Height2); + size = MAX2(size, baseImage->Depth2); + + numLevels = texObj->BaseLevel; + + while (size > 0) { + numLevels++; + size >>= 1; + } + + numLevels = MIN2(numLevels, texObj->MaxLevel + 1); + + assert(numLevels >= 1); + + return numLevels; + } +} + + +/** + * Called via ctx->Driver.GenerateMipmap(). + */ +void +st_generate_mipmap(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct st_context *st = st_context(ctx); + struct st_texture_object *stObj = st_texture_object(texObj); + struct pipe_resource *pt = st_get_texobj_resource(texObj); + const uint baseLevel = texObj->BaseLevel; + uint lastLevel; + uint dstLevel; + + if (!pt) + return; + + /* not sure if this ultimately actually should work, + but we're not supporting multisampled textures yet. */ + assert(pt->nr_samples < 2); + + /* find expected last mipmap level to generate*/ + lastLevel = compute_num_levels(ctx, texObj, target) - 1; + + if (lastLevel == 0) + return; + + if (pt->last_level < lastLevel) { + /* The current gallium texture doesn't have space for all the + * mipmap levels we need to generate. So allocate a new texture. 
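+ * For example, a 16x8 base image needs levels 0..4 (16x8 down to 1x1); + * if the existing resource only has last_level == 0, a larger one with + * room for all five levels is created here.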
+ */ + struct pipe_resource *oldTex = stObj->pt; + + /* create new texture with space for more levels */ + stObj->pt = st_texture_create(st, + oldTex->target, + oldTex->format, + lastLevel, + oldTex->width0, + oldTex->height0, + oldTex->depth0, + oldTex->array_size, + oldTex->bind); + + /* The texture isn't in a "complete" state yet so set the expected + * lastLevel here, since it won't get done in st_finalize_texture(). + */ + stObj->lastLevel = lastLevel; + + /* This will copy the old texture's base image into the new texture + * which we just allocated. + */ + st_finalize_texture(ctx, st->pipe, texObj); + + /* release the old tex (will likely be freed too) */ + pipe_resource_reference(&oldTex, NULL); + pipe_sampler_view_reference(&stObj->sampler_view, NULL); + + pt = stObj->pt; + } + else { + /* Make sure that the base texture image data is present in the + * texture buffer. + */ + st_finalize_texture(ctx, st->pipe, texObj); + } + + assert(pt->last_level >= lastLevel); + + /* Try to generate the mipmap by rendering/texturing. If that fails, + * use the software fallback. + */ + if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) { + /* since the util code actually also has a fallback, should + probably make it never fail and kill this */ + fallback_generate_mipmap(ctx, target, texObj); + } + + /* Fill in the Mesa gl_texture_image fields */ + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const struct gl_texture_image *srcImage + = _mesa_get_tex_image(ctx, texObj, target, srcLevel); + struct gl_texture_image *dstImage; + struct st_texture_image *stImage; + uint dstWidth = u_minify(pt->width0, dstLevel); + uint dstHeight = u_minify(pt->height0, dstLevel); + uint dstDepth = u_minify(pt->depth0, dstLevel); + uint border = srcImage->Border; + + dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); + if (!dstImage) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); + return; + } + + /* Free old image data */ + if (dstImage->Data) + ctx->Driver.FreeTexImageData(ctx, dstImage); + + /* initialize new image */ + _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, + dstDepth, border, srcImage->InternalFormat, + srcImage->TexFormat); + + stImage = st_texture_image(dstImage); + stImage->level = dstLevel; + + pipe_resource_reference(&stImage->pt, pt); + } +} diff --git a/mesalib/src/mesa/tnl/t_draw.c b/mesalib/src/mesa/tnl/t_draw.c index 741f0ed3f..b1967e654 100644 --- a/mesalib/src/mesa/tnl/t_draw.c +++ b/mesalib/src/mesa/tnl/t_draw.c @@ -1,494 +1,534 @@ -/* - * Mesa 3-D graphics library - * Version: 7.1 - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell - */ - -#include "main/glheader.h" -#include "main/condrender.h" -#include "main/context.h" -#include "main/imports.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/enums.h" - -#include "t_context.h" -#include "tnl.h" - - - -static GLubyte *get_space(struct gl_context *ctx, GLuint bytes) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - GLubyte *space = malloc(bytes); - - tnl->block[tnl->nr_blocks++] = space; - return space; -} - - -static void free_space(struct gl_context *ctx) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint i; - for (i = 0; i < tnl->nr_blocks; i++) - free(tnl->block[i]); - tnl->nr_blocks = 0; -} - - -/* Convert the incoming array to GLfloats. Understands the - * array->Normalized flag and selects the correct conversion method. - */ -#define CONVERT( TYPE, MACRO ) do { \ - GLuint i, j; \ - if (input->Normalized) { \ - for (i = 0; i < count; i++) { \ - const TYPE *in = (TYPE *)ptr; \ - for (j = 0; j < sz; j++) { \ - *fptr++ = MACRO(*in); \ - in++; \ - } \ - ptr += input->StrideB; \ - } \ - } else { \ - for (i = 0; i < count; i++) { \ - const TYPE *in = (TYPE *)ptr; \ - for (j = 0; j < sz; j++) { \ - *fptr++ = (GLfloat)(*in); \ - in++; \ - } \ - ptr += input->StrideB; \ - } \ - } \ -} while (0) - - -/** - * Convert array of BGRA/GLubyte[4] values to RGBA/float[4] - * \param ptr input/ubyte array - * \param fptr output/float array - */ -static void -convert_bgra_to_float(const struct gl_client_array *input, - const GLubyte *ptr, GLfloat *fptr, - GLuint count ) -{ - GLuint i; - assert(input->Normalized); - assert(input->Size == 4); - for (i = 0; i < count; i++) { - const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ - *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ - *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */ - *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */ - *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */ - ptr += input->StrideB; - } -} - -static void -convert_half_to_float(const struct gl_client_array *input, - const GLubyte *ptr, GLfloat *fptr, - GLuint count, GLuint sz) -{ - GLuint i, j; - - for (i = 0; i < count; i++) { - GLhalfARB *in = (GLhalfARB *)ptr; - - for (j = 0; j < sz; j++) { - *fptr++ = _mesa_half_to_float(in[j]); - } - ptr += input->StrideB; - } -} - -/* Adjust pointer to point at first requested element, convert to - * floating point, populate VB->AttribPtr[]. 
- */ -static void _tnl_import_array( struct gl_context *ctx, - GLuint attrib, - GLuint count, - const struct gl_client_array *input, - const GLubyte *ptr ) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint stride = input->StrideB; - - if (input->Type != GL_FLOAT) { - const GLuint sz = input->Size; - GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat)); - GLfloat *fptr = (GLfloat *)buf; - - switch (input->Type) { - case GL_BYTE: - CONVERT(GLbyte, BYTE_TO_FLOAT); - break; - case GL_UNSIGNED_BYTE: - if (input->Format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - convert_bgra_to_float(input, ptr, fptr, count); - } - else { - CONVERT(GLubyte, UBYTE_TO_FLOAT); - } - break; - case GL_SHORT: - CONVERT(GLshort, SHORT_TO_FLOAT); - break; - case GL_UNSIGNED_SHORT: - CONVERT(GLushort, USHORT_TO_FLOAT); - break; - case GL_INT: - CONVERT(GLint, INT_TO_FLOAT); - break; - case GL_UNSIGNED_INT: - CONVERT(GLuint, UINT_TO_FLOAT); - break; - case GL_DOUBLE: - CONVERT(GLdouble, (GLfloat)); - break; - case GL_HALF_FLOAT: - convert_half_to_float(input, ptr, fptr, count, sz); - break; - default: - assert(0); - break; - } - - ptr = buf; - stride = sz * sizeof(GLfloat); - } - - VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib]; - VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr; - VB->AttribPtr[attrib]->start = (GLfloat *)ptr; - VB->AttribPtr[attrib]->count = count; - VB->AttribPtr[attrib]->stride = stride; - VB->AttribPtr[attrib]->size = input->Size; - - /* This should die, but so should the whole GLvector4f concept: - */ - VB->AttribPtr[attrib]->flags = (((1<Size)-1) | - VEC_NOT_WRITEABLE | - (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE)); - - VB->AttribPtr[attrib]->storage = NULL; -} - -#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2) - - -static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx, - const GLvector4f *input, - GLuint count) -{ - const GLubyte *ptr = (const GLubyte *)input->data; - const GLuint stride = input->stride; - GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS); - GLboolean *bptr = space; - GLuint i; - - for (i = 0; i < count; i++) { - *bptr++ = ((GLfloat *)ptr)[0] == 1.0; - ptr += stride; - } - - return space; -} - - -static void bind_inputs( struct gl_context *ctx, - const struct gl_client_array *inputs[], - GLint count, - struct gl_buffer_object **bo, - GLuint *nr_bo ) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - - /* Map all the VBOs - */ - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - const void *ptr; - - if (inputs[i]->BufferObj->Name) { - if (!inputs[i]->BufferObj->Pointer) { - bo[*nr_bo] = inputs[i]->BufferObj; - (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - inputs[i]->BufferObj); - - assert(inputs[i]->BufferObj->Pointer); - } - - ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer, - inputs[i]->Ptr); - } - else - ptr = inputs[i]->Ptr; - - /* Just make sure the array is floating point, otherwise convert to - * temporary storage. - * - * XXX: remove the GLvector4f type at some stage and just use - * client arrays. - */ - _tnl_import_array(ctx, i, count, inputs[i], ptr); - } - - /* We process only the vertices between min & max index: - */ - VB->Count = count; - - /* These should perhaps be part of _TNL_ATTRIB_* */ - VB->BackfaceColorPtr = NULL; - VB->BackfaceIndexPtr = NULL; - VB->BackfaceSecondaryColorPtr = NULL; - - /* Clipping and drawing code still requires this to be a packed - * array of ubytes which can be written into. 
TODO: Fix and - * remove. - */ - if (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL) - { - VB->EdgeFlag = _tnl_import_edgeflag( ctx, - VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG], - VB->Count ); - } - else { - /* the data previously pointed to by EdgeFlag may have been freed */ - VB->EdgeFlag = NULL; - } -} - - -/* Translate indices to GLuints and store in VB->Elts. - */ -static void bind_indices( struct gl_context *ctx, - const struct _mesa_index_buffer *ib, - struct gl_buffer_object **bo, - GLuint *nr_bo) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - void *ptr; - - if (!ib) { - VB->Elts = NULL; - return; - } - - if (ib->obj->Name && !ib->obj->Pointer) { - bo[*nr_bo] = ib->obj; - (*nr_bo)++; - ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER, - GL_READ_ONLY_ARB, - ib->obj); - - assert(ib->obj->Pointer); - } - - ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); - - if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) { - VB->Elts = (GLuint *) ptr; - } - else { - GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint)); - VB->Elts = elts; - - if (ib->type == GL_UNSIGNED_INT) { - const GLuint *in = (GLuint *)ptr; - for (i = 0; i < ib->count; i++) - *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; - } - else if (ib->type == GL_UNSIGNED_SHORT) { - const GLushort *in = (GLushort *)ptr; - for (i = 0; i < ib->count; i++) - *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; - } - else { - const GLubyte *in = (GLubyte *)ptr; - for (i = 0; i < ib->count; i++) - *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; - } - } -} - -static void bind_prims( struct gl_context *ctx, - const struct _mesa_prim *prim, - GLuint nr_prims ) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - - VB->Primitive = prim; - VB->PrimitiveCount = nr_prims; -} - -static void unmap_vbos( struct gl_context *ctx, - struct gl_buffer_object **bo, - GLuint nr_bo ) -{ - GLuint i; - for (i = 0; i < nr_bo; i++) { - ctx->Driver.UnmapBuffer(ctx, - 0, /* target -- I don't see why this would be needed */ - bo[i]); - } -} - - -void _tnl_vbo_draw_prims(struct gl_context *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - - _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); -} - -/* This is the main entrypoint into the slimmed-down software tnl - * module. In a regular swtnl driver, this can be plugged straight - * into the vbo->Driver.DrawPrims() callback. - */ -void _tnl_draw_prims( struct gl_context *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - const GLuint TEST_SPLIT = 0; - const GLint max = TEST_SPLIT ? 
8 : tnl->vb.Size - MAX_CLIPPED_VERTICES; - GLint max_basevertex = prim->basevertex; - GLuint i; - - /* Mesa core state should have been validated already */ - assert(ctx->NewState == 0x0); - - if (!_mesa_check_conditional_render(ctx)) - return; /* don't draw */ - - for (i = 1; i < nr_prims; i++) - max_basevertex = MAX2(max_basevertex, prim[i].basevertex); - - if (0) - { - printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - for (i = 0; i < nr_prims; i++) - printf("prim %d: %s start %d count %d\n", i, - _mesa_lookup_enum_by_nr(prim[i].mode), - prim[i].start, - prim[i].count); - } - - if (min_index) { - /* We always translate away calls with min_index != 0. - */ - vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, - min_index, max_index, - _tnl_vbo_draw_prims ); - return; - } - else if ((GLint)max_index + max_basevertex > max) { - /* The software TNL pipeline has a fixed amount of storage for - * vertices and it is necessary to split incoming drawing commands - * if they exceed that limit. - */ - struct split_limits limits; - limits.max_verts = max; - limits.max_vb_size = ~0; - limits.max_indices = ~0; - - /* This will split the buffers one way or another and - * recursively call back into this function. - */ - vbo_split_prims( ctx, arrays, prim, nr_prims, ib, - 0, max_index + prim->basevertex, - _tnl_vbo_draw_prims, - &limits ); - } - else { - /* May need to map a vertex buffer object for every attribute plus - * one for the index buffer. - */ - struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1]; - GLuint nr_bo = 0; - GLuint inst; - - for (i = 0; i < nr_prims;) { - GLuint this_nr_prims; - - /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices - * will rebase the elements to the basevertex, and we'll only - * emit strings of prims with the same basevertex in one draw call. - */ - for (this_nr_prims = 1; i + this_nr_prims < nr_prims; - this_nr_prims++) { - if (prim[i].basevertex != prim[i + this_nr_prims].basevertex) - break; - } - - assert(prim[i].num_instances > 0); - - /* Binding inputs may imply mapping some vertex buffer objects. - * They will need to be unmapped below. - */ - for (inst = 0; inst < prim[i].num_instances; inst++) { - - bind_prims(ctx, &prim[i], this_nr_prims); - bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1, - bo, &nr_bo); - bind_indices(ctx, ib, bo, &nr_bo); - - tnl->CurInstance = inst; - TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx); - - unmap_vbos(ctx, bo, nr_bo); - free_space(ctx); - } - - i += this_nr_prims; - } - } -} - +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/condrender.h" +#include "main/context.h" +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/enums.h" + +#include "t_context.h" +#include "tnl.h" + + + +static GLubyte *get_space(struct gl_context *ctx, GLuint bytes) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLubyte *space = malloc(bytes); + + tnl->block[tnl->nr_blocks++] = space; + return space; +} + + +static void free_space(struct gl_context *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint i; + for (i = 0; i < tnl->nr_blocks; i++) + free(tnl->block[i]); + tnl->nr_blocks = 0; +} + + +/* Convert the incoming array to GLfloats. Understands the + * array->Normalized flag and selects the correct conversion method. + */ +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)ptr; \ + for (j = 0; j < sz; j++) { \ + *fptr++ = MACRO(*in); \ + in++; \ + } \ + ptr += input->StrideB; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)ptr; \ + for (j = 0; j < sz; j++) { \ + *fptr++ = (GLfloat)(*in); \ + in++; \ + } \ + ptr += input->StrideB; \ + } \ + } \ +} while (0) + + +/** + * Convert array of BGRA/GLubyte[4] values to RGBA/float[4] + * \param ptr input/ubyte array + * \param fptr output/float array + */ +static void +convert_bgra_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count ) +{ + GLuint i; + assert(input->Normalized); + assert(input->Size == 4); + for (i = 0; i < count; i++) { + const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ + *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ + *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */ + *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */ + *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */ + ptr += input->StrideB; + } +} + +static void +convert_half_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count, GLuint sz) +{ + GLuint i, j; + + for (i = 0; i < count; i++) { + GLhalfARB *in = (GLhalfARB *)ptr; + + for (j = 0; j < sz; j++) { + *fptr++ = _mesa_half_to_float(in[j]); + } + ptr += input->StrideB; + } +} + +/** + * \brief Convert fixed-point to floating-point. + * + * In OpenGL, a fixed-point number is a "signed 2's complement 16.16 scaled + * integer" (Table 2.2 of the OpenGL ES 2.0 spec). + * + * If the buffer has the \c normalized flag set, the formula + * \code normalize(x) := (2*x + 1) / (2^16 - 1) \endcode + * is used to map the fixed-point numbers into the range [-1, 1]. 
+ */ +static void +convert_fixed_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count) +{ + GLuint i, j; + const GLint size = input->Size; + + if (input->Normalized) { + for (i = 0; i < count; ++i) { + const GLfixed *in = (GLfixed *) ptr; + for (j = 0; j < size; ++j) { + *fptr++ = (GLfloat) (2 * in[j] + 1) / (GLfloat) ((1 << 16) - 1); + } + ptr += input->StrideB; + } + } else { + for (i = 0; i < count; ++i) { + const GLfixed *in = (GLfixed *) ptr; + for (j = 0; j < size; ++j) { + *fptr++ = in[j] / (GLfloat) (1 << 16); + } + ptr += input->StrideB; + } + } +} + +/* Adjust pointer to point at first requested element, convert to + * floating point, populate VB->AttribPtr[]. + */ +static void _tnl_import_array( struct gl_context *ctx, + GLuint attrib, + GLuint count, + const struct gl_client_array *input, + const GLubyte *ptr ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint stride = input->StrideB; + + if (input->Type != GL_FLOAT) { + const GLuint sz = input->Size; + GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat)); + GLfloat *fptr = (GLfloat *)buf; + + switch (input->Type) { + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + if (input->Format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + convert_bgra_to_float(input, ptr, fptr, count); + } + else { + CONVERT(GLubyte, UBYTE_TO_FLOAT); + } + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_HALF_FLOAT: + convert_half_to_float(input, ptr, fptr, count, sz); + break; + case GL_FIXED: + convert_fixed_to_float(input, ptr, fptr, count); + break; + default: + assert(0); + break; + } + + ptr = buf; + stride = sz * sizeof(GLfloat); + } + + VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib]; + VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr; + VB->AttribPtr[attrib]->start = (GLfloat *)ptr; + VB->AttribPtr[attrib]->count = count; + VB->AttribPtr[attrib]->stride = stride; + VB->AttribPtr[attrib]->size = input->Size; + + /* This should die, but so should the whole GLvector4f concept: + */ + VB->AttribPtr[attrib]->flags = (((1<Size)-1) | + VEC_NOT_WRITEABLE | + (stride == 4*sizeof(GLfloat) ? 
0 : VEC_BAD_STRIDE)); + + VB->AttribPtr[attrib]->storage = NULL; +} + +#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2) + + +static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx, + const GLvector4f *input, + GLuint count) +{ + const GLubyte *ptr = (const GLubyte *)input->data; + const GLuint stride = input->stride; + GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS); + GLboolean *bptr = space; + GLuint i; + + for (i = 0; i < count; i++) { + *bptr++ = ((GLfloat *)ptr)[0] == 1.0; + ptr += stride; + } + + return space; +} + + +static void bind_inputs( struct gl_context *ctx, + const struct gl_client_array *inputs[], + GLint count, + struct gl_buffer_object **bo, + GLuint *nr_bo ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; + + /* Map all the VBOs + */ + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + const void *ptr; + + if (inputs[i]->BufferObj->Name) { + if (!inputs[i]->BufferObj->Pointer) { + bo[*nr_bo] = inputs[i]->BufferObj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER, + GL_READ_ONLY_ARB, + inputs[i]->BufferObj); + + assert(inputs[i]->BufferObj->Pointer); + } + + ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer, + inputs[i]->Ptr); + } + else + ptr = inputs[i]->Ptr; + + /* Just make sure the array is floating point, otherwise convert to + * temporary storage. + * + * XXX: remove the GLvector4f type at some stage and just use + * client arrays. + */ + _tnl_import_array(ctx, i, count, inputs[i], ptr); + } + + /* We process only the vertices between min & max index: + */ + VB->Count = count; + + /* These should perhaps be part of _TNL_ATTRIB_* */ + VB->BackfaceColorPtr = NULL; + VB->BackfaceIndexPtr = NULL; + VB->BackfaceSecondaryColorPtr = NULL; + + /* Clipping and drawing code still requires this to be a packed + * array of ubytes which can be written into. TODO: Fix and + * remove. + */ + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) + { + VB->EdgeFlag = _tnl_import_edgeflag( ctx, + VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG], + VB->Count ); + } + else { + /* the data previously pointed to by EdgeFlag may have been freed */ + VB->EdgeFlag = NULL; + } +} + + +/* Translate indices to GLuints and store in VB->Elts. 
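+ * Any non-zero basevertex is folded in here; e.g. GL_UNSIGNED_SHORT indices + * {2, 5, 7} with basevertex 10 are stored as {12, 15, 17}.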
+ */ +static void bind_indices( struct gl_context *ctx, + const struct _mesa_index_buffer *ib, + struct gl_buffer_object **bo, + GLuint *nr_bo) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; + void *ptr; + + if (!ib) { + VB->Elts = NULL; + return; + } + + if (ib->obj->Name && !ib->obj->Pointer) { + bo[*nr_bo] = ib->obj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER, + GL_READ_ONLY_ARB, + ib->obj); + + assert(ib->obj->Pointer); + } + + ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); + + if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) { + VB->Elts = (GLuint *) ptr; + } + else { + GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint)); + VB->Elts = elts; + + if (ib->type == GL_UNSIGNED_INT) { + const GLuint *in = (GLuint *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + else if (ib->type == GL_UNSIGNED_SHORT) { + const GLushort *in = (GLushort *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + else { + const GLubyte *in = (GLubyte *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + } +} + +static void bind_prims( struct gl_context *ctx, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + + VB->Primitive = prim; + VB->PrimitiveCount = nr_prims; +} + +static void unmap_vbos( struct gl_context *ctx, + struct gl_buffer_object **bo, + GLuint nr_bo ) +{ + GLuint i; + for (i = 0; i < nr_bo; i++) { + ctx->Driver.UnmapBuffer(ctx, + 0, /* target -- I don't see why this would be needed */ + bo[i]); + } +} + + +void _tnl_vbo_draw_prims(struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +/* This is the main entrypoint into the slimmed-down software tnl + * module. In a regular swtnl driver, this can be plugged straight + * into the vbo->Driver.DrawPrims() callback. + */ +void _tnl_draw_prims( struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + const GLuint TEST_SPLIT = 0; + const GLint max = TEST_SPLIT ? 8 : tnl->vb.Size - MAX_CLIPPED_VERTICES; + GLint max_basevertex = prim->basevertex; + GLuint i; + + /* Mesa core state should have been validated already */ + assert(ctx->NewState == 0x0); + + if (!_mesa_check_conditional_render(ctx)) + return; /* don't draw */ + + for (i = 1; i < nr_prims; i++) + max_basevertex = MAX2(max_basevertex, prim[i].basevertex); + + if (0) + { + printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + for (i = 0; i < nr_prims; i++) + printf("prim %d: %s start %d count %d\n", i, + _mesa_lookup_enum_by_nr(prim[i].mode), + prim[i].start, + prim[i].count); + } + + if (min_index) { + /* We always translate away calls with min_index != 0. 
+ */ + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, + min_index, max_index, + _tnl_vbo_draw_prims ); + return; + } + else if ((GLint)max_index + max_basevertex > max) { + /* The software TNL pipeline has a fixed amount of storage for + * vertices and it is necessary to split incoming drawing commands + * if they exceed that limit. + */ + struct split_limits limits; + limits.max_verts = max; + limits.max_vb_size = ~0; + limits.max_indices = ~0; + + /* This will split the buffers one way or another and + * recursively call back into this function. + */ + vbo_split_prims( ctx, arrays, prim, nr_prims, ib, + 0, max_index + prim->basevertex, + _tnl_vbo_draw_prims, + &limits ); + } + else { + /* May need to map a vertex buffer object for every attribute plus + * one for the index buffer. + */ + struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1]; + GLuint nr_bo = 0; + GLuint inst; + + for (i = 0; i < nr_prims;) { + GLuint this_nr_prims; + + /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices + * will rebase the elements to the basevertex, and we'll only + * emit strings of prims with the same basevertex in one draw call. + */ + for (this_nr_prims = 1; i + this_nr_prims < nr_prims; + this_nr_prims++) { + if (prim[i].basevertex != prim[i + this_nr_prims].basevertex) + break; + } + + assert(prim[i].num_instances > 0); + + /* Binding inputs may imply mapping some vertex buffer objects. + * They will need to be unmapped below. + */ + for (inst = 0; inst < prim[i].num_instances; inst++) { + + bind_prims(ctx, &prim[i], this_nr_prims); + bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1, + bo, &nr_bo); + bind_indices(ctx, ib, bo, &nr_bo); + + tnl->CurInstance = inst; + TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx); + + unmap_vbos(ctx, bo, nr_bo); + free_space(ctx); + } + + i += this_nr_prims; + } + } +} + diff --git a/mesalib/src/mesa/vbo/vbo_exec_array.c b/mesalib/src/mesa/vbo/vbo_exec_array.c index 13b54d59c..6749541b7 100644 --- a/mesalib/src/mesa/vbo/vbo_exec_array.c +++ b/mesalib/src/mesa/vbo/vbo_exec_array.c @@ -1,1277 +1,1282 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2009 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/context.h" -#include "main/state.h" -#include "main/api_validate.h" -#include "main/varray.h" -#include "main/bufferobj.h" -#include "main/enums.h" -#include "main/macros.h" - -#include "vbo_context.h" - - -/** - * Compute min and max elements by scanning the index buffer for - * glDraw[Range]Elements() calls. - * If primitive restart is enabled, we need to ignore restart - * indexes when computing min/max. - */ -void -vbo_get_minmax_index(struct gl_context *ctx, - const struct _mesa_prim *prim, - const struct _mesa_index_buffer *ib, - GLuint *min_index, GLuint *max_index) -{ - const GLboolean restart = ctx->Array.PrimitiveRestart; - const GLuint restartIndex = ctx->Array.RestartIndex; - const GLuint count = prim->count; - const void *indices; - GLuint i; - - if (_mesa_is_bufferobj(ib->obj)) { - const GLvoid *map = - ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, ib->obj); - indices = ADD_POINTERS(map, ib->ptr); - } else { - indices = ib->ptr; - } - - switch (ib->type) { - case GL_UNSIGNED_INT: { - const GLuint *ui_indices = (const GLuint *)indices; - GLuint max_ui = 0; - GLuint min_ui = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (ui_indices[i] != restartIndex) { - if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; - if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; - } - } - } - else { - for (i = 0; i < count; i++) { - if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; - if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; - } - } - *min_index = min_ui; - *max_index = max_ui; - break; - } - case GL_UNSIGNED_SHORT: { - const GLushort *us_indices = (const GLushort *)indices; - GLuint max_us = 0; - GLuint min_us = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (us_indices[i] != restartIndex) { - if (us_indices[i] > max_us) max_us = us_indices[i]; - if (us_indices[i] < min_us) min_us = us_indices[i]; - } - } - } - else { - for (i = 0; i < count; i++) { - if (us_indices[i] > max_us) max_us = us_indices[i]; - if (us_indices[i] < min_us) min_us = us_indices[i]; - } - } - *min_index = min_us; - *max_index = max_us; - break; - } - case GL_UNSIGNED_BYTE: { - const GLubyte *ub_indices = (const GLubyte *)indices; - GLuint max_ub = 0; - GLuint min_ub = ~0U; - if (restart) { - for (i = 0; i < count; i++) { - if (ub_indices[i] != restartIndex) { - if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; - if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; - } - } - } - else { - for (i = 0; i < count; i++) { - if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; - if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; - } - } - *min_index = min_ub; - *max_index = max_ub; - break; - } - default: - assert(0); - break; - } - - if (_mesa_is_bufferobj(ib->obj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj); - } -} - - -/** - * Check that element 'j' of the array has reasonable data. - * Map VBO if needed. - * For debugging purposes; not normally used. 
- */ -static void -check_array_data(struct gl_context *ctx, struct gl_client_array *array, - GLuint attrib, GLuint j) -{ - if (array->Enabled) { - const void *data = array->Ptr; - if (_mesa_is_bufferobj(array->BufferObj)) { - if (!array->BufferObj->Pointer) { - /* need to map now */ - array->BufferObj->Pointer = - ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, array->BufferObj); - } - data = ADD_POINTERS(data, array->BufferObj->Pointer); - } - switch (array->Type) { - case GL_FLOAT: - { - GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j); - GLint k; - for (k = 0; k < array->Size; k++) { - if (IS_INF_OR_NAN(f[k]) || - f[k] >= 1.0e20 || f[k] <= -1.0e10) { - printf("Bad array data:\n"); - printf(" Element[%u].%u = %f\n", j, k, f[k]); - printf(" Array %u at %p\n", attrib, (void* ) array); - printf(" Type 0x%x, Size %d, Stride %d\n", - array->Type, array->Size, array->Stride); - printf(" Address/offset %p in Buffer Object %u\n", - array->Ptr, array->BufferObj->Name); - f[k] = 1.0; /* XXX replace the bad value! */ - } - /*assert(!IS_INF_OR_NAN(f[k]));*/ - } - } - break; - default: - ; - } - } -} - - -/** - * Unmap the buffer object referenced by given array, if mapped. - */ -static void -unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array) -{ - if (array->Enabled && - _mesa_is_bufferobj(array->BufferObj) && - _mesa_bufferobj_mapped(array->BufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj); - } -} - - -/** - * Examine the array's data for NaNs, etc. - * For debug purposes; not normally used. - */ -static void -check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, - const void *elements, GLint basevertex) -{ - struct gl_array_object *arrayObj = ctx->Array.ArrayObj; - const void *elemMap; - GLint i, k; - - if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - elemMap = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); - elements = ADD_POINTERS(elements, elemMap); - } - - for (i = 0; i < count; i++) { - GLuint j; - - /* j = element[i] */ - switch (elemType) { - case GL_UNSIGNED_BYTE: - j = ((const GLubyte *) elements)[i]; - break; - case GL_UNSIGNED_SHORT: - j = ((const GLushort *) elements)[i]; - break; - case GL_UNSIGNED_INT: - j = ((const GLuint *) elements)[i]; - break; - default: - assert(0); - } - - /* check element j of each enabled array */ - check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j); - check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j); - check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j); - check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j); - for (k = 0; k < Elements(arrayObj->TexCoord); k++) { - check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j); - } - for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { - check_array_data(ctx, &arrayObj->VertexAttrib[k], - VERT_ATTRIB_GENERIC0 + k, j); - } - } - - if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); - } - - unmap_array_buffer(ctx, &arrayObj->Vertex); - unmap_array_buffer(ctx, &arrayObj->Normal); - unmap_array_buffer(ctx, &arrayObj->Color); - for (k = 0; k < Elements(arrayObj->TexCoord); k++) { - unmap_array_buffer(ctx, &arrayObj->TexCoord[k]); - } - for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { - unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]); - } -} - 
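
The vbo_get_minmax_index() scan removed above (and re-added unchanged later in this hunk) reduces to a single pass that tracks the smallest and largest element while ignoring the restart index. A minimal standalone sketch of that pass, in plain C with hypothetical names and only the GL_UNSIGNED_INT case (the GL_UNSIGNED_SHORT and GL_UNSIGNED_BYTE cases in the real function are the same loop over narrower element types):

#include <stdint.h>

/* Hypothetical helper, not the Mesa function: min/max of 32-bit indices,
 * skipping the restart index when primitive restart is enabled. */
static void
minmax_indices_u32(const uint32_t *indices, unsigned count,
                   int restart_enabled, uint32_t restart_index,
                   uint32_t *min_out, uint32_t *max_out)
{
   uint32_t lo = UINT32_MAX, hi = 0;
   unsigned i;

   for (i = 0; i < count; i++) {
      if (restart_enabled && indices[i] == restart_index)
         continue;                    /* restart markers don't count */
      if (indices[i] < lo) lo = indices[i];
      if (indices[i] > hi) hi = indices[i];
   }
   *min_out = lo;
   *max_out = hi;
}
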
- -/** - * Check array data, looking for NaNs, etc. - */ -static void -check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count) -{ - /* TO DO */ -} - - -/** - * Print info/data for glDrawArrays(), for debugging. - */ -static void -print_draw_arrays(struct gl_context *ctx, - GLenum mode, GLint start, GLsizei count) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - int i; - - printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n", - mode, start, count); - - for (i = 0; i < 32; i++) { - GLuint bufName = exec->array.inputs[i]->BufferObj->Name; - GLint stride = exec->array.inputs[i]->Stride; - printf("attr %2d: size %d stride %d enabled %d " - "ptr %p Bufobj %u\n", - i, - exec->array.inputs[i]->Size, - stride, - /*exec->array.inputs[i]->Enabled,*/ - exec->array.legacy_array[i]->Enabled, - exec->array.inputs[i]->Ptr, - bufName); - - if (bufName) { - struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName); - GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY_ARB, buf); - int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; - float *f = (float *) (p + offset); - int *k = (int *) f; - int i; - int n = (count * stride) / 4; - if (n > 32) - n = 32; - printf(" Data at offset %d:\n", offset); - for (i = 0; i < n; i++) { - printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); - } - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf); - } - } -} - - -/** - * Bind the VBO executor to the current vertex array object prior - * to drawing. - * - * Just translate the arrayobj into a sane layout. - */ -static void -bind_array_obj(struct gl_context *ctx) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - struct gl_array_object *arrayObj = ctx->Array.ArrayObj; - GLuint i; - - /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array - * rather than as individual named arrays. Then this function can - * go away. - */ - exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex; - exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight; - exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal; - exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color; - exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor; - exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord; - exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index; - if (arrayObj->PointSize.Enabled) { - /* this aliases COLOR_INDEX */ - exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize; - } - exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag; - - for (i = 0; i < Elements(arrayObj->TexCoord); i++) - exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i]; - - for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { - assert(i < Elements(exec->array.generic_array)); - exec->array.generic_array[i] = &arrayObj->VertexAttrib[i]; - } - - exec->array.array_obj = arrayObj->Name; -} - - -/** - * Set the vbo->exec->inputs[] pointers to point to the enabled - * vertex arrays. This depends on the current vertex program/shader - * being executed because of whether or not generic vertex arrays - * alias the conventional vertex arrays. - * For arrays that aren't enabled, we set the input[attrib] pointer - * to point at a zero-stride current value "array". 
- */ -static void -recalculate_input_bindings(struct gl_context *ctx) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - const struct gl_client_array **inputs = &exec->array.inputs[0]; - GLbitfield const_inputs = 0x0; - GLuint i; - - exec->array.program_mode = get_program_mode(ctx); - exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled; - - switch (exec->array.program_mode) { - case VP_NONE: - /* When no vertex program is active (or the vertex program is generated - * from fixed-function state). We put the material values into the - * generic slots. This is the only situation where material values - * are available as per-vertex attributes. - */ - for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { - if (exec->array.legacy_array[i]->Enabled) - inputs[i] = exec->array.legacy_array[i]; - else { - inputs[i] = &vbo->legacy_currval[i]; - const_inputs |= 1 << i; - } - } - - for (i = 0; i < MAT_ATTRIB_MAX; i++) { - inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; - const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); - } - - /* Could use just about anything, just to fill in the empty - * slots: - */ - for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { - inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; - const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); - } - break; - - case VP_NV: - /* NV_vertex_program - attribute arrays alias and override - * conventional, legacy arrays. No materials, and the generic - * slots are vacant. - */ - for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { - if (exec->array.generic_array[i]->Enabled) - inputs[i] = exec->array.generic_array[i]; - else if (exec->array.legacy_array[i]->Enabled) - inputs[i] = exec->array.legacy_array[i]; - else { - inputs[i] = &vbo->legacy_currval[i]; - const_inputs |= 1 << i; - } - } - - /* Could use just about anything, just to fill in the empty - * slots: - */ - for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { - inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; - const_inputs |= 1 << i; - } - break; - - case VP_ARB: - /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0] - * attribute array aliases and overrides the legacy position array. - * - * Otherwise, legacy attributes available in the legacy slots, - * generic attributes in the generic slots and materials are not - * available as per-vertex attributes. - */ - if (exec->array.generic_array[0]->Enabled) - inputs[0] = exec->array.generic_array[0]; - else if (exec->array.legacy_array[0]->Enabled) - inputs[0] = exec->array.legacy_array[0]; - else { - inputs[0] = &vbo->legacy_currval[0]; - const_inputs |= 1 << 0; - } - - for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { - if (exec->array.legacy_array[i]->Enabled) - inputs[i] = exec->array.legacy_array[i]; - else { - inputs[i] = &vbo->legacy_currval[i]; - const_inputs |= 1 << i; - } - } - - for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { - if (exec->array.generic_array[i]->Enabled) - inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; - else { - inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; - const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); - } - - } - break; - } - - _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); -} - - -/** - * Examine the enabled vertex arrays to set the exec->array.inputs[] values. - * These will point to the arrays to actually use for drawing. Some will - * be user-provided arrays, other will be zero-stride const-valued arrays. 
- * Note that this might set the _NEW_ARRAY dirty flag so state validation - * must be done after this call. - */ -static void -bind_arrays(struct gl_context *ctx) -{ - bind_array_obj(ctx); - recalculate_input_bindings(ctx); -} - - -/** - * Helper function called by the other DrawArrays() functions below. - * This is where we handle primitive restart for drawing non-indexed - * arrays. If primitive restart is enabled, it typically means - * splitting one DrawArrays() into two. - */ -static void -vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start, - GLsizei count, GLuint numInstances) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - struct _mesa_prim prim[2]; - - bind_arrays(ctx); - - /* Again... because we may have changed the bitmask of per-vertex varying - * attributes. If we regenerate the fixed-function vertex program now - * we may be able to prune down the number of vertex attributes which we - * need in the shader. - */ - if (ctx->NewState) - _mesa_update_state(ctx); - - prim[0].begin = 1; - prim[0].end = 1; - prim[0].weak = 0; - prim[0].pad = 0; - prim[0].mode = mode; - prim[0].start = 0; /* filled in below */ - prim[0].count = 0; /* filled in below */ - prim[0].indexed = 0; - prim[0].basevertex = 0; - prim[0].num_instances = numInstances; - - /* Implement the primitive restart index */ - if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) { - GLuint primCount = 0; - - if (ctx->Array.RestartIndex == start) { - /* special case: RestartIndex at beginning */ - if (count > 1) { - prim[0].start = start + 1; - prim[0].count = count - 1; - primCount = 1; - } - } - else if (ctx->Array.RestartIndex == start + count - 1) { - /* special case: RestartIndex at end */ - if (count > 1) { - prim[0].start = start; - prim[0].count = count - 1; - primCount = 1; - } - } - else { - /* general case: RestartIndex in middle, split into two prims */ - prim[0].start = start; - prim[0].count = ctx->Array.RestartIndex - start; - - prim[1] = prim[0]; - prim[1].start = ctx->Array.RestartIndex + 1; - prim[1].count = count - prim[1].start; - - primCount = 2; - } - - if (primCount > 0) { - /* draw one or two prims */ - vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL, - GL_TRUE, start, start + count - 1); - } - } - else { - /* no prim restart */ - prim[0].start = start; - prim[0].count = count; - - vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL, - GL_TRUE, start, start + count - 1); - } -} - - - -/** - * Called from glDrawArrays when in immediate mode (not display list mode). - */ -static void GLAPIENTRY -vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, count); - - if (!_mesa_validate_DrawArrays( ctx, mode, start, count )) - return; - - FLUSH_CURRENT( ctx, 0 ); - - if (!_mesa_valid_to_render(ctx, "glDrawArrays")) { - return; - } - - if (0) - check_draw_arrays_data(ctx, start, count); - - vbo_draw_arrays(ctx, mode, start, count, 1); - - if (0) - print_draw_arrays(ctx, mode, start, count); -} - - -/** - * Called from glDrawArraysInstanced when in immediate mode (not - * display list mode). 
- */ -static void GLAPIENTRY -vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, - GLsizei numInstances) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, count, numInstances); - - if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances)) - return; - - FLUSH_CURRENT( ctx, 0 ); - - if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) { - return; - } - - if (0) - check_draw_arrays_data(ctx, start, count); - - vbo_draw_arrays(ctx, mode, start, count, numInstances); - - if (0) - print_draw_arrays(ctx, mode, start, count); -} - - -/** - * Map GL_ELEMENT_ARRAY_BUFFER and print contents. - * For debugging. - */ -static void -dump_element_buffer(struct gl_context *ctx, GLenum type) -{ - const GLvoid *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_READ_ONLY, - ctx->Array.ElementArrayBufferObj); - switch (type) { - case GL_UNSIGNED_BYTE: - { - const GLubyte *us = (const GLubyte *) map; - GLint i; - for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) { - printf("%02x ", us[i]); - if (i % 32 == 31) - printf("\n"); - } - printf("\n"); - } - break; - case GL_UNSIGNED_SHORT: - { - const GLushort *us = (const GLushort *) map; - GLint i; - for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) { - printf("%04x ", us[i]); - if (i % 16 == 15) - printf("\n"); - } - printf("\n"); - } - break; - case GL_UNSIGNED_INT: - { - const GLuint *us = (const GLuint *) map; - GLint i; - for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) { - printf("%08x ", us[i]); - if (i % 8 == 7) - printf("\n"); - } - printf("\n"); - } - break; - default: - ; - } - - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, - ctx->Array.ElementArrayBufferObj); -} - - -/** - * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements. - * Do the rendering for a glDrawElements or glDrawRangeElements call after - * we've validated buffer bounds, etc. - */ -static void -vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, - GLboolean index_bounds_valid, - GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid *indices, - GLint basevertex, GLint numInstances) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - struct _mesa_index_buffer ib; - struct _mesa_prim prim[1]; - - FLUSH_CURRENT( ctx, 0 ); - - if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) { - return; - } - - bind_arrays( ctx ); - - /* check for dirty state again */ - if (ctx->NewState) - _mesa_update_state( ctx ); - - ib.count = count; - ib.type = type; - ib.obj = ctx->Array.ElementArrayBufferObj; - ib.ptr = indices; - - prim[0].begin = 1; - prim[0].end = 1; - prim[0].weak = 0; - prim[0].pad = 0; - prim[0].mode = mode; - prim[0].start = 0; - prim[0].count = count; - prim[0].indexed = 1; - prim[0].basevertex = basevertex; - prim[0].num_instances = numInstances; - - /* Need to give special consideration to rendering a range of - * indices starting somewhere above zero. Typically the - * application is issuing multiple DrawRangeElements() to draw - * successive primitives layed out linearly in the vertex arrays. - * Unless the vertex arrays are all in a VBO (or locked as with - * CVA), the OpenGL semantics imply that we need to re-read or - * re-upload the vertex data on each draw call. 
- * - * In the case of hardware tnl, we want to avoid starting the - * upload at zero, as it will mean every draw call uploads an - * increasing amount of not-used vertex data. Worse - in the - * software tnl module, all those vertices might be transformed and - * lit but never rendered. - * - * If we just upload or transform the vertices in start..end, - * however, the indices will be incorrect. - * - * At this level, we don't know exactly what the requirements of - * the backend are going to be, though it will likely boil down to - * either: - * - * 1) Do nothing, everything is in a VBO and is processed once - * only. - * - * 2) Adjust the indices and vertex arrays so that start becomes - * zero. - * - * Rather than doing anything here, I'll provide a helper function - * for the latter case elsewhere. - */ - - vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, - index_bounds_valid, start, end ); -} - - -/** - * Called by glDrawRangeElementsBaseVertex() in immediate mode. - */ -static void GLAPIENTRY -vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, - GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid *indices, - GLint basevertex) -{ - static GLuint warnCount = 0; - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, - "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), start, end, count, - _mesa_lookup_enum_by_nr(type), indices, basevertex); - - if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, - type, indices, basevertex )) - return; - - /* NOTE: It's important that 'end' is a reasonable value. - * in _tnl_draw_prims(), we use end to determine how many vertices - * to transform. If it's too large, we can unnecessarily split prims - * or we can read/write out of memory in several different places! - */ - - /* Catch/fix some potential user errors */ - if (type == GL_UNSIGNED_BYTE) { - start = MIN2(start, 0xff); - end = MIN2(end, 0xff); - } - else if (type == GL_UNSIGNED_SHORT) { - start = MIN2(start, 0xffff); - end = MIN2(end, 0xffff); - } - - if (end >= ctx->Array.ArrayObj->_MaxElement) { - /* the max element is out of bounds of one or more enabled arrays */ - warnCount++; - - if (warnCount < 10) { - _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " - "type 0x%x, indices=%p)\n" - "\tend is out of bounds (max=%u) " - "Element Buffer %u (size %d)\n" - "\tThis should probably be fixed in the application.", - start, end, count, type, indices, - ctx->Array.ArrayObj->_MaxElement - 1, - ctx->Array.ElementArrayBufferObj->Name, - (int) ctx->Array.ElementArrayBufferObj->Size); - } - - if (0) - dump_element_buffer(ctx, type); - - if (0) - _mesa_print_arrays(ctx); - -#ifdef DEBUG - /* 'end' was out of bounds, but now let's check the actual array - * indexes to see if any of them are out of bounds. 
- */ - { - GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, - ctx->Array.ElementArrayBufferObj); - if (max >= ctx->Array.ArrayObj->_MaxElement) { - if (warnCount < 10) { - _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, " - "count %d, type 0x%x, indices=%p)\n" - "\tindex=%u is out of bounds (max=%u) " - "Element Buffer %u (size %d)\n" - "\tSkipping the glDrawRangeElements() call", - start, end, count, type, indices, max, - ctx->Array.ArrayObj->_MaxElement - 1, - ctx->Array.ElementArrayBufferObj->Name, - (int) ctx->Array.ElementArrayBufferObj->Size); - } - } - /* XXX we could also find the min index and compare to 'start' - * to see if start is correct. But it's more likely to get the - * upper bound wrong. - */ - } -#endif - - /* Set 'end' to the max possible legal value */ - assert(ctx->Array.ArrayObj->_MaxElement >= 1); - end = ctx->Array.ArrayObj->_MaxElement - 1; - } - else if (0) { - printf("glDraw[Range]Elements{,BaseVertex}" - "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, " - "base %d\n", - start, end, type, count, - ctx->Array.ElementArrayBufferObj->Name, - basevertex); - } - -#if 0 - check_draw_elements_data(ctx, count, type, indices); -#else - (void) check_draw_elements_data; -#endif - - vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end, - count, type, indices, basevertex, 1); -} - - -/** - * Called by glDrawRangeElements() in immediate mode. - */ -static void GLAPIENTRY -vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, const GLvoid *indices) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, - "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n", - _mesa_lookup_enum_by_nr(mode), start, end, count, - _mesa_lookup_enum_by_nr(type), indices); - - vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, 0); -} - - -/** - * Called by glDrawElements() in immediate mode. - */ -static void GLAPIENTRY -vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices); - - if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 )) - return; - - vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, - count, type, indices, 0, 1); -} - - -/** - * Called by glDrawElementsBaseVertex() in immediate mode. - */ -static void GLAPIENTRY -vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, basevertex); - - if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, - basevertex )) - return; - - vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, - count, type, indices, basevertex, 1); -} - - -/** - * Called by glDrawElementsInstanced() in immediate mode. 
- */ -static void GLAPIENTRY -vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLsizei numInstances) -{ - GET_CURRENT_CONTEXT(ctx); - - if (MESA_VERBOSE & VERBOSE_DRAW) - _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n", - _mesa_lookup_enum_by_nr(mode), count, - _mesa_lookup_enum_by_nr(type), indices, numInstances); - - if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, - numInstances)) - return; - - vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, - count, type, indices, 0, numInstances); -} - - -/** - * Inner support for both _mesa_MultiDrawElements() and - * _mesa_MultiDrawRangeElements(). - * This does the actual rendering after we've checked array indexes, etc. - */ -static void -vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, - const GLint *basevertex) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - struct _mesa_index_buffer ib; - struct _mesa_prim *prim; - unsigned int index_type_size = 0; - uintptr_t min_index_ptr, max_index_ptr; - GLboolean fallback = GL_FALSE; - int i; - - if (primcount == 0) - return; - - FLUSH_CURRENT( ctx, 0 ); - - if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) { - return; - } - - prim = calloc(1, primcount * sizeof(*prim)); - if (prim == NULL) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements"); - return; - } - - /* Decide if we can do this all as one set of primitives sharing the - * same index buffer, or if we have to reset the index pointer per - * primitive. - */ - bind_arrays( ctx ); - - /* check for dirty state again */ - if (ctx->NewState) - _mesa_update_state( ctx ); - - switch (type) { - case GL_UNSIGNED_INT: - index_type_size = 4; - break; - case GL_UNSIGNED_SHORT: - index_type_size = 2; - break; - case GL_UNSIGNED_BYTE: - index_type_size = 1; - break; - default: - assert(0); - } - - min_index_ptr = (uintptr_t)indices[0]; - max_index_ptr = 0; - for (i = 0; i < primcount; i++) { - min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]); - max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] + - index_type_size * count[i]); - } - - /* Check if we can handle this thing as a bunch of index offsets from the - * same index pointer. If we can't, then we have to fall back to doing - * a draw_prims per primitive. - * Check that the difference between each prim's indexes is a multiple of - * the index/element size. - */ - if (index_type_size != 1) { - for (i = 0; i < primcount; i++) { - if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) { - fallback = GL_TRUE; - break; - } - } - } - - /* If the index buffer isn't in a VBO, then treating the application's - * subranges of the index buffer as one large index buffer may lead to - * us reading unmapped memory. 
- */ - if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) - fallback = GL_TRUE; - - if (!fallback) { - ib.count = (max_index_ptr - min_index_ptr) / index_type_size; - ib.type = type; - ib.obj = ctx->Array.ElementArrayBufferObj; - ib.ptr = (void *)min_index_ptr; - - for (i = 0; i < primcount; i++) { - prim[i].begin = (i == 0); - prim[i].end = (i == primcount - 1); - prim[i].weak = 0; - prim[i].pad = 0; - prim[i].mode = mode; - prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size; - prim[i].count = count[i]; - prim[i].indexed = 1; - prim[i].num_instances = 1; - if (basevertex != NULL) - prim[i].basevertex = basevertex[i]; - else - prim[i].basevertex = 0; - } - - vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib, - GL_FALSE, ~0, ~0); - } else { - /* render one prim at a time */ - for (i = 0; i < primcount; i++) { - ib.count = count[i]; - ib.type = type; - ib.obj = ctx->Array.ElementArrayBufferObj; - ib.ptr = indices[i]; - - prim[0].begin = 1; - prim[0].end = 1; - prim[0].weak = 0; - prim[0].pad = 0; - prim[0].mode = mode; - prim[0].start = 0; - prim[0].count = count[i]; - prim[0].indexed = 1; - prim[0].num_instances = 1; - if (basevertex != NULL) - prim[0].basevertex = basevertex[i]; - else - prim[0].basevertex = 0; - - vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib, - GL_FALSE, ~0, ~0); - } - } - - free(prim); -} - - -static void GLAPIENTRY -vbo_exec_MultiDrawElements(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, - GLsizei primcount) -{ - GET_CURRENT_CONTEXT(ctx); - GLint i; - - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - for (i = 0; i < primcount; i++) { - if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], - 0)) - return; - } - - vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, - NULL); -} - - -static void GLAPIENTRY -vbo_exec_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, - GLsizei primcount, - const GLsizei *basevertex) -{ - GET_CURRENT_CONTEXT(ctx); - GLint i; - - ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); - - for (i = 0; i < primcount; i++) { - if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], - basevertex[i])) - return; - } - - vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, - basevertex); -} - - -/** - * Plug in the immediate-mode vertex array drawing commands into the - * givven vbo_exec_context object. - */ -void -vbo_exec_array_init( struct vbo_exec_context *exec ) -{ - exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays; - exec->vtxfmt.DrawElements = vbo_exec_DrawElements; - exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements; - exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements; - exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex; - exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex; - exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex; - exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced; - exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced; -} - - -void -vbo_exec_array_destroy( struct vbo_exec_context *exec ) -{ - /* nothing to do */ -} - - - -/** - * The following functions are only used for OpenGL ES 1/2 support. - * And some aren't even supported (yet) in ES 1/2. 
- */ - - -void GLAPIENTRY -_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) -{ - vbo_exec_DrawArrays(mode, first, count); -} - - -void GLAPIENTRY -_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices) -{ - vbo_exec_DrawElements(mode, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex); -} - - -void GLAPIENTRY -_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, - GLenum type, const GLvoid *indices) -{ - vbo_exec_DrawRangeElements(mode, start, end, count, type, indices); -} - - -void GLAPIENTRY -_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, - GLsizei count, GLenum type, - const GLvoid *indices, GLint basevertex) -{ - vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, - indices, basevertex); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount) -{ - vbo_exec_MultiDrawElements(mode, count, type, indices, primcount); -} - - -void GLAPIENTRY -_mesa_MultiDrawElementsBaseVertex(GLenum mode, - const GLsizei *count, GLenum type, - const GLvoid **indices, GLsizei primcount, - const GLint *basevertex) -{ - vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices, - primcount, basevertex); -} +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/varray.h" +#include "main/bufferobj.h" +#include "main/enums.h" +#include "main/macros.h" + +#include "vbo_context.h" + + +/** + * Compute min and max elements by scanning the index buffer for + * glDraw[Range]Elements() calls. + * If primitive restart is enabled, we need to ignore restart + * indexes when computing min/max. 
+ */ +void +vbo_get_minmax_index(struct gl_context *ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint *min_index, GLuint *max_index) +{ + const GLboolean restart = ctx->Array.PrimitiveRestart; + const GLuint restartIndex = ctx->Array.RestartIndex; + const GLuint count = prim->count; + const void *indices; + GLuint i; + + if (_mesa_is_bufferobj(ib->obj)) { + const GLvoid *map = + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, ib->obj); + indices = ADD_POINTERS(map, ib->ptr); + } else { + indices = ib->ptr; + } + + switch (ib->type) { + case GL_UNSIGNED_INT: { + const GLuint *ui_indices = (const GLuint *)indices; + GLuint max_ui = 0; + GLuint min_ui = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ui_indices[i] != restartIndex) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + *min_index = min_ui; + *max_index = max_ui; + break; + } + case GL_UNSIGNED_SHORT: { + const GLushort *us_indices = (const GLushort *)indices; + GLuint max_us = 0; + GLuint min_us = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (us_indices[i] != restartIndex) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = us_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = us_indices[i]; + } + } + *min_index = min_us; + *max_index = max_us; + break; + } + case GL_UNSIGNED_BYTE: { + const GLubyte *ub_indices = (const GLubyte *)indices; + GLuint max_ub = 0; + GLuint min_ub = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ub_indices[i] != restartIndex) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + *min_index = min_ub; + *max_index = max_ub; + break; + } + default: + assert(0); + break; + } + + if (_mesa_is_bufferobj(ib->obj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj); + } +} + + +/** + * Check that element 'j' of the array has reasonable data. + * Map VBO if needed. + * For debugging purposes; not normally used. 
+ */ +static void +check_array_data(struct gl_context *ctx, struct gl_client_array *array, + GLuint attrib, GLuint j) +{ + if (array->Enabled) { + const void *data = array->Ptr; + if (_mesa_is_bufferobj(array->BufferObj)) { + if (!array->BufferObj->Pointer) { + /* need to map now */ + array->BufferObj->Pointer = + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY, array->BufferObj); + } + data = ADD_POINTERS(data, array->BufferObj->Pointer); + } + switch (array->Type) { + case GL_FLOAT: + { + GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j); + GLint k; + for (k = 0; k < array->Size; k++) { + if (IS_INF_OR_NAN(f[k]) || + f[k] >= 1.0e20 || f[k] <= -1.0e10) { + printf("Bad array data:\n"); + printf(" Element[%u].%u = %f\n", j, k, f[k]); + printf(" Array %u at %p\n", attrib, (void* ) array); + printf(" Type 0x%x, Size %d, Stride %d\n", + array->Type, array->Size, array->Stride); + printf(" Address/offset %p in Buffer Object %u\n", + array->Ptr, array->BufferObj->Name); + f[k] = 1.0; /* XXX replace the bad value! */ + } + /*assert(!IS_INF_OR_NAN(f[k]));*/ + } + } + break; + default: + ; + } + } +} + + +/** + * Unmap the buffer object referenced by given array, if mapped. + */ +static void +unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array) +{ + if (array->Enabled && + _mesa_is_bufferobj(array->BufferObj) && + _mesa_bufferobj_mapped(array->BufferObj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj); + } +} + + +/** + * Examine the array's data for NaNs, etc. + * For debug purposes; not normally used. + */ +static void +check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, + const void *elements, GLint basevertex) +{ + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + const void *elemMap; + GLint i, k; + + if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { + elemMap = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, + ctx->Array.ElementArrayBufferObj); + elements = ADD_POINTERS(elements, elemMap); + } + + for (i = 0; i < count; i++) { + GLuint j; + + /* j = element[i] */ + switch (elemType) { + case GL_UNSIGNED_BYTE: + j = ((const GLubyte *) elements)[i]; + break; + case GL_UNSIGNED_SHORT: + j = ((const GLushort *) elements)[i]; + break; + case GL_UNSIGNED_INT: + j = ((const GLuint *) elements)[i]; + break; + default: + assert(0); + } + + /* check element j of each enabled array */ + check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j); + check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j); + check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j); + check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j); + for (k = 0; k < Elements(arrayObj->TexCoord); k++) { + check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j); + } + for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { + check_array_data(ctx, &arrayObj->VertexAttrib[k], + VERT_ATTRIB_GENERIC0 + k, j); + } + } + + if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + ctx->Array.ElementArrayBufferObj); + } + + unmap_array_buffer(ctx, &arrayObj->Vertex); + unmap_array_buffer(ctx, &arrayObj->Normal); + unmap_array_buffer(ctx, &arrayObj->Color); + for (k = 0; k < Elements(arrayObj->TexCoord); k++) { + unmap_array_buffer(ctx, &arrayObj->TexCoord[k]); + } + for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { + unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]); + } +} + 
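
Both vbo_get_minmax_index() and check_draw_elements_data() above rely on the same detail: when an element array buffer object is bound, the application's "indices" pointer is really a byte offset into that buffer, so the mapped base address has to be added before dereferencing (that is what ADD_POINTERS does). A minimal sketch of that pattern in plain C; map_buffer() and unmap_buffer() are assumed stand-ins, not the ctx->Driver hooks:

#include <stdint.h>

extern void *map_buffer(unsigned buffer_name);    /* assumed helper */
extern void  unmap_buffer(unsigned buffer_name);  /* assumed helper */

/* Read index[0] of a GL_UNSIGNED_INT element buffer: the application's
 * pointer is treated as an offset from the mapped base address. */
static uint32_t
read_first_index(unsigned element_buffer, const void *app_indices)
{
   const uint8_t *base = map_buffer(element_buffer);
   const uint32_t *idx = (const uint32_t *)(base + (uintptr_t)app_indices);
   uint32_t first = idx[0];

   unmap_buffer(element_buffer);
   return first;
}

When no buffer object is bound, the application pointer is used directly, which is why both functions branch on _mesa_is_bufferobj() before mapping.
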
+ +/** + * Check array data, looking for NaNs, etc. + */ +static void +check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count) +{ + /* TO DO */ +} + + +/** + * Print info/data for glDrawArrays(), for debugging. + */ +static void +print_draw_arrays(struct gl_context *ctx, + GLenum mode, GLint start, GLsizei count) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + int i; + + printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n", + mode, start, count); + + for (i = 0; i < 32; i++) { + GLuint bufName = exec->array.inputs[i]->BufferObj->Name; + GLint stride = exec->array.inputs[i]->Stride; + printf("attr %2d: size %d stride %d enabled %d " + "ptr %p Bufobj %u\n", + i, + exec->array.inputs[i]->Size, + stride, + /*exec->array.inputs[i]->Enabled,*/ + exec->array.legacy_array[i]->Enabled, + exec->array.inputs[i]->Ptr, + bufName); + + if (bufName) { + struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName); + GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY_ARB, buf); + int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; + float *f = (float *) (p + offset); + int *k = (int *) f; + int i; + int n = (count * stride) / 4; + if (n > 32) + n = 32; + printf(" Data at offset %d:\n", offset); + for (i = 0; i < n; i++) { + printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); + } + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf); + } + } +} + + +/** + * Bind the VBO executor to the current vertex array object prior + * to drawing. + * + * Just translate the arrayobj into a sane layout. + */ +static void +bind_array_obj(struct gl_context *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i; + + /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array + * rather than as individual named arrays. Then this function can + * go away. + */ + exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex; + exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight; + exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal; + exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color; + exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor; + exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord; + exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index; + if (arrayObj->PointSize.Enabled) { + /* this aliases COLOR_INDEX */ + exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize; + } + exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag; + + for (i = 0; i < Elements(arrayObj->TexCoord); i++) + exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i]; + + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { + assert(i < Elements(exec->array.generic_array)); + exec->array.generic_array[i] = &arrayObj->VertexAttrib[i]; + } + + exec->array.array_obj = arrayObj->Name; +} + + +/** + * Set the vbo->exec->inputs[] pointers to point to the enabled + * vertex arrays. This depends on the current vertex program/shader + * being executed because of whether or not generic vertex arrays + * alias the conventional vertex arrays. + * For arrays that aren't enabled, we set the input[attrib] pointer + * to point at a zero-stride current value "array". 
+ */ +static void +recalculate_input_bindings(struct gl_context *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + const struct gl_client_array **inputs = &exec->array.inputs[0]; + GLbitfield const_inputs = 0x0; + GLuint i; + + exec->array.program_mode = get_program_mode(ctx); + exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled; + + switch (exec->array.program_mode) { + case VP_NONE: + /* When no vertex program is active (or the vertex program is generated + * from fixed-function state). We put the material values into the + * generic slots. This is the only situation where material values + * are available as per-vertex attributes. + */ + for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + for (i = 0; i < MAT_ATTRIB_MAX; i++) { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + + /* Could use just about anything, just to fill in the empty + * slots: + */ + for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + break; + + case VP_NV: + /* NV_vertex_program - attribute arrays alias and override + * conventional, legacy arrays. No materials, and the generic + * slots are vacant. + */ + for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.generic_array[i]->Enabled) + inputs[i] = exec->array.generic_array[i]; + else if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + /* Could use just about anything, just to fill in the empty + * slots: + */ + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { + inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; + const_inputs |= 1 << i; + } + break; + + case VP_ARB: + /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0] + * attribute array aliases and overrides the legacy position array. + * + * Otherwise, legacy attributes available in the legacy slots, + * generic attributes in the generic slots and materials are not + * available as per-vertex attributes. + */ + if (exec->array.generic_array[0]->Enabled) + inputs[0] = exec->array.generic_array[0]; + else if (exec->array.legacy_array[0]->Enabled) + inputs[0] = exec->array.legacy_array[0]; + else { + inputs[0] = &vbo->legacy_currval[0]; + const_inputs |= 1 << 0; + } + + for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { + if (exec->array.generic_array[i]->Enabled) + inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; + else { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + + } + break; + } + + _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); +} + + +/** + * Examine the enabled vertex arrays to set the exec->array.inputs[] values. + * These will point to the arrays to actually use for drawing. Some will + * be user-provided arrays, other will be zero-stride const-valued arrays. 
+ * Note that this might set the _NEW_ARRAY dirty flag so state validation + * must be done after this call. + */ +static void +bind_arrays(struct gl_context *ctx) +{ + if (!ctx->Array.RebindArrays) { + return; + } + + bind_array_obj(ctx); + recalculate_input_bindings(ctx); + ctx->Array.RebindArrays = GL_FALSE; +} + + +/** + * Helper function called by the other DrawArrays() functions below. + * This is where we handle primitive restart for drawing non-indexed + * arrays. If primitive restart is enabled, it typically means + * splitting one DrawArrays() into two. + */ +static void +vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start, + GLsizei count, GLuint numInstances) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_prim prim[2]; + + bind_arrays(ctx); + + /* Again... because we may have changed the bitmask of per-vertex varying + * attributes. If we regenerate the fixed-function vertex program now + * we may be able to prune down the number of vertex attributes which we + * need in the shader. + */ + if (ctx->NewState) + _mesa_update_state(ctx); + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; /* filled in below */ + prim[0].count = 0; /* filled in below */ + prim[0].indexed = 0; + prim[0].basevertex = 0; + prim[0].num_instances = numInstances; + + /* Implement the primitive restart index */ + if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) { + GLuint primCount = 0; + + if (ctx->Array.RestartIndex == start) { + /* special case: RestartIndex at beginning */ + if (count > 1) { + prim[0].start = start + 1; + prim[0].count = count - 1; + primCount = 1; + } + } + else if (ctx->Array.RestartIndex == start + count - 1) { + /* special case: RestartIndex at end */ + if (count > 1) { + prim[0].start = start; + prim[0].count = count - 1; + primCount = 1; + } + } + else { + /* general case: RestartIndex in middle, split into two prims */ + prim[0].start = start; + prim[0].count = ctx->Array.RestartIndex - start; + + prim[1] = prim[0]; + prim[1].start = ctx->Array.RestartIndex + 1; + prim[1].count = count - prim[1].start; + + primCount = 2; + } + + if (primCount > 0) { + /* draw one or two prims */ + vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL, + GL_TRUE, start, start + count - 1); + } + } + else { + /* no prim restart */ + prim[0].start = start; + prim[0].count = count; + + vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL, + GL_TRUE, start, start + count - 1); + } +} + + + +/** + * Called from glDrawArrays when in immediate mode (not display list mode). + */ +static void GLAPIENTRY +vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, count); + + if (!_mesa_validate_DrawArrays( ctx, mode, start, count )) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDrawArrays")) { + return; + } + + if (0) + check_draw_arrays_data(ctx, start, count); + + vbo_draw_arrays(ctx, mode, start, count, 1); + + if (0) + print_draw_arrays(ctx, mode, start, count); +} + + +/** + * Called from glDrawArraysInstanced when in immediate mode (not + * display list mode). 
+ */ +static void GLAPIENTRY +vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, + GLsizei numInstances) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, count, numInstances); + + if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances)) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) { + return; + } + + if (0) + check_draw_arrays_data(ctx, start, count); + + vbo_draw_arrays(ctx, mode, start, count, numInstances); + + if (0) + print_draw_arrays(ctx, mode, start, count); +} + + +/** + * Map GL_ELEMENT_ARRAY_BUFFER and print contents. + * For debugging. + */ +static void +dump_element_buffer(struct gl_context *ctx, GLenum type) +{ + const GLvoid *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, + ctx->Array.ElementArrayBufferObj); + switch (type) { + case GL_UNSIGNED_BYTE: + { + const GLubyte *us = (const GLubyte *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) { + printf("%02x ", us[i]); + if (i % 32 == 31) + printf("\n"); + } + printf("\n"); + } + break; + case GL_UNSIGNED_SHORT: + { + const GLushort *us = (const GLushort *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) { + printf("%04x ", us[i]); + if (i % 16 == 15) + printf("\n"); + } + printf("\n"); + } + break; + case GL_UNSIGNED_INT: + { + const GLuint *us = (const GLuint *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) { + printf("%08x ", us[i]); + if (i % 8 == 7) + printf("\n"); + } + printf("\n"); + } + break; + default: + ; + } + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + ctx->Array.ElementArrayBufferObj); +} + + +/** + * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements. + * Do the rendering for a glDrawElements or glDrawRangeElements call after + * we've validated buffer bounds, etc. + */ +static void +vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, + GLboolean index_bounds_valid, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, + GLint basevertex, GLint numInstances) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_index_buffer ib; + struct _mesa_prim prim[1]; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) { + return; + } + + bind_arrays( ctx ); + + /* check for dirty state again */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + ib.count = count; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = indices; + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count; + prim[0].indexed = 1; + prim[0].basevertex = basevertex; + prim[0].num_instances = numInstances; + + /* Need to give special consideration to rendering a range of + * indices starting somewhere above zero. Typically the + * application is issuing multiple DrawRangeElements() to draw + * successive primitives layed out linearly in the vertex arrays. + * Unless the vertex arrays are all in a VBO (or locked as with + * CVA), the OpenGL semantics imply that we need to re-read or + * re-upload the vertex data on each draw call. 
+ * + * In the case of hardware tnl, we want to avoid starting the + * upload at zero, as it will mean every draw call uploads an + * increasing amount of not-used vertex data. Worse - in the + * software tnl module, all those vertices might be transformed and + * lit but never rendered. + * + * If we just upload or transform the vertices in start..end, + * however, the indices will be incorrect. + * + * At this level, we don't know exactly what the requirements of + * the backend are going to be, though it will likely boil down to + * either: + * + * 1) Do nothing, everything is in a VBO and is processed once + * only. + * + * 2) Adjust the indices and vertex arrays so that start becomes + * zero. + * + * Rather than doing anything here, I'll provide a helper function + * for the latter case elsewhere. + */ + + vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, + index_bounds_valid, start, end ); +} + + +/** + * Called by glDrawRangeElementsBaseVertex() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, + GLint basevertex) +{ + static GLuint warnCount = 0; + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, + "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, end, count, + _mesa_lookup_enum_by_nr(type), indices, basevertex); + + if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, + type, indices, basevertex )) + return; + + /* NOTE: It's important that 'end' is a reasonable value. + * in _tnl_draw_prims(), we use end to determine how many vertices + * to transform. If it's too large, we can unnecessarily split prims + * or we can read/write out of memory in several different places! + */ + + /* Catch/fix some potential user errors */ + if (type == GL_UNSIGNED_BYTE) { + start = MIN2(start, 0xff); + end = MIN2(end, 0xff); + } + else if (type == GL_UNSIGNED_SHORT) { + start = MIN2(start, 0xffff); + end = MIN2(end, 0xffff); + } + + if (end >= ctx->Array.ArrayObj->_MaxElement) { + /* the max element is out of bounds of one or more enabled arrays */ + warnCount++; + + if (warnCount < 10) { + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " + "type 0x%x, indices=%p)\n" + "\tend is out of bounds (max=%u) " + "Element Buffer %u (size %d)\n" + "\tThis should probably be fixed in the application.", + start, end, count, type, indices, + ctx->Array.ArrayObj->_MaxElement - 1, + ctx->Array.ElementArrayBufferObj->Name, + (int) ctx->Array.ElementArrayBufferObj->Size); + } + + if (0) + dump_element_buffer(ctx, type); + + if (0) + _mesa_print_arrays(ctx); + +#ifdef DEBUG + /* 'end' was out of bounds, but now let's check the actual array + * indexes to see if any of them are out of bounds. 
+ */ + { + GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, + ctx->Array.ElementArrayBufferObj); + if (max >= ctx->Array.ArrayObj->_MaxElement) { + if (warnCount < 10) { + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, " + "count %d, type 0x%x, indices=%p)\n" + "\tindex=%u is out of bounds (max=%u) " + "Element Buffer %u (size %d)\n" + "\tSkipping the glDrawRangeElements() call", + start, end, count, type, indices, max, + ctx->Array.ArrayObj->_MaxElement - 1, + ctx->Array.ElementArrayBufferObj->Name, + (int) ctx->Array.ElementArrayBufferObj->Size); + } + } + /* XXX we could also find the min index and compare to 'start' + * to see if start is correct. But it's more likely to get the + * upper bound wrong. + */ + } +#endif + + /* Set 'end' to the max possible legal value */ + assert(ctx->Array.ArrayObj->_MaxElement >= 1); + end = ctx->Array.ArrayObj->_MaxElement - 1; + } + else if (0) { + printf("glDraw[Range]Elements{,BaseVertex}" + "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, " + "base %d\n", + start, end, type, count, + ctx->Array.ElementArrayBufferObj->Name, + basevertex); + } + +#if 0 + check_draw_elements_data(ctx, count, type, indices); +#else + (void) check_draw_elements_data; +#endif + + vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end, + count, type, indices, basevertex, 1); +} + + +/** + * Called by glDrawRangeElements() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, + "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n", + _mesa_lookup_enum_by_nr(mode), start, end, count, + _mesa_lookup_enum_by_nr(type), indices); + + vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, 0); +} + + +/** + * Called by glDrawElements() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices); + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 )) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, 0, 1); +} + + +/** + * Called by glDrawElementsBaseVertex() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices, basevertex); + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, + basevertex )) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, basevertex, 1); +} + + +/** + * Called by glDrawElementsInstanced() in immediate mode. 
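The DEBUG block above asks _mesa_max_buffer_index() for the real upper bound of the indices so it can warn when the application's 'end' was wrong. For a client-memory array that scan is just a typed loop; a minimal standalone version (hypothetical name) might be:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Walk a client-memory index array and return the largest index found;
 * elem_size is 1, 2 or 4 bytes (GL_UNSIGNED_BYTE/SHORT/INT). */
static uint32_t
max_index(const void *indices, size_t count, size_t elem_size)
{
   uint32_t max = 0;
   size_t i;

   for (i = 0; i < count; i++) {
      uint32_t v = 0;
      switch (elem_size) {
      case 1: v = ((const uint8_t *) indices)[i];  break;
      case 2: v = ((const uint16_t *) indices)[i]; break;
      case 4: v = ((const uint32_t *) indices)[i]; break;
      }
      if (v > max)
         max = v;
   }
   return max;
}

int
main(void)
{
   static const uint8_t idx[4] = { 3, 7, 250, 9 };
   printf("max index = %u\n", (unsigned) max_index(idx, 4, 1));
   return 0;
}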
+ */ +static void GLAPIENTRY +vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLsizei numInstances) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices, numInstances); + + if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, + numInstances)) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, 0, numInstances); +} + + +/** + * Inner support for both _mesa_MultiDrawElements() and + * _mesa_MultiDrawRangeElements(). + * This does the actual rendering after we've checked array indexes, etc. + */ +static void +vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount, + const GLint *basevertex) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_index_buffer ib; + struct _mesa_prim *prim; + unsigned int index_type_size = 0; + uintptr_t min_index_ptr, max_index_ptr; + GLboolean fallback = GL_FALSE; + int i; + + if (primcount == 0) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) { + return; + } + + prim = calloc(1, primcount * sizeof(*prim)); + if (prim == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements"); + return; + } + + /* Decide if we can do this all as one set of primitives sharing the + * same index buffer, or if we have to reset the index pointer per + * primitive. + */ + bind_arrays( ctx ); + + /* check for dirty state again */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + switch (type) { + case GL_UNSIGNED_INT: + index_type_size = 4; + break; + case GL_UNSIGNED_SHORT: + index_type_size = 2; + break; + case GL_UNSIGNED_BYTE: + index_type_size = 1; + break; + default: + assert(0); + } + + min_index_ptr = (uintptr_t)indices[0]; + max_index_ptr = 0; + for (i = 0; i < primcount; i++) { + min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]); + max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] + + index_type_size * count[i]); + } + + /* Check if we can handle this thing as a bunch of index offsets from the + * same index pointer. If we can't, then we have to fall back to doing + * a draw_prims per primitive. + * Check that the difference between each prim's indexes is a multiple of + * the index/element size. + */ + if (index_type_size != 1) { + for (i = 0; i < primcount; i++) { + if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) { + fallback = GL_TRUE; + break; + } + } + } + + /* If the index buffer isn't in a VBO, then treating the application's + * subranges of the index buffer as one large index buffer may lead to + * us reading unmapped memory. 
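The pointer scan above decides whether all per-primitive index pointers can be treated as element offsets into one shared index buffer. A standalone sketch of that decision (hypothetical names; as the comment notes, the real code also falls back whenever the indices are not in a buffer object):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>

/* Decide whether 'primcount' index pointers can share a single index buffer:
 * compute the overall [min, max) byte span and require every pointer to sit
 * a whole number of elements above the lowest one. */
static bool
can_share_index_buffer(const void **indices, const int *count, int primcount,
                       size_t elem_size, uintptr_t *min_ptr, uintptr_t *max_ptr)
{
   int i;

   *min_ptr = (uintptr_t) indices[0];
   *max_ptr = 0;
   for (i = 0; i < primcount; i++) {
      const uintptr_t lo = (uintptr_t) indices[i];
      const uintptr_t hi = lo + elem_size * (size_t) count[i];
      if (lo < *min_ptr) *min_ptr = lo;
      if (hi > *max_ptr) *max_ptr = hi;
   }

   if (elem_size == 1)
      return true;
   for (i = 0; i < primcount; i++) {
      if (((uintptr_t) indices[i] - *min_ptr) % elem_size != 0)
         return false;
   }
   return true;
}

int
main(void)
{
   static const uint16_t idx[6] = { 0, 1, 2, 2, 1, 3 };
   const void *ptrs[2] = { &idx[0], &idx[3] };
   const int counts[2] = { 3, 3 };
   uintptr_t lo, hi;
   bool shareable = can_share_index_buffer(ptrs, counts, 2, sizeof idx[0], &lo, &hi);
   printf("shareable=%d span=%lu bytes\n", shareable, (unsigned long) (hi - lo));
   return 0;
}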
+ */ + if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) + fallback = GL_TRUE; + + if (!fallback) { + ib.count = (max_index_ptr - min_index_ptr) / index_type_size; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = (void *)min_index_ptr; + + for (i = 0; i < primcount; i++) { + prim[i].begin = (i == 0); + prim[i].end = (i == primcount - 1); + prim[i].weak = 0; + prim[i].pad = 0; + prim[i].mode = mode; + prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size; + prim[i].count = count[i]; + prim[i].indexed = 1; + prim[i].num_instances = 1; + if (basevertex != NULL) + prim[i].basevertex = basevertex[i]; + else + prim[i].basevertex = 0; + } + + vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib, + GL_FALSE, ~0, ~0); + } else { + /* render one prim at a time */ + for (i = 0; i < primcount; i++) { + ib.count = count[i]; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = indices[i]; + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count[i]; + prim[0].indexed = 1; + prim[0].num_instances = 1; + if (basevertex != NULL) + prim[0].basevertex = basevertex[i]; + else + prim[0].basevertex = 0; + + vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib, + GL_FALSE, ~0, ~0); + } + } + + free(prim); +} + + +static void GLAPIENTRY +vbo_exec_MultiDrawElements(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, + GLsizei primcount) +{ + GET_CURRENT_CONTEXT(ctx); + GLint i; + + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); + + for (i = 0; i < primcount; i++) { + if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], + 0)) + return; + } + + vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, + NULL); +} + + +static void GLAPIENTRY +vbo_exec_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, + GLsizei primcount, + const GLsizei *basevertex) +{ + GET_CURRENT_CONTEXT(ctx); + GLint i; + + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); + + for (i = 0; i < primcount; i++) { + if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], + basevertex[i])) + return; + } + + vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, + basevertex); +} + + +/** + * Plug in the immediate-mode vertex array drawing commands into the + * givven vbo_exec_context object. + */ +void +vbo_exec_array_init( struct vbo_exec_context *exec ) +{ + exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays; + exec->vtxfmt.DrawElements = vbo_exec_DrawElements; + exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements; + exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements; + exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex; + exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex; + exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex; + exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced; + exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced; +} + + +void +vbo_exec_array_destroy( struct vbo_exec_context *exec ) +{ + /* nothing to do */ +} + + + +/** + * The following functions are only used for OpenGL ES 1/2 support. + * And some aren't even supported (yet) in ES 1/2. 
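vbo_exec_array_init() above simply plugs the immediate-mode entry points into a table of function pointers that the dispatch layer calls later. Stripped of the GL specifics, the pattern is just this (hypothetical types and names):

#include <stdio.h>

/* A cut-down "vtxfmt"-style table: callers go through the function pointers,
 * and an init routine plugs in the implementations, much as
 * vbo_exec_array_init() does for the real dispatch table. */
struct draw_vtable {
   void (*DrawArrays)(int mode, int first, int count);
   void (*DrawElements)(int mode, int count, int type, const void *indices);
};

static void
my_draw_arrays(int mode, int first, int count)
{
   printf("DrawArrays(mode=%d, first=%d, count=%d)\n", mode, first, count);
}

static void
my_draw_elements(int mode, int count, int type, const void *indices)
{
   (void) indices;
   printf("DrawElements(mode=%d, count=%d, type=%d)\n", mode, count, type);
}

static void
draw_vtable_init(struct draw_vtable *v)
{
   v->DrawArrays   = my_draw_arrays;
   v->DrawElements = my_draw_elements;
}

int
main(void)
{
   struct draw_vtable v;
   draw_vtable_init(&v);
   v.DrawArrays(0, 0, 3);
   return 0;
}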
+ */ + + +void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) +{ + vbo_exec_DrawArrays(mode, first, count); +} + + +void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + vbo_exec_DrawElements(mode, count, type, indices); +} + + +void GLAPIENTRY +_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex); +} + + +void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, + GLenum type, const GLvoid *indices) +{ + vbo_exec_DrawRangeElements(mode, start, end, count, type, indices); +} + + +void GLAPIENTRY +_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, basevertex); +} + + +void GLAPIENTRY +_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount) +{ + vbo_exec_MultiDrawElements(mode, count, type, indices, primcount); +} + + +void GLAPIENTRY +_mesa_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount, + const GLint *basevertex) +{ + vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices, + primcount, basevertex); +} diff --git a/mesalib/src/mesa/vbo/vbo_exec_draw.c b/mesalib/src/mesa/vbo/vbo_exec_draw.c index 048f3d170..f8be83ea8 100644 --- a/mesalib/src/mesa/vbo/vbo_exec_draw.c +++ b/mesalib/src/mesa/vbo/vbo_exec_draw.c @@ -1,420 +1,421 @@ -/* - * Mesa 3-D graphics library - * Version: 7.2 - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - * Authors: - * Keith Whitwell - */ - -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/compiler.h" -#include "main/enums.h" -#include "main/mfeatures.h" -#include "main/state.h" - -#include "vbo_context.h" - - -#if FEATURE_beginend - - -static void -vbo_exec_debug_verts( struct vbo_exec_context *exec ) -{ - GLuint count = exec->vtx.vert_count; - GLuint i; - - printf("%s: %u vertices %d primitives, %d vertsize\n", - __FUNCTION__, - count, - exec->vtx.prim_count, - exec->vtx.vertex_size); - - for (i = 0 ; i < exec->vtx.prim_count ; i++) { - struct _mesa_prim *prim = &exec->vtx.prim[i]; - printf(" prim %d: %s%s %d..%d %s %s\n", - i, - _mesa_lookup_prim_by_nr(prim->mode), - prim->weak ? " (weak)" : "", - prim->start, - prim->start + prim->count, - prim->begin ? "BEGIN" : "(wrap)", - prim->end ? "END" : "(wrap)"); - } -} - - -/* - * NOTE: Need to have calculated primitives by this point -- do it on the fly. - * NOTE: Old 'parity' issue is gone. - */ -static GLuint -vbo_copy_vertices( struct vbo_exec_context *exec ) -{ - GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; - GLuint ovf, i; - GLuint sz = exec->vtx.vertex_size; - GLfloat *dst = exec->vtx.copied.buffer; - const GLfloat *src = (exec->vtx.buffer_map + - exec->vtx.prim[exec->vtx.prim_count-1].start * - exec->vtx.vertex_size); - - - switch (exec->ctx->Driver.CurrentExecPrimitive) { - case GL_POINTS: - return 0; - case GL_LINES: - ovf = nr&1; - for (i = 0 ; i < ovf ; i++) - memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); - return i; - case GL_TRIANGLES: - ovf = nr%3; - for (i = 0 ; i < ovf ; i++) - memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); - return i; - case GL_QUADS: - ovf = nr&3; - for (i = 0 ; i < ovf ; i++) - memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); - return i; - case GL_LINE_STRIP: - if (nr == 0) { - return 0; - } - else { - memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) ); - return 1; - } - case GL_LINE_LOOP: - case GL_TRIANGLE_FAN: - case GL_POLYGON: - if (nr == 0) { - return 0; - } - else if (nr == 1) { - memcpy( dst, src+0, sz * sizeof(GLfloat) ); - return 1; - } - else { - memcpy( dst, src+0, sz * sizeof(GLfloat) ); - memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) ); - return 2; - } - case GL_TRIANGLE_STRIP: - /* no parity issue, but need to make sure the tri is not drawn twice */ - if (nr & 1) { - exec->vtx.prim[exec->vtx.prim_count-1].count--; - } - /* fallthrough */ - case GL_QUAD_STRIP: - switch (nr) { - case 0: - ovf = 0; - break; - case 1: - ovf = 1; - break; - default: - ovf = 2 + (nr & 1); - break; - } - for (i = 0 ; i < ovf ; i++) - memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); - return i; - case PRIM_OUTSIDE_BEGIN_END: - return 0; - default: - assert(0); - return 0; - } -} - - - -/* TODO: populate these as the vertex is defined: - */ -static void -vbo_exec_bind_arrays( struct gl_context *ctx ) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_exec_context *exec = &vbo->exec; - struct gl_client_array *arrays = exec->vtx.arrays; - const GLuint count = exec->vtx.vert_count; - const GLuint *map; - GLuint attr; - GLbitfield varying_inputs = 0x0; - - /* Install the default (ie Current) attributes first, then overlay - * all active ones. 
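vbo_copy_vertices() in the removed file above computes how many trailing vertices must be carried into the next buffer when a primitive wraps across a flush. The per-mode arithmetic can be summarized in a small standalone helper (hypothetical enum; note the real code also shortens an odd triangle strip by one vertex so the last triangle is not drawn twice):

#include <stdio.h>

enum wrap_prim {
   WRAP_POINTS,
   WRAP_LINES,
   WRAP_TRIANGLES,
   WRAP_QUADS,
   WRAP_LINE_STRIP,
   WRAP_FAN_LIKE,    /* line loop, triangle fan, polygon */
   WRAP_STRIP        /* triangle strip, quad strip */
};

/* How many trailing vertices must be copied so the primitive can continue
 * in the next buffer: whole primitives are never split, only the remainder
 * (or the connecting vertices of strips, fans and loops) carries over. */
static unsigned
vertices_to_copy(enum wrap_prim mode, unsigned nr)
{
   switch (mode) {
   case WRAP_POINTS:     return 0;
   case WRAP_LINES:      return nr & 1;
   case WRAP_TRIANGLES:  return nr % 3;
   case WRAP_QUADS:      return nr & 3;
   case WRAP_LINE_STRIP: return nr ? 1 : 0;
   case WRAP_FAN_LIKE:   return nr >= 2 ? 2 : nr;
   case WRAP_STRIP:      return nr >= 2 ? 2 + (nr & 1) : nr;
   default:              return 0;
   }
}

int
main(void)
{
   printf("7 verts of triangles -> copy %u\n", vertices_to_copy(WRAP_TRIANGLES, 7));
   printf("7 verts of a strip   -> copy %u\n", vertices_to_copy(WRAP_STRIP, 7));
   return 0;
}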
- */ - switch (get_program_mode(exec->ctx)) { - case VP_NONE: - for (attr = 0; attr < 16; attr++) { - exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; - } - for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { - ASSERT(attr + 16 < Elements(exec->vtx.inputs)); - exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr]; - } - map = vbo->map_vp_none; - break; - case VP_NV: - case VP_ARB: - /* The aliasing of attributes for NV vertex programs has already - * occurred. NV vertex programs cannot access material values, - * nor attributes greater than VERT_ATTRIB_TEX7. - */ - for (attr = 0; attr < 16; attr++) { - exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; - ASSERT(attr + 16 < Elements(exec->vtx.inputs)); - exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr]; - } - map = vbo->map_vp_arb; - - /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. - * In that case we effectively need to route the data from - * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. - */ - if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && - (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { - exec->vtx.inputs[16] = exec->vtx.inputs[0]; - exec->vtx.attrsz[16] = exec->vtx.attrsz[0]; - exec->vtx.attrptr[16] = exec->vtx.attrptr[0]; - exec->vtx.attrsz[0] = 0; - } - break; - default: - assert(0); - } - - /* Make all active attributes (including edgeflag) available as - * arrays of floats. - */ - for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) { - const GLuint src = map[attr]; - - if (exec->vtx.attrsz[src]) { - GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] - - (GLbyte *)exec->vtx.vertex; - - /* override the default array set above */ - ASSERT(attr < Elements(exec->vtx.inputs)); - ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */ - exec->vtx.inputs[attr] = &arrays[attr]; - - if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { - /* a real buffer obj: Ptr is an offset, not a pointer*/ - assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */ - assert(offset >= 0); - arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset; - } - else { - /* Ptr into ordinary app memory */ - arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset; - } - arrays[attr].Size = exec->vtx.attrsz[src]; - arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat); - arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat); - arrays[attr].Type = GL_FLOAT; - arrays[attr].Format = GL_RGBA; - arrays[attr].Enabled = 1; - _mesa_reference_buffer_object(ctx, - &arrays[attr].BufferObj, - exec->vtx.bufferobj); - arrays[attr]._MaxElement = count; /* ??? 
*/ - - varying_inputs |= 1 << attr; - } - } - - _mesa_set_varying_vp_inputs( ctx, varying_inputs ); -} - - -static void -vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) -{ - GLenum target = GL_ARRAY_BUFFER_ARB; - - if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { - struct gl_context *ctx = exec->ctx; - - if (ctx->Driver.FlushMappedBufferRange) { - GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset; - GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); - - if (length) - ctx->Driver.FlushMappedBufferRange(ctx, target, - offset, length, - exec->vtx.bufferobj); - } - - exec->vtx.buffer_used += (exec->vtx.buffer_ptr - - exec->vtx.buffer_map) * sizeof(float); - - assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); - assert(exec->vtx.buffer_ptr != NULL); - - ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); - exec->vtx.buffer_map = NULL; - exec->vtx.buffer_ptr = NULL; - exec->vtx.max_vert = 0; - } -} - - -void -vbo_exec_vtx_map( struct vbo_exec_context *exec ) -{ - struct gl_context *ctx = exec->ctx; - const GLenum target = GL_ARRAY_BUFFER_ARB; - const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */ - const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */ - GL_MAP_INVALIDATE_RANGE_BIT | - GL_MAP_UNSYNCHRONIZED_BIT | - GL_MAP_FLUSH_EXPLICIT_BIT | - MESA_MAP_NOWAIT_BIT; - const GLenum usage = GL_STREAM_DRAW_ARB; - - if (!_mesa_is_bufferobj(exec->vtx.bufferobj)) - return; - - if (exec->vtx.buffer_map != NULL) { - assert(0); - exec->vtx.buffer_map = NULL; - exec->vtx.buffer_ptr = NULL; - } - - if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && - ctx->Driver.MapBufferRange) { - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBufferRange(ctx, - target, - exec->vtx.buffer_used, - (VBO_VERT_BUFFER_SIZE - - exec->vtx.buffer_used), - accessRange, - exec->vtx.bufferobj); - exec->vtx.buffer_ptr = exec->vtx.buffer_map; - } - - if (!exec->vtx.buffer_map) { - exec->vtx.buffer_used = 0; - - ctx->Driver.BufferData(ctx, target, - VBO_VERT_BUFFER_SIZE, - NULL, usage, exec->vtx.bufferobj); - - - if (ctx->Driver.MapBufferRange) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, - 0, VBO_VERT_BUFFER_SIZE, - accessRange, - exec->vtx.bufferobj); - if (!exec->vtx.buffer_map) - exec->vtx.buffer_map = - (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); - assert(exec->vtx.buffer_map); - exec->vtx.buffer_ptr = exec->vtx.buffer_map; - } - - if (0) - printf("map %d..\n", exec->vtx.buffer_used); -} - - - -/** - * Execute the buffer and save copied verts. - */ -void -vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ) -{ - if (0) - vbo_exec_debug_verts( exec ); - - if (exec->vtx.prim_count && - exec->vtx.vert_count) { - - exec->vtx.copied.nr = vbo_copy_vertices( exec ); - - if (exec->vtx.copied.nr != exec->vtx.vert_count) { - struct gl_context *ctx = exec->ctx; - - /* Before the update_state() as this may raise _NEW_ARRAY - * from _mesa_set_varying_vp_inputs(). - */ - vbo_exec_bind_arrays( ctx ); - - if (ctx->NewState) - _mesa_update_state( ctx ); - - if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { - vbo_exec_vtx_unmap( exec ); - } - - if (0) - printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, - exec->vtx.vert_count); - - vbo_context(ctx)->draw_prims( ctx, - exec->vtx.inputs, - exec->vtx.prim, - exec->vtx.prim_count, - NULL, - GL_TRUE, - 0, - exec->vtx.vert_count - 1); - - /* If using a real VBO, get new storage -- unless asked not to. 
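vbo_exec_vtx_map() above is the classic streaming-VBO pattern: append into the unused tail of the buffer with an unsynchronized ranged map, and orphan the storage with BufferData when the tail runs out. Expressed against the public GL API instead of the driver hooks, a sketch of the same strategy (assumes a current GL 3.x context and a loader that exposes these entry points; names and the buffer size are hypothetical):

#include <GL/glew.h>   /* any loader exposing the GL 3.x entry points works */

#define STREAM_VBO_SIZE (4 * 1024 * 1024)

/* Map 'size' bytes for appending at byte offset '*used' of a streaming VBO,
 * orphaning the storage when the tail is full.  This mirrors the
 * MapBufferRange/BufferData strategy in vbo_exec_vtx_map(). */
static void *
map_for_append(GLuint vbo, GLsizeiptr size, GLintptr *used)
{
   glBindBuffer(GL_ARRAY_BUFFER, vbo);

   if (*used + size > STREAM_VBO_SIZE) {
      /* Out of room: orphan the old storage so the GPU can keep reading it
       * while we get a fresh buffer to write into. */
      glBufferData(GL_ARRAY_BUFFER, STREAM_VBO_SIZE, NULL, GL_STREAM_DRAW);
      *used = 0;
   }

   return glMapBufferRange(GL_ARRAY_BUFFER, *used, size,
                           GL_MAP_WRITE_BIT |
                           GL_MAP_INVALIDATE_RANGE_BIT |
                           GL_MAP_UNSYNCHRONIZED_BIT |
                           GL_MAP_FLUSH_EXPLICIT_BIT);
}

/* After writing 'size' bytes into the mapping: flush just that range,
 * unmap, and advance the append offset. */
static void
finish_append(GLsizeiptr size, GLintptr *used)
{
   glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, size);
   glUnmapBuffer(GL_ARRAY_BUFFER);
   *used += size;
}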
- */ - if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) { - vbo_exec_vtx_map( exec ); - } - } - } - - /* May have to unmap explicitly if we didn't draw: - */ - if (unmap && - _mesa_is_bufferobj(exec->vtx.bufferobj) && - exec->vtx.buffer_map) { - vbo_exec_vtx_unmap( exec ); - } - - - if (unmap || exec->vtx.vertex_size == 0) - exec->vtx.max_vert = 0; - else - exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / - (exec->vtx.vertex_size * sizeof(GLfloat))); - - exec->vtx.buffer_ptr = exec->vtx.buffer_map; - exec->vtx.prim_count = 0; - exec->vtx.vert_count = 0; -} - - -#endif /* FEATURE_beginend */ +/* + * Mesa 3-D graphics library + * Version: 7.2 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/bufferobj.h" +#include "main/compiler.h" +#include "main/enums.h" +#include "main/mfeatures.h" +#include "main/state.h" + +#include "vbo_context.h" + + +#if FEATURE_beginend + + +static void +vbo_exec_debug_verts( struct vbo_exec_context *exec ) +{ + GLuint count = exec->vtx.vert_count; + GLuint i; + + printf("%s: %u vertices %d primitives, %d vertsize\n", + __FUNCTION__, + count, + exec->vtx.prim_count, + exec->vtx.vertex_size); + + for (i = 0 ; i < exec->vtx.prim_count ; i++) { + struct _mesa_prim *prim = &exec->vtx.prim[i]; + printf(" prim %d: %s%s %d..%d %s %s\n", + i, + _mesa_lookup_prim_by_nr(prim->mode), + prim->weak ? " (weak)" : "", + prim->start, + prim->start + prim->count, + prim->begin ? "BEGIN" : "(wrap)", + prim->end ? "END" : "(wrap)"); + } +} + + +/* + * NOTE: Need to have calculated primitives by this point -- do it on the fly. + * NOTE: Old 'parity' issue is gone. 
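The max_vert computation at the end of the flush above only asks how many whole vertices still fit in the unused part of the stream buffer. As a tiny standalone helper (hypothetical names):

#include <stdio.h>
#include <stddef.h>

/* How many whole vertices of 'vertex_size' floats still fit after 'used'
 * bytes of a 'total'-byte vertex buffer (the max_vert computation above). */
static unsigned
max_vertices_left(size_t total, size_t used, unsigned vertex_size)
{
   if (vertex_size == 0 || used >= total)
      return 0;
   return (unsigned) ((total - used) / (vertex_size * sizeof(float)));
}

int
main(void)
{
   printf("%u vertices left\n", max_vertices_left(4096, 4000, 8));
   return 0;
}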
+ */ +static GLuint +vbo_copy_vertices( struct vbo_exec_context *exec ) +{ + GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; + GLuint ovf, i; + GLuint sz = exec->vtx.vertex_size; + GLfloat *dst = exec->vtx.copied.buffer; + const GLfloat *src = (exec->vtx.buffer_map + + exec->vtx.prim[exec->vtx.prim_count-1].start * + exec->vtx.vertex_size); + + + switch (exec->ctx->Driver.CurrentExecPrimitive) { + case GL_POINTS: + return 0; + case GL_LINES: + ovf = nr&1; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_TRIANGLES: + ovf = nr%3; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_QUADS: + ovf = nr&3; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_LINE_STRIP: + if (nr == 0) { + return 0; + } + else { + memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 1; + } + case GL_LINE_LOOP: + case GL_TRIANGLE_FAN: + case GL_POLYGON: + if (nr == 0) { + return 0; + } + else if (nr == 1) { + memcpy( dst, src+0, sz * sizeof(GLfloat) ); + return 1; + } + else { + memcpy( dst, src+0, sz * sizeof(GLfloat) ); + memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 2; + } + case GL_TRIANGLE_STRIP: + /* no parity issue, but need to make sure the tri is not drawn twice */ + if (nr & 1) { + exec->vtx.prim[exec->vtx.prim_count-1].count--; + } + /* fallthrough */ + case GL_QUAD_STRIP: + switch (nr) { + case 0: + ovf = 0; + break; + case 1: + ovf = 1; + break; + default: + ovf = 2 + (nr & 1); + break; + } + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case PRIM_OUTSIDE_BEGIN_END: + return 0; + default: + assert(0); + return 0; + } +} + + + +/* TODO: populate these as the vertex is defined: + */ +static void +vbo_exec_bind_arrays( struct gl_context *ctx ) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct gl_client_array *arrays = exec->vtx.arrays; + const GLuint count = exec->vtx.vert_count; + const GLuint *map; + GLuint attr; + GLbitfield varying_inputs = 0x0; + + /* Install the default (ie Current) attributes first, then overlay + * all active ones. + */ + switch (get_program_mode(exec->ctx)) { + case VP_NONE: + for (attr = 0; attr < 16; attr++) { + exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; + } + for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { + ASSERT(attr + 16 < Elements(exec->vtx.inputs)); + exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr]; + } + map = vbo->map_vp_none; + break; + case VP_NV: + case VP_ARB: + /* The aliasing of attributes for NV vertex programs has already + * occurred. NV vertex programs cannot access material values, + * nor attributes greater than VERT_ATTRIB_TEX7. + */ + for (attr = 0; attr < 16; attr++) { + exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; + ASSERT(attr + 16 < Elements(exec->vtx.inputs)); + exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr]; + } + map = vbo->map_vp_arb; + + /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. + * In that case we effectively need to route the data from + * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. 
+ */ + if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && + (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { + exec->vtx.inputs[16] = exec->vtx.inputs[0]; + exec->vtx.attrsz[16] = exec->vtx.attrsz[0]; + exec->vtx.attrptr[16] = exec->vtx.attrptr[0]; + exec->vtx.attrsz[0] = 0; + } + break; + default: + assert(0); + } + + /* Make all active attributes (including edgeflag) available as + * arrays of floats. + */ + for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) { + const GLuint src = map[attr]; + + if (exec->vtx.attrsz[src]) { + GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] - + (GLbyte *)exec->vtx.vertex; + + /* override the default array set above */ + ASSERT(attr < Elements(exec->vtx.inputs)); + ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */ + exec->vtx.inputs[attr] = &arrays[attr]; + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + /* a real buffer obj: Ptr is an offset, not a pointer*/ + assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */ + assert(offset >= 0); + arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset; + } + else { + /* Ptr into ordinary app memory */ + arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset; + } + arrays[attr].Size = exec->vtx.attrsz[src]; + arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Type = GL_FLOAT; + arrays[attr].Format = GL_RGBA; + arrays[attr].Enabled = 1; + _mesa_reference_buffer_object(ctx, + &arrays[attr].BufferObj, + exec->vtx.bufferobj); + arrays[attr]._MaxElement = count; /* ??? */ + + varying_inputs |= 1 << attr; + ctx->NewState |= _NEW_ARRAY; + } + } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); +} + + +static void +vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) +{ + GLenum target = GL_ARRAY_BUFFER_ARB; + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + struct gl_context *ctx = exec->ctx; + + if (ctx->Driver.FlushMappedBufferRange) { + GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset; + GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); + + if (length) + ctx->Driver.FlushMappedBufferRange(ctx, target, + offset, length, + exec->vtx.bufferobj); + } + + exec->vtx.buffer_used += (exec->vtx.buffer_ptr - + exec->vtx.buffer_map) * sizeof(float); + + assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); + assert(exec->vtx.buffer_ptr != NULL); + + ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); + exec->vtx.buffer_map = NULL; + exec->vtx.buffer_ptr = NULL; + exec->vtx.max_vert = 0; + } +} + + +void +vbo_exec_vtx_map( struct vbo_exec_context *exec ) +{ + struct gl_context *ctx = exec->ctx; + const GLenum target = GL_ARRAY_BUFFER_ARB; + const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */ + const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */ + GL_MAP_INVALIDATE_RANGE_BIT | + GL_MAP_UNSYNCHRONIZED_BIT | + GL_MAP_FLUSH_EXPLICIT_BIT | + MESA_MAP_NOWAIT_BIT; + const GLenum usage = GL_STREAM_DRAW_ARB; + + if (!_mesa_is_bufferobj(exec->vtx.bufferobj)) + return; + + if (exec->vtx.buffer_map != NULL) { + assert(0); + exec->vtx.buffer_map = NULL; + exec->vtx.buffer_ptr = NULL; + } + + if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && + ctx->Driver.MapBufferRange) { + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, + target, + exec->vtx.buffer_used, + (VBO_VERT_BUFFER_SIZE - + exec->vtx.buffer_used), + accessRange, + exec->vtx.bufferobj); + 
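The ctx->NewState |= _NEW_ARRAY line added in the hunk above appears to be the functional change here: rebinding the exec arrays now flags array state dirty so the next _mesa_update_state() revalidates it. The underlying idiom is plain dirty-bit bookkeeping; a toy version (hypothetical flags and names) looks like:

#include <stdint.h>
#include <stdio.h>

/* Toy dirty-flag bookkeeping in the style of ctx->NewState / _NEW_ARRAY. */
#define STATE_NEW_ARRAY   (1u << 0)
#define STATE_NEW_PROGRAM (1u << 1)

struct toy_context {
   uint32_t new_state;   /* accumulated dirty bits */
};

static void
bind_arrays(struct toy_context *ctx)
{
   /* ... point the arrays somewhere new ... */
   ctx->new_state |= STATE_NEW_ARRAY;    /* mark derived array state stale */
}

static void
update_state(struct toy_context *ctx)
{
   if (ctx->new_state & STATE_NEW_ARRAY)
      printf("revalidating array-derived state\n");
   ctx->new_state = 0;                   /* everything clean again */
}

int
main(void)
{
   struct toy_context ctx = { 0 };
   bind_arrays(&ctx);
   if (ctx.new_state)
      update_state(&ctx);
   return 0;
}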
exec->vtx.buffer_ptr = exec->vtx.buffer_map; + } + + if (!exec->vtx.buffer_map) { + exec->vtx.buffer_used = 0; + + ctx->Driver.BufferData(ctx, target, + VBO_VERT_BUFFER_SIZE, + NULL, usage, exec->vtx.bufferobj); + + + if (ctx->Driver.MapBufferRange) + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, + 0, VBO_VERT_BUFFER_SIZE, + accessRange, + exec->vtx.bufferobj); + if (!exec->vtx.buffer_map) + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); + assert(exec->vtx.buffer_map); + exec->vtx.buffer_ptr = exec->vtx.buffer_map; + } + + if (0) + printf("map %d..\n", exec->vtx.buffer_used); +} + + + +/** + * Execute the buffer and save copied verts. + */ +void +vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ) +{ + if (0) + vbo_exec_debug_verts( exec ); + + if (exec->vtx.prim_count && + exec->vtx.vert_count) { + + exec->vtx.copied.nr = vbo_copy_vertices( exec ); + + if (exec->vtx.copied.nr != exec->vtx.vert_count) { + struct gl_context *ctx = exec->ctx; + + /* Before the update_state() as this may raise _NEW_ARRAY + * from _mesa_set_varying_vp_inputs(). + */ + vbo_exec_bind_arrays( ctx ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + vbo_exec_vtx_unmap( exec ); + } + + if (0) + printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, + exec->vtx.vert_count); + + vbo_context(ctx)->draw_prims( ctx, + exec->vtx.inputs, + exec->vtx.prim, + exec->vtx.prim_count, + NULL, + GL_TRUE, + 0, + exec->vtx.vert_count - 1); + + /* If using a real VBO, get new storage -- unless asked not to. + */ + if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) { + vbo_exec_vtx_map( exec ); + } + } + } + + /* May have to unmap explicitly if we didn't draw: + */ + if (unmap && + _mesa_is_bufferobj(exec->vtx.bufferobj) && + exec->vtx.buffer_map) { + vbo_exec_vtx_unmap( exec ); + } + + + if (unmap || exec->vtx.vertex_size == 0) + exec->vtx.max_vert = 0; + else + exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / + (exec->vtx.vertex_size * sizeof(GLfloat))); + + exec->vtx.buffer_ptr = exec->vtx.buffer_map; + exec->vtx.prim_count = 0; + exec->vtx.vert_count = 0; +} + + +#endif /* FEATURE_beginend */ diff --git a/mesalib/src/mesa/vbo/vbo_save_draw.c b/mesalib/src/mesa/vbo/vbo_save_draw.c index 6d8dbdb86..634a6d3f8 100644 --- a/mesalib/src/mesa/vbo/vbo_save_draw.c +++ b/mesalib/src/mesa/vbo/vbo_save_draw.c @@ -1,304 +1,305 @@ -/* - * Mesa 3-D graphics library - * Version: 7.2 - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* Author: - * Keith Whitwell - */ - -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/imports.h" -#include "main/mfeatures.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/light.h" -#include "main/state.h" - -#include "vbo_context.h" - - -#if FEATURE_dlist - - -/** - * After playback, copy everything but the position from the - * last vertex to the saved state - */ -static void -_playback_copy_to_current(struct gl_context *ctx, - const struct vbo_save_vertex_list *node) -{ - struct vbo_context *vbo = vbo_context(ctx); - GLfloat vertex[VBO_ATTRIB_MAX * 4]; - GLfloat *data; - GLuint i, offset; - - if (node->current_size == 0) - return; - - if (node->current_data) { - data = node->current_data; - } - else { - data = vertex; - - if (node->count) - offset = (node->buffer_offset + - (node->count-1) * node->vertex_size * sizeof(GLfloat)); - else - offset = node->buffer_offset; - - ctx->Driver.GetBufferSubData( ctx, 0, offset, - node->vertex_size * sizeof(GLfloat), - data, node->vertex_store->bufferobj ); - - data += node->attrsz[0]; /* skip vertex position */ - } - - for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { - if (node->attrsz[i]) { - GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; - GLfloat tmp[4]; - - COPY_CLEAN_4V(tmp, - node->attrsz[i], - data); - - if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) { - memcpy(current, tmp, 4 * sizeof(GLfloat)); - - vbo->currval[i].Size = node->attrsz[i]; - - if (i >= VBO_ATTRIB_FIRST_MATERIAL && - i <= VBO_ATTRIB_LAST_MATERIAL) - ctx->NewState |= _NEW_LIGHT; - - ctx->NewState |= _NEW_CURRENT_ATTRIB; - } - - data += node->attrsz[i]; - } - } - - /* Colormaterial -- this kindof sucks. - */ - if (ctx->Light.ColorMaterialEnabled) { - _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]); - } - - /* CurrentExecPrimitive - */ - if (node->prim_count) { - const struct _mesa_prim *prim = &node->prim[node->prim_count - 1]; - if (prim->end) - ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END; - else - ctx->Driver.CurrentExecPrimitive = prim->mode; - } -} - - - -/** - * Treat the vertex storage as a VBO, define vertex arrays pointing - * into it: - */ -static void vbo_bind_vertex_list(struct gl_context *ctx, - const struct vbo_save_vertex_list *node) -{ - struct vbo_context *vbo = vbo_context(ctx); - struct vbo_save_context *save = &vbo->save; - struct gl_client_array *arrays = save->arrays; - GLuint buffer_offset = node->buffer_offset; - const GLuint *map; - GLuint attr; - GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */ - GLbitfield varying_inputs = 0x0; - - memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz)); - - /* Install the default (ie Current) attributes first, then overlay - * all active ones. - */ - switch (get_program_mode(ctx)) { - case VP_NONE: - for (attr = 0; attr < 16; attr++) { - save->inputs[attr] = &vbo->legacy_currval[attr]; - } - for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { - save->inputs[attr + 16] = &vbo->mat_currval[attr]; - } - map = vbo->map_vp_none; - break; - case VP_NV: - case VP_ARB: - /* The aliasing of attributes for NV vertex programs has already - * occurred. 
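_playback_copy_to_current() above relies on COPY_CLEAN_4V() to widen an attribute of 1-4 stored components into a canonical (x, y, z, w) vector, defaulting the missing components to (0, 0, 0, 1) before comparing it with the current value. A standalone equivalent of that expansion (hypothetical name):

#include <stdio.h>
#include <string.h>

/* Widen an attribute of 'size' stored floats into a 4-component vector,
 * defaulting the missing components to (0, 0, 0, 1) as GL does. */
static void
clean_4v(float dst[4], unsigned size, const float *src)
{
   dst[0] = 0.0f;
   dst[1] = 0.0f;
   dst[2] = 0.0f;
   dst[3] = 1.0f;
   memcpy(dst, src, size * sizeof(float));
}

int
main(void)
{
   const float stored[2] = { 0.5f, 0.25f };
   float full[4];
   clean_4v(full, 2, stored);
   printf("%g %g %g %g\n", full[0], full[1], full[2], full[3]);
   return 0;
}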
NV vertex programs cannot access material values, - * nor attributes greater than VERT_ATTRIB_TEX7. - */ - for (attr = 0; attr < 16; attr++) { - save->inputs[attr] = &vbo->legacy_currval[attr]; - save->inputs[attr + 16] = &vbo->generic_currval[attr]; - } - map = vbo->map_vp_arb; - - /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. - * In that case we effectively need to route the data from - * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. - */ - if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && - (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { - save->inputs[16] = save->inputs[0]; - node_attrsz[16] = node_attrsz[0]; - node_attrsz[0] = 0; - } - break; - default: - assert(0); - } - - for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { - const GLuint src = map[attr]; - - if (node_attrsz[src]) { - /* override the default array set above */ - save->inputs[attr] = &arrays[attr]; - - arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset; - arrays[attr].Size = node->attrsz[src]; - arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat); - arrays[attr].Stride = node->vertex_size * sizeof(GLfloat); - arrays[attr].Type = GL_FLOAT; - arrays[attr].Format = GL_RGBA; - arrays[attr].Enabled = 1; - _mesa_reference_buffer_object(ctx, - &arrays[attr].BufferObj, - node->vertex_store->bufferobj); - arrays[attr]._MaxElement = node->count; /* ??? */ - - assert(arrays[attr].BufferObj->Name); - - buffer_offset += node->attrsz[src] * sizeof(GLfloat); - varying_inputs |= 1<Driver.MapBuffer(ctx, - GL_ARRAY_BUFFER_ARB, - GL_READ_ONLY, /* ? */ - list->vertex_store->bufferobj); - - vbo_loopback_vertex_list(ctx, - (const GLfloat *)(buffer + list->buffer_offset), - list->attrsz, - list->prim, - list->prim_count, - list->wrap_count, - list->vertex_size); - - ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, - list->vertex_store->bufferobj); -} - - -/** - * Execute the buffer and save copied verts. - * This is called from the display list code when executing - * a drawing command. - */ -void -vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) -{ - const struct vbo_save_vertex_list *node = - (const struct vbo_save_vertex_list *) data; - struct vbo_save_context *save = &vbo_context(ctx)->save; - - FLUSH_CURRENT(ctx, 0); - - if (node->prim_count > 0 && node->count > 0) { - - if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END && - node->prim[0].begin) { - - /* Degenerate case: list is called inside begin/end pair and - * includes operations such as glBegin or glDrawArrays. - */ - if (0) - printf("displaylist recursive begin"); - - vbo_save_loopback_vertex_list( ctx, node ); - return; - } - else if (save->replay_flags) { - /* Various degnerate cases: translate into immediate mode - * calls rather than trying to execute in place. - */ - vbo_save_loopback_vertex_list( ctx, node ); - return; - } - - if (ctx->NewState) - _mesa_update_state( ctx ); - - /* XXX also need to check if shader enabled, but invalid */ - if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || - (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { - _mesa_error(ctx, GL_INVALID_OPERATION, - "glBegin (invalid vertex/fragment program)"); - return; - } - - vbo_bind_vertex_list( ctx, node ); - - /* Again... 
- */ - if (ctx->NewState) - _mesa_update_state( ctx ); - - vbo_context(ctx)->draw_prims(ctx, - save->inputs, - node->prim, - node->prim_count, - NULL, - GL_TRUE, - 0, /* Node is a VBO, so this is ok */ - node->count - 1); - } - - /* Copy to current? - */ - _playback_copy_to_current( ctx, node ); -} - - -#endif /* FEATURE_dlist */ +/* + * Mesa 3-D graphics library + * Version: 7.2 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Author: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/imports.h" +#include "main/mfeatures.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/light.h" +#include "main/state.h" + +#include "vbo_context.h" + + +#if FEATURE_dlist + + +/** + * After playback, copy everything but the position from the + * last vertex to the saved state + */ +static void +_playback_copy_to_current(struct gl_context *ctx, + const struct vbo_save_vertex_list *node) +{ + struct vbo_context *vbo = vbo_context(ctx); + GLfloat vertex[VBO_ATTRIB_MAX * 4]; + GLfloat *data; + GLuint i, offset; + + if (node->current_size == 0) + return; + + if (node->current_data) { + data = node->current_data; + } + else { + data = vertex; + + if (node->count) + offset = (node->buffer_offset + + (node->count-1) * node->vertex_size * sizeof(GLfloat)); + else + offset = node->buffer_offset; + + ctx->Driver.GetBufferSubData( ctx, 0, offset, + node->vertex_size * sizeof(GLfloat), + data, node->vertex_store->bufferobj ); + + data += node->attrsz[0]; /* skip vertex position */ + } + + for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { + if (node->attrsz[i]) { + GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; + + COPY_CLEAN_4V(tmp, + node->attrsz[i], + data); + + if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) { + memcpy(current, tmp, 4 * sizeof(GLfloat)); + + vbo->currval[i].Size = node->attrsz[i]; + + if (i >= VBO_ATTRIB_FIRST_MATERIAL && + i <= VBO_ATTRIB_LAST_MATERIAL) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } + + data += node->attrsz[i]; + } + } + + /* Colormaterial -- this kindof sucks. 
+ */ + if (ctx->Light.ColorMaterialEnabled) { + _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]); + } + + /* CurrentExecPrimitive + */ + if (node->prim_count) { + const struct _mesa_prim *prim = &node->prim[node->prim_count - 1]; + if (prim->end) + ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END; + else + ctx->Driver.CurrentExecPrimitive = prim->mode; + } +} + + + +/** + * Treat the vertex storage as a VBO, define vertex arrays pointing + * into it: + */ +static void vbo_bind_vertex_list(struct gl_context *ctx, + const struct vbo_save_vertex_list *node) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_save_context *save = &vbo->save; + struct gl_client_array *arrays = save->arrays; + GLuint buffer_offset = node->buffer_offset; + const GLuint *map; + GLuint attr; + GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */ + GLbitfield varying_inputs = 0x0; + + memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz)); + + /* Install the default (ie Current) attributes first, then overlay + * all active ones. + */ + switch (get_program_mode(ctx)) { + case VP_NONE: + for (attr = 0; attr < 16; attr++) { + save->inputs[attr] = &vbo->legacy_currval[attr]; + } + for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { + save->inputs[attr + 16] = &vbo->mat_currval[attr]; + } + map = vbo->map_vp_none; + break; + case VP_NV: + case VP_ARB: + /* The aliasing of attributes for NV vertex programs has already + * occurred. NV vertex programs cannot access material values, + * nor attributes greater than VERT_ATTRIB_TEX7. + */ + for (attr = 0; attr < 16; attr++) { + save->inputs[attr] = &vbo->legacy_currval[attr]; + save->inputs[attr + 16] = &vbo->generic_currval[attr]; + } + map = vbo->map_vp_arb; + + /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. + * In that case we effectively need to route the data from + * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. + */ + if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && + (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { + save->inputs[16] = save->inputs[0]; + node_attrsz[16] = node_attrsz[0]; + node_attrsz[0] = 0; + } + break; + default: + assert(0); + } + + for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { + const GLuint src = map[attr]; + + if (node_attrsz[src]) { + /* override the default array set above */ + save->inputs[attr] = &arrays[attr]; + + arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset; + arrays[attr].Size = node->attrsz[src]; + arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat); + arrays[attr].Stride = node->vertex_size * sizeof(GLfloat); + arrays[attr].Type = GL_FLOAT; + arrays[attr].Format = GL_RGBA; + arrays[attr].Enabled = 1; + _mesa_reference_buffer_object(ctx, + &arrays[attr].BufferObj, + node->vertex_store->bufferobj); + arrays[attr]._MaxElement = node->count; /* ??? */ + + assert(arrays[attr].BufferObj->Name); + + buffer_offset += node->attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<NewState |= _NEW_ARRAY; + } + } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); +} + + +static void +vbo_save_loopback_vertex_list(struct gl_context *ctx, + const struct vbo_save_vertex_list *list) +{ + const char *buffer = ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY, /* ? 
*/ + list->vertex_store->bufferobj); + + vbo_loopback_vertex_list(ctx, + (const GLfloat *)(buffer + list->buffer_offset), + list->attrsz, + list->prim, + list->prim_count, + list->wrap_count, + list->vertex_size); + + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + list->vertex_store->bufferobj); +} + + +/** + * Execute the buffer and save copied verts. + * This is called from the display list code when executing + * a drawing command. + */ +void +vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) +{ + const struct vbo_save_vertex_list *node = + (const struct vbo_save_vertex_list *) data; + struct vbo_save_context *save = &vbo_context(ctx)->save; + + FLUSH_CURRENT(ctx, 0); + + if (node->prim_count > 0 && node->count > 0) { + + if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END && + node->prim[0].begin) { + + /* Degenerate case: list is called inside begin/end pair and + * includes operations such as glBegin or glDrawArrays. + */ + if (0) + printf("displaylist recursive begin"); + + vbo_save_loopback_vertex_list( ctx, node ); + return; + } + else if (save->replay_flags) { + /* Various degnerate cases: translate into immediate mode + * calls rather than trying to execute in place. + */ + vbo_save_loopback_vertex_list( ctx, node ); + return; + } + + if (ctx->NewState) + _mesa_update_state( ctx ); + + /* XXX also need to check if shader enabled, but invalid */ + if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || + (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBegin (invalid vertex/fragment program)"); + return; + } + + vbo_bind_vertex_list( ctx, node ); + + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + vbo_context(ctx)->draw_prims(ctx, + save->inputs, + node->prim, + node->prim_count, + NULL, + GL_TRUE, + 0, /* Node is a VBO, so this is ok */ + node->count - 1); + } + + /* Copy to current? 
+ */ + _playback_copy_to_current( ctx, node ); +} + + +#endif /* FEATURE_dlist */ diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 63b08c1fb..062c58a89 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = pixman test +SUBDIRS = pixman test demos pkgconfigdir=$(libdir)/pkgconfig pkgconfig_DATA=pixman-1.pc diff --git a/pixman/configure.ac b/pixman/configure.ac index ab2ecde1b..5242799bb 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -794,6 +794,7 @@ AC_OUTPUT([pixman-1.pc Makefile pixman/Makefile pixman/pixman-version.h + demos/Makefile test/Makefile]) m4_if(m4_eval(pixman_minor % 2), [1], [ diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am new file mode 100644 index 000000000..2dcdfd350 --- /dev/null +++ b/pixman/demos/Makefile.am @@ -0,0 +1,34 @@ +if HAVE_GTK + +AM_CFLAGS = @OPENMP_CFLAGS@ +AM_LDFLAGS = @OPENMP_CFLAGS@ + +LDADD = $(GTK_LIBS) $(top_builddir)/pixman/libpixman-1.la -lm +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) + +GTK_UTILS = gtk-utils.c gtk-utils.h + +DEMOS = \ + clip-test \ + clip-in \ + composite-test \ + gradient-test \ + radial-test \ + alpha-test \ + screen-test \ + convolution-test \ + trap-test + +gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) +alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) +composite_test_SOURCES = composite-test.c $(GTK_UTILS) +clip_test_SOURCES = clip-test.c $(GTK_UTILS) +clip_in_SOURCES = clip-in.c $(GTK_UTILS) +trap_test_SOURCES = trap-test.c $(GTK_UTILS) +screen_test_SOURCES = screen-test.c $(GTK_UTILS) +convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) +radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) + +noinst_PROGRAMS = $(DEMOS) + +endif diff --git a/pixman/demos/alpha-test.c b/pixman/demos/alpha-test.c new file mode 100644 index 000000000..92c208142 --- /dev/null +++ b/pixman/demos/alpha-test.c @@ -0,0 +1,117 @@ +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + + uint32_t *alpha = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *grad_img; + pixman_image_t *alpha_img; + pixman_image_t *dest_img; + pixman_image_t *src_img; + int i; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0x0000, 0x0000, 0x0000, 0x0000 } }, + { pixman_int_to_fixed (1), { 0xffff, 0x0000, 0x1111, 0xffff } } + }; + pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 }; + pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH), + pixman_int_to_fixed (0) }; +#if 0 + pixman_transform_t trans = { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }; +#else + pixman_transform_t trans = { + { { pixman_fixed_1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } } + }; +#endif + + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + alpha[i] = 0x4f00004f; /* pale blue */ + + alpha_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + alpha, + WIDTH * 4); + + for (i = 0; i < WIDTH * HEIGHT; ++i) + dest[i] = 0xffffff00; /* yellow */ + + dest_img = 
pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + + for (i = 0; i < WIDTH * HEIGHT; ++i) + src[i] = 0xffff0000; + + src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + src, + WIDTH * 4); + + c_inner.x = pixman_double_to_fixed (50.0); + c_inner.y = pixman_double_to_fixed (50.0); + c_outer.x = pixman_double_to_fixed (50.0); + c_outer.y = pixman_double_to_fixed (50.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (50.0); + +#if 0 + grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); +#endif +#if 0 + grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); + grad_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); +#endif + + grad_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + + pixman_image_set_transform (grad_img, &trans); + pixman_image_set_repeat (grad_img, PIXMAN_REPEAT_PAD); + + pixman_image_composite (PIXMAN_OP_OVER, grad_img, NULL, alpha_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + pixman_image_set_alpha_map (src_img, alpha_img, 10, 10); + + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + printf ("0, 0: %x\n", dest[0]); + printf ("10, 10: %x\n", dest[10 * 10 + 10]); + printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (grad_img); + pixman_image_unref (alpha_img); + free (dest); + + return 0; +} diff --git a/pixman/demos/clip-in.c b/pixman/demos/clip-in.c new file mode 100644 index 000000000..51579811f --- /dev/null +++ b/pixman/demos/clip-in.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +/* This test demonstrates that clipping is done totally different depending + * on whether the source is transformed or not. 
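alpha-test.c above centers on pixman_image_set_alpha_map(), which makes a source image borrow its alpha channel from another image, offset by (x, y). A minimal non-GTK sketch of just that call, checking the result by printing one pixel instead of calling show_image() (image sizes and colors here are hypothetical test values):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "pixman.h"

#define W 4
#define H 4

int
main (void)
{
    uint32_t *src  = malloc (W * H * 4);
    uint32_t *dst  = malloc (W * H * 4);
    uint32_t *amap = malloc (W * H * 4);
    pixman_image_t *src_img, *dst_img, *alpha_img;
    int i;

    for (i = 0; i < W * H; ++i)
    {
        src[i]  = 0xffff0000;   /* opaque red */
        dst[i]  = 0xff0000ff;   /* opaque blue */
        amap[i] = 0x80000000;   /* roughly 50% alpha; color bits are ignored */
    }

    src_img   = pixman_image_create_bits (PIXMAN_a8r8g8b8, W, H, src,  W * 4);
    dst_img   = pixman_image_create_bits (PIXMAN_a8r8g8b8, W, H, dst,  W * 4);
    alpha_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, W, H, amap, W * 4);

    /* When src_img is used as a source, its alpha now comes from alpha_img */
    pixman_image_set_alpha_map (src_img, alpha_img, 0, 0);

    pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dst_img,
                            0, 0, 0, 0, 0, 0, W, H);

    printf ("pixel (0,0) after OVER: %08x\n", (unsigned) dst[0]);

    pixman_image_unref (src_img);
    pixman_image_unref (dst_img);
    pixman_image_unref (alpha_img);
    free (src);
    free (dst);
    free (amap);

    return 0;
}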
+ */ +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define SMALL 25 + + uint32_t *sbits = malloc (SMALL * SMALL * 4); + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + pixman_transform_t trans = { + { + { pixman_double_to_fixed (1.0), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.1), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.1), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } + } }; + + pixman_image_t *src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, SMALL, SMALL, sbits, 4 * SMALL); + pixman_image_t *dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, 4 * WIDTH); + + memset (bits, 0xff, WIDTH * HEIGHT * 4); + memset (sbits, 0x00, SMALL * SMALL * 4); + + pixman_image_composite (PIXMAN_OP_IN, + src_img, NULL, dest_img, + 0, 0, 0, 0, SMALL, SMALL, 200, 200); + + pixman_image_set_transform (src_img, &trans); + + pixman_image_composite (PIXMAN_OP_IN, + src_img, NULL, dest_img, + 0, 0, 0, 0, SMALL * 2, SMALL * 2, 200, 200); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/pixman/demos/clip-test.c b/pixman/demos/clip-test.c new file mode 100644 index 000000000..aa0df4482 --- /dev/null +++ b/pixman/demos/clip-test.c @@ -0,0 +1,97 @@ +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +#define WIDTH 200 +#define HEIGHT 200 + +static pixman_image_t * +create_solid_bits (uint32_t pixel) +{ + uint32_t *pixels = malloc (WIDTH * HEIGHT * 4); + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + pixels[i] = pixel; + + return pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + pixels, + WIDTH * 4); +} + +int +main (int argc, char **argv) +{ + pixman_image_t *gradient_img; + pixman_image_t *src_img, *dst_img; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0xffff, 0x0000, 0x0000, 0xffff } }, + { pixman_int_to_fixed (1), { 0xffff, 0xffff, 0x0000, 0xffff } } + }; +#if 0 + pixman_point_fixed_t p1 = { 0, 0 }; + pixman_point_fixed_t p2 = { pixman_int_to_fixed (WIDTH), + pixman_int_to_fixed (HEIGHT) }; +#endif + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; + pixman_region32_t clip_region; + pixman_transform_t trans = { + { { pixman_double_to_fixed (1.3), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.5), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.5), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } + } + }; + + src_img = create_solid_bits (0xff0000ff); + + c_inner.x = pixman_double_to_fixed (100.0); + c_inner.y = pixman_double_to_fixed (100.0); + c_outer.x = pixman_double_to_fixed (100.0); + c_outer.y = pixman_double_to_fixed (100.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (100.0); + + gradient_img = pixman_image_create_radial_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); + +#if 0 + gradient_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + +#endif + + pixman_image_composite (PIXMAN_OP_OVER, gradient_img, NULL, src_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + pixman_region32_init_rect (&clip_region, 50, 0, 100, 200); + pixman_image_set_clip_region32 (src_img, &clip_region); + pixman_image_set_source_clipping (src_img, TRUE); + pixman_image_set_has_client_clip (src_img, TRUE); + pixman_image_set_transform 
(src_img, &trans); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); + + dst_img = create_solid_bits (0xffff0000); + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dst_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + +#if 0 + printf ("0, 0: %x\n", src[0]); + printf ("10, 10: %x\n", src[10 * 10 + 10]); + printf ("w, h: %x\n", src[(HEIGHT - 1) * 100 + (WIDTH - 1)]); +#endif + + show_image (dst_img); + + pixman_image_unref (gradient_img); + pixman_image_unref (src_img); + + return 0; +} diff --git a/pixman/demos/composite-test.c b/pixman/demos/composite-test.c new file mode 100644 index 000000000..79d5d5eac --- /dev/null +++ b/pixman/demos/composite-test.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +#define WIDTH 60 +#define HEIGHT 60 + +typedef struct { + const char *name; + pixman_op_t op; +} operator_t; + +static const operator_t operators[] = { + { "CLEAR", PIXMAN_OP_CLEAR }, + { "SRC", PIXMAN_OP_SRC }, + { "DST", PIXMAN_OP_DST }, + { "OVER", PIXMAN_OP_OVER }, + { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE }, + { "IN", PIXMAN_OP_IN }, + { "IN_REVERSE", PIXMAN_OP_IN_REVERSE }, + { "OUT", PIXMAN_OP_OUT }, + { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE }, + { "ATOP", PIXMAN_OP_ATOP }, + { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE }, + { "XOR", PIXMAN_OP_XOR }, + { "ADD", PIXMAN_OP_ADD }, + { "SATURATE", PIXMAN_OP_SATURATE }, + + { "MULTIPLY", PIXMAN_OP_MULTIPLY }, + { "SCREEN", PIXMAN_OP_SCREEN }, + { "OVERLAY", PIXMAN_OP_OVERLAY }, + { "DARKEN", PIXMAN_OP_DARKEN }, + { "LIGHTEN", PIXMAN_OP_LIGHTEN }, + { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE }, + { "COLOR_BURN", PIXMAN_OP_COLOR_BURN }, + { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT }, + { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT }, + { "DIFFERENCE", PIXMAN_OP_DIFFERENCE }, + { "EXCLUSION", PIXMAN_OP_EXCLUSION }, + { "HSL_HUE", PIXMAN_OP_HSL_HUE }, + { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION }, + { "HSL_COLOR", PIXMAN_OP_HSL_COLOR }, + { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY }, +}; + +static uint32_t +reader (const void *src, int size) +{ + switch (size) + { + case 1: + return *(uint8_t *)src; + case 2: + return *(uint16_t *)src; + case 4: + return *(uint32_t *)src; + default: + g_assert_not_reached(); + } +} + +static void +writer (void *src, uint32_t value, int size) +{ + switch (size) + { + case 1: + *(uint8_t *)src = value; + break; + + case 2: + *(uint16_t *)src = value; + break; + + case 4: + *(uint32_t *)src = value; + break; + + default: + break; + } +} + +int +main (int argc, char **argv) +{ +#define d2f pixman_double_to_fixed + + GtkWidget *window, *swindow; + GtkWidget *table; + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *src_img; + pixman_image_t *dest_img; + pixman_point_fixed_t p1 = { -10 << 0, 0 }; + pixman_point_fixed_t p2 = { WIDTH << 16, (HEIGHT - 10) << 16 }; + uint16_t full = 0xcfff; + uint16_t low = 0x5000; + uint16_t alpha = 0xffff; + pixman_gradient_stop_t stops[6] = + { + { d2f (0.0), { full, low, low, alpha } }, + { d2f (0.25), { full, full, low, alpha } }, + { d2f (0.4), { low, full, low, alpha } }, + { d2f (0.6), { low, full, full, alpha } }, + { d2f (0.8), { low, low, full, alpha } }, + { d2f (1.0), { full, low, full, alpha } }, + }; + + int i; + + gtk_init (&argc, &argv); + + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + + gtk_window_set_default_size (GTK_WINDOW (window), 800, 600); + + g_signal_connect (window, "delete-event", + G_CALLBACK (gtk_main_quit), + NULL); + table = gtk_table_new 
(G_N_ELEMENTS (operators) / 6, 6, TRUE); + + src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, + sizeof (stops) / sizeof (stops[0])); + + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + pixman_image_set_accessors (dest_img, reader, writer); + + for (i = 0; i < G_N_ELEMENTS (operators); ++i) + { + GtkWidget *image; + GdkPixbuf *pixbuf; + GtkWidget *vbox; + GtkWidget *label; + int j, k; + + vbox = gtk_vbox_new (FALSE, 0); + + label = gtk_label_new (operators[i].name); + gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6); + gtk_widget_show (label); + + for (j = 0; j < HEIGHT; ++j) + { + for (k = 0; k < WIDTH; ++k) + dest[j * WIDTH + k] = 0x7f6f6f00; + } + pixman_image_composite (operators[i].op, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), TRUE, + WIDTH, HEIGHT, WIDTH * 4); + image = gtk_image_new_from_pixbuf (pixbuf); + gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0); + gtk_widget_show (image); + + gtk_table_attach_defaults (GTK_TABLE (table), vbox, + i % 6, (i % 6) + 1, i / 6, (i / 6) + 1); + gtk_widget_show (vbox); + + g_object_unref (pixbuf); + } + + pixman_image_unref (src_img); + free (src); + pixman_image_unref (dest_img); + free (dest); + + swindow = gtk_scrolled_window_new (NULL, NULL); + gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table); + gtk_widget_show (table); + + gtk_container_add (GTK_CONTAINER (window), swindow); + gtk_widget_show (swindow); + + gtk_widget_show (window); + + gtk_main (); + + return 0; +} diff --git a/pixman/demos/convolution-test.c b/pixman/demos/convolution-test.c new file mode 100644 index 000000000..da284af7b --- /dev/null +++ b/pixman/demos/convolution-test.c @@ -0,0 +1,47 @@ +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + +#define d2f pixman_double_to_fixed + + uint32_t *src = malloc (WIDTH * HEIGHT * 4); + uint32_t *mask = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_fixed_t convolution[] = + { + d2f (3), d2f (3), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + d2f (0.5), d2f (0.5), d2f (0.5), + }; + pixman_image_t *simg, *mimg, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src[i] = 0x7f007f00; + mask[i] = (i % 256) * 0x01000000; + dest[i] = 0; + } + + simg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src, WIDTH * 4); + mimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, mask, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); + + pixman_image_set_filter (mimg, PIXMAN_FILTER_CONVOLUTION, + convolution, 11); + + pixman_image_composite (PIXMAN_OP_OVER, simg, mimg, dimg, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/pixman/demos/gradient-test.c b/pixman/demos/gradient-test.c new file mode 100644 index 000000000..fc84844b0 --- /dev/null +++ b/pixman/demos/gradient-test.c @@ -0,0 +1,89 @@ +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 400 +#define HEIGHT 200 + + uint32_t *dest = malloc (WIDTH * HEIGHT * 4); + pixman_image_t *src_img; + 
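    /*
+     * A note on pixman_gradient_stop_t, used for the stops declared
+     * below: each stop pairs a position along the gradient (a 16.16
+     * pixman_fixed_t in [0, 1]) with a pixman_color_t whose red, green,
+     * blue and alpha channels are 16 bit, so 0xffff means full
+     * intensity.  A purely illustrative stop (not used by this demo)
+     * placing a mid-grey at the halfway point would be:
+     *
+     *     pixman_gradient_stop_t grey =
+     *         { pixman_double_to_fixed (0.5),
+     *           { 0x8000, 0x8000, 0x8000, 0xffff } };
+     */ +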
pixman_image_t *dest_img; + int i; + pixman_gradient_stop_t stops[2] = + { + { pixman_int_to_fixed (0), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, + { pixman_int_to_fixed (1), { 0xffff, 0x1111, 0x1111, 0x1111 } } + }; + pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 }; + pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH / 8.), + pixman_int_to_fixed (0) }; +#if 0 + pixman_transform_t trans = { + { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, + { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } + } + }; +#else + pixman_transform_t trans = { + { { pixman_fixed_1, 0, 0 }, + { 0, pixman_fixed_1, 0 }, + { 0, 0, pixman_fixed_1 } } + }; +#endif + + pixman_point_fixed_t c_inner; + pixman_point_fixed_t c_outer; + pixman_fixed_t r_inner; + pixman_fixed_t r_outer; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + dest[i] = 0x4f00004f; /* pale blue */ + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + dest, + WIDTH * 4); + + c_inner.x = pixman_double_to_fixed (50.0); + c_inner.y = pixman_double_to_fixed (50.0); + c_outer.x = pixman_double_to_fixed (50.0); + c_outer.y = pixman_double_to_fixed (50.0); + r_inner = 0; + r_outer = pixman_double_to_fixed (50.0); + + src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); +#if 0 + src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, + stops, 2); + src_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, + r_inner, r_outer, + stops, 2); +#endif + + src_img = pixman_image_create_linear_gradient (&p1, &p2, + stops, 2); + + pixman_image_set_transform (src_img, &trans); + pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD); + + pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, + 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); + + printf ("0, 0: %x\n", dest[0]); + printf ("10, 10: %x\n", dest[10 * 10 + 10]); + printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (dest); + + return 0; +} diff --git a/pixman/demos/gtk-utils.c b/pixman/demos/gtk-utils.c new file mode 100644 index 000000000..f45cdc912 --- /dev/null +++ b/pixman/demos/gtk-utils.c @@ -0,0 +1,115 @@ +#include +#include +#include "pixman-private.h" /* For image->bits.format + * FIXME: there should probably be public API for this + */ +#include "gtk-utils.h" + +GdkPixbuf * +pixbuf_from_argb32 (uint32_t *bits, + gboolean has_alpha, + int width, + int height, + int stride) +{ + GdkPixbuf *pixbuf = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE, + 8, width, height); + int p_stride = gdk_pixbuf_get_rowstride (pixbuf); + guint32 *p_bits = (guint32 *)gdk_pixbuf_get_pixels (pixbuf); + int w, h; + + for (h = 0; h < height; ++h) + { + for (w = 0; w < width; ++w) + { + uint32_t argb = bits[h * (stride / 4) + w]; + guint r, g, b, a; + char *pb = (char *)p_bits; + + pb += h * p_stride + w * 4; + + r = (argb & 0x00ff0000) >> 16; + g = (argb & 0x0000ff00) >> 8; + b = (argb & 0x000000ff) >> 0; + a = has_alpha? 
(argb & 0xff000000) >> 24 : 0xff; + + if (a) + { + r = (r * 255) / a; + g = (g * 255) / a; + b = (b * 255) / a; + } + + if (r > 255) r = 255; + if (g > 255) g = 255; + if (b > 255) b = 255; + + pb[0] = r; + pb[1] = g; + pb[2] = b; + pb[3] = a; + } + } + + return pixbuf; +} + + +static gboolean +on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data) +{ + GdkPixbuf *pixbuf = data; + + gdk_draw_pixbuf (widget->window, NULL, + pixbuf, 0, 0, 0, 0, + gdk_pixbuf_get_width (pixbuf), + gdk_pixbuf_get_height (pixbuf), + GDK_RGB_DITHER_NONE, + 0, 0); + + return TRUE; +} + +void +show_image (pixman_image_t *image) +{ + GtkWidget *window; + GdkPixbuf *pixbuf; + int width, height, stride; + int argc; + char **argv; + char *arg0 = g_strdup ("pixman-test-program"); + gboolean has_alpha; + pixman_format_code_t format; + + argc = 1; + argv = (char **)&arg0; + + gtk_init (&argc, &argv); + + window = gtk_window_new (GTK_WINDOW_TOPLEVEL); + width = pixman_image_get_width (image); + height = pixman_image_get_height (image); + stride = pixman_image_get_stride (image); + + gtk_window_set_default_size (GTK_WINDOW (window), width, height); + + format = image->bits.format; + + if (format == PIXMAN_a8r8g8b8) + has_alpha = TRUE; + else if (format == PIXMAN_x8r8g8b8) + has_alpha = FALSE; + else + g_error ("Can't deal with this format: %x\n", format); + + pixbuf = pixbuf_from_argb32 (pixman_image_get_data (image), has_alpha, + width, height, stride); + + g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf); + g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); + + gtk_widget_show (window); + + gtk_main (); +} diff --git a/pixman/demos/gtk-utils.h b/pixman/demos/gtk-utils.h new file mode 100644 index 000000000..2cb13bcf0 --- /dev/null +++ b/pixman/demos/gtk-utils.h @@ -0,0 +1,13 @@ +#include +#include +#include +#include +#include "pixman.h" + +void show_image (pixman_image_t *image); + +GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits, + gboolean has_alpha, + int width, + int height, + int stride); diff --git a/pixman/demos/radial-test.c b/pixman/demos/radial-test.c new file mode 100644 index 000000000..35e90d786 --- /dev/null +++ b/pixman/demos/radial-test.c @@ -0,0 +1,198 @@ +#include "../test/utils.h" +#include "gtk-utils.h" + +#define NUM_GRADIENTS 7 +#define NUM_STOPS 3 +#define NUM_REPEAT 4 +#define SIZE 128 +#define WIDTH (SIZE * NUM_GRADIENTS) +#define HEIGHT (SIZE * NUM_REPEAT) + +/* + * We want to test all the possible relative positions of the start + * and end circle: + * + * - The start circle can be smaller/equal/bigger than the end + * circle. A radial gradient can be classified in one of these + * three cases depending on the sign of dr. + * + * - The smaller circle can be completely inside/internally + * tangent/outside (at least in part) of the bigger circle. This + * classification is the same as the one which can be computed by + * examining the sign of a = (dx^2 + dy^2 - dr^2). + * + * - If the two circles have the same size, neither can be inside or + * internally tangent + * + * This test draws radial gradients whose circles always have the same + * centers (0, 0) and (1, 0), but with different radiuses. 
From left + * to right: + * + * - Small start circle completely inside the end circle + * 0.25 -> 1.75; dr = 1.5 > 0; a = 1 - 1.50^2 < 0 + * + * - Small start circle internally tangent to the end circle + * 0.50 -> 1.50; dr = 1.0 > 0; a = 1 - 1.00^2 = 0 + * + * - Small start circle outside of the end circle + * 0.50 -> 1.00; dr = 0.5 > 0; a = 1 - 0.50^2 > 0 + * + * - Start circle with the same size as the end circle + * 1.00 -> 1.00; dr = 0.0 = 0; a = 1 - 0.00^2 > 0 + * + * - Small end circle outside of the start circle + * 1.00 -> 0.50; dr = -0.5 > 0; a = 1 - 0.50^2 > 0 + * + * - Small end circle internally tangent to the start circle + * 1.50 -> 0.50; dr = -1.0 > 0; a = 1 - 1.00^2 = 0 + * + * - Small end circle completely inside the start circle + * 1.75 -> 0.25; dr = -1.5 > 0; a = 1 - 1.50^2 < 0 + * + */ + +const static double radiuses[NUM_GRADIENTS] = { + 0.25, + 0.50, + 0.50, + 1.00, + 1.00, + 1.50, + 1.75 +}; + +#define double_to_color(x) \ + (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16)) + +#define PIXMAN_STOP(offset,r,g,b,a) \ + { pixman_double_to_fixed (offset), \ + { \ + double_to_color (r), \ + double_to_color (g), \ + double_to_color (b), \ + double_to_color (a) \ + } \ + } + +static const pixman_gradient_stop_t stops[NUM_STOPS] = { + PIXMAN_STOP (0.0, 1, 0, 0, 0.75), + PIXMAN_STOP (0.70710678, 0, 1, 0, 0), + PIXMAN_STOP (1.0, 0, 0, 1, 1) +}; + +static pixman_image_t * +create_radial (int index) +{ + pixman_point_fixed_t p0, p1; + pixman_fixed_t r0, r1; + double x0, x1, radius0, radius1, left, right, center; + + x0 = 0; + x1 = 1; + radius0 = radiuses[index]; + radius1 = radiuses[NUM_GRADIENTS - index - 1]; + + /* center the gradient */ + left = MIN (x0 - radius0, x1 - radius1); + right = MAX (x0 + radius0, x1 + radius1); + center = (left + right) * 0.5; + x0 -= center; + x1 -= center; + + /* scale to make it fit within a 1x1 rect centered in (0,0) */ + x0 *= 0.25; + x1 *= 0.25; + radius0 *= 0.25; + radius1 *= 0.25; + + p0.x = pixman_double_to_fixed (x0); + p0.y = pixman_double_to_fixed (0); + + p1.x = pixman_double_to_fixed (x1); + p1.y = pixman_double_to_fixed (0); + + r0 = pixman_double_to_fixed (radius0); + r1 = pixman_double_to_fixed (radius1); + + return pixman_image_create_radial_gradient (&p0, &p1, + r0, r1, + stops, NUM_STOPS); +} + +static const pixman_repeat_t repeat[NUM_REPEAT] = { + PIXMAN_REPEAT_NONE, + PIXMAN_REPEAT_NORMAL, + PIXMAN_REPEAT_REFLECT, + PIXMAN_REPEAT_PAD +}; + +int +main (int argc, char **argv) +{ + pixman_transform_t transform; + pixman_image_t *src_img, *dest_img; + int i, j; + + enable_fp_exceptions (); + + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, + WIDTH, HEIGHT, + NULL, 0); + + pixman_transform_init_identity (&transform); + + /* + * The create_radial() function returns gradients centered in the + * origin and whose interesting part fits a 1x1 square. We want to + * paint these gradients on a SIZExSIZE square and to make things + * easier we want the origin in the top-left corner of the square + * we want to see. + */ + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + pixman_transform_scale (NULL, &transform, + pixman_double_to_fixed (SIZE), + pixman_double_to_fixed (SIZE)); + + /* + * Gradients are evaluated at the center of each pixel, so we need + * to translate by half a pixel to trigger some interesting + * cornercases. 
In particular, the original implementation of PDF + * radial gradients tried to divide by 0 when using this transform + * on the "tangent circles" cases. + */ + pixman_transform_translate (NULL, &transform, + pixman_double_to_fixed (0.5), + pixman_double_to_fixed (0.5)); + + for (i = 0; i < NUM_GRADIENTS; i++) + { + src_img = create_radial (i); + pixman_image_set_transform (src_img, &transform); + + for (j = 0; j < NUM_REPEAT; j++) + { + pixman_image_set_repeat (src_img, repeat[j]); + + pixman_image_composite32 (PIXMAN_OP_OVER, + src_img, + NULL, + dest_img, + 0, 0, + 0, 0, + i * SIZE, j * SIZE, + SIZE, SIZE); + + } + + pixman_image_unref (src_img); + } + + show_image (dest_img); + + pixman_image_unref (dest_img); + + return 0; +} diff --git a/pixman/demos/screen-test.c b/pixman/demos/screen-test.c new file mode 100644 index 000000000..e69dba3de --- /dev/null +++ b/pixman/demos/screen-test.c @@ -0,0 +1,44 @@ +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 40 +#define HEIGHT 40 + + uint32_t *src1 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src2 = malloc (WIDTH * HEIGHT * 4); + uint32_t *src3 = malloc (WIDTH * HEIGHT * 4); + uint32_t *dest = malloc (3 * WIDTH * 2 * HEIGHT * 4); + pixman_image_t *simg1, *simg2, *simg3, *dimg; + + int i; + + for (i = 0; i < WIDTH * HEIGHT; ++i) + { + src1[i] = 0x7ff00000; + src2[i] = 0x7f00ff00; + src3[i] = 0x7f0000ff; + } + + for (i = 0; i < 3 * WIDTH * 2 * HEIGHT; ++i) + { + dest[i] = 0x0; + } + + simg1 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src1, WIDTH * 4); + simg2 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src2, WIDTH * 4); + simg3 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src3, WIDTH * 4); + dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, 3 * WIDTH, 2 * HEIGHT, dest, 3 * WIDTH * 4); + + pixman_image_composite (PIXMAN_OP_SCREEN, simg1, NULL, dimg, 0, 0, 0, 0, WIDTH, HEIGHT / 4, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg2, NULL, dimg, 0, 0, 0, 0, (WIDTH/2), HEIGHT / 4 + HEIGHT / 2, WIDTH, HEIGHT); + pixman_image_composite (PIXMAN_OP_SCREEN, simg3, NULL, dimg, 0, 0, 0, 0, (4 * WIDTH) / 3, HEIGHT, WIDTH, HEIGHT); + + show_image (dimg); + + return 0; +} diff --git a/pixman/demos/trap-test.c b/pixman/demos/trap-test.c new file mode 100644 index 000000000..19295e7a5 --- /dev/null +++ b/pixman/demos/trap-test.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include "pixman.h" +#include "gtk-utils.h" + +int +main (int argc, char **argv) +{ +#define WIDTH 200 +#define HEIGHT 200 + + pixman_image_t *src_img; + pixman_image_t *mask_img; + pixman_image_t *dest_img; + pixman_trap_t trap; + pixman_color_t white = { 0x0000, 0xffff, 0x0000, 0xffff }; + uint32_t *bits = malloc (WIDTH * HEIGHT * 4); + uint32_t *mbits = malloc (WIDTH * HEIGHT); + + memset (mbits, 0, WIDTH * HEIGHT); + memset (bits, 0xff, WIDTH * HEIGHT * 4); + + trap.top.l = pixman_int_to_fixed (50) + 0x8000; + trap.top.r = pixman_int_to_fixed (150) + 0x8000; + trap.top.y = pixman_int_to_fixed (30); + + trap.bot.l = pixman_int_to_fixed (50) + 0x8000; + trap.bot.r = pixman_int_to_fixed (150) + 0x8000; + trap.bot.y = pixman_int_to_fixed (150); + + mask_img = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, mbits, WIDTH); + src_img = pixman_image_create_solid_fill (&white); + dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); + + pixman_add_traps (mask_img, 0, 0, 1, &trap); + + pixman_image_composite (PIXMAN_OP_OVER, + 
src_img, mask_img, dest_img, + 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); + + show_image (dest_img); + + pixman_image_unref (src_img); + pixman_image_unref (dest_img); + free (bits); + + return 0; +} diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index 372e9f9a8..9b1322b6c 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -282,19 +282,20 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ src_type, dst_type) \ void \ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x);\ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ \ static force_inline void \ scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ - src_type * ps, \ + const src_type * ps, \ int32_t w, \ pixman_fixed_t vx, \ pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx) \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ { \ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ vx, unit_x);\ @@ -316,4 +317,48 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + #endif diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 51533d42f..47daf457c 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -1,2365 +1,2393 @@ -/* - * Copyright © 2009 Nokia Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is 
furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) - */ - -/* - * This file contains implementations of NEON optimized pixel processing - * functions. There is no full and detailed tutorial, but some functions - * (those which are exposing some new or interesting features) are - * extensively commented and can be used as examples. - * - * You may want to have a look at the comments for following functions: - * - pixman_composite_over_8888_0565_asm_neon - * - pixman_composite_over_n_8_0565_asm_neon - */ - -/* Prevent the stack from becoming executable for no reason... */ -#if defined(__linux__) && defined(__ELF__) -.section .note.GNU-stack,"",%progbits -#endif - - .text - .fpu neon - .arch armv7a - .object_arch armv4 - .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ - .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ - .arm - .altmacro - -#include "pixman-arm-neon-asm.h" - -/* Global configuration options and preferences */ - -/* - * The code can optionally make use of unaligned memory accesses to improve - * performance of handling leading/trailing pixels for each scanline. - * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for - * example in linux if unaligned memory accesses are not configured to - * generate.exceptions. - */ -.set RESPECT_STRICT_ALIGNMENT, 1 - -/* - * Set default prefetch type. There is a choice between the following options: - * - * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work - * as NOP to workaround some HW bugs or for whatever other reason) - * - * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where - * advanced prefetch intruduces heavy overhead) - * - * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 - * which can run ARM and NEON instructions simultaneously so that extra ARM - * instructions do not add (many) extra cycles, but improve prefetch efficiency) - * - * Note: some types of function can't support advanced prefetch and fallback - * to simple one (those which handle 24bpp pixels) - */ -.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED - -/* Prefetch distance in pixels for simple prefetch */ -.set PREFETCH_DISTANCE_SIMPLE, 64 - -/* - * Implementation of pixman_composite_over_8888_0565_asm_neon - * - * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and - * performs OVER compositing operation. Function fast_composite_over_8888_0565 - * from pixman-fast-path.c does the same in C and can be used as a reference. - * - * First we need to have some NEON assembly code which can do the actual - * operation on the pixels and provide it to the template macro. 
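/*
 * A rough scalar sketch of the per-pixel work described above.  The real
 * C reference is fast_composite_over_8888_0565 in pixman-fast-path.c;
 * this standalone helper is illustrative only, and its rounding is the
 * plain (x + 127) / 255 form rather than the vrshr/vraddhn sequence the
 * NEON code uses.
 */

#include <stdint.h>

/* OVER one premultiplied a8r8g8b8 source pixel onto an r5g6b5
 * destination pixel: dst = src + (1 - src.alpha) * dst. */
static inline uint16_t
over_8888_0565_pixel (uint32_t src, uint16_t dst)
{
    /* expand r5g6b5 to 8 bits per channel, replicating the high bits */
    uint32_t dr = (dst >> 11) & 0x1f, dg = (dst >> 5) & 0x3f, db = dst & 0x1f;
    dr = (dr << 3) | (dr >> 2);
    dg = (dg << 2) | (dg >> 4);
    db = (db << 3) | (db >> 2);

    /* scale the destination by the inverted source alpha, then add the
     * (premultiplied) source channels, saturating at 255 */
    uint32_t ia = 255 - (src >> 24);
    uint32_t r = ((src >> 16) & 0xff) + (dr * ia + 127) / 255;
    uint32_t g = ((src >>  8) & 0xff) + (dg * ia + 127) / 255;
    uint32_t b = ( src        & 0xff) + (db * ia + 127) / 255;
    if (r > 255) r = 255;
    if (g > 255) g = 255;
    if (b > 255) b = 255;

    /* repack to r5g6b5 */
    return (uint16_t) (((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
}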
- * - * Template macro quite conveniently takes care of emitting all the necessary - * code for memory reading and writing (including quite tricky cases of - * handling unaligned leading/trailing pixels), so we only need to deal with - * the data in NEON registers. - * - * NEON registers allocation in general is recommented to be the following: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) - * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) - * d28, d29, d30, d31 - place for storing the result (destination pixels) - * - * As can be seen above, four 64-bit NEON registers are used for keeping - * intermediate pixel data and up to 8 pixels can be processed in one step - * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). - * - * This particular function uses the following registers allocation: - * d0, d1, d2, d3 - contain loaded source pixel data - * d4, d5 - contain loaded destination pixels (they are needed) - * d28, d29 - place for storing the result (destination pixels) - */ - -/* - * Step one. We need to have some code to do some arithmetics on pixel data. - * This is implemented as a pair of macros: '*_head' and '*_tail'. When used - * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, - * perform all the needed calculations and write the result to {d28, d29}. - * The rationale for having two macros and not just one will be explained - * later. In practice, any single monolitic function which does the work can - * be split into two parts in any arbitrary way without affecting correctness. - * - * There is one special trick here too. Common template macro can optionally - * make our life a bit easier by doing R, G, B, A color components - * deinterleaving for 32bpp pixel formats (and this feature is used in - * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that - * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we - * actually use d0 register for blue channel (a vector of eight 8-bit - * values), d1 register for green, d2 for red and d3 for alpha. This - * simple conversion can be also done with a few NEON instructions: - * - * Packed to planar conversion: - * vuzp.8 d0, d1 - * vuzp.8 d2, d3 - * vuzp.8 d1, d3 - * vuzp.8 d0, d2 - * - * Planar to packed conversion: - * vzip.8 d0, d2 - * vzip.8 d1, d3 - * vzip.8 d2, d3 - * vzip.8 d0, d1 - * - * But pixel can be loaded directly in planar format using VLD4.8 NEON - * instruction. It is 1 cycle slower than VLD1.32, so this is not always - * desirable, that's why deinterleaving is optional. - * - * But anyway, here is the code: - */ -.macro pixman_composite_over_8888_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vmvn.8 d3, d3 /* invert source alpha */ - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_8888_0565_process_pixblock_tail - /* ... 
continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* - * OK, now we got almost everything that we need. Using the above two - * macros, the work can be done right. But now we want to optimize - * it a bit. ARM Cortex-A8 is an in-order core, and benefits really - * a lot from good code scheduling and software pipelining. - * - * Let's construct some code, which will run in the core main loop. - * Some pseudo-code of the main loop will look like this: - * head - * while (...) { - * tail - * head - * } - * tail - * - * It may look a bit weird, but this setup allows to hide instruction - * latencies better and also utilize dual-issue capability more - * efficiently (make pairs of load-store and ALU instructions). - * - * So what we need now is a '*_tail_head' macro, which will be used - * in the core main loop. A trivial straightforward implementation - * of this macro would look like this: - * - * pixman_composite_over_8888_0565_process_pixblock_tail - * vst1.16 {d28, d29}, [DST_W, :128]! - * vld1.16 {d4, d5}, [DST_R, :128]! - * vld4.32 {d0, d1, d2, d3}, [SRC]! - * pixman_composite_over_8888_0565_process_pixblock_head - * cache_preload 8, 8 - * - * Now it also got some VLD/VST instructions. We simply can't move from - * processing one block of pixels to the other one with just arithmetics. - * The previously processed data needs to be written to memory and new - * data needs to be fetched. Fortunately, this main loop does not deal - * with partial leading/trailing pixels and can load/store a full block - * of pixels in a bulk. Additionally, destination buffer is already - * 16 bytes aligned here (which is good for performance). - * - * New things here are DST_R, DST_W, SRC and MASK identifiers. These - * are the aliases for ARM registers which are used as pointers for - * accessing data. We maintain separate pointers for reading and writing - * destination buffer (DST_R and DST_W). - * - * Another new thing is 'cache_preload' macro. It is used for prefetching - * data into CPU L2 cache and improve performance when dealing with large - * images which are far larger than cache size. It uses one argument - * (actually two, but they need to be the same here) - number of pixels - * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some - * details about this macro. Moreover, if good performance is needed - * the code from this macro needs to be copied into '*_tail_head' macro - * and mixed with the rest of code for optimal instructions scheduling. - * We are actually doing it below. - * - * Now after all the explanations, here is the optimized code. - * Different instruction streams (originaling from '*_head', '*_tail' - * and 'cache_preload' macro) use different indentation levels for - * better readability. Actually taking the code from one of these - * indentation levels and ignoring a few VLD/VST instructions would - * result in exactly the code from '*_head', '*_tail' or 'cache_preload' - * macro! - */ - -#if 1 - -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - vqadd.u8 d16, d2, d20 - vld1.16 {d4, d5}, [DST_R, :128]! 
- vqadd.u8 q9, q0, q11 - vshrn.u16 d6, q2, #8 - fetch_src_pixblock - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vshll.u8 q14, d16, #8 - PF add PF_X, PF_X, #8 - vshll.u8 q8, d19, #8 - PF tst PF_CTL, #0xF - vsri.u8 d6, d6, #5 - PF addne PF_X, PF_X, #8 - vmvn.8 d3, d3 - PF subne PF_CTL, PF_CTL, #1 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - vmull.u8 q10, d3, d6 - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vsri.u16 q14, q8, #5 - PF cmp PF_X, ORIG_W - vshll.u8 q9, d18, #8 - vrshr.u16 q13, q10, #8 - PF subge PF_X, PF_X, ORIG_W - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - PF subges PF_CTL, PF_CTL, #0x10 - vsri.u16 q14, q9, #11 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vraddhn.u16 d22, q12, q15 - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -#else - -/* If we did not care much about the performance, we would just use this... */ -.macro pixman_composite_over_8888_0565_process_pixblock_tail_head - pixman_composite_over_8888_0565_process_pixblock_tail - vst1.16 {d28, d29}, [DST_W, :128]! - vld1.16 {d4, d5}, [DST_R, :128]! - fetch_src_pixblock - pixman_composite_over_8888_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -#endif - -/* - * And now the final part. We are using 'generate_composite_function' macro - * to put all the stuff together. We are specifying the name of the function - * which we want to get, number of bits per pixel for the source, mask and - * destination (0 if unused, like mask in this case). Next come some bit - * flags: - * FLAG_DST_READWRITE - tells that the destination buffer is both read - * and written, for write-only buffer we would use - * FLAG_DST_WRITEONLY flag instead - * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data - * and separate color channels for 32bpp format. - * The next things are: - * - the number of pixels processed per iteration (8 in this case, because - * that's the maximum what can fit into four 64-bit NEON registers). - * - prefetch distance, measured in pixel blocks. In this case it is 5 times - * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal - * prefetch distance can be selected by running some benchmarks. - * - * After that we specify some macros, these are 'default_init', - * 'default_cleanup' here which are empty (but it is possible to have custom - * init/cleanup macros to be able to save/restore some extra NEON registers - * like d8-d15 or do anything else) followed by - * 'pixman_composite_over_8888_0565_process_pixblock_head', - * 'pixman_composite_over_8888_0565_process_pixblock_tail' and - * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' - * which we got implemented above. - * - * The last part is the NEON registers allocation scheme. 
- */ -generate_composite_function \ - pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_0565_process_pixblock_head - /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format - and put data into d6 - red, d7 - green, d30 - blue */ - vshrn.u16 d6, q2, #8 - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vshrn.u16 d30, q2, #2 - /* now do alpha blending, storing results in 8-bit planar format - into d16 - red, d19 - green, d18 - blue */ - vmull.u8 q10, d3, d6 - vmull.u8 q11, d3, d7 - vmull.u8 q12, d3, d30 - vrshr.u16 q13, q10, #8 - vrshr.u16 q3, q11, #8 - vrshr.u16 q15, q12, #8 - vraddhn.u16 d20, q10, q13 - vraddhn.u16 d23, q11, q3 - vraddhn.u16 d22, q12, q15 -.endm - -.macro pixman_composite_over_n_0565_process_pixblock_tail - /* ... continue alpha blending */ - vqadd.u8 d16, d2, d20 - vqadd.u8 q9, q0, q11 - /* convert the result to r5g6b5 and store it into {d28, d29} */ - vshll.u8 q14, d16, #8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_0565_process_pixblock_tail_head - pixman_composite_over_n_0565_process_pixblock_tail - vld1.16 {d4, d5}, [DST_R, :128]! - vst1.16 {d28, d29}, [DST_W, :128]! - pixman_composite_over_n_0565_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] - vmvn.8 d3, d3 /* invert source alpha */ -.endm - -generate_composite_function \ - pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_0565_init, \ - default_cleanup, \ - pixman_composite_over_n_0565_process_pixblock_head, \ - pixman_composite_over_n_0565_process_pixblock_tail, \ - pixman_composite_over_n_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_0565_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q14, d2, #8 - vshll.u8 q9, d0, #8 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_8888_0565_process_pixblock_tail_head - vsri.u16 q14, q8, #5 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - fetch_src_pixblock - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vsri.u16 q14, q9, #11 - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! 
- PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vshll.u8 q14, d2, #8 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vshll.u8 q9, d0, #8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_src_0565_8888_process_pixblock_head - vshrn.u16 d30, q0, #8 - vshrn.u16 d29, q0, #3 - vsli.u16 q0, q0, #5 - vmov.u8 d31, #255 - vsri.u8 d30, d30, #5 - vsri.u8 d29, d29, #6 - vshrn.u16 d28, q0, #2 -.endm - -.macro pixman_composite_src_0565_8888_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_src_0565_8888_process_pixblock_tail_head - pixman_composite_src_0565_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - fetch_src_pixblock - pixman_composite_src_0565_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_process_pixblock_head - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail -.endm - -.macro pixman_composite_add_8_8_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #32 - PF tst PF_CTL, #0xF - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #32 - PF subne PF_CTL, PF_CTL, #1 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! 
- PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - vqadd.u8 q14, q0, q2 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vqadd.u8 q15, q1, q3 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_process_pixblock_tail, \ - pixman_composite_add_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head - vmvn.8 d24, d3 /* get inverted alpha */ - /* do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! 
- vmull.u8 q11, d22, d7 -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8888_process_pixblock_head - pixman_composite_out_reverse_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - fetch_src_pixblock - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8888_process_pixblock_tail_head - pixman_composite_over_8888_8888_process_pixblock_tail - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
- pixman_composite_over_8888_8888_process_pixblock_head - cache_preload 8, 8 -.endm - -.macro pixman_composite_over_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - PF cmp PF_X, ORIG_W - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! - vmvn.8 d22, d3 - PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF subge PF_X, PF_X, ORIG_W - vmull.u8 q8, d22, d4 - PF subges PF_CTL, PF_CTL, #0x10 - vmull.u8 q9, d22, d5 - vmull.u8 q10, d22, d6 - PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! - vmull.u8 q11, d22, d7 -.endm - -.macro pixman_composite_over_reverse_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d7[0]}, [DUMMY] - vdup.8 d4, d7[0] - vdup.8 d5, d7[1] - vdup.8 d6, d7[2] - vdup.8 d7, d7[3] -.endm - -generate_composite_function \ - pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_reverse_n_8888_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 4, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_8_0565_process_pixblock_head - vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ - vshrn.u16 d7, q2, #3 - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vmull.u8 q8, d3, d6 /* now do alpha blending */ - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail - /* 3 cycle bubble (after vmull.u8) */ - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - 
vqadd.u8 d16, d2, d16 - /* 1 cycle bubble */ - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 /* convert to 16bpp */ - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - /* 1 cycle bubble */ - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head - vld1.16 {d4, d5}, [DST_R, :128]! - vshrn.u16 d6, q2, #8 - fetch_mask_pixblock - vshrn.u16 d7, q2, #3 - fetch_src_pixblock - vmull.u8 q6, d24, d10 - vrshr.u16 q13, q8, #8 - vrshr.u16 q11, q9, #8 - vrshr.u16 q15, q10, #8 - vraddhn.u16 d16, q8, q13 - vraddhn.u16 d27, q9, q11 - vraddhn.u16 d26, q10, q15 - vqadd.u8 d16, d2, d16 - vmull.u8 q1, d24, d9 - vqadd.u8 q9, q0, q13 - vshll.u8 q14, d16, #8 - vmull.u8 q0, d24, d8 - vshll.u8 q8, d19, #8 - vshll.u8 q9, d18, #8 - vsri.u16 q14, q8, #5 - vmull.u8 q7, d24, d11 - vsri.u16 q14, q9, #11 - - cache_preload 8, 8 - - vsli.u16 q2, q2, #5 - vrshr.u16 q8, q0, #8 - vrshr.u16 q9, q1, #8 - vrshr.u16 q10, q6, #8 - vrshr.u16 q11, q7, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q6, q10 - vraddhn.u16 d3, q7, q11 - vsri.u8 d6, d6, #5 - vsri.u8 d7, d7, #6 - vmvn.8 d3, d3 - vshrn.u16 d30, q2, #2 - vst1.16 {d28, d29}, [DST_W, :128]! - vmull.u8 q8, d3, d6 - vmull.u8 q9, d3, d7 - vmull.u8 q10, d3, d30 -.endm - -generate_composite_function \ - pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -/* - * This function needs a special initialization of solid mask. - * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET - * offset, split into color components and replicated in d8-d11 - * registers. Additionally, this function needs all the NEON registers, - * so it has to save d8-d15 registers which are callee saved according - * to ABI. These registers are restored from 'cleanup' macro. All the - * other NEON registers are caller saved, so can be clobbered freely - * without introducing any problems. 
- */ -.macro pixman_composite_over_n_8_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_0565_init, \ - pixman_composite_over_n_8_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d24[0]}, [DUMMY] - vdup.8 d24, d24[3] -.endm - -.macro pixman_composite_over_8888_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_0565_init, \ - pixman_composite_over_8888_n_0565_cleanup, \ - pixman_composite_over_8888_8_0565_process_pixblock_head, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail, \ - pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 24 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0565_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_0565_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - cache_preload 16, 16 -.endm - -generate_composite_function \ - pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_0565_process_pixblock_head, \ - pixman_composite_src_0565_0565_process_pixblock_tail, \ - pixman_composite_src_0565_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8_process_pixblock_tail_head - vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! 
-.endm - -.macro pixman_composite_src_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #8 - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_WRITEONLY, \ - 32, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8_init, \ - pixman_composite_src_n_8_cleanup, \ - pixman_composite_src_n_8_process_pixblock_head, \ - pixman_composite_src_n_8_process_pixblock_tail, \ - pixman_composite_src_n_8_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_0565_process_pixblock_head -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_0565_process_pixblock_tail_head - vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_0565_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #16 - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_0565_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 16, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_0565_init, \ - pixman_composite_src_n_0565_cleanup, \ - pixman_composite_src_n_0565_process_pixblock_head, \ - pixman_composite_src_n_0565_process_pixblock_tail, \ - pixman_composite_src_n_0565_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_n_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! -.endm - -.macro pixman_composite_src_n_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d0[0]}, [DUMMY] - vsli.u64 d0, d0, #32 - vorr d1, d0, d0 - vorr q1, q0, q0 -.endm - -.macro pixman_composite_src_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 0, /* prefetch distance */ \ - pixman_composite_src_n_8888_init, \ - pixman_composite_src_n_8888_cleanup, \ - pixman_composite_src_n_8888_process_pixblock_head, \ - pixman_composite_src_n_8888_process_pixblock_tail, \ - pixman_composite_src_n_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_8888_8888_process_pixblock_head -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_8888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! 
- fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_8888_process_pixblock_head, \ - pixman_composite_src_8888_8888_process_pixblock_tail, \ - pixman_composite_src_8888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_x888_8888_process_pixblock_head - vorr q0, q0, q2 - vorr q1, q1, q2 -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail -.endm - -.macro pixman_composite_src_x888_8888_process_pixblock_tail_head - vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! - fetch_src_pixblock - vorr q0, q0, q2 - vorr q1, q1, q2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_x888_8888_init - vmov.u8 q2, #0xFF - vshl.u32 q2, q2, #24 -.endm - -generate_composite_function \ - pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_x888_8888_init, \ - default_cleanup, \ - pixman_composite_src_x888_8888_process_pixblock_head, \ - pixman_composite_src_x888_8888_process_pixblock_tail, \ - pixman_composite_src_x888_8888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8888_process_pixblock_head - /* expecting deinterleaved source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24 (d25, d26, d27 are unused) */ - - /* in */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 d24, d3 /* get inverted alpha */ - /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ - /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_over_n_8_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head - pixman_composite_over_n_8_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! 
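For reference, here is a rough scalar model of what the over_n_8_8888 head/tail pair above computes per pixel: the solid premultiplied source is first scaled by the 8-bit mask ("in"), then composited OVER the destination with a saturating add. The div255 helper mirrors the rounding behaviour of the vmull.u8 / vrshr.u16 #8 / vraddhn.u16 sequences; the packed-word representation and the function names are illustrative, not pixman API.

#include <stdint.h>

/* Round t/255 to nearest, as the NEON multiply/round/narrow sequences do. */
static uint8_t div255(uint16_t t)
{
    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

/* One pixel of over_n_8_8888 in scalar C (sketch only). */
static uint32_t over_n_8_8888_pixel(uint32_t src, uint8_t mask, uint32_t dst)
{
    uint32_t result = 0;
    uint8_t sa = div255((uint16_t)((src >> 24) & 0xff) * mask); /* masked source alpha */
    uint8_t inv_sa = (uint8_t)(255 - sa);                       /* vmvn.8 d24, d3 */

    for (int shift = 0; shift < 32; shift += 8) {
        uint8_t s = div255((uint16_t)((src >> shift) & 0xff) * mask); /* "in" */
        uint8_t d = (uint8_t)((dst >> shift) & 0xff);
        uint16_t sum = s + div255((uint16_t)(d * inv_sa));            /* alpha blend */
        if (sum > 255)
            sum = 255;                                                /* vqadd.u8 saturation */
        result |= (uint32_t)sum << shift;
    }
    return result;
}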
- fetch_mask_pixblock - cache_preload 8, 8 - pixman_composite_over_n_8_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8888_init, \ - pixman_composite_over_n_8_8888_cleanup, \ - pixman_composite_over_n_8_8888_process_pixblock_head, \ - pixman_composite_over_n_8_8888_process_pixblock_tail, \ - pixman_composite_over_n_8_8888_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8_8_process_pixblock_head - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d8 - vmull.u8 q6, d26, d8 - vmull.u8 q7, d27, d8 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 q12, q0 - vmvn.8 q13, q1 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_n_8_8_process_pixblock_tail_head - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_n_8_8_process_pixblock_tail - fetch_mask_pixblock - cache_preload 32, 32 - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! 
- pixman_composite_over_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_over_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d8[0]}, [DUMMY] - vdup.8 d8, d8[3] -.endm - -.macro pixman_composite_over_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8_8_init, \ - pixman_composite_over_n_8_8_cleanup, \ - pixman_composite_over_n_8_8_process_pixblock_head, \ - pixman_composite_over_n_8_8_process_pixblock_tail, \ - pixman_composite_over_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head - /* - * 'combine_mask_ca' replacement - * - * input: solid src (n) in {d8, d9, d10, d11} - * dest in {d4, d5, d6, d7 } - * mask in {d24, d25, d26, d27} - * output: updated src in {d0, d1, d2, d3 } - * updated mask in {d24, d25, d26, d3 } - */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d25, d9 - vmull.u8 q6, d26, d10 - vmull.u8 q7, d27, d11 - vmull.u8 q9, d11, d25 - vmull.u8 q12, d11, d24 - vmull.u8 q13, d11, d26 - vrshr.u16 q8, q0, #8 - vrshr.u16 q10, q1, #8 - vrshr.u16 q11, q6, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q10 - vraddhn.u16 d2, q6, q11 - vrshr.u16 q11, q12, #8 - vrshr.u16 q8, q9, #8 - vrshr.u16 q6, q13, #8 - vrshr.u16 q10, q7, #8 - vraddhn.u16 d24, q12, q11 - vraddhn.u16 d25, q9, q8 - vraddhn.u16 d26, q13, q6 - vraddhn.u16 d3, q7, q10 - /* - * 'combine_over_ca' replacement - * - * output: updated dest in {d28, d29, d30, d31} - */ - vmvn.8 d24, d24 - vmvn.8 d25, d25 - vmull.u8 q8, d24, d4 - vmull.u8 q9, d25, d5 - vmvn.8 d26, d26 - vmvn.8 d27, d3 - vmull.u8 q10, d26, d6 - vmull.u8 q11, d27, d7 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail - /* ... continue 'combine_over_ca' replacement */ - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vrshr.u16 q6, q10, #8 - vrshr.u16 q7, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q6, q10 - vraddhn.u16 d31, q7, q11 - fetch_mask_pixblock - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 - cache_preload 8, 8 - pixman_composite_over_n_8888_8888_ca_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
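The 'combine_mask_ca' / 'combine_over_ca' replacement above works channel by channel. A minimal scalar sketch of one channel, assuming the same rounded division by 255 as elsewhere in this file; names are illustrative only:

#include <stdint.h>

static uint8_t div255(uint16_t t)        /* rounded t/255 */
{
    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

/* One colour channel of component-alpha OVER (sketch):
 * combine_mask_ca : s' = s * m / 255,  m' = sa * m / 255
 * combine_over_ca : d  = s' + d * (255 - m') / 255, saturating.
 * 's' is one channel of the solid source, 'sa' its alpha and
 * 'm' the matching channel of the a8r8g8b8 mask. */
static uint8_t over_ca_channel(uint8_t s, uint8_t sa, uint8_t m, uint8_t d)
{
    uint8_t s_in = div255((uint16_t)s * m);
    uint8_t m_in = div255((uint16_t)sa * m);
    uint16_t sum = s_in + div255((uint16_t)(d * (255 - m_in)));
    return sum > 255 ? 255 : (uint8_t)sum;
}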
-.endm - -.macro pixman_composite_over_n_8888_8888_ca_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d8, d11[0] - vdup.8 d9, d11[1] - vdup.8 d10, d11[2] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_over_n_8888_8888_ca_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_n_8888_8888_ca_init, \ - pixman_composite_over_n_8888_8888_ca_cleanup, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ - pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_in_n_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* and destination data in {d4, d5, d6, d7} */ - vmull.u8 q8, d4, d3 - vmull.u8 q9, d5, d3 - vmull.u8 q10, d6, d3 - vmull.u8 q11, d7, d3 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q8, q14 - vraddhn.u16 d29, q9, q15 - vraddhn.u16 d30, q10, q12 - vraddhn.u16 d31, q11, q13 -.endm - -.macro pixman_composite_in_n_8_process_pixblock_tail_head - pixman_composite_in_n_8_process_pixblock_tail - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - cache_preload 32, 32 - pixman_composite_in_n_8_process_pixblock_head - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -.macro pixman_composite_in_n_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_in_n_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_in_n_8_init, \ - pixman_composite_in_n_8_cleanup, \ - pixman_composite_in_n_8_process_pixblock_head, \ - pixman_composite_in_n_8_process_pixblock_tail, \ - pixman_composite_in_n_8_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -.macro pixman_composite_add_n_8_8_process_pixblock_head - /* expecting source data in {d8, d9, d10, d11} */ - /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ - /* and destination data in {d4, d5, d6, d7} */ - /* mask is in d24, d25, d26, d27 */ - vmull.u8 q0, d24, d11 - vmull.u8 q1, d25, d11 - vmull.u8 q6, d26, d11 - vmull.u8 q7, d27, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_n_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_n_8_8_process_pixblock_tail_head - pixman_composite_add_n_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! 
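The two solid-source operations above reduce, per byte, to a multiply by the solid alpha and, for the add case, a saturating add into the destination. A small scalar sketch with illustrative names and the same rounded /255 helper:

#include <stdint.h>

static uint8_t div255(uint16_t t)        /* rounded t/255 */
{
    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

/* in_n_8: every destination byte is scaled by the solid source alpha. */
static uint8_t in_n_8_pixel(uint8_t dst, uint8_t src_alpha)
{
    return div255((uint16_t)dst * src_alpha);
}

/* add_n_8_8: the mask byte is scaled by the solid source alpha, then
 * added to the destination with saturation (the vqadd.u8 above). */
static uint8_t add_n_8_8_pixel(uint8_t dst, uint8_t mask, uint8_t src_alpha)
{
    uint16_t sum = dst + div255((uint16_t)mask * src_alpha);
    return sum > 255 ? 255 : (uint8_t)sum;
}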
- fetch_mask_pixblock - cache_preload 32, 32 - pixman_composite_add_n_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_n_8_8_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vpush {d8-d15} - vld1.32 {d11[0]}, [DUMMY] - vdup.8 d11, d11[3] -.endm - -.macro pixman_composite_add_n_8_8_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8_init, \ - pixman_composite_add_n_8_8_cleanup, \ - pixman_composite_add_n_8_8_process_pixblock_head, \ - pixman_composite_add_n_8_8_process_pixblock_tail, \ - pixman_composite_add_n_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8_8_8_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d24, d0 - vmull.u8 q9, d25, d1 - vmull.u8 q10, d26, d2 - vmull.u8 q11, d27, d3 - vrshr.u16 q0, q8, #8 - vrshr.u16 q1, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d0, q0, q8 - vraddhn.u16 d1, q1, q9 - vraddhn.u16 d2, q12, q10 - vraddhn.u16 d3, q13, q11 - vqadd.u8 q14, q0, q2 - vqadd.u8 q15, q1, q3 -.endm - -.macro pixman_composite_add_8_8_8_process_pixblock_tail -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_8_8_8_process_pixblock_tail_head - pixman_composite_add_8_8_8_process_pixblock_tail - vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! - vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! - fetch_mask_pixblock - fetch_src_pixblock - cache_preload 32, 32 - pixman_composite_add_8_8_8_process_pixblock_head -.endm - -.macro pixman_composite_add_8_8_8_init -.endm - -.macro pixman_composite_add_8_8_8_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ - FLAG_DST_READWRITE, \ - 32, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8_8_8_init, \ - pixman_composite_add_8_8_8_cleanup, \ - pixman_composite_add_8_8_8_process_pixblock_head, \ - pixman_composite_add_8_8_8_process_pixblock_tail, \ - pixman_composite_add_8_8_8_process_pixblock_tail_head - -/******************************************************************************/ - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* mask in {d24, d25, d26, d27} */ - vmull.u8 q8, d27, d0 - vmull.u8 q9, d27, d1 - vmull.u8 q10, d27, d2 - vmull.u8 q11, d27, d3 - /* 1 cycle bubble */ - vrsra.u16 q8, q8, #8 - vrsra.u16 q9, q9, #8 - vrsra.u16 q10, q10, #8 - vrsra.u16 q11, q11, #8 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail - /* 2 cycle bubble */ - vrshrn.u16 d28, q8, #8 - vrshrn.u16 d29, q9, #8 - vrshrn.u16 d30, q10, #8 - vrshrn.u16 d31, q11, #8 - vqadd.u8 q14, q2, q14 - /* 1 cycle bubble */ - vqadd.u8 q15, q3, q15 -.endm - -.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - fetch_src_pixblock - vrshrn.u16 d28, q8, #8 - fetch_mask_pixblock - vrshrn.u16 d29, q9, #8 - vmull.u8 q8, d27, d0 - vrshrn.u16 d30, q10, #8 - vmull.u8 q9, d27, d1 - vrshrn.u16 d31, q11, #8 - vmull.u8 q10, d27, d2 - vqadd.u8 q14, q2, q14 - vmull.u8 q11, d27, d3 - vqadd.u8 q15, q3, q15 - vrsra.u16 q8, q8, #8 - vld4.8 {d4, 
d5, d6, d7}, [DST_R, :128]! - vrsra.u16 q9, q9, #8 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - vrsra.u16 q10, q10, #8 - - cache_preload 8, 8 - - vrsra.u16 q11, q11, #8 -.endm - -generate_composite_function \ - pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -generate_composite_function_single_scanline \ - pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head - -/******************************************************************************/ - -generate_composite_function \ - pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_n_8_8888_init - add DUMMY, sp, #ARGS_STACK_OFFSET - vld1.32 {d3[0]}, [DUMMY] - vdup.8 d0, d3[0] - vdup.8 d1, d3[1] - vdup.8 d2, d3[2] - vdup.8 d3, d3[3] -.endm - -.macro pixman_composite_add_n_8_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_n_8_8888_init, \ - pixman_composite_add_n_8_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_8888_n_8888_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vld1.32 {d27[0]}, [DUMMY] - vdup.8 d27, d27[3] -.endm - -.macro pixman_composite_add_8888_n_8888_cleanup -.endm - -generate_composite_function \ - pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_add_8888_n_8888_init, \ - pixman_composite_add_8888_n_8888_cleanup, \ - pixman_composite_add_8888_8888_8888_process_pixblock_head, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ - pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* 
src_basereg */ \ - 27 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - /* expecting source data in {d0, d1, d2, d3} */ - /* destination data in {d4, d5, d6, d7} */ - /* solid mask is in d15 */ - - /* 'in' */ - vmull.u8 q8, d15, d3 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q13, q8, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d3, q8, q13 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 - vmvn.8 d24, d3 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 -.endm - -.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function_single_scanline \ - pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ - pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_8888_n_8888_process_pixblock_head - pixman_composite_out_reverse_8888_n_8888_process_pixblock_head -.endm - -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail - pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail - vqadd.u8 q14, q0, q14 - vqadd.u8 q15, q1, q15 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
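Note how over_8888_n_8888 above reuses the out_reverse head and tail unchanged and appends only two vqadd instructions: OVER is OUT_REVERSE plus a saturating add of the masked source. A scalar sketch of one channel, with illustrative names and the same rounded /255 helper:

#include <stdint.h>

static uint8_t div255(uint16_t t)        /* rounded t/255 */
{
    return (uint8_t)((t + 128 + ((t + 128) >> 8)) >> 8);
}

/* out_reverse_8888_n_8888, one channel: source scaled by the solid mask,
 * then the destination scaled by the inverse of the resulting alpha. */
static uint8_t out_reverse_channel(uint8_t d, uint8_t s_alpha, uint8_t m)
{
    uint8_t sa_in = div255((uint16_t)s_alpha * m);
    return div255((uint16_t)(d * (255 - sa_in)));
}

/* over_8888_n_8888, one channel: OUT_REVERSE plus the masked source,
 * with saturation (the two appended vqadd.u8 instructions). */
static uint8_t over_channel(uint8_t s, uint8_t d, uint8_t s_alpha, uint8_t m)
{
    uint16_t sum = div255((uint16_t)s * m) + out_reverse_channel(d, s_alpha, m);
    return sum > 255 ? 255 : (uint8_t)sum;
}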
-.endm - -.macro pixman_composite_over_8888_n_8888_init - add DUMMY, sp, #48 - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_8888_n_8888_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_8888_n_8888_init, \ - pixman_composite_over_8888_n_8888_cleanup, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail_head - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -generate_composite_function_single_scanline \ - pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 12 /* mask_basereg */ - -/******************************************************************************/ - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head - vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - pixman_composite_over_8888_n_8888_process_pixblock_tail - fetch_src_pixblock - cache_preload 8, 8 - fetch_mask_pixblock - pixman_composite_over_8888_n_8888_process_pixblock_head - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
-.endm - -generate_composite_function \ - pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_8888_n_8888_process_pixblock_head, \ - pixman_composite_over_8888_n_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0888_process_pixblock_head -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_0888_process_pixblock_tail_head - vst3.8 {d0, d1, d2}, [DST_W]! - fetch_src_pixblock - cache_preload 8, 8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0888_process_pixblock_head, \ - pixman_composite_src_0888_0888_process_pixblock_tail, \ - pixman_composite_src_0888_0888_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_head - vswp d0, d2 -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail -.endm - -.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head - vst4.8 {d0, d1, d2, d3}, [DST_W]! - fetch_src_pixblock - vswp d0, d2 - cache_preload 8, 8 -.endm - -.macro pixman_composite_src_0888_8888_rev_init - veor d3, d3, d3 -.endm - -generate_composite_function \ - pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - pixman_composite_src_0888_8888_rev_init, \ - default_cleanup, \ - pixman_composite_src_0888_8888_rev_process_pixblock_head, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ - pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ - 0, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_head - vshll.u8 q8, d1, #8 - vshll.u8 q9, d2, #8 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail - vshll.u8 q14, d0, #8 - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 -.endm - -.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head - vshll.u8 q14, d0, #8 - fetch_src_pixblock - vsri.u16 q14, q8, #5 - vsri.u16 q14, q9, #11 - vshll.u8 q8, d1, #8 - vst1.16 {d28, d29}, [DST_W, :128]! 
- vshll.u8 q9, d2, #8 -.endm - -generate_composite_function \ - pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ - FLAG_DST_WRITEONLY, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0888_0565_rev_process_pixblock_head, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ - pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d30, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 -.endm - -.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d30, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d28, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! -.endm - -generate_composite_function \ - pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_pixbuf_8888_process_pixblock_head, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - vraddhn.u16 d28, q11, q8 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 -.endm - -.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head - vrshr.u16 q11, q8, #8 - vswp d3, d31 - vrshr.u16 q12, q9, #8 - vrshr.u16 q13, q10, #8 - fetch_src_pixblock - vraddhn.u16 d28, q11, q8 - PF add PF_X, PF_X, #8 - PF tst PF_CTL, #0xF - PF addne PF_X, PF_X, #8 - PF subne PF_CTL, PF_CTL, #1 - vraddhn.u16 d29, q12, q9 - vraddhn.u16 d30, q13, q10 - vmull.u8 q8, d3, d0 - vmull.u8 q9, d3, d1 - vmull.u8 q10, d3, d2 - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - PF cmp PF_X, ORIG_W - PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] - PF subge PF_X, PF_X, ORIG_W - PF subges PF_CTL, PF_CTL, #0x10 - PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! 
-.endm - -generate_composite_function \ - pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - 10, /* prefetch distance */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ - pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 0, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmvn.8 d7, d15 - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vmull.u8 q8, d7, d4 - vmull.u8 q9, d7, d5 - vmull.u8 q13, d7, d6 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q13, #8 - vraddhn.u16 d28, q14, q8 - vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q13 - vqadd.u8 q0, q0, q14 - vqadd.u8 q1, q1, q15 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_over_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_over_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! 
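The convert_0565_to_x888 / convert_8888_to_0565 helpers used by this function widen the 5- and 6-bit fields to 8 bits and pack them back. A scalar sketch of the usual bit-replicating widening (the same idea as the vsri-based r5g6b5 unpacking visible elsewhere in this patch) and the corresponding packing; names are illustrative:

#include <stdint.h>

/* Widen one r5g6b5 pixel to 8-bit channels, replicating the top bits
 * of each field into the freed low bits. */
static void expand_0565(uint16_t p, uint8_t *r, uint8_t *g, uint8_t *b)
{
    uint8_t r5 = (uint8_t)((p >> 11) & 0x1f);
    uint8_t g6 = (uint8_t)((p >> 5) & 0x3f);
    uint8_t b5 = (uint8_t)(p & 0x1f);
    *r = (uint8_t)((r5 << 3) | (r5 >> 2));
    *g = (uint8_t)((g6 << 2) | (g6 >> 4));
    *b = (uint8_t)((b5 << 3) | (b5 >> 2));
}

/* Pack 8-bit channels back to r5g6b5 by dropping the low bits. */
static uint16_t pack_0565(uint8_t r, uint8_t g, uint8_t b)
{
    return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}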
-.endm - -generate_composite_function \ - pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_over_0565_n_0565_init - add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) - vpush {d8-d15} - vld1.32 {d15[0]}, [DUMMY] - vdup.8 d15, d15[3] -.endm - -.macro pixman_composite_over_0565_n_0565_cleanup - vpop {d8-d15} -.endm - -generate_composite_function \ - pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - pixman_composite_over_0565_n_0565_init, \ - pixman_composite_over_0565_n_0565_cleanup, \ - pixman_composite_over_0565_8_0565_process_pixblock_head, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail, \ - pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_add_0565_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q4, d2, d1, d0 - convert_0565_to_x888 q5, d6, d5, d4 - /* source pixel data is in {d0, d1, d2, XX} */ - /* destination pixel data is in {d4, d5, d6, XX} */ - vmull.u8 q6, d15, d2 - vmull.u8 q5, d15, d1 - vmull.u8 q4, d15, d0 - vrshr.u16 q12, q6, #8 - vrshr.u16 q11, q5, #8 - vrshr.u16 q10, q4, #8 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d1, q5, q11 - vraddhn.u16 d0, q4, q10 -.endm - -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail - vqadd.u8 q0, q0, q2 - vqadd.u8 q1, q1, q3 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head - fetch_mask_pixblock - pixman_composite_add_0565_8_0565_process_pixblock_tail - fetch_src_pixblock - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_add_0565_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! 
-.endm - -generate_composite_function \ - pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_add_0565_8_0565_process_pixblock_head, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail, \ - pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ - -/******************************************************************************/ - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_head - /* mask is in d15 */ - convert_0565_to_x888 q5, d6, d5, d4 - /* destination pixel data is in {d4, d5, d6, xx} */ - vmvn.8 d24, d15 /* get inverted alpha */ - /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 -.endm - -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail - vrshr.u16 q14, q8, #8 - vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vraddhn.u16 d0, q14, q8 - vraddhn.u16 d1, q15, q9 - vraddhn.u16 d2, q12, q10 - /* 32bpp result is in {d0, d1, d2, XX} */ - convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -.endm - -/* TODO: expand macros and do better instructions scheduling */ -.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head - fetch_src_pixblock - pixman_composite_out_reverse_8_0565_process_pixblock_tail - vld1.16 {d10, d11}, [DST_R, :128]! - cache_preload 8, 8 - pixman_composite_out_reverse_8_0565_process_pixblock_head - vst1.16 {d28, d29}, [DST_W, :128]! -.endm - -generate_composite_function \ - pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ - FLAG_DST_READWRITE, \ - 8, /* number of pixels, processed in a single block */ \ - 5, /* prefetch distance */ \ - default_init_need_all_regs, \ - default_cleanup_need_all_regs, \ - pixman_composite_out_reverse_8_0565_process_pixblock_head, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ - pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 10, /* dst_r_basereg */ \ - 15, /* src_basereg */ \ - 0 /* mask_basereg */ - -/******************************************************************************/ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_8888_process_pixblock_head, \ - pixman_composite_over_8888_8888_process_pixblock_tail, \ - pixman_composite_over_8888_8888_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ - FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_over_8888_0565_process_pixblock_head, \ - pixman_composite_over_8888_0565_process_pixblock_tail, \ - pixman_composite_over_8888_0565_process_pixblock_tail_head, \ - 28, /* dst_w_basereg */ \ - 4, /* dst_r_basereg */ \ - 0, /* src_basereg */ \ - 24 /* mask_basereg */ - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in 
a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_8888_0565_process_pixblock_head, \ - pixman_composite_src_8888_0565_process_pixblock_tail, \ - pixman_composite_src_8888_0565_process_pixblock_tail_head - -generate_composite_function_nearest_scanline \ - pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ - FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ - 8, /* number of pixels, processed in a single block */ \ - default_init, \ - default_cleanup, \ - pixman_composite_src_0565_8888_process_pixblock_head, \ - pixman_composite_src_0565_8888_process_pixblock_tail, \ - pixman_composite_src_0565_8888_process_pixblock_tail_head +/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains implementations of NEON optimized pixel processing + * functions. There is no full and detailed tutorial, but some functions + * (those which are exposing some new or interesting features) are + * extensively commented and can be used as examples. + * + * You may want to have a look at the comments for following functions: + * - pixman_composite_over_8888_0565_asm_neon + * - pixman_composite_over_n_8_0565_asm_neon + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ + .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ + .arm + .altmacro + +#include "pixman-arm-neon-asm.h" + +/* Global configuration options and preferences */ + +/* + * The code can optionally make use of unaligned memory accesses to improve + * performance of handling leading/trailing pixels for each scanline. + * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for + * example in linux if unaligned memory accesses are not configured to + * generate.exceptions. + */ +.set RESPECT_STRICT_ALIGNMENT, 1 + +/* + * Set default prefetch type. 
There is a choice between the following options: + * + * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work + * as NOP to work around some HW bugs or for whatever other reason) + * + * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where + * advanced prefetch introduces heavy overhead) + * + * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 + * which can run ARM and NEON instructions simultaneously so that extra ARM + * instructions do not add (many) extra cycles, but improve prefetch efficiency) + * + * Note: some types of function can't support advanced prefetch and fall back + * to the simple one (those which handle 24bpp pixels) + */ +.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED + +/* Prefetch distance in pixels for simple prefetch */ +.set PREFETCH_DISTANCE_SIMPLE, 64 + +/* + * Implementation of pixman_composite_over_8888_0565_asm_neon + * + * This function takes an a8r8g8b8 source buffer, an r5g6b5 destination buffer and + * performs the OVER compositing operation. Function fast_composite_over_8888_0565 + * from pixman-fast-path.c does the same in C and can be used as a reference. + * + * First we need to have some NEON assembly code which can do the actual + * operation on the pixels and provide it to the template macro. + * + * The template macro quite conveniently takes care of emitting all the necessary + * code for memory reading and writing (including quite tricky cases of + * handling unaligned leading/trailing pixels), so we only need to deal with + * the data in NEON registers. + * + * NEON register allocation in general is recommended to be the following: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) + * d24, d25, d26, d27 - contain loaded mask pixel data (if mask is used) + * d28, d29, d30, d31 - place for storing the result (destination pixels) + * + * As can be seen above, four 64-bit NEON registers are used for keeping + * intermediate pixel data and up to 8 pixels can be processed in one step + * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). + * + * This particular function uses the following register allocation: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5 - contain loaded destination pixels (they are needed) + * d28, d29 - place for storing the result (destination pixels) + */ + +/* + * Step one. We need to have some code to do some arithmetic on pixel data. + * This is implemented as a pair of macros: '*_head' and '*_tail'. When used + * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, + * perform all the needed calculations and write the result to {d28, d29}. + * The rationale for having two macros and not just one will be explained + * later. In practice, any single monolithic function which does the work can + * be split into two parts in any arbitrary way without affecting correctness. + * + * There is one special trick here too. The common template macro can optionally + * make our life a bit easier by doing R, G, B, A color components + * deinterleaving for 32bpp pixel formats (and this feature is used in + * the 'pixman_composite_over_8888_0565_asm_neon' function). So it means that + * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we + * actually use d0 register for blue channel (a vector of eight 8-bit + * values), d1 register for green, d2 for red and d3 for alpha.
This + * simple conversion can be also done with a few NEON instructions: + * + * Packed to planar conversion: + * vuzp.8 d0, d1 + * vuzp.8 d2, d3 + * vuzp.8 d1, d3 + * vuzp.8 d0, d2 + * + * Planar to packed conversion: + * vzip.8 d0, d2 + * vzip.8 d1, d3 + * vzip.8 d2, d3 + * vzip.8 d0, d1 + * + * But pixel can be loaded directly in planar format using VLD4.8 NEON + * instruction. It is 1 cycle slower than VLD1.32, so this is not always + * desirable, that's why deinterleaving is optional. + * + * But anyway, here is the code: + */ +.macro pixman_composite_over_8888_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vmvn.8 d3, d3 /* invert source alpha */ + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_8888_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* + * OK, now we got almost everything that we need. Using the above two + * macros, the work can be done right. But now we want to optimize + * it a bit. ARM Cortex-A8 is an in-order core, and benefits really + * a lot from good code scheduling and software pipelining. + * + * Let's construct some code, which will run in the core main loop. + * Some pseudo-code of the main loop will look like this: + * head + * while (...) { + * tail + * head + * } + * tail + * + * It may look a bit weird, but this setup allows to hide instruction + * latencies better and also utilize dual-issue capability more + * efficiently (make pairs of load-store and ALU instructions). + * + * So what we need now is a '*_tail_head' macro, which will be used + * in the core main loop. A trivial straightforward implementation + * of this macro would look like this: + * + * pixman_composite_over_8888_0565_process_pixblock_tail + * vst1.16 {d28, d29}, [DST_W, :128]! + * vld1.16 {d4, d5}, [DST_R, :128]! + * vld4.32 {d0, d1, d2, d3}, [SRC]! + * pixman_composite_over_8888_0565_process_pixblock_head + * cache_preload 8, 8 + * + * Now it also got some VLD/VST instructions. We simply can't move from + * processing one block of pixels to the other one with just arithmetics. + * The previously processed data needs to be written to memory and new + * data needs to be fetched. Fortunately, this main loop does not deal + * with partial leading/trailing pixels and can load/store a full block + * of pixels in a bulk. Additionally, destination buffer is already + * 16 bytes aligned here (which is good for performance). + * + * New things here are DST_R, DST_W, SRC and MASK identifiers. These + * are the aliases for ARM registers which are used as pointers for + * accessing data. We maintain separate pointers for reading and writing + * destination buffer (DST_R and DST_W). + * + * Another new thing is 'cache_preload' macro. 
It is used for prefetching + * data into CPU L2 cache and improve performance when dealing with large + * images which are far larger than cache size. It uses one argument + * (actually two, but they need to be the same here) - number of pixels + * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some + * details about this macro. Moreover, if good performance is needed + * the code from this macro needs to be copied into '*_tail_head' macro + * and mixed with the rest of code for optimal instructions scheduling. + * We are actually doing it below. + * + * Now after all the explanations, here is the optimized code. + * Different instruction streams (originaling from '*_head', '*_tail' + * and 'cache_preload' macro) use different indentation levels for + * better readability. Actually taking the code from one of these + * indentation levels and ignoring a few VLD/VST instructions would + * result in exactly the code from '*_head', '*_tail' or 'cache_preload' + * macro! + */ + +#if 1 + +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + vqadd.u8 d16, d2, d20 + vld1.16 {d4, d5}, [DST_R, :128]! + vqadd.u8 q9, q0, q11 + vshrn.u16 d6, q2, #8 + fetch_src_pixblock + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vshll.u8 q14, d16, #8 + PF add PF_X, PF_X, #8 + vshll.u8 q8, d19, #8 + PF tst PF_CTL, #0xF + vsri.u8 d6, d6, #5 + PF addne PF_X, PF_X, #8 + vmvn.8 d3, d3 + PF subne PF_CTL, PF_CTL, #1 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + vmull.u8 q10, d3, d6 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vsri.u16 q14, q8, #5 + PF cmp PF_X, ORIG_W + vshll.u8 q9, d18, #8 + vrshr.u16 q13, q10, #8 + PF subge PF_X, PF_X, ORIG_W + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + PF subges PF_CTL, PF_CTL, #0x10 + vsri.u16 q14, q9, #11 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vraddhn.u16 d22, q12, q15 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +#else + +/* If we did not care much about the performance, we would just use this... */ +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + pixman_composite_over_8888_0565_process_pixblock_tail + vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! + fetch_src_pixblock + pixman_composite_over_8888_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +#endif + +/* + * And now the final part. We are using 'generate_composite_function' macro + * to put all the stuff together. We are specifying the name of the function + * which we want to get, number of bits per pixel for the source, mask and + * destination (0 if unused, like mask in this case). Next come some bit + * flags: + * FLAG_DST_READWRITE - tells that the destination buffer is both read + * and written, for write-only buffer we would use + * FLAG_DST_WRITEONLY flag instead + * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data + * and separate color channels for 32bpp format. + * The next things are: + * - the number of pixels processed per iteration (8 in this case, because + * that's the maximum what can fit into four 64-bit NEON registers). + * - prefetch distance, measured in pixel blocks. In this case it is 5 times + * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal + * prefetch distance can be selected by running some benchmarks. 
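The software-pipelined loop shape described in this comment (head; while (...) { tail; head; } tail) can be pictured as a small compilable C skeleton: a 'head' starts work on a block, a 'tail' finishes and stores the previous one, and the main loop interleaves them with a prefetch hint (the GCC/Clang __builtin_prefetch standing in for PLD / cache_preload). The block operation itself is a placeholder, not the real blend, and all names are illustrative.

#include <stdint.h>
#include <string.h>

#define BLOCK 8   /* pixels per block, as in the 8-pixel pixblocks above */

static void block_head(const uint8_t *src, uint8_t *tmp)  /* start work on a block */
{
    for (int i = 0; i < BLOCK; i++)
        tmp[i] = (uint8_t)(src[i] ^ 0xff);                 /* placeholder arithmetic */
}

static void block_tail(const uint8_t *tmp, uint8_t *dst)   /* finish and store a block */
{
    memcpy(dst, tmp, BLOCK);
}

/* Process nblocks (>= 1) blocks with the head/tail interleaving. */
static void scanline(const uint8_t *src, uint8_t *dst, int nblocks)
{
    uint8_t tmp[BLOCK];
    block_head(src, tmp);                                  /* head */
    for (int i = 1; i < nblocks; i++) {                    /* while (...) { tail; head; } */
        if (i + 4 < nblocks)
            __builtin_prefetch(src + (i + 4) * BLOCK);     /* rough analogue of PLD */
        block_tail(tmp, dst + (i - 1) * BLOCK);
        block_head(src + i * BLOCK, tmp);
    }
    block_tail(tmp, dst + (nblocks - 1) * BLOCK);          /* tail */
}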
+ * + * After that we specify some macros, these are 'default_init', + * 'default_cleanup' here which are empty (but it is possible to have custom + * init/cleanup macros to be able to save/restore some extra NEON registers + * like d8-d15 or do anything else) followed by + * 'pixman_composite_over_8888_0565_process_pixblock_head', + * 'pixman_composite_over_8888_0565_process_pixblock_tail' and + * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' + * which we got implemented above. + * + * The last part is the NEON registers allocation scheme. + */ +generate_composite_function \ + pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_n_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_0565_process_pixblock_tail_head + pixman_composite_over_n_0565_process_pixblock_tail + vld1.16 {d4, d5}, [DST_R, :128]! + vst1.16 {d28, d29}, [DST_W, :128]! 
+ pixman_composite_over_n_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d3, d3 /* invert source alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_0565_init, \ + default_cleanup, \ + pixman_composite_over_n_0565_process_pixblock_head, \ + pixman_composite_over_n_0565_process_pixblock_tail, \ + pixman_composite_over_n_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_0565_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q14, d2, #8 + vshll.u8 q9, d0, #8 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail_head + vsri.u16 q14, q8, #5 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + fetch_src_pixblock + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vsri.u16 q14, q9, #11 + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vshll.u8 q14, d2, #8 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vshll.u8 q9, d0, #8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_src_0565_8888_process_pixblock_head + vshrn.u16 d30, q0, #8 + vshrn.u16 d29, q0, #3 + vsli.u16 q0, q0, #5 + vmov.u8 d31, #255 + vsri.u8 d30, d30, #5 + vsri.u8 d29, d29, #6 + vshrn.u16 d28, q0, #2 +.endm + +.macro pixman_composite_src_0565_8888_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_src_0565_8888_process_pixblock_tail_head + pixman_composite_src_0565_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ fetch_src_pixblock + pixman_composite_src_0565_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_process_pixblock_head + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #32 + PF tst PF_CTL, #0xF + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #32 + PF subne PF_CTL, PF_CTL, #1 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! 
+ vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head + vmvn.8 d24, d3 /* get inverted alpha */ + /* do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8888_process_pixblock_head + pixman_composite_out_reverse_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! 
+ vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + pixman_composite_over_8888_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_8888_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +.macro pixman_composite_over_reverse_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d7[0]}, [DUMMY] + vdup.8 d4, d7[0] + vdup.8 d5, d7[1] + vdup.8 d6, d7[2] + vdup.8 d7, d7[3] +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_reverse_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 4, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8_0565_process_pixblock_head + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vmull.u8 q8, d3, d6 /* now do alpha blending */ + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail + /* 3 cycle bubble (after vmull.u8) */ + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + /* 1 cycle bubble */ + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 /* convert to 16bpp */ + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + /* 1 cycle bubble */ + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! + vshrn.u16 d6, q2, #8 + fetch_mask_pixblock + vshrn.u16 d7, q2, #3 + fetch_src_pixblock + vmull.u8 q6, d24, d10 + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + vmull.u8 q1, d24, d9 + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 + vmull.u8 q0, d24, d8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vmull.u8 q7, d24, d11 + vsri.u16 q14, q9, #11 + + cache_preload 8, 8 + + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vst1.16 {d28, d29}, [DST_W, :128]! 
+ vmull.u8 q8, d3, d6 + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +/* + * This function needs a special initialization of solid mask. + * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET + * offset, split into color components and replicated in d8-d11 + * registers. Additionally, this function needs all the NEON registers, + * so it has to save d8-d15 registers which are callee saved according + * to ABI. These registers are restored from 'cleanup' macro. All the + * other NEON registers are caller saved, so can be clobbered freely + * without introducing any problems. + */ +.macro pixman_composite_over_n_8_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_0565_init, \ + pixman_composite_over_n_8_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d24[0]}, [DUMMY] + vdup.8 d24, d24[3] +.endm + +.macro pixman_composite_over_8888_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_0565_init, \ + pixman_composite_over_8888_n_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0565_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! 
+ fetch_src_pixblock + cache_preload 16, 16 +.endm + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_0565_process_pixblock_head, \ + pixman_composite_src_0565_0565_process_pixblock_tail, \ + pixman_composite_src_0565_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail_head + vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #8 + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8_init, \ + pixman_composite_src_n_8_cleanup, \ + pixman_composite_src_n_8_process_pixblock_head, \ + pixman_composite_src_n_8_process_pixblock_tail, \ + pixman_composite_src_n_8_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_0565_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_0565_init, \ + pixman_composite_src_n_0565_cleanup, \ + pixman_composite_src_n_0565_process_pixblock_head, \ + pixman_composite_src_n_0565_process_pixblock_tail, \ + pixman_composite_src_n_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! 
+.endm + +.macro pixman_composite_src_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8888_init, \ + pixman_composite_src_n_8888_cleanup, \ + pixman_composite_src_n_8888_process_pixblock_head, \ + pixman_composite_src_n_8888_process_pixblock_tail, \ + pixman_composite_src_n_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_8888_process_pixblock_head, \ + pixman_composite_src_8888_8888_process_pixblock_tail, \ + pixman_composite_src_8888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_x888_8888_process_pixblock_head + vorr q0, q0, q2 + vorr q1, q1, q2 +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! 
+ fetch_src_pixblock + vorr q0, q0, q2 + vorr q1, q1, q2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_x888_8888_init + vmov.u8 q2, #0xFF + vshl.u32 q2, q2, #24 +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_x888_8888_init, \ + default_cleanup, \ + pixman_composite_src_x888_8888_process_pixblock_head, \ + pixman_composite_src_x888_8888_process_pixblock_tail, \ + pixman_composite_src_x888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 d24, d3 /* get inverted alpha */ + /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ + /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head + pixman_composite_over_n_8_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! 
+ fetch_mask_pixblock + cache_preload 8, 8 + pixman_composite_over_n_8_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8888_init, \ + pixman_composite_over_n_8_8888_cleanup, \ + pixman_composite_over_n_8_8888_process_pixblock_head, \ + pixman_composite_over_n_8_8888_process_pixblock_tail, \ + pixman_composite_over_n_8_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d8 + vmull.u8 q6, d26, d8 + vmull.u8 q7, d27, d8 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 q12, q0 + vmvn.8 q13, q1 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8_process_pixblock_tail_head + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_n_8_8_process_pixblock_tail + fetch_mask_pixblock + cache_preload 32, 32 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ pixman_composite_over_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d8[0]}, [DUMMY] + vdup.8 d8, d8[3] +.endm + +.macro pixman_composite_over_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8_init, \ + pixman_composite_over_n_8_8_cleanup, \ + pixman_composite_over_n_8_8_process_pixblock_head, \ + pixman_composite_over_n_8_8_process_pixblock_tail, \ + pixman_composite_over_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} + * dest in {d4, d5, d6, d7 } + * mask in {d24, d25, d26, d27} + * output: updated src in {d0, d1, d2, d3 } + * updated mask in {d24, d25, d26, d3 } + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q7, d27, d11 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vrshr.u16 q10, q7, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + vraddhn.u16 d26, q13, q6 + vraddhn.u16 d3, q7, q10 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in {d28, d29, d30, d31} + */ + vmvn.8 d24, d24 + vmvn.8 d25, d25 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmvn.8 d26, d26 + vmvn.8 d27, d3 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + fetch_mask_pixblock + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + cache_preload 8, 8 + pixman_composite_over_n_8888_8888_ca_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+.endm + +.macro pixman_composite_over_n_8888_8888_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_8888_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_8888_ca_init, \ + pixman_composite_over_n_8888_8888_ca_cleanup, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_in_n_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* and destination data in {d4, d5, d6, d7} */ + vmull.u8 q8, d4, d3 + vmull.u8 q9, d5, d3 + vmull.u8 q10, d6, d3 + vmull.u8 q11, d7, d3 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q8, q14 + vraddhn.u16 d29, q9, q15 + vraddhn.u16 d30, q10, q12 + vraddhn.u16 d31, q11, q13 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail_head + pixman_composite_in_n_8_process_pixblock_tail + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 32, 32 + pixman_composite_in_n_8_process_pixblock_head + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_in_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_in_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_in_n_8_init, \ + pixman_composite_in_n_8_cleanup, \ + pixman_composite_in_n_8_process_pixblock_head, \ + pixman_composite_in_n_8_process_pixblock_tail, \ + pixman_composite_in_n_8_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +.macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24, d25, d26, d27 */ + vmull.u8 q0, d24, d11 + vmull.u8 q1, d25, d11 + vmull.u8 q6, d26, d11 + vmull.u8 q7, d27, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_n_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_n_8_8_process_pixblock_tail_head + pixman_composite_add_n_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! 
+ fetch_mask_pixblock + cache_preload 32, 32 + pixman_composite_add_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_add_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8_init, \ + pixman_composite_add_n_8_8_cleanup, \ + pixman_composite_add_n_8_8_process_pixblock_head, \ + pixman_composite_add_n_8_8_process_pixblock_tail, \ + pixman_composite_add_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d25, d1 + vmull.u8 q10, d26, d2 + vmull.u8 q11, d27, d3 + vrshr.u16 q0, q8, #8 + vrshr.u16 q1, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q12, q10 + vraddhn.u16 d3, q13, q11 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_8_8_8_process_pixblock_tail_head + pixman_composite_add_8_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + fetch_src_pixblock + cache_preload 32, 32 + pixman_composite_add_8_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_8_8_8_init +.endm + +.macro pixman_composite_add_8_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8_8_8_init, \ + pixman_composite_add_8_8_8_cleanup, \ + pixman_composite_add_8_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d27, d0 + vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 + /* 1 cycle bubble */ + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail + /* 2 cycle bubble */ + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 + vqadd.u8 q14, q2, q14 + /* 1 cycle bubble */ + vqadd.u8 q15, q3, q15 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + vrshrn.u16 d28, q8, #8 + fetch_mask_pixblock + vrshrn.u16 d29, q9, #8 + vmull.u8 q8, d27, d0 + vrshrn.u16 d30, q10, #8 + vmull.u8 q9, d27, d1 + vrshrn.u16 d31, q11, #8 + vmull.u8 q10, d27, d2 + vqadd.u8 q14, q2, q14 + vmull.u8 q11, d27, d3 + vqadd.u8 q15, q3, q15 + vrsra.u16 q8, q8, #8 + vld4.8 {d4, 
d5, d6, d7}, [DST_R, :128]! + vrsra.u16 q9, q9, #8 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q10, q10, #8 + + cache_preload 8, 8 + + vrsra.u16 q11, q11, #8 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +generate_composite_function \ + pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_add_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8888_init, \ + pixman_composite_add_n_8_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_8888_n_8888_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vld1.32 {d27[0]}, [DUMMY] + vdup.8 d27, d27[3] +.endm + +.macro pixman_composite_add_8888_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8888_n_8888_init, \ + pixman_composite_add_8888_n_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* 
src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* solid mask is in d15 */ + + /* 'in' */ + vmull.u8 q8, d15, d3 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q13, q8, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d3, q8, q13 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 + vmvn.8 d24, d3 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_8888_process_pixblock_head + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+.endm + +.macro pixman_composite_over_8888_n_8888_init + add DUMMY, sp, #48 + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_8888_n_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_8888_init, \ + pixman_composite_over_8888_n_8888_cleanup, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+.endm + +generate_composite_function \ + pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0888_process_pixblock_head +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail_head + vst3.8 {d0, d1, d2}, [DST_W]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0888_process_pixblock_head, \ + pixman_composite_src_0888_0888_process_pixblock_tail, \ + pixman_composite_src_0888_0888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_head + vswp d0, d2 +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head + vst4.8 {d0, d1, d2, d3}, [DST_W]! + fetch_src_pixblock + vswp d0, d2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_0888_8888_rev_init + veor d3, d3, d3 +.endm + +generate_composite_function \ + pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_0888_8888_rev_init, \ + default_cleanup, \ + pixman_composite_src_0888_8888_rev_process_pixblock_head, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q9, d2, #8 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail + vshll.u8 q14, d0, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head + vshll.u8 q14, d0, #8 + fetch_src_pixblock + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! 
+ vshll.u8 q9, d2, #8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0565_rev_process_pixblock_head, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d30, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d30, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_pixbuf_8888_process_pixblock_head, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d28, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d28, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! 
+.endm + +generate_composite_function \ + pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmvn.8 d7, d15 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vmull.u8 q8, d7, d4 + vmull.u8 q9, d7, d5 + vmull.u8 q13, d7, d6 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q13, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q13 + vqadd.u8 q0, q0, q14 + vqadd.u8 q1, q1, q15 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_over_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_over_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! 
+.endm + +generate_composite_function \ + pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_0565_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_0565_n_0565_init, \ + pixman_composite_over_0565_n_0565_cleanup, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail + vqadd.u8 q0, q0, q2 + vqadd.u8 q1, q1, q3 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_add_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_add_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! 
+.endm + +generate_composite_function \ + pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_add_0565_8_0565_process_pixblock_head, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q5, d6, d5, d4 + /* destination pixel data is in {d4, d5, d6, xx} */ + vmvn.8 d24, d15 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 +.endm + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vraddhn.u16 d0, q14, q8 + vraddhn.u16 d1, q15, q9 + vraddhn.u16 d2, q12, q10 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_0565_process_pixblock_tail + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8_0565_process_pixblock_head, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 15, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in 
a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index 7d6c83775..3e0c0d1c2 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -122,6 +122,11 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + void pixman_composite_src_n_8_asm_neon (int32_t w, int32_t h, @@ -332,6 +337,12 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 4cb8321aa..92f030871 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -1,2227 +1,2228 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. 
- * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifdef HAVE_CONFIG_H -#include -#endif -#include -#include -#include "pixman-private.h" -#include "pixman-combine32.h" -#include "pixman-fast-path.h" - -static force_inline uint32_t -fetch_24 (uint8_t *a) -{ - if (((unsigned long)a) & 1) - { -#ifdef WORDS_BIGENDIAN - return (*a << 16) | (*(uint16_t *)(a + 1)); -#else - return *a | (*(uint16_t *)(a + 1) << 8); -#endif - } - else - { -#ifdef WORDS_BIGENDIAN - return (*(uint16_t *)a << 8) | *(a + 2); -#else - return *(uint16_t *)a | (*(a + 2) << 16); -#endif - } -} - -static force_inline void -store_24 (uint8_t *a, - uint32_t v) -{ - if (((unsigned long)a) & 1) - { -#ifdef WORDS_BIGENDIAN - *a = (uint8_t) (v >> 16); - *(uint16_t *)(a + 1) = (uint16_t) (v); -#else - *a = (uint8_t) (v); - *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); -#endif - } - else - { -#ifdef WORDS_BIGENDIAN - *(uint16_t *)a = (uint16_t)(v >> 8); - *(a + 2) = (uint8_t)v; -#else - *(uint16_t *)a = (uint16_t)v; - *(a + 2) = (uint8_t)(v >> 16); -#endif - } -} - -static force_inline uint32_t -over (uint32_t src, - uint32_t dest) -{ - uint32_t a = ~src >> 24; - - UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); - - return dest; -} - -static uint32_t -in (uint32_t x, - uint8_t y) -{ - uint16_t a = y; - - UN8x4_MUL_UN8 (x, a); - - return x; -} - -/* - * Naming convention: - * - * op_src_mask_dest - */ -static void -fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *src, *src_line; - uint32_t *dst, *dst_line; - uint8_t *mask, *mask_line; - int src_stride, mask_stride, dst_stride; - uint8_t m; - uint32_t s, d; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - src = src_line; - src_line += src_stride; - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - - w = width; - while (w--) - { - m = *mask++; - if (m) - { - s = *src | 0xff000000; - - if (m == 0xff) - { - *dst = s; - } - else - { - d = in (s, m); - *dst 
= over (d, *dst); - } - } - src++; - dst++; - } - } -} - -static void -fast_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - uint16_t t; - - src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - - srca = src >> 24; - - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - if (srca == 0xff) - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - - if (m == 0) - *dst = 0; - else if (m != 0xff) - *dst = MUL_UN8 (m, *dst, t); - - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - m = MUL_UN8 (m, srca, t); - - if (m == 0) - *dst = 0; - else if (m != 0xff) - *dst = MUL_UN8 (m, *dst, t); - - dst++; - } - } - } -} - -static void -fast_composite_in_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint8_t s; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - - if (s == 0) - *dst = 0; - else if (s != 0xff) - *dst = MUL_UN8 (s, *dst, t); - - dst++; - } - } -} - -static void -fast_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dst_line, *dst, d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - *dst = src; - else - *dst = over (src, *dst); - } - else if (m) - { - d = in (src, m); - *dst = over (d, *dst); - } - dst++; - } - } -} - -static void -fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - 
pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, s; - uint32_t *dst_line, *dst, d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - - if (ma) - { - d = *dst; - s = src; - - UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); - - *dst = s; - } - - dst++; - } - } -} - -static void -fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca, s; - uint32_t *dst_line, *dst, d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - if (ma == 0xffffffff) - { - if (srca == 0xff) - *dst = src; - else - *dst = over (src, *dst); - } - else if (ma) - { - d = *dst; - s = src; - - UN8x4_MUL_UN8x4 (s, ma); - UN8x4_MUL_UN8 (ma, srca); - ma = ~ma; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - - *dst = d; - } - - dst++; - } - } -} - -static void -fast_composite_over_n_8_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint8_t *dst_line, *dst; - uint32_t d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - { - d = src; - } - else - { - d = fetch_24 (dst); - d = over (src, d); - } - store_24 (dst, d); - } - else if (m) - { - d = over (in (src, m), fetch_24 (dst)); - store_24 (dst, d); - } - dst += 3; - } - } -} - -static void -fast_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t 
dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint16_t *dst_line, *dst; - uint32_t d; - uint8_t *mask_line, *mask, m; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - m = *mask++; - if (m == 0xff) - { - if (srca == 0xff) - { - d = src; - } - else - { - d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); - } - *dst = CONVERT_8888_TO_0565 (d); - } - else if (m) - { - d = *dst; - d = over (in (src, m), CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca, s; - uint16_t src16; - uint16_t *dst_line, *dst; - uint32_t d; - uint32_t *mask_line, *mask, ma; - int dst_stride, mask_stride; - int32_t w; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - srca = src >> 24; - if (src == 0) - return; - - src16 = CONVERT_8888_TO_0565 (src); - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - ma = *mask++; - if (ma == 0xffffffff) - { - if (srca == 0xff) - { - *dst = src16; - } - else - { - d = *dst; - d = over (src, CONVERT_0565_TO_0888 (d)); - *dst = CONVERT_8888_TO_0565 (d); - } - } - else if (ma) - { - d = *dst; - d = CONVERT_0565_TO_0888 (d); - - s = src; - - UN8x4_MUL_UN8x4 (s, ma); - UN8x4_MUL_UN8 (ma, srca); - ma = ~ma; - UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); - - *dst = CONVERT_8888_TO_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - uint8_t a; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (a == 0xff) - *dst = s; - else if (s) - *dst = over (s, *dst); - dst++; - } - } -} - -static void -fast_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t 
mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - *dst++ = (*src++) | 0xff000000; - } -} - -#if 0 -static void -fast_composite_over_8888_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint8_t *dst_line, *dst; - uint32_t d; - uint32_t *src_line, *src, s; - uint8_t a; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (a) - { - if (a == 0xff) - d = s; - else - d = over (s, fetch_24 (dst)); - - store_24 (dst, d); - } - dst += 3; - } - } -} -#endif - -static void -fast_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint16_t *dst_line, *dst; - uint32_t d; - uint32_t *src_line, *src, s; - uint8_t a; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - a = s >> 24; - if (s) - { - if (a == 0xff) - { - d = s; - } - else - { - d = *dst; - d = over (s, CONVERT_0565_TO_0888 (d)); - } - *dst = CONVERT_8888_TO_0565 (d); - } - dst++; - } - } -} - -static void -fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint16_t *dst_line, *dst; - uint32_t *src_line, *src, s; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - *dst = CONVERT_8888_TO_0565 (s); - dst++; - } - } -} - -static void -fast_composite_add_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - 
int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint8_t *dst_line, *dst; - uint8_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint8_t s, d; - uint16_t t; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - if (s) - { - if (s != 0xff) - { - d = *dst; - t = d + s; - s = t | (0 - (t >> 8)); - } - *dst = s; - } - dst++; - } - } -} - -static void -fast_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - uint32_t s, d; - - PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - s = *src++; - if (s) - { - if (s != 0xffffffff) - { - d = *dst; - if (d) - UN8x4_ADD_UN8x4 (s, d); - } - *dst = s; - } - dst++; - } - } -} - -static void -fast_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint8_t *dst_line, *dst; - uint8_t *mask_line, *mask; - int dst_stride, mask_stride; - int32_t w; - uint32_t src; - uint8_t sa; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - sa = (src >> 24); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - while (w--) - { - uint16_t tmp; - uint16_t a; - uint32_t m, d; - uint32_t r; - - a = *mask++; - d = *dst; - - m = MUL_UN8 (sa, a, tmp); - r = ADD_UN8 (m, d, tmp); - - *dst++ = r; - } - } -} - -#ifdef WORDS_BIGENDIAN -#define CREATE_BITMASK(n) (0x80000000 >> (n)) -#define UPDATE_BITMASK(n) ((n) >> 1) -#else -#define CREATE_BITMASK(n) (1 << (n)) -#define UPDATE_BITMASK(n) ((n) << 1) -#endif - -#define TEST_BIT(p, n) \ - (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) -#define SET_BIT(p, n) \ - do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); - -static void -fast_composite_add_1000_1000 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *dst_line, *dst; - uint32_t *src_line, *src; - int dst_stride, src_stride; - int32_t w; - - PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, - src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, 0, 
dest_y, uint32_t, - dst_stride, dst_line, 1); - - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - src = src_line; - src_line += src_stride; - w = width; - - while (w--) - { - /* - * TODO: improve performance by processing uint32_t data instead - * of individual bits - */ - if (TEST_BIT (src, src_x + w)) - SET_BIT (dst, dest_x + w); - } - } -} - -static void -fast_composite_over_n_1_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint32_t *dst, *dst_line; - uint32_t *mask, *mask_line; - int mask_stride, dst_stride; - uint32_t bitcache, bitmask; - int32_t w; - - if (width <= 0) - return; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, - dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, - mask_stride, mask_line, 1); - mask_line += mask_x >> 5; - - if (srca == 0xff) - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = src; - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = over (src, *dst); - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } -} - -static void -fast_composite_over_n_1_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src, srca; - uint16_t *dst, *dst_line; - uint32_t *mask, *mask_line; - int mask_stride, dst_stride; - uint32_t bitcache, bitmask; - int32_t w; - uint32_t d; - uint16_t src565; - - if (width <= 0) - return; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - srca = src >> 24; - if (src == 0) - return; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, - dst_stride, dst_line, 1); - PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, - mask_stride, mask_line, 1); - mask_line += mask_x >> 5; - - if (srca == 0xff) - { - src565 = CONVERT_8888_TO_0565 (src); - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - *dst = src565; - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } - else - { - while (height--) - { - dst = dst_line; - dst_line += dst_stride; - mask = mask_line; - mask_line += mask_stride; - w = width; - - bitcache = *mask++; - 
bitmask = CREATE_BITMASK (mask_x & 31); - - while (w--) - { - if (bitmask == 0) - { - bitcache = *mask++; - bitmask = CREATE_BITMASK (0); - } - if (bitcache & bitmask) - { - d = over (src, CONVERT_0565_TO_0888 (*dst)); - *dst = CONVERT_8888_TO_0565 (d); - } - bitmask = UPDATE_BITMASK (bitmask); - dst++; - } - } - } -} - -/* - * Simple bitblt - */ - -static void -fast_composite_solid_fill (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t src; - - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); - - if (dst_image->bits.format == PIXMAN_a1) - { - src = src >> 31; - } - else if (dst_image->bits.format == PIXMAN_a8) - { - src = src >> 24; - } - else if (dst_image->bits.format == PIXMAN_r5g6b5 || - dst_image->bits.format == PIXMAN_b5g6r5) - { - src = CONVERT_8888_TO_0565 (src); - } - - pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dst_image->bits.format), - dest_x, dest_y, - width, height, - src); -} - -static void -fast_composite_src_memcpy (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; - uint32_t n_bytes = width * bpp; - int dst_stride, src_stride; - uint8_t *dst; - uint8_t *src; - - src_stride = src_image->bits.rowstride * 4; - dst_stride = dst_image->bits.rowstride * 4; - - src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; - dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; - - while (height--) - { - memcpy (dst, src, n_bytes); - - dst += dst_stride; - src += src_stride; - } -} - -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) -FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) -FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) -FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) -FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) -FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) -FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) -FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) -FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) -FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) - -/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ -static force_inline void -scaled_nearest_scanline_565_565_SRC (uint16_t * dst, - uint16_t * src, - int32_t w, - pixman_fixed_t vx, - 
pixman_fixed_t unit_x, - pixman_fixed_t max_vx) -{ - uint16_t tmp1, tmp2, tmp3, tmp4; - while ((w -= 4) >= 0) - { - tmp1 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - tmp2 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - tmp3 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - tmp4 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - *dst++ = tmp1; - *dst++ = tmp2; - *dst++ = tmp3; - *dst++ = tmp4; - } - if (w & 2) - { - tmp1 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - tmp2 = src[pixman_fixed_to_int (vx)]; - vx += unit_x; - *dst++ = tmp1; - *dst++ = tmp2; - } - if (w & 1) - *dst++ = src[pixman_fixed_to_int (vx)]; -} - -FAST_NEAREST_MAINLOOP (565_565_cover_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, COVER) -FAST_NEAREST_MAINLOOP (565_565_none_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, NONE) -FAST_NEAREST_MAINLOOP (565_565_pad_SRC, - scaled_nearest_scanline_565_565_SRC, - uint16_t, uint16_t, PAD) - -static force_inline uint32_t -fetch_nearest (pixman_repeat_t src_repeat, - pixman_format_code_t format, - uint32_t *src, int x, int src_width) -{ - if (repeat (src_repeat, &x, src_width)) - { - if (format == PIXMAN_x8r8g8b8) - return *(src + x) | 0xff000000; - else - return *(src + x); - } - else - { - return 0; - } -} - -static force_inline void -combine_over (uint32_t s, uint32_t *dst) -{ - if (s) - { - uint8_t ia = 0xff - (s >> 24); - - if (ia) - UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); - else - *dst = s; - } -} - -static force_inline void -combine_src (uint32_t s, uint32_t *dst) -{ - *dst = s; -} - -static void -fast_composite_scaled_nearest (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) -{ - uint32_t *dst_line; - uint32_t *src_line; - int dst_stride, src_stride; - int src_width, src_height; - pixman_repeat_t src_repeat; - pixman_fixed_t unit_x, unit_y; - pixman_format_code_t src_format; - pixman_vector_t v; - pixman_fixed_t vy; - - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be - * transformed from destination space to source space - */ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); - - /* reference point is the center of the pixel */ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; - v.vector[2] = pixman_fixed_1; - - if (!pixman_transform_point_3d (src_image->common.transform, &v)) - return; - - unit_x = src_image->common.transform->matrix[0][0]; - unit_y = src_image->common.transform->matrix[1][1]; - - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ - v.vector[0] -= pixman_fixed_e; - v.vector[1] -= pixman_fixed_e; - - src_height = src_image->bits.height; - src_width = src_image->bits.width; - src_repeat = src_image->common.repeat; - src_format = src_image->bits.format; - - vy = v.vector[1]; - while (height--) - { - pixman_fixed_t vx = v.vector[0]; - int y = pixman_fixed_to_int (vy); - uint32_t *dst = dst_line; - - dst_line += dst_stride; - - /* adjust the y location by a unit vector in the y direction - * this is equivalent to transforming y+1 of the destination point to source space */ - vy += unit_y; - - if (!repeat (src_repeat, &y, src_height)) - { - if 
(op == PIXMAN_OP_SRC) - memset (dst, 0, sizeof (*dst) * width); - } - else - { - int w = width; - - uint32_t *src = src_line + y * src_stride; - - while (w >= 2) - { - uint32_t s1, s2; - int x1, x2; - - x1 = pixman_fixed_to_int (vx); - vx += unit_x; - - x2 = pixman_fixed_to_int (vx); - vx += unit_x; - - w -= 2; - - s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); - s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); - - if (op == PIXMAN_OP_OVER) - { - combine_over (s1, dst++); - combine_over (s2, dst++); - } - else - { - combine_src (s1, dst++); - combine_src (s2, dst++); - } - } - - while (w--) - { - uint32_t s; - int x; - - x = pixman_fixed_to_int (vx); - vx += unit_x; - - s = fetch_nearest (src_repeat, src_format, src, x, src_width); - - if (op == PIXMAN_OP_OVER) - combine_over (s, dst++); - else - combine_src (s, dst++); - } - } - } -} - -#define CACHE_LINE_SIZE 64 - -#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ - \ -static void \ -blt_rotated_90_trivial_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int w, \ - int h) \ -{ \ - int x, y; \ - for (y = 0; y < h; y++) \ - { \ - const pix_type *s = src + (h - y - 1); \ - pix_type *d = dst + dst_stride * y; \ - for (x = 0; x < w; x++) \ - { \ - *d++ = *s; \ - s += src_stride; \ - } \ - } \ -} \ - \ -static void \ -blt_rotated_270_trivial_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int w, \ - int h) \ -{ \ - int x, y; \ - for (y = 0; y < h; y++) \ - { \ - const pix_type *s = src + src_stride * (w - 1) + y; \ - pix_type *d = dst + dst_stride * y; \ - for (x = 0; x < w; x++) \ - { \ - *d++ = *s; \ - s -= src_stride; \ - } \ - } \ -} \ - \ -static void \ -blt_rotated_90_##suffix (pix_type *dst, \ - int dst_stride, \ - const pix_type *src, \ - int src_stride, \ - int W, \ - int H) \ -{ \ - int x; \ - int leading_pixels = 0, trailing_pixels = 0; \ - const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ - \ - /* \ - * split processing into handling destination as TILE_SIZExH cache line \ - * aligned vertical stripes (optimistically assuming that destination \ - * stride is a multiple of cache line, if not - it will be just a bit \ - * slower) \ - */ \ - \ - if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ - { \ - leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (leading_pixels > W) \ - leading_pixels = W; \ - \ - /* unaligned leading part NxH (where N < TILE_SIZE) */ \ - blt_rotated_90_trivial_##suffix ( \ - dst, \ - dst_stride, \ - src, \ - src_stride, \ - leading_pixels, \ - H); \ - \ - dst += leading_pixels; \ - src += leading_pixels * src_stride; \ - W -= leading_pixels; \ - } \ - \ - if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ - { \ - trailing_pixels = (((uintptr_t)(dst + W) & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (trailing_pixels > W) \ - trailing_pixels = W; \ - W -= trailing_pixels; \ - } \ - \ - for (x = 0; x < W; x += TILE_SIZE) \ - { \ - /* aligned middle part TILE_SIZExH */ \ - blt_rotated_90_trivial_##suffix ( \ - dst + x, \ - dst_stride, \ - src + src_stride * x, \ - src_stride, \ - TILE_SIZE, \ - H); \ - } \ - \ - if (trailing_pixels) \ - { \ - /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ - blt_rotated_90_trivial_##suffix ( \ - dst + W, \ - dst_stride, \ - src + W * src_stride, \ - src_stride, \ - trailing_pixels, \ - H); \ - } \ -} \ - \ -static void \ -blt_rotated_270_##suffix (pix_type *dst, \ - int dst_stride, 
\ - const pix_type *src, \ - int src_stride, \ - int W, \ - int H) \ -{ \ - int x; \ - int leading_pixels = 0, trailing_pixels = 0; \ - const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ - \ - /* \ - * split processing into handling destination as TILE_SIZExH cache line \ - * aligned vertical stripes (optimistically assuming that destination \ - * stride is a multiple of cache line, if not - it will be just a bit \ - * slower) \ - */ \ - \ - if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ - { \ - leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (leading_pixels > W) \ - leading_pixels = W; \ - \ - /* unaligned leading part NxH (where N < TILE_SIZE) */ \ - blt_rotated_270_trivial_##suffix ( \ - dst, \ - dst_stride, \ - src + src_stride * (W - leading_pixels), \ - src_stride, \ - leading_pixels, \ - H); \ - \ - dst += leading_pixels; \ - W -= leading_pixels; \ - } \ - \ - if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ - { \ - trailing_pixels = (((uintptr_t)(dst + W) & \ - (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ - if (trailing_pixels > W) \ - trailing_pixels = W; \ - W -= trailing_pixels; \ - src += trailing_pixels * src_stride; \ - } \ - \ - for (x = 0; x < W; x += TILE_SIZE) \ - { \ - /* aligned middle part TILE_SIZExH */ \ - blt_rotated_270_trivial_##suffix ( \ - dst + x, \ - dst_stride, \ - src + src_stride * (W - x - TILE_SIZE), \ - src_stride, \ - TILE_SIZE, \ - H); \ - } \ - \ - if (trailing_pixels) \ - { \ - /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ - blt_rotated_270_trivial_##suffix ( \ - dst + W, \ - dst_stride, \ - src - trailing_pixels * src_stride, \ - src_stride, \ - trailing_pixels, \ - H); \ - } \ -} \ - \ -static void \ -fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - pix_type *dst_line; \ - pix_type *src_line; \ - int dst_stride, src_stride; \ - int src_x_t, src_y_t; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ - dst_stride, dst_line, 1); \ - src_x_t = -src_y + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[0][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ - src_y_t = src_x + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[1][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e); \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ - src_stride, src_line, 1); \ - blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ - width, height); \ -} \ - \ -static void \ -fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - pix_type *dst_line; \ - pix_type *src_line; \ - int dst_stride, src_stride; \ - int src_x_t, src_y_t; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ - dst_stride, dst_line, 1); \ - src_x_t = src_y + pixman_fixed_to_int ( \ - src_image->common.transform->matrix[0][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e); \ - src_y_t = -src_x + pixman_fixed_to_int ( \ - 
src_image->common.transform->matrix[1][2] + \ - pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ - PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ - src_stride, src_line, 1); \ - blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ - width, height); \ -} - -FAST_SIMPLE_ROTATE (8, uint8_t) -FAST_SIMPLE_ROTATE (565, uint16_t) -FAST_SIMPLE_ROTATE (8888, uint32_t) - -static const pixman_fast_path_t c_fast_paths[] = -{ - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), - PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), - PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), - PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (SRC, 
solid, null, a8r8g8b8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), - PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), - PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), - PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), - - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), - SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), - - SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), - SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), - - SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), - -#define NEAREST_FAST_PATH(op,s,d) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - 
fast_composite_scaled_nearest, \ - } - - NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), - NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), - - NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), - NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), - - NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), - NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), - NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), - - NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), - NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), - NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), - -#define SIMPLE_ROTATE_FLAGS(angle) \ - (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_SAMPLES_COVER_CLIP | \ - FAST_PATH_STANDARD_FLAGS) - -#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_rotate_90_##suffix, \ - }, \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_rotate_270_##suffix, \ - } - - SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), - SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), - SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), - - { PIXMAN_OP_NONE }, -}; - -#ifdef WORDS_BIGENDIAN -#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) -#else -#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) -#endif - -static force_inline void -pixman_fill1_line (uint32_t *dst, int offs, int width, int v) -{ - if (offs) - { - int leading_pixels = 32 - offs; - if (leading_pixels >= width) - { - if (v) - *dst |= A1_FILL_MASK (width, offs); - else - *dst &= ~A1_FILL_MASK (width, offs); - return; - } - else - { - if (v) - *dst++ |= A1_FILL_MASK (leading_pixels, offs); - else - *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); - width -= leading_pixels; - } - } - while (width >= 32) - { - if (v) - *dst++ = 0xFFFFFFFF; - else - *dst++ = 0; - width -= 32; - } - if (width > 0) - { - if (v) - *dst |= A1_FILL_MASK (width, 0); - else - *dst &= ~A1_FILL_MASK (width, 0); - } -} - -static void -pixman_fill1 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - uint32_t *dst = bits + y * stride + (x >> 5); - int offs = x & 31; - - if (xor & 1) - { - while (height--) - { - pixman_fill1_line (dst, offs, width, 1); - dst += stride; - } - } - else - { - while (height--) - { - pixman_fill1_line (dst, offs, width, 0); - dst += stride; - } - } -} - -static void -pixman_fill8 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - int byte_stride = stride * (int) sizeof (uint32_t); - uint8_t *dst = (uint8_t *) bits; - uint8_t v = xor & 0xff; - int i; - - dst = dst + y * byte_stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - dst[i] = v; - - dst += byte_stride; - } -} - -static void -pixman_fill16 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - int short_stride = - (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); - uint16_t *dst = 
(uint16_t *)bits; - uint16_t v = xor & 0xffff; - int i; - - dst = dst + y * short_stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - dst[i] = v; - - dst += short_stride; - } -} - -static void -pixman_fill32 (uint32_t *bits, - int stride, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - int i; - - bits = bits + y * stride + x; - - while (height--) - { - for (i = 0; i < width; ++i) - bits[i] = xor; - - bits += stride; - } -} - -static pixman_bool_t -fast_path_fill (pixman_implementation_t *imp, - uint32_t * bits, - int stride, - int bpp, - int x, - int y, - int width, - int height, - uint32_t xor) -{ - switch (bpp) - { - case 1: - pixman_fill1 (bits, stride, x, y, width, height, xor); - break; - - case 8: - pixman_fill8 (bits, stride, x, y, width, height, xor); - break; - - case 16: - pixman_fill16 (bits, stride, x, y, width, height, xor); - break; - - case 32: - pixman_fill32 (bits, stride, x, y, width, height, xor); - break; - - default: - return _pixman_implementation_fill ( - imp->delegate, bits, stride, bpp, x, y, width, height, xor); - break; - } - - return TRUE; -} - -pixman_implementation_t * -_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) -{ - pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); - - imp->fill = fast_path_fill; - - return imp; -} +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. 
+ */ + +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-fast-path.h" + +static force_inline uint32_t +fetch_24 (uint8_t *a) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + return (*a << 16) | (*(uint16_t *)(a + 1)); +#else + return *a | (*(uint16_t *)(a + 1) << 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + return (*(uint16_t *)a << 8) | *(a + 2); +#else + return *(uint16_t *)a | (*(a + 2) << 16); +#endif + } +} + +static force_inline void +store_24 (uint8_t *a, + uint32_t v) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + *a = (uint8_t) (v >> 16); + *(uint16_t *)(a + 1) = (uint16_t) (v); +#else + *a = (uint8_t) (v); + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + *(uint16_t *)a = (uint16_t)(v >> 8); + *(a + 2) = (uint8_t)v; +#else + *(uint16_t *)a = (uint16_t)v; + *(a + 2) = (uint8_t)(v >> 16); +#endif + } +} + +static force_inline uint32_t +over (uint32_t src, + uint32_t dest) +{ + uint32_t a = ~src >> 24; + + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); + + return dest; +} + +static uint32_t +in (uint32_t x, + uint8_t y) +{ + uint16_t a = y; + + UN8x4_MUL_UN8 (x, a); + + return x; +} + +/* + * Naming convention: + * + * op_src_mask_dest + */ +static void +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; + uint8_t m; + uint32_t s, d; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + while (w--) + { + m = *mask++; + if (m) + { + s = *src | 0xff000000; + + if (m == 0xff) + { + *dst = s; + } + else + { + d = in (s, m); + *dst = over (d, *dst); + } + } + src++; + dst++; + } + } +} + +static void +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + uint16_t t; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } + else + { + while (height--) + { + dst = 
dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + m = MUL_UN8 (m, srca, t); + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } +} + +static void +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + + if (s == 0) + *dst = 0; + else if (s != 0xff) + *dst = MUL_UN8 (s, *dst, t); + + dst++; + } + } +} + +static void +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (m) + { + d = in (src, m); + *dst = over (d, *dst); + } + dst++; + } + } +} + +static void +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + 
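/* Illustrative sketch (editorial, not part of this patch): the Porter-Duff
 * OVER operator that the over () / in () helpers above implement on one
 * premultiplied a8r8g8b8 pixel, written with plain per-channel arithmetic
 * instead of pixman's UN8x4_* macros.  over_sketch is a hypothetical name
 * and the code assumes the <stdint.h> fixed-width types. */
static uint32_t
over_sketch (uint32_t src, uint32_t dest)
{
    uint32_t ia = 255 - (src >> 24);   /* inverse of the source alpha */
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
	uint32_t s = (src  >> shift) & 0xff;
	uint32_t d = (dest >> shift) & 0xff;
	uint32_t m = d * ia + 0x80;              /* d * ia / 255, rounded  */
	uint32_t r = s + (((m >> 8) + m) >> 8);  /* same rounding as MUL_UN8 */

	if (r > 0xff)   /* cannot happen for valid premultiplied input;   */
	    r = 0xff;   /* kept only to make the sketch obviously safe    */
	result |= r << shift;
    }
    return result;
}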
int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = d; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = fetch_24 (dst); + d = over (src, d); + } + store_24 (dst, d); + } + else if (m) + { + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); + } + dst += 3; + } + } +} + +static void +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + else if (m) + { + d = *dst; + d = over (in (src, m), CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * 
mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + src16 = CONVERT_8888_TO_0565 (src); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + { + *dst = src16; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + } + else if (ma) + { + d = *dst; + d = CONVERT_0565_TO_0888 (d); + + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a == 0xff) + *dst = s; + else if (s) + *dst = over (s, *dst); + dst++; + } + } +} + +static void +fast_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + *dst++ = (*src++) | 0xff000000; + } +} + +#if 0 +static void +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, 
src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a) + { + if (a == 0xff) + d = s; + else + d = over (s, fetch_24 (dst)); + + store_24 (dst, d); + } + dst += 3; + } + } +} +#endif + +static void +fast_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (s) + { + if (a == 0xff) + { + d = s; + } + else + { + d = *dst; + d = over (s, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + *dst = CONVERT_8888_TO_0565 (s); + dst++; + } + } +} + +static void +fast_composite_add_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xff) + { + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint32_t s, d; + + PIXMAN_IMAGE_GET_LINE 
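/* Illustrative sketch (editorial, not part of this patch): the branchless
 * saturating add used by fast_composite_add_8_8 above.  For 8-bit inputs the
 * sum fits in 9 bits, so (t >> 8) is 1 exactly when the sum overflowed, and
 * (0 - 1) is all ones, forcing the truncated result to 0xff.
 * saturating_add_u8 is a hypothetical standalone helper. */
static uint8_t
saturating_add_u8 (uint8_t d, uint8_t s)
{
    uint16_t t = (uint16_t) d + s;

    /* e.g. 0xf0 + 0x20 = 0x110; 0x110 >> 8 = 1; 0x110 | (0 - 1) = ~0;
     * truncated back to 8 bits this yields 0xff. */
    return (uint8_t) (t | (0 - (t >> 8)));
}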
(src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xffffffff) + { + d = *dst; + if (d) + UN8x4_ADD_UN8x4 (s, d); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + sa = (src >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + } + } +} + +#ifdef WORDS_BIGENDIAN +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) +#else +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1000_1000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, 
mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +static void +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + uint32_t d; + uint16_t src565; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + src565 = CONVERT_8888_TO_0565 (src); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src565; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + { + d = over (src, CONVERT_0565_TO_0888 (*dst)); + *dst = CONVERT_8888_TO_0565 (d); + } + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +/* + * Simple bitblt + */ + +static void +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (dst_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dst_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } + else if (dst_image->bits.format == PIXMAN_r5g6b5 || + dst_image->bits.format == PIXMAN_b5g6r5) + { + src = CONVERT_8888_TO_0565 (src); + } + + pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, + 
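/* Illustrative sketch (editorial, not part of this patch): scanning a 1 bpp
 * mask word-by-word the way fast_composite_over_n_1_8888 above does.  On
 * little-endian hosts the leftmost pixel of a 32-bit word is bit 0, on
 * big-endian hosts it is bit 31; CREATE_BITMASK / UPDATE_BITMASK hide that
 * difference.  count_set_mask_bits is a hypothetical helper; mask must point
 * at the word containing pixel mask_x, as the real code arranges with
 * mask_line += mask_x >> 5. */
static int
count_set_mask_bits (const uint32_t *mask, int mask_x, int width)
{
    uint32_t bitcache = *mask++;
    uint32_t bitmask = CREATE_BITMASK (mask_x & 31);
    int n = 0;

    while (width--)
    {
	if (bitmask == 0)            /* crossed into the next 32-bit word */
	{
	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (0);
	}
	if (bitcache & bitmask)
	    n++;
	bitmask = UPDATE_BITMASK (bitmask);
    }
    return n;
}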
PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, + width, height, + src); +} + +static void +fast_composite_src_memcpy (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; + + src_stride = src_image->bits.rowstride * 4; + dst_stride = dst_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + + while (height--) + { + memcpy (dst, src, n_bytes); + + dst += dst_stride; + src += src_stride; + } +} + +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ +static force_inline void +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + const uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) +{ + uint16_t tmp1, tmp2, tmp3, tmp4; + while ((w -= 4) >= 0) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + *dst++ = tmp3; + *dst++ = tmp4; + } + if (w & 2) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + } + if (w & 1) + *dst++ = src[pixman_fixed_to_int (vx)]; +} + +FAST_NEAREST_MAINLOOP (565_565_cover_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, COVER) +FAST_NEAREST_MAINLOOP (565_565_none_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, NONE) +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, PAD) + +static force_inline uint32_t +fetch_nearest (pixman_repeat_t src_repeat, + pixman_format_code_t format, + uint32_t *src, 
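/* Illustrative sketch (editorial, not part of this patch): the 16.16
 * fixed-point stepping used by the nearest scanline functions above.  vx
 * carries the source x coordinate as a pixman_fixed_t, pixman_fixed_to_int
 * (vx >> 16) picks the nearest source pixel, and unit_x is the increment per
 * destination pixel.  scale_row_nearest_sketch is a hypothetical, un-unrolled
 * version of the same idea for 32 bpp pixels. */
static void
scale_row_nearest_sketch (uint32_t       *dst,
                          const uint32_t *src,
                          int             width,
                          pixman_fixed_t  vx,
                          pixman_fixed_t  unit_x)
{
    while (width--)
    {
	*dst++ = src[pixman_fixed_to_int (vx)];
	vx += unit_x;
    }
}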
int x, int src_width) +{ + if (repeat (src_repeat, &x, src_width)) + { + if (format == PIXMAN_x8r8g8b8) + return *(src + x) | 0xff000000; + else + return *(src + x); + } + else + { + return 0; + } +} + +static force_inline void +combine_over (uint32_t s, uint32_t *dst) +{ + if (s) + { + uint8_t ia = 0xff - (s >> 24); + + if (ia) + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); + else + *dst = s; + } +} + +static force_inline void +combine_src (uint32_t s, uint32_t *dst) +{ + *dst = s; +} + +static void +fast_composite_scaled_nearest (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line; + uint32_t *src_line; + int dst_stride, src_stride; + int src_width, src_height; + pixman_repeat_t src_repeat; + pixman_fixed_t unit_x, unit_y; + pixman_format_code_t src_format; + pixman_vector_t v; + pixman_fixed_t vy; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be + * transformed from destination space to source space + */ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (src_image->common.transform, &v)) + return; + + unit_x = src_image->common.transform->matrix[0][0]; + unit_y = src_image->common.transform->matrix[1][1]; + + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ + v.vector[0] -= pixman_fixed_e; + v.vector[1] -= pixman_fixed_e; + + src_height = src_image->bits.height; + src_width = src_image->bits.width; + src_repeat = src_image->common.repeat; + src_format = src_image->bits.format; + + vy = v.vector[1]; + while (height--) + { + pixman_fixed_t vx = v.vector[0]; + int y = pixman_fixed_to_int (vy); + uint32_t *dst = dst_line; + + dst_line += dst_stride; + + /* adjust the y location by a unit vector in the y direction + * this is equivalent to transforming y+1 of the destination point to source space */ + vy += unit_y; + + if (!repeat (src_repeat, &y, src_height)) + { + if (op == PIXMAN_OP_SRC) + memset (dst, 0, sizeof (*dst) * width); + } + else + { + int w = width; + + uint32_t *src = src_line + y * src_stride; + + while (w >= 2) + { + uint32_t s1, s2; + int x1, x2; + + x1 = pixman_fixed_to_int (vx); + vx += unit_x; + + x2 = pixman_fixed_to_int (vx); + vx += unit_x; + + w -= 2; + + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); + + if (op == PIXMAN_OP_OVER) + { + combine_over (s1, dst++); + combine_over (s2, dst++); + } + else + { + combine_src (s1, dst++); + combine_src (s2, dst++); + } + } + + while (w--) + { + uint32_t s; + int x; + + x = pixman_fixed_to_int (vx); + vx += unit_x; + + s = fetch_nearest (src_repeat, src_format, src, x, src_width); + + if (op == PIXMAN_OP_OVER) + combine_over (s, dst++); + else + combine_src (s, dst++); + } + } + } +} + +#define CACHE_LINE_SIZE 64 + +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ + \ +static void \ +blt_rotated_90_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int 
src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + (h - y - 1); \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s += src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_270_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + src_stride * (w - 1) + y; \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s -= src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_90_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src, \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + src += leading_pixels * src_stride; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * x, \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src + W * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +blt_rotated_270_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src + src_stride * (W - leading_pixels), \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / 
sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + src += trailing_pixels * src_stride; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * (W - x - TILE_SIZE), \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src - trailing_pixels * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = -src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ + src_y_t = src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} \ + \ +static void \ +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + src_y_t = -src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} + +FAST_SIMPLE_ROTATE (8, uint8_t) +FAST_SIMPLE_ROTATE (565, uint16_t) +FAST_SIMPLE_ROTATE (8888, uint32_t) + +static const pixman_fast_path_t c_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), + 
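/* Illustrative sketch (editorial, not part of this patch): the pixel mapping
 * implemented by the blt_rotated_90_trivial_* helpers above.  For a 90-degree
 * rotation, destination pixel (x, y) is read from source column h - 1 - y,
 * source row x, where w and h are the destination width and height.
 * rotate_90_sketch is a hypothetical uint32_t-only version without the
 * cache-line tiling done by blt_rotated_90_*. */
static void
rotate_90_sketch (uint32_t       *dst, int dst_stride,
                  const uint32_t *src, int src_stride,
                  int w, int h)
{
    int x, y;

    for (y = 0; y < h; y++)
    {
	const uint32_t *s = src + (h - y - 1);   /* pick the source column */
	uint32_t *d = dst + dst_stride * y;

	for (x = 0; x < w; x++)
	{
	    *d++ = *s;
	    s += src_stride;                     /* walk down that column  */
	}
    }
}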
PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), + 
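/* Illustrative sketch (editorial, not part of this patch): the addressing
 * used by fast_composite_src_memcpy above.  bits.rowstride is counted in
 * uint32_t units, so the byte stride is rowstride * 4, and the first byte of
 * pixel (x, y) in a bpp-bit format sits at y * stride_bytes + x * (bpp / 8).
 * pixel_address_sketch is a hypothetical helper. */
static uint8_t *
pixel_address_sketch (uint32_t *bits, int rowstride_words, int bpp, int x, int y)
{
    int stride_bytes = rowstride_words * (int) sizeof (uint32_t);

    return (uint8_t *) bits + y * stride_bytes + x * (bpp / 8);
}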
PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + +#define NEAREST_FAST_PATH(op,s,d) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest, \ + } + + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), + +#define SIMPLE_ROTATE_FLAGS(angle) \ + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP | \ + FAST_PATH_STANDARD_FLAGS) + +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ + 
PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_90_##suffix, \ + }, \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_270_##suffix, \ + } + + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + + { PIXMAN_OP_NONE }, +}; + +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + if (xor & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } +} + +static void +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int byte_stride = stride * (int) sizeof (uint32_t); + uint8_t *dst = (uint8_t *) bits; + uint8_t v = xor & 0xff; + int i; + + dst = dst + y * byte_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += byte_stride; + } +} + +static void +pixman_fill16 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); + uint16_t *dst = (uint16_t *)bits; + uint16_t v = xor & 0xffff; + int i; + + dst = dst + y * short_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += short_stride; + } +} + +static void +pixman_fill32 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int i; + + bits = bits + y * stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + bits[i] = xor; + + bits += stride; + } +} + +static pixman_bool_t +fast_path_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + switch (bpp) + { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, xor); + break; + + case 8: + pixman_fill8 (bits, stride, x, y, width, height, xor); + break; + + case 16: + pixman_fill16 (bits, stride, x, y, width, height, xor); + break; + + case 32: + pixman_fill32 (bits, stride, x, y, width, height, xor); + break; + + default: + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); + break; + } + + 
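    /* Worked example (editorial, not part of this patch) for the 1 bpp case
     * handled above: filling 5 pixels starting at bit offset 3 of a word on a
     * little-endian host uses
     *     A1_FILL_MASK (5, 3) = ((1 << 5) - 1) << 3 = 0xf8,
     * which pixman_fill1_line ORs in when the fill value is set and ANDs out
     * otherwise; the big-endian variant builds the mirrored mask so that
     * pixel order within a word matches the framebuffer layout. */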
return TRUE; +} + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); + + imp->fill = fast_path_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h index bb7032d86..d08122293 100644 --- a/pixman/pixman/pixman-fast-path.h +++ b/pixman/pixman/pixman-fast-path.h @@ -1,449 +1,590 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided "as is" - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Author: Keith Packard, SuSE, Inc. - */ - -#ifndef PIXMAN_FAST_PATH_H__ -#define PIXMAN_FAST_PATH_H__ - -#include "pixman-private.h" - -#define PIXMAN_REPEAT_COVER -1 - -static force_inline pixman_bool_t -repeat (pixman_repeat_t repeat, int *c, int size) -{ - if (repeat == PIXMAN_REPEAT_NONE) - { - if (*c < 0 || *c >= size) - return FALSE; - } - else if (repeat == PIXMAN_REPEAT_NORMAL) - { - while (*c >= size) - *c -= size; - while (*c < 0) - *c += size; - } - else if (repeat == PIXMAN_REPEAT_PAD) - { - *c = CLIP (*c, 0, size - 1); - } - else /* REFLECT */ - { - *c = MOD (*c, size * 2); - if (*c >= size) - *c = size * 2 - *c - 1; - } - return TRUE; -} - -/* - * For each scanline fetched from source image with PAD repeat: - * - calculate how many pixels need to be padded on the left side - * - calculate how many pixels need to be padded on the right side - * - update width to only count pixels which are fetched from the image - * All this information is returned via 'width', 'left_pad', 'right_pad' - * arguments. The code is assuming that 'unit_x' is positive. - * - * Note: 64-bit math is used in order to avoid potential overflows, which - * is probably excessive in many cases. This particular function - * may need its own correctness test and performance tuning. 
- */ -static force_inline void -pad_repeat_get_scanline_bounds (int32_t source_image_width, - pixman_fixed_t vx, - pixman_fixed_t unit_x, - int32_t * width, - int32_t * left_pad, - int32_t * right_pad) -{ - int64_t max_vx = (int64_t) source_image_width << 16; - int64_t tmp; - if (vx < 0) - { - tmp = ((int64_t) unit_x - 1 - vx) / unit_x; - if (tmp > *width) - { - *left_pad = *width; - *width = 0; - } - else - { - *left_pad = (int32_t) tmp; - *width -= (int32_t) tmp; - } - } - else - { - *left_pad = 0; - } - tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; - if (tmp < 0) - { - *right_pad = *width; - *width = 0; - } - else if (tmp >= *width) - { - *right_pad = 0; - } - else - { - *right_pad = *width - (int32_t) tmp; - *width = (int32_t) tmp; - } -} - -/* A macroified version of specialized nearest scalers for some - * common 8888 and 565 formats. It supports SRC and OVER ops. - * - * There are two repeat versions, one that handles repeat normal, - * and one without repeat handling that only works if the src region - * used is completely covered by the pre-repeated source samples. - * - * The loops are unrolled to process two pixels per iteration for better - * performance on most CPU architectures (superscalar processors - * can issue several operations simultaneously, other processors can hide - * instructions latencies by pipelining operations). Unrolling more - * does not make much sense because the compiler will start running out - * of spare registers soon. - */ - -#define GET_8888_ALPHA(s) ((s) >> 24) - /* This is not actually used since we don't have an OVER with - 565 source, but it is needed to build. */ -#define GET_0565_ALPHA(s) 0xff - -#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ -static force_inline void \ -scanline_func_name (dst_type_t *dst, \ - src_type_t *src, \ - int32_t w, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx) \ -{ \ - uint32_t d; \ - src_type_t s1, s2; \ - uint8_t a1, a2; \ - int x1, x2; \ - \ - if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ - abort(); \ - \ - while ((w -= 2) >= 0) \ - { \ - x1 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s1 = src[x1]; \ - \ - x2 = vx >> 16; \ - vx += unit_x; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* This works because we know that unit_x is positive */ \ - while (vx >= max_vx) \ - vx -= max_vx; \ - } \ - s2 = src[x2]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - \ - if (a2 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - else if (s2) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ - a2 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = 
CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ - } \ - } \ - \ - if (w & 1) \ - { \ - x1 = vx >> 16; \ - s1 = src[x1]; \ - \ - if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ - { \ - a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ - \ - if (a1 == 0xff) \ - { \ - *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - else if (s1) \ - { \ - d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ - s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ - a1 ^= 0xff; \ - UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ - *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ - } \ - dst++; \ - } \ - else /* PIXMAN_OP_SRC */ \ - { \ - *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ - } \ - } \ -} - -#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ -static void \ -fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dst_x, \ - int32_t dst_y, \ - int32_t width, \ - int32_t height) \ -{ \ - dst_type_t *dst_line; \ - src_type_t *src_first_line; \ - int y; \ - pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ - pixman_fixed_t max_vy; \ - pixman_vector_t v; \ - pixman_fixed_t vx, vy; \ - pixman_fixed_t unit_x, unit_y; \ - int32_t left_pad, right_pad; \ - \ - src_type_t *src; \ - dst_type_t *dst; \ - int src_stride, dst_stride; \ - \ - PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ - /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ - * transformed from destination space to source space */ \ - PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ - \ - /* reference point is the center of the pixel */ \ - v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ - v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ - v.vector[2] = pixman_fixed_1; \ - \ - if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ - return; \ - \ - unit_x = src_image->common.transform->matrix[0][0]; \ - unit_y = src_image->common.transform->matrix[1][1]; \ - \ - /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ - v.vector[0] -= pixman_fixed_e; \ - v.vector[1] -= pixman_fixed_e; \ - \ - vx = v.vector[0]; \ - vy = v.vector[1]; \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - { \ - /* Clamp repeating positions inside the actual samples */ \ - max_vx = src_image->bits.width << 16; \ - max_vy = src_image->bits.height << 16; \ - \ - repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - } \ - \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ - PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ - &width, &left_pad, &right_pad); \ - vx += left_pad * unit_x; \ - } \ - \ - while (--height >= 0) \ - { \ - dst = dst_line; \ - dst_line += dst_stride; \ - \ - y = vy >> 16; \ - vy += unit_y; \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ - repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ - if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ - { \ - repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ - src = src_first_line + 
src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (dst, src, left_pad, 0, 0, 0); \ - } \ - if (width > 0) \ - { \ - scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \ - right_pad, 0, 0, 0); \ - } \ - } \ - else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ - { \ - static src_type_t zero[1] = { 0 }; \ - if (y < 0 || y >= src_image->bits.height) \ - { \ - scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0); \ - continue; \ - } \ - src = src_first_line + src_stride * y; \ - if (left_pad > 0) \ - { \ - scanline_func (dst, zero, left_pad, 0, 0, 0); \ - } \ - if (width > 0) \ - { \ - scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \ - } \ - if (right_pad > 0) \ - { \ - scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0); \ - } \ - } \ - else \ - { \ - src = src_first_line + src_stride * y; \ - scanline_func (dst, src, width, vx, unit_x, max_vx); \ - } \ - } \ -} - -/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ -#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \ - repeat_mode) \ - -#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ - src_type_t, dst_type_t, OP, repeat_mode) \ - FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ - OP, repeat_mode) \ - FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP, \ - scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ - src_type_t, dst_type_t, repeat_mode) - - -#define SCALED_NEAREST_FLAGS \ - (FAST_PATH_SCALE_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NEAREST_FILTER | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT) - -#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NORMAL_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_PAD_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - (SCALED_NEAREST_FLAGS | \ - FAST_PATH_NONE_REPEAT | \ - FAST_PATH_X_UNIT_POSITIVE), \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ - } - -#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ - { PIXMAN_OP_ ## op, \ - PIXMAN_ ## s, \ - SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ - PIXMAN_null, 0, \ - PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ - fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ - } - -/* Prefer the use of 'cover' variant, because it is faster */ -#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ - SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ - SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) - -#endif +/* -*- 
Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. 
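The comment above concedes that pad_repeat_get_scanline_bounds() "may need its own correctness test". A minimal standalone sketch of such a check, assuming the same 16.16 fixed-point convention and a positive unit_x, reimplements the bounds computation and compares it against a brute-force per-pixel classification; the helper name get_bounds and the test values are illustrative only and are not part of pixman or of this patch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same algorithm as pad_repeat_get_scanline_bounds() above,
 * 16.16 fixed-point source coordinates, unit_x assumed positive. */
static void
get_bounds (int32_t src_w, int32_t vx, int32_t unit_x,
            int32_t *width, int32_t *left_pad, int32_t *right_pad)
{
    int64_t max_vx = (int64_t) src_w << 16;
    int64_t tmp;

    if (vx < 0)
    {
        /* number of samples taken before vx becomes non-negative */
        tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
        if (tmp > *width)
        {
            *left_pad = *width;
            *width = 0;
        }
        else
        {
            *left_pad = (int32_t) tmp;
            *width -= (int32_t) tmp;
        }
    }
    else
    {
        *left_pad = 0;
    }

    /* number of samples (past the left pad) that still fall inside the image */
    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
    if (tmp < 0)
    {
        *right_pad = *width;
        *width = 0;
    }
    else if (tmp >= *width)
    {
        *right_pad = 0;
    }
    else
    {
        *right_pad = *width - (int32_t) tmp;
        *width = (int32_t) tmp;
    }
}

int
main (void)
{
    int32_t src_w  = 4;                 /* source scanline is 4 pixels wide   */
    int32_t unit_x = 65536;             /* identity scale: one pixel per step */
    int32_t vx     = -(2 << 16) - 32768;/* first sample at x = -2.5           */
    int32_t width  = 10, left_pad, right_pad;
    int32_t brute_left = 0, brute_mid = 0, brute_right = 0;
    int i, x;

    /* Brute force: classify each of the 10 destination samples individually,
     * using the same "vx >> 16" pixel selection as the scanline functions. */
    for (i = 0; i < 10; i++)
    {
        x = (vx + i * unit_x) >> 16;
        if (x < 0)
            brute_left++;
        else if (x >= src_w)
            brute_right++;
        else
            brute_mid++;
    }

    get_bounds (src_w, vx, unit_x, &width, &left_pad, &right_pad);

    printf ("left_pad=%d width=%d right_pad=%d\n",
            (int) left_pad, (int) width, (int) right_pad);

    assert (left_pad  == brute_left);   /* 3 padded pixels on the left  */
    assert (width     == brute_mid);    /* 4 pixels fetched from source */
    assert (right_pad == brute_right);  /* 3 padded pixels on the right */

    return 0;
}

Because unit_x is assumed positive, all left-pad samples precede the in-image samples, which in turn precede the right-pad samples, so the three counts fully describe the scanline split used by the main loop below.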
+ * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. */ +#define GET_0565_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s1 = src[x1]; \ + \ + x2 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s2 = src[x2]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ + a2 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + } \ + \ + if (w & 1) \ + { \ + x1 = vx >> 16; \ + s1 = src[x1]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + } \ +} + +#define 
FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ +static void \ +fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dst_x, \ + int32_t dst_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ + \ + src_type_t *src; \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ + if (have_mask) \ + { \ + if (mask_is_solid) \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \ + else \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* Clamp repeating positions inside the actual samples */ \ + max_vx = src_image->bits.width << 16; \ + max_vy = src_image->bits.height << 16; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + if (have_mask && !mask_is_solid) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y = vy >> 16; \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 
0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, src + src_image->bits.width - 1, \ + right_pad, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + static const src_type_t zero[1] = { 0 }; \ + if (y < 0 || y >= src_image->bits.height) \ + { \ + scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \ + continue; \ + } \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ + FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) + +#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + static force_inline void \ + scanline_func##scale_func_name##_wrapper ( \ + const uint8_t *mask, \ + dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ + { \ + scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ + } \ + FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ + src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ + dst_type_t, repeat_mode) + +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + +#define SCALED_NEAREST_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, 
FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ 
## func ## _cover ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + +#endif diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 91adc0560..2e135e2fe 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5800,7 +5800,8 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, - pixman_fixed_t max_vx) + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) { uint32_t s, d; const uint32_t* pm = NULL; @@ -5809,6 +5810,9 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; + if (fully_transparent_src) + return; + /* Align dst on a 16-byte boundary */ while (w && ((unsigned long)pd & 15)) { @@ -5894,6 +5898,119 @@ FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER, scaled_nearest_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, PAD) +static force_inline void +scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, + uint32_t * dst, + const uint32_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && (unsigned long)dst & 15) + { + uint32_t s = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + if (s) + { + uint32_t d = *dst; + + __m64 ms = unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 dest = _mm_movepi64_pi64 (xmm_mask); + __m64 alpha_dst = unpack_32_1x64 (d); + + *dst = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + uint32_t s = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + if (s) + { + uint32_t d = *dst; + + __m64 ms = 
unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 mask = _mm_movepi64_pi64 (xmm_mask); + __m64 dest = unpack_32_1x64 (d); + + *dst = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + + _mm_empty (); +} + +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) + static const pixman_fast_path_t sse2_fast_paths[] = { /* PIXMAN_OP_OVER */ @@ -5990,6 +6107,11 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am index 8d8471d1c..3ce466eec 100644 --- a/pixman/test/Makefile.am +++ b/pixman/test/Makefile.am @@ -1,7 +1,6 @@ AM_CFLAGS = @OPENMP_CFLAGS@ -AM_LDFLAGS = @OPENMP_CFLAGS@ - -TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la -lm +AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@ +LDADD = $(top_builddir)/pixman/libpixman-1.la -lm INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman TESTPROGRAMS = \ @@ -22,121 +21,24 @@ TESTPROGRAMS = \ affine-test \ composite -a1_trap_test_LDADD = $(TEST_LDADD) -a1_trap_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -fetch_test_LDADD = $(TEST_LDADD) -fetch_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -trap_crasher_LDADD = $(TEST_LDADD) -trap_crasher_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -oob_test_LDADD = $(TEST_LDADD) -oob_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -scaling_crash_test_LDADD = $(TEST_LDADD) -scaling_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -region_translate_test_LDADD = $(TEST_LDADD) -region_translate_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -pdf_op_test_LDADD = $(TEST_LDADD) -pdf_op_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h - -region_test_LDADD = $(TEST_LDADD) -region_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ region_test_SOURCES = region-test.c utils.c utils.h - -blitters_test_LDADD = $(TEST_LDADD) -blitters_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ blitters_test_SOURCES = blitters-test.c utils.c utils.h - -scaling_test_LDADD = $(TEST_LDADD) -scaling_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ scaling_test_SOURCES = scaling-test.c utils.c utils.h - -affine_test_LDADD = $(TEST_LDADD) -affine_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ affine_test_SOURCES = affine-test.c utils.c utils.h - -alphamap_LDADD = $(TEST_LDADD) -alphamap_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ alphamap_SOURCES = alphamap.c utils.c utils.h - -alpha_loop_LDADD = $(TEST_LDADD) -alpha_loop_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h - -composite_LDADD = $(TEST_LDADD) -composite_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ composite_SOURCES = composite.c utils.c 
utils.h - -gradient_crash_test_LDADD = $(TEST_LDADD) -gradient_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h - -stress_test_LDADD = $(TEST_LDADD) -stress_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ stress_test_SOURCES = stress-test.c utils.c utils.h -# GTK using test programs - -if HAVE_GTK - -GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS) -GTK_UTILS = gtk-utils.c gtk-utils.h - -TESTPROGRAMS_GTK = \ - clip-test \ - clip-in \ - composite-test \ - gradient-test \ - radial-test \ - alpha-test \ - screen-test \ - convolution-test \ - trap-test - -INCLUDES += $(GTK_CFLAGS) - -gradient_test_LDADD = $(GTK_LDADD) -gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) - -radial_test_LDADD = $(GTK_LDADD) -radial_test_SOURCES = radial-test.c utils.c utils.h $(GTK_UTILS) - -alpha_test_LDADD = $(GTK_LDADD) -alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) - -composite_test_LDADD = $(GTK_LDADD) -composite_test_SOURCES = composite-test.c $(GTK_UTILS) - -clip_test_LDADD = $(GTK_LDADD) -clip_test_SOURCES = clip-test.c $(GTK_UTILS) - -clip_in_LDADD = $(GTK_LDADD) -clip_in_SOURCES = clip-in.c $(GTK_UTILS) - -trap_test_LDADD = $(GTK_LDADD) -trap_test_SOURCES = trap-test.c $(GTK_UTILS) - -screen_test_LDADD = $(GTK_LDADD) -screen_test_SOURCES = screen-test.c $(GTK_UTILS) - -convolution_test_LDADD = $(GTK_LDADD) -convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) - -endif - # Benchmarks BENCHMARKS = \ lowlevel-blt-bench lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h -lowlevel_blt_bench_LDADD = $(TEST_LDADD) -noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK) $(BENCHMARKS) +noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS) TESTS = $(TESTPROGRAMS) diff --git a/pixman/test/alpha-test.c b/pixman/test/alpha-test.c deleted file mode 100644 index 92c208142..000000000 --- a/pixman/test/alpha-test.c +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 400 -#define HEIGHT 200 - - uint32_t *alpha = malloc (WIDTH * HEIGHT * 4); - uint32_t *dest = malloc (WIDTH * HEIGHT * 4); - uint32_t *src = malloc (WIDTH * HEIGHT * 4); - pixman_image_t *grad_img; - pixman_image_t *alpha_img; - pixman_image_t *dest_img; - pixman_image_t *src_img; - int i; - pixman_gradient_stop_t stops[2] = - { - { pixman_int_to_fixed (0), { 0x0000, 0x0000, 0x0000, 0x0000 } }, - { pixman_int_to_fixed (1), { 0xffff, 0x0000, 0x1111, 0xffff } } - }; - pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 }; - pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH), - pixman_int_to_fixed (0) }; -#if 0 - pixman_transform_t trans = { - { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } - } - }; -#else - pixman_transform_t trans = { - { { pixman_fixed_1, 0, 0 }, - { 0, pixman_fixed_1, 0 }, - { 0, 0, pixman_fixed_1 } } - }; -#endif - - pixman_point_fixed_t c_inner; - pixman_point_fixed_t c_outer; - pixman_fixed_t r_inner; - pixman_fixed_t r_outer; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - alpha[i] = 0x4f00004f; /* pale blue */ - - alpha_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - alpha, - WIDTH * 4); - - for (i = 0; i < WIDTH * HEIGHT; ++i) - dest[i] = 0xffffff00; /* yellow */ - - dest_img = pixman_image_create_bits 
(PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - dest, - WIDTH * 4); - - for (i = 0; i < WIDTH * HEIGHT; ++i) - src[i] = 0xffff0000; - - src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - src, - WIDTH * 4); - - c_inner.x = pixman_double_to_fixed (50.0); - c_inner.y = pixman_double_to_fixed (50.0); - c_outer.x = pixman_double_to_fixed (50.0); - c_outer.y = pixman_double_to_fixed (50.0); - r_inner = 0; - r_outer = pixman_double_to_fixed (50.0); - -#if 0 - grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, - stops, 2); -#endif -#if 0 - grad_img = pixman_image_create_conical_gradient (&c_inner, r_inner, - stops, 2); - grad_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, - r_inner, r_outer, - stops, 2); -#endif - - grad_img = pixman_image_create_linear_gradient (&p1, &p2, - stops, 2); - - pixman_image_set_transform (grad_img, &trans); - pixman_image_set_repeat (grad_img, PIXMAN_REPEAT_PAD); - - pixman_image_composite (PIXMAN_OP_OVER, grad_img, NULL, alpha_img, - 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); - - pixman_image_set_alpha_map (src_img, alpha_img, 10, 10); - - pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, - 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); - - printf ("0, 0: %x\n", dest[0]); - printf ("10, 10: %x\n", dest[10 * 10 + 10]); - printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); - - show_image (dest_img); - - pixman_image_unref (src_img); - pixman_image_unref (grad_img); - pixman_image_unref (alpha_img); - free (dest); - - return 0; -} diff --git a/pixman/test/clip-in.c b/pixman/test/clip-in.c deleted file mode 100644 index 51579811f..000000000 --- a/pixman/test/clip-in.c +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -/* This test demonstrates that clipping is done totally different depending - * on whether the source is transformed or not. 
- */ -int -main (int argc, char **argv) -{ -#define WIDTH 200 -#define HEIGHT 200 - -#define SMALL 25 - - uint32_t *sbits = malloc (SMALL * SMALL * 4); - uint32_t *bits = malloc (WIDTH * HEIGHT * 4); - pixman_transform_t trans = { - { - { pixman_double_to_fixed (1.0), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.1), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.1), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } - } }; - - pixman_image_t *src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, SMALL, SMALL, sbits, 4 * SMALL); - pixman_image_t *dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, 4 * WIDTH); - - memset (bits, 0xff, WIDTH * HEIGHT * 4); - memset (sbits, 0x00, SMALL * SMALL * 4); - - pixman_image_composite (PIXMAN_OP_IN, - src_img, NULL, dest_img, - 0, 0, 0, 0, SMALL, SMALL, 200, 200); - - pixman_image_set_transform (src_img, &trans); - - pixman_image_composite (PIXMAN_OP_IN, - src_img, NULL, dest_img, - 0, 0, 0, 0, SMALL * 2, SMALL * 2, 200, 200); - - show_image (dest_img); - - pixman_image_unref (src_img); - pixman_image_unref (dest_img); - free (bits); - - return 0; -} diff --git a/pixman/test/clip-test.c b/pixman/test/clip-test.c deleted file mode 100644 index aa0df4482..000000000 --- a/pixman/test/clip-test.c +++ /dev/null @@ -1,97 +0,0 @@ -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -#define WIDTH 200 -#define HEIGHT 200 - -static pixman_image_t * -create_solid_bits (uint32_t pixel) -{ - uint32_t *pixels = malloc (WIDTH * HEIGHT * 4); - int i; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - pixels[i] = pixel; - - return pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - pixels, - WIDTH * 4); -} - -int -main (int argc, char **argv) -{ - pixman_image_t *gradient_img; - pixman_image_t *src_img, *dst_img; - pixman_gradient_stop_t stops[2] = - { - { pixman_int_to_fixed (0), { 0xffff, 0x0000, 0x0000, 0xffff } }, - { pixman_int_to_fixed (1), { 0xffff, 0xffff, 0x0000, 0xffff } } - }; -#if 0 - pixman_point_fixed_t p1 = { 0, 0 }; - pixman_point_fixed_t p2 = { pixman_int_to_fixed (WIDTH), - pixman_int_to_fixed (HEIGHT) }; -#endif - pixman_point_fixed_t c_inner; - pixman_point_fixed_t c_outer; - pixman_fixed_t r_inner; - pixman_fixed_t r_outer; - pixman_region32_t clip_region; - pixman_transform_t trans = { - { { pixman_double_to_fixed (1.3), pixman_double_to_fixed (0), pixman_double_to_fixed (-0.5), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (1), pixman_double_to_fixed (-0.5), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (0), pixman_double_to_fixed (1.0) } - } - }; - - src_img = create_solid_bits (0xff0000ff); - - c_inner.x = pixman_double_to_fixed (100.0); - c_inner.y = pixman_double_to_fixed (100.0); - c_outer.x = pixman_double_to_fixed (100.0); - c_outer.y = pixman_double_to_fixed (100.0); - r_inner = 0; - r_outer = pixman_double_to_fixed (100.0); - - gradient_img = pixman_image_create_radial_gradient (&c_inner, &c_outer, - r_inner, r_outer, - stops, 2); - -#if 0 - gradient_img = pixman_image_create_linear_gradient (&p1, &p2, - stops, 2); - -#endif - - pixman_image_composite (PIXMAN_OP_OVER, gradient_img, NULL, src_img, - 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - - pixman_region32_init_rect (&clip_region, 50, 0, 100, 200); - pixman_image_set_clip_region32 (src_img, &clip_region); - pixman_image_set_source_clipping (src_img, TRUE); - pixman_image_set_has_client_clip (src_img, TRUE); - pixman_image_set_transform 
(src_img, &trans); - pixman_image_set_repeat (src_img, PIXMAN_REPEAT_NORMAL); - - dst_img = create_solid_bits (0xffff0000); - pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dst_img, - 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - - -#if 0 - printf ("0, 0: %x\n", src[0]); - printf ("10, 10: %x\n", src[10 * 10 + 10]); - printf ("w, h: %x\n", src[(HEIGHT - 1) * 100 + (WIDTH - 1)]); -#endif - - show_image (dst_img); - - pixman_image_unref (gradient_img); - pixman_image_unref (src_img); - - return 0; -} diff --git a/pixman/test/composite-test.c b/pixman/test/composite-test.c deleted file mode 100644 index 79d5d5eac..000000000 --- a/pixman/test/composite-test.c +++ /dev/null @@ -1,191 +0,0 @@ -#include -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -#define WIDTH 60 -#define HEIGHT 60 - -typedef struct { - const char *name; - pixman_op_t op; -} operator_t; - -static const operator_t operators[] = { - { "CLEAR", PIXMAN_OP_CLEAR }, - { "SRC", PIXMAN_OP_SRC }, - { "DST", PIXMAN_OP_DST }, - { "OVER", PIXMAN_OP_OVER }, - { "OVER_REVERSE", PIXMAN_OP_OVER_REVERSE }, - { "IN", PIXMAN_OP_IN }, - { "IN_REVERSE", PIXMAN_OP_IN_REVERSE }, - { "OUT", PIXMAN_OP_OUT }, - { "OUT_REVERSE", PIXMAN_OP_OUT_REVERSE }, - { "ATOP", PIXMAN_OP_ATOP }, - { "ATOP_REVERSE", PIXMAN_OP_ATOP_REVERSE }, - { "XOR", PIXMAN_OP_XOR }, - { "ADD", PIXMAN_OP_ADD }, - { "SATURATE", PIXMAN_OP_SATURATE }, - - { "MULTIPLY", PIXMAN_OP_MULTIPLY }, - { "SCREEN", PIXMAN_OP_SCREEN }, - { "OVERLAY", PIXMAN_OP_OVERLAY }, - { "DARKEN", PIXMAN_OP_DARKEN }, - { "LIGHTEN", PIXMAN_OP_LIGHTEN }, - { "COLOR_DODGE", PIXMAN_OP_COLOR_DODGE }, - { "COLOR_BURN", PIXMAN_OP_COLOR_BURN }, - { "HARD_LIGHT", PIXMAN_OP_HARD_LIGHT }, - { "SOFT_LIGHT", PIXMAN_OP_SOFT_LIGHT }, - { "DIFFERENCE", PIXMAN_OP_DIFFERENCE }, - { "EXCLUSION", PIXMAN_OP_EXCLUSION }, - { "HSL_HUE", PIXMAN_OP_HSL_HUE }, - { "HSL_SATURATION", PIXMAN_OP_HSL_SATURATION }, - { "HSL_COLOR", PIXMAN_OP_HSL_COLOR }, - { "HSL_LUMINOSITY", PIXMAN_OP_HSL_LUMINOSITY }, -}; - -static uint32_t -reader (const void *src, int size) -{ - switch (size) - { - case 1: - return *(uint8_t *)src; - case 2: - return *(uint16_t *)src; - case 4: - return *(uint32_t *)src; - default: - g_assert_not_reached(); - } -} - -static void -writer (void *src, uint32_t value, int size) -{ - switch (size) - { - case 1: - *(uint8_t *)src = value; - break; - - case 2: - *(uint16_t *)src = value; - break; - - case 4: - *(uint32_t *)src = value; - break; - - default: - break; - } -} - -int -main (int argc, char **argv) -{ -#define d2f pixman_double_to_fixed - - GtkWidget *window, *swindow; - GtkWidget *table; - uint32_t *dest = malloc (WIDTH * HEIGHT * 4); - uint32_t *src = malloc (WIDTH * HEIGHT * 4); - pixman_image_t *src_img; - pixman_image_t *dest_img; - pixman_point_fixed_t p1 = { -10 << 0, 0 }; - pixman_point_fixed_t p2 = { WIDTH << 16, (HEIGHT - 10) << 16 }; - uint16_t full = 0xcfff; - uint16_t low = 0x5000; - uint16_t alpha = 0xffff; - pixman_gradient_stop_t stops[6] = - { - { d2f (0.0), { full, low, low, alpha } }, - { d2f (0.25), { full, full, low, alpha } }, - { d2f (0.4), { low, full, low, alpha } }, - { d2f (0.6), { low, full, full, alpha } }, - { d2f (0.8), { low, low, full, alpha } }, - { d2f (1.0), { full, low, full, alpha } }, - }; - - int i; - - gtk_init (&argc, &argv); - - window = gtk_window_new (GTK_WINDOW_TOPLEVEL); - - gtk_window_set_default_size (GTK_WINDOW (window), 800, 600); - - g_signal_connect (window, "delete-event", - G_CALLBACK (gtk_main_quit), - NULL); - table = gtk_table_new 
(G_N_ELEMENTS (operators) / 6, 6, TRUE); - - src_img = pixman_image_create_linear_gradient (&p1, &p2, stops, - sizeof (stops) / sizeof (stops[0])); - - pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD); - - dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - dest, - WIDTH * 4); - pixman_image_set_accessors (dest_img, reader, writer); - - for (i = 0; i < G_N_ELEMENTS (operators); ++i) - { - GtkWidget *image; - GdkPixbuf *pixbuf; - GtkWidget *vbox; - GtkWidget *label; - int j, k; - - vbox = gtk_vbox_new (FALSE, 0); - - label = gtk_label_new (operators[i].name); - gtk_box_pack_start (GTK_BOX (vbox), label, FALSE, FALSE, 6); - gtk_widget_show (label); - - for (j = 0; j < HEIGHT; ++j) - { - for (k = 0; k < WIDTH; ++k) - dest[j * WIDTH + k] = 0x7f6f6f00; - } - pixman_image_composite (operators[i].op, src_img, NULL, dest_img, - 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - pixbuf = pixbuf_from_argb32 (pixman_image_get_data (dest_img), TRUE, - WIDTH, HEIGHT, WIDTH * 4); - image = gtk_image_new_from_pixbuf (pixbuf); - gtk_box_pack_start (GTK_BOX (vbox), image, FALSE, FALSE, 0); - gtk_widget_show (image); - - gtk_table_attach_defaults (GTK_TABLE (table), vbox, - i % 6, (i % 6) + 1, i / 6, (i / 6) + 1); - gtk_widget_show (vbox); - - g_object_unref (pixbuf); - } - - pixman_image_unref (src_img); - free (src); - pixman_image_unref (dest_img); - free (dest); - - swindow = gtk_scrolled_window_new (NULL, NULL); - gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow), - GTK_POLICY_AUTOMATIC, - GTK_POLICY_AUTOMATIC); - - gtk_scrolled_window_add_with_viewport (GTK_SCROLLED_WINDOW (swindow), table); - gtk_widget_show (table); - - gtk_container_add (GTK_CONTAINER (window), swindow); - gtk_widget_show (swindow); - - gtk_widget_show (window); - - gtk_main (); - - return 0; -} diff --git a/pixman/test/convolution-test.c b/pixman/test/convolution-test.c deleted file mode 100644 index da284af7b..000000000 --- a/pixman/test/convolution-test.c +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 200 -#define HEIGHT 200 - -#define d2f pixman_double_to_fixed - - uint32_t *src = malloc (WIDTH * HEIGHT * 4); - uint32_t *mask = malloc (WIDTH * HEIGHT * 4); - uint32_t *dest = malloc (WIDTH * HEIGHT * 4); - pixman_fixed_t convolution[] = - { - d2f (3), d2f (3), - d2f (0.5), d2f (0.5), d2f (0.5), - d2f (0.5), d2f (0.5), d2f (0.5), - d2f (0.5), d2f (0.5), d2f (0.5), - }; - pixman_image_t *simg, *mimg, *dimg; - - int i; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - { - src[i] = 0x7f007f00; - mask[i] = (i % 256) * 0x01000000; - dest[i] = 0; - } - - simg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src, WIDTH * 4); - mimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, mask, WIDTH * 4); - dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, dest, WIDTH * 4); - - pixman_image_set_filter (mimg, PIXMAN_FILTER_CONVOLUTION, - convolution, 11); - - pixman_image_composite (PIXMAN_OP_OVER, simg, mimg, dimg, 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - - show_image (dimg); - - return 0; -} diff --git a/pixman/test/gradient-test.c b/pixman/test/gradient-test.c deleted file mode 100644 index fc84844b0..000000000 --- a/pixman/test/gradient-test.c +++ /dev/null @@ -1,89 +0,0 @@ -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 400 -#define HEIGHT 200 - - uint32_t *dest = malloc (WIDTH * HEIGHT * 4); - pixman_image_t *src_img; - 
pixman_image_t *dest_img; - int i; - pixman_gradient_stop_t stops[2] = - { - { pixman_int_to_fixed (0), { 0xffff, 0xeeee, 0xeeee, 0xeeee } }, - { pixman_int_to_fixed (1), { 0xffff, 0x1111, 0x1111, 0x1111 } } - }; - pixman_point_fixed_t p1 = { pixman_double_to_fixed (0), 0 }; - pixman_point_fixed_t p2 = { pixman_double_to_fixed (WIDTH / 8.), - pixman_int_to_fixed (0) }; -#if 0 - pixman_transform_t trans = { - { { pixman_double_to_fixed (2), pixman_double_to_fixed (0.5), pixman_double_to_fixed (-100), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (3), pixman_double_to_fixed (0), }, - { pixman_double_to_fixed (0), pixman_double_to_fixed (0.000), pixman_double_to_fixed (1.0) } - } - }; -#else - pixman_transform_t trans = { - { { pixman_fixed_1, 0, 0 }, - { 0, pixman_fixed_1, 0 }, - { 0, 0, pixman_fixed_1 } } - }; -#endif - - pixman_point_fixed_t c_inner; - pixman_point_fixed_t c_outer; - pixman_fixed_t r_inner; - pixman_fixed_t r_outer; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - dest[i] = 0x4f00004f; /* pale blue */ - - dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - dest, - WIDTH * 4); - - c_inner.x = pixman_double_to_fixed (50.0); - c_inner.y = pixman_double_to_fixed (50.0); - c_outer.x = pixman_double_to_fixed (50.0); - c_outer.y = pixman_double_to_fixed (50.0); - r_inner = 0; - r_outer = pixman_double_to_fixed (50.0); - - src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, - stops, 2); -#if 0 - src_img = pixman_image_create_conical_gradient (&c_inner, r_inner, - stops, 2); - src_img = pixman_image_create_linear_gradient (&c_inner, &c_outer, - r_inner, r_outer, - stops, 2); -#endif - - src_img = pixman_image_create_linear_gradient (&p1, &p2, - stops, 2); - - pixman_image_set_transform (src_img, &trans); - pixman_image_set_repeat (src_img, PIXMAN_REPEAT_PAD); - - pixman_image_composite (PIXMAN_OP_OVER, src_img, NULL, dest_img, - 0, 0, 0, 0, 0, 0, 10 * WIDTH, HEIGHT); - - printf ("0, 0: %x\n", dest[0]); - printf ("10, 10: %x\n", dest[10 * 10 + 10]); - printf ("w, h: %x\n", dest[(HEIGHT - 1) * 100 + (WIDTH - 1)]); - - show_image (dest_img); - - pixman_image_unref (src_img); - pixman_image_unref (dest_img); - free (dest); - - return 0; -} diff --git a/pixman/test/gtk-utils.c b/pixman/test/gtk-utils.c deleted file mode 100644 index f45cdc912..000000000 --- a/pixman/test/gtk-utils.c +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include "pixman-private.h" /* For image->bits.format - * FIXME: there should probably be public API for this - */ -#include "gtk-utils.h" - -GdkPixbuf * -pixbuf_from_argb32 (uint32_t *bits, - gboolean has_alpha, - int width, - int height, - int stride) -{ - GdkPixbuf *pixbuf = gdk_pixbuf_new (GDK_COLORSPACE_RGB, TRUE, - 8, width, height); - int p_stride = gdk_pixbuf_get_rowstride (pixbuf); - guint32 *p_bits = (guint32 *)gdk_pixbuf_get_pixels (pixbuf); - int w, h; - - for (h = 0; h < height; ++h) - { - for (w = 0; w < width; ++w) - { - uint32_t argb = bits[h * (stride / 4) + w]; - guint r, g, b, a; - char *pb = (char *)p_bits; - - pb += h * p_stride + w * 4; - - r = (argb & 0x00ff0000) >> 16; - g = (argb & 0x0000ff00) >> 8; - b = (argb & 0x000000ff) >> 0; - a = has_alpha? 
(argb & 0xff000000) >> 24 : 0xff; - - if (a) - { - r = (r * 255) / a; - g = (g * 255) / a; - b = (b * 255) / a; - } - - if (r > 255) r = 255; - if (g > 255) g = 255; - if (b > 255) b = 255; - - pb[0] = r; - pb[1] = g; - pb[2] = b; - pb[3] = a; - } - } - - return pixbuf; -} - - -static gboolean -on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data) -{ - GdkPixbuf *pixbuf = data; - - gdk_draw_pixbuf (widget->window, NULL, - pixbuf, 0, 0, 0, 0, - gdk_pixbuf_get_width (pixbuf), - gdk_pixbuf_get_height (pixbuf), - GDK_RGB_DITHER_NONE, - 0, 0); - - return TRUE; -} - -void -show_image (pixman_image_t *image) -{ - GtkWidget *window; - GdkPixbuf *pixbuf; - int width, height, stride; - int argc; - char **argv; - char *arg0 = g_strdup ("pixman-test-program"); - gboolean has_alpha; - pixman_format_code_t format; - - argc = 1; - argv = (char **)&arg0; - - gtk_init (&argc, &argv); - - window = gtk_window_new (GTK_WINDOW_TOPLEVEL); - width = pixman_image_get_width (image); - height = pixman_image_get_height (image); - stride = pixman_image_get_stride (image); - - gtk_window_set_default_size (GTK_WINDOW (window), width, height); - - format = image->bits.format; - - if (format == PIXMAN_a8r8g8b8) - has_alpha = TRUE; - else if (format == PIXMAN_x8r8g8b8) - has_alpha = FALSE; - else - g_error ("Can't deal with this format: %x\n", format); - - pixbuf = pixbuf_from_argb32 (pixman_image_get_data (image), has_alpha, - width, height, stride); - - g_signal_connect (window, "expose_event", G_CALLBACK (on_expose), pixbuf); - g_signal_connect (window, "delete_event", G_CALLBACK (gtk_main_quit), NULL); - - gtk_widget_show (window); - - gtk_main (); -} diff --git a/pixman/test/gtk-utils.h b/pixman/test/gtk-utils.h deleted file mode 100644 index 2cb13bcf0..000000000 --- a/pixman/test/gtk-utils.h +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include -#include -#include "pixman.h" - -void show_image (pixman_image_t *image); - -GdkPixbuf *pixbuf_from_argb32 (uint32_t *bits, - gboolean has_alpha, - int width, - int height, - int stride); diff --git a/pixman/test/radial-test.c b/pixman/test/radial-test.c deleted file mode 100644 index 5d716c339..000000000 --- a/pixman/test/radial-test.c +++ /dev/null @@ -1,198 +0,0 @@ -#include "utils.h" -#include "gtk-utils.h" - -#define NUM_GRADIENTS 7 -#define NUM_STOPS 3 -#define NUM_REPEAT 4 -#define SIZE 128 -#define WIDTH (SIZE * NUM_GRADIENTS) -#define HEIGHT (SIZE * NUM_REPEAT) - -/* - * We want to test all the possible relative positions of the start - * and end circle: - * - * - The start circle can be smaller/equal/bigger than the end - * circle. A radial gradient can be classified in one of these - * three cases depending on the sign of dr. - * - * - The smaller circle can be completely inside/internally - * tangent/outside (at least in part) of the bigger circle. This - * classification is the same as the one which can be computed by - * examining the sign of a = (dx^2 + dy^2 - dr^2). - * - * - If the two circles have the same size, neither can be inside or - * internally tangent - * - * This test draws radial gradients whose circles always have the same - * centers (0, 0) and (1, 0), but with different radiuses. 
From left - * to right: - * - * - Small start circle completely inside the end circle - * 0.25 -> 1.75; dr = 1.5 > 0; a = 1 - 1.50^2 < 0 - * - * - Small start circle internally tangent to the end circle - * 0.50 -> 1.50; dr = 1.0 > 0; a = 1 - 1.00^2 = 0 - * - * - Small start circle outside of the end circle - * 0.50 -> 1.00; dr = 0.5 > 0; a = 1 - 0.50^2 > 0 - * - * - Start circle with the same size as the end circle - * 1.00 -> 1.00; dr = 0.0 = 0; a = 1 - 0.00^2 > 0 - * - * - Small end circle outside of the start circle - * 1.00 -> 0.50; dr = -0.5 > 0; a = 1 - 0.50^2 > 0 - * - * - Small end circle internally tangent to the start circle - * 1.50 -> 0.50; dr = -1.0 > 0; a = 1 - 1.00^2 = 0 - * - * - Small end circle completely inside the start circle - * 1.75 -> 0.25; dr = -1.5 > 0; a = 1 - 1.50^2 < 0 - * - */ - -const static double radiuses[NUM_GRADIENTS] = { - 0.25, - 0.50, - 0.50, - 1.00, - 1.00, - 1.50, - 1.75 -}; - -#define double_to_color(x) \ - (((uint32_t) ((x)*65536)) - (((uint32_t) ((x)*65536)) >> 16)) - -#define PIXMAN_STOP(offset,r,g,b,a) \ - { pixman_double_to_fixed (offset), \ - { \ - double_to_color (r), \ - double_to_color (g), \ - double_to_color (b), \ - double_to_color (a) \ - } \ - } - -static const pixman_gradient_stop_t stops[NUM_STOPS] = { - PIXMAN_STOP (0.0, 1, 0, 0, 0.75), - PIXMAN_STOP (0.70710678, 0, 1, 0, 0), - PIXMAN_STOP (1.0, 0, 0, 1, 1) -}; - -static pixman_image_t * -create_radial (int index) -{ - pixman_point_fixed_t p0, p1; - pixman_fixed_t r0, r1; - double x0, x1, radius0, radius1, left, right, center; - - x0 = 0; - x1 = 1; - radius0 = radiuses[index]; - radius1 = radiuses[NUM_GRADIENTS - index - 1]; - - /* center the gradient */ - left = MIN (x0 - radius0, x1 - radius1); - right = MAX (x0 + radius0, x1 + radius1); - center = (left + right) * 0.5; - x0 -= center; - x1 -= center; - - /* scale to make it fit within a 1x1 rect centered in (0,0) */ - x0 *= 0.25; - x1 *= 0.25; - radius0 *= 0.25; - radius1 *= 0.25; - - p0.x = pixman_double_to_fixed (x0); - p0.y = pixman_double_to_fixed (0); - - p1.x = pixman_double_to_fixed (x1); - p1.y = pixman_double_to_fixed (0); - - r0 = pixman_double_to_fixed (radius0); - r1 = pixman_double_to_fixed (radius1); - - return pixman_image_create_radial_gradient (&p0, &p1, - r0, r1, - stops, NUM_STOPS); -} - -static const pixman_repeat_t repeat[NUM_REPEAT] = { - PIXMAN_REPEAT_NONE, - PIXMAN_REPEAT_NORMAL, - PIXMAN_REPEAT_REFLECT, - PIXMAN_REPEAT_PAD -}; - -int -main (int argc, char **argv) -{ - pixman_transform_t transform; - pixman_image_t *src_img, *dest_img; - int i, j; - - enable_fp_exceptions (); - - dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, - WIDTH, HEIGHT, - NULL, 0); - - pixman_transform_init_identity (&transform); - - /* - * The create_radial() function returns gradients centered in the - * origin and whose interesting part fits a 1x1 square. We want to - * paint these gradients on a SIZExSIZE square and to make things - * easier we want the origin in the top-left corner of the square - * we want to see. - */ - pixman_transform_translate (NULL, &transform, - pixman_double_to_fixed (0.5), - pixman_double_to_fixed (0.5)); - - pixman_transform_scale (NULL, &transform, - pixman_double_to_fixed (SIZE), - pixman_double_to_fixed (SIZE)); - - /* - * Gradients are evaluated at the center of each pixel, so we need - * to translate by half a pixel to trigger some interesting - * cornercases. 
In particular, the original implementation of PDF - * radial gradients tried to divide by 0 when using this transform - * on the "tangent circles" cases. - */ - pixman_transform_translate (NULL, &transform, - pixman_double_to_fixed (0.5), - pixman_double_to_fixed (0.5)); - - for (i = 0; i < NUM_GRADIENTS; i++) - { - src_img = create_radial (i); - pixman_image_set_transform (src_img, &transform); - - for (j = 0; j < NUM_REPEAT; j++) - { - pixman_image_set_repeat (src_img, repeat[j]); - - pixman_image_composite32 (PIXMAN_OP_OVER, - src_img, - NULL, - dest_img, - 0, 0, - 0, 0, - i * SIZE, j * SIZE, - SIZE, SIZE); - - } - - pixman_image_unref (src_img); - } - - show_image (dest_img); - - pixman_image_unref (dest_img); - - return 0; -} diff --git a/pixman/test/scaling-test.c b/pixman/test/scaling-test.c index 7b78017a3..dbb9d39b0 100644 --- a/pixman/test/scaling-test.c +++ b/pixman/test/scaling-test.c @@ -1,250 +1,368 @@ -/* - * Test program, which can detect some problems with nearest neighbour - * and bilinear scaling in pixman. Testing is done by running lots - * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8 - * and r5g6b5 color formats. - * - * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in - * the case of test failure. - */ -#include -#include -#include -#include "utils.h" - -#define MAX_SRC_WIDTH 16 -#define MAX_SRC_HEIGHT 16 -#define MAX_DST_WIDTH 16 -#define MAX_DST_HEIGHT 16 -#define MAX_STRIDE 4 - -/* - * Composite operation with pseudorandom images - */ -uint32_t -test_composite (int testnum, - int verbose) -{ - int i; - pixman_image_t * src_img; - pixman_image_t * dst_img; - pixman_transform_t transform; - pixman_region16_t clip; - int src_width, src_height; - int dst_width, dst_height; - int src_stride, dst_stride; - int src_x, src_y; - int dst_x, dst_y; - int src_bpp; - int dst_bpp; - int w, h; - pixman_fixed_t scale_x = 65536, scale_y = 65536; - pixman_fixed_t translate_x = 0, translate_y = 0; - pixman_op_t op; - pixman_repeat_t repeat = PIXMAN_REPEAT_NONE; - pixman_format_code_t src_fmt, dst_fmt; - uint32_t * srcbuf; - uint32_t * dstbuf; - uint32_t crc32; - FLOAT_REGS_CORRUPTION_DETECTOR_START (); - - lcg_srand (testnum); - - src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; - op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER; - - src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; - src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; - dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; - dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; - src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; - dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; - - if (src_stride & 3) - src_stride += 2; - - if (dst_stride & 3) - dst_stride += 2; - - src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); - src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); - dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); - dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); - w = lcg_rand_n (dst_width * 3 / 2 - dst_x); - h = lcg_rand_n (dst_height * 3 / 2 - dst_y); - - srcbuf = (uint32_t *)malloc (src_stride * src_height); - dstbuf = (uint32_t *)malloc (dst_stride * dst_height); - - for (i = 0; i < src_stride * src_height; i++) - *((uint8_t *)srcbuf + i) = lcg_rand_n (256); - - for (i = 0; i < dst_stride * dst_height; i++) - *((uint8_t *)dstbuf + i) = lcg_rand_n (256); - - src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ? 
- PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; - - dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ? - PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; - - src_img = pixman_image_create_bits ( - src_fmt, src_width, src_height, srcbuf, src_stride); - - dst_img = pixman_image_create_bits ( - dst_fmt, dst_width, dst_height, dstbuf, dst_stride); - - image_endian_swap (src_img, src_bpp * 8); - image_endian_swap (dst_img, dst_bpp * 8); - - if (lcg_rand_n (8) > 0) - { - scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); - scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); - translate_x = lcg_rand_N (65536); - translate_y = lcg_rand_N (65536); - pixman_transform_init_scale (&transform, scale_x, scale_y); - pixman_transform_translate (&transform, NULL, translate_x, translate_y); - pixman_image_set_transform (src_img, &transform); - } - - switch (lcg_rand_n (4)) - { - case 0: - repeat = PIXMAN_REPEAT_NONE; - break; - - case 1: - repeat = PIXMAN_REPEAT_NORMAL; - break; - - case 2: - repeat = PIXMAN_REPEAT_PAD; - break; - - case 3: - repeat = PIXMAN_REPEAT_REFLECT; - break; - - default: - break; - } - pixman_image_set_repeat (src_img, repeat); - - if (lcg_rand_n (2)) - pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); - else - pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); - - if (verbose) - { - printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); - printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", - op, scale_x, scale_y, repeat); - printf ("translate_x=%d, translate_y=%d\n", - translate_x, translate_y); - printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", - src_width, src_height, dst_width, dst_height); - printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", - src_x, src_y, dst_x, dst_y); - printf ("w=%d, h=%d\n", w, h); - } - - if (lcg_rand_n (8) == 0) - { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; - - for (i = 0; i < n; i++) - { - clip_boxes[i].x1 = lcg_rand_n (src_width); - clip_boxes[i].y1 = lcg_rand_n (src_height); - clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); - clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); - - if (verbose) - { - printf ("source clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - - pixman_region_init_rects (&clip, clip_boxes, n); - pixman_image_set_clip_region (src_img, &clip); - pixman_image_set_source_clipping (src_img, 1); - pixman_region_fini (&clip); - } - - if (lcg_rand_n (8) == 0) - { - pixman_box16_t clip_boxes[2]; - int n = lcg_rand_n (2) + 1; - for (i = 0; i < n; i++) - { - clip_boxes[i].x1 = lcg_rand_n (dst_width); - clip_boxes[i].y1 = lcg_rand_n (dst_height); - clip_boxes[i].x2 = - clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); - clip_boxes[i].y2 = - clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); - - if (verbose) - { - printf ("destination clip box: [%d,%d-%d,%d]\n", - clip_boxes[i].x1, clip_boxes[i].y1, - clip_boxes[i].x2, clip_boxes[i].y2); - } - } - pixman_region_init_rects (&clip, clip_boxes, n); - pixman_image_set_clip_region (dst_img, &clip); - pixman_region_fini (&clip); - } - - pixman_image_composite (op, src_img, NULL, dst_img, - src_x, src_y, 0, 0, dst_x, dst_y, w, h); - - if (dst_fmt == PIXMAN_x8r8g8b8) - { - /* ignore unused part */ - for (i = 0; i < dst_stride * dst_height / 4; i++) - dstbuf[i] &= 0xFFFFFF; - } - - image_endian_swap (dst_img, dst_bpp * 8); - - if (verbose) - { - int j; - - for (i = 0; i 
< dst_height; i++) - { - for (j = 0; j < dst_stride; j++) - printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); - - printf ("\n"); - } - } - - pixman_image_unref (src_img); - pixman_image_unref (dst_img); - - crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); - free (srcbuf); - free (dstbuf); - - FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); - return crc32; -} - -int -main (int argc, const char *argv[]) -{ - pixman_disable_out_of_bounds_workaround (); - - return fuzzer_test_main("scaling", 8000000, 0x7F1AB59F, - test_composite, argc, argv); -} +/* + * Test program, which can detect some problems with nearest neighbour + * and bilinear scaling in pixman. Testing is done by running lots + * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8 + * and r5g6b5 color formats. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. + */ +#include +#include +#include +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 8 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 8 +#define MAX_STRIDE 4 + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * mask_img; + pixman_image_t * dst_img; + pixman_transform_t transform; + pixman_region16_t clip; + int src_width, src_height; + int mask_width, mask_height; + int dst_width, dst_height; + int src_stride, mask_stride, dst_stride; + int src_x, src_y; + int mask_x, mask_y; + int dst_x, dst_y; + int src_bpp; + int mask_bpp = 1; + int dst_bpp; + int w, h; + pixman_fixed_t scale_x = 65536, scale_y = 65536; + pixman_fixed_t translate_x = 0, translate_y = 0; + pixman_fixed_t mask_scale_x = 65536, mask_scale_y = 65536; + pixman_fixed_t mask_translate_x = 0, mask_translate_y = 0; + pixman_op_t op; + pixman_repeat_t repeat = PIXMAN_REPEAT_NONE; + pixman_repeat_t mask_repeat = PIXMAN_REPEAT_NONE; + pixman_format_code_t src_fmt, dst_fmt; + uint32_t * srcbuf; + uint32_t * dstbuf; + uint32_t * maskbuf; + uint32_t crc32; + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + lcg_srand (testnum); + + src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (lcg_rand_n (2) == 0) ? 
2 : 4; + switch (lcg_rand_n (3)) + { + case 0: + op = PIXMAN_OP_SRC; + break; + case 1: + op = PIXMAN_OP_OVER; + break; + default: + op = PIXMAN_OP_ADD; + break; + } + + src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + + if (lcg_rand_n (2)) + { + mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + } + else + { + mask_width = mask_height = 1; + } + + dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; + dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp; + dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + + if (src_stride & 3) + src_stride += 2; + + if (mask_stride & 1) + mask_stride += 1; + if (mask_stride & 2) + mask_stride += 2; + + if (dst_stride & 3) + dst_stride += 2; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2); + mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2); + dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + w = lcg_rand_n (dst_width * 3 / 2 - dst_x); + h = lcg_rand_n (dst_height * 3 / 2 - dst_y); + + srcbuf = (uint32_t *)malloc (src_stride * src_height); + maskbuf = (uint32_t *)malloc (mask_stride * mask_height); + dstbuf = (uint32_t *)malloc (dst_stride * dst_height); + + for (i = 0; i < src_stride * src_height; i++) + *((uint8_t *)srcbuf + i) = lcg_rand_n (256); + + for (i = 0; i < mask_stride * mask_height; i++) + *((uint8_t *)maskbuf + i) = lcg_rand_n (256); + + for (i = 0; i < dst_stride * dst_height; i++) + *((uint8_t *)dstbuf + i) = lcg_rand_n (256); + + src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ? 
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + src_img = pixman_image_create_bits ( + src_fmt, src_width, src_height, srcbuf, src_stride); + + mask_img = pixman_image_create_bits ( + PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride); + + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + + image_endian_swap (src_img, src_bpp * 8); + image_endian_swap (dst_img, dst_bpp * 8); + + if (lcg_rand_n (4) > 0) + { + scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); + scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); + translate_x = lcg_rand_N (65536); + translate_y = lcg_rand_N (65536); + pixman_transform_init_scale (&transform, scale_x, scale_y); + pixman_transform_translate (&transform, NULL, translate_x, translate_y); + pixman_image_set_transform (src_img, &transform); + } + + if (lcg_rand_n (2) > 0) + { + mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); + mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); + mask_translate_x = lcg_rand_N (65536); + mask_translate_y = lcg_rand_N (65536); + pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y); + pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y); + pixman_image_set_transform (mask_img, &transform); + } + + switch (lcg_rand_n (4)) + { + case 0: + mask_repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + mask_repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + mask_repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + mask_repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (mask_img, mask_repeat); + + switch (lcg_rand_n (4)) + { + case 0: + repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (src_img, repeat); + + if (lcg_rand_n (2)) + pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (lcg_rand_n (2)) + pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (verbose) + { + printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); + printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", + op, scale_x, scale_y, repeat); + printf ("translate_x=%d, translate_y=%d\n", + translate_x, translate_y); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("w=%d, h=%d\n", w, h); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = 
lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (mask_width); + clip_boxes[i].y1 = lcg_rand_n (mask_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("mask clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (mask_img, &clip); + pixman_image_set_source_clipping (mask_img, 1); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (dst_width); + clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (2) == 0) + pixman_image_composite (op, src_img, NULL, dst_img, + src_x, src_y, 0, 0, dst_x, dst_y, w, h); + else + pixman_image_composite (op, src_img, mask_img, dst_img, + src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); + + if (dst_fmt == PIXMAN_x8r8g8b8) + { + /* ignore unused part */ + for (i = 0; i < dst_stride * dst_height / 4; i++) + dstbuf[i] &= 0xFFFFFF; + } + + image_endian_swap (dst_img, dst_bpp * 8); + + if (verbose) + { + int j; + + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); + + printf ("\n"); + } + } + + pixman_image_unref (src_img); + pixman_image_unref (mask_img); + pixman_image_unref (dst_img); + + crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); + free (srcbuf); + free (maskbuf); + free (dstbuf); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + pixman_disable_out_of_bounds_workaround (); + + return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2, + test_composite, argc, argv); +} diff --git a/pixman/test/screen-test.c b/pixman/test/screen-test.c deleted file mode 100644 index e69dba3de..000000000 --- a/pixman/test/screen-test.c +++ /dev/null @@ -1,44 +0,0 @@ -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 40 -#define HEIGHT 40 - - uint32_t *src1 = malloc (WIDTH * HEIGHT * 4); - uint32_t *src2 = malloc (WIDTH * HEIGHT * 4); - uint32_t *src3 = malloc (WIDTH * HEIGHT * 4); - uint32_t *dest = malloc (3 * WIDTH * 2 * HEIGHT * 4); - pixman_image_t *simg1, *simg2, *simg3, *dimg; - - int i; - - for (i = 0; i < WIDTH * HEIGHT; ++i) - { - src1[i] = 0x7ff00000; - src2[i] = 0x7f00ff00; - src3[i] = 0x7f0000ff; - } - - for (i = 0; i < 3 * WIDTH * 2 * HEIGHT; ++i) - { - dest[i] = 0x0; - } - - simg1 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src1, WIDTH * 4); - simg2 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src2, WIDTH * 4); - simg3 = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src3, WIDTH * 4); - dimg = pixman_image_create_bits (PIXMAN_a8r8g8b8, 3 * WIDTH, 2 * HEIGHT, dest, 
3 * WIDTH * 4); - - pixman_image_composite (PIXMAN_OP_SCREEN, simg1, NULL, dimg, 0, 0, 0, 0, WIDTH, HEIGHT / 4, WIDTH, HEIGHT); - pixman_image_composite (PIXMAN_OP_SCREEN, simg2, NULL, dimg, 0, 0, 0, 0, (WIDTH/2), HEIGHT / 4 + HEIGHT / 2, WIDTH, HEIGHT); - pixman_image_composite (PIXMAN_OP_SCREEN, simg3, NULL, dimg, 0, 0, 0, 0, (4 * WIDTH) / 3, HEIGHT, WIDTH, HEIGHT); - - show_image (dimg); - - return 0; -} diff --git a/pixman/test/trap-test.c b/pixman/test/trap-test.c deleted file mode 100644 index 19295e7a5..000000000 --- a/pixman/test/trap-test.c +++ /dev/null @@ -1,49 +0,0 @@ -#include -#include -#include -#include "pixman.h" -#include "gtk-utils.h" - -int -main (int argc, char **argv) -{ -#define WIDTH 200 -#define HEIGHT 200 - - pixman_image_t *src_img; - pixman_image_t *mask_img; - pixman_image_t *dest_img; - pixman_trap_t trap; - pixman_color_t white = { 0x0000, 0xffff, 0x0000, 0xffff }; - uint32_t *bits = malloc (WIDTH * HEIGHT * 4); - uint32_t *mbits = malloc (WIDTH * HEIGHT); - - memset (mbits, 0, WIDTH * HEIGHT); - memset (bits, 0xff, WIDTH * HEIGHT * 4); - - trap.top.l = pixman_int_to_fixed (50) + 0x8000; - trap.top.r = pixman_int_to_fixed (150) + 0x8000; - trap.top.y = pixman_int_to_fixed (30); - - trap.bot.l = pixman_int_to_fixed (50) + 0x8000; - trap.bot.r = pixman_int_to_fixed (150) + 0x8000; - trap.bot.y = pixman_int_to_fixed (150); - - mask_img = pixman_image_create_bits (PIXMAN_a8, WIDTH, HEIGHT, mbits, WIDTH); - src_img = pixman_image_create_solid_fill (&white); - dest_img = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, bits, WIDTH * 4); - - pixman_add_traps (mask_img, 0, 0, 1, &trap); - - pixman_image_composite (PIXMAN_OP_OVER, - src_img, mask_img, dest_img, - 0, 0, 0, 0, 0, 0, WIDTH, HEIGHT); - - show_image (dest_img); - - pixman_image_unref (src_img); - pixman_image_unref (dest_img); - free (bits); - - return 0; -} -- cgit v1.2.3
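
A note on the reference checksum change in scaling-test.c above (0x7F1AB59F to 0x80DF1CB2): the test seeds a deterministic PRNG from the test number, drives every random decision (formats, sizes, transforms, clips) from that stream, computes a CRC32 of the destination buffer, and lets the harness fold the per-test CRCs into one aggregate compared against a stored reference. Because the rewrite draws extra random numbers for the new mask image and the PIXMAN_OP_ADD case, every per-test CRC shifts and the reference value has to be regenerated. The sketch below is illustrative only; it is not the utils.h implementation (lcg_rand_n, compute_crc32, fuzzer_test_main), and the LCG constants, CRC loop, and XOR aggregation are stand-ins chosen for brevity.

```c
/*
 * Minimal sketch of the deterministic fuzzer pattern used by
 * scaling-test.c: seed a PRNG from the test number, generate the
 * test input from that stream, checksum the output, and aggregate.
 * All helpers here are hypothetical stand-ins, not pixman code.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t lcg_state;

static void my_srand (uint32_t seed) { lcg_state = seed; }

/* simple LCG, stand-in for the lcg_rand_n() used in the patch */
static uint32_t my_rand_n (uint32_t n)
{
    lcg_state = lcg_state * 1103515245u + 12345u;
    return (lcg_state >> 16) % n;
}

/* bitwise CRC32 (polynomial 0xEDB88320), stand-in for compute_crc32() */
static uint32_t crc32_buf (uint32_t crc, const uint8_t *buf, size_t len)
{
    crc = ~crc;
    while (len--)
    {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0u);
    }
    return ~crc;
}

/* one "test": fill a small buffer pseudorandomly and checksum it;
 * a real test would composite images here instead */
static uint32_t run_test (int testnum)
{
    uint8_t buf[64];
    size_t  i;

    my_srand ((uint32_t) testnum);
    for (i = 0; i < sizeof buf; i++)
        buf[i] = (uint8_t) my_rand_n (256);

    return crc32_buf (0, buf, sizeof buf);
}

int main (void)
{
    /* fold per-test CRCs into one value, the way a fuzzer-style
     * harness might, and compare it against a recorded reference */
    uint32_t aggregate = 0;
    int t;

    for (t = 1; t <= 1000; t++)
        aggregate ^= run_test (t);

    printf ("aggregate checksum: 0x%08X\n", aggregate);
    return 0;
}
```

Seeding from the test number keeps every run reproducible, which is what lets the fuzzer-find-diff.pl script mentioned in the test's header comment bisect a failing aggregate down to a single test number; it also means any change in how many random values a test consumes, such as the mask parameters added by this patch, invalidates the old reference checksum by design.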