diff options
41 files changed, 17829 insertions, 17375 deletions
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp index 2841fb350..2a3084896 100644 --- a/mesalib/src/glsl/ir_constant_expression.cpp +++ b/mesalib/src/glsl/ir_constant_expression.cpp @@ -288,24 +288,20 @@ ir_expression::constant_expression_value() break; case ir_unop_rcp: - /* FINISHME: Emit warning when division-by-zero is detected. */ assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { switch (this->type->base_type) { case GLSL_TYPE_UINT: - if (op[0]->value.u[c] == 0.0) - return NULL; - data.u[c] = 1 / op[0]->value.u[c]; + if (op[0]->value.u[c] != 0.0) + data.u[c] = 1 / op[0]->value.u[c]; break; case GLSL_TYPE_INT: - if (op[0]->value.i[c] == 0.0) - return NULL; - data.i[c] = 1 / op[0]->value.i[c]; + if (op[0]->value.i[c] != 0.0) + data.i[c] = 1 / op[0]->value.i[c]; break; case GLSL_TYPE_FLOAT: - if (op[0]->value.f[c] == 0.0) - return NULL; - data.f[c] = 1.0F / op[0]->value.f[c]; + if (op[0]->value.f[c] != 0.0) + data.f[c] = 1.0F / op[0]->value.f[c]; break; default: assert(0); @@ -314,13 +310,9 @@ ir_expression::constant_expression_value() break; case ir_unop_rsq: - /* FINISHME: Emit warning when division-by-zero is detected. */ assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); for (unsigned c = 0; c < op[0]->type->components(); c++) { - float s = sqrtf(op[0]->value.f[c]); - if (s == 0) - return NULL; - data.f[c] = 1.0F / s; + data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); } break; @@ -523,18 +515,20 @@ ir_expression::constant_expression_value() switch (op[0]->type->base_type) { case GLSL_TYPE_UINT: - if (op[1]->value.u[c1] == 0) - return NULL; - data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1]; + if (op[1]->value.u[c1] == 0) { + data.u[c] = 0; + } else { + data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1]; + } break; case GLSL_TYPE_INT: - if (op[1]->value.i[c1] == 0) - return NULL; - data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1]; + if (op[1]->value.i[c1] == 0) { + data.i[c] = 0; + } else { + data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1]; + } break; case GLSL_TYPE_FLOAT: - if (op[1]->value.f[c1] == 0) - return NULL; data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1]; break; default: @@ -552,18 +546,20 @@ ir_expression::constant_expression_value() switch (op[0]->type->base_type) { case GLSL_TYPE_UINT: - if (op[1]->value.u[c1] == 0) - return NULL; - data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1]; + if (op[1]->value.u[c1] == 0) { + data.u[c] = 0; + } else { + data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1]; + } break; case GLSL_TYPE_INT: - if (op[1]->value.i[c1] == 0) - return NULL; - data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1]; + if (op[1]->value.i[c1] == 0) { + data.i[c] = 0; + } else { + data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1]; + } break; case GLSL_TYPE_FLOAT: - if (op[1]->value.f[c1] == 0) - return NULL; /* We don't use fmod because it rounds toward zero; GLSL specifies * the use of floor. */ diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp index 46cd1950c..6c003bb02 100644 --- a/mesalib/src/glsl/linker.cpp +++ b/mesalib/src/glsl/linker.cpp @@ -926,7 +926,7 @@ link_intrastage_shaders(void *mem_ctx, if (var->type->is_array() && (var->type->length == 0)) { const glsl_type *type = glsl_type::get_array_instance(var->type->fields.array, - var->max_array_access); + var->max_array_access + 1); assert(type != NULL); var->type = type; diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index 020595bd0..b5966dffe 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -1,3369 +1,3370 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.7
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file mtypes.h
- * Main Mesa data structures.
- *
- * Please try to mark derived values with a leading underscore ('_').
- */
-
-#ifndef MTYPES_H
-#define MTYPES_H
-
-
-#include "main/glheader.h"
-#include "main/config.h"
-#include "main/mfeatures.h"
-#include "glapi/glapi.h"
-#include "math/m_matrix.h" /* GLmatrix */
-#include "main/simple_list.h" /* struct simple_node */
-#include "main/formats.h" /* MESA_FORMAT_COUNT */
-
-
-/**
- * Color channel data type.
- */
-#if CHAN_BITS == 8
- typedef GLubyte GLchan;
-#define CHAN_MAX 255
-#define CHAN_MAXF 255.0F
-#define CHAN_TYPE GL_UNSIGNED_BYTE
-#elif CHAN_BITS == 16
- typedef GLushort GLchan;
-#define CHAN_MAX 65535
-#define CHAN_MAXF 65535.0F
-#define CHAN_TYPE GL_UNSIGNED_SHORT
-#elif CHAN_BITS == 32
- typedef GLfloat GLchan;
-#define CHAN_MAX 1.0
-#define CHAN_MAXF 1.0F
-#define CHAN_TYPE GL_FLOAT
-#else
-#error "illegal number of color channel bits"
-#endif
-
-
-/**
- * Stencil buffer data type.
- */
-#if STENCIL_BITS==8
- typedef GLubyte GLstencil;
-#elif STENCIL_BITS==16
- typedef GLushort GLstencil;
-#else
-# error "illegal number of stencil bits"
-#endif
-
-
-/**
- * \name 64-bit extension of GLbitfield.
- */
-/*@{*/
-typedef GLuint64 GLbitfield64;
-
-/** Set a single bit */
-#define BITFIELD64_BIT(b) (1ULL << (b))
-
-
-/**
- * \name Some forward type declarations
- */
-/*@{*/
-struct _mesa_HashTable;
-struct gl_attrib_node;
-struct gl_list_extensions;
-struct gl_meta_state;
-struct gl_pixelstore_attrib;
-struct gl_program_cache;
-struct gl_texture_format;
-struct gl_texture_image;
-struct gl_texture_object;
-struct gl_context;
-struct st_context;
-/*@}*/
-
-
-/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
-#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1)
-#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
-#define PRIM_UNKNOWN (GL_POLYGON+3)
-
-
-/**
- * Shader stages. Note that these will become 5 with tessellation.
- * These MUST have the same values as gallium's PIPE_SHADER_*
- */
-typedef enum
-{
- MESA_SHADER_VERTEX = 0,
- MESA_SHADER_FRAGMENT = 1,
- MESA_SHADER_GEOMETRY = 2,
- MESA_SHADER_TYPES = 3
-} gl_shader_type;
-
-
-
-/**
- * Indexes for vertex program attributes.
- * GL_NV_vertex_program aliases generic attributes over the conventional
- * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
- * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
- * generic attributes are distinct/separate).
- */
-typedef enum
-{
- VERT_ATTRIB_POS = 0,
- VERT_ATTRIB_WEIGHT = 1,
- VERT_ATTRIB_NORMAL = 2,
- VERT_ATTRIB_COLOR0 = 3,
- VERT_ATTRIB_COLOR1 = 4,
- VERT_ATTRIB_FOG = 5,
- VERT_ATTRIB_COLOR_INDEX = 6,
- VERT_ATTRIB_POINT_SIZE = 6, /*alias*/
- VERT_ATTRIB_EDGEFLAG = 7,
- VERT_ATTRIB_TEX0 = 8,
- VERT_ATTRIB_TEX1 = 9,
- VERT_ATTRIB_TEX2 = 10,
- VERT_ATTRIB_TEX3 = 11,
- VERT_ATTRIB_TEX4 = 12,
- VERT_ATTRIB_TEX5 = 13,
- VERT_ATTRIB_TEX6 = 14,
- VERT_ATTRIB_TEX7 = 15,
- VERT_ATTRIB_GENERIC0 = 16,
- VERT_ATTRIB_GENERIC1 = 17,
- VERT_ATTRIB_GENERIC2 = 18,
- VERT_ATTRIB_GENERIC3 = 19,
- VERT_ATTRIB_GENERIC4 = 20,
- VERT_ATTRIB_GENERIC5 = 21,
- VERT_ATTRIB_GENERIC6 = 22,
- VERT_ATTRIB_GENERIC7 = 23,
- VERT_ATTRIB_GENERIC8 = 24,
- VERT_ATTRIB_GENERIC9 = 25,
- VERT_ATTRIB_GENERIC10 = 26,
- VERT_ATTRIB_GENERIC11 = 27,
- VERT_ATTRIB_GENERIC12 = 28,
- VERT_ATTRIB_GENERIC13 = 29,
- VERT_ATTRIB_GENERIC14 = 30,
- VERT_ATTRIB_GENERIC15 = 31,
- VERT_ATTRIB_MAX = 32
-} gl_vert_attrib;
-
-/**
- * Bitflags for vertex attributes.
- * These are used in bitfields in many places.
- */
-/*@{*/
-#define VERT_BIT_POS (1 << VERT_ATTRIB_POS)
-#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT)
-#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL)
-#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0)
-#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1)
-#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG)
-#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX)
-#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG)
-#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0)
-#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1)
-#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2)
-#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3)
-#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4)
-#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5)
-#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6)
-#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7)
-#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0)
-#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1)
-#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2)
-#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3)
-#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4)
-#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5)
-#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6)
-#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7)
-#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8)
-#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9)
-#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10)
-#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11)
-#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12)
-#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13)
-#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14)
-#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15)
-
-#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u)))
-#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g)))
-/*@}*/
-
-
-/**
- * Indexes for vertex program result attributes
- */
-typedef enum
-{
- VERT_RESULT_HPOS = 0,
- VERT_RESULT_COL0 = 1,
- VERT_RESULT_COL1 = 2,
- VERT_RESULT_FOGC = 3,
- VERT_RESULT_TEX0 = 4,
- VERT_RESULT_TEX1 = 5,
- VERT_RESULT_TEX2 = 6,
- VERT_RESULT_TEX3 = 7,
- VERT_RESULT_TEX4 = 8,
- VERT_RESULT_TEX5 = 9,
- VERT_RESULT_TEX6 = 10,
- VERT_RESULT_TEX7 = 11,
- VERT_RESULT_PSIZ = 12,
- VERT_RESULT_BFC0 = 13,
- VERT_RESULT_BFC1 = 14,
- VERT_RESULT_EDGE = 15,
- VERT_RESULT_VAR0 = 16, /**< shader varying */
- VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING)
-} gl_vert_result;
-
-
-/*********************************************/
-
-/**
- * Indexes for geometry program attributes.
- */
-typedef enum
-{
- GEOM_ATTRIB_POSITION = 0,
- GEOM_ATTRIB_COLOR0 = 1,
- GEOM_ATTRIB_COLOR1 = 2,
- GEOM_ATTRIB_SECONDARY_COLOR0 = 3,
- GEOM_ATTRIB_SECONDARY_COLOR1 = 4,
- GEOM_ATTRIB_FOG_FRAG_COORD = 5,
- GEOM_ATTRIB_POINT_SIZE = 6,
- GEOM_ATTRIB_CLIP_VERTEX = 7,
- GEOM_ATTRIB_PRIMITIVE_ID = 8,
- GEOM_ATTRIB_TEX_COORD = 9,
-
- GEOM_ATTRIB_VAR0 = 16,
- GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING)
-} gl_geom_attrib;
-
-/**
- * Bitflags for geometry attributes.
- * These are used in bitfields in many places.
- */
-/*@{*/
-#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0)
-#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1)
-#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0)
-#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1)
-#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD)
-#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD)
-#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION)
-#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE)
-#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX)
-#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID)
-#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0)
-
-#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g)))
-/*@}*/
-
-
-/**
- * Indexes for geometry program result attributes
- */
-typedef enum
-{
- GEOM_RESULT_POS = 0,
- GEOM_RESULT_COL0 = 1,
- GEOM_RESULT_COL1 = 2,
- GEOM_RESULT_SCOL0 = 3,
- GEOM_RESULT_SCOL1 = 4,
- GEOM_RESULT_FOGC = 5,
- GEOM_RESULT_TEX0 = 6,
- GEOM_RESULT_TEX1 = 7,
- GEOM_RESULT_TEX2 = 8,
- GEOM_RESULT_TEX3 = 9,
- GEOM_RESULT_TEX4 = 10,
- GEOM_RESULT_TEX5 = 11,
- GEOM_RESULT_TEX6 = 12,
- GEOM_RESULT_TEX7 = 13,
- GEOM_RESULT_PSIZ = 14,
- GEOM_RESULT_CLPV = 15,
- GEOM_RESULT_PRID = 16,
- GEOM_RESULT_LAYR = 17,
- GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */
- /* ### we need to -2 because var0 is 18 instead 16 like in the others */
- GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2)
-} gl_geom_result;
-
-
-/**
- * Indexes for fragment program input attributes.
- */
-typedef enum
-{
- FRAG_ATTRIB_WPOS = 0,
- FRAG_ATTRIB_COL0 = 1,
- FRAG_ATTRIB_COL1 = 2,
- FRAG_ATTRIB_FOGC = 3,
- FRAG_ATTRIB_TEX0 = 4,
- FRAG_ATTRIB_TEX1 = 5,
- FRAG_ATTRIB_TEX2 = 6,
- FRAG_ATTRIB_TEX3 = 7,
- FRAG_ATTRIB_TEX4 = 8,
- FRAG_ATTRIB_TEX5 = 9,
- FRAG_ATTRIB_TEX6 = 10,
- FRAG_ATTRIB_TEX7 = 11,
- FRAG_ATTRIB_FACE = 12, /**< front/back face */
- FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */
- FRAG_ATTRIB_VAR0 = 14, /**< shader varying */
- FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)
-} gl_frag_attrib;
-
-/**
- * Bitflags for fragment program input attributes.
- */
-/*@{*/
-#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS)
-#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0)
-#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1)
-#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC)
-#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE)
-#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC)
-#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0)
-#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1)
-#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2)
-#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3)
-#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4)
-#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5)
-#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6)
-#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7)
-#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0)
-
-#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U))
-#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V))
-
-#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \
- FRAG_BIT_TEX1| \
- FRAG_BIT_TEX2| \
- FRAG_BIT_TEX3| \
- FRAG_BIT_TEX4| \
- FRAG_BIT_TEX5| \
- FRAG_BIT_TEX6| \
- FRAG_BIT_TEX7)
-/*@}*/
-
-
-/**
- * Fragment program results
- */
-typedef enum
-{
- FRAG_RESULT_DEPTH = 0,
- FRAG_RESULT_STENCIL = 1,
- FRAG_RESULT_COLOR = 2,
- FRAG_RESULT_DATA0 = 3,
- FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
-} gl_frag_result;
-
-
-/**
- * Indexes for all renderbuffers
- */
-typedef enum
-{
- /* the four standard color buffers */
- BUFFER_FRONT_LEFT,
- BUFFER_BACK_LEFT,
- BUFFER_FRONT_RIGHT,
- BUFFER_BACK_RIGHT,
- BUFFER_DEPTH,
- BUFFER_STENCIL,
- BUFFER_ACCUM,
- /* optional aux buffer */
- BUFFER_AUX0,
- /* generic renderbuffers */
- BUFFER_COLOR0,
- BUFFER_COLOR1,
- BUFFER_COLOR2,
- BUFFER_COLOR3,
- BUFFER_COLOR4,
- BUFFER_COLOR5,
- BUFFER_COLOR6,
- BUFFER_COLOR7,
- BUFFER_COUNT
-} gl_buffer_index;
-
-/**
- * Bit flags for all renderbuffers
- */
-#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT)
-#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT)
-#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT)
-#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT)
-#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0)
-#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1)
-#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2)
-#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3)
-#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH)
-#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL)
-#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM)
-#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0)
-#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1)
-#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2)
-#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3)
-#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4)
-#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5)
-#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6)
-#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7)
-
-/**
- * Mask of all the color buffer bits (but not accum).
- */
-#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \
- BUFFER_BIT_BACK_LEFT | \
- BUFFER_BIT_FRONT_RIGHT | \
- BUFFER_BIT_BACK_RIGHT | \
- BUFFER_BIT_AUX0 | \
- BUFFER_BIT_COLOR0 | \
- BUFFER_BIT_COLOR1 | \
- BUFFER_BIT_COLOR2 | \
- BUFFER_BIT_COLOR3 | \
- BUFFER_BIT_COLOR4 | \
- BUFFER_BIT_COLOR5 | \
- BUFFER_BIT_COLOR6 | \
- BUFFER_BIT_COLOR7)
-
-
-/**
- * Framebuffer configuration (aka visual / pixelformat)
- * Note: some of these fields should be boolean, but it appears that
- * code in drivers/dri/common/util.c requires int-sized fields.
- */
-struct gl_config
-{
- GLboolean rgbMode;
- GLboolean floatMode;
- GLboolean colorIndexMode; /* XXX is this used anywhere? */
- GLuint doubleBufferMode;
- GLuint stereoMode;
-
- GLboolean haveAccumBuffer;
- GLboolean haveDepthBuffer;
- GLboolean haveStencilBuffer;
-
- GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */
- GLuint redMask, greenMask, blueMask, alphaMask;
- GLint rgbBits; /* total bits for rgb */
- GLint indexBits; /* total bits for colorindex */
-
- GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits;
- GLint depthBits;
- GLint stencilBits;
-
- GLint numAuxBuffers;
-
- GLint level;
-
- /* EXT_visual_rating / GLX 1.2 */
- GLint visualRating;
-
- /* EXT_visual_info / GLX 1.2 */
- GLint transparentPixel;
- /* colors are floats scaled to ints */
- GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha;
- GLint transparentIndex;
-
- /* ARB_multisample / SGIS_multisample */
- GLint sampleBuffers;
- GLint samples;
-
- /* SGIX_pbuffer / GLX 1.3 */
- GLint maxPbufferWidth;
- GLint maxPbufferHeight;
- GLint maxPbufferPixels;
- GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */
- GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */
-
- /* OML_swap_method */
- GLint swapMethod;
-
- /* EXT_texture_from_pixmap */
- GLint bindToTextureRgb;
- GLint bindToTextureRgba;
- GLint bindToMipmapTexture;
- GLint bindToTextureTargets;
- GLint yInverted;
-
- /* EXT_framebuffer_sRGB */
- GLint sRGBCapable;
-};
-
-
-/**
- * Data structure for color tables
- */
-struct gl_color_table
-{
- GLenum InternalFormat; /**< The user-specified format */
- GLenum _BaseFormat; /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */
- GLuint Size; /**< number of entries in table */
- GLfloat *TableF; /**< Color table, floating point values */
- GLubyte *TableUB; /**< Color table, ubyte values */
- GLubyte RedSize;
- GLubyte GreenSize;
- GLubyte BlueSize;
- GLubyte AlphaSize;
- GLubyte LuminanceSize;
- GLubyte IntensitySize;
-};
-
-
-/**
- * \name Bit flags used for updating material values.
- */
-/*@{*/
-#define MAT_ATTRIB_FRONT_AMBIENT 0
-#define MAT_ATTRIB_BACK_AMBIENT 1
-#define MAT_ATTRIB_FRONT_DIFFUSE 2
-#define MAT_ATTRIB_BACK_DIFFUSE 3
-#define MAT_ATTRIB_FRONT_SPECULAR 4
-#define MAT_ATTRIB_BACK_SPECULAR 5
-#define MAT_ATTRIB_FRONT_EMISSION 6
-#define MAT_ATTRIB_BACK_EMISSION 7
-#define MAT_ATTRIB_FRONT_SHININESS 8
-#define MAT_ATTRIB_BACK_SHININESS 9
-#define MAT_ATTRIB_FRONT_INDEXES 10
-#define MAT_ATTRIB_BACK_INDEXES 11
-#define MAT_ATTRIB_MAX 12
-
-#define MAT_ATTRIB_AMBIENT(f) (MAT_ATTRIB_FRONT_AMBIENT+(f))
-#define MAT_ATTRIB_DIFFUSE(f) (MAT_ATTRIB_FRONT_DIFFUSE+(f))
-#define MAT_ATTRIB_SPECULAR(f) (MAT_ATTRIB_FRONT_SPECULAR+(f))
-#define MAT_ATTRIB_EMISSION(f) (MAT_ATTRIB_FRONT_EMISSION+(f))
-#define MAT_ATTRIB_SHININESS(f)(MAT_ATTRIB_FRONT_SHININESS+(f))
-#define MAT_ATTRIB_INDEXES(f) (MAT_ATTRIB_FRONT_INDEXES+(f))
-
-#define MAT_INDEX_AMBIENT 0
-#define MAT_INDEX_DIFFUSE 1
-#define MAT_INDEX_SPECULAR 2
-
-#define MAT_BIT_FRONT_AMBIENT (1<<MAT_ATTRIB_FRONT_AMBIENT)
-#define MAT_BIT_BACK_AMBIENT (1<<MAT_ATTRIB_BACK_AMBIENT)
-#define MAT_BIT_FRONT_DIFFUSE (1<<MAT_ATTRIB_FRONT_DIFFUSE)
-#define MAT_BIT_BACK_DIFFUSE (1<<MAT_ATTRIB_BACK_DIFFUSE)
-#define MAT_BIT_FRONT_SPECULAR (1<<MAT_ATTRIB_FRONT_SPECULAR)
-#define MAT_BIT_BACK_SPECULAR (1<<MAT_ATTRIB_BACK_SPECULAR)
-#define MAT_BIT_FRONT_EMISSION (1<<MAT_ATTRIB_FRONT_EMISSION)
-#define MAT_BIT_BACK_EMISSION (1<<MAT_ATTRIB_BACK_EMISSION)
-#define MAT_BIT_FRONT_SHININESS (1<<MAT_ATTRIB_FRONT_SHININESS)
-#define MAT_BIT_BACK_SHININESS (1<<MAT_ATTRIB_BACK_SHININESS)
-#define MAT_BIT_FRONT_INDEXES (1<<MAT_ATTRIB_FRONT_INDEXES)
-#define MAT_BIT_BACK_INDEXES (1<<MAT_ATTRIB_BACK_INDEXES)
-
-
-#define FRONT_MATERIAL_BITS (MAT_BIT_FRONT_EMISSION | \
- MAT_BIT_FRONT_AMBIENT | \
- MAT_BIT_FRONT_DIFFUSE | \
- MAT_BIT_FRONT_SPECULAR | \
- MAT_BIT_FRONT_SHININESS | \
- MAT_BIT_FRONT_INDEXES)
-
-#define BACK_MATERIAL_BITS (MAT_BIT_BACK_EMISSION | \
- MAT_BIT_BACK_AMBIENT | \
- MAT_BIT_BACK_DIFFUSE | \
- MAT_BIT_BACK_SPECULAR | \
- MAT_BIT_BACK_SHININESS | \
- MAT_BIT_BACK_INDEXES)
-
-#define ALL_MATERIAL_BITS (FRONT_MATERIAL_BITS | BACK_MATERIAL_BITS)
-/*@}*/
-
-
-#define EXP_TABLE_SIZE 512 /**< Specular exponent lookup table sizes */
-#define SHINE_TABLE_SIZE 256 /**< Material shininess lookup table sizes */
-
-/**
- * Material shininess lookup table.
- */
-struct gl_shine_tab
-{
- struct gl_shine_tab *next, *prev;
- GLfloat tab[SHINE_TABLE_SIZE+1];
- GLfloat shininess;
- GLuint refcount;
-};
-
-
-/**
- * Light source state.
- */
-struct gl_light
-{
- struct gl_light *next; /**< double linked list with sentinel */
- struct gl_light *prev;
-
- GLfloat Ambient[4]; /**< ambient color */
- GLfloat Diffuse[4]; /**< diffuse color */
- GLfloat Specular[4]; /**< specular color */
- GLfloat EyePosition[4]; /**< position in eye coordinates */
- GLfloat SpotDirection[4]; /**< spotlight direction in eye coordinates */
- GLfloat SpotExponent;
- GLfloat SpotCutoff; /**< in degrees */
- GLfloat _CosCutoffNeg; /**< = cos(SpotCutoff) */
- GLfloat _CosCutoff; /**< = MAX(0, cos(SpotCutoff)) */
- GLfloat ConstantAttenuation;
- GLfloat LinearAttenuation;
- GLfloat QuadraticAttenuation;
- GLboolean Enabled; /**< On/off flag */
-
- /**
- * \name Derived fields
- */
- /*@{*/
- GLbitfield _Flags; /**< State */
-
- GLfloat _Position[4]; /**< position in eye/obj coordinates */
- GLfloat _VP_inf_norm[3]; /**< Norm direction to infinite light */
- GLfloat _h_inf_norm[3]; /**< Norm( _VP_inf_norm + <0,0,1> ) */
- GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */
- GLfloat _VP_inf_spot_attenuation;
-
- GLfloat _SpotExpTable[EXP_TABLE_SIZE][2]; /**< to replace a pow() call */
- GLfloat _MatAmbient[2][3]; /**< material ambient * light ambient */
- GLfloat _MatDiffuse[2][3]; /**< material diffuse * light diffuse */
- GLfloat _MatSpecular[2][3]; /**< material spec * light specular */
- GLfloat _dli; /**< CI diffuse light intensity */
- GLfloat _sli; /**< CI specular light intensity */
- /*@}*/
-};
-
-
-/**
- * Light model state.
- */
-struct gl_lightmodel
-{
- GLfloat Ambient[4]; /**< ambient color */
- GLboolean LocalViewer; /**< Local (or infinite) view point? */
- GLboolean TwoSide; /**< Two (or one) sided lighting? */
- GLenum ColorControl; /**< either GL_SINGLE_COLOR
- * or GL_SEPARATE_SPECULAR_COLOR */
-};
-
-
-/**
- * Material state.
- */
-struct gl_material
-{
- GLfloat Attrib[MAT_ATTRIB_MAX][4];
-};
-
-
-/**
- * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT)
- */
-struct gl_accum_attrib
-{
- GLfloat ClearColor[4]; /**< Accumulation buffer clear color */
-};
-
-
-/**
- * Color buffer attribute group (GL_COLOR_BUFFER_BIT).
- */
-struct gl_colorbuffer_attrib
-{
- GLuint ClearIndex; /**< Index to use for glClear */
- GLclampf ClearColor[4]; /**< Color to use for glClear */
-
- GLuint IndexMask; /**< Color index write mask */
- GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */
-
- GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */
-
- /**
- * \name alpha testing
- */
- /*@{*/
- GLboolean AlphaEnabled; /**< Alpha test enabled flag */
- GLenum AlphaFunc; /**< Alpha test function */
- GLclampf AlphaRef; /**< Alpha reference value */
- /*@}*/
-
- /**
- * \name Blending
- */
- /*@{*/
- GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */
- GLfloat BlendColor[4]; /**< Blending color */
- struct
- {
- GLenum SrcRGB; /**< RGB blend source term */
- GLenum DstRGB; /**< RGB blend dest term */
- GLenum SrcA; /**< Alpha blend source term */
- GLenum DstA; /**< Alpha blend dest term */
- GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */
- GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */
- } Blend[MAX_DRAW_BUFFERS];
- /** Are the blend func terms currently different for each buffer/target? */
- GLboolean _BlendFuncPerBuffer;
- /** Are the blend equations currently different for each buffer/target? */
- GLboolean _BlendEquationPerBuffer;
- /*@}*/
-
- /**
- * \name Logic op
- */
- /*@{*/
- GLenum LogicOp; /**< Logic operator */
- GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */
- GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */
- GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */
- /*@}*/
-
- GLboolean DitherFlag; /**< Dither enable flag */
-
- GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
- GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
-
- GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */
-};
-
-
-/**
- * Current attribute group (GL_CURRENT_BIT).
- */
-struct gl_current_attrib
-{
- /**
- * \name Current vertex attributes.
- * \note Values are valid only after FLUSH_VERTICES has been called.
- * \note Index and Edgeflag current values are stored as floats in the
- * SIX and SEVEN attribute slots.
- */
- GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */
-
- /**
- * \name Current raster position attributes (always valid).
- * \note This set of attributes is very similar to the SWvertex struct.
- */
- /*@{*/
- GLfloat RasterPos[4];
- GLfloat RasterDistance;
- GLfloat RasterColor[4];
- GLfloat RasterSecondaryColor[4];
- GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4];
- GLboolean RasterPosValid;
- /*@}*/
-};
-
-
-/**
- * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT).
- */
-struct gl_depthbuffer_attrib
-{
- GLenum Func; /**< Function for depth buffer compare */
- GLclampd Clear; /**< Value to clear depth buffer to */
- GLboolean Test; /**< Depth buffering enabled flag */
- GLboolean Mask; /**< Depth buffer writable? */
- GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */
- GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */
-};
-
-
-/**
- * Evaluator attribute group (GL_EVAL_BIT).
- */
-struct gl_eval_attrib
-{
- /**
- * \name Enable bits
- */
- /*@{*/
- GLboolean Map1Color4;
- GLboolean Map1Index;
- GLboolean Map1Normal;
- GLboolean Map1TextureCoord1;
- GLboolean Map1TextureCoord2;
- GLboolean Map1TextureCoord3;
- GLboolean Map1TextureCoord4;
- GLboolean Map1Vertex3;
- GLboolean Map1Vertex4;
- GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */
- GLboolean Map2Color4;
- GLboolean Map2Index;
- GLboolean Map2Normal;
- GLboolean Map2TextureCoord1;
- GLboolean Map2TextureCoord2;
- GLboolean Map2TextureCoord3;
- GLboolean Map2TextureCoord4;
- GLboolean Map2Vertex3;
- GLboolean Map2Vertex4;
- GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */
- GLboolean AutoNormal;
- /*@}*/
-
- /**
- * \name Map Grid endpoints and divisions and calculated du values
- */
- /*@{*/
- GLint MapGrid1un;
- GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du;
- GLint MapGrid2un, MapGrid2vn;
- GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du;
- GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv;
- /*@}*/
-};
-
-
-/**
- * Fog attribute group (GL_FOG_BIT).
- */
-struct gl_fog_attrib
-{
- GLboolean Enabled; /**< Fog enabled flag */
- GLfloat Color[4]; /**< Fog color */
- GLfloat Density; /**< Density >= 0.0 */
- GLfloat Start; /**< Start distance in eye coords */
- GLfloat End; /**< End distance in eye coords */
- GLfloat Index; /**< Fog index */
- GLenum Mode; /**< Fog mode */
- GLboolean ColorSumEnabled;
- GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */
- GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */
-};
-
-
-/**
- * \brief Layout qualifiers for gl_FragDepth.
- *
- * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
- * a layout qualifier.
- *
- * \see enum ir_depth_layout
- */
-enum gl_frag_depth_layout {
- FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
- FRAG_DEPTH_LAYOUT_ANY,
- FRAG_DEPTH_LAYOUT_GREATER,
- FRAG_DEPTH_LAYOUT_LESS,
- FRAG_DEPTH_LAYOUT_UNCHANGED
-};
-
-
-/**
- * Hint attribute group (GL_HINT_BIT).
- *
- * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE.
- */
-struct gl_hint_attrib
-{
- GLenum PerspectiveCorrection;
- GLenum PointSmooth;
- GLenum LineSmooth;
- GLenum PolygonSmooth;
- GLenum Fog;
- GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */
- GLenum TextureCompression; /**< GL_ARB_texture_compression */
- GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */
- GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */
-};
-
-/**
- * Light state flags.
- */
-/*@{*/
-#define LIGHT_SPOT 0x1
-#define LIGHT_LOCAL_VIEWER 0x2
-#define LIGHT_POSITIONAL 0x4
-#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER)
-/*@}*/
-
-
-/**
- * Lighting attribute group (GL_LIGHT_BIT).
- */
-struct gl_light_attrib
-{
- struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */
- struct gl_lightmodel Model; /**< Lighting model */
-
- /**
- * Must flush FLUSH_VERTICES before referencing:
- */
- /*@{*/
- struct gl_material Material; /**< Includes front & back values */
- /*@}*/
-
- GLboolean Enabled; /**< Lighting enabled flag */
- GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */
- GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */
- GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */
- GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */
- GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */
- GLboolean ColorMaterialEnabled;
- GLenum ClampVertexColor;
-
- struct gl_light EnabledList; /**< List sentinel */
-
- /**
- * Derived state for optimizations:
- */
- /*@{*/
- GLboolean _NeedEyeCoords;
- GLboolean _NeedVertices; /**< Use fast shader? */
- GLbitfield _Flags; /**< LIGHT_* flags, see above */
- GLfloat _BaseColor[2][3];
- /*@}*/
-};
-
-
-/**
- * Line attribute group (GL_LINE_BIT).
- */
-struct gl_line_attrib
-{
- GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */
- GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */
- GLushort StipplePattern; /**< Stipple pattern */
- GLint StippleFactor; /**< Stipple repeat factor */
- GLfloat Width; /**< Line width */
-};
-
-
-/**
- * Display list attribute group (GL_LIST_BIT).
- */
-struct gl_list_attrib
-{
- GLuint ListBase;
-};
-
-
-/**
- * Multisample attribute group (GL_MULTISAMPLE_BIT).
- */
-struct gl_multisample_attrib
-{
- GLboolean Enabled;
- GLboolean _Enabled; /**< true if Enabled and multisample buffer */
- GLboolean SampleAlphaToCoverage;
- GLboolean SampleAlphaToOne;
- GLboolean SampleCoverage;
- GLfloat SampleCoverageValue;
- GLboolean SampleCoverageInvert;
-};
-
-
-/**
- * A pixelmap (see glPixelMap)
- */
-struct gl_pixelmap
-{
- GLint Size;
- GLfloat Map[MAX_PIXEL_MAP_TABLE];
- GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */
-};
-
-
-/**
- * Collection of all pixelmaps
- */
-struct gl_pixelmaps
-{
- struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */
- struct gl_pixelmap GtoG;
- struct gl_pixelmap BtoB;
- struct gl_pixelmap AtoA;
- struct gl_pixelmap ItoR;
- struct gl_pixelmap ItoG;
- struct gl_pixelmap ItoB;
- struct gl_pixelmap ItoA;
- struct gl_pixelmap ItoI;
- struct gl_pixelmap StoS;
-};
-
-
-/**
- * Pixel attribute group (GL_PIXEL_MODE_BIT).
- */
-struct gl_pixel_attrib
-{
- GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */
-
- /*--- Begin Pixel Transfer State ---*/
- /* Fields are in the order in which they're applied... */
-
- /** Scale & Bias (index shift, offset) */
- /*@{*/
- GLfloat RedBias, RedScale;
- GLfloat GreenBias, GreenScale;
- GLfloat BlueBias, BlueScale;
- GLfloat AlphaBias, AlphaScale;
- GLfloat DepthBias, DepthScale;
- GLint IndexShift, IndexOffset;
- /*@}*/
-
- /* Pixel Maps */
- /* Note: actual pixel maps are not part of this attrib group */
- GLboolean MapColorFlag;
- GLboolean MapStencilFlag;
-
- /*--- End Pixel Transfer State ---*/
-
- /** glPixelZoom */
- GLfloat ZoomX, ZoomY;
-
- /** GL_SGI_texture_color_table */
- GLfloat TextureColorTableScale[4]; /**< RGBA */
- GLfloat TextureColorTableBias[4]; /**< RGBA */
-};
-
-
-/**
- * Point attribute group (GL_POINT_BIT).
- */
-struct gl_point_attrib
-{
- GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */
- GLfloat Size; /**< User-specified point size */
- GLfloat Params[3]; /**< GL_EXT_point_parameters */
- GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */
- GLfloat Threshold; /**< GL_EXT_point_parameters */
- GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */
- GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */
- GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/
- GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */
- GLenum SpriteOrigin; /**< GL_ARB_point_sprite */
-};
-
-
-/**
- * Polygon attribute group (GL_POLYGON_BIT).
- */
-struct gl_polygon_attrib
-{
- GLenum FrontFace; /**< Either GL_CW or GL_CCW */
- GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
- GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
- GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */
- GLboolean CullFlag; /**< Culling on/off flag */
- GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */
- GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */
- GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */
- GLfloat OffsetFactor; /**< Polygon offset factor, from user */
- GLfloat OffsetUnits; /**< Polygon offset units, from user */
- GLboolean OffsetPoint; /**< Offset in GL_POINT mode */
- GLboolean OffsetLine; /**< Offset in GL_LINE mode */
- GLboolean OffsetFill; /**< Offset in GL_FILL mode */
-};
-
-
-/**
- * Scissor attributes (GL_SCISSOR_BIT).
- */
-struct gl_scissor_attrib
-{
- GLboolean Enabled; /**< Scissor test enabled? */
- GLint X, Y; /**< Lower left corner of box */
- GLsizei Width, Height; /**< Size of box */
-};
-
-
-/**
- * Stencil attribute group (GL_STENCIL_BUFFER_BIT).
- *
- * Three sets of stencil data are tracked so that OpenGL 2.0,
- * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported
- * simultaneously. In each of the stencil state arrays, element 0 corresponds
- * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 /
- * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the
- * GL_EXT_stencil_two_side GL_BACK state.
- *
- * The derived value \c _BackFace is either 1 or 2 depending on whether or
- * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled.
- *
- * The derived value \c _TestTwoSide is set when the front-face and back-face
- * stencil state are different.
- */
-struct gl_stencil_attrib
-{
- GLboolean Enabled; /**< Enabled flag */
- GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */
- GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */
- GLboolean _Enabled; /**< Enabled and stencil buffer present */
- GLboolean _TestTwoSide;
- GLubyte _BackFace; /**< Current back stencil state (1 or 2) */
- GLenum Function[3]; /**< Stencil function */
- GLenum FailFunc[3]; /**< Fail function */
- GLenum ZPassFunc[3]; /**< Depth buffer pass function */
- GLenum ZFailFunc[3]; /**< Depth buffer fail function */
- GLint Ref[3]; /**< Reference value */
- GLuint ValueMask[3]; /**< Value mask */
- GLuint WriteMask[3]; /**< Write mask */
- GLuint Clear; /**< Clear value */
-};
-
-
-/**
- * An index for each type of texture object. These correspond to the GL
- * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc.
- * Note: the order is from highest priority to lowest priority.
- */
-typedef enum
-{
- TEXTURE_2D_ARRAY_INDEX,
- TEXTURE_1D_ARRAY_INDEX,
- TEXTURE_CUBE_INDEX,
- TEXTURE_3D_INDEX,
- TEXTURE_RECT_INDEX,
- TEXTURE_2D_INDEX,
- TEXTURE_1D_INDEX,
- NUM_TEXTURE_TARGETS
-} gl_texture_index;
-
-
-/**
- * Bit flags for each type of texture object
- * Used for Texture.Unit[]._ReallyEnabled flags.
- */
-/*@{*/
-#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX)
-#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX)
-#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX)
-#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX)
-#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX)
-#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX)
-#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX)
-/*@}*/
-
-
-/**
- * TexGenEnabled flags.
- */
-/*@{*/
-#define S_BIT 1
-#define T_BIT 2
-#define R_BIT 4
-#define Q_BIT 8
-#define STR_BITS (S_BIT | T_BIT | R_BIT)
-/*@}*/
-
-
-/**
- * Bit flag versions of the corresponding GL_ constants.
- */
-/*@{*/
-#define TEXGEN_SPHERE_MAP 0x1
-#define TEXGEN_OBJ_LINEAR 0x2
-#define TEXGEN_EYE_LINEAR 0x4
-#define TEXGEN_REFLECTION_MAP_NV 0x8
-#define TEXGEN_NORMAL_MAP_NV 0x10
-
-#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \
- TEXGEN_REFLECTION_MAP_NV | \
- TEXGEN_NORMAL_MAP_NV)
-#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \
- TEXGEN_REFLECTION_MAP_NV | \
- TEXGEN_NORMAL_MAP_NV | \
- TEXGEN_EYE_LINEAR)
-/*@}*/
-
-
-
-/** Tex-gen enabled for texture unit? */
-#define ENABLE_TEXGEN(unit) (1 << (unit))
-
-/** Non-identity texture matrix for texture unit? */
-#define ENABLE_TEXMAT(unit) (1 << (unit))
-
-
-/**
- * Texel fetch function prototype. We use texel fetch functions to
- * extract RGBA, color indexes and depth components out of 1D, 2D and 3D
- * texture images. These functions help to isolate us from the gritty
- * details of all the various texture image encodings.
- *
- * \param texImage texture image.
- * \param col texel column.
- * \param row texel row.
- * \param img texel image level/layer.
- * \param texelOut output texel (up to 4 GLchans)
- */
-typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- GLchan *texelOut );
-
-/**
- * As above, but returns floats.
- * Used for depth component images and for upcoming signed/float
- * texture images.
- */
-typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- GLfloat *texelOut );
-
-
-typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- const void *texel);
-
-
-/**
- * Texture image state. Describes the dimensions of a texture image,
- * the texel format and pointers to Texel Fetch functions.
- */
-struct gl_texture_image
-{
- GLint InternalFormat; /**< Internal format as given by the user */
- GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA,
- * GL_LUMINANCE, GL_LUMINANCE_ALPHA,
- * GL_INTENSITY, GL_COLOR_INDEX,
- * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT
- * only. Used for choosing TexEnv arithmetic.
- */
- GLuint TexFormat; /**< The actual format: MESA_FORMAT_x */
-
- GLuint Border; /**< 0 or 1 */
- GLuint Width; /**< = 2^WidthLog2 + 2*Border */
- GLuint Height; /**< = 2^HeightLog2 + 2*Border */
- GLuint Depth; /**< = 2^DepthLog2 + 2*Border */
- GLuint Width2; /**< = Width - 2*Border */
- GLuint Height2; /**< = Height - 2*Border */
- GLuint Depth2; /**< = Depth - 2*Border */
- GLuint WidthLog2; /**< = log2(Width2) */
- GLuint HeightLog2; /**< = log2(Height2) */
- GLuint DepthLog2; /**< = log2(Depth2) */
- GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */
- GLfloat WidthScale; /**< used for mipmap LOD computation */
- GLfloat HeightScale; /**< used for mipmap LOD computation */
- GLfloat DepthScale; /**< used for mipmap LOD computation */
- GLboolean IsClientData; /**< Data owned by client? */
- GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */
-
- struct gl_texture_object *TexObject; /**< Pointer back to parent object */
-
- FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */
- FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */
-
- GLuint RowStride; /**< Padded width in units of texels */
- GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to
- each 2D slice in 'Data', in texels */
- GLvoid *Data; /**< Image data, accessed via FetchTexel() */
-
- /**
- * \name For device driver:
- */
- /*@{*/
- void *DriverData; /**< Arbitrary device driver data */
- /*@}*/
-};
-
-
-/**
- * Indexes for cube map faces.
- */
-typedef enum
-{
- FACE_POS_X = 0,
- FACE_NEG_X = 1,
- FACE_POS_Y = 2,
- FACE_NEG_Y = 3,
- FACE_POS_Z = 4,
- FACE_NEG_Z = 5,
- MAX_FACES = 6
-} gl_face_index;
-
-
-/**
- * Texture object state. Contains the array of mipmap images, border color,
- * wrap modes, filter modes, shadow/texcompare state, and the per-texture
- * color palette.
- */
-struct gl_texture_object
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- GLint RefCount; /**< reference count */
- GLuint Name; /**< the user-visible texture object ID */
- GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */
- GLfloat Priority; /**< in [0,1] */
- union {
- GLfloat f[4];
- GLuint ui[4];
- GLint i[4];
- } BorderColor; /**< Interpreted according to texture format */
- GLenum WrapS; /**< S-axis texture image wrap mode */
- GLenum WrapT; /**< T-axis texture image wrap mode */
- GLenum WrapR; /**< R-axis texture image wrap mode */
- GLenum MinFilter; /**< minification filter */
- GLenum MagFilter; /**< magnification filter */
- GLfloat MinLod; /**< min lambda, OpenGL 1.2 */
- GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */
- GLfloat LodBias; /**< OpenGL 1.4 */
- GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */
- GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */
- GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
- GLenum CompareMode; /**< GL_ARB_shadow */
- GLenum CompareFunc; /**< GL_ARB_shadow */
- GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */
- GLenum DepthMode; /**< GL_ARB_depth_texture */
- GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */
- GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */
- GLint CropRect[4]; /**< GL_OES_draw_texture */
- GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */
- GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */
- GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */
- GLboolean _Complete; /**< Is texture object complete? */
- GLboolean _RenderToTexture; /**< Any rendering to this texture? */
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
- GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */
-
- /** Actual texture images, indexed by [cube face] and [mipmap level] */
- struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
-
- /** GL_EXT_paletted_texture */
- struct gl_color_table Palette;
-
- /**
- * \name For device driver.
- * Note: instead of attaching driver data to this pointer, it's preferable
- * to instead use this struct as a base class for your own texture object
- * class. Driver->NewTextureObject() can be used to implement the
- * allocation.
- */
- void *DriverData; /**< Arbitrary device driver data */
-};
-
-
-/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */
-#define MAX_COMBINER_TERMS 4
-
-
-/**
- * Texture combine environment state.
- */
-struct gl_tex_env_combine_state
-{
- GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
- GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
- /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */
- GLenum SourceRGB[MAX_COMBINER_TERMS];
- GLenum SourceA[MAX_COMBINER_TERMS];
- /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */
- GLenum OperandRGB[MAX_COMBINER_TERMS];
- GLenum OperandA[MAX_COMBINER_TERMS];
- GLuint ScaleShiftRGB; /**< 0, 1 or 2 */
- GLuint ScaleShiftA; /**< 0, 1 or 2 */
- GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */
- GLuint _NumArgsA; /**< Number of inputs used for the A combiner */
-};
-
-
-/**
- * Texture coord generation state.
- */
-struct gl_texgen
-{
- GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */
- GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */
- GLfloat ObjectPlane[4];
- GLfloat EyePlane[4];
-};
-
-
-/**
- * Texture unit state. Contains enable flags, texture environment/function/
- * combiners, texgen state, pointers to current texture objects and
- * post-filter color tables.
- */
-struct gl_texture_unit
-{
- GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */
- GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */
-
- GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */
- GLfloat EnvColor[4];
-
- struct gl_texgen GenS;
- struct gl_texgen GenT;
- struct gl_texgen GenR;
- struct gl_texgen GenQ;
- GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */
- GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */
-
- GLfloat LodBias; /**< for biasing mipmap levels */
- GLenum BumpTarget;
- GLfloat RotMatrix[4]; /* 2x2 matrix */
-
- /**
- * \name GL_EXT_texture_env_combine
- */
- struct gl_tex_env_combine_state Combine;
-
- /**
- * Derived state based on \c EnvMode and the \c BaseFormat of the
- * currently enabled texture.
- */
- struct gl_tex_env_combine_state _EnvMode;
-
- /**
- * Currently enabled combiner state. This will point to either
- * \c Combine or \c _EnvMode.
- */
- struct gl_tex_env_combine_state *_CurrentCombine;
-
- /** Current texture object pointers */
- struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS];
-
- /** Points to highest priority, complete and enabled texture object */
- struct gl_texture_object *_Current;
-
- /** GL_SGI_texture_color_table */
- /*@{*/
- struct gl_color_table ColorTable;
- struct gl_color_table ProxyColorTable;
- GLboolean ColorTableEnabled;
- /*@}*/
-};
-
-
-/**
- * Texture attribute group (GL_TEXTURE_BIT).
- */
-struct gl_texture_attrib
-{
- GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */
- struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
-
- struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS];
-
- /** GL_ARB_seamless_cubemap */
- GLboolean CubeMapSeamless;
-
- /** GL_EXT_shared_texture_palette */
- GLboolean SharedPalette;
- struct gl_color_table Palette;
-
- /** Texture units/samplers used by vertex or fragment texturing */
- GLbitfield _EnabledUnits;
-
- /** Texture coord units/sets used for fragment texturing */
- GLbitfield _EnabledCoordUnits;
-
- /** Texture coord units that have texgen enabled */
- GLbitfield _TexGenEnabled;
-
- /** Texture coord units that have non-identity matrices */
- GLbitfield _TexMatEnabled;
-
- /** Bitwise-OR of all Texture.Unit[i]._GenFlags */
- GLbitfield _GenFlags;
-};
-
-
-/**
- * Transformation attribute group (GL_TRANSFORM_BIT).
- */
-struct gl_transform_attrib
-{
- GLenum MatrixMode; /**< Matrix mode */
- GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */
- GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */
- GLbitfield ClipPlanesEnabled; /**< on/off bitmask */
- GLboolean Normalize; /**< Normalize all normals? */
- GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */
- GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */
- GLboolean DepthClamp; /**< GL_ARB_depth_clamp */
-
- GLfloat CullEyePos[4];
- GLfloat CullObjPos[4];
-};
-
-
-/**
- * Viewport attribute group (GL_VIEWPORT_BIT).
- */
-struct gl_viewport_attrib
-{
- GLint X, Y; /**< position */
- GLsizei Width, Height; /**< size */
- GLfloat Near, Far; /**< Depth buffer range */
- GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */
-};
-
-
-/**
- * GL_ARB_vertex/pixel_buffer_object buffer object
- */
-struct gl_buffer_object
-{
- _glthread_Mutex Mutex;
- GLint RefCount;
- GLuint Name;
- GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */
- GLsizeiptrARB Size; /**< Size of buffer storage in bytes */
- GLubyte *Data; /**< Location of storage either in RAM or VRAM. */
- /** Fields describing a mapped buffer */
- /*@{*/
- GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */
- GLvoid *Pointer; /**< User-space address of mapping */
- GLintptr Offset; /**< Mapped offset */
- GLsizeiptr Length; /**< Mapped length */
- /*@}*/
- GLboolean Written; /**< Ever written to? (for debugging) */
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
-};
-
-
-/**
- * Client pixel packing/unpacking attributes
- */
-struct gl_pixelstore_attrib
-{
- GLint Alignment;
- GLint RowLength;
- GLint SkipPixels;
- GLint SkipRows;
- GLint ImageHeight;
- GLint SkipImages;
- GLboolean SwapBytes;
- GLboolean LsbFirst;
- GLboolean ClientStorage; /**< GL_APPLE_client_storage */
- GLboolean Invert; /**< GL_MESA_pack_invert */
- struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */
-};
-
-
-/**
- * Client vertex array attributes
- */
-struct gl_client_array
-{
- GLint Size; /**< components per element (1,2,3,4) */
- GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */
- GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */
- GLsizei Stride; /**< user-specified stride */
- GLsizei StrideB; /**< actual stride in bytes */
- const GLubyte *Ptr; /**< Points to array data */
- GLboolean Enabled; /**< Enabled flag is a boolean */
- GLboolean Normalized; /**< GL_ARB_vertex_program */
- GLboolean Integer; /**< Integer-valued? */
- GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */
- GLuint _ElementSize; /**< size of each element in bytes */
-
- struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */
- GLuint _MaxElement; /**< max element index into array buffer + 1 */
-};
-
-
-/**
- * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object
- * extension, but a nice encapsulation in any case.
- */
-struct gl_array_object
-{
- /** Name of the array object as received from glGenVertexArrayAPPLE. */
- GLuint Name;
-
- GLint RefCount;
- _glthread_Mutex Mutex;
- GLboolean VBOonly; /**< require all arrays to live in VBOs? */
-
- /** Conventional vertex arrays */
- /*@{*/
- struct gl_client_array Vertex;
- struct gl_client_array Weight;
- struct gl_client_array Normal;
- struct gl_client_array Color;
- struct gl_client_array SecondaryColor;
- struct gl_client_array FogCoord;
- struct gl_client_array Index;
- struct gl_client_array EdgeFlag;
- struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS];
- struct gl_client_array PointSize;
- /*@}*/
-
- /**
- * Generic arrays for vertex programs/shaders.
- * For NV vertex programs, these attributes alias and take priority
- * over the conventional attribs above. For ARB vertex programs and
- * GLSL vertex shaders, these attributes are separate.
- */
- struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS];
-
- /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */
- GLbitfield _Enabled;
-
- /**
- * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs
- * we can determine the max legal (in bounds) glDrawElements array index.
- */
- GLuint _MaxElement;
-};
-
-
-/**
- * Vertex array state
- */
-struct gl_array_attrib
-{
- /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */
- struct gl_array_object *ArrayObj;
-
- /** The default vertex array object */
- struct gl_array_object *DefaultArrayObj;
-
- /** Array objects (GL_ARB/APPLE_vertex_array_object) */
- struct _mesa_HashTable *Objects;
-
- GLint ActiveTexture; /**< Client Active Texture */
- GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */
- GLuint LockCount; /**< GL_EXT_compiled_vertex_array */
-
- /** GL 3.1 (slightly different from GL_NV_primitive_restart) */
- GLboolean PrimitiveRestart;
- GLuint RestartIndex;
-
- GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */
-
- /* GL_ARB_vertex_buffer_object */
- struct gl_buffer_object *ArrayBufferObj;
- struct gl_buffer_object *ElementArrayBufferObj;
-};
-
-
-/**
- * Feedback buffer state
- */
-struct gl_feedback
-{
- GLenum Type;
- GLbitfield _Mask; /**< FB_* bits */
- GLfloat *Buffer;
- GLuint BufferSize;
- GLuint Count;
-};
-
-
-/**
- * Selection buffer state
- */
-struct gl_selection
-{
- GLuint *Buffer; /**< selection buffer */
- GLuint BufferSize; /**< size of the selection buffer */
- GLuint BufferCount; /**< number of values in the selection buffer */
- GLuint Hits; /**< number of records in the selection buffer */
- GLuint NameStackDepth; /**< name stack depth */
- GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */
- GLboolean HitFlag; /**< hit flag */
- GLfloat HitMinZ; /**< minimum hit depth */
- GLfloat HitMaxZ; /**< maximum hit depth */
-};
-
-
-/**
- * 1-D Evaluator control points
- */
-struct gl_1d_map
-{
- GLuint Order; /**< Number of control points */
- GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */
- GLfloat *Points; /**< Points to contiguous control points */
-};
-
-
-/**
- * 2-D Evaluator control points
- */
-struct gl_2d_map
-{
- GLuint Uorder; /**< Number of control points in U dimension */
- GLuint Vorder; /**< Number of control points in V dimension */
- GLfloat u1, u2, du;
- GLfloat v1, v2, dv;
- GLfloat *Points; /**< Points to contiguous control points */
-};
-
-
-/**
- * All evaluator control point state
- */
-struct gl_evaluators
-{
- /**
- * \name 1-D maps
- */
- /*@{*/
- struct gl_1d_map Map1Vertex3;
- struct gl_1d_map Map1Vertex4;
- struct gl_1d_map Map1Index;
- struct gl_1d_map Map1Color4;
- struct gl_1d_map Map1Normal;
- struct gl_1d_map Map1Texture1;
- struct gl_1d_map Map1Texture2;
- struct gl_1d_map Map1Texture3;
- struct gl_1d_map Map1Texture4;
- struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */
- /*@}*/
-
- /**
- * \name 2-D maps
- */
- /*@{*/
- struct gl_2d_map Map2Vertex3;
- struct gl_2d_map Map2Vertex4;
- struct gl_2d_map Map2Index;
- struct gl_2d_map Map2Color4;
- struct gl_2d_map Map2Normal;
- struct gl_2d_map Map2Texture1;
- struct gl_2d_map Map2Texture2;
- struct gl_2d_map Map2Texture3;
- struct gl_2d_map Map2Texture4;
- struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */
- /*@}*/
-};
-
-
-/**
- * Names of the various vertex/fragment program register files, etc.
- *
- * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c)
- * All values should fit in a 4-bit field.
- *
- * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM,
- * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to
- * be "uniform" variables since they can only be set outside glBegin/End.
- * They're also all stored in the same Parameters array.
- */
-typedef enum
-{
- PROGRAM_TEMPORARY, /**< machine->Temporary[] */
- PROGRAM_INPUT, /**< machine->Inputs[] */
- PROGRAM_OUTPUT, /**< machine->Outputs[] */
- PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */
- PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */
- PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */
- PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */
- PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */
- PROGRAM_CONSTANT, /**< gl_program->Parameters[] */
- PROGRAM_UNIFORM, /**< gl_program->Parameters[] */
- PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */
- PROGRAM_ADDRESS, /**< machine->AddressReg */
- PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
- PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
- PROGRAM_UNDEFINED, /**< Invalid/TBD value */
- PROGRAM_FILE_MAX
-} gl_register_file;
-
-
-/**
- * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be
- * one of these values.
- */
-typedef enum
-{
- SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */
- SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */
- SYSTEM_VALUE_MAX /**< Number of values */
-} gl_system_value;
-
-
-/** Vertex and fragment instructions */
-struct prog_instruction;
-struct gl_program_parameter_list;
-struct gl_uniform_list;
-
-
-/**
- * Base class for any kind of program object
- */
-struct gl_program
-{
- GLuint Id;
- GLubyte *String; /**< Null-terminated program text */
- GLint RefCount;
- GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */
- GLenum Format; /**< String encoding format */
- GLboolean Resident;
-
- struct prog_instruction *Instructions;
-
- GLbitfield InputsRead; /**< Bitmask of which input regs are read */
- GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
- GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */
- GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */
- GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */
- GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */
- GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */
- GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
-
-
- /** Named parameters, constants, etc. from program text */
- struct gl_program_parameter_list *Parameters;
- /** Numbered local parameters */
- GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4];
-
- /** Vertex/fragment shader varying vars */
- struct gl_program_parameter_list *Varying;
- /** Vertex program user-defined attributes */
- struct gl_program_parameter_list *Attributes;
-
- /** Map from sampler unit to texture unit (set by glUniform1i()) */
- GLubyte SamplerUnits[MAX_SAMPLERS];
- /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */
- gl_texture_index SamplerTargets[MAX_SAMPLERS];
-
- /** Bitmask of which register files are read/written with indirect
- * addressing. Mask of (1 << PROGRAM_x) bits.
- */
- GLbitfield IndirectRegisterFiles;
-
- /** Logical counts */
- /*@{*/
- GLuint NumInstructions;
- GLuint NumTemporaries;
- GLuint NumParameters;
- GLuint NumAttributes;
- GLuint NumAddressRegs;
- GLuint NumAluInstructions;
- GLuint NumTexInstructions;
- GLuint NumTexIndirections;
- /*@}*/
- /** Native, actual h/w counts */
- /*@{*/
- GLuint NumNativeInstructions;
- GLuint NumNativeTemporaries;
- GLuint NumNativeParameters;
- GLuint NumNativeAttributes;
- GLuint NumNativeAddressRegs;
- GLuint NumNativeAluInstructions;
- GLuint NumNativeTexInstructions;
- GLuint NumNativeTexIndirections;
- /*@}*/
-};
-
-
-/** Vertex program object */
-struct gl_vertex_program
-{
- struct gl_program Base; /**< base class */
- GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? */
- GLboolean IsPositionInvariant;
-};
-
-
-/** Geometry program object */
-struct gl_geometry_program
-{
- struct gl_program Base; /**< base class */
-
- GLint VerticesOut;
- GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
- GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
- GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
-};
-
-
-/** Fragment program object */
-struct gl_fragment_program
-{
- struct gl_program Base; /**< base class */
- GLenum FogOption;
- GLboolean UsesKill; /**< shader uses KIL instruction */
- GLboolean OriginUpperLeft;
- GLboolean PixelCenterInteger;
- enum gl_frag_depth_layout FragDepthLayout;
-};
-
-
-/**
- * State common to vertex and fragment programs.
- */
-struct gl_program_state
-{
- GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */
- const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */
-};
-
-
-/**
- * Context state for vertex programs.
- */
-struct gl_vertex_program_state
-{
- GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */
- GLboolean _Enabled; /**< Enabled and _valid_ user program? */
- GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */
- GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */
- struct gl_vertex_program *Current; /**< User-bound vertex program */
-
- /** Currently enabled and valid vertex program (including internal
- * programs, user-defined vertex programs and GLSL vertex shaders).
- * This is the program we must use when rendering.
- */
- struct gl_vertex_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /* For GL_NV_vertex_program only: */
- GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4];
- GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4];
-
- /** Should fixed-function T&L be implemented with a vertex prog? */
- GLboolean _MaintainTnlProgram;
-
- /** Program to emulate fixed-function T&L (see above) */
- struct gl_vertex_program *_TnlProgram;
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-
- GLboolean _Overriden;
-};
-
-
-/**
- * Context state for geometry programs.
- */
-struct gl_geometry_program_state
-{
- GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */
- GLboolean _Enabled; /**< Enabled and valid program? */
- struct gl_geometry_program *Current; /**< user-bound geometry program */
-
- /** Currently enabled and valid program (including internal programs
- * and compiled shader programs).
- */
- struct gl_geometry_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-};
-
-/**
- * Context state for fragment programs.
- */
-struct gl_fragment_program_state
-{
- GLboolean Enabled; /**< User-set fragment program enable flag */
- GLboolean _Enabled; /**< Enabled and _valid_ user program? */
- struct gl_fragment_program *Current; /**< User-bound fragment program */
-
- /** Currently enabled and valid fragment program (including internal
- * programs, user-defined fragment programs and GLSL fragment shaders).
- * This is the program we must use when rendering.
- */
- struct gl_fragment_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /** Should fixed-function texturing be implemented with a fragment prog? */
- GLboolean _MaintainTexEnvProgram;
-
- /** Program to emulate fixed-function texture env/combine (see above) */
- struct gl_fragment_program *_TexEnvProgram;
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-};
-
-
-/**
- * ATI_fragment_shader runtime state
- */
-#define ATI_FS_INPUT_PRIMARY 0
-#define ATI_FS_INPUT_SECONDARY 1
-
-struct atifs_instruction;
-struct atifs_setupinst;
-
-/**
- * ATI fragment shader
- */
-struct ati_fragment_shader
-{
- GLuint Id;
- GLint RefCount;
- struct atifs_instruction *Instructions[2];
- struct atifs_setupinst *SetupInst[2];
- GLfloat Constants[8][4];
- GLbitfield LocalConstDef; /**< Indicates which constants have been set */
- GLubyte numArithInstr[2];
- GLubyte regsAssigned[2];
- GLubyte NumPasses; /**< 1 or 2 */
- GLubyte cur_pass;
- GLubyte last_optype;
- GLboolean interpinp1;
- GLboolean isValid;
- GLuint swizzlerq;
-};
-
-/**
- * Context state for GL_ATI_fragment_shader
- */
-struct gl_ati_fragment_shader_state
-{
- GLboolean Enabled;
- GLboolean _Enabled; /**< enabled and valid shader? */
- GLboolean Compiling;
- GLfloat GlobalConstants[8][4];
- struct ati_fragment_shader *Current;
-};
-
-
-/**
- * Occlusion/timer query object.
- */
-struct gl_query_object
-{
- GLenum Target; /**< The query target, when active */
- GLuint Id; /**< hash table ID/name */
- GLuint64EXT Result; /**< the counter */
- GLboolean Active; /**< inside Begin/EndQuery */
- GLboolean Ready; /**< result is ready? */
-};
-
-
-/**
- * Context state for query objects.
- */
-struct gl_query_state
-{
- struct _mesa_HashTable *QueryObjects;
- struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */
- struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */
-
- /** GL_NV_conditional_render */
- struct gl_query_object *CondRenderQuery;
-
- /** GL_EXT_transform_feedback */
- struct gl_query_object *PrimitivesGenerated;
- struct gl_query_object *PrimitivesWritten;
-
- /** GL_ARB_timer_query */
- struct gl_query_object *TimeElapsed;
-
- GLenum CondRenderMode;
-};
-
-
-/** Sync object state */
-struct gl_sync_object {
- struct simple_node link;
- GLenum Type; /**< GL_SYNC_FENCE */
- GLuint Name; /**< Fence name */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending; /**< Object was deleted while there were still
- * live references (e.g., sync not yet finished)
- */
- GLenum SyncCondition;
- GLbitfield Flags; /**< Flags passed to glFenceSync */
- GLuint StatusFlag:1; /**< Has the sync object been signaled? */
-};
-
-
-/** Set by #pragma directives */
-struct gl_sl_pragmas
-{
- GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */
- GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */
- GLboolean Optimize; /**< defaults on */
- GLboolean Debug; /**< defaults off */
-};
-
-
-/**
- * A GLSL vertex or fragment shader object.
- */
-struct gl_shader
-{
- GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) */
- GLuint Name; /**< AKA the handle */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending;
- GLboolean CompileStatus;
- const GLchar *Source; /**< Source code string */
- GLuint SourceChecksum; /**< for debug/logging purposes */
- struct gl_program *Program; /**< Post-compile assembly code */
- GLchar *InfoLog;
- struct gl_sl_pragmas Pragmas;
-
- unsigned Version; /**< GLSL version used for linking */
-
- struct exec_list *ir;
- struct glsl_symbol_table *symbols;
-
- /** Shaders containing built-in functions that are used for linking. */
- struct gl_shader *builtins_to_link[16];
- unsigned num_builtins_to_link;
-};
-
-
-/**
- * A GLSL program object.
- * Basically a linked collection of vertex and fragment shaders.
- */
-struct gl_shader_program
-{
- GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */
- GLuint Name; /**< aka handle or ID */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending;
-
- GLuint NumShaders; /**< number of attached shaders */
- struct gl_shader **Shaders; /**< List of attached the shaders */
-
- /** User-defined attribute bindings (glBindAttribLocation) */
- struct gl_program_parameter_list *Attributes;
-
- /** Transform feedback varyings */
- struct {
- GLenum BufferMode;
- GLuint NumVarying;
- GLchar **VaryingNames; /**< Array [NumVarying] of char * */
- } TransformFeedback;
-
- /** Geometry shader state - copied into gl_geometry_program at link time */
- struct {
- GLint VerticesOut;
- GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
- GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
- GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
- } Geom;
-
- /* post-link info: */
- struct gl_vertex_program *VertexProgram; /**< Linked vertex program */
- struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */
- struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */
- struct gl_uniform_list *Uniforms;
- struct gl_program_parameter_list *Varying;
- GLboolean LinkStatus; /**< GL_LINK_STATUS */
- GLboolean Validated;
- GLboolean _Used; /**< Ever used for drawing? */
- GLchar *InfoLog;
-
- unsigned Version; /**< GLSL version used for linking */
-
- /**
- * Per-stage shaders resulting from the first stage of linking.
- *
- * Set of linked shaders for this program. The array is accessed using the
- * \c MESA_SHADER_* defines. Entries for non-existent stages will be
- * \c NULL.
- */
- struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES];
-};
-
-
-#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */
-#define GLSL_LOG 0x2 /**< Write shaders to files */
-#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */
-#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */
-#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */
-#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */
-#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */
-#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */
-
-
-/**
- * Context state for GLSL vertex/fragment shaders.
- */
-struct gl_shader_state
-{
- /**
- * Programs used for rendering
- *
- * There is a separate program set for each shader stage. If
- * GL_EXT_separate_shader_objects is not supported, each of these must point
- * to \c NULL or to the same program.
- */
- struct gl_shader_program *CurrentVertexProgram;
- struct gl_shader_program *CurrentGeometryProgram;
- struct gl_shader_program *CurrentFragmentProgram;
-
- /**
- * Program used by glUniform calls.
- *
- * Explicitly set by \c glUseProgram and \c glActiveProgramEXT.
- */
- struct gl_shader_program *ActiveProgram;
-
- void *MemPool;
-
- GLbitfield Flags; /**< Mask of GLSL_x flags */
-};
-
-/**
- * Compiler options for a single GLSL shaders type
- */
-struct gl_shader_compiler_options
-{
- /** Driver-selectable options: */
- GLboolean EmitCondCodes; /**< Use condition codes? */
- GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */
- /**
- * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't
- * support control flow.
- */
- GLboolean EmitNoIfs;
- GLboolean EmitNoLoops;
- GLboolean EmitNoFunctions;
- GLboolean EmitNoCont; /**< Emit CONT opcode? */
- GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */
- GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */
- GLboolean EmitNoPow; /**< Emit POW opcodes? */
-
- /**
- * \name Forms of indirect addressing the driver cannot do.
- */
- /*@{*/
- GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */
- GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */
- GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */
- GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
- /*@}*/
-
- GLuint MaxUnrollIterations;
-
- struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */
-};
-
-/**
- * Transform feedback object state
- */
-struct gl_transform_feedback_object
-{
- GLuint Name; /**< AKA the object ID */
- GLint RefCount;
- GLboolean Active; /**< Is transform feedback enabled? */
- GLboolean Paused; /**< Is transform feedback paused? */
-
- /** The feedback buffers */
- GLuint BufferNames[MAX_FEEDBACK_ATTRIBS];
- struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS];
-
- /** Start of feedback data in dest buffer */
- GLintptr Offset[MAX_FEEDBACK_ATTRIBS];
- /** Max data to put into dest buffer (in bytes) */
- GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS];
-};
-
-
-/**
- * Context state for transform feedback.
- */
-struct gl_transform_feedback
-{
- GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */
-
- GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */
-
- /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */
- struct gl_buffer_object *CurrentBuffer;
-
- /** The table of all transform feedback objects */
- struct _mesa_HashTable *Objects;
-
- /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */
- struct gl_transform_feedback_object *CurrentObject;
-
- /** The default xform-fb object (Name==0) */
- struct gl_transform_feedback_object *DefaultObject;
-};
-
-
-
-/**
- * State which can be shared by multiple contexts:
- */
-struct gl_shared_state
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- GLint RefCount; /**< Reference count */
- struct _mesa_HashTable *DisplayList; /**< Display lists hash table */
- struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */
-
- /** Default texture objects (shared by all texture units) */
- struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS];
-
- /** Fallback texture used when a bound texture is incomplete */
- struct gl_texture_object *FallbackTex;
-
- /**
- * \name Thread safety and statechange notification for texture
- * objects.
- *
- * \todo Improve the granularity of locking.
- */
- /*@{*/
- _glthread_Mutex TexMutex; /**< texobj thread safety */
- GLuint TextureStateStamp; /**< state notification for shared tex */
- /*@}*/
-
- /** Default buffer object for vertex arrays that aren't in VBOs */
- struct gl_buffer_object *NullBufferObj;
-
- /**
- * \name Vertex/geometry/fragment programs
- */
- /*@{*/
- struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
- struct gl_vertex_program *DefaultVertexProgram;
- struct gl_fragment_program *DefaultFragmentProgram;
- struct gl_geometry_program *DefaultGeometryProgram;
- /*@}*/
-
- /* GL_ATI_fragment_shader */
- struct _mesa_HashTable *ATIShaders;
- struct ati_fragment_shader *DefaultFragmentShader;
-
- struct _mesa_HashTable *BufferObjects;
-
- /** Table of both gl_shader and gl_shader_program objects */
- struct _mesa_HashTable *ShaderObjects;
-
- /* GL_EXT_framebuffer_object */
- struct _mesa_HashTable *RenderBuffers;
- struct _mesa_HashTable *FrameBuffers;
-
- /* GL_ARB_sync */
- struct simple_node SyncObjects;
-
- void *DriverData; /**< Device driver shared state */
-};
-
-
-
-
-/**
- * A renderbuffer stores colors or depth values or stencil values.
- * A framebuffer object will have a collection of these.
- * Data are read/written to the buffer with a handful of Get/Put functions.
- *
- * Instances of this object are allocated with the Driver's NewRenderbuffer
- * hook. Drivers will likely wrap this class inside a driver-specific
- * class to simulate inheritance.
- */
-struct gl_renderbuffer
-{
-#define RB_MAGIC 0xaabbccdd
- int Magic; /** XXX TEMPORARY DEBUG INFO */
- _glthread_Mutex Mutex; /**< for thread safety */
- GLuint ClassID; /**< Useful for drivers */
- GLuint Name;
- GLint RefCount;
- GLuint Width, Height;
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
-
- GLenum InternalFormat; /**< The user-specified format */
- GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or
- GL_STENCIL_INDEX. */
- GLuint Format; /**< The actual format: MESA_FORMAT_x */
-
- GLubyte NumSamples;
-
- GLenum DataType; /**< Type of values passed to the Get/Put functions */
- GLvoid *Data; /**< This may not be used by some kinds of RBs */
-
- /* Used to wrap one renderbuffer around another: */
- struct gl_renderbuffer *Wrapped;
-
- /* Delete this renderbuffer */
- void (*Delete)(struct gl_renderbuffer *rb);
-
- /* Allocate new storage for this renderbuffer */
- GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLenum internalFormat,
- GLuint width, GLuint height);
-
- /* Lock/Unlock are called before/after calling the Get/Put functions.
- * Not sure this is the right place for these yet.
- void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
- void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
- */
-
- /* Return a pointer to the element/pixel at (x,y).
- * Should return NULL if the buffer memory can't be directly addressed.
- */
- void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLint x, GLint y);
-
- /* Get/Read a row of values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, void *values);
-
- /* Get/Read values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- const GLint x[], const GLint y[], void *values);
-
- /* Put/Write a row of values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *values, const GLubyte *mask);
-
- /* Put/Write a row of RGB values. This is a special-case routine that's
- * only used for RGBA renderbuffers when the source data is GL_RGB. That's
- * a common case for glDrawPixels and some triangle routines.
- * The values will be of format GL_RGB and type DataType.
- */
- void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *values, const GLubyte *mask);
-
-
- /* Put/Write a row of identical values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *value, const GLubyte *mask);
-
- /* Put/Write values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- const GLint x[], const GLint y[], const void *values,
- const GLubyte *mask);
- /* Put/Write identical values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLuint count, const GLint x[], const GLint y[],
- const void *value, const GLubyte *mask);
-};
-
-
-/**
- * A renderbuffer attachment points to either a texture object (and specifies
- * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer.
- */
-struct gl_renderbuffer_attachment
-{
- GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */
- GLboolean Complete;
-
- /**
- * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the
- * application supplied renderbuffer object.
- */
- struct gl_renderbuffer *Renderbuffer;
-
- /**
- * If \c Type is \c GL_TEXTURE, this stores a pointer to the application
- * supplied texture object.
- */
- struct gl_texture_object *Texture;
- GLuint TextureLevel; /**< Attached mipmap level. */
- GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */
- GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D
- * and 2D array textures */
-};
-
-
-/**
- * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc).
- * In C++ terms, think of this as a base class from which device drivers
- * will make derived classes.
- */
-struct gl_framebuffer
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- /**
- * If zero, this is a window system framebuffer. If non-zero, this
- * is a FBO framebuffer; note that for some devices (i.e. those with
- * a natural pixel coordinate system for FBOs that differs from the
- * OpenGL/Mesa coordinate system), this means that the viewport,
- * polygon face orientation, and polygon stipple will have to be inverted.
- */
- GLuint Name;
-
- GLint RefCount;
- GLboolean DeletePending;
-
- /**
- * The framebuffer's visual. Immutable if this is a window system buffer.
- * Computed from attachments if user-made FBO.
- */
- struct gl_config Visual;
-
- GLboolean Initialized;
-
- GLuint Width, Height; /**< size of frame buffer in pixels */
-
- /** \name Drawing bounds (Intersection of buffer size and scissor box) */
- /*@{*/
- GLint _Xmin, _Xmax; /**< inclusive */
- GLint _Ymin, _Ymax; /**< exclusive */
- /*@}*/
-
- /** \name Derived Z buffer stuff */
- /*@{*/
- GLuint _DepthMax; /**< Max depth buffer value */
- GLfloat _DepthMaxF; /**< Float max depth buffer value */
- GLfloat _MRD; /**< minimum resolvable difference in Z values */
- /*@}*/
-
- /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */
- GLenum _Status;
-
- /** Integer color values */
- GLboolean _IntegerColor;
-
- /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */
- struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT];
-
- /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER
- * attribute group and GL_PIXEL attribute group, respectively.
- */
- GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS];
- GLenum ColorReadBuffer;
-
- /** Computed from ColorDraw/ReadBuffer above */
- GLuint _NumColorDrawBuffers;
- GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */
- GLint _ColorReadBufferIndex; /* -1 = None */
- struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS];
- struct gl_renderbuffer *_ColorReadBuffer;
-
- /** The Actual depth/stencil buffers to use. May be wrappers around the
- * depth/stencil buffers attached above. */
- struct gl_renderbuffer *_DepthBuffer;
- struct gl_renderbuffer *_StencilBuffer;
-
- /** Delete this framebuffer */
- void (*Delete)(struct gl_framebuffer *fb);
-};
-
-
-/**
- * Precision info for shader datatypes. See glGetShaderPrecisionFormat().
- */
-struct gl_precision
-{
- GLushort RangeMin; /**< min value exponent */
- GLushort RangeMax; /**< max value exponent */
- GLushort Precision; /**< number of mantissa bits */
-};
-
-
-/**
- * Limits for vertex and fragment programs/shaders.
- */
-struct gl_program_constants
-{
- /* logical limits */
- GLuint MaxInstructions;
- GLuint MaxAluInstructions;
- GLuint MaxTexInstructions;
- GLuint MaxTexIndirections;
- GLuint MaxAttribs;
- GLuint MaxTemps;
- GLuint MaxAddressRegs;
- GLuint MaxParameters;
- GLuint MaxLocalParams;
- GLuint MaxEnvParams;
- /* native/hardware limits */
- GLuint MaxNativeInstructions;
- GLuint MaxNativeAluInstructions;
- GLuint MaxNativeTexInstructions;
- GLuint MaxNativeTexIndirections;
- GLuint MaxNativeAttribs;
- GLuint MaxNativeTemps;
- GLuint MaxNativeAddressRegs;
- GLuint MaxNativeParameters;
- /* For shaders */
- GLuint MaxUniformComponents;
- /* GL_ARB_geometry_shader4 */
- GLuint MaxGeometryTextureImageUnits;
- GLuint MaxGeometryVaryingComponents;
- GLuint MaxVertexVaryingComponents;
- GLuint MaxGeometryUniformComponents;
- GLuint MaxGeometryOutputVertices;
- GLuint MaxGeometryTotalOutputComponents;
- /* ES 2.0 and GL_ARB_ES2_compatibility */
- struct gl_precision LowFloat, MediumFloat, HighFloat;
- struct gl_precision LowInt, MediumInt, HighInt;
-};
-
-
-/**
- * Constants which may be overridden by device driver during context creation
- * but are never changed after that.
- */
-struct gl_constants
-{
- GLint MaxTextureMbytes; /**< Max memory per image, in MB */
- GLint MaxTextureLevels; /**< Max mipmap levels. */
- GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */
- GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */
- GLint MaxArrayTextureLayers; /**< Max layers in array textures */
- GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixes */
- GLuint MaxTextureCoordUnits;
- GLuint MaxTextureImageUnits;
- GLuint MaxVertexTextureImageUnits;
- GLuint MaxCombinedTextureImageUnits;
- GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */
- GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
- GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */
-
- GLuint MaxArrayLockSize;
-
- GLint SubPixelBits;
-
- GLfloat MinPointSize, MaxPointSize; /**< aliased */
- GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */
- GLfloat PointSizeGranularity;
- GLfloat MinLineWidth, MaxLineWidth; /**< aliased */
- GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */
- GLfloat LineWidthGranularity;
-
- GLuint MaxColorTableSize;
-
- GLuint MaxClipPlanes;
- GLuint MaxLights;
- GLfloat MaxShininess; /**< GL_NV_light_max_exponent */
- GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */
-
- GLuint MaxViewportWidth, MaxViewportHeight;
-
- struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */
- struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */
- struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */
- GLuint MaxProgramMatrices;
- GLuint MaxProgramMatrixStackDepth;
-
- /** vertex array / buffer object bounds checking */
- GLboolean CheckArrayBounds;
-
- GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */
-
- GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */
- GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */
- GLuint MaxSamples; /**< GL_ARB_framebuffer_object */
-
- GLuint MaxVarying; /**< Number of float[4] varying parameters */
-
- GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */
-
- /** Which texture units support GL_ATI_envmap_bumpmap as targets */
- GLbitfield SupportedBumpUnits;
-
- /**
- * Maximum amount of time, measured in nanseconds, that the server can wait.
- */
- GLuint64 MaxServerWaitTimeout;
-
- /** GL_EXT_provoking_vertex */
- GLboolean QuadsFollowProvokingVertexConvention;
-
- /** OpenGL version 3.0 */
- GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */
-
- /** OpenGL version 3.2 */
- GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */
-
- /** GL_EXT_transform_feedback */
- GLuint MaxTransformFeedbackSeparateAttribs;
- GLuint MaxTransformFeedbackSeparateComponents;
- GLuint MaxTransformFeedbackInterleavedComponents;
-
- /** GL_EXT_gpu_shader4 */
- GLint MinProgramTexelOffset, MaxProgramTexelOffset;
-
- /* GL_EXT_framebuffer_sRGB */
- GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */
-};
-
-
-/**
- * Enable flag for each OpenGL extension. Different device drivers will
- * enable different extensions at runtime.
- */
-struct gl_extensions
-{
- GLboolean dummy; /* don't remove this! */
- GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */
- GLboolean dummy_false; /* Set false by _mesa_init_extensions(). */
- GLboolean ARB_ES2_compatibility;
- GLboolean ARB_blend_func_extended;
- GLboolean ARB_copy_buffer;
- GLboolean ARB_depth_buffer_float;
- GLboolean ARB_depth_clamp;
- GLboolean ARB_depth_texture;
- GLboolean ARB_draw_buffers;
- GLboolean ARB_draw_buffers_blend;
- GLboolean ARB_draw_elements_base_vertex;
- GLboolean ARB_draw_instanced;
- GLboolean ARB_fragment_coord_conventions;
- GLboolean ARB_fragment_program;
- GLboolean ARB_fragment_program_shadow;
- GLboolean ARB_fragment_shader;
- GLboolean ARB_framebuffer_object;
- GLboolean ARB_explicit_attrib_location;
- GLboolean ARB_geometry_shader4;
- GLboolean ARB_half_float_pixel;
- GLboolean ARB_half_float_vertex;
- GLboolean ARB_instanced_arrays;
- GLboolean ARB_map_buffer_range;
- GLboolean ARB_multisample;
- GLboolean ARB_multitexture;
- GLboolean ARB_occlusion_query;
- GLboolean ARB_occlusion_query2;
- GLboolean ARB_point_sprite;
- GLboolean ARB_sampler_objects;
- GLboolean ARB_seamless_cube_map;
- GLboolean ARB_shader_objects;
- GLboolean ARB_shader_stencil_export;
- GLboolean ARB_shading_language_100;
- GLboolean ARB_shadow;
- GLboolean ARB_shadow_ambient;
- GLboolean ARB_sync;
- GLboolean ARB_texture_border_clamp;
- GLboolean ARB_texture_buffer_object;
- GLboolean ARB_texture_compression;
- GLboolean ARB_texture_compression_rgtc;
- GLboolean ARB_texture_cube_map;
- GLboolean ARB_texture_env_combine;
- GLboolean ARB_texture_env_crossbar;
- GLboolean ARB_texture_env_dot3;
- GLboolean ARB_texture_float;
- GLboolean ARB_texture_mirrored_repeat;
- GLboolean ARB_texture_multisample;
- GLboolean ARB_texture_non_power_of_two;
- GLboolean ARB_texture_rg;
- GLboolean ARB_texture_rgb10_a2ui;
- GLboolean ARB_timer_query;
- GLboolean ARB_transform_feedback2;
- GLboolean ARB_transpose_matrix;
- GLboolean ARB_uniform_buffer_object;
- GLboolean ARB_vertex_array_object;
- GLboolean ARB_vertex_buffer_object;
- GLboolean ARB_vertex_program;
- GLboolean ARB_vertex_shader;
- GLboolean ARB_vertex_type_2_10_10_10_rev;
- GLboolean ARB_window_pos;
- GLboolean EXT_abgr;
- GLboolean EXT_bgra;
- GLboolean EXT_blend_color;
- GLboolean EXT_blend_equation_separate;
- GLboolean EXT_blend_func_separate;
- GLboolean EXT_blend_logic_op;
- GLboolean EXT_blend_minmax;
- GLboolean EXT_blend_subtract;
- GLboolean EXT_clip_volume_hint;
- GLboolean EXT_compiled_vertex_array;
- GLboolean EXT_copy_texture;
- GLboolean EXT_depth_bounds_test;
- GLboolean EXT_draw_buffers2;
- GLboolean EXT_draw_range_elements;
- GLboolean EXT_fog_coord;
- GLboolean EXT_framebuffer_blit;
- GLboolean EXT_framebuffer_multisample;
- GLboolean EXT_framebuffer_object;
- GLboolean EXT_framebuffer_sRGB;
- GLboolean EXT_gpu_program_parameters;
- GLboolean EXT_gpu_shader4;
- GLboolean EXT_multi_draw_arrays;
- GLboolean EXT_paletted_texture;
- GLboolean EXT_packed_depth_stencil;
- GLboolean EXT_packed_float;
- GLboolean EXT_packed_pixels;
- GLboolean EXT_pixel_buffer_object;
- GLboolean EXT_point_parameters;
- GLboolean EXT_polygon_offset;
- GLboolean EXT_provoking_vertex;
- GLboolean EXT_rescale_normal;
- GLboolean EXT_shadow_funcs;
- GLboolean EXT_secondary_color;
- GLboolean EXT_separate_shader_objects;
- GLboolean EXT_separate_specular_color;
- GLboolean EXT_shared_texture_palette;
- GLboolean EXT_stencil_wrap;
- GLboolean EXT_stencil_two_side;
- GLboolean EXT_subtexture;
- GLboolean EXT_texture;
- GLboolean EXT_texture_object;
- GLboolean EXT_texture3D;
- GLboolean EXT_texture_array;
- GLboolean EXT_texture_compression_s3tc;
- GLboolean EXT_texture_env_add;
- GLboolean EXT_texture_env_combine;
- GLboolean EXT_texture_env_dot3;
- GLboolean EXT_texture_filter_anisotropic;
- GLboolean EXT_texture_integer;
- GLboolean EXT_texture_lod_bias;
- GLboolean EXT_texture_mirror_clamp;
- GLboolean EXT_texture_shared_exponent;
- GLboolean EXT_texture_sRGB;
- GLboolean EXT_texture_sRGB_decode;
- GLboolean EXT_texture_swizzle;
- GLboolean EXT_transform_feedback;
- GLboolean EXT_timer_query;
- GLboolean EXT_vertex_array;
- GLboolean EXT_vertex_array_bgra;
- GLboolean EXT_vertex_array_set;
- GLboolean OES_standard_derivatives;
- /* vendor extensions */
- GLboolean AMD_conservative_depth;
- GLboolean APPLE_client_storage;
- GLboolean APPLE_packed_pixels;
- GLboolean APPLE_vertex_array_object;
- GLboolean APPLE_object_purgeable;
- GLboolean ATI_envmap_bumpmap;
- GLboolean ATI_texture_mirror_once;
- GLboolean ATI_texture_env_combine3;
- GLboolean ATI_fragment_shader;
- GLboolean ATI_separate_stencil;
- GLboolean IBM_rasterpos_clip;
- GLboolean IBM_multimode_draw_arrays;
- GLboolean MESA_pack_invert;
- GLboolean MESA_resize_buffers;
- GLboolean MESA_ycbcr_texture;
- GLboolean MESA_texture_array;
- GLboolean MESA_texture_signed_rgba;
- GLboolean NV_blend_square;
- GLboolean NV_conditional_render;
- GLboolean NV_fragment_program;
- GLboolean NV_fragment_program_option;
- GLboolean NV_light_max_exponent;
- GLboolean NV_point_sprite;
- GLboolean NV_primitive_restart;
- GLboolean NV_texgen_reflection;
- GLboolean NV_texture_env_combine4;
- GLboolean NV_texture_rectangle;
- GLboolean NV_vertex_program;
- GLboolean NV_vertex_program1_1;
- GLboolean OES_read_format;
- GLboolean SGI_texture_color_table;
- GLboolean SGIS_generate_mipmap;
- GLboolean SGIS_texture_edge_clamp;
- GLboolean SGIS_texture_lod;
- GLboolean TDFX_texture_compression_FXT1;
- GLboolean S3_s3tc;
- GLboolean OES_EGL_image;
- GLboolean OES_draw_texture;
- GLboolean EXT_texture_format_BGRA8888;
- GLboolean extension_sentinel;
- /** The extension string */
- const GLubyte *String;
- /** Number of supported extensions */
- GLuint Count;
-};
-
-
-/**
- * A stack of matrices (projection, modelview, color, texture, etc).
- */
-struct gl_matrix_stack
-{
- GLmatrix *Top; /**< points into Stack */
- GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */
- GLuint Depth; /**< 0 <= Depth < MaxDepth */
- GLuint MaxDepth; /**< size of Stack[] array */
- GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */
-};
-
-
-/**
- * \name Bits for image transfer operations
- * \sa __struct gl_contextRec::ImageTransferState.
- */
-/*@{*/
-#define IMAGE_SCALE_BIAS_BIT 0x1
-#define IMAGE_SHIFT_OFFSET_BIT 0x2
-#define IMAGE_MAP_COLOR_BIT 0x4
-#define IMAGE_CLAMP_BIT 0x800
-
-
-/** Pixel Transfer ops */
-#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \
- IMAGE_SHIFT_OFFSET_BIT | \
- IMAGE_MAP_COLOR_BIT)
-
-/**
- * \name Bits to indicate what state has changed.
- */
-/*@{*/
-#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */
-#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */
-#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */
-#define _NEW_COLOR (1 << 3) /**< gl_context::Color */
-#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */
-#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */
-#define _NEW_FOG (1 << 6) /**< gl_context::Fog */
-#define _NEW_HINT (1 << 7) /**< gl_context::Hint */
-#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */
-#define _NEW_LINE (1 << 9) /**< gl_context::Line */
-#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */
-#define _NEW_POINT (1 << 11) /**< gl_context::Point */
-#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */
-#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */
-#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */
-#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */
-#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */
-#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */
-#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */
-#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */
-#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */
-#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */
-#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */
-#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */
-#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */
-#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */
-#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */
-#define _NEW_PROGRAM_CONSTANTS (1 << 27)
-#define _NEW_BUFFER_OBJECT (1 << 28)
-#define _NEW_ALL ~0
-/*@}*/
-
-
-/**
- * \name Bits to track array state changes
- *
- * Also used to summarize array enabled.
- */
-/*@{*/
-#define _NEW_ARRAY_VERTEX VERT_BIT_POS
-#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT
-#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL
-#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0
-#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1
-#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG
-#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX
-#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG
-#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */
-#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0
-#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1
-#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2
-#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3
-#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4
-#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5
-#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6
-#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7
-#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */
-#define _NEW_ARRAY_ALL 0xffffffff
-
-
-#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i))
-#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i))
-/*@}*/
-
-
-
-/**
- * \name A bunch of flags that we think might be useful to drivers.
- *
- * Set in the __struct gl_contextRec::_TriangleCaps bitfield.
- */
-/*@{*/
-#define DD_FLATSHADE 0x1
-#define DD_SEPARATE_SPECULAR 0x2
-#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */
-#define DD_TRI_LIGHT_TWOSIDE 0x8
-#define DD_TRI_UNFILLED 0x10
-#define DD_TRI_SMOOTH 0x20
-#define DD_TRI_STIPPLE 0x40
-#define DD_TRI_OFFSET 0x80
-#define DD_LINE_SMOOTH 0x100
-#define DD_LINE_STIPPLE 0x200
-#define DD_POINT_SMOOTH 0x400
-#define DD_POINT_ATTEN 0x800
-#define DD_TRI_TWOSTENCIL 0x1000
-/*@}*/
-
-
-/**
- * \name Define the state changes under which each of these bits might change
- */
-/*@{*/
-#define _DD_NEW_FLATSHADE _NEW_LIGHT
-#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM)
-#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON
-#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT
-#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON
-#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON
-#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON
-#define _DD_NEW_TRI_OFFSET _NEW_POLYGON
-#define _DD_NEW_LINE_SMOOTH _NEW_LINE
-#define _DD_NEW_LINE_STIPPLE _NEW_LINE
-#define _DD_NEW_LINE_WIDTH _NEW_LINE
-#define _DD_NEW_POINT_SMOOTH _NEW_POINT
-#define _DD_NEW_POINT_SIZE _NEW_POINT
-#define _DD_NEW_POINT_ATTEN _NEW_POINT
-/*@}*/
-
-
-/**
- * Composite state flags
- */
-/*@{*/
-#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \
- _NEW_TEXTURE | \
- _NEW_POINT | \
- _NEW_PROGRAM | \
- _NEW_MODELVIEW)
-
-#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \
- _NEW_TEXTURE)
-
-#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL)
-/*@}*/
-
-
-
-
-/* This has to be included here. */
-#include "dd.h"
-
-
-/**
- * Display list flags.
- * Strictly this is a tnl-private concept, but it doesn't seem
- * worthwhile adding a tnl private structure just to hold this one bit
- * of information:
- */
-#define DLIST_DANGLING_REFS 0x1
-
-
-/** Opaque declaration of display list payload data type */
-union gl_dlist_node;
-
-
-/**
- * Provide a location where information about a display list can be
- * collected. Could be extended with driverPrivate structures,
- * etc. in the future.
- */
-struct gl_display_list
-{
- GLuint Name;
- GLbitfield Flags; /**< DLIST_x flags */
- /** The dlist commands are in a linked list of nodes */
- union gl_dlist_node *Head;
-};
-
-
-/**
- * State used during display list compilation and execution.
- */
-struct gl_dlist_state
-{
- GLuint CallDepth; /**< Current recursion calling depth */
-
- struct gl_display_list *CurrentList; /**< List currently being compiled */
- union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */
- GLuint CurrentPos; /**< Index into current block of nodes */
-
- GLvertexformat ListVtxfmt;
-
- GLubyte ActiveAttribSize[VERT_ATTRIB_MAX];
- GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4];
-
- GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX];
- GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4];
-
- GLubyte ActiveIndex;
- GLfloat CurrentIndex;
-
- GLubyte ActiveEdgeFlag;
- GLboolean CurrentEdgeFlag;
-
- struct {
- /* State known to have been set by the currently-compiling display
- * list. Used to eliminate some redundant state changes.
- */
- GLenum ShadeModel;
- } Current;
-};
-
-
-/**
- * Enum for the OpenGL APIs we know about and may support.
- */
-typedef enum
-{
- API_OPENGL,
- API_OPENGLES,
- API_OPENGLES2
-} gl_api;
-
-
-/**
- * Mesa rendering context.
- *
- * This is the central context data structure for Mesa. Almost all
- * OpenGL state is contained in this structure.
- * Think of this as a base class from which device drivers will derive
- * sub classes.
- *
- * The struct gl_context typedef names this structure.
- */
-struct gl_context
-{
- /** State possibly shared with other contexts in the address space */
- struct gl_shared_state *Shared;
-
- /** \name API function pointer tables */
- /*@{*/
- gl_api API;
- struct _glapi_table *Save; /**< Display list save functions */
- struct _glapi_table *Exec; /**< Execute functions */
- struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */
- /*@}*/
-
- struct gl_config Visual;
- struct gl_framebuffer *DrawBuffer; /**< buffer for writing */
- struct gl_framebuffer *ReadBuffer; /**< buffer for reading */
- struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */
- struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */
-
- /**
- * Device driver function pointer table
- */
- struct dd_function_table Driver;
-
- void *DriverCtx; /**< Points to device driver context/state */
-
- /** Core/Driver constants */
- struct gl_constants Const;
-
- /** \name The various 4x4 matrix stacks */
- /*@{*/
- struct gl_matrix_stack ModelviewMatrixStack;
- struct gl_matrix_stack ProjectionMatrixStack;
- struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS];
- struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES];
- struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */
- /*@}*/
-
- /** Combined modelview and projection matrix */
- GLmatrix _ModelProjectMatrix;
-
- /** \name Display lists */
- struct gl_dlist_state ListState;
-
- GLboolean ExecuteFlag; /**< Execute GL commands? */
- GLboolean CompileFlag; /**< Compile GL commands into display list? */
-
- /** Extension information */
- struct gl_extensions Extensions;
-
- /** Version info */
- GLuint VersionMajor, VersionMinor;
- char *VersionString;
-
- /** \name State attribute stack (for glPush/PopAttrib) */
- /*@{*/
- GLuint AttribStackDepth;
- struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH];
- /*@}*/
-
- /** \name Renderer attribute groups
- *
- * We define a struct for each attribute group to make pushing and popping
- * attributes easy. Also it's a good organization.
- */
- /*@{*/
- struct gl_accum_attrib Accum; /**< Accum buffer attributes */
- struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */
- struct gl_current_attrib Current; /**< Current attributes */
- struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */
- struct gl_eval_attrib Eval; /**< Eval attributes */
- struct gl_fog_attrib Fog; /**< Fog attributes */
- struct gl_hint_attrib Hint; /**< Hint attributes */
- struct gl_light_attrib Light; /**< Light attributes */
- struct gl_line_attrib Line; /**< Line attributes */
- struct gl_list_attrib List; /**< List attributes */
- struct gl_multisample_attrib Multisample;
- struct gl_pixel_attrib Pixel; /**< Pixel attributes */
- struct gl_point_attrib Point; /**< Point attributes */
- struct gl_polygon_attrib Polygon; /**< Polygon attributes */
- GLuint PolygonStipple[32]; /**< Polygon stipple */
- struct gl_scissor_attrib Scissor; /**< Scissor attributes */
- struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */
- struct gl_texture_attrib Texture; /**< Texture attributes */
- struct gl_transform_attrib Transform; /**< Transformation attributes */
- struct gl_viewport_attrib Viewport; /**< Viewport attributes */
- /*@}*/
-
- /** \name Client attribute stack */
- /*@{*/
- GLuint ClientAttribStackDepth;
- struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH];
- /*@}*/
-
- /** \name Client attribute groups */
- /*@{*/
- struct gl_array_attrib Array; /**< Vertex arrays */
- struct gl_pixelstore_attrib Pack; /**< Pixel packing */
- struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */
- struct gl_pixelstore_attrib DefaultPacking; /**< Default params */
- /*@}*/
-
- /** \name Other assorted state (not pushed/popped on attribute stack) */
- /*@{*/
- struct gl_pixelmaps PixelMaps;
-
- struct gl_evaluators EvalMap; /**< All evaluators */
- struct gl_feedback Feedback; /**< Feedback */
- struct gl_selection Select; /**< Selection */
-
- struct gl_program_state Program; /**< general program state */
- struct gl_vertex_program_state VertexProgram;
- struct gl_fragment_program_state FragmentProgram;
- struct gl_geometry_program_state GeometryProgram;
- struct gl_ati_fragment_shader_state ATIFragmentShader;
-
- struct gl_shader_state Shader; /**< GLSL shader object state */
- struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES];
-
- struct gl_query_state Query; /**< occlusion, timer queries */
-
- struct gl_transform_feedback TransformFeedback;
-
- struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
- struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
- /*@}*/
-
- struct gl_meta_state *Meta; /**< for "meta" operations */
-
- /* GL_EXT_framebuffer_object */
- struct gl_renderbuffer *CurrentRenderbuffer;
-
- GLenum ErrorValue; /**< Last error code */
-
- /**
- * Recognize and silence repeated error debug messages in buggy apps.
- */
- const char *ErrorDebugFmtString;
- GLuint ErrorDebugCount;
-
- GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
- GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
-
- GLboolean ViewportInitialized; /**< has viewport size been initialized? */
-
- GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
-
- /** \name Derived state */
- /*@{*/
- /** Bitwise-or of DD_* flags. Note that this bitfield may be used before
- * state validation so they need to always be current.
- */
- GLbitfield _TriangleCaps;
- GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */
- GLfloat _EyeZDir[3];
- GLfloat _ModelViewInvScale;
- GLboolean _NeedEyeCoords;
- GLboolean _ForceEyeCoords;
-
- GLuint TextureStateTimestamp; /**< detect changes to shared state */
-
- struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */
- struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */
- /**@}*/
-
- struct gl_list_extensions *ListExt; /**< driver dlist extensions */
-
- /** \name For debugging/development only */
- /*@{*/
- GLboolean FirstTimeCurrent;
- /*@}*/
-
- /** Dither disable via MESA_NO_DITHER env var */
- GLboolean NoDither;
-
- /** software compression/decompression supported or not */
- GLboolean Mesa_DXTn;
-
- GLboolean TextureFormatSupported[MESA_FORMAT_COUNT];
-
- /**
- * Use dp4 (rather than mul/mad) instructions for position
- * transformation?
- */
- GLboolean mvp_with_dp4;
-
- /**
- * \name Hooks for module contexts.
- *
- * These will eventually live in the driver or elsewhere.
- */
- /*@{*/
- void *swrast_context;
- void *swsetup_context;
- void *swtnl_context;
- void *swtnl_im;
- struct st_context *st;
- void *aelt_context;
- /*@}*/
-};
-
-
-#ifdef DEBUG
-extern int MESA_VERBOSE;
-extern int MESA_DEBUG_FLAGS;
-# define MESA_FUNCTION __FUNCTION__
-#else
-# define MESA_VERBOSE 0
-# define MESA_DEBUG_FLAGS 0
-# define MESA_FUNCTION "a function"
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-
-
-/** The MESA_VERBOSE var is a bitmask of these flags */
-enum _verbose
-{
- VERBOSE_VARRAY = 0x0001,
- VERBOSE_TEXTURE = 0x0002,
- VERBOSE_MATERIAL = 0x0004,
- VERBOSE_PIPELINE = 0x0008,
- VERBOSE_DRIVER = 0x0010,
- VERBOSE_STATE = 0x0020,
- VERBOSE_API = 0x0040,
- VERBOSE_DISPLAY_LIST = 0x0100,
- VERBOSE_LIGHTING = 0x0200,
- VERBOSE_PRIMS = 0x0400,
- VERBOSE_VERTS = 0x0800,
- VERBOSE_DISASSEM = 0x1000,
- VERBOSE_DRAW = 0x2000,
- VERBOSE_SWAPBUFFERS = 0x4000
-};
-
-
-/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */
-enum _debug
-{
- DEBUG_ALWAYS_FLUSH = 0x1
-};
-
-
-
-#endif /* MTYPES_H */
+/* + * Mesa 3-D graphics library + * Version: 7.7 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file mtypes.h + * Main Mesa data structures. + * + * Please try to mark derived values with a leading underscore ('_'). + */ + +#ifndef MTYPES_H +#define MTYPES_H + + +#include "main/glheader.h" +#include "main/config.h" +#include "main/mfeatures.h" +#include "glapi/glapi.h" +#include "math/m_matrix.h" /* GLmatrix */ +#include "main/simple_list.h" /* struct simple_node */ +#include "main/formats.h" /* MESA_FORMAT_COUNT */ + + +/** + * Color channel data type. + */ +#if CHAN_BITS == 8 + typedef GLubyte GLchan; +#define CHAN_MAX 255 +#define CHAN_MAXF 255.0F +#define CHAN_TYPE GL_UNSIGNED_BYTE +#elif CHAN_BITS == 16 + typedef GLushort GLchan; +#define CHAN_MAX 65535 +#define CHAN_MAXF 65535.0F +#define CHAN_TYPE GL_UNSIGNED_SHORT +#elif CHAN_BITS == 32 + typedef GLfloat GLchan; +#define CHAN_MAX 1.0 +#define CHAN_MAXF 1.0F +#define CHAN_TYPE GL_FLOAT +#else +#error "illegal number of color channel bits" +#endif + + +/** + * Stencil buffer data type. + */ +#if STENCIL_BITS==8 + typedef GLubyte GLstencil; +#elif STENCIL_BITS==16 + typedef GLushort GLstencil; +#else +# error "illegal number of stencil bits" +#endif + + +/** + * \name 64-bit extension of GLbitfield. + */ +/*@{*/ +typedef GLuint64 GLbitfield64; + +/** Set a single bit */ +#define BITFIELD64_BIT(b) (1ULL << (b)) + + +/** + * \name Some forward type declarations + */ +/*@{*/ +struct _mesa_HashTable; +struct gl_attrib_node; +struct gl_list_extensions; +struct gl_meta_state; +struct gl_pixelstore_attrib; +struct gl_program_cache; +struct gl_texture_format; +struct gl_texture_image; +struct gl_texture_object; +struct gl_context; +struct st_context; +/*@}*/ + + +/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */ +#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1) +#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2) +#define PRIM_UNKNOWN (GL_POLYGON+3) + + +/** + * Shader stages. Note that these will become 5 with tessellation. + * These MUST have the same values as gallium's PIPE_SHADER_* + */ +typedef enum +{ + MESA_SHADER_VERTEX = 0, + MESA_SHADER_FRAGMENT = 1, + MESA_SHADER_GEOMETRY = 2, + MESA_SHADER_TYPES = 3 +} gl_shader_type; + + + +/** + * Indexes for vertex program attributes. + * GL_NV_vertex_program aliases generic attributes over the conventional + * attributes. In GL_ARB_vertex_program shader the aliasing is optional. + * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the + * generic attributes are distinct/separate). + */ +typedef enum +{ + VERT_ATTRIB_POS = 0, + VERT_ATTRIB_WEIGHT = 1, + VERT_ATTRIB_NORMAL = 2, + VERT_ATTRIB_COLOR0 = 3, + VERT_ATTRIB_COLOR1 = 4, + VERT_ATTRIB_FOG = 5, + VERT_ATTRIB_COLOR_INDEX = 6, + VERT_ATTRIB_POINT_SIZE = 6, /*alias*/ + VERT_ATTRIB_EDGEFLAG = 7, + VERT_ATTRIB_TEX0 = 8, + VERT_ATTRIB_TEX1 = 9, + VERT_ATTRIB_TEX2 = 10, + VERT_ATTRIB_TEX3 = 11, + VERT_ATTRIB_TEX4 = 12, + VERT_ATTRIB_TEX5 = 13, + VERT_ATTRIB_TEX6 = 14, + VERT_ATTRIB_TEX7 = 15, + VERT_ATTRIB_GENERIC0 = 16, + VERT_ATTRIB_GENERIC1 = 17, + VERT_ATTRIB_GENERIC2 = 18, + VERT_ATTRIB_GENERIC3 = 19, + VERT_ATTRIB_GENERIC4 = 20, + VERT_ATTRIB_GENERIC5 = 21, + VERT_ATTRIB_GENERIC6 = 22, + VERT_ATTRIB_GENERIC7 = 23, + VERT_ATTRIB_GENERIC8 = 24, + VERT_ATTRIB_GENERIC9 = 25, + VERT_ATTRIB_GENERIC10 = 26, + VERT_ATTRIB_GENERIC11 = 27, + VERT_ATTRIB_GENERIC12 = 28, + VERT_ATTRIB_GENERIC13 = 29, + VERT_ATTRIB_GENERIC14 = 30, + VERT_ATTRIB_GENERIC15 = 31, + VERT_ATTRIB_MAX = 32 +} gl_vert_attrib; + +/** + * Bitflags for vertex attributes. + * These are used in bitfields in many places. + */ +/*@{*/ +#define VERT_BIT_POS (1 << VERT_ATTRIB_POS) +#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT) +#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL) +#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0) +#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1) +#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG) +#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX) +#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG) +#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0) +#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1) +#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2) +#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3) +#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4) +#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5) +#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6) +#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7) +#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0) +#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1) +#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2) +#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3) +#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4) +#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5) +#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6) +#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7) +#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8) +#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9) +#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10) +#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11) +#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12) +#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13) +#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14) +#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15) + +#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u))) +#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g))) +/*@}*/ + + +/** + * Indexes for vertex program result attributes + */ +typedef enum +{ + VERT_RESULT_HPOS = 0, + VERT_RESULT_COL0 = 1, + VERT_RESULT_COL1 = 2, + VERT_RESULT_FOGC = 3, + VERT_RESULT_TEX0 = 4, + VERT_RESULT_TEX1 = 5, + VERT_RESULT_TEX2 = 6, + VERT_RESULT_TEX3 = 7, + VERT_RESULT_TEX4 = 8, + VERT_RESULT_TEX5 = 9, + VERT_RESULT_TEX6 = 10, + VERT_RESULT_TEX7 = 11, + VERT_RESULT_PSIZ = 12, + VERT_RESULT_BFC0 = 13, + VERT_RESULT_BFC1 = 14, + VERT_RESULT_EDGE = 15, + VERT_RESULT_VAR0 = 16, /**< shader varying */ + VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING) +} gl_vert_result; + + +/*********************************************/ + +/** + * Indexes for geometry program attributes. + */ +typedef enum +{ + GEOM_ATTRIB_POSITION = 0, + GEOM_ATTRIB_COLOR0 = 1, + GEOM_ATTRIB_COLOR1 = 2, + GEOM_ATTRIB_SECONDARY_COLOR0 = 3, + GEOM_ATTRIB_SECONDARY_COLOR1 = 4, + GEOM_ATTRIB_FOG_FRAG_COORD = 5, + GEOM_ATTRIB_POINT_SIZE = 6, + GEOM_ATTRIB_CLIP_VERTEX = 7, + GEOM_ATTRIB_PRIMITIVE_ID = 8, + GEOM_ATTRIB_TEX_COORD = 9, + + GEOM_ATTRIB_VAR0 = 16, + GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING) +} gl_geom_attrib; + +/** + * Bitflags for geometry attributes. + * These are used in bitfields in many places. + */ +/*@{*/ +#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0) +#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1) +#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0) +#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1) +#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD) +#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD) +#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION) +#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE) +#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX) +#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID) +#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0) + +#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g))) +/*@}*/ + + +/** + * Indexes for geometry program result attributes + */ +typedef enum +{ + GEOM_RESULT_POS = 0, + GEOM_RESULT_COL0 = 1, + GEOM_RESULT_COL1 = 2, + GEOM_RESULT_SCOL0 = 3, + GEOM_RESULT_SCOL1 = 4, + GEOM_RESULT_FOGC = 5, + GEOM_RESULT_TEX0 = 6, + GEOM_RESULT_TEX1 = 7, + GEOM_RESULT_TEX2 = 8, + GEOM_RESULT_TEX3 = 9, + GEOM_RESULT_TEX4 = 10, + GEOM_RESULT_TEX5 = 11, + GEOM_RESULT_TEX6 = 12, + GEOM_RESULT_TEX7 = 13, + GEOM_RESULT_PSIZ = 14, + GEOM_RESULT_CLPV = 15, + GEOM_RESULT_PRID = 16, + GEOM_RESULT_LAYR = 17, + GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */ + /* ### we need to -2 because var0 is 18 instead 16 like in the others */ + GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2) +} gl_geom_result; + + +/** + * Indexes for fragment program input attributes. + */ +typedef enum +{ + FRAG_ATTRIB_WPOS = 0, + FRAG_ATTRIB_COL0 = 1, + FRAG_ATTRIB_COL1 = 2, + FRAG_ATTRIB_FOGC = 3, + FRAG_ATTRIB_TEX0 = 4, + FRAG_ATTRIB_TEX1 = 5, + FRAG_ATTRIB_TEX2 = 6, + FRAG_ATTRIB_TEX3 = 7, + FRAG_ATTRIB_TEX4 = 8, + FRAG_ATTRIB_TEX5 = 9, + FRAG_ATTRIB_TEX6 = 10, + FRAG_ATTRIB_TEX7 = 11, + FRAG_ATTRIB_FACE = 12, /**< front/back face */ + FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */ + FRAG_ATTRIB_VAR0 = 14, /**< shader varying */ + FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING) +} gl_frag_attrib; + +/** + * Bitflags for fragment program input attributes. + */ +/*@{*/ +#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS) +#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0) +#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1) +#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC) +#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE) +#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC) +#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0) +#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1) +#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2) +#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3) +#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4) +#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5) +#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6) +#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7) +#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0) + +#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U)) +#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V)) + +#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \ + FRAG_BIT_TEX1| \ + FRAG_BIT_TEX2| \ + FRAG_BIT_TEX3| \ + FRAG_BIT_TEX4| \ + FRAG_BIT_TEX5| \ + FRAG_BIT_TEX6| \ + FRAG_BIT_TEX7) +/*@}*/ + + +/** + * Fragment program results + */ +typedef enum +{ + FRAG_RESULT_DEPTH = 0, + FRAG_RESULT_STENCIL = 1, + FRAG_RESULT_COLOR = 2, + FRAG_RESULT_DATA0 = 3, + FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS) +} gl_frag_result; + + +/** + * Indexes for all renderbuffers + */ +typedef enum +{ + /* the four standard color buffers */ + BUFFER_FRONT_LEFT, + BUFFER_BACK_LEFT, + BUFFER_FRONT_RIGHT, + BUFFER_BACK_RIGHT, + BUFFER_DEPTH, + BUFFER_STENCIL, + BUFFER_ACCUM, + /* optional aux buffer */ + BUFFER_AUX0, + /* generic renderbuffers */ + BUFFER_COLOR0, + BUFFER_COLOR1, + BUFFER_COLOR2, + BUFFER_COLOR3, + BUFFER_COLOR4, + BUFFER_COLOR5, + BUFFER_COLOR6, + BUFFER_COLOR7, + BUFFER_COUNT +} gl_buffer_index; + +/** + * Bit flags for all renderbuffers + */ +#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT) +#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT) +#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT) +#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT) +#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0) +#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1) +#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2) +#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3) +#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH) +#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL) +#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM) +#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0) +#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1) +#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2) +#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3) +#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4) +#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5) +#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6) +#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7) + +/** + * Mask of all the color buffer bits (but not accum). + */ +#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \ + BUFFER_BIT_BACK_LEFT | \ + BUFFER_BIT_FRONT_RIGHT | \ + BUFFER_BIT_BACK_RIGHT | \ + BUFFER_BIT_AUX0 | \ + BUFFER_BIT_COLOR0 | \ + BUFFER_BIT_COLOR1 | \ + BUFFER_BIT_COLOR2 | \ + BUFFER_BIT_COLOR3 | \ + BUFFER_BIT_COLOR4 | \ + BUFFER_BIT_COLOR5 | \ + BUFFER_BIT_COLOR6 | \ + BUFFER_BIT_COLOR7) + + +/** + * Framebuffer configuration (aka visual / pixelformat) + * Note: some of these fields should be boolean, but it appears that + * code in drivers/dri/common/util.c requires int-sized fields. + */ +struct gl_config +{ + GLboolean rgbMode; + GLboolean floatMode; + GLboolean colorIndexMode; /* XXX is this used anywhere? */ + GLuint doubleBufferMode; + GLuint stereoMode; + + GLboolean haveAccumBuffer; + GLboolean haveDepthBuffer; + GLboolean haveStencilBuffer; + + GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */ + GLuint redMask, greenMask, blueMask, alphaMask; + GLint rgbBits; /* total bits for rgb */ + GLint indexBits; /* total bits for colorindex */ + + GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits; + GLint depthBits; + GLint stencilBits; + + GLint numAuxBuffers; + + GLint level; + + /* EXT_visual_rating / GLX 1.2 */ + GLint visualRating; + + /* EXT_visual_info / GLX 1.2 */ + GLint transparentPixel; + /* colors are floats scaled to ints */ + GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha; + GLint transparentIndex; + + /* ARB_multisample / SGIS_multisample */ + GLint sampleBuffers; + GLint samples; + + /* SGIX_pbuffer / GLX 1.3 */ + GLint maxPbufferWidth; + GLint maxPbufferHeight; + GLint maxPbufferPixels; + GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */ + GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */ + + /* OML_swap_method */ + GLint swapMethod; + + /* EXT_texture_from_pixmap */ + GLint bindToTextureRgb; + GLint bindToTextureRgba; + GLint bindToMipmapTexture; + GLint bindToTextureTargets; + GLint yInverted; + + /* EXT_framebuffer_sRGB */ + GLint sRGBCapable; +}; + + +/** + * Data structure for color tables + */ +struct gl_color_table +{ + GLenum InternalFormat; /**< The user-specified format */ + GLenum _BaseFormat; /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */ + GLuint Size; /**< number of entries in table */ + GLfloat *TableF; /**< Color table, floating point values */ + GLubyte *TableUB; /**< Color table, ubyte values */ + GLubyte RedSize; + GLubyte GreenSize; + GLubyte BlueSize; + GLubyte AlphaSize; + GLubyte LuminanceSize; + GLubyte IntensitySize; +}; + + +/** + * \name Bit flags used for updating material values. + */ +/*@{*/ +#define MAT_ATTRIB_FRONT_AMBIENT 0 +#define MAT_ATTRIB_BACK_AMBIENT 1 +#define MAT_ATTRIB_FRONT_DIFFUSE 2 +#define MAT_ATTRIB_BACK_DIFFUSE 3 +#define MAT_ATTRIB_FRONT_SPECULAR 4 +#define MAT_ATTRIB_BACK_SPECULAR 5 +#define MAT_ATTRIB_FRONT_EMISSION 6 +#define MAT_ATTRIB_BACK_EMISSION 7 +#define MAT_ATTRIB_FRONT_SHININESS 8 +#define MAT_ATTRIB_BACK_SHININESS 9 +#define MAT_ATTRIB_FRONT_INDEXES 10 +#define MAT_ATTRIB_BACK_INDEXES 11 +#define MAT_ATTRIB_MAX 12 + +#define MAT_ATTRIB_AMBIENT(f) (MAT_ATTRIB_FRONT_AMBIENT+(f)) +#define MAT_ATTRIB_DIFFUSE(f) (MAT_ATTRIB_FRONT_DIFFUSE+(f)) +#define MAT_ATTRIB_SPECULAR(f) (MAT_ATTRIB_FRONT_SPECULAR+(f)) +#define MAT_ATTRIB_EMISSION(f) (MAT_ATTRIB_FRONT_EMISSION+(f)) +#define MAT_ATTRIB_SHININESS(f)(MAT_ATTRIB_FRONT_SHININESS+(f)) +#define MAT_ATTRIB_INDEXES(f) (MAT_ATTRIB_FRONT_INDEXES+(f)) + +#define MAT_INDEX_AMBIENT 0 +#define MAT_INDEX_DIFFUSE 1 +#define MAT_INDEX_SPECULAR 2 + +#define MAT_BIT_FRONT_AMBIENT (1<<MAT_ATTRIB_FRONT_AMBIENT) +#define MAT_BIT_BACK_AMBIENT (1<<MAT_ATTRIB_BACK_AMBIENT) +#define MAT_BIT_FRONT_DIFFUSE (1<<MAT_ATTRIB_FRONT_DIFFUSE) +#define MAT_BIT_BACK_DIFFUSE (1<<MAT_ATTRIB_BACK_DIFFUSE) +#define MAT_BIT_FRONT_SPECULAR (1<<MAT_ATTRIB_FRONT_SPECULAR) +#define MAT_BIT_BACK_SPECULAR (1<<MAT_ATTRIB_BACK_SPECULAR) +#define MAT_BIT_FRONT_EMISSION (1<<MAT_ATTRIB_FRONT_EMISSION) +#define MAT_BIT_BACK_EMISSION (1<<MAT_ATTRIB_BACK_EMISSION) +#define MAT_BIT_FRONT_SHININESS (1<<MAT_ATTRIB_FRONT_SHININESS) +#define MAT_BIT_BACK_SHININESS (1<<MAT_ATTRIB_BACK_SHININESS) +#define MAT_BIT_FRONT_INDEXES (1<<MAT_ATTRIB_FRONT_INDEXES) +#define MAT_BIT_BACK_INDEXES (1<<MAT_ATTRIB_BACK_INDEXES) + + +#define FRONT_MATERIAL_BITS (MAT_BIT_FRONT_EMISSION | \ + MAT_BIT_FRONT_AMBIENT | \ + MAT_BIT_FRONT_DIFFUSE | \ + MAT_BIT_FRONT_SPECULAR | \ + MAT_BIT_FRONT_SHININESS | \ + MAT_BIT_FRONT_INDEXES) + +#define BACK_MATERIAL_BITS (MAT_BIT_BACK_EMISSION | \ + MAT_BIT_BACK_AMBIENT | \ + MAT_BIT_BACK_DIFFUSE | \ + MAT_BIT_BACK_SPECULAR | \ + MAT_BIT_BACK_SHININESS | \ + MAT_BIT_BACK_INDEXES) + +#define ALL_MATERIAL_BITS (FRONT_MATERIAL_BITS | BACK_MATERIAL_BITS) +/*@}*/ + + +#define EXP_TABLE_SIZE 512 /**< Specular exponent lookup table sizes */ +#define SHINE_TABLE_SIZE 256 /**< Material shininess lookup table sizes */ + +/** + * Material shininess lookup table. + */ +struct gl_shine_tab +{ + struct gl_shine_tab *next, *prev; + GLfloat tab[SHINE_TABLE_SIZE+1]; + GLfloat shininess; + GLuint refcount; +}; + + +/** + * Light source state. + */ +struct gl_light +{ + struct gl_light *next; /**< double linked list with sentinel */ + struct gl_light *prev; + + GLfloat Ambient[4]; /**< ambient color */ + GLfloat Diffuse[4]; /**< diffuse color */ + GLfloat Specular[4]; /**< specular color */ + GLfloat EyePosition[4]; /**< position in eye coordinates */ + GLfloat SpotDirection[4]; /**< spotlight direction in eye coordinates */ + GLfloat SpotExponent; + GLfloat SpotCutoff; /**< in degrees */ + GLfloat _CosCutoffNeg; /**< = cos(SpotCutoff) */ + GLfloat _CosCutoff; /**< = MAX(0, cos(SpotCutoff)) */ + GLfloat ConstantAttenuation; + GLfloat LinearAttenuation; + GLfloat QuadraticAttenuation; + GLboolean Enabled; /**< On/off flag */ + + /** + * \name Derived fields + */ + /*@{*/ + GLbitfield _Flags; /**< State */ + + GLfloat _Position[4]; /**< position in eye/obj coordinates */ + GLfloat _VP_inf_norm[3]; /**< Norm direction to infinite light */ + GLfloat _h_inf_norm[3]; /**< Norm( _VP_inf_norm + <0,0,1> ) */ + GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */ + GLfloat _VP_inf_spot_attenuation; + + GLfloat _SpotExpTable[EXP_TABLE_SIZE][2]; /**< to replace a pow() call */ + GLfloat _MatAmbient[2][3]; /**< material ambient * light ambient */ + GLfloat _MatDiffuse[2][3]; /**< material diffuse * light diffuse */ + GLfloat _MatSpecular[2][3]; /**< material spec * light specular */ + GLfloat _dli; /**< CI diffuse light intensity */ + GLfloat _sli; /**< CI specular light intensity */ + /*@}*/ +}; + + +/** + * Light model state. + */ +struct gl_lightmodel +{ + GLfloat Ambient[4]; /**< ambient color */ + GLboolean LocalViewer; /**< Local (or infinite) view point? */ + GLboolean TwoSide; /**< Two (or one) sided lighting? */ + GLenum ColorControl; /**< either GL_SINGLE_COLOR + * or GL_SEPARATE_SPECULAR_COLOR */ +}; + + +/** + * Material state. + */ +struct gl_material +{ + GLfloat Attrib[MAT_ATTRIB_MAX][4]; +}; + + +/** + * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT) + */ +struct gl_accum_attrib +{ + GLfloat ClearColor[4]; /**< Accumulation buffer clear color */ +}; + + +/** + * Color buffer attribute group (GL_COLOR_BUFFER_BIT). + */ +struct gl_colorbuffer_attrib +{ + GLuint ClearIndex; /**< Index to use for glClear */ + GLclampf ClearColor[4]; /**< Color to use for glClear */ + + GLuint IndexMask; /**< Color index write mask */ + GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */ + + GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */ + + /** + * \name alpha testing + */ + /*@{*/ + GLboolean AlphaEnabled; /**< Alpha test enabled flag */ + GLenum AlphaFunc; /**< Alpha test function */ + GLclampf AlphaRef; /**< Alpha reference value */ + /*@}*/ + + /** + * \name Blending + */ + /*@{*/ + GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */ + GLfloat BlendColor[4]; /**< Blending color */ + struct + { + GLenum SrcRGB; /**< RGB blend source term */ + GLenum DstRGB; /**< RGB blend dest term */ + GLenum SrcA; /**< Alpha blend source term */ + GLenum DstA; /**< Alpha blend dest term */ + GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */ + GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */ + } Blend[MAX_DRAW_BUFFERS]; + /** Are the blend func terms currently different for each buffer/target? */ + GLboolean _BlendFuncPerBuffer; + /** Are the blend equations currently different for each buffer/target? */ + GLboolean _BlendEquationPerBuffer; + /*@}*/ + + /** + * \name Logic op + */ + /*@{*/ + GLenum LogicOp; /**< Logic operator */ + GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */ + GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */ + GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */ + /*@}*/ + + GLboolean DitherFlag; /**< Dither enable flag */ + + GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ + GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */ + + GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */ +}; + + +/** + * Current attribute group (GL_CURRENT_BIT). + */ +struct gl_current_attrib +{ + /** + * \name Current vertex attributes. + * \note Values are valid only after FLUSH_VERTICES has been called. + * \note Index and Edgeflag current values are stored as floats in the + * SIX and SEVEN attribute slots. + */ + GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */ + + /** + * \name Current raster position attributes (always valid). + * \note This set of attributes is very similar to the SWvertex struct. + */ + /*@{*/ + GLfloat RasterPos[4]; + GLfloat RasterDistance; + GLfloat RasterColor[4]; + GLfloat RasterSecondaryColor[4]; + GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4]; + GLboolean RasterPosValid; + /*@}*/ +}; + + +/** + * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT). + */ +struct gl_depthbuffer_attrib +{ + GLenum Func; /**< Function for depth buffer compare */ + GLclampd Clear; /**< Value to clear depth buffer to */ + GLboolean Test; /**< Depth buffering enabled flag */ + GLboolean Mask; /**< Depth buffer writable? */ + GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */ + GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */ +}; + + +/** + * Evaluator attribute group (GL_EVAL_BIT). + */ +struct gl_eval_attrib +{ + /** + * \name Enable bits + */ + /*@{*/ + GLboolean Map1Color4; + GLboolean Map1Index; + GLboolean Map1Normal; + GLboolean Map1TextureCoord1; + GLboolean Map1TextureCoord2; + GLboolean Map1TextureCoord3; + GLboolean Map1TextureCoord4; + GLboolean Map1Vertex3; + GLboolean Map1Vertex4; + GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */ + GLboolean Map2Color4; + GLboolean Map2Index; + GLboolean Map2Normal; + GLboolean Map2TextureCoord1; + GLboolean Map2TextureCoord2; + GLboolean Map2TextureCoord3; + GLboolean Map2TextureCoord4; + GLboolean Map2Vertex3; + GLboolean Map2Vertex4; + GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */ + GLboolean AutoNormal; + /*@}*/ + + /** + * \name Map Grid endpoints and divisions and calculated du values + */ + /*@{*/ + GLint MapGrid1un; + GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du; + GLint MapGrid2un, MapGrid2vn; + GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du; + GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv; + /*@}*/ +}; + + +/** + * Fog attribute group (GL_FOG_BIT). + */ +struct gl_fog_attrib +{ + GLboolean Enabled; /**< Fog enabled flag */ + GLfloat Color[4]; /**< Fog color */ + GLfloat Density; /**< Density >= 0.0 */ + GLfloat Start; /**< Start distance in eye coords */ + GLfloat End; /**< End distance in eye coords */ + GLfloat Index; /**< Fog index */ + GLenum Mode; /**< Fog mode */ + GLboolean ColorSumEnabled; + GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */ + GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */ +}; + + +/** + * \brief Layout qualifiers for gl_FragDepth. + * + * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with + * a layout qualifier. + * + * \see enum ir_depth_layout + */ +enum gl_frag_depth_layout { + FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */ + FRAG_DEPTH_LAYOUT_ANY, + FRAG_DEPTH_LAYOUT_GREATER, + FRAG_DEPTH_LAYOUT_LESS, + FRAG_DEPTH_LAYOUT_UNCHANGED +}; + + +/** + * Hint attribute group (GL_HINT_BIT). + * + * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE. + */ +struct gl_hint_attrib +{ + GLenum PerspectiveCorrection; + GLenum PointSmooth; + GLenum LineSmooth; + GLenum PolygonSmooth; + GLenum Fog; + GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */ + GLenum TextureCompression; /**< GL_ARB_texture_compression */ + GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */ + GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */ +}; + +/** + * Light state flags. + */ +/*@{*/ +#define LIGHT_SPOT 0x1 +#define LIGHT_LOCAL_VIEWER 0x2 +#define LIGHT_POSITIONAL 0x4 +#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER) +/*@}*/ + + +/** + * Lighting attribute group (GL_LIGHT_BIT). + */ +struct gl_light_attrib +{ + struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */ + struct gl_lightmodel Model; /**< Lighting model */ + + /** + * Must flush FLUSH_VERTICES before referencing: + */ + /*@{*/ + struct gl_material Material; /**< Includes front & back values */ + /*@}*/ + + GLboolean Enabled; /**< Lighting enabled flag */ + GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */ + GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */ + GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */ + GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */ + GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */ + GLboolean ColorMaterialEnabled; + GLenum ClampVertexColor; + + struct gl_light EnabledList; /**< List sentinel */ + + /** + * Derived state for optimizations: + */ + /*@{*/ + GLboolean _NeedEyeCoords; + GLboolean _NeedVertices; /**< Use fast shader? */ + GLbitfield _Flags; /**< LIGHT_* flags, see above */ + GLfloat _BaseColor[2][3]; + /*@}*/ +}; + + +/** + * Line attribute group (GL_LINE_BIT). + */ +struct gl_line_attrib +{ + GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */ + GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */ + GLushort StipplePattern; /**< Stipple pattern */ + GLint StippleFactor; /**< Stipple repeat factor */ + GLfloat Width; /**< Line width */ +}; + + +/** + * Display list attribute group (GL_LIST_BIT). + */ +struct gl_list_attrib +{ + GLuint ListBase; +}; + + +/** + * Multisample attribute group (GL_MULTISAMPLE_BIT). + */ +struct gl_multisample_attrib +{ + GLboolean Enabled; + GLboolean _Enabled; /**< true if Enabled and multisample buffer */ + GLboolean SampleAlphaToCoverage; + GLboolean SampleAlphaToOne; + GLboolean SampleCoverage; + GLfloat SampleCoverageValue; + GLboolean SampleCoverageInvert; +}; + + +/** + * A pixelmap (see glPixelMap) + */ +struct gl_pixelmap +{ + GLint Size; + GLfloat Map[MAX_PIXEL_MAP_TABLE]; + GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */ +}; + + +/** + * Collection of all pixelmaps + */ +struct gl_pixelmaps +{ + struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */ + struct gl_pixelmap GtoG; + struct gl_pixelmap BtoB; + struct gl_pixelmap AtoA; + struct gl_pixelmap ItoR; + struct gl_pixelmap ItoG; + struct gl_pixelmap ItoB; + struct gl_pixelmap ItoA; + struct gl_pixelmap ItoI; + struct gl_pixelmap StoS; +}; + + +/** + * Pixel attribute group (GL_PIXEL_MODE_BIT). + */ +struct gl_pixel_attrib +{ + GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */ + + /*--- Begin Pixel Transfer State ---*/ + /* Fields are in the order in which they're applied... */ + + /** Scale & Bias (index shift, offset) */ + /*@{*/ + GLfloat RedBias, RedScale; + GLfloat GreenBias, GreenScale; + GLfloat BlueBias, BlueScale; + GLfloat AlphaBias, AlphaScale; + GLfloat DepthBias, DepthScale; + GLint IndexShift, IndexOffset; + /*@}*/ + + /* Pixel Maps */ + /* Note: actual pixel maps are not part of this attrib group */ + GLboolean MapColorFlag; + GLboolean MapStencilFlag; + + /*--- End Pixel Transfer State ---*/ + + /** glPixelZoom */ + GLfloat ZoomX, ZoomY; + + /** GL_SGI_texture_color_table */ + GLfloat TextureColorTableScale[4]; /**< RGBA */ + GLfloat TextureColorTableBias[4]; /**< RGBA */ +}; + + +/** + * Point attribute group (GL_POINT_BIT). + */ +struct gl_point_attrib +{ + GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */ + GLfloat Size; /**< User-specified point size */ + GLfloat Params[3]; /**< GL_EXT_point_parameters */ + GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */ + GLfloat Threshold; /**< GL_EXT_point_parameters */ + GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */ + GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */ + GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/ + GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */ + GLenum SpriteOrigin; /**< GL_ARB_point_sprite */ +}; + + +/** + * Polygon attribute group (GL_POLYGON_BIT). + */ +struct gl_polygon_attrib +{ + GLenum FrontFace; /**< Either GL_CW or GL_CCW */ + GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ + GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */ + GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */ + GLboolean CullFlag; /**< Culling on/off flag */ + GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */ + GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */ + GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */ + GLfloat OffsetFactor; /**< Polygon offset factor, from user */ + GLfloat OffsetUnits; /**< Polygon offset units, from user */ + GLboolean OffsetPoint; /**< Offset in GL_POINT mode */ + GLboolean OffsetLine; /**< Offset in GL_LINE mode */ + GLboolean OffsetFill; /**< Offset in GL_FILL mode */ +}; + + +/** + * Scissor attributes (GL_SCISSOR_BIT). + */ +struct gl_scissor_attrib +{ + GLboolean Enabled; /**< Scissor test enabled? */ + GLint X, Y; /**< Lower left corner of box */ + GLsizei Width, Height; /**< Size of box */ +}; + + +/** + * Stencil attribute group (GL_STENCIL_BUFFER_BIT). + * + * Three sets of stencil data are tracked so that OpenGL 2.0, + * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported + * simultaneously. In each of the stencil state arrays, element 0 corresponds + * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 / + * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the + * GL_EXT_stencil_two_side GL_BACK state. + * + * The derived value \c _BackFace is either 1 or 2 depending on whether or + * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled. + * + * The derived value \c _TestTwoSide is set when the front-face and back-face + * stencil state are different. + */ +struct gl_stencil_attrib +{ + GLboolean Enabled; /**< Enabled flag */ + GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */ + GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */ + GLboolean _Enabled; /**< Enabled and stencil buffer present */ + GLboolean _TestTwoSide; + GLubyte _BackFace; /**< Current back stencil state (1 or 2) */ + GLenum Function[3]; /**< Stencil function */ + GLenum FailFunc[3]; /**< Fail function */ + GLenum ZPassFunc[3]; /**< Depth buffer pass function */ + GLenum ZFailFunc[3]; /**< Depth buffer fail function */ + GLint Ref[3]; /**< Reference value */ + GLuint ValueMask[3]; /**< Value mask */ + GLuint WriteMask[3]; /**< Write mask */ + GLuint Clear; /**< Clear value */ +}; + + +/** + * An index for each type of texture object. These correspond to the GL + * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc. + * Note: the order is from highest priority to lowest priority. + */ +typedef enum +{ + TEXTURE_2D_ARRAY_INDEX, + TEXTURE_1D_ARRAY_INDEX, + TEXTURE_CUBE_INDEX, + TEXTURE_3D_INDEX, + TEXTURE_RECT_INDEX, + TEXTURE_2D_INDEX, + TEXTURE_1D_INDEX, + NUM_TEXTURE_TARGETS +} gl_texture_index; + + +/** + * Bit flags for each type of texture object + * Used for Texture.Unit[]._ReallyEnabled flags. + */ +/*@{*/ +#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX) +#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX) +#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX) +#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX) +#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX) +#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX) +#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX) +/*@}*/ + + +/** + * TexGenEnabled flags. + */ +/*@{*/ +#define S_BIT 1 +#define T_BIT 2 +#define R_BIT 4 +#define Q_BIT 8 +#define STR_BITS (S_BIT | T_BIT | R_BIT) +/*@}*/ + + +/** + * Bit flag versions of the corresponding GL_ constants. + */ +/*@{*/ +#define TEXGEN_SPHERE_MAP 0x1 +#define TEXGEN_OBJ_LINEAR 0x2 +#define TEXGEN_EYE_LINEAR 0x4 +#define TEXGEN_REFLECTION_MAP_NV 0x8 +#define TEXGEN_NORMAL_MAP_NV 0x10 + +#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \ + TEXGEN_REFLECTION_MAP_NV | \ + TEXGEN_NORMAL_MAP_NV) +#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \ + TEXGEN_REFLECTION_MAP_NV | \ + TEXGEN_NORMAL_MAP_NV | \ + TEXGEN_EYE_LINEAR) +/*@}*/ + + + +/** Tex-gen enabled for texture unit? */ +#define ENABLE_TEXGEN(unit) (1 << (unit)) + +/** Non-identity texture matrix for texture unit? */ +#define ENABLE_TEXMAT(unit) (1 << (unit)) + + +/** + * Texel fetch function prototype. We use texel fetch functions to + * extract RGBA, color indexes and depth components out of 1D, 2D and 3D + * texture images. These functions help to isolate us from the gritty + * details of all the various texture image encodings. + * + * \param texImage texture image. + * \param col texel column. + * \param row texel row. + * \param img texel image level/layer. + * \param texelOut output texel (up to 4 GLchans) + */ +typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + GLchan *texelOut ); + +/** + * As above, but returns floats. + * Used for depth component images and for upcoming signed/float + * texture images. + */ +typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + GLfloat *texelOut ); + + +typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage, + GLint col, GLint row, GLint img, + const void *texel); + + +/** + * Texture image state. Describes the dimensions of a texture image, + * the texel format and pointers to Texel Fetch functions. + */ +struct gl_texture_image +{ + GLint InternalFormat; /**< Internal format as given by the user */ + GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA, + * GL_LUMINANCE, GL_LUMINANCE_ALPHA, + * GL_INTENSITY, GL_COLOR_INDEX, + * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT + * only. Used for choosing TexEnv arithmetic. + */ + gl_format TexFormat; /**< The actual texture memory format */ + + GLuint Border; /**< 0 or 1 */ + GLuint Width; /**< = 2^WidthLog2 + 2*Border */ + GLuint Height; /**< = 2^HeightLog2 + 2*Border */ + GLuint Depth; /**< = 2^DepthLog2 + 2*Border */ + GLuint Width2; /**< = Width - 2*Border */ + GLuint Height2; /**< = Height - 2*Border */ + GLuint Depth2; /**< = Depth - 2*Border */ + GLuint WidthLog2; /**< = log2(Width2) */ + GLuint HeightLog2; /**< = log2(Height2) */ + GLuint DepthLog2; /**< = log2(Depth2) */ + GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */ + GLfloat WidthScale; /**< used for mipmap LOD computation */ + GLfloat HeightScale; /**< used for mipmap LOD computation */ + GLfloat DepthScale; /**< used for mipmap LOD computation */ + GLboolean IsClientData; /**< Data owned by client? */ + GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */ + + struct gl_texture_object *TexObject; /**< Pointer back to parent object */ + + FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */ + FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */ + + GLuint RowStride; /**< Padded width in units of texels */ + GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to + each 2D slice in 'Data', in texels */ + GLvoid *Data; /**< Image data, accessed via FetchTexel() */ + + /** + * \name For device driver: + */ + /*@{*/ + void *DriverData; /**< Arbitrary device driver data */ + /*@}*/ +}; + + +/** + * Indexes for cube map faces. + */ +typedef enum +{ + FACE_POS_X = 0, + FACE_NEG_X = 1, + FACE_POS_Y = 2, + FACE_NEG_Y = 3, + FACE_POS_Z = 4, + FACE_NEG_Z = 5, + MAX_FACES = 6 +} gl_face_index; + + +/** + * Texture object state. Contains the array of mipmap images, border color, + * wrap modes, filter modes, shadow/texcompare state, and the per-texture + * color palette. + */ +struct gl_texture_object +{ + _glthread_Mutex Mutex; /**< for thread safety */ + GLint RefCount; /**< reference count */ + GLuint Name; /**< the user-visible texture object ID */ + GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */ + GLfloat Priority; /**< in [0,1] */ + union { + GLfloat f[4]; + GLuint ui[4]; + GLint i[4]; + } BorderColor; /**< Interpreted according to texture format */ + GLenum WrapS; /**< S-axis texture image wrap mode */ + GLenum WrapT; /**< T-axis texture image wrap mode */ + GLenum WrapR; /**< R-axis texture image wrap mode */ + GLenum MinFilter; /**< minification filter */ + GLenum MagFilter; /**< magnification filter */ + GLfloat MinLod; /**< min lambda, OpenGL 1.2 */ + GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */ + GLfloat LodBias; /**< OpenGL 1.4 */ + GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */ + GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */ + GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ + GLenum CompareMode; /**< GL_ARB_shadow */ + GLenum CompareFunc; /**< GL_ARB_shadow */ + GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */ + GLenum DepthMode; /**< GL_ARB_depth_texture */ + GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */ + GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */ + GLint CropRect[4]; /**< GL_OES_draw_texture */ + GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */ + GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */ + GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */ + GLboolean _Complete; /**< Is texture object complete? */ + GLboolean _RenderToTexture; /**< Any rendering to this texture? */ + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ + GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */ + + /** Actual texture images, indexed by [cube face] and [mipmap level] */ + struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS]; + + /** GL_EXT_paletted_texture */ + struct gl_color_table Palette; + + /** + * \name For device driver. + * Note: instead of attaching driver data to this pointer, it's preferable + * to instead use this struct as a base class for your own texture object + * class. Driver->NewTextureObject() can be used to implement the + * allocation. + */ + void *DriverData; /**< Arbitrary device driver data */ +}; + + +/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */ +#define MAX_COMBINER_TERMS 4 + + +/** + * Texture combine environment state. + */ +struct gl_tex_env_combine_state +{ + GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ + GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */ + /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */ + GLenum SourceRGB[MAX_COMBINER_TERMS]; + GLenum SourceA[MAX_COMBINER_TERMS]; + /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */ + GLenum OperandRGB[MAX_COMBINER_TERMS]; + GLenum OperandA[MAX_COMBINER_TERMS]; + GLuint ScaleShiftRGB; /**< 0, 1 or 2 */ + GLuint ScaleShiftA; /**< 0, 1 or 2 */ + GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */ + GLuint _NumArgsA; /**< Number of inputs used for the A combiner */ +}; + + +/** + * Texture coord generation state. + */ +struct gl_texgen +{ + GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */ + GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */ + GLfloat ObjectPlane[4]; + GLfloat EyePlane[4]; +}; + + +/** + * Texture unit state. Contains enable flags, texture environment/function/ + * combiners, texgen state, pointers to current texture objects and + * post-filter color tables. + */ +struct gl_texture_unit +{ + GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */ + GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */ + + GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */ + GLfloat EnvColor[4]; + + struct gl_texgen GenS; + struct gl_texgen GenT; + struct gl_texgen GenR; + struct gl_texgen GenQ; + GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */ + GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */ + + GLfloat LodBias; /**< for biasing mipmap levels */ + GLenum BumpTarget; + GLfloat RotMatrix[4]; /* 2x2 matrix */ + + /** + * \name GL_EXT_texture_env_combine + */ + struct gl_tex_env_combine_state Combine; + + /** + * Derived state based on \c EnvMode and the \c BaseFormat of the + * currently enabled texture. + */ + struct gl_tex_env_combine_state _EnvMode; + + /** + * Currently enabled combiner state. This will point to either + * \c Combine or \c _EnvMode. + */ + struct gl_tex_env_combine_state *_CurrentCombine; + + /** Current texture object pointers */ + struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS]; + + /** Points to highest priority, complete and enabled texture object */ + struct gl_texture_object *_Current; + + /** GL_SGI_texture_color_table */ + /*@{*/ + struct gl_color_table ColorTable; + struct gl_color_table ProxyColorTable; + GLboolean ColorTableEnabled; + /*@}*/ +}; + + +/** + * Texture attribute group (GL_TEXTURE_BIT). + */ +struct gl_texture_attrib +{ + GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */ + struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS]; + + struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS]; + + /** GL_ARB_seamless_cubemap */ + GLboolean CubeMapSeamless; + + /** GL_EXT_shared_texture_palette */ + GLboolean SharedPalette; + struct gl_color_table Palette; + + /** Texture units/samplers used by vertex or fragment texturing */ + GLbitfield _EnabledUnits; + + /** Texture coord units/sets used for fragment texturing */ + GLbitfield _EnabledCoordUnits; + + /** Texture coord units that have texgen enabled */ + GLbitfield _TexGenEnabled; + + /** Texture coord units that have non-identity matrices */ + GLbitfield _TexMatEnabled; + + /** Bitwise-OR of all Texture.Unit[i]._GenFlags */ + GLbitfield _GenFlags; +}; + + +/** + * Transformation attribute group (GL_TRANSFORM_BIT). + */ +struct gl_transform_attrib +{ + GLenum MatrixMode; /**< Matrix mode */ + GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */ + GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */ + GLbitfield ClipPlanesEnabled; /**< on/off bitmask */ + GLboolean Normalize; /**< Normalize all normals? */ + GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */ + GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */ + GLboolean DepthClamp; /**< GL_ARB_depth_clamp */ + + GLfloat CullEyePos[4]; + GLfloat CullObjPos[4]; +}; + + +/** + * Viewport attribute group (GL_VIEWPORT_BIT). + */ +struct gl_viewport_attrib +{ + GLint X, Y; /**< position */ + GLsizei Width, Height; /**< size */ + GLfloat Near, Far; /**< Depth buffer range */ + GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */ +}; + + +/** + * GL_ARB_vertex/pixel_buffer_object buffer object + */ +struct gl_buffer_object +{ + _glthread_Mutex Mutex; + GLint RefCount; + GLuint Name; + GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */ + GLsizeiptrARB Size; /**< Size of buffer storage in bytes */ + GLubyte *Data; /**< Location of storage either in RAM or VRAM. */ + /** Fields describing a mapped buffer */ + /*@{*/ + GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */ + GLvoid *Pointer; /**< User-space address of mapping */ + GLintptr Offset; /**< Mapped offset */ + GLsizeiptr Length; /**< Mapped length */ + /*@}*/ + GLboolean Written; /**< Ever written to? (for debugging) */ + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ +}; + + +/** + * Client pixel packing/unpacking attributes + */ +struct gl_pixelstore_attrib +{ + GLint Alignment; + GLint RowLength; + GLint SkipPixels; + GLint SkipRows; + GLint ImageHeight; + GLint SkipImages; + GLboolean SwapBytes; + GLboolean LsbFirst; + GLboolean ClientStorage; /**< GL_APPLE_client_storage */ + GLboolean Invert; /**< GL_MESA_pack_invert */ + struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */ +}; + + +/** + * Client vertex array attributes + */ +struct gl_client_array +{ + GLint Size; /**< components per element (1,2,3,4) */ + GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */ + GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */ + GLsizei Stride; /**< user-specified stride */ + GLsizei StrideB; /**< actual stride in bytes */ + const GLubyte *Ptr; /**< Points to array data */ + GLboolean Enabled; /**< Enabled flag is a boolean */ + GLboolean Normalized; /**< GL_ARB_vertex_program */ + GLboolean Integer; /**< Integer-valued? */ + GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */ + GLuint _ElementSize; /**< size of each element in bytes */ + + struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */ + GLuint _MaxElement; /**< max element index into array buffer + 1 */ +}; + + +/** + * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object + * extension, but a nice encapsulation in any case. + */ +struct gl_array_object +{ + /** Name of the array object as received from glGenVertexArrayAPPLE. */ + GLuint Name; + + GLint RefCount; + _glthread_Mutex Mutex; + GLboolean VBOonly; /**< require all arrays to live in VBOs? */ + + /** Conventional vertex arrays */ + /*@{*/ + struct gl_client_array Vertex; + struct gl_client_array Weight; + struct gl_client_array Normal; + struct gl_client_array Color; + struct gl_client_array SecondaryColor; + struct gl_client_array FogCoord; + struct gl_client_array Index; + struct gl_client_array EdgeFlag; + struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS]; + struct gl_client_array PointSize; + /*@}*/ + + /** + * Generic arrays for vertex programs/shaders. + * For NV vertex programs, these attributes alias and take priority + * over the conventional attribs above. For ARB vertex programs and + * GLSL vertex shaders, these attributes are separate. + */ + struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS]; + + /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */ + GLbitfield _Enabled; + + /** + * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs + * we can determine the max legal (in bounds) glDrawElements array index. + */ + GLuint _MaxElement; +}; + + +/** + * Vertex array state + */ +struct gl_array_attrib +{ + /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */ + struct gl_array_object *ArrayObj; + + /** The default vertex array object */ + struct gl_array_object *DefaultArrayObj; + + /** Array objects (GL_ARB/APPLE_vertex_array_object) */ + struct _mesa_HashTable *Objects; + + GLint ActiveTexture; /**< Client Active Texture */ + GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */ + GLuint LockCount; /**< GL_EXT_compiled_vertex_array */ + + /** GL 3.1 (slightly different from GL_NV_primitive_restart) */ + GLboolean PrimitiveRestart; + GLuint RestartIndex; + + GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */ + GLboolean RebindArrays; /**< whether the VBO module should rebind arrays */ + + /* GL_ARB_vertex_buffer_object */ + struct gl_buffer_object *ArrayBufferObj; + struct gl_buffer_object *ElementArrayBufferObj; +}; + + +/** + * Feedback buffer state + */ +struct gl_feedback +{ + GLenum Type; + GLbitfield _Mask; /**< FB_* bits */ + GLfloat *Buffer; + GLuint BufferSize; + GLuint Count; +}; + + +/** + * Selection buffer state + */ +struct gl_selection +{ + GLuint *Buffer; /**< selection buffer */ + GLuint BufferSize; /**< size of the selection buffer */ + GLuint BufferCount; /**< number of values in the selection buffer */ + GLuint Hits; /**< number of records in the selection buffer */ + GLuint NameStackDepth; /**< name stack depth */ + GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */ + GLboolean HitFlag; /**< hit flag */ + GLfloat HitMinZ; /**< minimum hit depth */ + GLfloat HitMaxZ; /**< maximum hit depth */ +}; + + +/** + * 1-D Evaluator control points + */ +struct gl_1d_map +{ + GLuint Order; /**< Number of control points */ + GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */ + GLfloat *Points; /**< Points to contiguous control points */ +}; + + +/** + * 2-D Evaluator control points + */ +struct gl_2d_map +{ + GLuint Uorder; /**< Number of control points in U dimension */ + GLuint Vorder; /**< Number of control points in V dimension */ + GLfloat u1, u2, du; + GLfloat v1, v2, dv; + GLfloat *Points; /**< Points to contiguous control points */ +}; + + +/** + * All evaluator control point state + */ +struct gl_evaluators +{ + /** + * \name 1-D maps + */ + /*@{*/ + struct gl_1d_map Map1Vertex3; + struct gl_1d_map Map1Vertex4; + struct gl_1d_map Map1Index; + struct gl_1d_map Map1Color4; + struct gl_1d_map Map1Normal; + struct gl_1d_map Map1Texture1; + struct gl_1d_map Map1Texture2; + struct gl_1d_map Map1Texture3; + struct gl_1d_map Map1Texture4; + struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */ + /*@}*/ + + /** + * \name 2-D maps + */ + /*@{*/ + struct gl_2d_map Map2Vertex3; + struct gl_2d_map Map2Vertex4; + struct gl_2d_map Map2Index; + struct gl_2d_map Map2Color4; + struct gl_2d_map Map2Normal; + struct gl_2d_map Map2Texture1; + struct gl_2d_map Map2Texture2; + struct gl_2d_map Map2Texture3; + struct gl_2d_map Map2Texture4; + struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */ + /*@}*/ +}; + + +/** + * Names of the various vertex/fragment program register files, etc. + * + * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c) + * All values should fit in a 4-bit field. + * + * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM, + * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to + * be "uniform" variables since they can only be set outside glBegin/End. + * They're also all stored in the same Parameters array. + */ +typedef enum +{ + PROGRAM_TEMPORARY, /**< machine->Temporary[] */ + PROGRAM_INPUT, /**< machine->Inputs[] */ + PROGRAM_OUTPUT, /**< machine->Outputs[] */ + PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */ + PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */ + PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */ + PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */ + PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */ + PROGRAM_CONSTANT, /**< gl_program->Parameters[] */ + PROGRAM_UNIFORM, /**< gl_program->Parameters[] */ + PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */ + PROGRAM_ADDRESS, /**< machine->AddressReg */ + PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */ + PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */ + PROGRAM_UNDEFINED, /**< Invalid/TBD value */ + PROGRAM_FILE_MAX +} gl_register_file; + + +/** + * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be + * one of these values. + */ +typedef enum +{ + SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */ + SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */ + SYSTEM_VALUE_MAX /**< Number of values */ +} gl_system_value; + + +/** Vertex and fragment instructions */ +struct prog_instruction; +struct gl_program_parameter_list; +struct gl_uniform_list; + + +/** + * Base class for any kind of program object + */ +struct gl_program +{ + GLuint Id; + GLubyte *String; /**< Null-terminated program text */ + GLint RefCount; + GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */ + GLenum Format; /**< String encoding format */ + GLboolean Resident; + + struct prog_instruction *Instructions; + + GLbitfield InputsRead; /**< Bitmask of which input regs are read */ + GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ + GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */ + GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ + GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ + GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ + GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */ + GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */ + + + /** Named parameters, constants, etc. from program text */ + struct gl_program_parameter_list *Parameters; + /** Numbered local parameters */ + GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4]; + + /** Vertex/fragment shader varying vars */ + struct gl_program_parameter_list *Varying; + /** Vertex program user-defined attributes */ + struct gl_program_parameter_list *Attributes; + + /** Map from sampler unit to texture unit (set by glUniform1i()) */ + GLubyte SamplerUnits[MAX_SAMPLERS]; + /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */ + gl_texture_index SamplerTargets[MAX_SAMPLERS]; + + /** Bitmask of which register files are read/written with indirect + * addressing. Mask of (1 << PROGRAM_x) bits. + */ + GLbitfield IndirectRegisterFiles; + + /** Logical counts */ + /*@{*/ + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint NumAttributes; + GLuint NumAddressRegs; + GLuint NumAluInstructions; + GLuint NumTexInstructions; + GLuint NumTexIndirections; + /*@}*/ + /** Native, actual h/w counts */ + /*@{*/ + GLuint NumNativeInstructions; + GLuint NumNativeTemporaries; + GLuint NumNativeParameters; + GLuint NumNativeAttributes; + GLuint NumNativeAddressRegs; + GLuint NumNativeAluInstructions; + GLuint NumNativeTexInstructions; + GLuint NumNativeTexIndirections; + /*@}*/ +}; + + +/** Vertex program object */ +struct gl_vertex_program +{ + struct gl_program Base; /**< base class */ + GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? */ + GLboolean IsPositionInvariant; +}; + + +/** Geometry program object */ +struct gl_geometry_program +{ + struct gl_program Base; /**< base class */ + + GLint VerticesOut; + GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, + GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ + GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ +}; + + +/** Fragment program object */ +struct gl_fragment_program +{ + struct gl_program Base; /**< base class */ + GLenum FogOption; + GLboolean UsesKill; /**< shader uses KIL instruction */ + GLboolean OriginUpperLeft; + GLboolean PixelCenterInteger; + enum gl_frag_depth_layout FragDepthLayout; +}; + + +/** + * State common to vertex and fragment programs. + */ +struct gl_program_state +{ + GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */ + const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */ +}; + + +/** + * Context state for vertex programs. + */ +struct gl_vertex_program_state +{ + GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */ + GLboolean _Enabled; /**< Enabled and _valid_ user program? */ + GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */ + GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */ + struct gl_vertex_program *Current; /**< User-bound vertex program */ + + /** Currently enabled and valid vertex program (including internal + * programs, user-defined vertex programs and GLSL vertex shaders). + * This is the program we must use when rendering. + */ + struct gl_vertex_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /* For GL_NV_vertex_program only: */ + GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4]; + GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4]; + + /** Should fixed-function T&L be implemented with a vertex prog? */ + GLboolean _MaintainTnlProgram; + + /** Program to emulate fixed-function T&L (see above) */ + struct gl_vertex_program *_TnlProgram; + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; + + GLboolean _Overriden; +}; + + +/** + * Context state for geometry programs. + */ +struct gl_geometry_program_state +{ + GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */ + GLboolean _Enabled; /**< Enabled and valid program? */ + struct gl_geometry_program *Current; /**< user-bound geometry program */ + + /** Currently enabled and valid program (including internal programs + * and compiled shader programs). + */ + struct gl_geometry_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; +}; + +/** + * Context state for fragment programs. + */ +struct gl_fragment_program_state +{ + GLboolean Enabled; /**< User-set fragment program enable flag */ + GLboolean _Enabled; /**< Enabled and _valid_ user program? */ + struct gl_fragment_program *Current; /**< User-bound fragment program */ + + /** Currently enabled and valid fragment program (including internal + * programs, user-defined fragment programs and GLSL fragment shaders). + * This is the program we must use when rendering. + */ + struct gl_fragment_program *_Current; + + GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */ + + /** Should fixed-function texturing be implemented with a fragment prog? */ + GLboolean _MaintainTexEnvProgram; + + /** Program to emulate fixed-function texture env/combine (see above) */ + struct gl_fragment_program *_TexEnvProgram; + + /** Cache of fixed-function programs */ + struct gl_program_cache *Cache; +}; + + +/** + * ATI_fragment_shader runtime state + */ +#define ATI_FS_INPUT_PRIMARY 0 +#define ATI_FS_INPUT_SECONDARY 1 + +struct atifs_instruction; +struct atifs_setupinst; + +/** + * ATI fragment shader + */ +struct ati_fragment_shader +{ + GLuint Id; + GLint RefCount; + struct atifs_instruction *Instructions[2]; + struct atifs_setupinst *SetupInst[2]; + GLfloat Constants[8][4]; + GLbitfield LocalConstDef; /**< Indicates which constants have been set */ + GLubyte numArithInstr[2]; + GLubyte regsAssigned[2]; + GLubyte NumPasses; /**< 1 or 2 */ + GLubyte cur_pass; + GLubyte last_optype; + GLboolean interpinp1; + GLboolean isValid; + GLuint swizzlerq; +}; + +/** + * Context state for GL_ATI_fragment_shader + */ +struct gl_ati_fragment_shader_state +{ + GLboolean Enabled; + GLboolean _Enabled; /**< enabled and valid shader? */ + GLboolean Compiling; + GLfloat GlobalConstants[8][4]; + struct ati_fragment_shader *Current; +}; + + +/** + * Occlusion/timer query object. + */ +struct gl_query_object +{ + GLenum Target; /**< The query target, when active */ + GLuint Id; /**< hash table ID/name */ + GLuint64EXT Result; /**< the counter */ + GLboolean Active; /**< inside Begin/EndQuery */ + GLboolean Ready; /**< result is ready? */ +}; + + +/** + * Context state for query objects. + */ +struct gl_query_state +{ + struct _mesa_HashTable *QueryObjects; + struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */ + struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */ + + /** GL_NV_conditional_render */ + struct gl_query_object *CondRenderQuery; + + /** GL_EXT_transform_feedback */ + struct gl_query_object *PrimitivesGenerated; + struct gl_query_object *PrimitivesWritten; + + /** GL_ARB_timer_query */ + struct gl_query_object *TimeElapsed; + + GLenum CondRenderMode; +}; + + +/** Sync object state */ +struct gl_sync_object { + struct simple_node link; + GLenum Type; /**< GL_SYNC_FENCE */ + GLuint Name; /**< Fence name */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; /**< Object was deleted while there were still + * live references (e.g., sync not yet finished) + */ + GLenum SyncCondition; + GLbitfield Flags; /**< Flags passed to glFenceSync */ + GLuint StatusFlag:1; /**< Has the sync object been signaled? */ +}; + + +/** Set by #pragma directives */ +struct gl_sl_pragmas +{ + GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */ + GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */ + GLboolean Optimize; /**< defaults on */ + GLboolean Debug; /**< defaults off */ +}; + + +/** + * A GLSL vertex or fragment shader object. + */ +struct gl_shader +{ + GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) */ + GLuint Name; /**< AKA the handle */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; + GLboolean CompileStatus; + const GLchar *Source; /**< Source code string */ + GLuint SourceChecksum; /**< for debug/logging purposes */ + struct gl_program *Program; /**< Post-compile assembly code */ + GLchar *InfoLog; + struct gl_sl_pragmas Pragmas; + + unsigned Version; /**< GLSL version used for linking */ + + struct exec_list *ir; + struct glsl_symbol_table *symbols; + + /** Shaders containing built-in functions that are used for linking. */ + struct gl_shader *builtins_to_link[16]; + unsigned num_builtins_to_link; +}; + + +/** + * A GLSL program object. + * Basically a linked collection of vertex and fragment shaders. + */ +struct gl_shader_program +{ + GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */ + GLuint Name; /**< aka handle or ID */ + GLint RefCount; /**< Reference count */ + GLboolean DeletePending; + + GLuint NumShaders; /**< number of attached shaders */ + struct gl_shader **Shaders; /**< List of attached the shaders */ + + /** User-defined attribute bindings (glBindAttribLocation) */ + struct gl_program_parameter_list *Attributes; + + /** Transform feedback varyings */ + struct { + GLenum BufferMode; + GLuint NumVarying; + GLchar **VaryingNames; /**< Array [NumVarying] of char * */ + } TransformFeedback; + + /** Geometry shader state - copied into gl_geometry_program at link time */ + struct { + GLint VerticesOut; + GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB, + GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */ + GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */ + } Geom; + + /* post-link info: */ + struct gl_vertex_program *VertexProgram; /**< Linked vertex program */ + struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */ + struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */ + struct gl_uniform_list *Uniforms; + struct gl_program_parameter_list *Varying; + GLboolean LinkStatus; /**< GL_LINK_STATUS */ + GLboolean Validated; + GLboolean _Used; /**< Ever used for drawing? */ + GLchar *InfoLog; + + unsigned Version; /**< GLSL version used for linking */ + + /** + * Per-stage shaders resulting from the first stage of linking. + * + * Set of linked shaders for this program. The array is accessed using the + * \c MESA_SHADER_* defines. Entries for non-existent stages will be + * \c NULL. + */ + struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES]; +}; + + +#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */ +#define GLSL_LOG 0x2 /**< Write shaders to files */ +#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */ +#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */ +#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */ +#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */ +#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */ +#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */ + + +/** + * Context state for GLSL vertex/fragment shaders. + */ +struct gl_shader_state +{ + /** + * Programs used for rendering + * + * There is a separate program set for each shader stage. If + * GL_EXT_separate_shader_objects is not supported, each of these must point + * to \c NULL or to the same program. + */ + struct gl_shader_program *CurrentVertexProgram; + struct gl_shader_program *CurrentGeometryProgram; + struct gl_shader_program *CurrentFragmentProgram; + + /** + * Program used by glUniform calls. + * + * Explicitly set by \c glUseProgram and \c glActiveProgramEXT. + */ + struct gl_shader_program *ActiveProgram; + + void *MemPool; + + GLbitfield Flags; /**< Mask of GLSL_x flags */ +}; + +/** + * Compiler options for a single GLSL shaders type + */ +struct gl_shader_compiler_options +{ + /** Driver-selectable options: */ + GLboolean EmitCondCodes; /**< Use condition codes? */ + GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */ + /** + * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't + * support control flow. + */ + GLboolean EmitNoIfs; + GLboolean EmitNoLoops; + GLboolean EmitNoFunctions; + GLboolean EmitNoCont; /**< Emit CONT opcode? */ + GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */ + GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */ + GLboolean EmitNoPow; /**< Emit POW opcodes? */ + + /** + * \name Forms of indirect addressing the driver cannot do. + */ + /*@{*/ + GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */ + GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */ + GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */ + GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */ + /*@}*/ + + GLuint MaxUnrollIterations; + + struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */ +}; + +/** + * Transform feedback object state + */ +struct gl_transform_feedback_object +{ + GLuint Name; /**< AKA the object ID */ + GLint RefCount; + GLboolean Active; /**< Is transform feedback enabled? */ + GLboolean Paused; /**< Is transform feedback paused? */ + + /** The feedback buffers */ + GLuint BufferNames[MAX_FEEDBACK_ATTRIBS]; + struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS]; + + /** Start of feedback data in dest buffer */ + GLintptr Offset[MAX_FEEDBACK_ATTRIBS]; + /** Max data to put into dest buffer (in bytes) */ + GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS]; +}; + + +/** + * Context state for transform feedback. + */ +struct gl_transform_feedback +{ + GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */ + + GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */ + + /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */ + struct gl_buffer_object *CurrentBuffer; + + /** The table of all transform feedback objects */ + struct _mesa_HashTable *Objects; + + /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */ + struct gl_transform_feedback_object *CurrentObject; + + /** The default xform-fb object (Name==0) */ + struct gl_transform_feedback_object *DefaultObject; +}; + + + +/** + * State which can be shared by multiple contexts: + */ +struct gl_shared_state +{ + _glthread_Mutex Mutex; /**< for thread safety */ + GLint RefCount; /**< Reference count */ + struct _mesa_HashTable *DisplayList; /**< Display lists hash table */ + struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */ + + /** Default texture objects (shared by all texture units) */ + struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS]; + + /** Fallback texture used when a bound texture is incomplete */ + struct gl_texture_object *FallbackTex; + + /** + * \name Thread safety and statechange notification for texture + * objects. + * + * \todo Improve the granularity of locking. + */ + /*@{*/ + _glthread_Mutex TexMutex; /**< texobj thread safety */ + GLuint TextureStateStamp; /**< state notification for shared tex */ + /*@}*/ + + /** Default buffer object for vertex arrays that aren't in VBOs */ + struct gl_buffer_object *NullBufferObj; + + /** + * \name Vertex/geometry/fragment programs + */ + /*@{*/ + struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */ + struct gl_vertex_program *DefaultVertexProgram; + struct gl_fragment_program *DefaultFragmentProgram; + struct gl_geometry_program *DefaultGeometryProgram; + /*@}*/ + + /* GL_ATI_fragment_shader */ + struct _mesa_HashTable *ATIShaders; + struct ati_fragment_shader *DefaultFragmentShader; + + struct _mesa_HashTable *BufferObjects; + + /** Table of both gl_shader and gl_shader_program objects */ + struct _mesa_HashTable *ShaderObjects; + + /* GL_EXT_framebuffer_object */ + struct _mesa_HashTable *RenderBuffers; + struct _mesa_HashTable *FrameBuffers; + + /* GL_ARB_sync */ + struct simple_node SyncObjects; + + void *DriverData; /**< Device driver shared state */ +}; + + + + +/** + * A renderbuffer stores colors or depth values or stencil values. + * A framebuffer object will have a collection of these. + * Data are read/written to the buffer with a handful of Get/Put functions. + * + * Instances of this object are allocated with the Driver's NewRenderbuffer + * hook. Drivers will likely wrap this class inside a driver-specific + * class to simulate inheritance. + */ +struct gl_renderbuffer +{ +#define RB_MAGIC 0xaabbccdd + int Magic; /** XXX TEMPORARY DEBUG INFO */ + _glthread_Mutex Mutex; /**< for thread safety */ + GLuint ClassID; /**< Useful for drivers */ + GLuint Name; + GLint RefCount; + GLuint Width, Height; + GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */ + + GLenum InternalFormat; /**< The user-specified format */ + GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or + GL_STENCIL_INDEX. */ + gl_format Format; /**< The actual renderbuffer memory format */ + + GLubyte NumSamples; + + GLenum DataType; /**< Type of values passed to the Get/Put functions */ + GLvoid *Data; /**< This may not be used by some kinds of RBs */ + + /* Used to wrap one renderbuffer around another: */ + struct gl_renderbuffer *Wrapped; + + /* Delete this renderbuffer */ + void (*Delete)(struct gl_renderbuffer *rb); + + /* Allocate new storage for this renderbuffer */ + GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height); + + /* Lock/Unlock are called before/after calling the Get/Put functions. + * Not sure this is the right place for these yet. + void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb); + void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb); + */ + + /* Return a pointer to the element/pixel at (x,y). + * Should return NULL if the buffer memory can't be directly addressed. + */ + void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLint x, GLint y); + + /* Get/Read a row of values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, void *values); + + /* Get/Read values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. + */ + void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + const GLint x[], const GLint y[], void *values); + + /* Put/Write a row of values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask); + + /* Put/Write a row of RGB values. This is a special-case routine that's + * only used for RGBA renderbuffers when the source data is GL_RGB. That's + * a common case for glDrawPixels and some triangle routines. + * The values will be of format GL_RGB and type DataType. + */ + void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *values, const GLubyte *mask); + + + /* Put/Write a row of identical values. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + GLint x, GLint y, const void *value, const GLubyte *mask); + + /* Put/Write values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count, + const GLint x[], const GLint y[], const void *values, + const GLubyte *mask); + /* Put/Write identical values at arbitrary locations. + * The values will be of format _BaseFormat and type DataType. + */ + void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLuint count, const GLint x[], const GLint y[], + const void *value, const GLubyte *mask); +}; + + +/** + * A renderbuffer attachment points to either a texture object (and specifies + * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer. + */ +struct gl_renderbuffer_attachment +{ + GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */ + GLboolean Complete; + + /** + * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the + * application supplied renderbuffer object. + */ + struct gl_renderbuffer *Renderbuffer; + + /** + * If \c Type is \c GL_TEXTURE, this stores a pointer to the application + * supplied texture object. + */ + struct gl_texture_object *Texture; + GLuint TextureLevel; /**< Attached mipmap level. */ + GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */ + GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D + * and 2D array textures */ +}; + + +/** + * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc). + * In C++ terms, think of this as a base class from which device drivers + * will make derived classes. + */ +struct gl_framebuffer +{ + _glthread_Mutex Mutex; /**< for thread safety */ + /** + * If zero, this is a window system framebuffer. If non-zero, this + * is a FBO framebuffer; note that for some devices (i.e. those with + * a natural pixel coordinate system for FBOs that differs from the + * OpenGL/Mesa coordinate system), this means that the viewport, + * polygon face orientation, and polygon stipple will have to be inverted. + */ + GLuint Name; + + GLint RefCount; + GLboolean DeletePending; + + /** + * The framebuffer's visual. Immutable if this is a window system buffer. + * Computed from attachments if user-made FBO. + */ + struct gl_config Visual; + + GLboolean Initialized; + + GLuint Width, Height; /**< size of frame buffer in pixels */ + + /** \name Drawing bounds (Intersection of buffer size and scissor box) */ + /*@{*/ + GLint _Xmin, _Xmax; /**< inclusive */ + GLint _Ymin, _Ymax; /**< exclusive */ + /*@}*/ + + /** \name Derived Z buffer stuff */ + /*@{*/ + GLuint _DepthMax; /**< Max depth buffer value */ + GLfloat _DepthMaxF; /**< Float max depth buffer value */ + GLfloat _MRD; /**< minimum resolvable difference in Z values */ + /*@}*/ + + /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */ + GLenum _Status; + + /** Integer color values */ + GLboolean _IntegerColor; + + /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */ + struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT]; + + /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER + * attribute group and GL_PIXEL attribute group, respectively. + */ + GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS]; + GLenum ColorReadBuffer; + + /** Computed from ColorDraw/ReadBuffer above */ + GLuint _NumColorDrawBuffers; + GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */ + GLint _ColorReadBufferIndex; /* -1 = None */ + struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS]; + struct gl_renderbuffer *_ColorReadBuffer; + + /** The Actual depth/stencil buffers to use. May be wrappers around the + * depth/stencil buffers attached above. */ + struct gl_renderbuffer *_DepthBuffer; + struct gl_renderbuffer *_StencilBuffer; + + /** Delete this framebuffer */ + void (*Delete)(struct gl_framebuffer *fb); +}; + + +/** + * Precision info for shader datatypes. See glGetShaderPrecisionFormat(). + */ +struct gl_precision +{ + GLushort RangeMin; /**< min value exponent */ + GLushort RangeMax; /**< max value exponent */ + GLushort Precision; /**< number of mantissa bits */ +}; + + +/** + * Limits for vertex and fragment programs/shaders. + */ +struct gl_program_constants +{ + /* logical limits */ + GLuint MaxInstructions; + GLuint MaxAluInstructions; + GLuint MaxTexInstructions; + GLuint MaxTexIndirections; + GLuint MaxAttribs; + GLuint MaxTemps; + GLuint MaxAddressRegs; + GLuint MaxParameters; + GLuint MaxLocalParams; + GLuint MaxEnvParams; + /* native/hardware limits */ + GLuint MaxNativeInstructions; + GLuint MaxNativeAluInstructions; + GLuint MaxNativeTexInstructions; + GLuint MaxNativeTexIndirections; + GLuint MaxNativeAttribs; + GLuint MaxNativeTemps; + GLuint MaxNativeAddressRegs; + GLuint MaxNativeParameters; + /* For shaders */ + GLuint MaxUniformComponents; + /* GL_ARB_geometry_shader4 */ + GLuint MaxGeometryTextureImageUnits; + GLuint MaxGeometryVaryingComponents; + GLuint MaxVertexVaryingComponents; + GLuint MaxGeometryUniformComponents; + GLuint MaxGeometryOutputVertices; + GLuint MaxGeometryTotalOutputComponents; + /* ES 2.0 and GL_ARB_ES2_compatibility */ + struct gl_precision LowFloat, MediumFloat, HighFloat; + struct gl_precision LowInt, MediumInt, HighInt; +}; + + +/** + * Constants which may be overridden by device driver during context creation + * but are never changed after that. + */ +struct gl_constants +{ + GLint MaxTextureMbytes; /**< Max memory per image, in MB */ + GLint MaxTextureLevels; /**< Max mipmap levels. */ + GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */ + GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */ + GLint MaxArrayTextureLayers; /**< Max layers in array textures */ + GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixes */ + GLuint MaxTextureCoordUnits; + GLuint MaxTextureImageUnits; + GLuint MaxVertexTextureImageUnits; + GLuint MaxCombinedTextureImageUnits; + GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */ + GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */ + GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */ + + GLuint MaxArrayLockSize; + + GLint SubPixelBits; + + GLfloat MinPointSize, MaxPointSize; /**< aliased */ + GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */ + GLfloat PointSizeGranularity; + GLfloat MinLineWidth, MaxLineWidth; /**< aliased */ + GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */ + GLfloat LineWidthGranularity; + + GLuint MaxColorTableSize; + + GLuint MaxClipPlanes; + GLuint MaxLights; + GLfloat MaxShininess; /**< GL_NV_light_max_exponent */ + GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */ + + GLuint MaxViewportWidth, MaxViewportHeight; + + struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */ + struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */ + struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */ + GLuint MaxProgramMatrices; + GLuint MaxProgramMatrixStackDepth; + + /** vertex array / buffer object bounds checking */ + GLboolean CheckArrayBounds; + + GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */ + + GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */ + GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */ + GLuint MaxSamples; /**< GL_ARB_framebuffer_object */ + + GLuint MaxVarying; /**< Number of float[4] varying parameters */ + + GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */ + + /** Which texture units support GL_ATI_envmap_bumpmap as targets */ + GLbitfield SupportedBumpUnits; + + /** + * Maximum amount of time, measured in nanseconds, that the server can wait. + */ + GLuint64 MaxServerWaitTimeout; + + /** GL_EXT_provoking_vertex */ + GLboolean QuadsFollowProvokingVertexConvention; + + /** OpenGL version 3.0 */ + GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */ + + /** OpenGL version 3.2 */ + GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */ + + /** GL_EXT_transform_feedback */ + GLuint MaxTransformFeedbackSeparateAttribs; + GLuint MaxTransformFeedbackSeparateComponents; + GLuint MaxTransformFeedbackInterleavedComponents; + + /** GL_EXT_gpu_shader4 */ + GLint MinProgramTexelOffset, MaxProgramTexelOffset; + + /* GL_EXT_framebuffer_sRGB */ + GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */ +}; + + +/** + * Enable flag for each OpenGL extension. Different device drivers will + * enable different extensions at runtime. + */ +struct gl_extensions +{ + GLboolean dummy; /* don't remove this! */ + GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */ + GLboolean dummy_false; /* Set false by _mesa_init_extensions(). */ + GLboolean ARB_ES2_compatibility; + GLboolean ARB_blend_func_extended; + GLboolean ARB_copy_buffer; + GLboolean ARB_depth_buffer_float; + GLboolean ARB_depth_clamp; + GLboolean ARB_depth_texture; + GLboolean ARB_draw_buffers; + GLboolean ARB_draw_buffers_blend; + GLboolean ARB_draw_elements_base_vertex; + GLboolean ARB_draw_instanced; + GLboolean ARB_fragment_coord_conventions; + GLboolean ARB_fragment_program; + GLboolean ARB_fragment_program_shadow; + GLboolean ARB_fragment_shader; + GLboolean ARB_framebuffer_object; + GLboolean ARB_explicit_attrib_location; + GLboolean ARB_geometry_shader4; + GLboolean ARB_half_float_pixel; + GLboolean ARB_half_float_vertex; + GLboolean ARB_instanced_arrays; + GLboolean ARB_map_buffer_range; + GLboolean ARB_multisample; + GLboolean ARB_multitexture; + GLboolean ARB_occlusion_query; + GLboolean ARB_occlusion_query2; + GLboolean ARB_point_sprite; + GLboolean ARB_sampler_objects; + GLboolean ARB_seamless_cube_map; + GLboolean ARB_shader_objects; + GLboolean ARB_shader_stencil_export; + GLboolean ARB_shading_language_100; + GLboolean ARB_shadow; + GLboolean ARB_shadow_ambient; + GLboolean ARB_sync; + GLboolean ARB_texture_border_clamp; + GLboolean ARB_texture_buffer_object; + GLboolean ARB_texture_compression; + GLboolean ARB_texture_compression_rgtc; + GLboolean ARB_texture_cube_map; + GLboolean ARB_texture_env_combine; + GLboolean ARB_texture_env_crossbar; + GLboolean ARB_texture_env_dot3; + GLboolean ARB_texture_float; + GLboolean ARB_texture_mirrored_repeat; + GLboolean ARB_texture_multisample; + GLboolean ARB_texture_non_power_of_two; + GLboolean ARB_texture_rg; + GLboolean ARB_texture_rgb10_a2ui; + GLboolean ARB_timer_query; + GLboolean ARB_transform_feedback2; + GLboolean ARB_transpose_matrix; + GLboolean ARB_uniform_buffer_object; + GLboolean ARB_vertex_array_object; + GLboolean ARB_vertex_buffer_object; + GLboolean ARB_vertex_program; + GLboolean ARB_vertex_shader; + GLboolean ARB_vertex_type_2_10_10_10_rev; + GLboolean ARB_window_pos; + GLboolean EXT_abgr; + GLboolean EXT_bgra; + GLboolean EXT_blend_color; + GLboolean EXT_blend_equation_separate; + GLboolean EXT_blend_func_separate; + GLboolean EXT_blend_logic_op; + GLboolean EXT_blend_minmax; + GLboolean EXT_blend_subtract; + GLboolean EXT_clip_volume_hint; + GLboolean EXT_compiled_vertex_array; + GLboolean EXT_copy_texture; + GLboolean EXT_depth_bounds_test; + GLboolean EXT_draw_buffers2; + GLboolean EXT_draw_range_elements; + GLboolean EXT_fog_coord; + GLboolean EXT_framebuffer_blit; + GLboolean EXT_framebuffer_multisample; + GLboolean EXT_framebuffer_object; + GLboolean EXT_framebuffer_sRGB; + GLboolean EXT_gpu_program_parameters; + GLboolean EXT_gpu_shader4; + GLboolean EXT_multi_draw_arrays; + GLboolean EXT_paletted_texture; + GLboolean EXT_packed_depth_stencil; + GLboolean EXT_packed_float; + GLboolean EXT_packed_pixels; + GLboolean EXT_pixel_buffer_object; + GLboolean EXT_point_parameters; + GLboolean EXT_polygon_offset; + GLboolean EXT_provoking_vertex; + GLboolean EXT_rescale_normal; + GLboolean EXT_shadow_funcs; + GLboolean EXT_secondary_color; + GLboolean EXT_separate_shader_objects; + GLboolean EXT_separate_specular_color; + GLboolean EXT_shared_texture_palette; + GLboolean EXT_stencil_wrap; + GLboolean EXT_stencil_two_side; + GLboolean EXT_subtexture; + GLboolean EXT_texture; + GLboolean EXT_texture_object; + GLboolean EXT_texture3D; + GLboolean EXT_texture_array; + GLboolean EXT_texture_compression_s3tc; + GLboolean EXT_texture_env_add; + GLboolean EXT_texture_env_combine; + GLboolean EXT_texture_env_dot3; + GLboolean EXT_texture_filter_anisotropic; + GLboolean EXT_texture_integer; + GLboolean EXT_texture_lod_bias; + GLboolean EXT_texture_mirror_clamp; + GLboolean EXT_texture_shared_exponent; + GLboolean EXT_texture_sRGB; + GLboolean EXT_texture_sRGB_decode; + GLboolean EXT_texture_swizzle; + GLboolean EXT_transform_feedback; + GLboolean EXT_timer_query; + GLboolean EXT_vertex_array; + GLboolean EXT_vertex_array_bgra; + GLboolean EXT_vertex_array_set; + GLboolean OES_standard_derivatives; + /* vendor extensions */ + GLboolean AMD_conservative_depth; + GLboolean APPLE_client_storage; + GLboolean APPLE_packed_pixels; + GLboolean APPLE_vertex_array_object; + GLboolean APPLE_object_purgeable; + GLboolean ATI_envmap_bumpmap; + GLboolean ATI_texture_mirror_once; + GLboolean ATI_texture_env_combine3; + GLboolean ATI_fragment_shader; + GLboolean ATI_separate_stencil; + GLboolean IBM_rasterpos_clip; + GLboolean IBM_multimode_draw_arrays; + GLboolean MESA_pack_invert; + GLboolean MESA_resize_buffers; + GLboolean MESA_ycbcr_texture; + GLboolean MESA_texture_array; + GLboolean MESA_texture_signed_rgba; + GLboolean NV_blend_square; + GLboolean NV_conditional_render; + GLboolean NV_fragment_program; + GLboolean NV_fragment_program_option; + GLboolean NV_light_max_exponent; + GLboolean NV_point_sprite; + GLboolean NV_primitive_restart; + GLboolean NV_texgen_reflection; + GLboolean NV_texture_env_combine4; + GLboolean NV_texture_rectangle; + GLboolean NV_vertex_program; + GLboolean NV_vertex_program1_1; + GLboolean OES_read_format; + GLboolean SGI_texture_color_table; + GLboolean SGIS_generate_mipmap; + GLboolean SGIS_texture_edge_clamp; + GLboolean SGIS_texture_lod; + GLboolean TDFX_texture_compression_FXT1; + GLboolean S3_s3tc; + GLboolean OES_EGL_image; + GLboolean OES_draw_texture; + GLboolean EXT_texture_format_BGRA8888; + GLboolean extension_sentinel; + /** The extension string */ + const GLubyte *String; + /** Number of supported extensions */ + GLuint Count; +}; + + +/** + * A stack of matrices (projection, modelview, color, texture, etc). + */ +struct gl_matrix_stack +{ + GLmatrix *Top; /**< points into Stack */ + GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */ + GLuint Depth; /**< 0 <= Depth < MaxDepth */ + GLuint MaxDepth; /**< size of Stack[] array */ + GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */ +}; + + +/** + * \name Bits for image transfer operations + * \sa __struct gl_contextRec::ImageTransferState. + */ +/*@{*/ +#define IMAGE_SCALE_BIAS_BIT 0x1 +#define IMAGE_SHIFT_OFFSET_BIT 0x2 +#define IMAGE_MAP_COLOR_BIT 0x4 +#define IMAGE_CLAMP_BIT 0x800 + + +/** Pixel Transfer ops */ +#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \ + IMAGE_SHIFT_OFFSET_BIT | \ + IMAGE_MAP_COLOR_BIT) + +/** + * \name Bits to indicate what state has changed. + */ +/*@{*/ +#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */ +#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */ +#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */ +#define _NEW_COLOR (1 << 3) /**< gl_context::Color */ +#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */ +#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */ +#define _NEW_FOG (1 << 6) /**< gl_context::Fog */ +#define _NEW_HINT (1 << 7) /**< gl_context::Hint */ +#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */ +#define _NEW_LINE (1 << 9) /**< gl_context::Line */ +#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */ +#define _NEW_POINT (1 << 11) /**< gl_context::Point */ +#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */ +#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */ +#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */ +#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */ +#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */ +#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */ +#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */ +#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */ +#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */ +#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */ +#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */ +#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */ +#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */ +#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */ +#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */ +#define _NEW_PROGRAM_CONSTANTS (1 << 27) +#define _NEW_BUFFER_OBJECT (1 << 28) +#define _NEW_ALL ~0 +/*@}*/ + + +/** + * \name Bits to track array state changes + * + * Also used to summarize array enabled. + */ +/*@{*/ +#define _NEW_ARRAY_VERTEX VERT_BIT_POS +#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT +#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL +#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0 +#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1 +#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG +#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX +#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG +#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */ +#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0 +#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1 +#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2 +#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3 +#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4 +#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5 +#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6 +#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7 +#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */ +#define _NEW_ARRAY_ALL 0xffffffff + + +#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i)) +#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i)) +/*@}*/ + + + +/** + * \name A bunch of flags that we think might be useful to drivers. + * + * Set in the __struct gl_contextRec::_TriangleCaps bitfield. + */ +/*@{*/ +#define DD_FLATSHADE 0x1 +#define DD_SEPARATE_SPECULAR 0x2 +#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */ +#define DD_TRI_LIGHT_TWOSIDE 0x8 +#define DD_TRI_UNFILLED 0x10 +#define DD_TRI_SMOOTH 0x20 +#define DD_TRI_STIPPLE 0x40 +#define DD_TRI_OFFSET 0x80 +#define DD_LINE_SMOOTH 0x100 +#define DD_LINE_STIPPLE 0x200 +#define DD_POINT_SMOOTH 0x400 +#define DD_POINT_ATTEN 0x800 +#define DD_TRI_TWOSTENCIL 0x1000 +/*@}*/ + + +/** + * \name Define the state changes under which each of these bits might change + */ +/*@{*/ +#define _DD_NEW_FLATSHADE _NEW_LIGHT +#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM) +#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON +#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT +#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON +#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON +#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON +#define _DD_NEW_TRI_OFFSET _NEW_POLYGON +#define _DD_NEW_LINE_SMOOTH _NEW_LINE +#define _DD_NEW_LINE_STIPPLE _NEW_LINE +#define _DD_NEW_LINE_WIDTH _NEW_LINE +#define _DD_NEW_POINT_SMOOTH _NEW_POINT +#define _DD_NEW_POINT_SIZE _NEW_POINT +#define _DD_NEW_POINT_ATTEN _NEW_POINT +/*@}*/ + + +/** + * Composite state flags + */ +/*@{*/ +#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \ + _NEW_TEXTURE | \ + _NEW_POINT | \ + _NEW_PROGRAM | \ + _NEW_MODELVIEW) + +#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \ + _NEW_TEXTURE) + +#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL) +/*@}*/ + + + + +/* This has to be included here. */ +#include "dd.h" + + +/** + * Display list flags. + * Strictly this is a tnl-private concept, but it doesn't seem + * worthwhile adding a tnl private structure just to hold this one bit + * of information: + */ +#define DLIST_DANGLING_REFS 0x1 + + +/** Opaque declaration of display list payload data type */ +union gl_dlist_node; + + +/** + * Provide a location where information about a display list can be + * collected. Could be extended with driverPrivate structures, + * etc. in the future. + */ +struct gl_display_list +{ + GLuint Name; + GLbitfield Flags; /**< DLIST_x flags */ + /** The dlist commands are in a linked list of nodes */ + union gl_dlist_node *Head; +}; + + +/** + * State used during display list compilation and execution. + */ +struct gl_dlist_state +{ + GLuint CallDepth; /**< Current recursion calling depth */ + + struct gl_display_list *CurrentList; /**< List currently being compiled */ + union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */ + GLuint CurrentPos; /**< Index into current block of nodes */ + + GLvertexformat ListVtxfmt; + + GLubyte ActiveAttribSize[VERT_ATTRIB_MAX]; + GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4]; + + GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX]; + GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4]; + + GLubyte ActiveIndex; + GLfloat CurrentIndex; + + GLubyte ActiveEdgeFlag; + GLboolean CurrentEdgeFlag; + + struct { + /* State known to have been set by the currently-compiling display + * list. Used to eliminate some redundant state changes. + */ + GLenum ShadeModel; + } Current; +}; + + +/** + * Enum for the OpenGL APIs we know about and may support. + */ +typedef enum +{ + API_OPENGL, + API_OPENGLES, + API_OPENGLES2 +} gl_api; + + +/** + * Mesa rendering context. + * + * This is the central context data structure for Mesa. Almost all + * OpenGL state is contained in this structure. + * Think of this as a base class from which device drivers will derive + * sub classes. + * + * The struct gl_context typedef names this structure. + */ +struct gl_context +{ + /** State possibly shared with other contexts in the address space */ + struct gl_shared_state *Shared; + + /** \name API function pointer tables */ + /*@{*/ + gl_api API; + struct _glapi_table *Save; /**< Display list save functions */ + struct _glapi_table *Exec; /**< Execute functions */ + struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */ + /*@}*/ + + struct gl_config Visual; + struct gl_framebuffer *DrawBuffer; /**< buffer for writing */ + struct gl_framebuffer *ReadBuffer; /**< buffer for reading */ + struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */ + struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */ + + /** + * Device driver function pointer table + */ + struct dd_function_table Driver; + + void *DriverCtx; /**< Points to device driver context/state */ + + /** Core/Driver constants */ + struct gl_constants Const; + + /** \name The various 4x4 matrix stacks */ + /*@{*/ + struct gl_matrix_stack ModelviewMatrixStack; + struct gl_matrix_stack ProjectionMatrixStack; + struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS]; + struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES]; + struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */ + /*@}*/ + + /** Combined modelview and projection matrix */ + GLmatrix _ModelProjectMatrix; + + /** \name Display lists */ + struct gl_dlist_state ListState; + + GLboolean ExecuteFlag; /**< Execute GL commands? */ + GLboolean CompileFlag; /**< Compile GL commands into display list? */ + + /** Extension information */ + struct gl_extensions Extensions; + + /** Version info */ + GLuint VersionMajor, VersionMinor; + char *VersionString; + + /** \name State attribute stack (for glPush/PopAttrib) */ + /*@{*/ + GLuint AttribStackDepth; + struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH]; + /*@}*/ + + /** \name Renderer attribute groups + * + * We define a struct for each attribute group to make pushing and popping + * attributes easy. Also it's a good organization. + */ + /*@{*/ + struct gl_accum_attrib Accum; /**< Accum buffer attributes */ + struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */ + struct gl_current_attrib Current; /**< Current attributes */ + struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */ + struct gl_eval_attrib Eval; /**< Eval attributes */ + struct gl_fog_attrib Fog; /**< Fog attributes */ + struct gl_hint_attrib Hint; /**< Hint attributes */ + struct gl_light_attrib Light; /**< Light attributes */ + struct gl_line_attrib Line; /**< Line attributes */ + struct gl_list_attrib List; /**< List attributes */ + struct gl_multisample_attrib Multisample; + struct gl_pixel_attrib Pixel; /**< Pixel attributes */ + struct gl_point_attrib Point; /**< Point attributes */ + struct gl_polygon_attrib Polygon; /**< Polygon attributes */ + GLuint PolygonStipple[32]; /**< Polygon stipple */ + struct gl_scissor_attrib Scissor; /**< Scissor attributes */ + struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */ + struct gl_texture_attrib Texture; /**< Texture attributes */ + struct gl_transform_attrib Transform; /**< Transformation attributes */ + struct gl_viewport_attrib Viewport; /**< Viewport attributes */ + /*@}*/ + + /** \name Client attribute stack */ + /*@{*/ + GLuint ClientAttribStackDepth; + struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH]; + /*@}*/ + + /** \name Client attribute groups */ + /*@{*/ + struct gl_array_attrib Array; /**< Vertex arrays */ + struct gl_pixelstore_attrib Pack; /**< Pixel packing */ + struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */ + struct gl_pixelstore_attrib DefaultPacking; /**< Default params */ + /*@}*/ + + /** \name Other assorted state (not pushed/popped on attribute stack) */ + /*@{*/ + struct gl_pixelmaps PixelMaps; + + struct gl_evaluators EvalMap; /**< All evaluators */ + struct gl_feedback Feedback; /**< Feedback */ + struct gl_selection Select; /**< Selection */ + + struct gl_program_state Program; /**< general program state */ + struct gl_vertex_program_state VertexProgram; + struct gl_fragment_program_state FragmentProgram; + struct gl_geometry_program_state GeometryProgram; + struct gl_ati_fragment_shader_state ATIFragmentShader; + + struct gl_shader_state Shader; /**< GLSL shader object state */ + struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES]; + + struct gl_query_state Query; /**< occlusion, timer queries */ + + struct gl_transform_feedback TransformFeedback; + + struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */ + struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */ + /*@}*/ + + struct gl_meta_state *Meta; /**< for "meta" operations */ + + /* GL_EXT_framebuffer_object */ + struct gl_renderbuffer *CurrentRenderbuffer; + + GLenum ErrorValue; /**< Last error code */ + + /** + * Recognize and silence repeated error debug messages in buggy apps. + */ + const char *ErrorDebugFmtString; + GLuint ErrorDebugCount; + + GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ + GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ + + GLboolean ViewportInitialized; /**< has viewport size been initialized? */ + + GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ + + /** \name Derived state */ + /*@{*/ + /** Bitwise-or of DD_* flags. Note that this bitfield may be used before + * state validation so they need to always be current. + */ + GLbitfield _TriangleCaps; + GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */ + GLfloat _EyeZDir[3]; + GLfloat _ModelViewInvScale; + GLboolean _NeedEyeCoords; + GLboolean _ForceEyeCoords; + + GLuint TextureStateTimestamp; /**< detect changes to shared state */ + + struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */ + struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */ + /**@}*/ + + struct gl_list_extensions *ListExt; /**< driver dlist extensions */ + + /** \name For debugging/development only */ + /*@{*/ + GLboolean FirstTimeCurrent; + /*@}*/ + + /** Dither disable via MESA_NO_DITHER env var */ + GLboolean NoDither; + + /** software compression/decompression supported or not */ + GLboolean Mesa_DXTn; + + GLboolean TextureFormatSupported[MESA_FORMAT_COUNT]; + + /** + * Use dp4 (rather than mul/mad) instructions for position + * transformation? + */ + GLboolean mvp_with_dp4; + + /** + * \name Hooks for module contexts. + * + * These will eventually live in the driver or elsewhere. + */ + /*@{*/ + void *swrast_context; + void *swsetup_context; + void *swtnl_context; + void *swtnl_im; + struct st_context *st; + void *aelt_context; + /*@}*/ +}; + + +#ifdef DEBUG +extern int MESA_VERBOSE; +extern int MESA_DEBUG_FLAGS; +# define MESA_FUNCTION __FUNCTION__ +#else +# define MESA_VERBOSE 0 +# define MESA_DEBUG_FLAGS 0 +# define MESA_FUNCTION "a function" +# ifndef NDEBUG +# define NDEBUG +# endif +#endif + + +/** The MESA_VERBOSE var is a bitmask of these flags */ +enum _verbose +{ + VERBOSE_VARRAY = 0x0001, + VERBOSE_TEXTURE = 0x0002, + VERBOSE_MATERIAL = 0x0004, + VERBOSE_PIPELINE = 0x0008, + VERBOSE_DRIVER = 0x0010, + VERBOSE_STATE = 0x0020, + VERBOSE_API = 0x0040, + VERBOSE_DISPLAY_LIST = 0x0100, + VERBOSE_LIGHTING = 0x0200, + VERBOSE_PRIMS = 0x0400, + VERBOSE_VERTS = 0x0800, + VERBOSE_DISASSEM = 0x1000, + VERBOSE_DRAW = 0x2000, + VERBOSE_SWAPBUFFERS = 0x4000 +}; + + +/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */ +enum _debug +{ + DEBUG_ALWAYS_FLUSH = 0x1 +}; + + + +#endif /* MTYPES_H */ diff --git a/mesalib/src/mesa/main/state.c b/mesalib/src/mesa/main/state.c index c07e2c380..502c42929 100644 --- a/mesalib/src/mesa/main/state.c +++ b/mesalib/src/mesa/main/state.c @@ -1,732 +1,734 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.3
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * \file state.c
- * State management.
- *
- * This file manages recalculation of derived values in struct gl_context.
- */
-
-
-#include "glheader.h"
-#include "mtypes.h"
-#include "context.h"
-#include "debug.h"
-#include "macros.h"
-#include "ffvertex_prog.h"
-#include "framebuffer.h"
-#include "light.h"
-#include "matrix.h"
-#include "pixel.h"
-#include "program/program.h"
-#include "program/prog_parameter.h"
-#include "state.h"
-#include "stencil.h"
-#include "texenvprogram.h"
-#include "texobj.h"
-#include "texstate.h"
-
-
-static void
-update_separate_specular(struct gl_context *ctx)
-{
- if (NEED_SECONDARY_COLOR(ctx))
- ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
- else
- ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR;
-}
-
-
-/**
- * Compute the index of the last array element that can be safely accessed
- * in a vertex array. We can really only do this when the array lives in
- * a VBO.
- * The array->_MaxElement field will be updated.
- * Later in glDrawArrays/Elements/etc we can do some bounds checking.
- */
-static void
-compute_max_element(struct gl_client_array *array)
-{
- assert(array->Enabled);
- if (array->BufferObj->Name) {
- GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr;
- GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size;
-
- if (offset < obj_size) {
- array->_MaxElement = (obj_size - offset +
- array->StrideB -
- array->_ElementSize) / array->StrideB;
- } else {
- array->_MaxElement = 0;
- }
- }
- else {
- /* user-space array, no idea how big it is */
- array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */
- }
-}
-
-
-/**
- * Helper for update_arrays().
- * \return min(current min, array->_MaxElement).
- */
-static GLuint
-update_min(GLuint min, struct gl_client_array *array)
-{
- compute_max_element(array);
- return MIN2(min, array->_MaxElement);
-}
-
-
-/**
- * Update ctx->Array._MaxElement (the max legal index into all enabled arrays).
- * Need to do this upon new array state or new buffer object state.
- */
-static void
-update_arrays( struct gl_context *ctx )
-{
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- GLuint i, min = ~0;
-
- /* find min of _MaxElement values for all enabled arrays */
-
- /* 0 */
- if (ctx->VertexProgram._Current
- && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]);
- }
- else if (arrayObj->Vertex.Enabled) {
- min = update_min(min, &arrayObj->Vertex);
- }
-
- /* 1 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]);
- }
- /* no conventional vertex weight array */
-
- /* 2 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]);
- }
- else if (arrayObj->Normal.Enabled) {
- min = update_min(min, &arrayObj->Normal);
- }
-
- /* 3 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]);
- }
- else if (arrayObj->Color.Enabled) {
- min = update_min(min, &arrayObj->Color);
- }
-
- /* 4 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]);
- }
- else if (arrayObj->SecondaryColor.Enabled) {
- min = update_min(min, &arrayObj->SecondaryColor);
- }
-
- /* 5 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]);
- }
- else if (arrayObj->FogCoord.Enabled) {
- min = update_min(min, &arrayObj->FogCoord);
- }
-
- /* 6 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]);
- }
- else if (arrayObj->Index.Enabled) {
- min = update_min(min, &arrayObj->Index);
- }
-
- /* 7 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]);
- }
-
- /* 8..15 */
- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) {
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[i].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[i]);
- }
- else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits
- && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) {
- min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]);
- }
- }
-
- /* 16..31 */
- if (ctx->VertexProgram._Current) {
- for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
- if (arrayObj->VertexAttrib[i].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[i]);
- }
- }
- }
-
- if (arrayObj->EdgeFlag.Enabled) {
- min = update_min(min, &arrayObj->EdgeFlag);
- }
-
- /* _MaxElement is one past the last legal array element */
- arrayObj->_MaxElement = min;
-}
-
-
-/**
- * Update the following fields:
- * ctx->VertexProgram._Enabled
- * ctx->FragmentProgram._Enabled
- * ctx->ATIFragmentShader._Enabled
- * This needs to be done before texture state validation.
- */
-static void
-update_program_enables(struct gl_context *ctx)
-{
- /* These _Enabled flags indicate if the program is enabled AND valid. */
- ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled
- && ctx->VertexProgram.Current->Base.Instructions;
- ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled
- && ctx->FragmentProgram.Current->Base.Instructions;
- ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled
- && ctx->ATIFragmentShader.Current->Instructions[0];
-}
-
-
-/**
- * Update vertex/fragment program state. In particular, update these fields:
- * ctx->VertexProgram._Current
- * ctx->VertexProgram._TnlProgram,
- * These point to the highest priority enabled vertex/fragment program or are
- * NULL if fixed-function processing is to be done.
- *
- * This function needs to be called after texture state validation in case
- * we're generating a fragment program from fixed-function texture state.
- *
- * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex
- * or fragment program is being used.
- */
-static GLbitfield
-update_program(struct gl_context *ctx)
-{
- const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram;
- const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram;
- const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram;
- const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
- const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
- const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
- GLbitfield new_state = 0x0;
-
- /*
- * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current
- * pointers to the programs that should be used for rendering. If either
- * is NULL, use fixed-function code paths.
- *
- * These programs may come from several sources. The priority is as
- * follows:
- * 1. OpenGL 2.0/ARB vertex/fragment shaders
- * 2. ARB/NV vertex/fragment programs
- * 3. Programs derived from fixed-function state.
- *
- * Note: it's possible for a vertex shader to get used with a fragment
- * program (and vice versa) here, but in practice that shouldn't ever
- * come up, or matter.
- */
-
- if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) {
- /* Use shader programs */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- fsProg->FragmentProgram);
- }
- else if (ctx->FragmentProgram._Enabled) {
- /* use user-defined vertex program */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- ctx->FragmentProgram.Current);
- }
- else if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- /* Use fragment program generated from fixed-function state.
- */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- _mesa_get_fixed_func_fragment_program(ctx));
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram,
- ctx->FragmentProgram._Current);
- }
- else {
- /* no fragment program */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL);
- }
-
- if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) {
- /* Use shader programs */
- _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current,
- gsProg->GeometryProgram);
- } else {
- /* no fragment program */
- _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
- }
-
- /* Examine vertex program after fragment program as
- * _mesa_get_fixed_func_vertex_program() needs to know active
- * fragprog inputs.
- */
- if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) {
- /* Use shader programs */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- vsProg->VertexProgram);
- }
- else if (ctx->VertexProgram._Enabled) {
- /* use user-defined vertex program */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- ctx->VertexProgram.Current);
- }
- else if (ctx->VertexProgram._MaintainTnlProgram) {
- /* Use vertex program generated from fixed-function state.
- */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- _mesa_get_fixed_func_vertex_program(ctx));
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram,
- ctx->VertexProgram._Current);
- }
- else {
- /* no vertex program */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
- }
-
- /* Let the driver know what's happening:
- */
- if (ctx->FragmentProgram._Current != prevFP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
- (struct gl_program *) ctx->FragmentProgram._Current);
- }
- }
-
- if (ctx->GeometryProgram._Current != prevGP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM,
- (struct gl_program *) ctx->GeometryProgram._Current);
- }
- }
-
- if (ctx->VertexProgram._Current != prevVP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
- (struct gl_program *) ctx->VertexProgram._Current);
- }
- }
-
- return new_state;
-}
-
-
-/**
- * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0.
- */
-static GLbitfield
-update_program_constants(struct gl_context *ctx)
-{
- GLbitfield new_state = 0x0;
-
- if (ctx->FragmentProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->FragmentProgram._Current->Base.Parameters;
- if (params && params->StateFlags & ctx->NewState) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- if (ctx->GeometryProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->GeometryProgram._Current->Base.Parameters;
- /*FIXME: StateFlags is always 0 because we have unnamed constant
- * not state changes */
- if (params /*&& params->StateFlags & ctx->NewState*/) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- if (ctx->VertexProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->VertexProgram._Current->Base.Parameters;
- if (params && params->StateFlags & ctx->NewState) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- return new_state;
-}
-
-
-
-
-static void
-update_viewport_matrix(struct gl_context *ctx)
-{
- const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
-
- ASSERT(depthMax > 0);
-
- /* Compute scale and bias values. This is really driver-specific
- * and should be maintained elsewhere if at all.
- * NOTE: RasterPos uses this.
- */
- _math_matrix_viewport(&ctx->Viewport._WindowMap,
- ctx->Viewport.X, ctx->Viewport.Y,
- ctx->Viewport.Width, ctx->Viewport.Height,
- ctx->Viewport.Near, ctx->Viewport.Far,
- depthMax);
-}
-
-
-/**
- * Update derived multisample state.
- */
-static void
-update_multisample(struct gl_context *ctx)
-{
- ctx->Multisample._Enabled = GL_FALSE;
- if (ctx->Multisample.Enabled &&
- ctx->DrawBuffer &&
- ctx->DrawBuffer->Visual.sampleBuffers)
- ctx->Multisample._Enabled = GL_TRUE;
-}
-
-
-/**
- * Update derived color/blend/logicop state.
- */
-static void
-update_color(struct gl_context *ctx)
-{
- /* This is needed to support 1.1's RGB logic ops AND
- * 1.0's blending logicops.
- */
- ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx);
-}
-
-
-/*
- * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET
- * in ctx->_TriangleCaps if needed.
- */
-static void
-update_polygon(struct gl_context *ctx)
-{
- ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET);
-
- if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
-
- if ( ctx->Polygon.OffsetPoint
- || ctx->Polygon.OffsetLine
- || ctx->Polygon.OffsetFill)
- ctx->_TriangleCaps |= DD_TRI_OFFSET;
-}
-
-
-/**
- * Update the ctx->_TriangleCaps bitfield.
- * XXX that bitfield should really go away someday!
- * This function must be called after other update_*() functions since
- * there are dependencies on some other derived values.
- */
-#if 0
-static void
-update_tricaps(struct gl_context *ctx, GLbitfield new_state)
-{
- ctx->_TriangleCaps = 0;
-
- /*
- * Points
- */
- if (1/*new_state & _NEW_POINT*/) {
- if (ctx->Point.SmoothFlag)
- ctx->_TriangleCaps |= DD_POINT_SMOOTH;
- if (ctx->Point._Attenuated)
- ctx->_TriangleCaps |= DD_POINT_ATTEN;
- }
-
- /*
- * Lines
- */
- if (1/*new_state & _NEW_LINE*/) {
- if (ctx->Line.SmoothFlag)
- ctx->_TriangleCaps |= DD_LINE_SMOOTH;
- if (ctx->Line.StippleFlag)
- ctx->_TriangleCaps |= DD_LINE_STIPPLE;
- }
-
- /*
- * Polygons
- */
- if (1/*new_state & _NEW_POLYGON*/) {
- if (ctx->Polygon.SmoothFlag)
- ctx->_TriangleCaps |= DD_TRI_SMOOTH;
- if (ctx->Polygon.StippleFlag)
- ctx->_TriangleCaps |= DD_TRI_STIPPLE;
- if (ctx->Polygon.FrontMode != GL_FILL
- || ctx->Polygon.BackMode != GL_FILL)
- ctx->_TriangleCaps |= DD_TRI_UNFILLED;
- if (ctx->Polygon.CullFlag
- && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
- if (ctx->Polygon.OffsetPoint ||
- ctx->Polygon.OffsetLine ||
- ctx->Polygon.OffsetFill)
- ctx->_TriangleCaps |= DD_TRI_OFFSET;
- }
-
- /*
- * Lighting and shading
- */
- if (ctx->Light.Enabled && ctx->Light.Model.TwoSide)
- ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE;
- if (ctx->Light.ShadeModel == GL_FLAT)
- ctx->_TriangleCaps |= DD_FLATSHADE;
- if (NEED_SECONDARY_COLOR(ctx))
- ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
-
- /*
- * Stencil
- */
- if (ctx->Stencil._TestTwoSide)
- ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL;
-}
-#endif
-
-
-/**
- * Compute derived GL state.
- * If __struct gl_contextRec::NewState is non-zero then this function \b must
- * be called before rendering anything.
- *
- * Calls dd_function_table::UpdateState to perform any internal state
- * management necessary.
- *
- * \sa _mesa_update_modelview_project(), _mesa_update_texture(),
- * _mesa_update_buffer_bounds(),
- * _mesa_update_lighting() and _mesa_update_tnl_spaces().
- */
-void
-_mesa_update_state_locked( struct gl_context *ctx )
-{
- GLbitfield new_state = ctx->NewState;
- GLbitfield prog_flags = _NEW_PROGRAM;
- GLbitfield new_prog_state = 0x0;
-
- if (new_state == _NEW_CURRENT_ATTRIB)
- goto out;
-
- if (MESA_VERBOSE & VERBOSE_STATE)
- _mesa_print_state("_mesa_update_state", new_state);
-
- /* Determine which state flags effect vertex/fragment program state */
- if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG |
- _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE |
- _NEW_PROGRAM);
- }
- if (ctx->VertexProgram._MaintainTnlProgram) {
- prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
- _NEW_TRANSFORM | _NEW_POINT |
- _NEW_FOG | _NEW_LIGHT |
- _MESA_NEW_NEED_EYE_COORDS);
- }
-
- /*
- * Now update derived state info
- */
-
- if (new_state & prog_flags)
- update_program_enables( ctx );
-
- if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
- _mesa_update_modelview_project( ctx, new_state );
-
- if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX))
- _mesa_update_texture( ctx, new_state );
-
- if (new_state & _NEW_BUFFERS)
- _mesa_update_framebuffer(ctx);
-
- if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
- _mesa_update_draw_buffer_bounds( ctx );
-
- if (new_state & _NEW_POLYGON)
- update_polygon( ctx );
-
- if (new_state & _NEW_LIGHT)
- _mesa_update_lighting( ctx );
-
- if (new_state & (_NEW_STENCIL | _NEW_BUFFERS))
- _mesa_update_stencil( ctx );
-
- if (new_state & _MESA_NEW_TRANSFER_STATE)
- _mesa_update_pixel( ctx, new_state );
-
- if (new_state & _DD_NEW_SEPARATE_SPECULAR)
- update_separate_specular( ctx );
-
- if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT))
- update_viewport_matrix(ctx);
-
- if (new_state & _NEW_MULTISAMPLE)
- update_multisample( ctx );
-
- if (new_state & _NEW_COLOR)
- update_color( ctx );
-
-#if 0
- if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT
- | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR))
- update_tricaps( ctx, new_state );
-#endif
-
- /* ctx->_NeedEyeCoords is now up to date.
- *
- * If the truth value of this variable has changed, update for the
- * new lighting space and recompute the positions of lights and the
- * normal transform.
- *
- * If the lighting space hasn't changed, may still need to recompute
- * light positions & normal transforms for other reasons.
- */
- if (new_state & _MESA_NEW_NEED_EYE_COORDS)
- _mesa_update_tnl_spaces( ctx, new_state );
-
- if (new_state & prog_flags) {
- /* When we generate programs from fixed-function vertex/fragment state
- * this call may generate/bind a new program. If so, we need to
- * propogate the _NEW_PROGRAM flag to the driver.
- */
- new_prog_state |= update_program( ctx );
- }
-
- if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT))
- update_arrays( ctx );
-
- out:
- new_prog_state |= update_program_constants(ctx);
-
- /*
- * Give the driver a chance to act upon the new_state flags.
- * The driver might plug in different span functions, for example.
- * Also, this is where the driver can invalidate the state of any
- * active modules (such as swrast_setup, swrast, tnl, etc).
- *
- * Set ctx->NewState to zero to avoid recursion if
- * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?)
- */
- new_state = ctx->NewState | new_prog_state;
- ctx->NewState = 0;
- ctx->Driver.UpdateState(ctx, new_state);
- ctx->Array.NewState = 0;
-}
-
-
-/* This is the usual entrypoint for state updates:
- */
-void
-_mesa_update_state( struct gl_context *ctx )
-{
- _mesa_lock_context_textures(ctx);
- _mesa_update_state_locked(ctx);
- _mesa_unlock_context_textures(ctx);
-}
-
-
-
-
-/**
- * Want to figure out which fragment program inputs are actually
- * constant/current values from ctx->Current. These should be
- * referenced as a tracked state variable rather than a fragment
- * program input, to save the overhead of putting a constant value in
- * every submitted vertex, transferring it to hardware, interpolating
- * it across the triangle, etc...
- *
- * When there is a VP bound, just use vp->outputs. But when we're
- * generating vp from fixed function state, basically want to
- * calculate:
- *
- * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) |
- * potential_vp_outputs )
- *
- * Where potential_vp_outputs is calculated by looking at enabled
- * texgen, etc.
- *
- * The generated fragment program should then only declare inputs that
- * may vary or otherwise differ from the ctx->Current values.
- * Otherwise, the fp should track them as state values instead.
- */
-void
-_mesa_set_varying_vp_inputs( struct gl_context *ctx,
- GLbitfield varying_inputs )
-{
- if (ctx->varying_vp_inputs != varying_inputs) {
- ctx->varying_vp_inputs = varying_inputs;
- ctx->NewState |= _NEW_ARRAY;
- /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/
- }
-}
-
-
-/**
- * Used by drivers to tell core Mesa that the driver is going to
- * install/ use its own vertex program. In particular, this will
- * prevent generated fragment programs from using state vars instead
- * of ordinary varyings/inputs.
- */
-void
-_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag)
-{
- if (ctx->VertexProgram._Overriden != flag) {
- ctx->VertexProgram._Overriden = flag;
-
- /* Set one of the bits which will trigger fragment program
- * regeneration:
- */
- ctx->NewState |= _NEW_PROGRAM;
- }
-}
+/* + * Mesa 3-D graphics library + * Version: 7.3 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * \file state.c + * State management. + * + * This file manages recalculation of derived values in struct gl_context. + */ + + +#include "glheader.h" +#include "mtypes.h" +#include "context.h" +#include "debug.h" +#include "macros.h" +#include "ffvertex_prog.h" +#include "framebuffer.h" +#include "light.h" +#include "matrix.h" +#include "pixel.h" +#include "program/program.h" +#include "program/prog_parameter.h" +#include "state.h" +#include "stencil.h" +#include "texenvprogram.h" +#include "texobj.h" +#include "texstate.h" + + +static void +update_separate_specular(struct gl_context *ctx) +{ + if (NEED_SECONDARY_COLOR(ctx)) + ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; + else + ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR; +} + + +/** + * Compute the index of the last array element that can be safely accessed + * in a vertex array. We can really only do this when the array lives in + * a VBO. + * The array->_MaxElement field will be updated. + * Later in glDrawArrays/Elements/etc we can do some bounds checking. + */ +static void +compute_max_element(struct gl_client_array *array) +{ + assert(array->Enabled); + if (array->BufferObj->Name) { + GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr; + GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size; + + if (offset < obj_size) { + array->_MaxElement = (obj_size - offset + + array->StrideB - + array->_ElementSize) / array->StrideB; + } else { + array->_MaxElement = 0; + } + } + else { + /* user-space array, no idea how big it is */ + array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */ + } +} + + +/** + * Helper for update_arrays(). + * \return min(current min, array->_MaxElement). + */ +static GLuint +update_min(GLuint min, struct gl_client_array *array) +{ + compute_max_element(array); + return MIN2(min, array->_MaxElement); +} + + +/** + * Update ctx->Array._MaxElement (the max legal index into all enabled arrays). + * Need to do this upon new array state or new buffer object state. + */ +static void +update_arrays( struct gl_context *ctx ) +{ + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i, min = ~0; + + /* find min of _MaxElement values for all enabled arrays */ + + /* 0 */ + if (ctx->VertexProgram._Current + && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]); + } + else if (arrayObj->Vertex.Enabled) { + min = update_min(min, &arrayObj->Vertex); + } + + /* 1 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]); + } + /* no conventional vertex weight array */ + + /* 2 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]); + } + else if (arrayObj->Normal.Enabled) { + min = update_min(min, &arrayObj->Normal); + } + + /* 3 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]); + } + else if (arrayObj->Color.Enabled) { + min = update_min(min, &arrayObj->Color); + } + + /* 4 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]); + } + else if (arrayObj->SecondaryColor.Enabled) { + min = update_min(min, &arrayObj->SecondaryColor); + } + + /* 5 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]); + } + else if (arrayObj->FogCoord.Enabled) { + min = update_min(min, &arrayObj->FogCoord); + } + + /* 6 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]); + } + else if (arrayObj->Index.Enabled) { + min = update_min(min, &arrayObj->Index); + } + + /* 7 */ + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]); + } + + /* 8..15 */ + for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) { + if (ctx->VertexProgram._Enabled + && arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); + } + else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits + && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) { + min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]); + } + } + + /* 16..31 */ + if (ctx->VertexProgram._Current) { + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { + if (arrayObj->VertexAttrib[i].Enabled) { + min = update_min(min, &arrayObj->VertexAttrib[i]); + } + } + } + + if (arrayObj->EdgeFlag.Enabled) { + min = update_min(min, &arrayObj->EdgeFlag); + } + + /* _MaxElement is one past the last legal array element */ + arrayObj->_MaxElement = min; +} + + +/** + * Update the following fields: + * ctx->VertexProgram._Enabled + * ctx->FragmentProgram._Enabled + * ctx->ATIFragmentShader._Enabled + * This needs to be done before texture state validation. + */ +static void +update_program_enables(struct gl_context *ctx) +{ + /* These _Enabled flags indicate if the program is enabled AND valid. */ + ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled + && ctx->VertexProgram.Current->Base.Instructions; + ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled + && ctx->FragmentProgram.Current->Base.Instructions; + ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled + && ctx->ATIFragmentShader.Current->Instructions[0]; +} + + +/** + * Update vertex/fragment program state. In particular, update these fields: + * ctx->VertexProgram._Current + * ctx->VertexProgram._TnlProgram, + * These point to the highest priority enabled vertex/fragment program or are + * NULL if fixed-function processing is to be done. + * + * This function needs to be called after texture state validation in case + * we're generating a fragment program from fixed-function texture state. + * + * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex + * or fragment program is being used. + */ +static GLbitfield +update_program(struct gl_context *ctx) +{ + const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram; + const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram; + const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram; + const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current; + const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current; + const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current; + GLbitfield new_state = 0x0; + + /* + * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current + * pointers to the programs that should be used for rendering. If either + * is NULL, use fixed-function code paths. + * + * These programs may come from several sources. The priority is as + * follows: + * 1. OpenGL 2.0/ARB vertex/fragment shaders + * 2. ARB/NV vertex/fragment programs + * 3. Programs derived from fixed-function state. + * + * Note: it's possible for a vertex shader to get used with a fragment + * program (and vice versa) here, but in practice that shouldn't ever + * come up, or matter. + */ + + if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) { + /* Use shader programs */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + fsProg->FragmentProgram); + } + else if (ctx->FragmentProgram._Enabled) { + /* use user-defined vertex program */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + ctx->FragmentProgram.Current); + } + else if (ctx->FragmentProgram._MaintainTexEnvProgram) { + /* Use fragment program generated from fixed-function state. + */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, + _mesa_get_fixed_func_fragment_program(ctx)); + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, + ctx->FragmentProgram._Current); + } + else { + /* no fragment program */ + _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); + } + + if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) { + /* Use shader programs */ + _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, + gsProg->GeometryProgram); + } else { + /* no fragment program */ + _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL); + } + + /* Examine vertex program after fragment program as + * _mesa_get_fixed_func_vertex_program() needs to know active + * fragprog inputs. + */ + if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) { + /* Use shader programs */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + vsProg->VertexProgram); + } + else if (ctx->VertexProgram._Enabled) { + /* use user-defined vertex program */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + ctx->VertexProgram.Current); + } + else if (ctx->VertexProgram._MaintainTnlProgram) { + /* Use vertex program generated from fixed-function state. + */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, + _mesa_get_fixed_func_vertex_program(ctx)); + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram, + ctx->VertexProgram._Current); + } + else { + /* no vertex program */ + _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL); + } + + /* Let the driver know what's happening: + */ + if (ctx->FragmentProgram._Current != prevFP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, + (struct gl_program *) ctx->FragmentProgram._Current); + } + } + + if (ctx->GeometryProgram._Current != prevGP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM, + (struct gl_program *) ctx->GeometryProgram._Current); + } + } + + if (ctx->VertexProgram._Current != prevVP) { + new_state |= _NEW_PROGRAM; + if (ctx->Driver.BindProgram) { + ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, + (struct gl_program *) ctx->VertexProgram._Current); + } + } + + return new_state; +} + + +/** + * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0. + */ +static GLbitfield +update_program_constants(struct gl_context *ctx) +{ + GLbitfield new_state = 0x0; + + if (ctx->FragmentProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->FragmentProgram._Current->Base.Parameters; + if (params && params->StateFlags & ctx->NewState) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + if (ctx->GeometryProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->GeometryProgram._Current->Base.Parameters; + /*FIXME: StateFlags is always 0 because we have unnamed constant + * not state changes */ + if (params /*&& params->StateFlags & ctx->NewState*/) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + if (ctx->VertexProgram._Current) { + const struct gl_program_parameter_list *params = + ctx->VertexProgram._Current->Base.Parameters; + if (params && params->StateFlags & ctx->NewState) { + new_state |= _NEW_PROGRAM_CONSTANTS; + } + } + + return new_state; +} + + + + +static void +update_viewport_matrix(struct gl_context *ctx) +{ + const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF; + + ASSERT(depthMax > 0); + + /* Compute scale and bias values. This is really driver-specific + * and should be maintained elsewhere if at all. + * NOTE: RasterPos uses this. + */ + _math_matrix_viewport(&ctx->Viewport._WindowMap, + ctx->Viewport.X, ctx->Viewport.Y, + ctx->Viewport.Width, ctx->Viewport.Height, + ctx->Viewport.Near, ctx->Viewport.Far, + depthMax); +} + + +/** + * Update derived multisample state. + */ +static void +update_multisample(struct gl_context *ctx) +{ + ctx->Multisample._Enabled = GL_FALSE; + if (ctx->Multisample.Enabled && + ctx->DrawBuffer && + ctx->DrawBuffer->Visual.sampleBuffers) + ctx->Multisample._Enabled = GL_TRUE; +} + + +/** + * Update derived color/blend/logicop state. + */ +static void +update_color(struct gl_context *ctx) +{ + /* This is needed to support 1.1's RGB logic ops AND + * 1.0's blending logicops. + */ + ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx); +} + + +/* + * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET + * in ctx->_TriangleCaps if needed. + */ +static void +update_polygon(struct gl_context *ctx) +{ + ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET); + + if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; + + if ( ctx->Polygon.OffsetPoint + || ctx->Polygon.OffsetLine + || ctx->Polygon.OffsetFill) + ctx->_TriangleCaps |= DD_TRI_OFFSET; +} + + +/** + * Update the ctx->_TriangleCaps bitfield. + * XXX that bitfield should really go away someday! + * This function must be called after other update_*() functions since + * there are dependencies on some other derived values. + */ +#if 0 +static void +update_tricaps(struct gl_context *ctx, GLbitfield new_state) +{ + ctx->_TriangleCaps = 0; + + /* + * Points + */ + if (1/*new_state & _NEW_POINT*/) { + if (ctx->Point.SmoothFlag) + ctx->_TriangleCaps |= DD_POINT_SMOOTH; + if (ctx->Point._Attenuated) + ctx->_TriangleCaps |= DD_POINT_ATTEN; + } + + /* + * Lines + */ + if (1/*new_state & _NEW_LINE*/) { + if (ctx->Line.SmoothFlag) + ctx->_TriangleCaps |= DD_LINE_SMOOTH; + if (ctx->Line.StippleFlag) + ctx->_TriangleCaps |= DD_LINE_STIPPLE; + } + + /* + * Polygons + */ + if (1/*new_state & _NEW_POLYGON*/) { + if (ctx->Polygon.SmoothFlag) + ctx->_TriangleCaps |= DD_TRI_SMOOTH; + if (ctx->Polygon.StippleFlag) + ctx->_TriangleCaps |= DD_TRI_STIPPLE; + if (ctx->Polygon.FrontMode != GL_FILL + || ctx->Polygon.BackMode != GL_FILL) + ctx->_TriangleCaps |= DD_TRI_UNFILLED; + if (ctx->Polygon.CullFlag + && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK; + if (ctx->Polygon.OffsetPoint || + ctx->Polygon.OffsetLine || + ctx->Polygon.OffsetFill) + ctx->_TriangleCaps |= DD_TRI_OFFSET; + } + + /* + * Lighting and shading + */ + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) + ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE; + if (ctx->Light.ShadeModel == GL_FLAT) + ctx->_TriangleCaps |= DD_FLATSHADE; + if (NEED_SECONDARY_COLOR(ctx)) + ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR; + + /* + * Stencil + */ + if (ctx->Stencil._TestTwoSide) + ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL; +} +#endif + + +/** + * Compute derived GL state. + * If __struct gl_contextRec::NewState is non-zero then this function \b must + * be called before rendering anything. + * + * Calls dd_function_table::UpdateState to perform any internal state + * management necessary. + * + * \sa _mesa_update_modelview_project(), _mesa_update_texture(), + * _mesa_update_buffer_bounds(), + * _mesa_update_lighting() and _mesa_update_tnl_spaces(). + */ +void +_mesa_update_state_locked( struct gl_context *ctx ) +{ + GLbitfield new_state = ctx->NewState; + GLbitfield prog_flags = _NEW_PROGRAM; + GLbitfield new_prog_state = 0x0; + + if (new_state == _NEW_CURRENT_ATTRIB) + goto out; + + if (MESA_VERBOSE & VERBOSE_STATE) + _mesa_print_state("_mesa_update_state", new_state); + + /* Determine which state flags effect vertex/fragment program state */ + if (ctx->FragmentProgram._MaintainTexEnvProgram) { + prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG | + _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE | + _NEW_PROGRAM); + } + if (ctx->VertexProgram._MaintainTnlProgram) { + prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | + _NEW_TRANSFORM | _NEW_POINT | + _NEW_FOG | _NEW_LIGHT | + _MESA_NEW_NEED_EYE_COORDS); + } + + /* + * Now update derived state info + */ + + if (new_state & prog_flags) + update_program_enables( ctx ); + + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + _mesa_update_modelview_project( ctx, new_state ); + + if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) + _mesa_update_texture( ctx, new_state ); + + if (new_state & _NEW_BUFFERS) + _mesa_update_framebuffer(ctx); + + if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) + _mesa_update_draw_buffer_bounds( ctx ); + + if (new_state & _NEW_POLYGON) + update_polygon( ctx ); + + if (new_state & _NEW_LIGHT) + _mesa_update_lighting( ctx ); + + if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) + _mesa_update_stencil( ctx ); + + if (new_state & _MESA_NEW_TRANSFER_STATE) + _mesa_update_pixel( ctx, new_state ); + + if (new_state & _DD_NEW_SEPARATE_SPECULAR) + update_separate_specular( ctx ); + + if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT)) + update_viewport_matrix(ctx); + + if (new_state & _NEW_MULTISAMPLE) + update_multisample( ctx ); + + if (new_state & _NEW_COLOR) + update_color( ctx ); + +#if 0 + if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT + | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR)) + update_tricaps( ctx, new_state ); +#endif + + /* ctx->_NeedEyeCoords is now up to date. + * + * If the truth value of this variable has changed, update for the + * new lighting space and recompute the positions of lights and the + * normal transform. + * + * If the lighting space hasn't changed, may still need to recompute + * light positions & normal transforms for other reasons. + */ + if (new_state & _MESA_NEW_NEED_EYE_COORDS) + _mesa_update_tnl_spaces( ctx, new_state ); + + if (new_state & prog_flags) { + /* When we generate programs from fixed-function vertex/fragment state + * this call may generate/bind a new program. If so, we need to + * propogate the _NEW_PROGRAM flag to the driver. + */ + new_prog_state |= update_program( ctx ); + } + + if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT)) + update_arrays( ctx ); + + out: + new_prog_state |= update_program_constants(ctx); + + /* + * Give the driver a chance to act upon the new_state flags. + * The driver might plug in different span functions, for example. + * Also, this is where the driver can invalidate the state of any + * active modules (such as swrast_setup, swrast, tnl, etc). + * + * Set ctx->NewState to zero to avoid recursion if + * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?) + */ + new_state = ctx->NewState | new_prog_state; + ctx->NewState = 0; + ctx->Driver.UpdateState(ctx, new_state); + ctx->Array.NewState = 0; + if (!ctx->Array.RebindArrays) + ctx->Array.RebindArrays = (new_state & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; +} + + +/* This is the usual entrypoint for state updates: + */ +void +_mesa_update_state( struct gl_context *ctx ) +{ + _mesa_lock_context_textures(ctx); + _mesa_update_state_locked(ctx); + _mesa_unlock_context_textures(ctx); +} + + + + +/** + * Want to figure out which fragment program inputs are actually + * constant/current values from ctx->Current. These should be + * referenced as a tracked state variable rather than a fragment + * program input, to save the overhead of putting a constant value in + * every submitted vertex, transferring it to hardware, interpolating + * it across the triangle, etc... + * + * When there is a VP bound, just use vp->outputs. But when we're + * generating vp from fixed function state, basically want to + * calculate: + * + * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | + * potential_vp_outputs ) + * + * Where potential_vp_outputs is calculated by looking at enabled + * texgen, etc. + * + * The generated fragment program should then only declare inputs that + * may vary or otherwise differ from the ctx->Current values. + * Otherwise, the fp should track them as state values instead. + */ +void +_mesa_set_varying_vp_inputs( struct gl_context *ctx, + GLbitfield varying_inputs ) +{ + if (ctx->varying_vp_inputs != varying_inputs) { + ctx->varying_vp_inputs = varying_inputs; + ctx->NewState |= _NEW_ARRAY; + /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/ + } +} + + +/** + * Used by drivers to tell core Mesa that the driver is going to + * install/ use its own vertex program. In particular, this will + * prevent generated fragment programs from using state vars instead + * of ordinary varyings/inputs. + */ +void +_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag) +{ + if (ctx->VertexProgram._Overriden != flag) { + ctx->VertexProgram._Overriden = flag; + + /* Set one of the bits which will trigger fragment program + * regeneration: + */ + ctx->NewState |= _NEW_PROGRAM; + } +} diff --git a/mesalib/src/mesa/state_tracker/st_atom_blend.c b/mesalib/src/mesa/state_tracker/st_atom_blend.c index 26bb3dab9..fb1c7a4ef 100644 --- a/mesalib/src/mesa/state_tracker/st_atom_blend.c +++ b/mesalib/src/mesa/state_tracker/st_atom_blend.c @@ -1,299 +1,302 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-
-#include "st_context.h"
-#include "st_atom.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "cso_cache/cso_context.h"
-
-#include "main/macros.h"
-
-/**
- * Convert GLenum blend tokens to pipe tokens.
- * Both blend factors and blend funcs are accepted.
- */
-static GLuint
-translate_blend(GLenum blend)
-{
- switch (blend) {
- /* blend functions */
- case GL_FUNC_ADD:
- return PIPE_BLEND_ADD;
- case GL_FUNC_SUBTRACT:
- return PIPE_BLEND_SUBTRACT;
- case GL_FUNC_REVERSE_SUBTRACT:
- return PIPE_BLEND_REVERSE_SUBTRACT;
- case GL_MIN:
- return PIPE_BLEND_MIN;
- case GL_MAX:
- return PIPE_BLEND_MAX;
-
- /* blend factors */
- case GL_ONE:
- return PIPE_BLENDFACTOR_ONE;
- case GL_SRC_COLOR:
- return PIPE_BLENDFACTOR_SRC_COLOR;
- case GL_SRC_ALPHA:
- return PIPE_BLENDFACTOR_SRC_ALPHA;
- case GL_DST_ALPHA:
- return PIPE_BLENDFACTOR_DST_ALPHA;
- case GL_DST_COLOR:
- return PIPE_BLENDFACTOR_DST_COLOR;
- case GL_SRC_ALPHA_SATURATE:
- return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
- case GL_CONSTANT_COLOR:
- return PIPE_BLENDFACTOR_CONST_COLOR;
- case GL_CONSTANT_ALPHA:
- return PIPE_BLENDFACTOR_CONST_ALPHA;
- /*
- return PIPE_BLENDFACTOR_SRC1_COLOR;
- return PIPE_BLENDFACTOR_SRC1_ALPHA;
- */
- case GL_ZERO:
- return PIPE_BLENDFACTOR_ZERO;
- case GL_ONE_MINUS_SRC_COLOR:
- return PIPE_BLENDFACTOR_INV_SRC_COLOR;
- case GL_ONE_MINUS_SRC_ALPHA:
- return PIPE_BLENDFACTOR_INV_SRC_ALPHA;
- case GL_ONE_MINUS_DST_COLOR:
- return PIPE_BLENDFACTOR_INV_DST_COLOR;
- case GL_ONE_MINUS_DST_ALPHA:
- return PIPE_BLENDFACTOR_INV_DST_ALPHA;
- case GL_ONE_MINUS_CONSTANT_COLOR:
- return PIPE_BLENDFACTOR_INV_CONST_COLOR;
- case GL_ONE_MINUS_CONSTANT_ALPHA:
- return PIPE_BLENDFACTOR_INV_CONST_ALPHA;
- /*
- return PIPE_BLENDFACTOR_INV_SRC1_COLOR;
- return PIPE_BLENDFACTOR_INV_SRC1_ALPHA;
- */
- default:
- assert("invalid GL token in translate_blend()" == NULL);
- return 0;
- }
-}
-
-
-/**
- * Convert GLenum logicop tokens to pipe tokens.
- */
-static GLuint
-translate_logicop(GLenum logicop)
-{
- switch (logicop) {
- case GL_CLEAR:
- return PIPE_LOGICOP_CLEAR;
- case GL_NOR:
- return PIPE_LOGICOP_NOR;
- case GL_AND_INVERTED:
- return PIPE_LOGICOP_AND_INVERTED;
- case GL_COPY_INVERTED:
- return PIPE_LOGICOP_COPY_INVERTED;
- case GL_AND_REVERSE:
- return PIPE_LOGICOP_AND_REVERSE;
- case GL_INVERT:
- return PIPE_LOGICOP_INVERT;
- case GL_XOR:
- return PIPE_LOGICOP_XOR;
- case GL_NAND:
- return PIPE_LOGICOP_NAND;
- case GL_AND:
- return PIPE_LOGICOP_AND;
- case GL_EQUIV:
- return PIPE_LOGICOP_EQUIV;
- case GL_NOOP:
- return PIPE_LOGICOP_NOOP;
- case GL_OR_INVERTED:
- return PIPE_LOGICOP_OR_INVERTED;
- case GL_COPY:
- return PIPE_LOGICOP_COPY;
- case GL_OR_REVERSE:
- return PIPE_LOGICOP_OR_REVERSE;
- case GL_OR:
- return PIPE_LOGICOP_OR;
- case GL_SET:
- return PIPE_LOGICOP_SET;
- default:
- assert("invalid GL token in translate_logicop()" == NULL);
- return 0;
- }
-}
-
-/**
- * Figure out if colormasks are different per rt.
- */
-static GLboolean
-colormask_per_rt(struct gl_context *ctx)
-{
- /* a bit suboptimal have to compare lots of values */
- unsigned i;
- for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) {
- if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-/**
- * Figure out if blend enables/state are different per rt.
- */
-static GLboolean
-blend_per_rt(struct gl_context *ctx)
-{
- if (ctx->Color.BlendEnabled &&
- (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) {
- /* This can only happen if GL_EXT_draw_buffers2 is enabled */
- return GL_TRUE;
- }
- if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) {
- /* this can only happen if GL_ARB_draw_buffers_blend is enabled */
- return GL_TRUE;
- }
- return GL_FALSE;
-}
-
-static void
-update_blend( struct st_context *st )
-{
- struct pipe_blend_state *blend = &st->state.blend;
- unsigned num_state = 1;
- unsigned i;
-
- memset(blend, 0, sizeof(*blend));
-
- if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) {
- num_state = st->ctx->Const.MaxDrawBuffers;
- blend->independent_blend_enable = 1;
- }
- /* Note it is impossible to correctly deal with EXT_blend_logic_op and
- EXT_draw_buffers2/EXT_blend_equation_separate at the same time.
- These combinations would require support for per-rt logicop enables
- and separate alpha/rgb logicop/blend support respectively. Neither
- possible in gallium nor most hardware. Assume these combinations
- don't happen. */
- if (st->ctx->Color.ColorLogicOpEnabled ||
- (st->ctx->Color.BlendEnabled &&
- st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) {
- /* logicop enabled */
- blend->logicop_enable = 1;
- blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp);
- }
- else if (st->ctx->Color.BlendEnabled) {
- /* blending enabled */
- for (i = 0; i < num_state; i++) {
-
- blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1;
-
- blend->rt[i].rgb_func =
- translate_blend(st->ctx->Color.Blend[i].EquationRGB);
-
- if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN ||
- st->ctx->Color.Blend[i].EquationRGB == GL_MAX) {
- /* Min/max are special */
- blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
- }
- else {
- blend->rt[i].rgb_src_factor =
- translate_blend(st->ctx->Color.Blend[i].SrcRGB);
- blend->rt[i].rgb_dst_factor =
- translate_blend(st->ctx->Color.Blend[i].DstRGB);
- }
-
- blend->rt[i].alpha_func =
- translate_blend(st->ctx->Color.Blend[i].EquationA);
-
- if (st->ctx->Color.Blend[i].EquationA == GL_MIN ||
- st->ctx->Color.Blend[i].EquationA == GL_MAX) {
- /* Min/max are special */
- blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
- }
- else {
- blend->rt[i].alpha_src_factor =
- translate_blend(st->ctx->Color.Blend[i].SrcA);
- blend->rt[i].alpha_dst_factor =
- translate_blend(st->ctx->Color.Blend[i].DstA);
- }
- }
- }
- else {
- /* no blending / logicop */
- }
-
- /* Colormask - maybe reverse these bits? */
- for (i = 0; i < num_state; i++) {
- if (st->ctx->Color.ColorMask[i][0])
- blend->rt[i].colormask |= PIPE_MASK_R;
- if (st->ctx->Color.ColorMask[i][1])
- blend->rt[i].colormask |= PIPE_MASK_G;
- if (st->ctx->Color.ColorMask[i][2])
- blend->rt[i].colormask |= PIPE_MASK_B;
- if (st->ctx->Color.ColorMask[i][3])
- blend->rt[i].colormask |= PIPE_MASK_A;
- }
-
- if (st->ctx->Color.DitherFlag)
- blend->dither = 1;
-
- if (st->ctx->Multisample.Enabled) {
- /* unlike in gallium/d3d10 these operations are only performed
- if msaa is enabled */
- if (st->ctx->Multisample.SampleAlphaToCoverage)
- blend->alpha_to_coverage = 1;
- if (st->ctx->Multisample.SampleAlphaToOne)
- blend->alpha_to_one = 1;
- }
-
- cso_set_blend(st->cso_context, blend);
-
- {
- struct pipe_blend_color bc;
- COPY_4FV(bc.color, st->ctx->Color.BlendColor);
- cso_set_blend_color(st->cso_context, &bc);
- }
-}
-
-
-const struct st_tracked_state st_update_blend = {
- "st_update_blend", /* name */
- { /* dirty */
- (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */
- 0, /* st */
- },
- update_blend, /* update */
-};
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#include "st_context.h" +#include "st_atom.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" + +#include "main/macros.h" + +/** + * Convert GLenum blend tokens to pipe tokens. + * Both blend factors and blend funcs are accepted. + */ +static GLuint +translate_blend(GLenum blend) +{ + switch (blend) { + /* blend functions */ + case GL_FUNC_ADD: + return PIPE_BLEND_ADD; + case GL_FUNC_SUBTRACT: + return PIPE_BLEND_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return PIPE_BLEND_REVERSE_SUBTRACT; + case GL_MIN: + return PIPE_BLEND_MIN; + case GL_MAX: + return PIPE_BLEND_MAX; + + /* blend factors */ + case GL_ONE: + return PIPE_BLENDFACTOR_ONE; + case GL_SRC_COLOR: + return PIPE_BLENDFACTOR_SRC_COLOR; + case GL_SRC_ALPHA: + return PIPE_BLENDFACTOR_SRC_ALPHA; + case GL_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_ALPHA; + case GL_DST_COLOR: + return PIPE_BLENDFACTOR_DST_COLOR; + case GL_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_SRC1_COLOR; + return PIPE_BLENDFACTOR_SRC1_ALPHA; + */ + case GL_ZERO: + return PIPE_BLENDFACTOR_ZERO; + case GL_ONE_MINUS_SRC_COLOR: + return PIPE_BLENDFACTOR_INV_SRC_COLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return PIPE_BLENDFACTOR_INV_SRC_ALPHA; + case GL_ONE_MINUS_DST_COLOR: + return PIPE_BLENDFACTOR_INV_DST_COLOR; + case GL_ONE_MINUS_DST_ALPHA: + return PIPE_BLENDFACTOR_INV_DST_ALPHA; + case GL_ONE_MINUS_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_INV_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_INV_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_INV_SRC1_COLOR; + return PIPE_BLENDFACTOR_INV_SRC1_ALPHA; + */ + default: + assert("invalid GL token in translate_blend()" == NULL); + return 0; + } +} + + +/** + * Convert GLenum logicop tokens to pipe tokens. + */ +static GLuint +translate_logicop(GLenum logicop) +{ + switch (logicop) { + case GL_CLEAR: + return PIPE_LOGICOP_CLEAR; + case GL_NOR: + return PIPE_LOGICOP_NOR; + case GL_AND_INVERTED: + return PIPE_LOGICOP_AND_INVERTED; + case GL_COPY_INVERTED: + return PIPE_LOGICOP_COPY_INVERTED; + case GL_AND_REVERSE: + return PIPE_LOGICOP_AND_REVERSE; + case GL_INVERT: + return PIPE_LOGICOP_INVERT; + case GL_XOR: + return PIPE_LOGICOP_XOR; + case GL_NAND: + return PIPE_LOGICOP_NAND; + case GL_AND: + return PIPE_LOGICOP_AND; + case GL_EQUIV: + return PIPE_LOGICOP_EQUIV; + case GL_NOOP: + return PIPE_LOGICOP_NOOP; + case GL_OR_INVERTED: + return PIPE_LOGICOP_OR_INVERTED; + case GL_COPY: + return PIPE_LOGICOP_COPY; + case GL_OR_REVERSE: + return PIPE_LOGICOP_OR_REVERSE; + case GL_OR: + return PIPE_LOGICOP_OR; + case GL_SET: + return PIPE_LOGICOP_SET; + default: + assert("invalid GL token in translate_logicop()" == NULL); + return 0; + } +} + +/** + * Figure out if colormasks are different per rt. + */ +static GLboolean +colormask_per_rt(struct gl_context *ctx) +{ + /* a bit suboptimal have to compare lots of values */ + unsigned i; + for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) { + if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) { + return GL_TRUE; + } + } + return GL_FALSE; +} + +/** + * Figure out if blend enables/state are different per rt. + */ +static GLboolean +blend_per_rt(struct gl_context *ctx) +{ + if (ctx->Color.BlendEnabled && + (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) { + /* This can only happen if GL_EXT_draw_buffers2 is enabled */ + return GL_TRUE; + } + if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) { + /* this can only happen if GL_ARB_draw_buffers_blend is enabled */ + return GL_TRUE; + } + return GL_FALSE; +} + +static void +update_blend( struct st_context *st ) +{ + struct pipe_blend_state *blend = &st->state.blend; + unsigned num_state = 1; + unsigned i, j; + + memset(blend, 0, sizeof(*blend)); + + if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) { + num_state = st->ctx->Const.MaxDrawBuffers; + blend->independent_blend_enable = 1; + } + /* Note it is impossible to correctly deal with EXT_blend_logic_op and + EXT_draw_buffers2/EXT_blend_equation_separate at the same time. + These combinations would require support for per-rt logicop enables + and separate alpha/rgb logicop/blend support respectively. Neither + possible in gallium nor most hardware. Assume these combinations + don't happen. */ + if (st->ctx->Color.ColorLogicOpEnabled || + (st->ctx->Color.BlendEnabled && + st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) { + /* logicop enabled */ + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); + } + else if (st->ctx->Color.BlendEnabled) { + /* blending enabled */ + for (i = 0, j = 0; i < num_state; i++) { + + blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1; + + if (st->ctx->Extensions.ARB_draw_buffers_blend) + j = i; + + blend->rt[i].rgb_func = + translate_blend(st->ctx->Color.Blend[j].EquationRGB); + + if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN || + st->ctx->Color.Blend[i].EquationRGB == GL_MAX) { + /* Min/max are special */ + blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->rt[i].rgb_src_factor = + translate_blend(st->ctx->Color.Blend[j].SrcRGB); + blend->rt[i].rgb_dst_factor = + translate_blend(st->ctx->Color.Blend[j].DstRGB); + } + + blend->rt[i].alpha_func = + translate_blend(st->ctx->Color.Blend[j].EquationA); + + if (st->ctx->Color.Blend[i].EquationA == GL_MIN || + st->ctx->Color.Blend[i].EquationA == GL_MAX) { + /* Min/max are special */ + blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->rt[i].alpha_src_factor = + translate_blend(st->ctx->Color.Blend[j].SrcA); + blend->rt[i].alpha_dst_factor = + translate_blend(st->ctx->Color.Blend[j].DstA); + } + } + } + else { + /* no blending / logicop */ + } + + /* Colormask - maybe reverse these bits? */ + for (i = 0; i < num_state; i++) { + if (st->ctx->Color.ColorMask[i][0]) + blend->rt[i].colormask |= PIPE_MASK_R; + if (st->ctx->Color.ColorMask[i][1]) + blend->rt[i].colormask |= PIPE_MASK_G; + if (st->ctx->Color.ColorMask[i][2]) + blend->rt[i].colormask |= PIPE_MASK_B; + if (st->ctx->Color.ColorMask[i][3]) + blend->rt[i].colormask |= PIPE_MASK_A; + } + + if (st->ctx->Color.DitherFlag) + blend->dither = 1; + + if (st->ctx->Multisample.Enabled) { + /* unlike in gallium/d3d10 these operations are only performed + if msaa is enabled */ + if (st->ctx->Multisample.SampleAlphaToCoverage) + blend->alpha_to_coverage = 1; + if (st->ctx->Multisample.SampleAlphaToOne) + blend->alpha_to_one = 1; + } + + cso_set_blend(st->cso_context, blend); + + { + struct pipe_blend_color bc; + COPY_4FV(bc.color, st->ctx->Color.BlendColor); + cso_set_blend_color(st->cso_context, &bc); + } +} + + +const struct st_tracked_state st_update_blend = { + "st_update_blend", /* name */ + { /* dirty */ + (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */ + 0, /* st */ + }, + update_blend, /* update */ +}; diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c index ddd130a81..0ea567155 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c @@ -1,890 +1,893 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Brian Paul
- */
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_atom_constbuf.h"
-#include "st_program.h"
-#include "st_cb_bitmap.h"
-#include "st_texture.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_inlines.h"
-#include "util/u_draw_quad.h"
-#include "util/u_simple_shaders.h"
-#include "program/prog_instruction.h"
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_drawpix
-
-/**
- * glBitmaps are drawn as textured quads. The user's bitmap pattern
- * is stored in a texture image. An alpha8 texture format is used.
- * The fragment shader samples a bit (texel) from the texture, then
- * discards the fragment if the bit is off.
- *
- * Note that we actually store the inverse image of the bitmap to
- * simplify the fragment program. An "on" bit gets stored as texel=0x0
- * and an "off" bit is stored as texel=0xff. Then we kill the
- * fragment if the negated texel value is less than zero.
- */
-
-
-/**
- * The bitmap cache attempts to accumulate multiple glBitmap calls in a
- * buffer which is then rendered en mass upon a flush, state change, etc.
- * A wide, short buffer is used to target the common case of a series
- * of glBitmap calls being used to draw text.
- */
-static GLboolean UseBitmapCache = GL_TRUE;
-
-
-#define BITMAP_CACHE_WIDTH 512
-#define BITMAP_CACHE_HEIGHT 32
-
-struct bitmap_cache
-{
- /** Window pos to render the cached image */
- GLint xpos, ypos;
- /** Bounds of region used in window coords */
- GLint xmin, ymin, xmax, ymax;
-
- GLfloat color[4];
-
- /** Bitmap's Z position */
- GLfloat zpos;
-
- struct pipe_resource *texture;
- struct pipe_transfer *trans;
-
- GLboolean empty;
-
- /** An I8 texture image: */
- ubyte *buffer;
-};
-
-
-/** Epsilon for Z comparisons */
-#define Z_EPSILON 1e-06
-
-
-/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
- struct st_context *st = st_context(ctx);
- struct st_fragment_program *stfp;
- struct gl_program *p;
- GLuint ic = 0;
-
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = 3;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
- p->Instructions[ic].DstReg.Index = 0;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = samplerIndex;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- p->Instructions[ic].Opcode = OPCODE_KIL;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
- p->Instructions[ic].SrcReg[0].Index = 0;
- p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
- ic++;
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = FRAG_BIT_TEX0;
- p->OutputsWritten = 0x0;
- p->SamplersUsed = (1 << samplerIndex);
-
- stfp = (struct st_fragment_program *) p;
- stfp->Base.UsesKill = GL_TRUE;
-
- return stfp;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0) {
- return i;
- }
- }
- return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st current context
- * \param fpIn the incoming fragment program
- * \param fpOut the new fragment program which does fragment culling
- * \param bitmap_sampler sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler)
-{
- struct st_fragment_program *bitmap_prog;
- struct gl_program *newProg;
- uint sampler;
-
- /*
- * Generate new program which is the user-defined program prefixed
- * with the bitmap sampler/kill instructions.
- */
- sampler = find_free_bit(fpIn->Base.SamplersUsed);
- bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
- newProg = _mesa_combine_programs(st->ctx,
- &bitmap_prog->Base.Base,
- &fpIn->Base);
- /* done with this after combining */
- st_reference_fragprog(st, &bitmap_prog, NULL);
-
-#if 0
- {
- printf("Combined bitmap program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
- }
-#endif
-
- /* return results */
- *fpOut = (struct gl_fragment_program *) newProg;
- *bitmap_sampler = sampler;
-}
-
-
-/**
- * Copy user-provide bitmap bits into texture buffer, expanding
- * bits into texels.
- * "On" bits will set texels to 0x0.
- * "Off" bits will not modify texels.
- * Note that the image is actually going to be upside down in
- * the texture. We deal with that with texcoords.
- */
-static void
-unpack_bitmap(struct st_context *st,
- GLint px, GLint py, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap,
- ubyte *destBuffer, uint destStride)
-{
- destBuffer += py * destStride + px;
-
- _mesa_expand_bitmap(width, height, unpack, bitmap,
- destBuffer, destStride, 0x0);
-}
-
-
-/**
- * Create a texture which represents a bitmap image.
- */
-static struct pipe_resource *
-make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_transfer *transfer;
- ubyte *dest;
- struct pipe_resource *pt;
-
- /* PBO source... */
- bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
- if (!bitmap) {
- return NULL;
- }
-
- /**
- * Create texture to hold bitmap pattern.
- */
- pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
- 0, width, height, 1, 1,
- PIPE_BIND_SAMPLER_VIEW);
- if (!pt) {
- _mesa_unmap_pbo_source(ctx, unpack);
- return NULL;
- }
-
- transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
- PIPE_TRANSFER_WRITE,
- 0, 0, width, height);
-
- dest = pipe_transfer_map(pipe, transfer);
-
- /* Put image into texture transfer */
- memset(dest, 0xff, height * transfer->stride);
- unpack_bitmap(st, 0, 0, width, height, unpack, bitmap,
- dest, transfer->stride);
-
- _mesa_unmap_pbo_source(ctx, unpack);
-
- /* Release transfer */
- pipe_transfer_unmap(pipe, transfer);
- pipe->transfer_destroy(pipe, transfer);
-
- return pt;
-}
-
-static GLuint
-setup_bitmap_vertex_data(struct st_context *st, bool normalized,
- int x, int y, int width, int height,
- float z, const float color[4])
-{
- struct pipe_context *pipe = st->pipe;
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat)fb->Width;
- const GLfloat fb_height = (GLfloat)fb->Height;
- const GLfloat x0 = (GLfloat)x;
- const GLfloat x1 = (GLfloat)(x + width);
- const GLfloat y0 = (GLfloat)y;
- const GLfloat y1 = (GLfloat)(y + height);
- GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
- GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
- const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
- const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
- const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
- GLuint i;
-
- if(!normalized)
- {
- sRight = width;
- tBot = height;
- }
-
- /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
- * no_flush) updates to buffers where we know there is no conflict
- * with previous data. Currently using max_slots > 1 will cause
- * synchronous rendering if the driver flushes its command buffers
- * between one bitmap and the next. Our flush hook below isn't
- * sufficient to catch this as the driver doesn't tell us when it
- * flushes its own command buffers. Until this gets fixed, pay the
- * price of allocating a new buffer for each bitmap cache-flush to
- * avoid synchronous rendering.
- */
- if (st->bitmap.vbuf_slot >= max_slots) {
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf_slot = 0;
- }
-
- if (!st->bitmap.vbuf) {
- st->bitmap.vbuf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- max_slots *
- sizeof(st->bitmap.vertices));
- }
-
- /* Positions are in clip coords since we need to do clipping in case
- * the bitmap quad goes beyond the window bounds.
- */
- st->bitmap.vertices[0][0][0] = clip_x0;
- st->bitmap.vertices[0][0][1] = clip_y0;
- st->bitmap.vertices[0][2][0] = sLeft;
- st->bitmap.vertices[0][2][1] = tTop;
-
- st->bitmap.vertices[1][0][0] = clip_x1;
- st->bitmap.vertices[1][0][1] = clip_y0;
- st->bitmap.vertices[1][2][0] = sRight;
- st->bitmap.vertices[1][2][1] = tTop;
-
- st->bitmap.vertices[2][0][0] = clip_x1;
- st->bitmap.vertices[2][0][1] = clip_y1;
- st->bitmap.vertices[2][2][0] = sRight;
- st->bitmap.vertices[2][2][1] = tBot;
-
- st->bitmap.vertices[3][0][0] = clip_x0;
- st->bitmap.vertices[3][0][1] = clip_y1;
- st->bitmap.vertices[3][2][0] = sLeft;
- st->bitmap.vertices[3][2][1] = tBot;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- st->bitmap.vertices[i][0][2] = z;
- st->bitmap.vertices[i][0][3] = 1.0;
- st->bitmap.vertices[i][1][0] = color[0];
- st->bitmap.vertices[i][1][1] = color[1];
- st->bitmap.vertices[i][1][2] = color[2];
- st->bitmap.vertices[i][1][3] = color[3];
- st->bitmap.vertices[i][2][2] = 0.0; /*R*/
- st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
- }
-
- /* put vertex data into vbuf */
- pipe_buffer_write_nooverlap(st->pipe,
- st->bitmap.vbuf,
- st->bitmap.vbuf_slot
- * sizeof(st->bitmap.vertices),
- sizeof st->bitmap.vertices,
- st->bitmap.vertices);
-
- return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
-}
-
-
-
-/**
- * Render a glBitmap by drawing a textured quad
- */
-static void
-draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
- GLsizei width, GLsizei height,
- struct pipe_sampler_view *sv,
- const GLfloat *color)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = st->cso_context;
- struct st_fp_variant *fpv;
- struct st_fp_variant_key key;
- GLuint maxSize;
- GLuint offset;
-
- memset(&key, 0, sizeof(key));
- key.st = st;
- key.bitmap = GL_TRUE;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- /* As an optimization, Mesa's fragment programs will sometimes get the
- * primary color from a statevar/constant rather than a varying variable.
- * when that's the case, we need to ensure that we use the 'color'
- * parameter and not the current attribute color (which may have changed
- * through glRasterPos and state validation.
- * So, we force the proper color here. Not elegant, but it works.
- */
- {
- GLfloat colorSave[4];
- COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
- COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
- COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
- }
-
-
- /* limit checks */
- /* XXX if the bitmap is larger than the max texture size, break
- * it up into chunks.
- */
- maxSize = 1 << (pipe->screen->get_param(pipe->screen,
- PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
- assert(width <= (GLsizei)maxSize);
- assert(height <= (GLsizei)maxSize);
-
- cso_save_rasterizer(cso);
- cso_save_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_viewport(cso);
- cso_save_fragment_shader(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
-
- /* rasterizer state: just scissor */
- st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
- cso_set_rasterizer(cso, &st->bitmap.rasterizer);
-
- /* fragment shader state: TEX lookup program */
- cso_set_fragment_shader_handle(cso, fpv->driver_shader);
-
- /* vertex shader state: position + texcoord pass-through */
- cso_set_vertex_shader_handle(cso, st->bitmap.vs);
-
- /* user samplers, plus our bitmap sampler */
- {
- struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
- uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
- uint i;
- for (i = 0; i < st->state.num_samplers; i++) {
- samplers[i] = &st->state.samplers[i];
- }
- samplers[fpv->bitmap_sampler] =
- &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
- cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
- }
-
- /* user textures, plus the bitmap texture */
- {
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
- uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
- memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
- sampler_views[fpv->bitmap_sampler] = sv;
- cso_set_fragment_sampler_views(cso, num, sampler_views);
- }
-
- /* viewport state: viewport matching window dims */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- const GLfloat width = (GLfloat)fb->Width;
- const GLfloat height = (GLfloat)fb->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.5f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
-
- /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
-
- /* draw textured quad */
- offset = setup_bitmap_vertex_data(st,
- sv->texture->target != PIPE_TEXTURE_RECT,
- x, y, width, height, z, color);
-
- util_draw_vertex_buffer(pipe, st->bitmap.vbuf, offset,
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 3); /* attribs/vert */
-
-
- /* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
-}
-
-
-static void
-reset_cache(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
- cache->empty = GL_TRUE;
-
- cache->xmin = 1000000;
- cache->xmax = -1000000;
- cache->ymin = 1000000;
- cache->ymax = -1000000;
-
- if (cache->trans) {
- pipe->transfer_destroy(pipe, cache->trans);
- cache->trans = NULL;
- }
-
- assert(!cache->texture);
-
- /* allocate a new texture */
- cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
- st->bitmap.tex_format, 0,
- BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
- 1, 1,
- PIPE_BIND_SAMPLER_VIEW);
-}
-
-
-/** Print bitmap image to stdout (debug) */
-static void
-print_cache(const struct bitmap_cache *cache)
-{
- int i, j, k;
-
- for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) {
- k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1);
- for (j = 0; j < BITMAP_CACHE_WIDTH; j++) {
- if (cache->buffer[k])
- printf("X");
- else
- printf(" ");
- k++;
- }
- printf("\n");
- }
-}
-
-
-/**
- * Create gallium pipe_transfer object for the bitmap cache.
- */
-static void
-create_cache_trans(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (cache->trans)
- return;
-
- /* Map the texture transfer.
- * Subsequent glBitmap calls will write into the texture image.
- */
- cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0,
- BITMAP_CACHE_WIDTH,
- BITMAP_CACHE_HEIGHT);
- cache->buffer = pipe_transfer_map(pipe, cache->trans);
-
- /* init image to all 0xff */
- memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
-}
-
-
-/**
- * If there's anything in the bitmap cache, draw/flush it now.
- */
-void
-st_flush_bitmap_cache(struct st_context *st)
-{
- if (!st->bitmap.cache->empty) {
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (st->ctx->DrawBuffer) {
- struct pipe_context *pipe = st->pipe;
- struct pipe_sampler_view *sv;
-
- assert(cache->xmin <= cache->xmax);
-
-/* printf("flush size %d x %d at %d, %d\n",
- cache->xmax - cache->xmin,
- cache->ymax - cache->ymin,
- cache->xpos, cache->ypos);
-*/
-
- /* The texture transfer has been mapped until now.
- * So unmap and release the texture transfer before drawing.
- */
- if (cache->trans) {
- if (0)
- print_cache(cache);
- pipe_transfer_unmap(pipe, cache->trans);
- cache->buffer = NULL;
-
- pipe->transfer_destroy(pipe, cache->trans);
- cache->trans = NULL;
- }
-
- sv = st_create_texture_sampler_view(st->pipe, cache->texture);
- if (sv) {
- draw_bitmap_quad(st->ctx,
- cache->xpos,
- cache->ypos,
- cache->zpos,
- BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
- sv,
- cache->color);
-
- pipe_sampler_view_reference(&sv, NULL);
- }
- }
-
- /* release/free the texture */
- pipe_resource_reference(&cache->texture, NULL);
-
- reset_cache(st);
- }
-}
-
-
-/**
- * Flush bitmap cache and release vertex buffer.
- */
-void
-st_flush_bitmap( struct st_context *st )
-{
- st_flush_bitmap_cache(st);
-
- /* Release vertex buffer to avoid synchronous rendering if we were
- * to map it in the next frame.
- */
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf_slot = 0;
-}
-
-
-/**
- * Try to accumulate this glBitmap call in the bitmap cache.
- * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
- */
-static GLboolean
-accum_bitmap(struct st_context *st,
- GLint x, GLint y, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap )
-{
- struct bitmap_cache *cache = st->bitmap.cache;
- int px = -999, py = -999;
- const GLfloat z = st->ctx->Current.RasterPos[2];
-
- if (width > BITMAP_CACHE_WIDTH ||
- height > BITMAP_CACHE_HEIGHT)
- return GL_FALSE; /* too big to cache */
-
- if (!cache->empty) {
- px = x - cache->xpos; /* pos in buffer */
- py = y - cache->ypos;
- if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
- py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
- !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
- ((fabs(z - cache->zpos) > Z_EPSILON))) {
- /* This bitmap would extend beyond cache bounds, or the bitmap
- * color is changing
- * so flush and continue.
- */
- st_flush_bitmap_cache(st);
- }
- }
-
- if (cache->empty) {
- /* Initialize. Center bitmap vertically in the buffer. */
- px = 0;
- py = (BITMAP_CACHE_HEIGHT - height) / 2;
- cache->xpos = x;
- cache->ypos = y - py;
- cache->zpos = z;
- cache->empty = GL_FALSE;
- COPY_4FV(cache->color, st->ctx->Current.RasterColor);
- }
-
- assert(px != -999);
- assert(py != -999);
-
- if (x < cache->xmin)
- cache->xmin = x;
- if (y < cache->ymin)
- cache->ymin = y;
- if (x + width > cache->xmax)
- cache->xmax = x + width;
- if (y + height > cache->ymax)
- cache->ymax = y + height;
-
- /* create the transfer if needed */
- create_cache_trans(st);
-
- unpack_bitmap(st, px, py, width, height, unpack, bitmap,
- cache->buffer, BITMAP_CACHE_WIDTH);
-
- return GL_TRUE; /* accumulated */
-}
-
-
-
-/**
- * Called via ctx->Driver.Bitmap()
- */
-static void
-st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
-{
- struct st_context *st = st_context(ctx);
- struct pipe_resource *pt;
-
- if (width == 0 || height == 0)
- return;
-
- st_validate_state(st);
-
- if (!st->bitmap.vs) {
- /* create pass-through vertex shader now */
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_COLOR,
- TGSI_SEMANTIC_GENERIC };
- const uint semantic_indexes[] = { 0, 0, 0 };
- st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
- semantic_names,
- semantic_indexes);
- }
-
- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
- return;
-
- pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
- if (pt) {
- struct pipe_sampler_view *sv =
- st_create_texture_sampler_view(st->pipe, pt);
-
- assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
-
- if (sv) {
- draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height, sv,
- st->ctx->Current.RasterColor);
-
- pipe_sampler_view_reference(&sv, NULL);
- }
-
- /* release/free the texture */
- pipe_resource_reference(&pt, NULL);
- }
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap_functions(struct dd_function_table *functions)
-{
- functions->Bitmap = st_Bitmap;
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap(struct st_context *st)
-{
- struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
-
- /* init sampler state once */
- memset(sampler, 0, sizeof(*sampler));
- sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
- sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
- sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
- sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- st->bitmap.samplers[1] = *sampler;
- st->bitmap.samplers[1].normalized_coords = 1;
-
- /* init baseline rasterizer state once */
- memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
- st->bitmap.rasterizer.gl_rasterization_rules = 1;
-
- /* find a usable texture format */
- if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
- }
- else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
- }
- else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
- }
- else {
- /* XXX support more formats */
- assert(0);
- }
-
- /* alloc bitmap cache object */
- st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
-
- reset_cache(st);
-}
-
-
-/** Per-context tear-down */
-void
-st_destroy_bitmap(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (st->bitmap.vs) {
- cso_delete_vertex_shader(st->cso_context, st->bitmap.vs);
- st->bitmap.vs = NULL;
- }
-
- if (st->bitmap.vbuf) {
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf = NULL;
- }
-
- if (cache) {
- if (cache->trans) {
- pipe_transfer_unmap(pipe, cache->trans);
- pipe->transfer_destroy(pipe, cache->trans);
- }
- pipe_resource_reference(&st->bitmap.cache->texture, NULL);
- free(st->bitmap.cache);
- st->bitmap.cache = NULL;
- }
-}
-
-#endif /* FEATURE_drawpix */
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/program.h" +#include "program/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_program.h" +#include "st_cb_bitmap.h" +#include "st_texture.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_inlines.h" +#include "util/u_draw_quad.h" +#include "util/u_simple_shaders.h" +#include "program/prog_instruction.h" +#include "cso_cache/cso_context.h" + + +#if FEATURE_drawpix + +/** + * glBitmaps are drawn as textured quads. The user's bitmap pattern + * is stored in a texture image. An alpha8 texture format is used. + * The fragment shader samples a bit (texel) from the texture, then + * discards the fragment if the bit is off. + * + * Note that we actually store the inverse image of the bitmap to + * simplify the fragment program. An "on" bit gets stored as texel=0x0 + * and an "off" bit is stored as texel=0xff. Then we kill the + * fragment if the negated texel value is less than zero. + */ + + +/** + * The bitmap cache attempts to accumulate multiple glBitmap calls in a + * buffer which is then rendered en mass upon a flush, state change, etc. + * A wide, short buffer is used to target the common case of a series + * of glBitmap calls being used to draw text. + */ +static GLboolean UseBitmapCache = GL_TRUE; + + +#define BITMAP_CACHE_WIDTH 512 +#define BITMAP_CACHE_HEIGHT 32 + +struct bitmap_cache +{ + /** Window pos to render the cached image */ + GLint xpos, ypos; + /** Bounds of region used in window coords */ + GLint xmin, ymin, xmax, ymax; + + GLfloat color[4]; + + /** Bitmap's Z position */ + GLfloat zpos; + + struct pipe_resource *texture; + struct pipe_transfer *trans; + + GLboolean empty; + + /** An I8 texture image: */ + ubyte *buffer; +}; + + +/** Epsilon for Z comparisons */ +#define Z_EPSILON 1e-06 + + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + */ +static struct st_fragment_program * +make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex) +{ + struct st_context *st = st_context(ctx); + struct st_fragment_program *stfp; + struct gl_program *p; + GLuint ic = 0; + + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = 3; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY; + p->Instructions[ic].DstReg.Index = 0; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = samplerIndex; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */ + p->Instructions[ic].Opcode = OPCODE_KIL; + p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + + if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM) + p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX; + + p->Instructions[ic].SrcReg[0].Index = 0; + p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW; + ic++; + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0; + p->OutputsWritten = 0x0; + p->SamplersUsed = (1 << samplerIndex); + + stfp = (struct st_fragment_program *) p; + stfp->Base.UsesKill = GL_TRUE; + + return stfp; +} + + +static int +find_free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) { + return i; + } + } + return -1; +} + + +/** + * Combine basic bitmap fragment program with the user-defined program. + * \param st current context + * \param fpIn the incoming fragment program + * \param fpOut the new fragment program which does fragment culling + * \param bitmap_sampler sampler number for the bitmap texture + */ +void +st_make_bitmap_fragment_program(struct st_context *st, + struct gl_fragment_program *fpIn, + struct gl_fragment_program **fpOut, + GLuint *bitmap_sampler) +{ + struct st_fragment_program *bitmap_prog; + struct gl_program *newProg; + uint sampler; + + /* + * Generate new program which is the user-defined program prefixed + * with the bitmap sampler/kill instructions. + */ + sampler = find_free_bit(fpIn->Base.SamplersUsed); + bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler); + + newProg = _mesa_combine_programs(st->ctx, + &bitmap_prog->Base.Base, + &fpIn->Base); + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + +#if 0 + { + printf("Combined bitmap program:\n"); + _mesa_print_program(newProg); + printf("InputsRead: 0x%x\n", newProg->InputsRead); + printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); + _mesa_print_parameter_list(newProg->Parameters); + } +#endif + + /* return results */ + *fpOut = (struct gl_fragment_program *) newProg; + *bitmap_sampler = sampler; +} + + +/** + * Copy user-provide bitmap bits into texture buffer, expanding + * bits into texels. + * "On" bits will set texels to 0x0. + * "Off" bits will not modify texels. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. + */ +static void +unpack_bitmap(struct st_context *st, + GLint px, GLint py, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + ubyte *destBuffer, uint destStride) +{ + destBuffer += py * destStride + px; + + _mesa_expand_bitmap(width, height, unpack, bitmap, + destBuffer, destStride, 0x0); +} + + +/** + * Create a texture which represents a bitmap image. + */ +static struct pipe_resource * +make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_transfer *transfer; + ubyte *dest; + struct pipe_resource *pt; + + /* PBO source... */ + bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap); + if (!bitmap) { + return NULL; + } + + /** + * Create texture to hold bitmap pattern. + */ + pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format, + 0, width, height, 1, 1, + PIPE_BIND_SAMPLER_VIEW); + if (!pt) { + _mesa_unmap_pbo_source(ctx, unpack); + return NULL; + } + + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, + 0, 0, width, height); + + dest = pipe_transfer_map(pipe, transfer); + + /* Put image into texture transfer */ + memset(dest, 0xff, height * transfer->stride); + unpack_bitmap(st, 0, 0, width, height, unpack, bitmap, + dest, transfer->stride); + + _mesa_unmap_pbo_source(ctx, unpack); + + /* Release transfer */ + pipe_transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); + + return pt; +} + +static GLuint +setup_bitmap_vertex_data(struct st_context *st, bool normalized, + int x, int y, int width, int height, + float z, const float color[4]) +{ + struct pipe_context *pipe = st->pipe; + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat)fb->Width; + const GLfloat fb_height = (GLfloat)fb->Height; + const GLfloat x0 = (GLfloat)x; + const GLfloat x1 = (GLfloat)(x + width); + const GLfloat y0 = (GLfloat)y; + const GLfloat y1 = (GLfloat)(y + height); + GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0; + GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop; + const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); + const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); + const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); + const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */ + GLuint i; + + if(!normalized) + { + sRight = width; + tBot = height; + } + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + if (st->bitmap.vbuf_slot >= max_slots) { + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; + } + + if (!st->bitmap.vbuf) { + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + max_slots * + sizeof(st->bitmap.vertices)); + } + + /* Positions are in clip coords since we need to do clipping in case + * the bitmap quad goes beyond the window bounds. + */ + st->bitmap.vertices[0][0][0] = clip_x0; + st->bitmap.vertices[0][0][1] = clip_y0; + st->bitmap.vertices[0][2][0] = sLeft; + st->bitmap.vertices[0][2][1] = tTop; + + st->bitmap.vertices[1][0][0] = clip_x1; + st->bitmap.vertices[1][0][1] = clip_y0; + st->bitmap.vertices[1][2][0] = sRight; + st->bitmap.vertices[1][2][1] = tTop; + + st->bitmap.vertices[2][0][0] = clip_x1; + st->bitmap.vertices[2][0][1] = clip_y1; + st->bitmap.vertices[2][2][0] = sRight; + st->bitmap.vertices[2][2][1] = tBot; + + st->bitmap.vertices[3][0][0] = clip_x0; + st->bitmap.vertices[3][0][1] = clip_y1; + st->bitmap.vertices[3][2][0] = sLeft; + st->bitmap.vertices[3][2][1] = tBot; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->bitmap.vertices[i][0][2] = z; + st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][1][0] = color[0]; + st->bitmap.vertices[i][1][1] = color[1]; + st->bitmap.vertices[i][1][2] = color[2]; + st->bitmap.vertices[i][1][3] = color[3]; + st->bitmap.vertices[i][2][2] = 0.0; /*R*/ + st->bitmap.vertices[i][2][3] = 1.0; /*Q*/ + } + + /* put vertex data into vbuf */ + pipe_buffer_write_nooverlap(st->pipe, + st->bitmap.vbuf, + st->bitmap.vbuf_slot + * sizeof(st->bitmap.vertices), + sizeof st->bitmap.vertices, + st->bitmap.vertices); + + return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices; +} + + + +/** + * Render a glBitmap by drawing a textured quad + */ +static void +draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + struct pipe_sampler_view *sv, + const GLfloat *color) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; + struct st_fp_variant *fpv; + struct st_fp_variant_key key; + GLuint maxSize; + GLuint offset; + + memset(&key, 0, sizeof(key)); + key.st = st; + key.bitmap = GL_TRUE; + + fpv = st_get_fp_variant(st, st->fp, &key); + + /* As an optimization, Mesa's fragment programs will sometimes get the + * primary color from a statevar/constant rather than a varying variable. + * when that's the case, we need to ensure that we use the 'color' + * parameter and not the current attribute color (which may have changed + * through glRasterPos and state validation. + * So, we force the proper color here. Not elegant, but it works. + */ + { + GLfloat colorSave[4]; + COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); + COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color); + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave); + } + + + /* limit checks */ + /* XXX if the bitmap is larger than the max texture size, break + * it up into chunks. + */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= (GLsizei)maxSize); + assert(height <= (GLsizei)maxSize); + + cso_save_rasterizer(cso); + cso_save_samplers(cso); + cso_save_fragment_sampler_views(cso); + cso_save_viewport(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + + /* rasterizer state: just scissor */ + st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &st->bitmap.rasterizer); + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, fpv->driver_shader); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, st->bitmap.vs); + + /* user samplers, plus our bitmap sampler */ + { + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers); + uint i; + for (i = 0; i < st->state.num_samplers; i++) { + samplers[i] = &st->state.samplers[i]; + } + samplers[fpv->bitmap_sampler] = + &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT]; + cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers); + } + + /* user textures, plus the bitmap texture */ + { + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures); + memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views)); + sampler_views[fpv->bitmap_sampler] = sv; + cso_set_fragment_sampler_views(cso, num, sampler_views); + } + + /* viewport state: viewport matching window dims */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + const GLfloat width = (GLfloat)fb->Width; + const GLfloat height = (GLfloat)fb->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 0.5f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.5f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + cso_set_vertex_elements(cso, 3, st->velems_util_draw); + + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + z = z * 2.0 - 1.0; + + /* draw textured quad */ + offset = setup_bitmap_vertex_data(st, + sv->texture->target != PIPE_TEXTURE_RECT, + x, y, width, height, z, color); + + util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 3); /* attribs/vert */ + + + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_samplers(cso); + cso_restore_fragment_sampler_views(cso); + cso_restore_viewport(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); +} + + +static void +reset_cache(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/ + cache->empty = GL_TRUE; + + cache->xmin = 1000000; + cache->xmax = -1000000; + cache->ymin = 1000000; + cache->ymax = -1000000; + + if (cache->trans) { + pipe->transfer_destroy(pipe, cache->trans); + cache->trans = NULL; + } + + assert(!cache->texture); + + /* allocate a new texture */ + cache->texture = st_texture_create(st, PIPE_TEXTURE_2D, + st->bitmap.tex_format, 0, + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + 1, 1, + PIPE_BIND_SAMPLER_VIEW); +} + + +/** Print bitmap image to stdout (debug) */ +static void +print_cache(const struct bitmap_cache *cache) +{ + int i, j, k; + + for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) { + k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1); + for (j = 0; j < BITMAP_CACHE_WIDTH; j++) { + if (cache->buffer[k]) + printf("X"); + else + printf(" "); + k++; + } + printf("\n"); + } +} + + +/** + * Create gallium pipe_transfer object for the bitmap cache. + */ +static void +create_cache_trans(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + if (cache->trans) + return; + + /* Map the texture transfer. + * Subsequent glBitmap calls will write into the texture image. + */ + cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + BITMAP_CACHE_WIDTH, + BITMAP_CACHE_HEIGHT); + cache->buffer = pipe_transfer_map(pipe, cache->trans); + + /* init image to all 0xff */ + memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT); +} + + +/** + * If there's anything in the bitmap cache, draw/flush it now. + */ +void +st_flush_bitmap_cache(struct st_context *st) +{ + if (!st->bitmap.cache->empty) { + struct bitmap_cache *cache = st->bitmap.cache; + + if (st->ctx->DrawBuffer) { + struct pipe_context *pipe = st->pipe; + struct pipe_sampler_view *sv; + + assert(cache->xmin <= cache->xmax); + +/* printf("flush size %d x %d at %d, %d\n", + cache->xmax - cache->xmin, + cache->ymax - cache->ymin, + cache->xpos, cache->ypos); +*/ + + /* The texture transfer has been mapped until now. + * So unmap and release the texture transfer before drawing. + */ + if (cache->trans) { + if (0) + print_cache(cache); + pipe_transfer_unmap(pipe, cache->trans); + cache->buffer = NULL; + + pipe->transfer_destroy(pipe, cache->trans); + cache->trans = NULL; + } + + sv = st_create_texture_sampler_view(st->pipe, cache->texture); + if (sv) { + draw_bitmap_quad(st->ctx, + cache->xpos, + cache->ypos, + cache->zpos, + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + sv, + cache->color); + + pipe_sampler_view_reference(&sv, NULL); + } + } + + /* release/free the texture */ + pipe_resource_reference(&cache->texture, NULL); + + reset_cache(st); + } +} + + +/** + * Flush bitmap cache and release vertex buffer. + */ +void +st_flush_bitmap( struct st_context *st ) +{ + st_flush_bitmap_cache(st); + + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; +} + + +/** + * Try to accumulate this glBitmap call in the bitmap cache. + * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. + */ +static GLboolean +accum_bitmap(struct st_context *st, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct bitmap_cache *cache = st->bitmap.cache; + int px = -999, py = -999; + const GLfloat z = st->ctx->Current.RasterPos[2]; + + if (width > BITMAP_CACHE_WIDTH || + height > BITMAP_CACHE_HEIGHT) + return GL_FALSE; /* too big to cache */ + + if (!cache->empty) { + px = x - cache->xpos; /* pos in buffer */ + py = y - cache->ypos; + if (px < 0 || px + width > BITMAP_CACHE_WIDTH || + py < 0 || py + height > BITMAP_CACHE_HEIGHT || + !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) || + ((fabs(z - cache->zpos) > Z_EPSILON))) { + /* This bitmap would extend beyond cache bounds, or the bitmap + * color is changing + * so flush and continue. + */ + st_flush_bitmap_cache(st); + } + } + + if (cache->empty) { + /* Initialize. Center bitmap vertically in the buffer. */ + px = 0; + py = (BITMAP_CACHE_HEIGHT - height) / 2; + cache->xpos = x; + cache->ypos = y - py; + cache->zpos = z; + cache->empty = GL_FALSE; + COPY_4FV(cache->color, st->ctx->Current.RasterColor); + } + + assert(px != -999); + assert(py != -999); + + if (x < cache->xmin) + cache->xmin = x; + if (y < cache->ymin) + cache->ymin = y; + if (x + width > cache->xmax) + cache->xmax = x + width; + if (y + height > cache->ymax) + cache->ymax = y + height; + + /* create the transfer if needed */ + create_cache_trans(st); + + unpack_bitmap(st, px, py, width, height, unpack, bitmap, + cache->buffer, BITMAP_CACHE_WIDTH); + + return GL_TRUE; /* accumulated */ +} + + + +/** + * Called via ctx->Driver.Bitmap() + */ +static void +st_Bitmap(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ) +{ + struct st_context *st = st_context(ctx); + struct pipe_resource *pt; + + if (width == 0 || height == 0) + return; + + st_validate_state(st); + + if (!st->bitmap.vs) { + /* create pass-through vertex shader now */ + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0, 0 }; + st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, + semantic_names, + semantic_indexes); + } + + if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) + return; + + pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); + if (pt) { + struct pipe_sampler_view *sv = + st_create_texture_sampler_view(st->pipe, pt); + + assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT); + + if (sv) { + draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, sv, + st->ctx->Current.RasterColor); + + pipe_sampler_view_reference(&sv, NULL); + } + + /* release/free the texture */ + pipe_resource_reference(&pt, NULL); + } +} + + +/** Per-context init */ +void +st_init_bitmap_functions(struct dd_function_table *functions) +{ + functions->Bitmap = st_Bitmap; +} + + +/** Per-context init */ +void +st_init_bitmap(struct st_context *st) +{ + struct pipe_sampler_state *sampler = &st->bitmap.samplers[0]; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + + /* init sampler state once */ + memset(sampler, 0, sizeof(*sampler)); + sampler->wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST; + st->bitmap.samplers[1] = *sampler; + st->bitmap.samplers[1].normalized_coords = 1; + + /* init baseline rasterizer state once */ + memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); + st->bitmap.rasterizer.gl_rasterization_rules = 1; + + /* find a usable texture format */ + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; + } + else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM; + } + else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM, + PIPE_TEXTURE_2D, 0, + PIPE_BIND_SAMPLER_VIEW, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM; + } + else { + /* XXX support more formats */ + assert(0); + } + + /* alloc bitmap cache object */ + st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache); + + reset_cache(st); +} + + +/** Per-context tear-down */ +void +st_destroy_bitmap(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct bitmap_cache *cache = st->bitmap.cache; + + if (st->bitmap.vs) { + cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); + st->bitmap.vs = NULL; + } + + if (st->bitmap.vbuf) { + pipe_resource_reference(&st->bitmap.vbuf, NULL); + st->bitmap.vbuf = NULL; + } + + if (cache) { + if (cache->trans) { + pipe_transfer_unmap(pipe, cache->trans); + pipe->transfer_destroy(pipe, cache->trans); + } + pipe_resource_reference(&st->bitmap.cache->texture, NULL); + free(st->bitmap.cache); + st->bitmap.cache = NULL; + } +} + +#endif /* FEATURE_drawpix */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c index ba8a8cf89..12528f49f 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -1,447 +1,468 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * Functions for pixel buffer objects and vertex/element buffer objects.
- */
-
-
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "main/arrayobj.h"
-#include "main/bufferobj.h"
-
-#include "st_context.h"
-#include "st_cb_bufferobjects.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-
-
-/**
- * There is some duplication between mesa's bufferobjects and our
- * bufmgr buffers. Both have an integer handle and a hashtable to
- * lookup an opaque structure. It would be nice if the handles and
- * internal structure where somehow shared.
- */
-static struct gl_buffer_object *
-st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target)
-{
- struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object);
-
- if (!st_obj)
- return NULL;
-
- _mesa_initialize_buffer_object(&st_obj->Base, name, target);
-
- return &st_obj->Base;
-}
-
-
-
-/**
- * Deallocate/free a vertex/pixel buffer object.
- * Called via glDeleteBuffersARB().
- */
-static void
-st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- assert(obj->RefCount == 0);
- assert(st_obj->transfer == NULL);
-
- if (st_obj->buffer)
- pipe_resource_reference(&st_obj->buffer, NULL);
-
- free(st_obj);
-}
-
-
-
-/**
- * Replace data in a subrange of buffer object. If the data range
- * specified by size + offset extends beyond the end of the buffer or
- * if data is NULL, no copy is performed.
- * Called via glBufferSubDataARB().
- */
-static void
-st_bufferobj_subdata(struct gl_context *ctx,
- GLenum target,
- GLintptrARB offset,
- GLsizeiptrARB size,
- const GLvoid * data, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* we may be called from VBO code, so double-check params here */
- ASSERT(offset >= 0);
- ASSERT(size >= 0);
- ASSERT(offset + size <= obj->Size);
-
- if (!size)
- return;
-
- /*
- * According to ARB_vertex_buffer_object specification, if data is null,
- * then the contents of the buffer object's data store is undefined. We just
- * ignore, and leave it unchanged.
- */
- if (!data)
- return;
-
- /* Now that transfers are per-context, we don't have to figure out
- * flushing here. Usually drivers won't need to flush in this case
- * even if the buffer is currently referenced by hardware - they
- * just queue the upload as dma rather than mapping the underlying
- * buffer directly.
- */
- pipe_buffer_write(st_context(ctx)->pipe,
- st_obj->buffer,
- offset, size, data);
-}
-
-
-/**
- * Called via glGetBufferSubDataARB().
- */
-static void
-st_bufferobj_get_subdata(struct gl_context *ctx,
- GLenum target,
- GLintptrARB offset,
- GLsizeiptrARB size,
- GLvoid * data, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* we may be called from VBO code, so double-check params here */
- ASSERT(offset >= 0);
- ASSERT(size >= 0);
- ASSERT(offset + size <= obj->Size);
-
- if (!size)
- return;
-
- pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer,
- offset, size, data);
-}
-
-
-/**
- * Allocate space for and store data in a buffer object. Any data that was
- * previously stored in the buffer object is lost. If data is NULL,
- * memory will be allocated, but no copy will occur.
- * Called via ctx->Driver.BufferData().
- * \return GL_TRUE for success, GL_FALSE if out of memory
- */
-static GLboolean
-st_bufferobj_data(struct gl_context *ctx,
- GLenum target,
- GLsizeiptrARB size,
- const GLvoid * data,
- GLenum usage,
- struct gl_buffer_object *obj)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- unsigned buffer_usage;
-
- st_obj->Base.Size = size;
- st_obj->Base.Usage = usage;
-
- switch(target) {
- case GL_PIXEL_PACK_BUFFER_ARB:
- case GL_PIXEL_UNPACK_BUFFER_ARB:
- buffer_usage = PIPE_BIND_RENDER_TARGET;
- break;
- case GL_ARRAY_BUFFER_ARB:
- buffer_usage = PIPE_BIND_VERTEX_BUFFER;
- break;
- case GL_ELEMENT_ARRAY_BUFFER_ARB:
- buffer_usage = PIPE_BIND_INDEX_BUFFER;
- break;
- default:
- buffer_usage = 0;
- }
-
- pipe_resource_reference( &st_obj->buffer, NULL );
-
- if (size != 0) {
- st_obj->buffer = pipe_buffer_create(pipe->screen, buffer_usage, size);
-
- if (!st_obj->buffer) {
- return GL_FALSE;
- }
-
- if (data)
- pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0,
- size, data);
- return GL_TRUE;
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Dummy data whose's pointer is used for zero size buffers or ranges.
- */
-static long st_bufferobj_zero_length = 0;
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- uint flags;
-
- switch (access) {
- case GL_WRITE_ONLY:
- flags = PIPE_TRANSFER_WRITE;
- break;
- case GL_READ_ONLY:
- flags = PIPE_TRANSFER_READ;
- break;
- case GL_READ_WRITE:
- default:
- flags = PIPE_TRANSFER_READ_WRITE;
- break;
- }
-
- /* Handle zero-size buffers here rather than in drivers */
- if (obj->Size == 0) {
- obj->Pointer = &st_bufferobj_zero_length;
- }
- else {
- obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
- st_obj->buffer,
- flags,
- &st_obj->transfer);
- }
-
- if (obj->Pointer) {
- obj->Offset = 0;
- obj->Length = obj->Size;
- }
- return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferRange().
- */
-static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target,
- GLintptr offset, GLsizeiptr length, GLbitfield access,
- struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- enum pipe_transfer_usage flags = 0x0;
-
- if (access & GL_MAP_WRITE_BIT)
- flags |= PIPE_TRANSFER_WRITE;
-
- if (access & GL_MAP_READ_BIT)
- flags |= PIPE_TRANSFER_READ;
-
- if (access & GL_MAP_FLUSH_EXPLICIT_BIT)
- flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
-
- if (access & GL_MAP_INVALIDATE_RANGE_BIT)
- flags |= PIPE_TRANSFER_DISCARD;
-
- if (access & GL_MAP_INVALIDATE_BUFFER_BIT)
- flags |= PIPE_TRANSFER_DISCARD;
-
- if (access & GL_MAP_UNSYNCHRONIZED_BIT)
- flags |= PIPE_TRANSFER_UNSYNCHRONIZED;
-
- /* ... other flags ...
- */
-
- if (access & MESA_MAP_NOWAIT_BIT)
- flags |= PIPE_TRANSFER_DONTBLOCK;
-
- assert(offset >= 0);
- assert(length >= 0);
- assert(offset < obj->Size);
- assert(offset + length <= obj->Size);
-
- /*
- * We go out of way here to hide the degenerate yet valid case of zero
- * length range from the pipe driver.
- */
- if (!length) {
- obj->Pointer = &st_bufferobj_zero_length;
- }
- else {
- obj->Pointer = pipe_buffer_map_range(pipe,
- st_obj->buffer,
- offset, length,
- flags,
- &st_obj->transfer);
- if (obj->Pointer) {
- obj->Pointer = (ubyte *) obj->Pointer + offset;
- }
- }
-
- if (obj->Pointer) {
- obj->Offset = offset;
- obj->Length = length;
- obj->AccessFlags = access;
- }
-
- return obj->Pointer;
-}
-
-
-static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
- GLintptr offset, GLsizeiptr length,
- struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* Subrange is relative to mapped range */
- assert(offset >= 0);
- assert(length >= 0);
- assert(offset + length <= obj->Length);
- assert(obj->Pointer);
-
- if (!length)
- return;
-
- pipe_buffer_flush_mapped_range(pipe, st_obj->transfer,
- obj->Offset + offset, length);
-}
-
-
-/**
- * Called via glUnmapBufferARB().
- */
-static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- if (obj->Length)
- pipe_buffer_unmap(pipe, st_obj->transfer);
-
- st_obj->transfer = NULL;
- obj->Pointer = NULL;
- obj->Offset = 0;
- obj->Length = 0;
- return GL_TRUE;
-}
-
-
-/**
- * Called via glCopyBufferSubData().
- */
-static void
-st_copy_buffer_subdata(struct gl_context *ctx,
- struct gl_buffer_object *src,
- struct gl_buffer_object *dst,
- GLintptr readOffset, GLintptr writeOffset,
- GLsizeiptr size)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *srcObj = st_buffer_object(src);
- struct st_buffer_object *dstObj = st_buffer_object(dst);
- struct pipe_transfer *src_transfer;
- struct pipe_transfer *dst_transfer;
- ubyte *srcPtr, *dstPtr;
-
- if(!size)
- return;
-
- /* buffer should not already be mapped */
- assert(!src->Pointer);
- assert(!dst->Pointer);
-
- srcPtr = (ubyte *) pipe_buffer_map_range(pipe,
- srcObj->buffer,
- readOffset, size,
- PIPE_TRANSFER_READ,
- &src_transfer);
-
- dstPtr = (ubyte *) pipe_buffer_map_range(pipe,
- dstObj->buffer,
- writeOffset, size,
- PIPE_TRANSFER_WRITE,
- &dst_transfer);
-
- if (srcPtr && dstPtr)
- memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
-
- pipe_buffer_unmap(pipe, src_transfer);
- pipe_buffer_unmap(pipe, dst_transfer);
-}
-
-
-/* TODO: if buffer wasn't created with appropriate usage flags, need
- * to recreate it now and copy contents -- or possibly create a
- * gallium entrypoint to extend the usage flags and let the driver
- * decide if a copy is necessary.
- */
-void
-st_bufferobj_validate_usage(struct st_context *st,
- struct st_buffer_object *obj,
- unsigned usage)
-{
-}
-
-
-void
-st_init_bufferobject_functions(struct dd_function_table *functions)
-{
- functions->NewBufferObject = st_bufferobj_alloc;
- functions->DeleteBuffer = st_bufferobj_free;
- functions->BufferData = st_bufferobj_data;
- functions->BufferSubData = st_bufferobj_subdata;
- functions->GetBufferSubData = st_bufferobj_get_subdata;
- functions->MapBuffer = st_bufferobj_map;
- functions->MapBufferRange = st_bufferobj_map_range;
- functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
- functions->UnmapBuffer = st_bufferobj_unmap;
- functions->CopyBufferSubData = st_copy_buffer_subdata;
-
- /* For GL_APPLE_vertex_array_object */
- functions->NewArrayObject = _mesa_new_array_object;
- functions->DeleteArrayObject = _mesa_delete_array_object;
-}
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Functions for pixel buffer objects and vertex/element buffer objects. + */ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/arrayobj.h" +#include "main/bufferobj.h" + +#include "st_context.h" +#include "st_cb_bufferobjects.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" + + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target) +{ + struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object); + + if (!st_obj) + return NULL; + + _mesa_initialize_buffer_object(&st_obj->Base, name, target); + + return &st_obj->Base; +} + + + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). + */ +static void +st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + assert(obj->RefCount == 0); + assert(st_obj->transfer == NULL); + + if (st_obj->buffer) + pipe_resource_reference(&st_obj->buffer, NULL); + + free(st_obj); +} + + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +st_bufferobj_subdata(struct gl_context *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* we may be called from VBO code, so double-check params here */ + ASSERT(offset >= 0); + ASSERT(size >= 0); + ASSERT(offset + size <= obj->Size); + + if (!size) + return; + + /* + * According to ARB_vertex_buffer_object specification, if data is null, + * then the contents of the buffer object's data store is undefined. We just + * ignore, and leave it unchanged. + */ + if (!data) + return; + + /* Now that transfers are per-context, we don't have to figure out + * flushing here. Usually drivers won't need to flush in this case + * even if the buffer is currently referenced by hardware - they + * just queue the upload as dma rather than mapping the underlying + * buffer directly. + */ + pipe_buffer_write(st_context(ctx)->pipe, + st_obj->buffer, + offset, size, data); +} + + +/** + * Called via glGetBufferSubDataARB(). + */ +static void +st_bufferobj_get_subdata(struct gl_context *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* we may be called from VBO code, so double-check params here */ + ASSERT(offset >= 0); + ASSERT(size >= 0); + ASSERT(offset + size <= obj->Size); + + if (!size) + return; + + pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer, + offset, size, data); +} + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via ctx->Driver.BufferData(). + * \return GL_TRUE for success, GL_FALSE if out of memory + */ +static GLboolean +st_bufferobj_data(struct gl_context *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, + struct gl_buffer_object *obj) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + unsigned bind, pipe_usage; + + st_obj->Base.Size = size; + st_obj->Base.Usage = usage; + + switch(target) { + case GL_PIXEL_PACK_BUFFER_ARB: + case GL_PIXEL_UNPACK_BUFFER_ARB: + bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW; + break; + case GL_ARRAY_BUFFER_ARB: + bind = PIPE_BIND_VERTEX_BUFFER; + break; + case GL_ELEMENT_ARRAY_BUFFER_ARB: + bind = PIPE_BIND_INDEX_BUFFER; + break; + default: + bind = 0; + } + + switch (usage) { + case GL_STATIC_DRAW: + case GL_STATIC_READ: + case GL_STATIC_COPY: + pipe_usage = PIPE_USAGE_STATIC; + break; + case GL_DYNAMIC_DRAW: + case GL_DYNAMIC_READ: + case GL_DYNAMIC_COPY: + pipe_usage = PIPE_USAGE_DYNAMIC; + break; + case GL_STREAM_DRAW: + case GL_STREAM_READ: + case GL_STREAM_COPY: + pipe_usage = PIPE_USAGE_STREAM; + break; + default: + pipe_usage = PIPE_USAGE_DEFAULT; + } + + pipe_resource_reference( &st_obj->buffer, NULL ); + + if (size != 0) { + st_obj->buffer = pipe_buffer_create(pipe->screen, bind, + pipe_usage, size); + + if (!st_obj->buffer) { + return GL_FALSE; + } + + if (data) + pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0, + size, data); + return GL_TRUE; + } + + return GL_TRUE; +} + + +/** + * Dummy data whose's pointer is used for zero size buffers or ranges. + */ +static long st_bufferobj_zero_length = 0; + + + +/** + * Called via glMapBufferARB(). + */ +static void * +st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access, + struct gl_buffer_object *obj) +{ + struct st_buffer_object *st_obj = st_buffer_object(obj); + uint flags; + + switch (access) { + case GL_WRITE_ONLY: + flags = PIPE_TRANSFER_WRITE; + break; + case GL_READ_ONLY: + flags = PIPE_TRANSFER_READ; + break; + case GL_READ_WRITE: + default: + flags = PIPE_TRANSFER_READ_WRITE; + break; + } + + /* Handle zero-size buffers here rather than in drivers */ + if (obj->Size == 0) { + obj->Pointer = &st_bufferobj_zero_length; + } + else { + obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe, + st_obj->buffer, + flags, + &st_obj->transfer); + } + + if (obj->Pointer) { + obj->Offset = 0; + obj->Length = obj->Size; + } + return obj->Pointer; +} + + +/** + * Called via glMapBufferRange(). + */ +static void * +st_bufferobj_map_range(struct gl_context *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, GLbitfield access, + struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + enum pipe_transfer_usage flags = 0x0; + + if (access & GL_MAP_WRITE_BIT) + flags |= PIPE_TRANSFER_WRITE; + + if (access & GL_MAP_READ_BIT) + flags |= PIPE_TRANSFER_READ; + + if (access & GL_MAP_FLUSH_EXPLICIT_BIT) + flags |= PIPE_TRANSFER_FLUSH_EXPLICIT; + + if (access & GL_MAP_INVALIDATE_RANGE_BIT) + flags |= PIPE_TRANSFER_DISCARD; + + if (access & GL_MAP_INVALIDATE_BUFFER_BIT) + flags |= PIPE_TRANSFER_DISCARD; + + if (access & GL_MAP_UNSYNCHRONIZED_BIT) + flags |= PIPE_TRANSFER_UNSYNCHRONIZED; + + /* ... other flags ... + */ + + if (access & MESA_MAP_NOWAIT_BIT) + flags |= PIPE_TRANSFER_DONTBLOCK; + + assert(offset >= 0); + assert(length >= 0); + assert(offset < obj->Size); + assert(offset + length <= obj->Size); + + /* + * We go out of way here to hide the degenerate yet valid case of zero + * length range from the pipe driver. + */ + if (!length) { + obj->Pointer = &st_bufferobj_zero_length; + } + else { + obj->Pointer = pipe_buffer_map_range(pipe, + st_obj->buffer, + offset, length, + flags, + &st_obj->transfer); + if (obj->Pointer) { + obj->Pointer = (ubyte *) obj->Pointer + offset; + } + } + + if (obj->Pointer) { + obj->Offset = offset; + obj->Length = length; + obj->AccessFlags = access; + } + + return obj->Pointer; +} + + +static void +st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target, + GLintptr offset, GLsizeiptr length, + struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + /* Subrange is relative to mapped range */ + assert(offset >= 0); + assert(length >= 0); + assert(offset + length <= obj->Length); + assert(obj->Pointer); + + if (!length) + return; + + pipe_buffer_flush_mapped_range(pipe, st_obj->transfer, + obj->Offset + offset, length); +} + + +/** + * Called via glUnmapBufferARB(). + */ +static GLboolean +st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + if (obj->Length) + pipe_buffer_unmap(pipe, st_obj->transfer); + + st_obj->transfer = NULL; + obj->Pointer = NULL; + obj->Offset = 0; + obj->Length = 0; + return GL_TRUE; +} + + +/** + * Called via glCopyBufferSubData(). + */ +static void +st_copy_buffer_subdata(struct gl_context *ctx, + struct gl_buffer_object *src, + struct gl_buffer_object *dst, + GLintptr readOffset, GLintptr writeOffset, + GLsizeiptr size) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *srcObj = st_buffer_object(src); + struct st_buffer_object *dstObj = st_buffer_object(dst); + struct pipe_transfer *src_transfer; + struct pipe_transfer *dst_transfer; + ubyte *srcPtr, *dstPtr; + + if(!size) + return; + + /* buffer should not already be mapped */ + assert(!src->Pointer); + assert(!dst->Pointer); + + srcPtr = (ubyte *) pipe_buffer_map_range(pipe, + srcObj->buffer, + readOffset, size, + PIPE_TRANSFER_READ, + &src_transfer); + + dstPtr = (ubyte *) pipe_buffer_map_range(pipe, + dstObj->buffer, + writeOffset, size, + PIPE_TRANSFER_WRITE, + &dst_transfer); + + if (srcPtr && dstPtr) + memcpy(dstPtr + writeOffset, srcPtr + readOffset, size); + + pipe_buffer_unmap(pipe, src_transfer); + pipe_buffer_unmap(pipe, dst_transfer); +} + + +/* TODO: if buffer wasn't created with appropriate usage flags, need + * to recreate it now and copy contents -- or possibly create a + * gallium entrypoint to extend the usage flags and let the driver + * decide if a copy is necessary. + */ +void +st_bufferobj_validate_usage(struct st_context *st, + struct st_buffer_object *obj, + unsigned usage) +{ +} + + +void +st_init_bufferobject_functions(struct dd_function_table *functions) +{ + functions->NewBufferObject = st_bufferobj_alloc; + functions->DeleteBuffer = st_bufferobj_free; + functions->BufferData = st_bufferobj_data; + functions->BufferSubData = st_bufferobj_subdata; + functions->GetBufferSubData = st_bufferobj_get_subdata; + functions->MapBuffer = st_bufferobj_map; + functions->MapBufferRange = st_bufferobj_map_range; + functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range; + functions->UnmapBuffer = st_bufferobj_unmap; + functions->CopyBufferSubData = st_copy_buffer_subdata; + + /* For GL_APPLE_vertex_array_object */ + functions->NewArrayObject = _mesa_new_array_object; + functions->DeleteArrayObject = _mesa_delete_array_object; +} diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index 3e27be271..d2e0cd73c 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -1,559 +1,563 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- * Michel Dänzer
- */
-
-#include "main/glheader.h"
-#include "main/formats.h"
-#include "main/macros.h"
-#include "program/prog_instruction.h"
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_accum.h"
-#include "st_cb_clear.h"
-#include "st_cb_fbo.h"
-#include "st_program.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
-#include "util/u_simple_shaders.h"
-#include "util/u_draw_quad.h"
-
-#include "cso_cache/cso_context.h"
-
-
-/**
- * Do per-context initialization for glClear.
- */
-void
-st_init_clear(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *pscreen = st->pipe->screen;
-
- memset(&st->clear, 0, sizeof(st->clear));
-
- st->clear.raster.gl_rasterization_rules = 1;
- st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE);
-
- /* fragment shader state: color pass-through program */
- st->clear.fs = util_make_fragment_passthrough_shader(pipe);
-
- /* vertex shader state: color/position pass-through */
- {
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_COLOR };
- const uint semantic_indexes[] = { 0, 0 };
- st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2,
- semantic_names,
- semantic_indexes);
- }
-}
-
-
-/**
- * Free per-context state for glClear.
- */
-void
-st_destroy_clear(struct st_context *st)
-{
- if (st->clear.fs) {
- cso_delete_fragment_shader(st->cso_context, st->clear.fs);
- st->clear.fs = NULL;
- }
- if (st->clear.vs) {
- cso_delete_vertex_shader(st->cso_context, st->clear.vs);
- st->clear.vs = NULL;
- }
- if (st->clear.vbuf) {
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf = NULL;
- }
-}
-
-
-/**
- * Draw a screen-aligned quadrilateral.
- * Coords are clip coords with y=0=bottom.
- */
-static void
-draw_quad(struct st_context *st,
- float x0, float y0, float x1, float y1, GLfloat z,
- const GLfloat color[4])
-{
- struct pipe_context *pipe = st->pipe;
-
- /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
- * no_flush) updates to buffers where we know there is no conflict
- * with previous data. Currently using max_slots > 1 will cause
- * synchronous rendering if the driver flushes its command buffers
- * between one bitmap and the next. Our flush hook below isn't
- * sufficient to catch this as the driver doesn't tell us when it
- * flushes its own command buffers. Until this gets fixed, pay the
- * price of allocating a new buffer for each bitmap cache-flush to
- * avoid synchronous rendering.
- */
- const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */
- GLuint i;
-
- if (st->clear.vbuf_slot >= max_slots) {
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf_slot = 0;
- }
-
- if (!st->clear.vbuf) {
- st->clear.vbuf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- max_slots * sizeof(st->clear.vertices));
- }
-
- /* positions */
- st->clear.vertices[0][0][0] = x0;
- st->clear.vertices[0][0][1] = y0;
-
- st->clear.vertices[1][0][0] = x1;
- st->clear.vertices[1][0][1] = y0;
-
- st->clear.vertices[2][0][0] = x1;
- st->clear.vertices[2][0][1] = y1;
-
- st->clear.vertices[3][0][0] = x0;
- st->clear.vertices[3][0][1] = y1;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- st->clear.vertices[i][0][2] = z;
- st->clear.vertices[i][0][3] = 1.0;
- st->clear.vertices[i][1][0] = color[0];
- st->clear.vertices[i][1][1] = color[1];
- st->clear.vertices[i][1][2] = color[2];
- st->clear.vertices[i][1][3] = color[3];
- }
-
- /* put vertex data into vbuf */
- pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf,
- st->clear.vbuf_slot
- * sizeof(st->clear.vertices),
- sizeof(st->clear.vertices),
- st->clear.vertices);
-
- /* draw */
- util_draw_vertex_buffer(pipe,
- st->clear.vbuf,
- st->clear.vbuf_slot * sizeof(st->clear.vertices),
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 2); /* attribs/vert */
-
- /* Increment slot */
- st->clear.vbuf_slot++;
-}
-
-
-
-/**
- * Do glClear by drawing a quadrilateral.
- * The vertices of the quad will be computed from the
- * ctx->DrawBuffer->_X/Ymin/max fields.
- */
-static void
-clear_with_quad(struct gl_context *ctx,
- GLboolean color, GLboolean depth, GLboolean stencil)
-{
- struct st_context *st = st_context(ctx);
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f;
- const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f;
- const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f;
- const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f;
-
- /*
- printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__,
- color ? "color, " : "",
- depth ? "depth, " : "",
- stencil ? "stencil" : "",
- x0, y0,
- x1, y1);
- */
-
- cso_save_blend(st->cso_context);
- cso_save_stencil_ref(st->cso_context);
- cso_save_depth_stencil_alpha(st->cso_context);
- cso_save_rasterizer(st->cso_context);
- cso_save_viewport(st->cso_context);
- cso_save_clip(st->cso_context);
- cso_save_fragment_shader(st->cso_context);
- cso_save_vertex_shader(st->cso_context);
- cso_save_vertex_elements(st->cso_context);
-
- /* blend state: RGBA masking */
- {
- struct pipe_blend_state blend;
- memset(&blend, 0, sizeof(blend));
- blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
- blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
- if (color) {
- if (ctx->Color.ColorMask[0][0])
- blend.rt[0].colormask |= PIPE_MASK_R;
- if (ctx->Color.ColorMask[0][1])
- blend.rt[0].colormask |= PIPE_MASK_G;
- if (ctx->Color.ColorMask[0][2])
- blend.rt[0].colormask |= PIPE_MASK_B;
- if (ctx->Color.ColorMask[0][3])
- blend.rt[0].colormask |= PIPE_MASK_A;
- if (st->ctx->Color.DitherFlag)
- blend.dither = 1;
- }
- cso_set_blend(st->cso_context, &blend);
- }
-
- /* depth_stencil state: always pass/set to ref value */
- {
- struct pipe_depth_stencil_alpha_state depth_stencil;
- memset(&depth_stencil, 0, sizeof(depth_stencil));
- if (depth) {
- depth_stencil.depth.enabled = 1;
- depth_stencil.depth.writemask = 1;
- depth_stencil.depth.func = PIPE_FUNC_ALWAYS;
- }
-
- if (stencil) {
- struct pipe_stencil_ref stencil_ref;
- memset(&stencil_ref, 0, sizeof(stencil_ref));
- depth_stencil.stencil[0].enabled = 1;
- depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS;
- depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].valuemask = 0xff;
- depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
- stencil_ref.ref_value[0] = ctx->Stencil.Clear;
- cso_set_stencil_ref(st->cso_context, &stencil_ref);
- }
-
- cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
- }
-
- cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
-
- cso_set_rasterizer(st->cso_context, &st->clear.raster);
-
- /* viewport state: viewport matching window dims */
- {
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * fb_width;
- vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 1.0f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * fb_width;
- vp.translate[1] = 0.5f * fb_height;
- vp.translate[2] = 0.0f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(st->cso_context, &vp);
- }
-
- cso_set_clip(st->cso_context, &st->clear.clip);
- cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
- cso_set_vertex_shader_handle(st->cso_context, st->clear.vs);
-
- /* draw quad matching scissor rect (XXX verify coord round-off) */
- draw_quad(st, x0, y0, x1, y1,
- (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor);
-
- /* Restore pipe state */
- cso_restore_blend(st->cso_context);
- cso_restore_stencil_ref(st->cso_context);
- cso_restore_depth_stencil_alpha(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_clip(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_vertex_elements(st->cso_context);
-}
-
-
-/**
- * Determine if we need to clear the depth buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (!ctx->Color.ColorMask[0][0] ||
- !ctx->Color.ColorMask[0][1] ||
- !ctx->Color.ColorMask[0][2] ||
- !ctx->Color.ColorMask[0][3])
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the combiend depth/stencil buffer by
- * drawing a quad.
- */
-static INLINE GLboolean
-check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- const GLuint stencilMax = 0xff;
- GLboolean maskStencil
- = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
-
- assert(rb->Format == MESA_FORMAT_S8 ||
- rb->Format == MESA_FORMAT_Z24_S8 ||
- rb->Format == MESA_FORMAT_S8_Z24);
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (maskStencil)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the depth buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
- boolean ds_separate)
-{
- const struct st_renderbuffer *strb = st_renderbuffer(rb);
- const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the stencil buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
- boolean ds_separate)
-{
- const struct st_renderbuffer *strb = st_renderbuffer(rb);
- const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
- const GLuint stencilMax = 0xff;
- const GLboolean maskStencil
- = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
-
- assert(rb->Format == MESA_FORMAT_S8 ||
- rb->Format == MESA_FORMAT_Z24_S8 ||
- rb->Format == MESA_FORMAT_S8_Z24);
-
- if (maskStencil)
- return GL_TRUE;
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- /* This is correct, but it is necessary to look at the depth clear
- * value held in the surface when it comes time to issue the clear,
- * rather than taking depth and stencil clear values from the
- * current state.
- */
- if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-
-/**
- * Called when we need to flush.
- */
-void
-st_flush_clear(struct st_context *st)
-{
- /* Release vertex buffer to avoid synchronous rendering if we were
- * to map it in the next frame.
- */
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf_slot = 0;
-}
-
-
-
-/**
- * Called via ctx->Driver.Clear()
- */
-static void
-st_Clear(struct gl_context *ctx, GLbitfield mask)
-{
- static const GLbitfield BUFFER_BITS_DS
- = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
- struct st_context *st = st_context(ctx);
- struct gl_renderbuffer *depthRb
- = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
- struct gl_renderbuffer *stencilRb
- = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
- GLbitfield quad_buffers = 0x0;
- GLbitfield clear_buffers = 0x0;
- GLuint i;
-
- /* This makes sure the pipe has the latest scissor, etc values */
- st_validate_state( st );
-
- if (mask & BUFFER_BITS_COLOR) {
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
-
- if (mask & (1 << b)) {
- struct gl_renderbuffer *rb
- = ctx->DrawBuffer->Attachment[b].Renderbuffer;
- struct st_renderbuffer *strb = st_renderbuffer(rb);
-
- if (!strb || !strb->surface)
- continue;
-
- if (check_clear_color_with_quad( ctx, rb ))
- quad_buffers |= PIPE_CLEAR_COLOR;
- else
- clear_buffers |= PIPE_CLEAR_COLOR;
- }
- }
- }
-
- if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) {
- /* clearing combined depth + stencil */
- struct st_renderbuffer *strb = st_renderbuffer(depthRb);
-
- if (strb->surface) {
- if (check_clear_depth_stencil_with_quad(ctx, depthRb))
- quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- else
- clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- }
- }
- else {
- /* separate depth/stencil clears */
- /* I don't think truly separate buffers are actually possible in gallium or hw? */
- if (mask & BUFFER_BIT_DEPTH) {
- struct st_renderbuffer *strb = st_renderbuffer(depthRb);
-
- if (strb->surface) {
- if (check_clear_depth_with_quad(ctx, depthRb,
- st->clear.enable_ds_separate))
- quad_buffers |= PIPE_CLEAR_DEPTH;
- else
- clear_buffers |= PIPE_CLEAR_DEPTH;
- }
- }
- if (mask & BUFFER_BIT_STENCIL) {
- struct st_renderbuffer *strb = st_renderbuffer(stencilRb);
-
- if (strb->surface) {
- if (check_clear_stencil_with_quad(ctx, stencilRb,
- st->clear.enable_ds_separate))
- quad_buffers |= PIPE_CLEAR_STENCIL;
- else
- clear_buffers |= PIPE_CLEAR_STENCIL;
- }
- }
- }
-
- /*
- * If we're going to use clear_with_quad() for any reason, use it for
- * everything possible.
- */
- if (quad_buffers) {
- quad_buffers |= clear_buffers;
- clear_with_quad(ctx,
- quad_buffers & PIPE_CLEAR_COLOR,
- quad_buffers & PIPE_CLEAR_DEPTH,
- quad_buffers & PIPE_CLEAR_STENCIL);
- } else if (clear_buffers) {
- /* driver cannot know it can clear everything if the buffer
- * is a combined depth/stencil buffer but this wasn't actually
- * required from the visual. Hence fix this up to avoid potential
- * read-modify-write in the driver.
- */
- if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) &&
- ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
- (depthRb == stencilRb) &&
- (ctx->DrawBuffer->Visual.depthBits == 0 ||
- ctx->DrawBuffer->Visual.stencilBits == 0))
- clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor,
- ctx->Depth.Clear, ctx->Stencil.Clear);
- }
- if (mask & BUFFER_BIT_ACCUM)
- st_clear_accum_buffer(ctx,
- ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
-}
-
-
-void
-st_init_clear_functions(struct dd_function_table *functions)
-{
- functions->Clear = st_Clear;
-}
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Michel Dänzer + */ + +#include "main/glheader.h" +#include "main/formats.h" +#include "main/macros.h" +#include "program/prog_instruction.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_accum.h" +#include "st_cb_clear.h" +#include "st_cb_fbo.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_simple_shaders.h" +#include "util/u_draw_quad.h" + +#include "cso_cache/cso_context.h" + + +/** + * Do per-context initialization for glClear. + */ +void +st_init_clear(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *pscreen = st->pipe->screen; + + memset(&st->clear, 0, sizeof(st->clear)); + + st->clear.raster.gl_rasterization_rules = 1; + st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); + + /* fragment shader state: color pass-through program */ + st->clear.fs = util_make_fragment_passthrough_shader(pipe); + + /* vertex shader state: color/position pass-through */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR }; + const uint semantic_indexes[] = { 0, 0 }; + st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, + semantic_names, + semantic_indexes); + } +} + + +/** + * Free per-context state for glClear. + */ +void +st_destroy_clear(struct st_context *st) +{ + if (st->clear.fs) { + cso_delete_fragment_shader(st->cso_context, st->clear.fs); + st->clear.fs = NULL; + } + if (st->clear.vs) { + cso_delete_vertex_shader(st->cso_context, st->clear.vs); + st->clear.vs = NULL; + } + if (st->clear.vbuf) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf = NULL; + } +} + + +/** + * Draw a screen-aligned quadrilateral. + * Coords are clip coords with y=0=bottom. + */ +static void +draw_quad(struct st_context *st, + float x0, float y0, float x1, float y1, GLfloat z, + const GLfloat color[4]) +{ + struct pipe_context *pipe = st->pipe; + + /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as + * no_flush) updates to buffers where we know there is no conflict + * with previous data. Currently using max_slots > 1 will cause + * synchronous rendering if the driver flushes its command buffers + * between one bitmap and the next. Our flush hook below isn't + * sufficient to catch this as the driver doesn't tell us when it + * flushes its own command buffers. Until this gets fixed, pay the + * price of allocating a new buffer for each bitmap cache-flush to + * avoid synchronous rendering. + */ + const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ + GLuint i; + + if (st->clear.vbuf_slot >= max_slots) { + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; + } + + if (!st->clear.vbuf) { + st->clear.vbuf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + max_slots * sizeof(st->clear.vertices)); + } + + /* positions */ + st->clear.vertices[0][0][0] = x0; + st->clear.vertices[0][0][1] = y0; + + st->clear.vertices[1][0][0] = x1; + st->clear.vertices[1][0][1] = y0; + + st->clear.vertices[2][0][0] = x1; + st->clear.vertices[2][0][1] = y1; + + st->clear.vertices[3][0][0] = x0; + st->clear.vertices[3][0][1] = y1; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->clear.vertices[i][0][2] = z; + st->clear.vertices[i][0][3] = 1.0; + st->clear.vertices[i][1][0] = color[0]; + st->clear.vertices[i][1][1] = color[1]; + st->clear.vertices[i][1][2] = color[2]; + st->clear.vertices[i][1][3] = color[3]; + } + + /* put vertex data into vbuf */ + pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, + st->clear.vbuf_slot + * sizeof(st->clear.vertices), + sizeof(st->clear.vertices), + st->clear.vertices); + + /* draw */ + util_draw_vertex_buffer(pipe, + st->cso_context, + st->clear.vbuf, + st->clear.vbuf_slot * sizeof(st->clear.vertices), + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* Increment slot */ + st->clear.vbuf_slot++; +} + + + +/** + * Do glClear by drawing a quadrilateral. + * The vertices of the quad will be computed from the + * ctx->DrawBuffer->_X/Ymin/max fields. + */ +static void +clear_with_quad(struct gl_context *ctx, + GLboolean color, GLboolean depth, GLboolean stencil) +{ + struct st_context *st = st_context(ctx); + const struct gl_framebuffer *fb = ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; + const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; + const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; + + /* + printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, + color ? "color, " : "", + depth ? "depth, " : "", + stencil ? "stencil" : "", + x0, y0, + x1, y1); + */ + + cso_save_blend(st->cso_context); + cso_save_stencil_ref(st->cso_context); + cso_save_depth_stencil_alpha(st->cso_context); + cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + cso_save_clip(st->cso_context); + cso_save_fragment_shader(st->cso_context); + cso_save_vertex_shader(st->cso_context); + cso_save_vertex_elements(st->cso_context); + cso_save_vertex_buffers(st->cso_context); + + /* blend state: RGBA masking */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + if (color) { + if (ctx->Color.ColorMask[0][0]) + blend.rt[0].colormask |= PIPE_MASK_R; + if (ctx->Color.ColorMask[0][1]) + blend.rt[0].colormask |= PIPE_MASK_G; + if (ctx->Color.ColorMask[0][2]) + blend.rt[0].colormask |= PIPE_MASK_B; + if (ctx->Color.ColorMask[0][3]) + blend.rt[0].colormask |= PIPE_MASK_A; + if (st->ctx->Color.DitherFlag) + blend.dither = 1; + } + cso_set_blend(st->cso_context, &blend); + } + + /* depth_stencil state: always pass/set to ref value */ + { + struct pipe_depth_stencil_alpha_state depth_stencil; + memset(&depth_stencil, 0, sizeof(depth_stencil)); + if (depth) { + depth_stencil.depth.enabled = 1; + depth_stencil.depth.writemask = 1; + depth_stencil.depth.func = PIPE_FUNC_ALWAYS; + } + + if (stencil) { + struct pipe_stencil_ref stencil_ref; + memset(&stencil_ref, 0, sizeof(stencil_ref)); + depth_stencil.stencil[0].enabled = 1; + depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; + depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].valuemask = 0xff; + depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; + stencil_ref.ref_value[0] = ctx->Stencil.Clear; + cso_set_stencil_ref(st->cso_context, &stencil_ref); + } + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + } + + cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); + + cso_set_rasterizer(st->cso_context, &st->clear.raster); + + /* viewport state: viewport matching window dims */ + { + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * fb_width; + vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * fb_width; + vp.translate[1] = 0.5f * fb_height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(st->cso_context, &vp); + } + + cso_set_clip(st->cso_context, &st->clear.clip); + cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); + cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); + + /* draw quad matching scissor rect (XXX verify coord round-off) */ + draw_quad(st, x0, y0, x1, y1, + (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor); + + /* Restore pipe state */ + cso_restore_blend(st->cso_context); + cso_restore_stencil_ref(st->cso_context); + cso_restore_depth_stencil_alpha(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_clip(st->cso_context); + cso_restore_fragment_shader(st->cso_context); + cso_restore_vertex_shader(st->cso_context); + cso_restore_vertex_elements(st->cso_context); + cso_restore_vertex_buffers(st->cso_context); +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ctx->Color.ColorMask[0][0] || + !ctx->Color.ColorMask[0][1] || + !ctx->Color.ColorMask[0][2] || + !ctx->Color.ColorMask[0][3]) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the combiend depth/stencil buffer by + * drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) +{ + const GLuint stencilMax = 0xff; + GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (maskStencil) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + +/** + * Determine if we need to clear the stencil buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, + boolean ds_separate) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); + const GLuint stencilMax = 0xff; + const GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + assert(rb->Format == MESA_FORMAT_S8 || + rb->Format == MESA_FORMAT_Z24_S8 || + rb->Format == MESA_FORMAT_S8_Z24); + + if (maskStencil) + return GL_TRUE; + + if (ctx->Scissor.Enabled && + (ctx->Scissor.X != 0 || + ctx->Scissor.Y != 0 || + ctx->Scissor.Width < rb->Width || + ctx->Scissor.Height < rb->Height)) + return GL_TRUE; + + /* This is correct, but it is necessary to look at the depth clear + * value held in the surface when it comes time to issue the clear, + * rather than taking depth and stencil clear values from the + * current state. + */ + if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) + return GL_TRUE; + + return GL_FALSE; +} + + + +/** + * Called when we need to flush. + */ +void +st_flush_clear(struct st_context *st) +{ + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_resource_reference(&st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; +} + + + +/** + * Called via ctx->Driver.Clear() + */ +static void +st_Clear(struct gl_context *ctx, GLbitfield mask) +{ + static const GLbitfield BUFFER_BITS_DS + = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + struct st_context *st = st_context(ctx); + struct gl_renderbuffer *depthRb + = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + struct gl_renderbuffer *stencilRb + = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + GLbitfield quad_buffers = 0x0; + GLbitfield clear_buffers = 0x0; + GLuint i; + + /* This makes sure the pipe has the latest scissor, etc values */ + st_validate_state( st ); + + if (mask & BUFFER_BITS_COLOR) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; + + if (mask & (1 << b)) { + struct gl_renderbuffer *rb + = ctx->DrawBuffer->Attachment[b].Renderbuffer; + struct st_renderbuffer *strb = st_renderbuffer(rb); + + if (!strb || !strb->surface) + continue; + + if (check_clear_color_with_quad( ctx, rb )) + quad_buffers |= PIPE_CLEAR_COLOR; + else + clear_buffers |= PIPE_CLEAR_COLOR; + } + } + } + + if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { + /* clearing combined depth + stencil */ + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_stencil_with_quad(ctx, depthRb)) + quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + else + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + } + } + else { + /* separate depth/stencil clears */ + /* I don't think truly separate buffers are actually possible in gallium or hw? */ + if (mask & BUFFER_BIT_DEPTH) { + struct st_renderbuffer *strb = st_renderbuffer(depthRb); + + if (strb->surface) { + if (check_clear_depth_with_quad(ctx, depthRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_DEPTH; + else + clear_buffers |= PIPE_CLEAR_DEPTH; + } + } + if (mask & BUFFER_BIT_STENCIL) { + struct st_renderbuffer *strb = st_renderbuffer(stencilRb); + + if (strb->surface) { + if (check_clear_stencil_with_quad(ctx, stencilRb, + st->clear.enable_ds_separate)) + quad_buffers |= PIPE_CLEAR_STENCIL; + else + clear_buffers |= PIPE_CLEAR_STENCIL; + } + } + } + + /* + * If we're going to use clear_with_quad() for any reason, use it for + * everything possible. + */ + if (quad_buffers) { + quad_buffers |= clear_buffers; + clear_with_quad(ctx, + quad_buffers & PIPE_CLEAR_COLOR, + quad_buffers & PIPE_CLEAR_DEPTH, + quad_buffers & PIPE_CLEAR_STENCIL); + } else if (clear_buffers) { + /* driver cannot know it can clear everything if the buffer + * is a combined depth/stencil buffer but this wasn't actually + * required from the visual. Hence fix this up to avoid potential + * read-modify-write in the driver. + */ + if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && + ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && + (depthRb == stencilRb) && + (ctx->DrawBuffer->Visual.depthBits == 0 || + ctx->DrawBuffer->Visual.stencilBits == 0)) + clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; + st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, + ctx->Depth.Clear, ctx->Stencil.Clear); + } + if (mask & BUFFER_BIT_ACCUM) + st_clear_accum_buffer(ctx, + ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); +} + + +void +st_init_clear_functions(struct dd_function_table *functions) +{ + functions->Clear = st_Clear; +} diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c index 56c7e8581..07527002b 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1,1368 +1,1371 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Brian Paul
- */
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "main/mtypes.h"
-#include "main/pack.h"
-#include "main/texformat.h"
-#include "main/texstore.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-#include "program/prog_instruction.h"
-
-#include "st_atom.h"
-#include "st_atom_constbuf.h"
-#include "st_cb_drawpixels.h"
-#include "st_cb_readpixels.h"
-#include "st_cb_fbo.h"
-#include "st_context.h"
-#include "st_debug.h"
-#include "st_format.h"
-#include "st_program.h"
-#include "st_texture.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "tgsi/tgsi_ureg.h"
-#include "util/u_draw_quad.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
-#include "util/u_math.h"
-#include "util/u_tile.h"
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_drawpix
-
-/**
- * Check if the given program is:
- * 0: MOVE result.color, fragment.color;
- * 1: END;
- */
-static GLboolean
-is_passthrough_program(const struct gl_fragment_program *prog)
-{
- if (prog->Base.NumInstructions == 2) {
- const struct prog_instruction *inst = prog->Base.Instructions;
- if (inst[0].Opcode == OPCODE_MOV &&
- inst[1].Opcode == OPCODE_END &&
- inst[0].DstReg.File == PROGRAM_OUTPUT &&
- inst[0].DstReg.Index == FRAG_RESULT_COLOR &&
- inst[0].DstReg.WriteMask == WRITEMASK_XYZW &&
- inst[0].SrcReg[0].File == PROGRAM_INPUT &&
- inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 &&
- inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-
-
-/**
- * Make fragment shader for glDraw/CopyPixels. This shader is made
- * by combining the pixel transfer shader with the user-defined shader.
- * \param fpIn the current/incoming fragment program
- * \param fpOut returns the combined fragment program
- */
-void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut)
-{
- struct gl_program *newProg;
-
- if (is_passthrough_program(fpIn)) {
- newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
- &st->pixel_xfer.program->Base);
- }
- else {
-#if 0
- /* debug */
- printf("Base program:\n");
- _mesa_print_program(&fpIn->Base);
- printf("DrawPix program:\n");
- _mesa_print_program(&st->pixel_xfer.program->Base.Base);
-#endif
- newProg = _mesa_combine_programs(st->ctx,
- &st->pixel_xfer.program->Base.Base,
- &fpIn->Base);
- }
-
-#if 0
- /* debug */
- printf("Combined DrawPixels program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
-#endif
-
- *fpOut = (struct gl_fragment_program *) newProg;
-}
-
-
-/**
- * Create fragment program that does a TEX() instruction to get a Z and/or
- * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
- * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
- * Pass fragment color through as-is.
- * \return pointer to the gl_fragment program
- */
-struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct gl_context *ctx = st->ctx;
- struct gl_program *p;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
- const GLuint shaderIndex = write_depth * 2 + write_stencil;
-
- assert(shaderIndex < Elements(st->drawpix.shaders));
-
- if (st->drawpix.shaders[shaderIndex]) {
- /* already have the proper shader */
- return st->drawpix.shaders[shaderIndex];
- }
-
- /*
- * Create shader now
- */
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = write_depth ? 2 : 1;
- p->NumInstructions += write_stencil ? 1 : 0;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- if (write_depth) {
- /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = 0;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- }
-
- if (write_stencil) {
- /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = 1;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- }
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0;
- p->OutputsWritten = 0;
- if (write_depth)
- p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH);
- if (write_stencil)
- p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL);
-
- p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
- if (write_stencil)
- p->SamplersUsed |= 1 << 1;
-
- fp = (struct gl_fragment_program *) p;
-
- /* save the new shader */
- st->drawpix.shaders[shaderIndex] = fp;
-
- return fp;
-}
-
-
-/**
- * Create a simple vertex shader that just passes through the
- * vertex position and texcoord (and optionally, color).
- */
-static void *
-make_passthrough_vertex_shader(struct st_context *st,
- GLboolean passColor)
-{
- if (!st->drawpix.vert_shaders[passColor]) {
- struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
-
- if (ureg == NULL)
- return NULL;
-
- /* MOV result.pos, vertex.pos; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
- ureg_DECL_vs_input( ureg, 0 ));
-
- /* MOV result.texcoord0, vertex.attr[1]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ),
- ureg_DECL_vs_input( ureg, 1 ));
-
- if (passColor) {
- /* MOV result.color0, vertex.attr[2]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
- ureg_DECL_vs_input( ureg, 2 ));
- }
-
- ureg_END( ureg );
-
- st->drawpix.vert_shaders[passColor] =
- ureg_create_shader_and_destroy( ureg, st->pipe );
- }
-
- return st->drawpix.vert_shaders[passColor];
-}
-
-
-/**
- * Return a texture base format for drawing/copying an image
- * of the given format.
- */
-static GLenum
-base_format(GLenum format)
-{
- switch (format) {
- case GL_DEPTH_COMPONENT:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH_STENCIL:
- return GL_DEPTH_STENCIL;
- case GL_STENCIL_INDEX:
- return GL_STENCIL_INDEX;
- default:
- return GL_RGBA;
- }
-}
-
-
-/**
- * Return a texture internalFormat for drawing/copying an image
- * of the given format and type.
- */
-static GLenum
-internal_format(GLenum format, GLenum type)
-{
- switch (format) {
- case GL_DEPTH_COMPONENT:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH_STENCIL:
- return GL_DEPTH_STENCIL;
- case GL_STENCIL_INDEX:
- return GL_STENCIL_INDEX;
- default:
- if (_mesa_is_integer_format(format)) {
- switch (type) {
- case GL_BYTE:
- return GL_RGBA8I;
- case GL_UNSIGNED_BYTE:
- return GL_RGBA8UI;
- case GL_SHORT:
- return GL_RGBA16I;
- case GL_UNSIGNED_SHORT:
- return GL_RGBA16UI;
- case GL_INT:
- return GL_RGBA32I;
- case GL_UNSIGNED_INT:
- return GL_RGBA32UI;
- default:
- assert(0 && "Unexpected type in internal_format()");
- return GL_RGBA_INTEGER;
- }
- }
- else {
- return GL_RGBA;
- }
- }
-}
-
-
-/**
- * Create a temporary texture to hold an image of the given size.
- * If width, height are not POT and the driver only handles POT textures,
- * allocate the next larger size of texture that is POT.
- */
-static struct pipe_resource *
-alloc_texture(struct st_context *st, GLsizei width, GLsizei height,
- enum pipe_format texFormat)
-{
- struct pipe_resource *pt;
-
- pt = st_texture_create(st, st->internal_target, texFormat, 0,
- width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-
- return pt;
-}
-
-
-/**
- * Make texture containing an image for glDrawPixels image.
- * If 'pixels' is NULL, leave the texture image data undefined.
- */
-static struct pipe_resource *
-make_texture(struct st_context *st,
- GLsizei width, GLsizei height, GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct gl_context *ctx = st->ctx;
- struct pipe_context *pipe = st->pipe;
- gl_format mformat;
- struct pipe_resource *pt;
- enum pipe_format pipeFormat;
- GLuint cpp;
- GLenum baseFormat, intFormat;
-
- baseFormat = base_format(format);
- intFormat = internal_format(format, type);
-
- mformat = st_ChooseTextureFormat_renderable(ctx, intFormat,
- format, type, GL_FALSE);
- assert(mformat);
-
- pipeFormat = st_mesa_format_to_pipe_format(mformat);
- assert(pipeFormat);
- cpp = util_format_get_blocksize(pipeFormat);
-
- pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
- if (!pixels)
- return NULL;
-
- /* alloc temporary texture */
- pt = alloc_texture(st, width, height, pipeFormat);
- if (!pt) {
- _mesa_unmap_pbo_source(ctx, unpack);
- return NULL;
- }
-
- {
- struct pipe_transfer *transfer;
- static const GLuint dstImageOffsets = 0;
- GLboolean success;
- GLubyte *dest;
- const GLbitfield imageTransferStateSave = ctx->_ImageTransferState;
-
- /* we'll do pixel transfer in a fragment shader */
- ctx->_ImageTransferState = 0x0;
-
- transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0,
- width, height);
-
- /* map texture transfer */
- dest = pipe_transfer_map(pipe, transfer);
-
-
- /* Put image into texture transfer.
- * Note that the image is actually going to be upside down in
- * the texture. We deal with that with texcoords.
- */
- success = _mesa_texstore(ctx, 2, /* dims */
- baseFormat, /* baseInternalFormat */
- mformat, /* gl_format */
- dest, /* dest */
- 0, 0, 0, /* dstX/Y/Zoffset */
- transfer->stride, /* dstRowStride, bytes */
- &dstImageOffsets, /* dstImageOffsets */
- width, height, 1, /* size */
- format, type, /* src format/type */
- pixels, /* data source */
- unpack);
-
- /* unmap */
- pipe_transfer_unmap(pipe, transfer);
- pipe->transfer_destroy(pipe, transfer);
-
- assert(success);
-
- /* restore */
- ctx->_ImageTransferState = imageTransferStateSave;
- }
-
- _mesa_unmap_pbo_source(ctx, unpack);
-
- return pt;
-}
-
-
-/**
- * Draw quad with texcoords and optional color.
- * Coords are gallium window coords with y=0=top.
- * \param color may be null
- * \param invertTex if true, flip texcoords vertically
- */
-static void
-draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
- GLfloat x1, GLfloat y1, const GLfloat *color,
- GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */
-
- /* setup vertex data */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f;
- const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- const GLfloat sLeft = 0.0f, sRight = maxXcoord;
- const GLfloat tTop = invertTex ? maxYcoord : 0.0f;
- const GLfloat tBot = invertTex ? 0.0f : maxYcoord;
- GLuint i;
-
- /* upper-left */
- verts[0][0][0] = clip_x0; /* v[0].attr[0].x */
- verts[0][0][1] = clip_y0; /* v[0].attr[0].y */
-
- /* upper-right */
- verts[1][0][0] = clip_x1;
- verts[1][0][1] = clip_y0;
-
- /* lower-right */
- verts[2][0][0] = clip_x1;
- verts[2][0][1] = clip_y1;
-
- /* lower-left */
- verts[3][0][0] = clip_x0;
- verts[3][0][1] = clip_y1;
-
- verts[0][1][0] = sLeft; /* v[0].attr[1].S */
- verts[0][1][1] = tTop; /* v[0].attr[1].T */
- verts[1][1][0] = sRight;
- verts[1][1][1] = tTop;
- verts[2][1][0] = sRight;
- verts[2][1][1] = tBot;
- verts[3][1][0] = sLeft;
- verts[3][1][1] = tBot;
-
- /* same for all verts: */
- if (color) {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /* v[i].attr[0].z */
- verts[i][0][3] = 1.0f; /* v[i].attr[0].w */
- verts[i][2][0] = color[0]; /* v[i].attr[2].r */
- verts[i][2][1] = color[1]; /* v[i].attr[2].g */
- verts[i][2][2] = color[2]; /* v[i].attr[2].b */
- verts[i][2][3] = color[3]; /* v[i].attr[2].a */
- verts[i][1][2] = 0.0f; /* v[i].attr[1].R */
- verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */
- }
- }
- else {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /*Z*/
- verts[i][0][3] = 1.0f; /*W*/
- verts[i][1][2] = 0.0f; /*R*/
- verts[i][1][3] = 1.0f; /*Q*/
- }
- }
- }
-
- {
- struct pipe_resource *buf;
-
- /* allocate/load buffer object with vertex data */
- buf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- sizeof(verts));
- pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts);
-
- util_draw_vertex_buffer(pipe, buf, 0,
- PIPE_PRIM_QUADS,
- 4, /* verts */
- 3); /* attribs/vert */
- pipe_resource_reference(&buf, NULL);
- }
-}
-
-
-
-static void
-draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
- GLsizei width, GLsizei height,
- GLfloat zoomX, GLfloat zoomY,
- struct pipe_sampler_view **sv,
- int num_sampler_view,
- void *driver_vp,
- void *driver_fp,
- const GLfloat *color,
- GLboolean invertTex,
- GLboolean write_depth, GLboolean write_stencil)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = st->cso_context;
- GLfloat x0, y0, x1, y1;
- GLsizei maxSize;
- boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT;
-
- /* limit checks */
- /* XXX if DrawPixels image is larger than max texture size, break
- * it up into chunks.
- */
- maxSize = 1 << (pipe->screen->get_param(pipe->screen,
- PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
- assert(width <= maxSize);
- assert(height <= maxSize);
-
- cso_save_rasterizer(cso);
- cso_save_viewport(cso);
- cso_save_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_fragment_shader(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
- if (write_stencil) {
- cso_save_depth_stencil_alpha(cso);
- cso_save_blend(cso);
- }
-
- /* rasterizer state: just scissor */
- {
- struct pipe_rasterizer_state rasterizer;
- memset(&rasterizer, 0, sizeof(rasterizer));
- rasterizer.gl_rasterization_rules = 1;
- rasterizer.scissor = ctx->Scissor.Enabled;
- cso_set_rasterizer(cso, &rasterizer);
- }
-
- if (write_stencil) {
- /* Stencil writing bypasses the normal fragment pipeline to
- * disable color writing and set stencil test to always pass.
- */
- struct pipe_depth_stencil_alpha_state dsa;
- struct pipe_blend_state blend;
-
- /* depth/stencil */
- memset(&dsa, 0, sizeof(dsa));
- dsa.stencil[0].enabled = 1;
- dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
- dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
- dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
- if (write_depth) {
- /* writing depth+stencil: depth test always passes */
- dsa.depth.enabled = 1;
- dsa.depth.writemask = ctx->Depth.Mask;
- dsa.depth.func = PIPE_FUNC_ALWAYS;
- }
- cso_set_depth_stencil_alpha(cso, &dsa);
-
- /* blend (colormask) */
- memset(&blend, 0, sizeof(blend));
- cso_set_blend(cso, &blend);
- }
-
- /* fragment shader state: TEX lookup program */
- cso_set_fragment_shader_handle(cso, driver_fp);
-
- /* vertex shader state: position + texcoord pass-through */
- cso_set_vertex_shader_handle(cso, driver_vp);
-
-
- /* texture sampling state: */
- {
- struct pipe_sampler_state sampler;
- memset(&sampler, 0, sizeof(sampler));
- sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
- sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
- sampler.wrap_r = PIPE_TEX_WRAP_CLAMP;
- sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.normalized_coords = normalized;
-
- cso_single_sampler(cso, 0, &sampler);
- if (num_sampler_view > 1) {
- cso_single_sampler(cso, 1, &sampler);
- }
- cso_single_sampler_done(cso);
- }
-
- /* viewport state: viewport matching window dims */
- {
- const float w = (float) ctx->DrawBuffer->Width;
- const float h = (float) ctx->DrawBuffer->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * w;
- vp.scale[1] = -0.5f * h;
- vp.scale[2] = 0.5f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * w;
- vp.translate[1] = 0.5f * h;
- vp.translate[2] = 0.5f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
-
- /* texture state: */
- cso_set_fragment_sampler_views(cso, num_sampler_view, sv);
-
- /* Compute Gallium window coords (y=0=top) with pixel zoom.
- * Recall that these coords are transformed by the current
- * vertex shader and viewport transformation.
- */
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
- y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
- invertTex = !invertTex;
- }
-
- x0 = (GLfloat) x;
- x1 = x + width * ctx->Pixel.ZoomX;
- y0 = (GLfloat) y;
- y1 = y + height * ctx->Pixel.ZoomY;
-
- /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
-
- draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
- normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
- normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
-
- /* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_viewport(cso);
- cso_restore_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
- if (write_stencil) {
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_blend(cso);
- }
-}
-
-
-/**
- * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we
- * can't use a fragment shader to write stencil values.
- */
-static void
-draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height, GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct st_renderbuffer *strb;
- enum pipe_transfer_usage usage;
- struct pipe_transfer *pt;
- const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
- GLint skipPixels;
- ubyte *stmap;
- struct gl_pixelstore_attrib clippedUnpack = *unpack;
-
- if (!zoom) {
- if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
- &clippedUnpack)) {
- /* totally clipped */
- return;
- }
- }
-
- strb = st_renderbuffer(ctx->DrawBuffer->
- Attachment[BUFFER_STENCIL].Renderbuffer);
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- y = ctx->DrawBuffer->Height - y - height;
- }
-
- if(format != GL_DEPTH_STENCIL &&
- util_format_get_component_bits(strb->format,
- UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
- usage = PIPE_TRANSFER_READ_WRITE;
- else
- usage = PIPE_TRANSFER_WRITE;
-
- pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0,
- usage, x, y,
- width, height);
-
- stmap = pipe_transfer_map(pipe, pt);
-
- pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
- assert(pixels);
-
- /* if width > MAX_WIDTH, have to process image in chunks */
- skipPixels = 0;
- while (skipPixels < width) {
- const GLint spanX = skipPixels;
- const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
- GLint row;
- for (row = 0; row < height; row++) {
- GLubyte sValues[MAX_WIDTH];
- GLuint zValues[MAX_WIDTH];
- GLenum destType = GL_UNSIGNED_BYTE;
- const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels,
- width, height,
- format, type,
- row, skipPixels);
- _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues,
- type, source, &clippedUnpack,
- ctx->_ImageTransferState);
-
- if (format == GL_DEPTH_STENCIL) {
- _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues,
- (1 << 24) - 1, type, source,
- &clippedUnpack);
- }
-
- if (zoom) {
- _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with "
- "zoom not complete");
- }
-
- {
- GLint spanY;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- spanY = height - row - 1;
- }
- else {
- spanY = row;
- }
-
- /* now pack the stencil (and Z) values in the dest format */
- switch (pt->resource->format) {
- case PIPE_FORMAT_S8_USCALED:
- {
- ubyte *dest = stmap + spanY * pt->stride + spanX;
- assert(usage == PIPE_TRANSFER_WRITE);
- memcpy(dest, sValues, spanWidth);
- }
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- if (format == GL_DEPTH_STENCIL) {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = zValues[k] | (sValues[k] << 24);
- }
- }
- else {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24);
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- if (format == GL_DEPTH_STENCIL) {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff);
- }
- }
- else {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff);
- }
- }
- break;
- default:
- assert(0);
- }
- }
- }
- skipPixels += spanWidth;
- }
-
- _mesa_unmap_pbo_source(ctx, &clippedUnpack);
-
- /* unmap the stencil buffer */
- pipe_transfer_unmap(pipe, pt);
- pipe->transfer_destroy(pipe, pt);
-}
-
-
-/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for RGBA data.
- */
-static struct st_fp_variant *
-get_color_fp_variant(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
- ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 ||
- ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 ||
- ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 ||
- ctx->Pixel.AlphaScale != 1.0);
- key.pixelMaps = ctx->Pixel.MapColorFlag;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for depth/stencil data.
- */
-static struct st_fp_variant *
-get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.drawpixels_z = write_depth;
- key.drawpixels_stencil = write_stencil;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
- * Called via ctx->Driver.DrawPixels()
- */
-static void
-st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels)
-{
- void *driver_vp, *driver_fp;
- struct st_context *st = st_context(ctx);
- const GLfloat *color;
- struct pipe_context *pipe = st->pipe;
- GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
- struct pipe_sampler_view *sv[2];
- int num_sampler_view = 1;
- enum pipe_format stencil_format = PIPE_FORMAT_NONE;
- struct st_fp_variant *fpv;
-
- if (format == GL_DEPTH_STENCIL)
- write_stencil = write_depth = GL_TRUE;
- else if (format == GL_STENCIL_INDEX)
- write_stencil = GL_TRUE;
- else if (format == GL_DEPTH_COMPONENT)
- write_depth = GL_TRUE;
-
- if (write_stencil) {
- enum pipe_format tex_format;
- /* can we write to stencil if not fallback */
- if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT))
- goto stencil_fallback;
-
- tex_format = st_choose_format(st->pipe->screen, base_format(format),
- PIPE_TEXTURE_2D,
- 0, PIPE_BIND_SAMPLER_VIEW);
- if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
- stencil_format = PIPE_FORMAT_X24S8_USCALED;
- else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM)
- stencil_format = PIPE_FORMAT_S8X24_USCALED;
- else
- stencil_format = PIPE_FORMAT_S8_USCALED;
- if (stencil_format == PIPE_FORMAT_NONE)
- goto stencil_fallback;
- }
-
- /* Mesa state should be up to date by now */
- assert(ctx->NewState == 0x0);
-
- st_validate_state(st);
-
- /*
- * Get vertex/fragment shaders
- */
- if (write_depth || write_stencil) {
- fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil);
-
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
-
- color = ctx->Current.RasterColor;
- }
- else {
- fpv = get_color_fp_variant(st);
-
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
-
- color = NULL;
- if (st->pixel_xfer.pixelmap_enabled) {
- sv[1] = st->pixel_xfer.pixelmap_sampler_view;
- num_sampler_view++;
- }
- }
-
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
- /* draw with textured quad */
- {
- struct pipe_resource *pt
- = make_texture(st, width, height, format, type, unpack, pixels);
- if (pt) {
- sv[0] = st_create_texture_sampler_view(st->pipe, pt);
-
- if (sv[0]) {
- if (write_stencil) {
- sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
- stencil_format);
- num_sampler_view++;
- }
-
- draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height,
- ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
- sv,
- num_sampler_view,
- driver_vp,
- driver_fp,
- color, GL_FALSE, write_depth, write_stencil);
- pipe_sampler_view_reference(&sv[0], NULL);
- if (num_sampler_view > 1)
- pipe_sampler_view_reference(&sv[1], NULL);
- }
- pipe_resource_reference(&pt, NULL);
- }
- }
- return;
-
-stencil_fallback:
- draw_stencil_pixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
-}
-
-
-
-/**
- * Software fallback for glCopyPixels(GL_STENCIL).
- */
-static void
-copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty)
-{
- struct st_renderbuffer *rbDraw;
- struct pipe_context *pipe = st_context(ctx)->pipe;
- enum pipe_transfer_usage usage;
- struct pipe_transfer *ptDraw;
- ubyte *drawMap;
- ubyte *buffer;
- int i;
-
- buffer = malloc(width * height * sizeof(ubyte));
- if (!buffer) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)");
- return;
- }
-
- /* Get the dest renderbuffer. If there's a wrapper, use the
- * underlying renderbuffer.
- */
- rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
- if (rbDraw->Base.Wrapped)
- rbDraw = st_renderbuffer(rbDraw->Base.Wrapped);
-
- /* this will do stencil pixel transfer ops */
- st_read_stencil_pixels(ctx, srcx, srcy, width, height,
- GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
- &ctx->DefaultPacking, buffer);
-
- if (0) {
- /* debug code: dump stencil values */
- GLint row, col;
- for (row = 0; row < height; row++) {
- printf("%3d: ", row);
- for (col = 0; col < width; col++) {
- printf("%02x ", buffer[col + row * width]);
- }
- printf("\n");
- }
- }
-
- if (util_format_get_component_bits(rbDraw->format,
- UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
- usage = PIPE_TRANSFER_READ_WRITE;
- else
- usage = PIPE_TRANSFER_WRITE;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- dsty = rbDraw->Base.Height - dsty - height;
- }
-
- ptDraw = pipe_get_transfer(st_context(ctx)->pipe,
- rbDraw->texture, 0, 0,
- usage, dstx, dsty,
- width, height);
-
- assert(util_format_get_blockwidth(ptDraw->resource->format) == 1);
- assert(util_format_get_blockheight(ptDraw->resource->format) == 1);
-
- /* map the stencil buffer */
- drawMap = pipe_transfer_map(pipe, ptDraw);
-
- /* draw */
- /* XXX PixelZoom not handled yet */
- for (i = 0; i < height; i++) {
- ubyte *dst;
- const ubyte *src;
- int y;
-
- y = i;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- y = height - y - 1;
- }
-
- dst = drawMap + y * ptDraw->stride;
- src = buffer + i * width;
-
- switch (ptDraw->resource->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- {
- uint *dst4 = (uint *) dst;
- int j;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (j = 0; j < width; j++) {
- *dst4 = (*dst4 & 0xffffff) | (src[j] << 24);
- dst4++;
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- {
- uint *dst4 = (uint *) dst;
- int j;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (j = 0; j < width; j++) {
- *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff);
- dst4++;
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED:
- assert(usage == PIPE_TRANSFER_WRITE);
- memcpy(dst, src, width);
- break;
- default:
- assert(0);
- }
- }
-
- free(buffer);
-
- /* unmap the stencil buffer */
- pipe_transfer_unmap(pipe, ptDraw);
- pipe->transfer_destroy(pipe, ptDraw);
-}
-
-
-static void
-st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
- struct st_renderbuffer *rbRead;
- void *driver_vp, *driver_fp;
- struct pipe_resource *pt;
- struct pipe_sampler_view *sv[2];
- int num_sampler_view = 1;
- GLfloat *color;
- enum pipe_format srcFormat, texFormat;
- GLboolean invertTex = GL_FALSE;
- GLint readX, readY, readW, readH;
- GLuint sample_count;
- struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
- struct st_fp_variant *fpv;
-
- st_validate_state(st);
-
- if (type == GL_STENCIL) {
- /* can't use texturing to do stencil */
- copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
- return;
- }
-
- /*
- * Get vertex/fragment shaders
- */
- if (type == GL_COLOR) {
- rbRead = st_get_color_read_renderbuffer(ctx);
- color = NULL;
-
- fpv = get_color_fp_variant(st);
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
-
- if (st->pixel_xfer.pixelmap_enabled) {
- sv[1] = st->pixel_xfer.pixelmap_sampler_view;
- num_sampler_view++;
- }
- }
- else {
- assert(type == GL_DEPTH);
- rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
- color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
-
- fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
- }
-
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
-
- if (rbRead->Base.Wrapped)
- rbRead = st_renderbuffer(rbRead->Base.Wrapped);
-
- sample_count = rbRead->texture->nr_samples;
- /* I believe this would be legal, presumably would need to do a resolve
- for color, and for depth/stencil spec says to just use one of the
- depth/stencil samples per pixel? Need some transfer clarifications. */
- assert(sample_count < 2);
-
- srcFormat = rbRead->texture->format;
-
- if (screen->is_format_supported(screen, srcFormat, st->internal_target,
- sample_count,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- texFormat = srcFormat;
- }
- else {
- /* srcFormat can't be used as a texture format */
- if (type == GL_DEPTH) {
- texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT,
- st->internal_target, sample_count,
- PIPE_BIND_DEPTH_STENCIL);
- assert(texFormat != PIPE_FORMAT_NONE);
- }
- else {
- /* default color format */
- texFormat = st_choose_format(screen, GL_RGBA, st->internal_target,
- sample_count, PIPE_BIND_SAMPLER_VIEW);
- assert(texFormat != PIPE_FORMAT_NONE);
- }
- }
-
- /* Invert src region if needed */
- if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
- srcy = ctx->ReadBuffer->Height - srcy - height;
- invertTex = !invertTex;
- }
-
- /* Clip the read region against the src buffer bounds.
- * We'll still allocate a temporary buffer/texture for the original
- * src region size but we'll only read the region which is on-screen.
- * This may mean that we draw garbage pixels into the dest region, but
- * that's expected.
- */
- readX = srcx;
- readY = srcy;
- readW = width;
- readH = height;
- _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack);
- readW = MAX2(0, readW);
- readH = MAX2(0, readH);
-
- /* alloc temporary texture */
- pt = alloc_texture(st, width, height, texFormat);
- if (!pt)
- return;
-
- sv[0] = st_create_texture_sampler_view(st->pipe, pt);
- if (!sv[0]) {
- pipe_resource_reference(&pt, NULL);
- return;
- }
-
- /* Make temporary texture which is a copy of the src region.
- */
- if (srcFormat == texFormat) {
- struct pipe_box src_box;
- u_box_2d(readX, readY, readW, readH, &src_box);
- /* copy source framebuffer surface into mipmap/texture */
- pipe->resource_copy_region(pipe,
- pt, /* dest tex */
- 0,
- pack.SkipPixels, pack.SkipRows, 0, /* dest pos */
- rbRead->texture, /* src tex */
- 0,
- &src_box);
-
- }
- else {
- /* CPU-based fallback/conversion */
- struct pipe_transfer *ptRead =
- pipe_get_transfer(st->pipe, rbRead->texture, 0, 0,
- PIPE_TRANSFER_READ,
- readX, readY, readW, readH);
- struct pipe_transfer *ptTex;
- enum pipe_transfer_usage transfer_usage;
-
- if (ST_DEBUG & DEBUG_FALLBACK)
- debug_printf("%s: fallback processing\n", __FUNCTION__);
-
- if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format))
- transfer_usage = PIPE_TRANSFER_READ_WRITE;
- else
- transfer_usage = PIPE_TRANSFER_WRITE;
-
- ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage,
- 0, 0, width, height);
-
- /* copy image from ptRead surface to ptTex surface */
- if (type == GL_COLOR) {
- /* alternate path using get/put_tile() */
- GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
- enum pipe_format readFormat, drawFormat;
- readFormat = util_format_linear(rbRead->texture->format);
- drawFormat = util_format_linear(pt->format);
- pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH,
- readFormat, buf);
- pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
- readW, readH, drawFormat, buf);
- free(buf);
- }
- else {
- /* GL_DEPTH */
- GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
- pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf);
- pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
- readW, readH, buf);
- free(buf);
- }
-
- pipe->transfer_destroy(pipe, ptRead);
- pipe->transfer_destroy(pipe, ptTex);
- }
-
- /* OK, the texture 'pt' contains the src image/pixels. Now draw a
- * textured quad with that texture.
- */
- draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
- width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
- sv,
- num_sampler_view,
- driver_vp,
- driver_fp,
- color, invertTex, GL_FALSE, GL_FALSE);
-
- pipe_resource_reference(&pt, NULL);
- pipe_sampler_view_reference(&sv[0], NULL);
-}
-
-
-
-void st_init_drawpixels_functions(struct dd_function_table *functions)
-{
- functions->DrawPixels = st_DrawPixels;
- functions->CopyPixels = st_CopyPixels;
-}
-
-
-void
-st_destroy_drawpix(struct st_context *st)
-{
- GLuint i;
-
- for (i = 0; i < Elements(st->drawpix.shaders); i++) {
- if (st->drawpix.shaders[i])
- _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL);
- }
-
- st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
- if (st->drawpix.vert_shaders[0])
- ureg_free_tokens(st->drawpix.vert_shaders[0]);
- if (st->drawpix.vert_shaders[1])
- ureg_free_tokens(st->drawpix.vert_shaders[1]);
-}
-
-#endif /* FEATURE_drawpix */
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "main/mtypes.h" +#include "main/pack.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "program/program.h" +#include "program/prog_print.h" +#include "program/prog_instruction.h" + +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_cb_drawpixels.h" +#include "st_cb_readpixels.h" +#include "st_cb_fbo.h" +#include "st_context.h" +#include "st_debug.h" +#include "st_format.h" +#include "st_program.h" +#include "st_texture.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_ureg.h" +#include "util/u_draw_quad.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_math.h" +#include "util/u_tile.h" +#include "cso_cache/cso_context.h" + + +#if FEATURE_drawpix + +/** + * Check if the given program is: + * 0: MOVE result.color, fragment.color; + * 1: END; + */ +static GLboolean +is_passthrough_program(const struct gl_fragment_program *prog) +{ + if (prog->Base.NumInstructions == 2) { + const struct prog_instruction *inst = prog->Base.Instructions; + if (inst[0].Opcode == OPCODE_MOV && + inst[1].Opcode == OPCODE_END && + inst[0].DstReg.File == PROGRAM_OUTPUT && + inst[0].DstReg.Index == FRAG_RESULT_COLOR && + inst[0].DstReg.WriteMask == WRITEMASK_XYZW && + inst[0].SrcReg[0].File == PROGRAM_INPUT && + inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 && + inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) { + return GL_TRUE; + } + } + return GL_FALSE; +} + + + +/** + * Make fragment shader for glDraw/CopyPixels. This shader is made + * by combining the pixel transfer shader with the user-defined shader. + * \param fpIn the current/incoming fragment program + * \param fpOut returns the combined fragment program + */ +void +st_make_drawpix_fragment_program(struct st_context *st, + struct gl_fragment_program *fpIn, + struct gl_fragment_program **fpOut) +{ + struct gl_program *newProg; + + if (is_passthrough_program(fpIn)) { + newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx, + &st->pixel_xfer.program->Base); + } + else { +#if 0 + /* debug */ + printf("Base program:\n"); + _mesa_print_program(&fpIn->Base); + printf("DrawPix program:\n"); + _mesa_print_program(&st->pixel_xfer.program->Base.Base); +#endif + newProg = _mesa_combine_programs(st->ctx, + &st->pixel_xfer.program->Base.Base, + &fpIn->Base); + } + +#if 0 + /* debug */ + printf("Combined DrawPixels program:\n"); + _mesa_print_program(newProg); + printf("InputsRead: 0x%x\n", newProg->InputsRead); + printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten); + _mesa_print_parameter_list(newProg->Parameters); +#endif + + *fpOut = (struct gl_fragment_program *) newProg; +} + + +/** + * Create fragment program that does a TEX() instruction to get a Z and/or + * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL. + * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX). + * Pass fragment color through as-is. + * \return pointer to the gl_fragment program + */ +struct gl_fragment_program * +st_make_drawpix_z_stencil_program(struct st_context *st, + GLboolean write_depth, + GLboolean write_stencil) +{ + struct gl_context *ctx = st->ctx; + struct gl_program *p; + struct gl_fragment_program *fp; + GLuint ic = 0; + const GLuint shaderIndex = write_depth * 2 + write_stencil; + + assert(shaderIndex < Elements(st->drawpix.shaders)); + + if (st->drawpix.shaders[shaderIndex]) { + /* already have the proper shader */ + return st->drawpix.shaders[shaderIndex]; + } + + /* + * Create shader now + */ + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = write_depth ? 2 : 1; + p->NumInstructions += write_stencil ? 1 : 0; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + if (write_depth) { + /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH; + p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 0; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + } + + if (write_stencil) { + /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL; + p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 1; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + } + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0; + p->OutputsWritten = 0; + if (write_depth) + p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH); + if (write_stencil) + p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL); + + p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ + if (write_stencil) + p->SamplersUsed |= 1 << 1; + + fp = (struct gl_fragment_program *) p; + + /* save the new shader */ + st->drawpix.shaders[shaderIndex] = fp; + + return fp; +} + + +/** + * Create a simple vertex shader that just passes through the + * vertex position and texcoord (and optionally, color). + */ +static void * +make_passthrough_vertex_shader(struct st_context *st, + GLboolean passColor) +{ + if (!st->drawpix.vert_shaders[passColor]) { + struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + + if (ureg == NULL) + return NULL; + + /* MOV result.pos, vertex.pos; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ), + ureg_DECL_vs_input( ureg, 0 )); + + /* MOV result.texcoord0, vertex.attr[1]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ), + ureg_DECL_vs_input( ureg, 1 )); + + if (passColor) { + /* MOV result.color0, vertex.attr[2]; */ + ureg_MOV(ureg, + ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ), + ureg_DECL_vs_input( ureg, 2 )); + } + + ureg_END( ureg ); + + st->drawpix.vert_shaders[passColor] = + ureg_create_shader_and_destroy( ureg, st->pipe ); + } + + return st->drawpix.vert_shaders[passColor]; +} + + +/** + * Return a texture base format for drawing/copying an image + * of the given format. + */ +static GLenum +base_format(GLenum format) +{ + switch (format) { + case GL_DEPTH_COMPONENT: + return GL_DEPTH_COMPONENT; + case GL_DEPTH_STENCIL: + return GL_DEPTH_STENCIL; + case GL_STENCIL_INDEX: + return GL_STENCIL_INDEX; + default: + return GL_RGBA; + } +} + + +/** + * Return a texture internalFormat for drawing/copying an image + * of the given format and type. + */ +static GLenum +internal_format(GLenum format, GLenum type) +{ + switch (format) { + case GL_DEPTH_COMPONENT: + return GL_DEPTH_COMPONENT; + case GL_DEPTH_STENCIL: + return GL_DEPTH_STENCIL; + case GL_STENCIL_INDEX: + return GL_STENCIL_INDEX; + default: + if (_mesa_is_integer_format(format)) { + switch (type) { + case GL_BYTE: + return GL_RGBA8I; + case GL_UNSIGNED_BYTE: + return GL_RGBA8UI; + case GL_SHORT: + return GL_RGBA16I; + case GL_UNSIGNED_SHORT: + return GL_RGBA16UI; + case GL_INT: + return GL_RGBA32I; + case GL_UNSIGNED_INT: + return GL_RGBA32UI; + default: + assert(0 && "Unexpected type in internal_format()"); + return GL_RGBA_INTEGER; + } + } + else { + return GL_RGBA; + } + } +} + + +/** + * Create a temporary texture to hold an image of the given size. + * If width, height are not POT and the driver only handles POT textures, + * allocate the next larger size of texture that is POT. + */ +static struct pipe_resource * +alloc_texture(struct st_context *st, GLsizei width, GLsizei height, + enum pipe_format texFormat) +{ + struct pipe_resource *pt; + + pt = st_texture_create(st, st->internal_target, texFormat, 0, + width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW); + + return pt; +} + + +/** + * Make texture containing an image for glDrawPixels image. + * If 'pixels' is NULL, leave the texture image data undefined. + */ +static struct pipe_resource * +make_texture(struct st_context *st, + GLsizei width, GLsizei height, GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct gl_context *ctx = st->ctx; + struct pipe_context *pipe = st->pipe; + gl_format mformat; + struct pipe_resource *pt; + enum pipe_format pipeFormat; + GLuint cpp; + GLenum baseFormat, intFormat; + + baseFormat = base_format(format); + intFormat = internal_format(format, type); + + mformat = st_ChooseTextureFormat_renderable(ctx, intFormat, + format, type, GL_FALSE); + assert(mformat); + + pipeFormat = st_mesa_format_to_pipe_format(mformat); + assert(pipeFormat); + cpp = util_format_get_blocksize(pipeFormat); + + pixels = _mesa_map_pbo_source(ctx, unpack, pixels); + if (!pixels) + return NULL; + + /* alloc temporary texture */ + pt = alloc_texture(st, width, height, pipeFormat); + if (!pt) { + _mesa_unmap_pbo_source(ctx, unpack); + return NULL; + } + + { + struct pipe_transfer *transfer; + static const GLuint dstImageOffsets = 0; + GLboolean success; + GLubyte *dest; + const GLbitfield imageTransferStateSave = ctx->_ImageTransferState; + + /* we'll do pixel transfer in a fragment shader */ + ctx->_ImageTransferState = 0x0; + + transfer = pipe_get_transfer(st->pipe, pt, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, + width, height); + + /* map texture transfer */ + dest = pipe_transfer_map(pipe, transfer); + + + /* Put image into texture transfer. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. + */ + success = _mesa_texstore(ctx, 2, /* dims */ + baseFormat, /* baseInternalFormat */ + mformat, /* gl_format */ + dest, /* dest */ + 0, 0, 0, /* dstX/Y/Zoffset */ + transfer->stride, /* dstRowStride, bytes */ + &dstImageOffsets, /* dstImageOffsets */ + width, height, 1, /* size */ + format, type, /* src format/type */ + pixels, /* data source */ + unpack); + + /* unmap */ + pipe_transfer_unmap(pipe, transfer); + pipe->transfer_destroy(pipe, transfer); + + assert(success); + + /* restore */ + ctx->_ImageTransferState = imageTransferStateSave; + } + + _mesa_unmap_pbo_source(ctx, unpack); + + return pt; +} + + +/** + * Draw quad with texcoords and optional color. + * Coords are gallium window coords with y=0=top. + * \param color may be null + * \param invertTex if true, flip texcoords vertically + */ +static void +draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z, + GLfloat x1, GLfloat y1, const GLfloat *color, + GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ + + /* setup vertex data */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; + const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; + const GLfloat sLeft = 0.0f, sRight = maxXcoord; + const GLfloat tTop = invertTex ? maxYcoord : 0.0f; + const GLfloat tBot = invertTex ? 0.0f : maxYcoord; + GLuint i; + + /* upper-left */ + verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ + verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ + + /* upper-right */ + verts[1][0][0] = clip_x1; + verts[1][0][1] = clip_y0; + + /* lower-right */ + verts[2][0][0] = clip_x1; + verts[2][0][1] = clip_y1; + + /* lower-left */ + verts[3][0][0] = clip_x0; + verts[3][0][1] = clip_y1; + + verts[0][1][0] = sLeft; /* v[0].attr[1].S */ + verts[0][1][1] = tTop; /* v[0].attr[1].T */ + verts[1][1][0] = sRight; + verts[1][1][1] = tTop; + verts[2][1][0] = sRight; + verts[2][1][1] = tBot; + verts[3][1][0] = sLeft; + verts[3][1][1] = tBot; + + /* same for all verts: */ + if (color) { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /* v[i].attr[0].z */ + verts[i][0][3] = 1.0f; /* v[i].attr[0].w */ + verts[i][2][0] = color[0]; /* v[i].attr[2].r */ + verts[i][2][1] = color[1]; /* v[i].attr[2].g */ + verts[i][2][2] = color[2]; /* v[i].attr[2].b */ + verts[i][2][3] = color[3]; /* v[i].attr[2].a */ + verts[i][1][2] = 0.0f; /* v[i].attr[1].R */ + verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */ + } + } + else { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /*Z*/ + verts[i][0][3] = 1.0f; /*W*/ + verts[i][1][2] = 0.0f; /*R*/ + verts[i][1][3] = 1.0f; /*Q*/ + } + } + } + + { + struct pipe_resource *buf; + + /* allocate/load buffer object with vertex data */ + buf = pipe_buffer_create(pipe->screen, + PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STATIC, + sizeof(verts)); + pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts); + + util_draw_vertex_buffer(pipe, st->cso_context, buf, 0, + PIPE_PRIM_QUADS, + 4, /* verts */ + 3); /* attribs/vert */ + pipe_resource_reference(&buf, NULL); + } +} + + + +static void +draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + GLfloat zoomX, GLfloat zoomY, + struct pipe_sampler_view **sv, + int num_sampler_view, + void *driver_vp, + void *driver_fp, + const GLfloat *color, + GLboolean invertTex, + GLboolean write_depth, GLboolean write_stencil) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; + GLfloat x0, y0, x1, y1; + GLsizei maxSize; + boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT; + + /* limit checks */ + /* XXX if DrawPixels image is larger than max texture size, break + * it up into chunks. + */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, + PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= maxSize); + assert(height <= maxSize); + + cso_save_rasterizer(cso); + cso_save_viewport(cso); + cso_save_samplers(cso); + cso_save_fragment_sampler_views(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + if (write_stencil) { + cso_save_depth_stencil_alpha(cso); + cso_save_blend(cso); + } + + /* rasterizer state: just scissor */ + { + struct pipe_rasterizer_state rasterizer; + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.gl_rasterization_rules = 1; + rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &rasterizer); + } + + if (write_stencil) { + /* Stencil writing bypasses the normal fragment pipeline to + * disable color writing and set stencil test to always pass. + */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + + /* depth/stencil */ + memset(&dsa, 0, sizeof(dsa)); + dsa.stencil[0].enabled = 1; + dsa.stencil[0].func = PIPE_FUNC_ALWAYS; + dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; + dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + if (write_depth) { + /* writing depth+stencil: depth test always passes */ + dsa.depth.enabled = 1; + dsa.depth.writemask = ctx->Depth.Mask; + dsa.depth.func = PIPE_FUNC_ALWAYS; + } + cso_set_depth_stencil_alpha(cso, &dsa); + + /* blend (colormask) */ + memset(&blend, 0, sizeof(blend)); + cso_set_blend(cso, &blend); + } + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, driver_fp); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, driver_vp); + + + /* texture sampling state: */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = normalized; + + cso_single_sampler(cso, 0, &sampler); + if (num_sampler_view > 1) { + cso_single_sampler(cso, 1, &sampler); + } + cso_single_sampler_done(cso); + } + + /* viewport state: viewport matching window dims */ + { + const float w = (float) ctx->DrawBuffer->Width; + const float h = (float) ctx->DrawBuffer->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * w; + vp.scale[1] = -0.5f * h; + vp.scale[2] = 0.5f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * w; + vp.translate[1] = 0.5f * h; + vp.translate[2] = 0.5f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + cso_set_vertex_elements(cso, 3, st->velems_util_draw); + + /* texture state: */ + cso_set_fragment_sampler_views(cso, num_sampler_view, sv); + + /* Compute Gallium window coords (y=0=top) with pixel zoom. + * Recall that these coords are transformed by the current + * vertex shader and viewport transformation. + */ + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) { + y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY); + invertTex = !invertTex; + } + + x0 = (GLfloat) x; + x1 = x + width * ctx->Pixel.ZoomX; + y0 = (GLfloat) y; + y1 = y + height * ctx->Pixel.ZoomY; + + /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */ + z = z * 2.0 - 1.0; + + draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex, + normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width, + normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height); + + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_viewport(cso); + cso_restore_samplers(cso); + cso_restore_fragment_sampler_views(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); + if (write_stencil) { + cso_restore_depth_stencil_alpha(cso); + cso_restore_blend(cso); + } +} + + +/** + * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we + * can't use a fragment shader to write stencil values. + */ +static void +draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *strb; + enum pipe_transfer_usage usage; + struct pipe_transfer *pt; + const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; + GLint skipPixels; + ubyte *stmap; + struct gl_pixelstore_attrib clippedUnpack = *unpack; + + if (!zoom) { + if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height, + &clippedUnpack)) { + /* totally clipped */ + return; + } + } + + strb = st_renderbuffer(ctx->DrawBuffer-> + Attachment[BUFFER_STENCIL].Renderbuffer); + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + y = ctx->DrawBuffer->Height - y - height; + } + + if(format != GL_DEPTH_STENCIL && + util_format_get_component_bits(strb->format, + UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) + usage = PIPE_TRANSFER_READ_WRITE; + else + usage = PIPE_TRANSFER_WRITE; + + pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0, + usage, x, y, + width, height); + + stmap = pipe_transfer_map(pipe, pt); + + pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels); + assert(pixels); + + /* if width > MAX_WIDTH, have to process image in chunks */ + skipPixels = 0; + while (skipPixels < width) { + const GLint spanX = skipPixels; + const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH); + GLint row; + for (row = 0; row < height; row++) { + GLubyte sValues[MAX_WIDTH]; + GLuint zValues[MAX_WIDTH]; + GLenum destType = GL_UNSIGNED_BYTE; + const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels, + width, height, + format, type, + row, skipPixels); + _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues, + type, source, &clippedUnpack, + ctx->_ImageTransferState); + + if (format == GL_DEPTH_STENCIL) { + _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues, + (1 << 24) - 1, type, source, + &clippedUnpack); + } + + if (zoom) { + _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with " + "zoom not complete"); + } + + { + GLint spanY; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + spanY = height - row - 1; + } + else { + spanY = row; + } + + /* now pack the stencil (and Z) values in the dest format */ + switch (pt->resource->format) { + case PIPE_FORMAT_S8_USCALED: + { + ubyte *dest = stmap + spanY * pt->stride + spanX; + assert(usage == PIPE_TRANSFER_WRITE); + memcpy(dest, sValues, spanWidth); + } + break; + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = zValues[k] | (sValues[k] << 24); + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24); + } + } + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + if (format == GL_DEPTH_STENCIL) { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff); + } + } + else { + uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4); + GLint k; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (k = 0; k < spanWidth; k++) { + dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff); + } + } + break; + default: + assert(0); + } + } + } + skipPixels += spanWidth; + } + + _mesa_unmap_pbo_source(ctx, &clippedUnpack); + + /* unmap the stencil buffer */ + pipe_transfer_unmap(pipe, pt); + pipe->transfer_destroy(pipe, pt); +} + + +/** + * Get fragment program variant for a glDrawPixels or glCopyPixels + * command for RGBA data. + */ +static struct st_fp_variant * +get_color_fp_variant(struct st_context *st) +{ + struct gl_context *ctx = st->ctx; + struct st_fp_variant_key key; + struct st_fp_variant *fpv; + + memset(&key, 0, sizeof(key)); + + key.st = st; + key.drawpixels = 1; + key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 || + ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || + ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || + ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || + ctx->Pixel.AlphaScale != 1.0); + key.pixelMaps = ctx->Pixel.MapColorFlag; + + fpv = st_get_fp_variant(st, st->fp, &key); + + return fpv; +} + + +/** + * Get fragment program variant for a glDrawPixels or glCopyPixels + * command for depth/stencil data. + */ +static struct st_fp_variant * +get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth, + GLboolean write_stencil) +{ + struct st_fp_variant_key key; + struct st_fp_variant *fpv; + + memset(&key, 0, sizeof(key)); + + key.st = st; + key.drawpixels = 1; + key.drawpixels_z = write_depth; + key.drawpixels_stencil = write_stencil; + + fpv = st_get_fp_variant(st, st->fp, &key); + + return fpv; +} + + +/** + * Called via ctx->Driver.DrawPixels() + */ +static void +st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels) +{ + void *driver_vp, *driver_fp; + struct st_context *st = st_context(ctx); + const GLfloat *color; + struct pipe_context *pipe = st->pipe; + GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE; + struct pipe_sampler_view *sv[2]; + int num_sampler_view = 1; + enum pipe_format stencil_format = PIPE_FORMAT_NONE; + struct st_fp_variant *fpv; + + if (format == GL_DEPTH_STENCIL) + write_stencil = write_depth = GL_TRUE; + else if (format == GL_STENCIL_INDEX) + write_stencil = GL_TRUE; + else if (format == GL_DEPTH_COMPONENT) + write_depth = GL_TRUE; + + if (write_stencil) { + enum pipe_format tex_format; + /* can we write to stencil if not fallback */ + if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT)) + goto stencil_fallback; + + tex_format = st_choose_format(st->pipe->screen, base_format(format), + PIPE_TEXTURE_2D, + 0, PIPE_BIND_SAMPLER_VIEW); + if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED) + stencil_format = PIPE_FORMAT_X24S8_USCALED; + else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM) + stencil_format = PIPE_FORMAT_S8X24_USCALED; + else + stencil_format = PIPE_FORMAT_S8_USCALED; + if (stencil_format == PIPE_FORMAT_NONE) + goto stencil_fallback; + } + + /* Mesa state should be up to date by now */ + assert(ctx->NewState == 0x0); + + st_validate_state(st); + + /* + * Get vertex/fragment shaders + */ + if (write_depth || write_stencil) { + fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil); + + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); + + color = ctx->Current.RasterColor; + } + else { + fpv = get_color_fp_variant(st); + + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); + + color = NULL; + if (st->pixel_xfer.pixelmap_enabled) { + sv[1] = st->pixel_xfer.pixelmap_sampler_view; + num_sampler_view++; + } + } + + /* update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + + /* draw with textured quad */ + { + struct pipe_resource *pt + = make_texture(st, width, height, format, type, unpack, pixels); + if (pt) { + sv[0] = st_create_texture_sampler_view(st->pipe, pt); + + if (sv[0]) { + if (write_stencil) { + sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, + stencil_format); + num_sampler_view++; + } + + draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, + ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, + sv, + num_sampler_view, + driver_vp, + driver_fp, + color, GL_FALSE, write_depth, write_stencil); + pipe_sampler_view_reference(&sv[0], NULL); + if (num_sampler_view > 1) + pipe_sampler_view_reference(&sv[1], NULL); + } + pipe_resource_reference(&pt, NULL); + } + } + return; + +stencil_fallback: + draw_stencil_pixels(ctx, x, y, width, height, format, type, + unpack, pixels); +} + + + +/** + * Software fallback for glCopyPixels(GL_STENCIL). + */ +static void +copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty) +{ + struct st_renderbuffer *rbDraw; + struct pipe_context *pipe = st_context(ctx)->pipe; + enum pipe_transfer_usage usage; + struct pipe_transfer *ptDraw; + ubyte *drawMap; + ubyte *buffer; + int i; + + buffer = malloc(width * height * sizeof(ubyte)); + if (!buffer) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)"); + return; + } + + /* Get the dest renderbuffer. If there's a wrapper, use the + * underlying renderbuffer. + */ + rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); + if (rbDraw->Base.Wrapped) + rbDraw = st_renderbuffer(rbDraw->Base.Wrapped); + + /* this will do stencil pixel transfer ops */ + st_read_stencil_pixels(ctx, srcx, srcy, width, height, + GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, + &ctx->DefaultPacking, buffer); + + if (0) { + /* debug code: dump stencil values */ + GLint row, col; + for (row = 0; row < height; row++) { + printf("%3d: ", row); + for (col = 0; col < width; col++) { + printf("%02x ", buffer[col + row * width]); + } + printf("\n"); + } + } + + if (util_format_get_component_bits(rbDraw->format, + UTIL_FORMAT_COLORSPACE_ZS, 0) != 0) + usage = PIPE_TRANSFER_READ_WRITE; + else + usage = PIPE_TRANSFER_WRITE; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + dsty = rbDraw->Base.Height - dsty - height; + } + + ptDraw = pipe_get_transfer(st_context(ctx)->pipe, + rbDraw->texture, 0, 0, + usage, dstx, dsty, + width, height); + + assert(util_format_get_blockwidth(ptDraw->resource->format) == 1); + assert(util_format_get_blockheight(ptDraw->resource->format) == 1); + + /* map the stencil buffer */ + drawMap = pipe_transfer_map(pipe, ptDraw); + + /* draw */ + /* XXX PixelZoom not handled yet */ + for (i = 0; i < height; i++) { + ubyte *dst; + const ubyte *src; + int y; + + y = i; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + y = height - y - 1; + } + + dst = drawMap + y * ptDraw->stride; + src = buffer + i * width; + + switch (ptDraw->resource->format) { + case PIPE_FORMAT_Z24_UNORM_S8_USCALED: + { + uint *dst4 = (uint *) dst; + int j; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = (*dst4 & 0xffffff) | (src[j] << 24); + dst4++; + } + } + break; + case PIPE_FORMAT_S8_USCALED_Z24_UNORM: + { + uint *dst4 = (uint *) dst; + int j; + assert(usage == PIPE_TRANSFER_READ_WRITE); + for (j = 0; j < width; j++) { + *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff); + dst4++; + } + } + break; + case PIPE_FORMAT_S8_USCALED: + assert(usage == PIPE_TRANSFER_WRITE); + memcpy(dst, src, width); + break; + default: + assert(0); + } + } + + free(buffer); + + /* unmap the stencil buffer */ + pipe_transfer_unmap(pipe, ptDraw); + pipe->transfer_destroy(pipe, ptDraw); +} + + +static void +st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy, + GLsizei width, GLsizei height, + GLint dstx, GLint dsty, GLenum type) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct st_renderbuffer *rbRead; + void *driver_vp, *driver_fp; + struct pipe_resource *pt; + struct pipe_sampler_view *sv[2]; + int num_sampler_view = 1; + GLfloat *color; + enum pipe_format srcFormat, texFormat; + GLboolean invertTex = GL_FALSE; + GLint readX, readY, readW, readH; + GLuint sample_count; + struct gl_pixelstore_attrib pack = ctx->DefaultPacking; + struct st_fp_variant *fpv; + + st_validate_state(st); + + if (type == GL_STENCIL) { + /* can't use texturing to do stencil */ + copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty); + return; + } + + /* + * Get vertex/fragment shaders + */ + if (type == GL_COLOR) { + rbRead = st_get_color_read_renderbuffer(ctx); + color = NULL; + + fpv = get_color_fp_variant(st); + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_FALSE); + + if (st->pixel_xfer.pixelmap_enabled) { + sv[1] = st->pixel_xfer.pixelmap_sampler_view; + num_sampler_view++; + } + } + else { + assert(type == GL_DEPTH); + rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); + color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; + + fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE); + driver_fp = fpv->driver_shader; + + driver_vp = make_passthrough_vertex_shader(st, GL_TRUE); + } + + /* update fragment program constants */ + st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT); + + + if (rbRead->Base.Wrapped) + rbRead = st_renderbuffer(rbRead->Base.Wrapped); + + sample_count = rbRead->texture->nr_samples; + /* I believe this would be legal, presumably would need to do a resolve + for color, and for depth/stencil spec says to just use one of the + depth/stencil samples per pixel? Need some transfer clarifications. */ + assert(sample_count < 2); + + srcFormat = rbRead->texture->format; + + if (screen->is_format_supported(screen, srcFormat, st->internal_target, + sample_count, + PIPE_BIND_SAMPLER_VIEW, 0)) { + texFormat = srcFormat; + } + else { + /* srcFormat can't be used as a texture format */ + if (type == GL_DEPTH) { + texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT, + st->internal_target, sample_count, + PIPE_BIND_DEPTH_STENCIL); + assert(texFormat != PIPE_FORMAT_NONE); + } + else { + /* default color format */ + texFormat = st_choose_format(screen, GL_RGBA, st->internal_target, + sample_count, PIPE_BIND_SAMPLER_VIEW); + assert(texFormat != PIPE_FORMAT_NONE); + } + } + + /* Invert src region if needed */ + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + srcy = ctx->ReadBuffer->Height - srcy - height; + invertTex = !invertTex; + } + + /* Clip the read region against the src buffer bounds. + * We'll still allocate a temporary buffer/texture for the original + * src region size but we'll only read the region which is on-screen. + * This may mean that we draw garbage pixels into the dest region, but + * that's expected. + */ + readX = srcx; + readY = srcy; + readW = width; + readH = height; + _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack); + readW = MAX2(0, readW); + readH = MAX2(0, readH); + + /* alloc temporary texture */ + pt = alloc_texture(st, width, height, texFormat); + if (!pt) + return; + + sv[0] = st_create_texture_sampler_view(st->pipe, pt); + if (!sv[0]) { + pipe_resource_reference(&pt, NULL); + return; + } + + /* Make temporary texture which is a copy of the src region. + */ + if (srcFormat == texFormat) { + struct pipe_box src_box; + u_box_2d(readX, readY, readW, readH, &src_box); + /* copy source framebuffer surface into mipmap/texture */ + pipe->resource_copy_region(pipe, + pt, /* dest tex */ + 0, + pack.SkipPixels, pack.SkipRows, 0, /* dest pos */ + rbRead->texture, /* src tex */ + 0, + &src_box); + + } + else { + /* CPU-based fallback/conversion */ + struct pipe_transfer *ptRead = + pipe_get_transfer(st->pipe, rbRead->texture, 0, 0, + PIPE_TRANSFER_READ, + readX, readY, readW, readH); + struct pipe_transfer *ptTex; + enum pipe_transfer_usage transfer_usage; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + + if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format)) + transfer_usage = PIPE_TRANSFER_READ_WRITE; + else + transfer_usage = PIPE_TRANSFER_WRITE; + + ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage, + 0, 0, width, height); + + /* copy image from ptRead surface to ptTex surface */ + if (type == GL_COLOR) { + /* alternate path using get/put_tile() */ + GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + enum pipe_format readFormat, drawFormat; + readFormat = util_format_linear(rbRead->texture->format); + drawFormat = util_format_linear(pt->format); + pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH, + readFormat, buf); + pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows, + readW, readH, drawFormat, buf); + free(buf); + } + else { + /* GL_DEPTH */ + GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); + pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf); + pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows, + readW, readH, buf); + free(buf); + } + + pipe->transfer_destroy(pipe, ptRead); + pipe->transfer_destroy(pipe, ptTex); + } + + /* OK, the texture 'pt' contains the src image/pixels. Now draw a + * textured quad with that texture. + */ + draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2], + width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY, + sv, + num_sampler_view, + driver_vp, + driver_fp, + color, invertTex, GL_FALSE, GL_FALSE); + + pipe_resource_reference(&pt, NULL); + pipe_sampler_view_reference(&sv[0], NULL); +} + + + +void st_init_drawpixels_functions(struct dd_function_table *functions) +{ + functions->DrawPixels = st_DrawPixels; + functions->CopyPixels = st_CopyPixels; +} + + +void +st_destroy_drawpix(struct st_context *st) +{ + GLuint i; + + for (i = 0; i < Elements(st->drawpix.shaders); i++) { + if (st->drawpix.shaders[i]) + _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL); + } + + st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL); + if (st->drawpix.vert_shaders[0]) + ureg_free_tokens(st->drawpix.vert_shaders[0]); + if (st->drawpix.vert_shaders[1]) + ureg_free_tokens(st->drawpix.vert_shaders[1]); +} + +#endif /* FEATURE_drawpix */ diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c index 5976f1048..86ceb9d78 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c +++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c @@ -1,304 +1,307 @@ -/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- **************************************************************************/
-
-
-/**
- * Implementation of glDrawTex() for GL_OES_draw_tex
- */
-
-
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_drawtex.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_draw_quad.h"
-#include "util/u_simple_shaders.h"
-
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_OES_draw_texture
-
-
-struct cached_shader
-{
- void *handle;
-
- uint num_attribs;
- uint semantic_names[2 + MAX_TEXTURE_UNITS];
- uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
-};
-
-#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS)
-
-/**
- * Simple linear list cache.
- * Most of the time there'll only be one cached shader.
- */
-static struct cached_shader CachedShaders[MAX_SHADERS];
-static GLuint NumCachedShaders = 0;
-
-
-static void *
-lookup_shader(struct pipe_context *pipe,
- uint num_attribs,
- const uint *semantic_names,
- const uint *semantic_indexes)
-{
- GLuint i, j;
-
- /* look for existing shader with same attributes */
- for (i = 0; i < NumCachedShaders; i++) {
- if (CachedShaders[i].num_attribs == num_attribs) {
- GLboolean match = GL_TRUE;
- for (j = 0; j < num_attribs; j++) {
- if (semantic_names[j] != CachedShaders[i].semantic_names[j] ||
- semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) {
- match = GL_FALSE;
- break;
- }
- }
- if (match)
- return CachedShaders[i].handle;
- }
- }
-
- /* not found - create new one now */
- if (NumCachedShaders >= MAX_SHADERS) {
- return NULL;
- }
-
- CachedShaders[i].num_attribs = num_attribs;
- for (j = 0; j < num_attribs; j++) {
- CachedShaders[i].semantic_names[j] = semantic_names[j];
- CachedShaders[i].semantic_indexes[j] = semantic_indexes[j];
- }
-
- CachedShaders[i].handle =
- util_make_vertex_passthrough_shader(pipe,
- num_attribs,
- semantic_names,
- semantic_indexes);
- NumCachedShaders++;
-
- return CachedShaders[i].handle;
-}
-
-static void
-st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
- GLfloat width, GLfloat height)
-{
- struct st_context *st = ctx->st;
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = ctx->st->cso_context;
- struct pipe_resource *vbuffer;
- struct pipe_transfer *vbuffer_transfer;
- GLuint i, numTexCoords, numAttribs;
- GLboolean emitColor;
- uint semantic_names[2 + MAX_TEXTURE_UNITS];
- uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
- struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
- GLbitfield inputs = VERT_BIT_POS;
-
- st_validate_state(st);
-
- /* determine if we need vertex color */
- if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
- emitColor = GL_TRUE;
- else
- emitColor = GL_FALSE;
-
- /* determine how many enabled sets of texcoords */
- numTexCoords = 0;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
- inputs |= VERT_BIT_TEX(i);
- numTexCoords++;
- }
- }
-
- /* total number of attributes per vertex */
- numAttribs = 1 + emitColor + numTexCoords;
-
-
- /* create the vertex buffer */
- vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
- numAttribs * 4 * 4 * sizeof(GLfloat));
-
- /* load vertex buffer */
- {
-#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \
- do { \
- GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \
- assert(k < 4 * 4 * numAttribs); \
- vbuf[k + 0] = X; \
- vbuf[k + 1] = Y; \
- vbuf[k + 2] = Z; \
- vbuf[k + 3] = W; \
- } while (0)
-
- const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
- GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer,
- PIPE_TRANSFER_WRITE,
- &vbuffer_transfer);
- GLuint attr;
-
- z = CLAMP(z, 0.0f, 1.0f);
-
- /* positions (in clip coords) */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat)fb->Width;
- const GLfloat fb_height = (GLfloat)fb->Height;
-
- const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
- const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
-
- SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */
- SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */
- SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */
- SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */
-
- semantic_names[0] = TGSI_SEMANTIC_POSITION;
- semantic_indexes[0] = 0;
- }
-
- /* colors */
- if (emitColor) {
- const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
- SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
- semantic_names[1] = TGSI_SEMANTIC_COLOR;
- semantic_indexes[1] = 0;
- attr = 2;
- }
- else {
- attr = 1;
- }
-
- /* texcoords */
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
- struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
- struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
- const GLfloat wt = (GLfloat) img->Width;
- const GLfloat ht = (GLfloat) img->Height;
- const GLfloat s0 = obj->CropRect[0] / wt;
- const GLfloat t0 = obj->CropRect[1] / ht;
- const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt;
- const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;
-
- /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
- SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */
- SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */
- SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */
- SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */
-
- semantic_names[attr] = TGSI_SEMANTIC_GENERIC;
- semantic_indexes[attr] = 0;
-
- attr++;
- }
- }
-
- pipe_buffer_unmap(pipe, vbuffer_transfer);
-
-#undef SET_ATTRIB
- }
-
-
- cso_save_viewport(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
-
- {
- void *vs = lookup_shader(pipe, numAttribs,
- semantic_names, semantic_indexes);
- cso_set_vertex_shader_handle(cso, vs);
- }
-
- for (i = 0; i < numAttribs; i++) {
- velements[i].src_offset = i * 4 * sizeof(float);
- velements[i].instance_divisor = 0;
- velements[i].vertex_buffer_index = 0;
- velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- }
- cso_set_vertex_elements(cso, numAttribs, velements);
-
- /* viewport state: viewport matching window dims */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- const GLfloat width = (GLfloat)fb->Width;
- const GLfloat height = (GLfloat)fb->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 1.0f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.0f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
-
- util_draw_vertex_buffer(pipe, vbuffer,
- 0, /* offset */
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- numAttribs); /* attribs/vert */
-
-
- pipe_resource_reference(&vbuffer, NULL);
-
- /* restore state */
- cso_restore_viewport(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
-}
-
-
-void
-st_init_drawtex_functions(struct dd_function_table *functions)
-{
- functions->DrawTex = st_DrawTex;
-}
-
-
-/**
- * Free any cached shaders
- */
-void
-st_destroy_drawtex(struct st_context *st)
-{
- GLuint i;
- for (i = 0; i < NumCachedShaders; i++) {
- cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle);
- }
- NumCachedShaders = 0;
-}
-
-
-#endif /* FEATURE_OES_draw_texture */
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + **************************************************************************/ + + +/** + * Implementation of glDrawTex() for GL_OES_draw_tex + */ + + + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/program.h" +#include "program/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_drawtex.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_draw_quad.h" +#include "util/u_simple_shaders.h" + +#include "cso_cache/cso_context.h" + + +#if FEATURE_OES_draw_texture + + +struct cached_shader +{ + void *handle; + + uint num_attribs; + uint semantic_names[2 + MAX_TEXTURE_UNITS]; + uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; +}; + +#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS) + +/** + * Simple linear list cache. + * Most of the time there'll only be one cached shader. + */ +static struct cached_shader CachedShaders[MAX_SHADERS]; +static GLuint NumCachedShaders = 0; + + +static void * +lookup_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes) +{ + GLuint i, j; + + /* look for existing shader with same attributes */ + for (i = 0; i < NumCachedShaders; i++) { + if (CachedShaders[i].num_attribs == num_attribs) { + GLboolean match = GL_TRUE; + for (j = 0; j < num_attribs; j++) { + if (semantic_names[j] != CachedShaders[i].semantic_names[j] || + semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) { + match = GL_FALSE; + break; + } + } + if (match) + return CachedShaders[i].handle; + } + } + + /* not found - create new one now */ + if (NumCachedShaders >= MAX_SHADERS) { + return NULL; + } + + CachedShaders[i].num_attribs = num_attribs; + for (j = 0; j < num_attribs; j++) { + CachedShaders[i].semantic_names[j] = semantic_names[j]; + CachedShaders[i].semantic_indexes[j] = semantic_indexes[j]; + } + + CachedShaders[i].handle = + util_make_vertex_passthrough_shader(pipe, + num_attribs, + semantic_names, + semantic_indexes); + NumCachedShaders++; + + return CachedShaders[i].handle; +} + +static void +st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z, + GLfloat width, GLfloat height) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = ctx->st->cso_context; + struct pipe_resource *vbuffer; + struct pipe_transfer *vbuffer_transfer; + GLuint i, numTexCoords, numAttribs; + GLboolean emitColor; + uint semantic_names[2 + MAX_TEXTURE_UNITS]; + uint semantic_indexes[2 + MAX_TEXTURE_UNITS]; + struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS]; + GLbitfield inputs = VERT_BIT_POS; + + st_validate_state(st); + + /* determine if we need vertex color */ + if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0) + emitColor = GL_TRUE; + else + emitColor = GL_FALSE; + + /* determine how many enabled sets of texcoords */ + numTexCoords = 0; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { + inputs |= VERT_BIT_TEX(i); + numTexCoords++; + } + } + + /* total number of attributes per vertex */ + numAttribs = 1 + emitColor + numTexCoords; + + + /* create the vertex buffer */ + vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER, + PIPE_USAGE_STREAM, + numAttribs * 4 * 4 * sizeof(GLfloat)); + + /* load vertex buffer */ + { +#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \ + do { \ + GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \ + assert(k < 4 * 4 * numAttribs); \ + vbuf[k + 0] = X; \ + vbuf[k + 1] = Y; \ + vbuf[k + 2] = Z; \ + vbuf[k + 3] = W; \ + } while (0) + + const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height; + GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer, + PIPE_TRANSFER_WRITE, + &vbuffer_transfer); + GLuint attr; + + z = CLAMP(z, 0.0f, 1.0f); + + /* positions (in clip coords) */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat)fb->Width; + const GLfloat fb_height = (GLfloat)fb->Height; + + const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); + const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); + const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); + const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + + SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */ + SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */ + SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */ + SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */ + + semantic_names[0] = TGSI_SEMANTIC_POSITION; + semantic_indexes[0] = 0; + } + + /* colors */ + if (emitColor) { + const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; + SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]); + SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]); + semantic_names[1] = TGSI_SEMANTIC_COLOR; + semantic_indexes[1] = 0; + attr = 2; + } + else { + attr = 1; + } + + /* texcoords */ + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) { + struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current; + struct gl_texture_image *img = obj->Image[0][obj->BaseLevel]; + const GLfloat wt = (GLfloat) img->Width; + const GLfloat ht = (GLfloat) img->Height; + const GLfloat s0 = obj->CropRect[0] / wt; + const GLfloat t0 = obj->CropRect[1] / ht; + const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt; + const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht; + + /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/ + SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */ + SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */ + SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */ + SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */ + + semantic_names[attr] = TGSI_SEMANTIC_GENERIC; + semantic_indexes[attr] = 0; + + attr++; + } + } + + pipe_buffer_unmap(pipe, vbuffer_transfer); + +#undef SET_ATTRIB + } + + + cso_save_viewport(cso); + cso_save_vertex_shader(cso); + cso_save_vertex_elements(cso); + cso_save_vertex_buffers(cso); + + { + void *vs = lookup_shader(pipe, numAttribs, + semantic_names, semantic_indexes); + cso_set_vertex_shader_handle(cso, vs); + } + + for (i = 0; i < numAttribs; i++) { + velements[i].src_offset = i * 4 * sizeof(float); + velements[i].instance_divisor = 0; + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + } + cso_set_vertex_elements(cso, numAttribs, velements); + + /* viewport state: viewport matching window dims */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + const GLfloat width = (GLfloat)fb->Width; + const GLfloat height = (GLfloat)fb->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + + util_draw_vertex_buffer(pipe, cso, vbuffer, + 0, /* offset */ + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + numAttribs); /* attribs/vert */ + + + pipe_resource_reference(&vbuffer, NULL); + + /* restore state */ + cso_restore_viewport(cso); + cso_restore_vertex_shader(cso); + cso_restore_vertex_elements(cso); + cso_restore_vertex_buffers(cso); +} + + +void +st_init_drawtex_functions(struct dd_function_table *functions) +{ + functions->DrawTex = st_DrawTex; +} + + +/** + * Free any cached shaders + */ +void +st_destroy_drawtex(struct st_context *st) +{ + GLuint i; + for (i = 0; i < NumCachedShaders; i++) { + cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle); + } + NumCachedShaders = 0; +} + + +#endif /* FEATURE_OES_draw_texture */ diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c index dccbff3c1..7a19f35bb 100644 --- a/mesalib/src/mesa/state_tracker/st_context.c +++ b/mesalib/src/mesa/state_tracker/st_context.c @@ -203,6 +203,11 @@ static void st_destroy_context_priv( struct st_context *st ) st_destroy_drawpix(st); st_destroy_drawtex(st); + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + for (i = 0; i < Elements(st->state.sampler_views); i++) { pipe_sampler_view_reference(&st->state.sampler_views[i], NULL); } diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h index 492ee600e..77765f023 100644 --- a/mesalib/src/mesa/state_tracker/st_context.h +++ b/mesalib/src/mesa/state_tracker/st_context.h @@ -1,265 +1,270 @@ -/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef ST_CONTEXT_H
-#define ST_CONTEXT_H
-
-#include "main/mtypes.h"
-#include "pipe/p_state.h"
-#include "state_tracker/st_api.h"
-
-struct bitmap_cache;
-struct blit_state;
-struct dd_function_table;
-struct draw_context;
-struct draw_stage;
-struct gen_mipmap_state;
-struct st_context;
-struct st_fragment_program;
-
-
-#define ST_NEW_MESA 0x1 /* Mesa state has changed */
-#define ST_NEW_FRAGMENT_PROGRAM 0x2
-#define ST_NEW_VERTEX_PROGRAM 0x4
-#define ST_NEW_FRAMEBUFFER 0x8
-#define ST_NEW_EDGEFLAGS_DATA 0x10
-#define ST_NEW_GEOMETRY_PROGRAM 0x20
-
-
-struct st_state_flags {
- GLuint mesa;
- GLuint st;
-};
-
-struct st_tracked_state {
- const char *name;
- struct st_state_flags dirty;
- void (*update)( struct st_context *st );
-};
-
-
-
-struct st_context
-{
- struct st_context_iface iface;
-
- struct gl_context *ctx;
-
- struct pipe_context *pipe;
-
- struct draw_context *draw; /**< For selection/feedback/rastpos only */
- struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */
- struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */
- struct draw_stage *rastpos_stage; /**< For glRasterPos */
-
-
- /* On old libGL's for linux we need to invalidate the drawables
- * on glViewpport calls, this is set via a option.
- */
- boolean invalidate_on_gl_viewport;
-
- /* Some state is contained in constant objects.
- * Other state is just parameter values.
- */
- struct {
- struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depth_stencil;
- struct pipe_rasterizer_state rasterizer;
- struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS];
- struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS];
- struct pipe_clip_state clip;
- struct {
- void *ptr;
- unsigned size;
- } constants[PIPE_SHADER_TYPES];
- struct pipe_framebuffer_state framebuffer;
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
- struct pipe_scissor_state scissor;
- struct pipe_viewport_state viewport;
- unsigned sample_mask;
-
- GLuint num_samplers;
- GLuint num_textures;
-
- GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */
- } state;
-
- char vendor[100];
- char renderer[100];
-
- struct st_state_flags dirty;
-
- GLboolean missing_textures;
- GLboolean vertdata_edgeflags;
-
- /** Mapping from VERT_RESULT_x to post-transformed vertex slot */
- const GLuint *vertex_result_to_slot;
-
- struct st_vertex_program *vp; /**< Currently bound vertex program */
- struct st_fragment_program *fp; /**< Currently bound fragment program */
- struct st_geometry_program *gp; /**< Currently bound geometry program */
-
- struct st_vp_variant *vp_variant;
- struct st_fp_variant *fp_variant;
- struct st_gp_variant *gp_variant;
-
- struct gl_texture_object *default_texture;
-
- struct {
- struct gl_program_cache *cache;
- struct st_fragment_program *program; /**< cur pixel transfer prog */
- GLuint xfer_prog_sn; /**< pixel xfer program serial no. */
- GLuint user_prog_sn; /**< user fragment program serial no. */
- struct st_fragment_program *combined_prog;
- GLuint combined_prog_sn;
- struct pipe_resource *pixelmap_texture;
- struct pipe_sampler_view *pixelmap_sampler_view;
- boolean pixelmap_enabled; /**< use the pixelmap texture? */
- } pixel_xfer;
-
- /** for glBitmap */
- struct {
- struct pipe_rasterizer_state rasterizer;
- struct pipe_sampler_state samplers[2];
- enum pipe_format tex_format;
- void *vs;
- float vertices[4][3][4]; /**< vertex pos + color + texcoord */
- struct pipe_resource *vbuf;
- unsigned vbuf_slot; /* next free slot in vbuf */
- struct bitmap_cache *cache;
- } bitmap;
-
- /** for glDraw/CopyPixels */
- struct {
- struct gl_fragment_program *shaders[4];
- void *vert_shaders[2]; /**< ureg shaders */
- } drawpix;
-
- /** for glClear */
- struct {
- struct pipe_rasterizer_state raster;
- struct pipe_viewport_state viewport;
- struct pipe_clip_state clip;
- void *vs;
- void *fs;
- float vertices[4][2][4]; /**< vertex pos + color */
- struct pipe_resource *vbuf;
- unsigned vbuf_slot;
- boolean enable_ds_separate;
- } clear;
-
- /** used for anything using util_draw_vertex_buffer */
- struct pipe_vertex_element velems_util_draw[3];
-
- void *passthrough_fs; /**< simple pass-through frag shader */
-
- enum pipe_texture_target internal_target;
- struct gen_mipmap_state *gen_mipmap;
- struct blit_state *blit;
-
- struct cso_context *cso_context;
-
- int force_msaa;
- void *winsys_drawable_handle;
-};
-
-
-/* Need this so that we can implement Mesa callbacks in this module.
- */
-static INLINE struct st_context *st_context(struct gl_context *ctx)
-{
- return ctx->st;
-}
-
-
-/**
- * Wrapper for struct gl_framebuffer.
- * This is an opaque type to the outside world.
- */
-struct st_framebuffer
-{
- struct gl_framebuffer Base;
- void *Private;
-
- struct st_framebuffer_iface *iface;
- enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
- unsigned num_statts;
- int32_t revalidate;
-};
-
-
-extern void st_init_driver_functions(struct dd_function_table *functions);
-
-void st_invalidate_state(struct gl_context * ctx, GLuint new_state);
-
-
-
-#define Y_0_TOP 1
-#define Y_0_BOTTOM 2
-
-static INLINE GLuint
-st_fb_orientation(const struct gl_framebuffer *fb)
-{
- if (fb && fb->Name == 0) {
- /* Drawing into a window (on-screen buffer).
- *
- * Negate Y scale to flip image vertically.
- * The NDC Y coords prior to viewport transformation are in the range
- * [y=-1=bottom, y=1=top]
- * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where
- * H is the window height.
- * Use the viewport transformation to invert Y.
- */
- return Y_0_TOP;
- }
- else {
- /* Drawing into user-created FBO (very likely a texture).
- *
- * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering.
- */
- return Y_0_BOTTOM;
- }
-}
-
-
-/** clear-alloc a struct-sized object, with casting */
-#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
-
-
-extern int
-st_get_msaa(void);
-
-extern struct st_context *
-st_create_context(gl_api api, struct pipe_context *pipe,
- const struct gl_config *visual,
- struct st_context *share);
-
-extern void
-st_destroy_context(struct st_context *st);
-
-
-#endif
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CONTEXT_H +#define ST_CONTEXT_H + +#include "main/mtypes.h" +#include "pipe/p_state.h" +#include "state_tracker/st_api.h" + +struct bitmap_cache; +struct blit_state; +struct dd_function_table; +struct draw_context; +struct draw_stage; +struct gen_mipmap_state; +struct st_context; +struct st_fragment_program; + + +#define ST_NEW_MESA 0x1 /* Mesa state has changed */ +#define ST_NEW_FRAGMENT_PROGRAM 0x2 +#define ST_NEW_VERTEX_PROGRAM 0x4 +#define ST_NEW_FRAMEBUFFER 0x8 +#define ST_NEW_EDGEFLAGS_DATA 0x10 +#define ST_NEW_GEOMETRY_PROGRAM 0x20 + + +struct st_state_flags { + GLuint mesa; + GLuint st; +}; + +struct st_tracked_state { + const char *name; + struct st_state_flags dirty; + void (*update)( struct st_context *st ); +}; + + + +struct st_context +{ + struct st_context_iface iface; + + struct gl_context *ctx; + + struct pipe_context *pipe; + + struct draw_context *draw; /**< For selection/feedback/rastpos only */ + struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */ + struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */ + struct draw_stage *rastpos_stage; /**< For glRasterPos */ + + + /* On old libGL's for linux we need to invalidate the drawables + * on glViewpport calls, this is set via a option. + */ + boolean invalidate_on_gl_viewport; + + /* Some state is contained in constant objects. + * Other state is just parameter values. + */ + struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; + struct pipe_clip_state clip; + struct { + void *ptr; + unsigned size; + } constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS]; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + unsigned sample_mask; + + GLuint num_samplers; + GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ + } state; + + char vendor[100]; + char renderer[100]; + + struct st_state_flags dirty; + + GLboolean missing_textures; + GLboolean vertdata_edgeflags; + + /** Mapping from VERT_RESULT_x to post-transformed vertex slot */ + const GLuint *vertex_result_to_slot; + + struct st_vertex_program *vp; /**< Currently bound vertex program */ + struct st_fragment_program *fp; /**< Currently bound fragment program */ + struct st_geometry_program *gp; /**< Currently bound geometry program */ + + struct st_vp_variant *vp_variant; + struct st_fp_variant *fp_variant; + struct st_gp_variant *gp_variant; + + struct gl_texture_object *default_texture; + + struct { + struct gl_program_cache *cache; + struct st_fragment_program *program; /**< cur pixel transfer prog */ + GLuint xfer_prog_sn; /**< pixel xfer program serial no. */ + GLuint user_prog_sn; /**< user fragment program serial no. */ + struct st_fragment_program *combined_prog; + GLuint combined_prog_sn; + struct pipe_resource *pixelmap_texture; + struct pipe_sampler_view *pixelmap_sampler_view; + boolean pixelmap_enabled; /**< use the pixelmap texture? */ + } pixel_xfer; + + /** for glBitmap */ + struct { + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[2]; + enum pipe_format tex_format; + void *vs; + float vertices[4][3][4]; /**< vertex pos + color + texcoord */ + struct pipe_resource *vbuf; + unsigned vbuf_slot; /* next free slot in vbuf */ + struct bitmap_cache *cache; + } bitmap; + + /** for glDraw/CopyPixels */ + struct { + struct gl_fragment_program *shaders[4]; + void *vert_shaders[2]; /**< ureg shaders */ + } drawpix; + + /** for glClear */ + struct { + struct pipe_rasterizer_state raster; + struct pipe_viewport_state viewport; + struct pipe_clip_state clip; + void *vs; + void *fs; + float vertices[4][2][4]; /**< vertex pos + color */ + struct pipe_resource *vbuf; + unsigned vbuf_slot; + boolean enable_ds_separate; + } clear; + + /** used for anything using util_draw_vertex_buffer */ + struct pipe_vertex_element velems_util_draw[3]; + + void *passthrough_fs; /**< simple pass-through frag shader */ + + enum pipe_texture_target internal_target; + struct gen_mipmap_state *gen_mipmap; + struct blit_state *blit; + + struct cso_context *cso_context; + + int force_msaa; + void *winsys_drawable_handle; + + /* User vertex buffers. */ + struct pipe_resource *user_vb[PIPE_MAX_ATTRIBS]; + unsigned user_vb_stride[PIPE_MAX_ATTRIBS]; + unsigned num_user_vbs; +}; + + +/* Need this so that we can implement Mesa callbacks in this module. + */ +static INLINE struct st_context *st_context(struct gl_context *ctx) +{ + return ctx->st; +} + + +/** + * Wrapper for struct gl_framebuffer. + * This is an opaque type to the outside world. + */ +struct st_framebuffer +{ + struct gl_framebuffer Base; + void *Private; + + struct st_framebuffer_iface *iface; + enum st_attachment_type statts[ST_ATTACHMENT_COUNT]; + unsigned num_statts; + int32_t revalidate; +}; + + +extern void st_init_driver_functions(struct dd_function_table *functions); + +void st_invalidate_state(struct gl_context * ctx, GLuint new_state); + + + +#define Y_0_TOP 1 +#define Y_0_BOTTOM 2 + +static INLINE GLuint +st_fb_orientation(const struct gl_framebuffer *fb) +{ + if (fb && fb->Name == 0) { + /* Drawing into a window (on-screen buffer). + * + * Negate Y scale to flip image vertically. + * The NDC Y coords prior to viewport transformation are in the range + * [y=-1=bottom, y=1=top] + * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where + * H is the window height. + * Use the viewport transformation to invert Y. + */ + return Y_0_TOP; + } + else { + /* Drawing into user-created FBO (very likely a texture). + * + * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering. + */ + return Y_0_BOTTOM; + } +} + + +/** clear-alloc a struct-sized object, with casting */ +#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) + + +extern int +st_get_msaa(void); + +extern struct st_context * +st_create_context(gl_api api, struct pipe_context *pipe, + const struct gl_config *visual, + struct st_context *share); + +extern void +st_destroy_context(struct st_context *st); + + +#endif diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c index 19466ea44..830e3e3c1 100644 --- a/mesalib/src/mesa/state_tracker/st_draw.c +++ b/mesalib/src/mesa/state_tracker/st_draw.c @@ -1,788 +1,749 @@ -/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * This file implements the st_draw_vbo() function which is called from
- * Mesa's VBO module. All point/line/triangle rendering is done through
- * this function whether the user called glBegin/End, glDrawArrays,
- * glDrawElements, glEvalMesh, or glCalList, etc.
- *
- * We basically convert the VBO's vertex attribute/array information into
- * Gallium vertex state, bind the vertex buffer objects and call
- * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays().
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/prog_uniform.h"
-
-#include "vbo/vbo.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_bufferobjects.h"
-#include "st_draw.h"
-#include "st_program.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_prim.h"
-#include "util/u_draw_quad.h"
-#include "draw/draw_context.h"
-#include "cso_cache/cso_context.h"
-
-
-static GLuint double_types[4] = {
- PIPE_FORMAT_R64_FLOAT,
- PIPE_FORMAT_R64G64_FLOAT,
- PIPE_FORMAT_R64G64B64_FLOAT,
- PIPE_FORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[4] = {
- PIPE_FORMAT_R32_FLOAT,
- PIPE_FORMAT_R32G32_FLOAT,
- PIPE_FORMAT_R32G32B32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint half_float_types[4] = {
- PIPE_FORMAT_R16_FLOAT,
- PIPE_FORMAT_R16G16_FLOAT,
- PIPE_FORMAT_R16G16B16_FLOAT,
- PIPE_FORMAT_R16G16B16A16_FLOAT
-};
-
-static GLuint uint_types_norm[4] = {
- PIPE_FORMAT_R32_UNORM,
- PIPE_FORMAT_R32G32_UNORM,
- PIPE_FORMAT_R32G32B32_UNORM,
- PIPE_FORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[4] = {
- PIPE_FORMAT_R32_USCALED,
- PIPE_FORMAT_R32G32_USCALED,
- PIPE_FORMAT_R32G32B32_USCALED,
- PIPE_FORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint int_types_norm[4] = {
- PIPE_FORMAT_R32_SNORM,
- PIPE_FORMAT_R32G32_SNORM,
- PIPE_FORMAT_R32G32B32_SNORM,
- PIPE_FORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[4] = {
- PIPE_FORMAT_R32_SSCALED,
- PIPE_FORMAT_R32G32_SSCALED,
- PIPE_FORMAT_R32G32B32_SSCALED,
- PIPE_FORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint ushort_types_norm[4] = {
- PIPE_FORMAT_R16_UNORM,
- PIPE_FORMAT_R16G16_UNORM,
- PIPE_FORMAT_R16G16B16_UNORM,
- PIPE_FORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[4] = {
- PIPE_FORMAT_R16_USCALED,
- PIPE_FORMAT_R16G16_USCALED,
- PIPE_FORMAT_R16G16B16_USCALED,
- PIPE_FORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint short_types_norm[4] = {
- PIPE_FORMAT_R16_SNORM,
- PIPE_FORMAT_R16G16_SNORM,
- PIPE_FORMAT_R16G16B16_SNORM,
- PIPE_FORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[4] = {
- PIPE_FORMAT_R16_SSCALED,
- PIPE_FORMAT_R16G16_SSCALED,
- PIPE_FORMAT_R16G16B16_SSCALED,
- PIPE_FORMAT_R16G16B16A16_SSCALED
-};
-
-static GLuint ubyte_types_norm[4] = {
- PIPE_FORMAT_R8_UNORM,
- PIPE_FORMAT_R8G8_UNORM,
- PIPE_FORMAT_R8G8B8_UNORM,
- PIPE_FORMAT_R8G8B8A8_UNORM
-};
-
-static GLuint ubyte_types_scale[4] = {
- PIPE_FORMAT_R8_USCALED,
- PIPE_FORMAT_R8G8_USCALED,
- PIPE_FORMAT_R8G8B8_USCALED,
- PIPE_FORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint byte_types_norm[4] = {
- PIPE_FORMAT_R8_SNORM,
- PIPE_FORMAT_R8G8_SNORM,
- PIPE_FORMAT_R8G8B8_SNORM,
- PIPE_FORMAT_R8G8B8A8_SNORM
-};
-
-static GLuint byte_types_scale[4] = {
- PIPE_FORMAT_R8_SSCALED,
- PIPE_FORMAT_R8G8_SSCALED,
- PIPE_FORMAT_R8G8B8_SSCALED,
- PIPE_FORMAT_R8G8B8A8_SSCALED
-};
-
-static GLuint fixed_types[4] = {
- PIPE_FORMAT_R32_FIXED,
- PIPE_FORMAT_R32G32_FIXED,
- PIPE_FORMAT_R32G32B32_FIXED,
- PIPE_FORMAT_R32G32B32A32_FIXED
-};
-
-
-
-/**
- * Return a PIPE_FORMAT_x for the given GL datatype and size.
- */
-GLuint
-st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
- GLboolean normalized)
-{
- assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
- type == GL_FIXED || type == GL_HALF_FLOAT);
- assert(size >= 1);
- assert(size <= 4);
- assert(format == GL_RGBA || format == GL_BGRA);
-
- if (format == GL_BGRA) {
- /* this is an odd-ball case */
- assert(type == GL_UNSIGNED_BYTE);
- assert(normalized);
- return PIPE_FORMAT_B8G8R8A8_UNORM;
- }
-
- if (normalized) {
- switch (type) {
- case GL_DOUBLE: return double_types[size-1];
- case GL_FLOAT: return float_types[size-1];
- case GL_HALF_FLOAT: return half_float_types[size-1];
- case GL_INT: return int_types_norm[size-1];
- case GL_SHORT: return short_types_norm[size-1];
- case GL_BYTE: return byte_types_norm[size-1];
- case GL_UNSIGNED_INT: return uint_types_norm[size-1];
- case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1];
- case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
- case GL_FIXED: return fixed_types[size-1];
- default: assert(0); return 0;
- }
- }
- else {
- switch (type) {
- case GL_DOUBLE: return double_types[size-1];
- case GL_FLOAT: return float_types[size-1];
- case GL_HALF_FLOAT: return half_float_types[size-1];
- case GL_INT: return int_types_scale[size-1];
- case GL_SHORT: return short_types_scale[size-1];
- case GL_BYTE: return byte_types_scale[size-1];
- case GL_UNSIGNED_INT: return uint_types_scale[size-1];
- case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1];
- case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
- case GL_FIXED: return fixed_types[size-1];
- default: assert(0); return 0;
- }
- }
- return 0; /* silence compiler warning */
-}
-
-
-
-
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- * \param userSpace returns whether the arrays are in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLboolean *userSpace)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- GLuint num_client_arrays = 0;
- const GLubyte *client_addr = NULL;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
- const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
-
- if (firstStride < 0) {
- firstStride = stride;
- }
- else if (firstStride != stride) {
- return GL_FALSE;
- }
-
- if (!bufObj || !bufObj->Name) {
- num_client_arrays++;
- /* Try to detect if the client-space arrays are
- * "close" to each other.
- */
- if (!client_addr) {
- client_addr = arrays[mesaAttr]->Ptr;
- }
- else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) {
- /* arrays start too far apart */
- return GL_FALSE;
- }
- }
- else if (!firstBufObj) {
- firstBufObj = bufObj;
- }
- else if (bufObj != firstBufObj) {
- return GL_FALSE;
- }
- }
-
- *userSpace = (num_client_arrays == vpv->num_inputs);
- /* debug_printf("user space: %s (%d arrays, %d inputs)\n",
- (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
-
- return GL_TRUE;
-}
-
-
-/**
- * Compute the memory range occupied by the arrays.
- */
-static void
-get_arrays_bounds(const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- const GLubyte **low, const GLubyte **high)
-{
- const GLubyte *low_addr = NULL;
- const GLubyte *high_addr = NULL;
- GLuint attr;
-
- /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- const GLint stride = arrays[mesaAttr]->StrideB;
- const GLubyte *start = arrays[mesaAttr]->Ptr;
- const unsigned sz = (arrays[mesaAttr]->Size *
- _mesa_sizeof_type(arrays[mesaAttr]->Type));
- const GLubyte *end = start + (max_index * stride) + sz;
-
- /* debug_printf("attr %u: stride %d size %u start %p end %p\n",
- attr, stride, sz, start, end); */
-
- if (attr == 0) {
- low_addr = start;
- high_addr = end;
- }
- else {
- low_addr = MIN2(low_addr, start);
- high_addr = MAX2(high_addr, end);
- }
- }
-
- *low = low_addr;
- *high = high_addr;
-}
-
-
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct gl_context *ctx,
- const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean userSpace,
- struct pipe_vertex_buffer *vbuffer,
- struct pipe_vertex_element velements[])
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLuint attr;
- const GLubyte *offset0 = NULL;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- GLsizei stride = arrays[mesaAttr]->StrideB;
-
- /*printf("stobj %u = %p\n", attr, (void*)stobj);*/
-
- if (attr == 0) {
- const GLubyte *low, *high;
-
- get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
- /* debug_printf("buffer range: %p %p range %d max index %u\n",
- low, high, high - low, max_index); */
-
- offset0 = low;
- if (userSpace) {
- vbuffer->buffer =
- pipe_user_buffer_create(pipe->screen, (void *) low, high - low,
- PIPE_BIND_VERTEX_BUFFER);
- vbuffer->buffer_offset = 0;
- }
- else {
- vbuffer->buffer = NULL;
- pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
- vbuffer->buffer_offset = pointer_to_offset(low);
- }
- vbuffer->stride = stride; /* in bytes */
- vbuffer->max_index = max_index;
- }
-
- /*
- if (arrays[mesaAttr]->InstanceDivisor)
- vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
- else
- vbuffer[attr].max_index = max_index;
- */
-
- velements[attr].src_offset =
- (unsigned) (arrays[mesaAttr]->Ptr - offset0);
- velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
- velements[attr].vertex_buffer_index = 0;
- velements[attr].src_format =
- st_pipe_vertex_format(arrays[mesaAttr]->Type,
- arrays[mesaAttr]->Size,
- arrays[mesaAttr]->Format,
- arrays[mesaAttr]->Normalized);
- assert(velements[attr].src_format);
- }
-}
-
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct gl_context *ctx,
- const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean *userSpace,
- struct pipe_vertex_buffer vbuffer[],
- struct pipe_vertex_element velements[])
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLuint attr;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
- GLsizei stride = arrays[mesaAttr]->StrideB;
-
- *userSpace = GL_FALSE;
-
- if (bufobj && bufobj->Name) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_client_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- assert(stobj->buffer);
- /*printf("stobj %u = %p\n", attr, (void*) stobj);*/
-
- vbuffer[attr].buffer = NULL;
- pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
- vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
- }
- else {
- /* attribute data is in user-space memory, not a VBO */
- uint bytes;
- /*printf("user-space array %d stride %d\n", attr, stride);*/
-
- *userSpace = GL_TRUE;
-
- /* wrap user data */
- if (arrays[mesaAttr]->Ptr) {
- /* user's vertex array */
- if (arrays[mesaAttr]->StrideB) {
- bytes = arrays[mesaAttr]->StrideB * (max_index + 1);
- }
- else {
- bytes = arrays[mesaAttr]->Size
- * _mesa_sizeof_type(arrays[mesaAttr]->Type);
- }
- vbuffer[attr].buffer =
- pipe_user_buffer_create(pipe->screen,
- (void *) arrays[mesaAttr]->Ptr, bytes,
- PIPE_BIND_VERTEX_BUFFER);
- }
- else {
- /* no array, use ctx->Current.Attrib[] value */
- bytes = sizeof(ctx->Current.Attrib[0]);
- vbuffer[attr].buffer =
- pipe_user_buffer_create(pipe->screen,
- (void *) ctx->Current.Attrib[mesaAttr],
- bytes,
- PIPE_BIND_VERTEX_BUFFER);
- stride = 0;
- }
-
- vbuffer[attr].buffer_offset = 0;
- }
-
- assert(velements[attr].src_offset <= 2048); /* 11-bit field */
-
- /* common-case setup */
- vbuffer[attr].stride = stride; /* in bytes */
- if (arrays[mesaAttr]->InstanceDivisor)
- vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
- else
- vbuffer[attr].max_index = max_index;
-
- velements[attr].src_offset = 0;
- velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
- velements[attr].vertex_buffer_index = attr;
- velements[attr].src_format
- = st_pipe_vertex_format(arrays[mesaAttr]->Type,
- arrays[mesaAttr]->Size,
- arrays[mesaAttr]->Format,
- arrays[mesaAttr]->Normalized);
- assert(velements[attr].src_format);
- }
-}
-
-
-static void
-setup_index_buffer(struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- struct pipe_index_buffer *ibuffer)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
-
- memset(ibuffer, 0, sizeof(*ibuffer));
- if (ib) {
- struct gl_buffer_object *bufobj = ib->obj;
-
- switch (ib->type) {
- case GL_UNSIGNED_INT:
- ibuffer->index_size = 4;
- break;
- case GL_UNSIGNED_SHORT:
- ibuffer->index_size = 2;
- break;
- case GL_UNSIGNED_BYTE:
- ibuffer->index_size = 1;
- break;
- default:
- assert(0);
- return;
- }
-
- /* get/create the index buffer object */
- if (bufobj && bufobj->Name) {
- /* elements/indexes are in a real VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
- ibuffer->offset = pointer_to_offset(ib->ptr);
- }
- else {
- /* element/indicies are in user space memory */
- ibuffer->buffer =
- pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
- ib->count * ibuffer->index_size,
- PIPE_BIND_INDEX_BUFFER);
- }
- }
-}
-
-/**
- * Prior to drawing, check that any uniforms referenced by the
- * current shader have been set. If a uniform has not been set,
- * issue a warning.
- */
-static void
-check_uniforms(struct gl_context *ctx)
-{
- struct gl_shader_program *shProg[3] = {
- ctx->Shader.CurrentVertexProgram,
- ctx->Shader.CurrentGeometryProgram,
- ctx->Shader.CurrentFragmentProgram,
- };
- unsigned j;
-
- for (j = 0; j < 3; j++) {
- unsigned i;
-
- if (shProg[j] == NULL || !shProg[j]->LinkStatus)
- continue;
-
- for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) {
- const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i];
- if (!u->Initialized) {
- _mesa_warning(ctx,
- "Using shader with uninitialized uniform: %s",
- u->Name);
- }
- }
- }
-}
-
-
-/**
- * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
- * the corresponding Gallium type.
- */
-static unsigned
-translate_prim(const struct gl_context *ctx, unsigned prim)
-{
- /* GL prims should match Gallium prims, spot-check a few */
- assert(GL_POINTS == PIPE_PRIM_POINTS);
- assert(GL_QUADS == PIPE_PRIM_QUADS);
- assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
-
- /* Avoid quadstrips if it's easy to do so:
- * Note: it's imporant to do the correct trimming if we change the prim type!
- * We do that wherever this function is called.
- */
- if (prim == GL_QUAD_STRIP &&
- ctx->Light.ShadeModel != GL_FLAT &&
- ctx->Polygon.FrontMode == GL_FILL &&
- ctx->Polygon.BackMode == GL_FILL)
- prim = GL_TRIANGLE_STRIP;
-
- return prim;
-}
-
-
-
-/**
- * This function gets plugged into the VBO module and is called when
- * we have something to render.
- * Basically, translate the information into the format expected by gallium.
- */
-void
-st_draw_vbo(struct gl_context *ctx,
- const struct gl_client_array **arrays,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- const struct st_vertex_program *vp;
- const struct st_vp_variant *vpv;
- struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
- GLuint attr;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- unsigned num_vbuffers, num_velements;
- struct pipe_index_buffer ibuffer;
- GLboolean userSpace = GL_FALSE;
- GLboolean vertDataEdgeFlags;
- struct pipe_draw_info info;
- unsigned i;
-
- /* Mesa core state should have been validated already */
- assert(ctx->NewState == 0x0);
-
- /* Gallium probably doesn't want this in some cases. */
- if (!index_bounds_valid)
- if (!vbo_all_varyings_in_vbos(arrays))
- vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
-
- /* sanity check for pointer arithmetic below */
- assert(sizeof(arrays[0]->Ptr[0]) == 1);
-
- vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
- arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
- if (vertDataEdgeFlags != st->vertdata_edgeflags) {
- st->vertdata_edgeflags = vertDataEdgeFlags;
- st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
- }
-
- st_validate_state(st);
-
- /* must get these after state validation! */
- vp = st->vp;
- vpv = st->vp_variant;
-
-#if 0
- if (MESA_VERBOSE & VERBOSE_GLSL) {
- check_uniforms(ctx);
- }
-#else
- (void) check_uniforms;
-#endif
-
- memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
- /*
- * Setup the vbuffer[] and velements[] arrays.
- */
- if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) {
- /*printf("Draw interleaved\n");*/
- setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace,
- vbuffer, velements);
- num_vbuffers = 1;
- num_velements = vpv->num_inputs;
- if (num_velements == 0)
- num_vbuffers = 0;
- }
- else {
- /*printf("Draw non-interleaved\n");*/
- setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index,
- &userSpace, vbuffer, velements);
- num_vbuffers = vpv->num_inputs;
- num_velements = vpv->num_inputs;
- }
-
-#if 0
- {
- GLuint i;
- for (i = 0; i < num_vbuffers; i++) {
- printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride);
- printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index);
- printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset);
- printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer);
- }
- for (i = 0; i < num_velements; i++) {
- printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
- printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
- printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
- }
- }
-#endif
-
- pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer);
- cso_set_vertex_elements(st->cso_context, num_velements, velements);
-
- setup_index_buffer(ctx, ib, &ibuffer);
- pipe->set_index_buffer(pipe, &ibuffer);
-
- util_draw_init_info(&info);
- if (ib) {
- info.indexed = TRUE;
- if (min_index != ~0 && max_index != ~0) {
- info.min_index = min_index;
- info.max_index = max_index;
- }
- }
-
- info.primitive_restart = st->ctx->Array.PrimitiveRestart;
- info.restart_index = st->ctx->Array.RestartIndex;
-
- /* do actual drawing */
- for (i = 0; i < nr_prims; i++) {
- info.mode = translate_prim( ctx, prims[i].mode );
- info.start = prims[i].start;
- info.count = prims[i].count;
- info.instance_count = prims[i].num_instances;
- info.index_bias = prims[i].basevertex;
- if (!ib) {
- info.min_index = info.start;
- info.max_index = info.start + info.count - 1;
- }
-
- if (u_trim_pipe_prim(info.mode, &info.count))
- pipe->draw_vbo(pipe, &info);
- }
-
- pipe_resource_reference(&ibuffer.buffer, NULL);
-
- /* unreference buffers (frees wrapped user-space buffer objects) */
- for (attr = 0; attr < num_vbuffers; attr++) {
- pipe_resource_reference(&vbuffer[attr].buffer, NULL);
- assert(!vbuffer[attr].buffer);
- }
-
- if (userSpace)
- {
- pipe->set_vertex_buffers(pipe, 0, NULL);
- }
-}
-
-
-void st_init_draw( struct st_context *st )
-{
- struct gl_context *ctx = st->ctx;
-
- vbo_set_draw_func(ctx, st_draw_vbo);
-
-#if FEATURE_feedback || FEATURE_rastpos
- st->draw = draw_create(st->pipe); /* for selection/feedback */
-
- /* Disable draw options that might convert points/lines to tris, etc.
- * as that would foul-up feedback/selection mode.
- */
- draw_wide_line_threshold(st->draw, 1000.0f);
- draw_wide_point_threshold(st->draw, 1000.0f);
- draw_enable_line_stipple(st->draw, FALSE);
- draw_enable_point_sprites(st->draw, FALSE);
-#endif
-}
-
-
-void st_destroy_draw( struct st_context *st )
-{
-#if FEATURE_feedback || FEATURE_rastpos
- draw_destroy(st->draw);
-#endif
-}
-
-
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * This file implements the st_draw_vbo() function which is called from + * Mesa's VBO module. All point/line/triangle rendering is done through + * this function whether the user called glBegin/End, glDrawArrays, + * glDrawElements, glEvalMesh, or glCalList, etc. + * + * We basically convert the VBO's vertex attribute/array information into + * Gallium vertex state, bind the vertex buffer objects and call + * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays(). + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/mfeatures.h" +#include "program/prog_uniform.h" + +#include "vbo/vbo.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_bufferobjects.h" +#include "st_draw.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_prim.h" +#include "util/u_draw_quad.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_context.h" + + +static GLuint double_types[4] = { + PIPE_FORMAT_R64_FLOAT, + PIPE_FORMAT_R64G64_FLOAT, + PIPE_FORMAT_R64G64B64_FLOAT, + PIPE_FORMAT_R64G64B64A64_FLOAT +}; + +static GLuint float_types[4] = { + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R32G32B32A32_FLOAT +}; + +static GLuint half_float_types[4] = { + PIPE_FORMAT_R16_FLOAT, + PIPE_FORMAT_R16G16_FLOAT, + PIPE_FORMAT_R16G16B16_FLOAT, + PIPE_FORMAT_R16G16B16A16_FLOAT +}; + +static GLuint uint_types_norm[4] = { + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM +}; + +static GLuint uint_types_scale[4] = { + PIPE_FORMAT_R32_USCALED, + PIPE_FORMAT_R32G32_USCALED, + PIPE_FORMAT_R32G32B32_USCALED, + PIPE_FORMAT_R32G32B32A32_USCALED +}; + +static GLuint int_types_norm[4] = { + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM +}; + +static GLuint int_types_scale[4] = { + PIPE_FORMAT_R32_SSCALED, + PIPE_FORMAT_R32G32_SSCALED, + PIPE_FORMAT_R32G32B32_SSCALED, + PIPE_FORMAT_R32G32B32A32_SSCALED +}; + +static GLuint ushort_types_norm[4] = { + PIPE_FORMAT_R16_UNORM, + PIPE_FORMAT_R16G16_UNORM, + PIPE_FORMAT_R16G16B16_UNORM, + PIPE_FORMAT_R16G16B16A16_UNORM +}; + +static GLuint ushort_types_scale[4] = { + PIPE_FORMAT_R16_USCALED, + PIPE_FORMAT_R16G16_USCALED, + PIPE_FORMAT_R16G16B16_USCALED, + PIPE_FORMAT_R16G16B16A16_USCALED +}; + +static GLuint short_types_norm[4] = { + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16_SNORM, + PIPE_FORMAT_R16G16B16A16_SNORM +}; + +static GLuint short_types_scale[4] = { + PIPE_FORMAT_R16_SSCALED, + PIPE_FORMAT_R16G16_SSCALED, + PIPE_FORMAT_R16G16B16_SSCALED, + PIPE_FORMAT_R16G16B16A16_SSCALED +}; + +static GLuint ubyte_types_norm[4] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_R8G8B8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM +}; + +static GLuint ubyte_types_scale[4] = { + PIPE_FORMAT_R8_USCALED, + PIPE_FORMAT_R8G8_USCALED, + PIPE_FORMAT_R8G8B8_USCALED, + PIPE_FORMAT_R8G8B8A8_USCALED +}; + +static GLuint byte_types_norm[4] = { + PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM +}; + +static GLuint byte_types_scale[4] = { + PIPE_FORMAT_R8_SSCALED, + PIPE_FORMAT_R8G8_SSCALED, + PIPE_FORMAT_R8G8B8_SSCALED, + PIPE_FORMAT_R8G8B8A8_SSCALED +}; + +static GLuint fixed_types[4] = { + PIPE_FORMAT_R32_FIXED, + PIPE_FORMAT_R32G32_FIXED, + PIPE_FORMAT_R32G32B32_FIXED, + PIPE_FORMAT_R32G32B32A32_FIXED +}; + + + +/** + * Return a PIPE_FORMAT_x for the given GL datatype and size. + */ +GLuint +st_pipe_vertex_format(GLenum type, GLuint size, GLenum format, + GLboolean normalized) +{ + assert((type >= GL_BYTE && type <= GL_DOUBLE) || + type == GL_FIXED || type == GL_HALF_FLOAT); + assert(size >= 1); + assert(size <= 4); + assert(format == GL_RGBA || format == GL_BGRA); + + if (format == GL_BGRA) { + /* this is an odd-ball case */ + assert(type == GL_UNSIGNED_BYTE); + assert(normalized); + return PIPE_FORMAT_B8G8R8A8_UNORM; + } + + if (normalized) { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_HALF_FLOAT: return half_float_types[size-1]; + case GL_INT: return int_types_norm[size-1]; + case GL_SHORT: return short_types_norm[size-1]; + case GL_BYTE: return byte_types_norm[size-1]; + case GL_UNSIGNED_INT: return uint_types_norm[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + else { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_HALF_FLOAT: return half_float_types[size-1]; + case GL_INT: return int_types_scale[size-1]; + case GL_SHORT: return short_types_scale[size-1]; + case GL_BYTE: return byte_types_scale[size-1]; + case GL_UNSIGNED_INT: return uint_types_scale[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + return 0; /* silence compiler warning */ +} + + + + + +/** + * Examine the active arrays to determine if we have interleaved + * vertex arrays all living in one VBO, or all living in user space. + * \param userSpace returns whether the arrays are in user space. + */ +static GLboolean +is_interleaved_arrays(const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays) +{ + GLuint attr; + const struct gl_buffer_object *firstBufObj = NULL; + GLint firstStride = -1; + const GLubyte *client_addr = NULL; + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj; + const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */ + + if (firstStride < 0) { + firstStride = stride; + } + else if (firstStride != stride) { + return GL_FALSE; + } + + if (!bufObj || !bufObj->Name) { + /* Try to detect if the client-space arrays are + * "close" to each other. + */ + if (!client_addr) { + client_addr = arrays[mesaAttr]->Ptr; + } + else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) { + /* arrays start too far apart */ + return GL_FALSE; + } + } + else if (!firstBufObj) { + firstBufObj = bufObj; + } + else if (bufObj != firstBufObj) { + return GL_FALSE; + } + } + + return GL_TRUE; +} + + +/** + * Set up for drawing interleaved arrays that all live in one VBO + * or all live in user space. + * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_interleaved_attribs(struct gl_context *ctx, + const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays, + struct pipe_vertex_buffer *vbuffer, + struct pipe_vertex_element velements[], + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLuint attr; + const GLubyte *low_addr = NULL; + + /* Find the lowest address. */ + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr; + + low_addr = !low_addr ? start : MIN2(low_addr, start); + } + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + struct st_buffer_object *stobj = st_buffer_object(bufobj); + GLsizei stride = arrays[mesaAttr]->StrideB; + + if (attr == 0) { + if (bufobj && bufobj->Name) { + vbuffer->buffer = NULL; + pipe_resource_reference(&vbuffer->buffer, stobj->buffer); + vbuffer->buffer_offset = pointer_to_offset(low_addr); + } else { + vbuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void*)low_addr, + stride * (max_index + 1), + PIPE_BIND_VERTEX_BUFFER); + vbuffer->buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[0], vbuffer->buffer); + st->user_vb_stride[0] = stride; + st->num_user_vbs = 1; + } + vbuffer->stride = stride; /* in bytes */ + } + + velements[attr].src_offset = + (unsigned) (arrays[mesaAttr]->Ptr - low_addr); + velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; + velements[attr].vertex_buffer_index = 0; + velements[attr].src_format = + st_pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + +/** + * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each + * vertex attribute. + * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_non_interleaved_attribs(struct gl_context *ctx, + const struct st_vertex_program *vp, + const struct st_vp_variant *vpv, + const struct gl_client_array **arrays, + struct pipe_vertex_buffer vbuffer[], + struct pipe_vertex_element velements[], + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + GLuint attr; + + for (attr = 0; attr < vpv->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + GLsizei stride = arrays[mesaAttr]->StrideB; + + if (bufobj && bufobj->Name) { + /* Attribute data is in a VBO. + * Recall that for VBOs, the gl_client_array->Ptr field is + * really an offset from the start of the VBO, not a pointer. + */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + assert(stobj->buffer); + + vbuffer[attr].buffer = NULL; + pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer); + vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr); + } + else { + /* wrap user data */ + if (arrays[mesaAttr]->Ptr) { + vbuffer[attr].buffer = + pipe_user_buffer_create(pipe->screen, + (void *) arrays[mesaAttr]->Ptr, + stride * (max_index + 1), + PIPE_BIND_VERTEX_BUFFER); + } + else { + /* no array, use ctx->Current.Attrib[] value */ + uint bytes = sizeof(ctx->Current.Attrib[0]); + vbuffer[attr].buffer = + pipe_user_buffer_create(pipe->screen, + (void *) ctx->Current.Attrib[mesaAttr], + bytes, + PIPE_BIND_VERTEX_BUFFER); + stride = 0; + } + + vbuffer[attr].buffer_offset = 0; + + /* Track user vertex buffers. */ + pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer); + st->user_vb_stride[attr] = stride; + st->num_user_vbs = MAX2(st->num_user_vbs, attr+1); + } + + /* common-case setup */ + vbuffer[attr].stride = stride; /* in bytes */ + + velements[attr].src_offset = 0; + velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor; + velements[attr].vertex_buffer_index = attr; + velements[attr].src_format + = st_pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Format, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + +static void +setup_index_buffer(struct gl_context *ctx, + const struct _mesa_index_buffer *ib, + struct pipe_index_buffer *ibuffer) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + + memset(ibuffer, 0, sizeof(*ibuffer)); + if (ib) { + struct gl_buffer_object *bufobj = ib->obj; + + switch (ib->type) { + case GL_UNSIGNED_INT: + ibuffer->index_size = 4; + break; + case GL_UNSIGNED_SHORT: + ibuffer->index_size = 2; + break; + case GL_UNSIGNED_BYTE: + ibuffer->index_size = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_resource_reference(&ibuffer->buffer, stobj->buffer); + ibuffer->offset = pointer_to_offset(ib->ptr); + } + else { + /* element/indicies are in user space memory */ + ibuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * ibuffer->index_size, + PIPE_BIND_INDEX_BUFFER); + } + } +} + +/** + * Prior to drawing, check that any uniforms referenced by the + * current shader have been set. If a uniform has not been set, + * issue a warning. + */ +static void +check_uniforms(struct gl_context *ctx) +{ + struct gl_shader_program *shProg[3] = { + ctx->Shader.CurrentVertexProgram, + ctx->Shader.CurrentGeometryProgram, + ctx->Shader.CurrentFragmentProgram, + }; + unsigned j; + + for (j = 0; j < 3; j++) { + unsigned i; + + if (shProg[j] == NULL || !shProg[j]->LinkStatus) + continue; + + for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) { + const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i]; + if (!u->Initialized) { + _mesa_warning(ctx, + "Using shader with uninitialized uniform: %s", + u->Name); + } + } + } +} + + +/** + * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to + * the corresponding Gallium type. + */ +static unsigned +translate_prim(const struct gl_context *ctx, unsigned prim) +{ + /* GL prims should match Gallium prims, spot-check a few */ + assert(GL_POINTS == PIPE_PRIM_POINTS); + assert(GL_QUADS == PIPE_PRIM_QUADS); + assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY); + + /* Avoid quadstrips if it's easy to do so: + * Note: it's imporant to do the correct trimming if we change the prim type! + * We do that wherever this function is called. + */ + if (prim == GL_QUAD_STRIP && + ctx->Light.ShadeModel != GL_FLAT && + ctx->Polygon.FrontMode == GL_FILL && + ctx->Polygon.BackMode == GL_FILL) + prim = GL_TRIANGLE_STRIP; + + return prim; +} + + +static void +st_validate_varrays(struct gl_context *ctx, + const struct gl_client_array **arrays, + unsigned max_index) +{ + struct st_context *st = st_context(ctx); + const struct st_vertex_program *vp; + const struct st_vp_variant *vpv; + struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers, num_velements; + GLuint attr; + unsigned i; + + /* must get these after state validation! */ + vp = st->vp; + vpv = st->vp_variant; + + memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs); + + /* Unreference any user vertex buffers. */ + for (i = 0; i < st->num_user_vbs; i++) { + pipe_resource_reference(&st->user_vb[i], NULL); + } + st->num_user_vbs = 0; + + /* + * Setup the vbuffer[] and velements[] arrays. + */ + if (is_interleaved_arrays(vp, vpv, arrays)) { + setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements, + max_index); + num_vbuffers = 1; + num_velements = vpv->num_inputs; + if (num_velements == 0) + num_vbuffers = 0; + } + else { + setup_non_interleaved_attribs(ctx, vp, vpv, arrays, + vbuffer, velements, max_index); + num_vbuffers = vpv->num_inputs; + num_velements = vpv->num_inputs; + } + + cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer); + cso_set_vertex_elements(st->cso_context, num_velements, velements); + + /* unreference buffers (frees wrapped user-space buffer objects) + * This is OK, because the pipe driver should reference buffers by itself + * in set_vertex_buffers. */ + for (attr = 0; attr < num_vbuffers; attr++) { + pipe_resource_reference(&vbuffer[attr].buffer, NULL); + assert(!vbuffer[attr].buffer); + } +} + + +/** + * This function gets plugged into the VBO module and is called when + * we have something to render. + * Basically, translate the information into the format expected by gallium. + */ +void +st_draw_vbo(struct gl_context *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_index_buffer ibuffer; + struct pipe_draw_info info; + unsigned i; + GLboolean new_array = + st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0; + + /* Mesa core state should have been validated already */ + assert(ctx->NewState == 0x0); + + if (ib) { + /* Gallium probably doesn't want this in some cases. */ + if (!index_bounds_valid) + if (!vbo_all_varyings_in_vbos(arrays)) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + } else { + /* Get min/max index for non-indexed drawing. */ + min_index = ~0; + max_index = 0; + + for (i = 0; i < nr_prims; i++) { + min_index = MIN2(min_index, prims[i].start); + max_index = MAX2(max_index, prims[i].start + prims[i].count - 1); + } + } + + /* Validate state. */ + if (st->dirty.st) { + GLboolean vertDataEdgeFlags; + + /* sanity check for pointer arithmetic below */ + assert(sizeof(arrays[0]->Ptr[0]) == 1); + + vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj && + arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name; + if (vertDataEdgeFlags != st->vertdata_edgeflags) { + st->vertdata_edgeflags = vertDataEdgeFlags; + st->dirty.st |= ST_NEW_EDGEFLAGS_DATA; + } + + st_validate_state(st); + + if (new_array) { + st_validate_varrays(ctx, arrays, max_index); + } + +#if 0 + if (MESA_VERBOSE & VERBOSE_GLSL) { + check_uniforms(ctx); + } +#else + (void) check_uniforms; +#endif + } + + /* Notify the driver that the content of user buffers may have been + * changed. */ + if (!new_array && st->num_user_vbs) { + for (i = 0; i < st->num_user_vbs; i++) { + if (st->user_vb[i]) { + unsigned stride = st->user_vb_stride[i]; + + if (stride) { + pipe->redefine_user_buffer(pipe, st->user_vb[i], + min_index * stride, + (max_index + 1 - min_index) * stride); + } else { + /* stride == 0 */ + pipe->redefine_user_buffer(pipe, st->user_vb[i], + 0, st->user_vb[i]->width0); + } + } + } + } + + setup_index_buffer(ctx, ib, &ibuffer); + pipe->set_index_buffer(pipe, &ibuffer); + + util_draw_init_info(&info); + if (ib) { + info.indexed = TRUE; + if (min_index != ~0 && max_index != ~0) { + info.min_index = min_index; + info.max_index = max_index; + } + } + + info.primitive_restart = st->ctx->Array.PrimitiveRestart; + info.restart_index = st->ctx->Array.RestartIndex; + + /* do actual drawing */ + for (i = 0; i < nr_prims; i++) { + info.mode = translate_prim( ctx, prims[i].mode ); + info.start = prims[i].start; + info.count = prims[i].count; + info.instance_count = prims[i].num_instances; + info.index_bias = prims[i].basevertex; + if (!ib) { + info.min_index = info.start; + info.max_index = info.start + info.count - 1; + } + + if (u_trim_pipe_prim(info.mode, &info.count)) + pipe->draw_vbo(pipe, &info); + } + + pipe_resource_reference(&ibuffer.buffer, NULL); +} + + +void st_init_draw( struct st_context *st ) +{ + struct gl_context *ctx = st->ctx; + + vbo_set_draw_func(ctx, st_draw_vbo); + +#if FEATURE_feedback || FEATURE_rastpos + st->draw = draw_create(st->pipe); /* for selection/feedback */ + + /* Disable draw options that might convert points/lines to tris, etc. + * as that would foul-up feedback/selection mode. + */ + draw_wide_line_threshold(st->draw, 1000.0f); + draw_wide_point_threshold(st->draw, 1000.0f); + draw_enable_line_stipple(st->draw, FALSE); + draw_enable_point_sprites(st->draw, FALSE); +#endif +} + + +void st_destroy_draw( struct st_context *st ) +{ +#if FEATURE_feedback || FEATURE_rastpos + draw_destroy(st->draw); +#endif +} + + diff --git a/mesalib/src/mesa/state_tracker/st_draw_feedback.c b/mesalib/src/mesa/state_tracker/st_draw_feedback.c index 545b32d75..1e1220bfe 100644 --- a/mesalib/src/mesa/state_tracker/st_draw_feedback.c +++ b/mesalib/src/mesa/state_tracker/st_draw_feedback.c @@ -179,7 +179,6 @@ st_feedback_draw_vbo(struct gl_context *ctx, /* common-case setup */ vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffers[attr].max_index = max_index; velements[attr].instance_divisor = 0; velements[attr].vertex_buffer_index = attr; velements[attr].src_format = diff --git a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c index 0be66a2c2..18eb3be68 100644 --- a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c +++ b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c @@ -1,424 +1,422 @@ -/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "main/imports.h"
-#include "main/mipmap.h"
-#include "main/teximage.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_gen_mipmap.h"
-
-#include "st_debug.h"
-#include "st_context.h"
-#include "st_texture.h"
-#include "st_gen_mipmap.h"
-#include "st_cb_texture.h"
-
-
-/**
- * one-time init for generate mipmap
- * XXX Note: there may be other times we need no-op/simple state like this.
- * In that case, some code refactoring would be good.
- */
-void
-st_init_generate_mipmap(struct st_context *st)
-{
- st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context);
-}
-
-
-void
-st_destroy_generate_mipmap(struct st_context *st)
-{
- util_destroy_gen_mipmap(st->gen_mipmap);
- st->gen_mipmap = NULL;
-}
-
-
-/**
- * Generate mipmap levels using hardware rendering.
- * \return TRUE if successful, FALSE if not possible
- */
-static boolean
-st_render_mipmap(struct st_context *st,
- GLenum target,
- struct st_texture_object *stObj,
- uint baseLevel, uint lastLevel)
-{
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
- struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe);
- const uint face = _mesa_tex_target_to_face(target);
-
- assert(psv->texture == stObj->pt);
-#if 0
- assert(target != GL_TEXTURE_3D); /* implemented but untested */
-#endif
-
- /* check if we can render in the texture's format */
- /* XXX should probably kill this and always use util_gen_mipmap
- since this implements a sw fallback as well */
- if (!screen->is_format_supported(screen, psv->format, psv->texture->target,
- 0, PIPE_BIND_RENDER_TARGET, 0)) {
- return FALSE;
- }
-
- util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel,
- PIPE_TEX_FILTER_LINEAR);
-
- return TRUE;
-}
-
-
-/**
- * Helper function to decompress an image. The result is a 32-bpp RGBA
- * image with stride==width.
- */
-static void
-decompress_image(enum pipe_format format,
- const uint8_t *src, uint8_t *dst,
- unsigned width, unsigned height)
-{
- const struct util_format_description *desc = util_format_description(format);
- const uint bw = util_format_get_blockwidth(format);
- const uint bh = util_format_get_blockheight(format);
- const uint dst_stride = 4 * MAX2(width, bw);
- const uint src_stride = util_format_get_stride(format, width);
-
- desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
-
- if (width < bw || height < bh) {
- /* We're decompressing an image smaller than the compression
- * block size. We don't want garbage pixel values in the region
- * outside (width x height) so replicate pixels from the (width
- * x height) region to fill out the (bw x bh) block size.
- */
- uint x, y;
- for (y = 0; y < bh; y++) {
- for (x = 0; x < bw; x++) {
- if (x >= width || y >= height) {
- uint p = (y * bw + x) * 4;
- dst[p + 0] = dst[0];
- dst[p + 1] = dst[1];
- dst[p + 2] = dst[2];
- dst[p + 3] = dst[3];
- }
- }
- }
- }
-}
-
-
-/**
- * Helper function to compress an image. The source is a 32-bpp RGBA image
- * with stride==width.
- */
-static void
-compress_image(enum pipe_format format,
- const uint8_t *src, uint8_t *dst,
- unsigned width, unsigned height)
-{
- const struct util_format_description *desc = util_format_description(format);
- const uint dst_stride = util_format_get_stride(format, width);
- const uint src_stride = 4 * width;
-
- desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
-}
-
-
-/**
- * Software fallback for generate mipmap levels.
- */
-static void
-fallback_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct pipe_resource *pt = st_get_texobj_resource(texObj);
- const uint baseLevel = texObj->BaseLevel;
- const uint lastLevel = pt->last_level;
- const uint face = _mesa_tex_target_to_face(target);
- uint dstLevel;
- GLenum datatype;
- GLuint comps;
- GLboolean compressed;
-
- if (ST_DEBUG & DEBUG_FALLBACK)
- debug_printf("%s: fallback processing\n", __FUNCTION__);
-
- assert(target != GL_TEXTURE_3D); /* not done yet */
-
- compressed =
- _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat);
-
- if (compressed) {
- datatype = GL_UNSIGNED_BYTE;
- comps = 4;
- }
- else {
- _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat,
- &datatype, &comps);
- assert(comps > 0 && "bad texture format in fallback_generate_mipmap()");
- }
-
- for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
- const uint srcLevel = dstLevel - 1;
- const uint srcWidth = u_minify(pt->width0, srcLevel);
- const uint srcHeight = u_minify(pt->height0, srcLevel);
- const uint srcDepth = u_minify(pt->depth0, srcLevel);
- const uint dstWidth = u_minify(pt->width0, dstLevel);
- const uint dstHeight = u_minify(pt->height0, dstLevel);
- const uint dstDepth = u_minify(pt->depth0, dstLevel);
- struct pipe_transfer *srcTrans, *dstTrans;
- const ubyte *srcData;
- ubyte *dstData;
- int srcStride, dstStride;
-
- srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel,
- face,
- PIPE_TRANSFER_READ, 0, 0,
- srcWidth, srcHeight);
-
- dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel,
- face,
- PIPE_TRANSFER_WRITE, 0, 0,
- dstWidth, dstHeight);
-
- srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans);
- dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans);
-
- srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format);
- dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format);
-
- /* this cannot work correctly for 3d since it does
- not respect layerStride. */
- if (compressed) {
- const enum pipe_format format = pt->format;
- const uint bw = util_format_get_blockwidth(format);
- const uint bh = util_format_get_blockheight(format);
- const uint srcWidth2 = align(srcWidth, bw);
- const uint srcHeight2 = align(srcHeight, bh);
- const uint dstWidth2 = align(dstWidth, bw);
- const uint dstHeight2 = align(dstHeight, bh);
- uint8_t *srcTemp, *dstTemp;
-
- assert(comps == 4);
-
- srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000);
- dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000);
-
- /* decompress the src image: srcData -> srcTemp */
- decompress_image(format, srcData, srcTemp, srcWidth, srcHeight);
-
- _mesa_generate_mipmap_level(target, datatype, comps,
- 0 /*border*/,
- srcWidth2, srcHeight2, srcDepth,
- srcTemp,
- srcWidth2, /* stride in texels */
- dstWidth2, dstHeight2, dstDepth,
- dstTemp,
- dstWidth2); /* stride in texels */
-
- /* compress the new image: dstTemp -> dstData */
- compress_image(format, dstTemp, dstData, dstWidth, dstHeight);
-
- free(srcTemp);
- free(dstTemp);
- }
- else {
- _mesa_generate_mipmap_level(target, datatype, comps,
- 0 /*border*/,
- srcWidth, srcHeight, srcDepth,
- srcData,
- srcStride, /* stride in texels */
- dstWidth, dstHeight, dstDepth,
- dstData,
- dstStride); /* stride in texels */
- }
-
- pipe_transfer_unmap(pipe, srcTrans);
- pipe_transfer_unmap(pipe, dstTrans);
-
- pipe->transfer_destroy(pipe, srcTrans);
- pipe->transfer_destroy(pipe, dstTrans);
- }
-}
-
-
-/**
- * Compute the expected number of mipmap levels in the texture given
- * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/
- * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap
- * levels should be generated.
- */
-static GLuint
-compute_num_levels(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- GLenum target)
-{
- if (target == GL_TEXTURE_RECTANGLE_ARB) {
- return 1;
- }
- else {
- const struct gl_texture_image *baseImage =
- _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel);
- GLuint size, numLevels;
-
- size = MAX2(baseImage->Width2, baseImage->Height2);
- size = MAX2(size, baseImage->Depth2);
-
- numLevels = texObj->BaseLevel;
-
- while (size > 0) {
- numLevels++;
- size >>= 1;
- }
-
- numLevels = MIN2(numLevels, texObj->MaxLevel + 1);
-
- assert(numLevels >= 1);
-
- return numLevels;
- }
-}
-
-
-/**
- * Called via ctx->Driver.GenerateMipmap().
- */
-void
-st_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct st_context *st = st_context(ctx);
- struct st_texture_object *stObj = st_texture_object(texObj);
- struct pipe_resource *pt = st_get_texobj_resource(texObj);
- const uint baseLevel = texObj->BaseLevel;
- uint lastLevel;
- uint dstLevel;
-
- if (!pt)
- return;
-
- /* not sure if this ultimately actually should work,
- but we're not supporting multisampled textures yet. */
- assert(pt->nr_samples < 2);
-
- /* find expected last mipmap level to generate*/
- lastLevel = compute_num_levels(ctx, texObj, target) - 1;
-
- if (lastLevel == 0)
- return;
-
- if (pt->last_level < lastLevel) {
- /* The current gallium texture doesn't have space for all the
- * mipmap levels we need to generate. So allocate a new texture.
- */
- struct pipe_resource *oldTex = stObj->pt;
-
- /* create new texture with space for more levels */
- stObj->pt = st_texture_create(st,
- oldTex->target,
- oldTex->format,
- lastLevel,
- oldTex->width0,
- oldTex->height0,
- oldTex->depth0,
- oldTex->array_size,
- oldTex->bind);
-
- /* The texture isn't in a "complete" state yet so set the expected
- * lastLevel here, since it won't get done in st_finalize_texture().
- */
- stObj->lastLevel = lastLevel;
-
- /* This will copy the old texture's base image into the new texture
- * which we just allocated.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
-
- /* release the old tex (will likely be freed too) */
- pipe_resource_reference(&oldTex, NULL);
- pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-
- pt = stObj->pt;
- }
- else {
- /* Make sure that the base texture image data is present in the
- * texture buffer.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
- }
-
- assert(pt->last_level >= lastLevel);
-
- /* Try to generate the mipmap by rendering/texturing. If that fails,
- * use the software fallback.
- */
- if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) {
- /* since the util code actually also has a fallback, should
- probably make it never fail and kill this */
- fallback_generate_mipmap(ctx, target, texObj);
- }
-
- /* Fill in the Mesa gl_texture_image fields */
- for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
- const uint srcLevel = dstLevel - 1;
- const struct gl_texture_image *srcImage
- = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
- struct gl_texture_image *dstImage;
- struct st_texture_image *stImage;
- uint dstWidth = u_minify(pt->width0, dstLevel);
- uint dstHeight = u_minify(pt->height0, dstLevel);
- uint dstDepth = u_minify(pt->depth0, dstLevel);
- uint border = srcImage->Border;
-
- dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel);
- if (!dstImage) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
- return;
- }
-
- /* Free old image data */
- if (dstImage->Data)
- ctx->Driver.FreeTexImageData(ctx, dstImage);
-
- /* initialize new image */
- _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight,
- dstDepth, border, srcImage->InternalFormat,
- srcImage->TexFormat);
-
- stImage = st_texture_image(dstImage);
- stImage->level = dstLevel;
-
- pipe_resource_reference(&stImage->pt, pt);
- }
-}
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mipmap.h" +#include "main/teximage.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_gen_mipmap.h" + +#include "st_debug.h" +#include "st_context.h" +#include "st_texture.h" +#include "st_gen_mipmap.h" +#include "st_cb_texture.h" + + +/** + * one-time init for generate mipmap + * XXX Note: there may be other times we need no-op/simple state like this. + * In that case, some code refactoring would be good. + */ +void +st_init_generate_mipmap(struct st_context *st) +{ + st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context); +} + + +void +st_destroy_generate_mipmap(struct st_context *st) +{ + util_destroy_gen_mipmap(st->gen_mipmap); + st->gen_mipmap = NULL; +} + + +/** + * Generate mipmap levels using hardware rendering. + * \return TRUE if successful, FALSE if not possible + */ +static boolean +st_render_mipmap(struct st_context *st, + GLenum target, + struct st_texture_object *stObj, + uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe); + const uint face = _mesa_tex_target_to_face(target); + + assert(psv->texture == stObj->pt); +#if 0 + assert(target != GL_TEXTURE_3D); /* implemented but untested */ +#endif + + /* check if we can render in the texture's format */ + /* XXX should probably kill this and always use util_gen_mipmap + since this implements a sw fallback as well */ + if (!screen->is_format_supported(screen, psv->format, psv->texture->target, + 0, PIPE_BIND_RENDER_TARGET, 0)) { + return FALSE; + } + + util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel, + PIPE_TEX_FILTER_LINEAR); + + return TRUE; +} + + +/** + * Helper function to decompress an image. The result is a 32-bpp RGBA + * image with stride==width. + */ +static void +decompress_image(enum pipe_format format, + const uint8_t *src, uint8_t *dst, + unsigned width, unsigned height, unsigned src_stride) +{ + const struct util_format_description *desc = util_format_description(format); + const uint bw = util_format_get_blockwidth(format); + const uint bh = util_format_get_blockheight(format); + const uint dst_stride = 4 * MAX2(width, bw); + + desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); + + if (width < bw || height < bh) { + /* We're decompressing an image smaller than the compression + * block size. We don't want garbage pixel values in the region + * outside (width x height) so replicate pixels from the (width + * x height) region to fill out the (bw x bh) block size. + */ + uint x, y; + for (y = 0; y < bh; y++) { + for (x = 0; x < bw; x++) { + if (x >= width || y >= height) { + uint p = (y * bw + x) * 4; + dst[p + 0] = dst[0]; + dst[p + 1] = dst[1]; + dst[p + 2] = dst[2]; + dst[p + 3] = dst[3]; + } + } + } + } +} + + +/** + * Helper function to compress an image. The source is a 32-bpp RGBA image + * with stride==width. + */ +static void +compress_image(enum pipe_format format, + const uint8_t *src, uint8_t *dst, + unsigned width, unsigned height, unsigned dst_stride) +{ + const struct util_format_description *desc = util_format_description(format); + const uint src_stride = 4 * width; + + desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height); +} + + +/** + * Software fallback for generate mipmap levels. + */ +static void +fallback_generate_mipmap(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct pipe_resource *pt = st_get_texobj_resource(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + const uint face = _mesa_tex_target_to_face(target); + uint dstLevel; + GLenum datatype; + GLuint comps; + GLboolean compressed; + + if (ST_DEBUG & DEBUG_FALLBACK) + debug_printf("%s: fallback processing\n", __FUNCTION__); + + assert(target != GL_TEXTURE_3D); /* not done yet */ + + compressed = + _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat); + + if (compressed) { + datatype = GL_UNSIGNED_BYTE; + comps = 4; + } + else { + _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat, + &datatype, &comps); + assert(comps > 0 && "bad texture format in fallback_generate_mipmap()"); + } + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const uint srcWidth = u_minify(pt->width0, srcLevel); + const uint srcHeight = u_minify(pt->height0, srcLevel); + const uint srcDepth = u_minify(pt->depth0, srcLevel); + const uint dstWidth = u_minify(pt->width0, dstLevel); + const uint dstHeight = u_minify(pt->height0, dstLevel); + const uint dstDepth = u_minify(pt->depth0, dstLevel); + struct pipe_transfer *srcTrans, *dstTrans; + const ubyte *srcData; + ubyte *dstData; + int srcStride, dstStride; + + srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel, + face, + PIPE_TRANSFER_READ, 0, 0, + srcWidth, srcHeight); + + dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel, + face, + PIPE_TRANSFER_WRITE, 0, 0, + dstWidth, dstHeight); + + srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans); + dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans); + + srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format); + dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format); + + /* this cannot work correctly for 3d since it does + not respect layerStride. */ + if (compressed) { + const enum pipe_format format = pt->format; + const uint bw = util_format_get_blockwidth(format); + const uint bh = util_format_get_blockheight(format); + const uint srcWidth2 = align(srcWidth, bw); + const uint srcHeight2 = align(srcHeight, bh); + const uint dstWidth2 = align(dstWidth, bw); + const uint dstHeight2 = align(dstHeight, bh); + uint8_t *srcTemp, *dstTemp; + + assert(comps == 4); + + srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000); + dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000); + + /* decompress the src image: srcData -> srcTemp */ + decompress_image(format, srcData, srcTemp, srcWidth, srcHeight, srcTrans->stride); + + _mesa_generate_mipmap_level(target, datatype, comps, + 0 /*border*/, + srcWidth2, srcHeight2, srcDepth, + srcTemp, + srcWidth2, /* stride in texels */ + dstWidth2, dstHeight2, dstDepth, + dstTemp, + dstWidth2); /* stride in texels */ + + /* compress the new image: dstTemp -> dstData */ + compress_image(format, dstTemp, dstData, dstWidth, dstHeight, dstTrans->stride); + + free(srcTemp); + free(dstTemp); + } + else { + _mesa_generate_mipmap_level(target, datatype, comps, + 0 /*border*/, + srcWidth, srcHeight, srcDepth, + srcData, + srcStride, /* stride in texels */ + dstWidth, dstHeight, dstDepth, + dstData, + dstStride); /* stride in texels */ + } + + pipe_transfer_unmap(pipe, srcTrans); + pipe_transfer_unmap(pipe, dstTrans); + + pipe->transfer_destroy(pipe, srcTrans); + pipe->transfer_destroy(pipe, dstTrans); + } +} + + +/** + * Compute the expected number of mipmap levels in the texture given + * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/ + * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap + * levels should be generated. + */ +static GLuint +compute_num_levels(struct gl_context *ctx, + struct gl_texture_object *texObj, + GLenum target) +{ + if (target == GL_TEXTURE_RECTANGLE_ARB) { + return 1; + } + else { + const struct gl_texture_image *baseImage = + _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel); + GLuint size, numLevels; + + size = MAX2(baseImage->Width2, baseImage->Height2); + size = MAX2(size, baseImage->Depth2); + + numLevels = texObj->BaseLevel; + + while (size > 0) { + numLevels++; + size >>= 1; + } + + numLevels = MIN2(numLevels, texObj->MaxLevel + 1); + + assert(numLevels >= 1); + + return numLevels; + } +} + + +/** + * Called via ctx->Driver.GenerateMipmap(). + */ +void +st_generate_mipmap(struct gl_context *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct st_context *st = st_context(ctx); + struct st_texture_object *stObj = st_texture_object(texObj); + struct pipe_resource *pt = st_get_texobj_resource(texObj); + const uint baseLevel = texObj->BaseLevel; + uint lastLevel; + uint dstLevel; + + if (!pt) + return; + + /* not sure if this ultimately actually should work, + but we're not supporting multisampled textures yet. */ + assert(pt->nr_samples < 2); + + /* find expected last mipmap level to generate*/ + lastLevel = compute_num_levels(ctx, texObj, target) - 1; + + if (lastLevel == 0) + return; + + if (pt->last_level < lastLevel) { + /* The current gallium texture doesn't have space for all the + * mipmap levels we need to generate. So allocate a new texture. + */ + struct pipe_resource *oldTex = stObj->pt; + + /* create new texture with space for more levels */ + stObj->pt = st_texture_create(st, + oldTex->target, + oldTex->format, + lastLevel, + oldTex->width0, + oldTex->height0, + oldTex->depth0, + oldTex->array_size, + oldTex->bind); + + /* The texture isn't in a "complete" state yet so set the expected + * lastLevel here, since it won't get done in st_finalize_texture(). + */ + stObj->lastLevel = lastLevel; + + /* This will copy the old texture's base image into the new texture + * which we just allocated. + */ + st_finalize_texture(ctx, st->pipe, texObj); + + /* release the old tex (will likely be freed too) */ + pipe_resource_reference(&oldTex, NULL); + pipe_sampler_view_reference(&stObj->sampler_view, NULL); + + pt = stObj->pt; + } + else { + /* Make sure that the base texture image data is present in the + * texture buffer. + */ + st_finalize_texture(ctx, st->pipe, texObj); + } + + assert(pt->last_level >= lastLevel); + + /* Try to generate the mipmap by rendering/texturing. If that fails, + * use the software fallback. + */ + if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) { + /* since the util code actually also has a fallback, should + probably make it never fail and kill this */ + fallback_generate_mipmap(ctx, target, texObj); + } + + /* Fill in the Mesa gl_texture_image fields */ + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const struct gl_texture_image *srcImage + = _mesa_get_tex_image(ctx, texObj, target, srcLevel); + struct gl_texture_image *dstImage; + struct st_texture_image *stImage; + uint dstWidth = u_minify(pt->width0, dstLevel); + uint dstHeight = u_minify(pt->height0, dstLevel); + uint dstDepth = u_minify(pt->depth0, dstLevel); + uint border = srcImage->Border; + + dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); + if (!dstImage) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); + return; + } + + /* Free old image data */ + if (dstImage->Data) + ctx->Driver.FreeTexImageData(ctx, dstImage); + + /* initialize new image */ + _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, + dstDepth, border, srcImage->InternalFormat, + srcImage->TexFormat); + + stImage = st_texture_image(dstImage); + stImage->level = dstLevel; + + pipe_resource_reference(&stImage->pt, pt); + } +} diff --git a/mesalib/src/mesa/tnl/t_draw.c b/mesalib/src/mesa/tnl/t_draw.c index 741f0ed3f..b1967e654 100644 --- a/mesalib/src/mesa/tnl/t_draw.c +++ b/mesalib/src/mesa/tnl/t_draw.c @@ -1,494 +1,534 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.1
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/condrender.h"
-#include "main/context.h"
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
-#include "t_context.h"
-#include "tnl.h"
-
-
-
-static GLubyte *get_space(struct gl_context *ctx, GLuint bytes)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLubyte *space = malloc(bytes);
-
- tnl->block[tnl->nr_blocks++] = space;
- return space;
-}
-
-
-static void free_space(struct gl_context *ctx)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLuint i;
- for (i = 0; i < tnl->nr_blocks; i++)
- free(tnl->block[i]);
- tnl->nr_blocks = 0;
-}
-
-
-/* Convert the incoming array to GLfloats. Understands the
- * array->Normalized flag and selects the correct conversion method.
- */
-#define CONVERT( TYPE, MACRO ) do { \
- GLuint i, j; \
- if (input->Normalized) { \
- for (i = 0; i < count; i++) { \
- const TYPE *in = (TYPE *)ptr; \
- for (j = 0; j < sz; j++) { \
- *fptr++ = MACRO(*in); \
- in++; \
- } \
- ptr += input->StrideB; \
- } \
- } else { \
- for (i = 0; i < count; i++) { \
- const TYPE *in = (TYPE *)ptr; \
- for (j = 0; j < sz; j++) { \
- *fptr++ = (GLfloat)(*in); \
- in++; \
- } \
- ptr += input->StrideB; \
- } \
- } \
-} while (0)
-
-
-/**
- * Convert array of BGRA/GLubyte[4] values to RGBA/float[4]
- * \param ptr input/ubyte array
- * \param fptr output/float array
- */
-static void
-convert_bgra_to_float(const struct gl_client_array *input,
- const GLubyte *ptr, GLfloat *fptr,
- GLuint count )
-{
- GLuint i;
- assert(input->Normalized);
- assert(input->Size == 4);
- for (i = 0; i < count; i++) {
- const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */
- *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */
- *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */
- *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */
- *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */
- ptr += input->StrideB;
- }
-}
-
-static void
-convert_half_to_float(const struct gl_client_array *input,
- const GLubyte *ptr, GLfloat *fptr,
- GLuint count, GLuint sz)
-{
- GLuint i, j;
-
- for (i = 0; i < count; i++) {
- GLhalfARB *in = (GLhalfARB *)ptr;
-
- for (j = 0; j < sz; j++) {
- *fptr++ = _mesa_half_to_float(in[j]);
- }
- ptr += input->StrideB;
- }
-}
-
-/* Adjust pointer to point at first requested element, convert to
- * floating point, populate VB->AttribPtr[].
- */
-static void _tnl_import_array( struct gl_context *ctx,
- GLuint attrib,
- GLuint count,
- const struct gl_client_array *input,
- const GLubyte *ptr )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint stride = input->StrideB;
-
- if (input->Type != GL_FLOAT) {
- const GLuint sz = input->Size;
- GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat));
- GLfloat *fptr = (GLfloat *)buf;
-
- switch (input->Type) {
- case GL_BYTE:
- CONVERT(GLbyte, BYTE_TO_FLOAT);
- break;
- case GL_UNSIGNED_BYTE:
- if (input->Format == GL_BGRA) {
- /* See GL_EXT_vertex_array_bgra */
- convert_bgra_to_float(input, ptr, fptr, count);
- }
- else {
- CONVERT(GLubyte, UBYTE_TO_FLOAT);
- }
- break;
- case GL_SHORT:
- CONVERT(GLshort, SHORT_TO_FLOAT);
- break;
- case GL_UNSIGNED_SHORT:
- CONVERT(GLushort, USHORT_TO_FLOAT);
- break;
- case GL_INT:
- CONVERT(GLint, INT_TO_FLOAT);
- break;
- case GL_UNSIGNED_INT:
- CONVERT(GLuint, UINT_TO_FLOAT);
- break;
- case GL_DOUBLE:
- CONVERT(GLdouble, (GLfloat));
- break;
- case GL_HALF_FLOAT:
- convert_half_to_float(input, ptr, fptr, count, sz);
- break;
- default:
- assert(0);
- break;
- }
-
- ptr = buf;
- stride = sz * sizeof(GLfloat);
- }
-
- VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib];
- VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr;
- VB->AttribPtr[attrib]->start = (GLfloat *)ptr;
- VB->AttribPtr[attrib]->count = count;
- VB->AttribPtr[attrib]->stride = stride;
- VB->AttribPtr[attrib]->size = input->Size;
-
- /* This should die, but so should the whole GLvector4f concept:
- */
- VB->AttribPtr[attrib]->flags = (((1<<input->Size)-1) |
- VEC_NOT_WRITEABLE |
- (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE));
-
- VB->AttribPtr[attrib]->storage = NULL;
-}
-
-#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2)
-
-
-static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx,
- const GLvector4f *input,
- GLuint count)
-{
- const GLubyte *ptr = (const GLubyte *)input->data;
- const GLuint stride = input->stride;
- GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS);
- GLboolean *bptr = space;
- GLuint i;
-
- for (i = 0; i < count; i++) {
- *bptr++ = ((GLfloat *)ptr)[0] == 1.0;
- ptr += stride;
- }
-
- return space;
-}
-
-
-static void bind_inputs( struct gl_context *ctx,
- const struct gl_client_array *inputs[],
- GLint count,
- struct gl_buffer_object **bo,
- GLuint *nr_bo )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint i;
-
- /* Map all the VBOs
- */
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- const void *ptr;
-
- if (inputs[i]->BufferObj->Name) {
- if (!inputs[i]->BufferObj->Pointer) {
- bo[*nr_bo] = inputs[i]->BufferObj;
- (*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- inputs[i]->BufferObj);
-
- assert(inputs[i]->BufferObj->Pointer);
- }
-
- ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer,
- inputs[i]->Ptr);
- }
- else
- ptr = inputs[i]->Ptr;
-
- /* Just make sure the array is floating point, otherwise convert to
- * temporary storage.
- *
- * XXX: remove the GLvector4f type at some stage and just use
- * client arrays.
- */
- _tnl_import_array(ctx, i, count, inputs[i], ptr);
- }
-
- /* We process only the vertices between min & max index:
- */
- VB->Count = count;
-
- /* These should perhaps be part of _TNL_ATTRIB_* */
- VB->BackfaceColorPtr = NULL;
- VB->BackfaceIndexPtr = NULL;
- VB->BackfaceSecondaryColorPtr = NULL;
-
- /* Clipping and drawing code still requires this to be a packed
- * array of ubytes which can be written into. TODO: Fix and
- * remove.
- */
- if (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL)
- {
- VB->EdgeFlag = _tnl_import_edgeflag( ctx,
- VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG],
- VB->Count );
- }
- else {
- /* the data previously pointed to by EdgeFlag may have been freed */
- VB->EdgeFlag = NULL;
- }
-}
-
-
-/* Translate indices to GLuints and store in VB->Elts.
- */
-static void bind_indices( struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- struct gl_buffer_object **bo,
- GLuint *nr_bo)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint i;
- void *ptr;
-
- if (!ib) {
- VB->Elts = NULL;
- return;
- }
-
- if (ib->obj->Name && !ib->obj->Pointer) {
- bo[*nr_bo] = ib->obj;
- (*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- ib->obj);
-
- assert(ib->obj->Pointer);
- }
-
- ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
- if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
- VB->Elts = (GLuint *) ptr;
- }
- else {
- GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint));
- VB->Elts = elts;
-
- if (ib->type == GL_UNSIGNED_INT) {
- const GLuint *in = (GLuint *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- else if (ib->type == GL_UNSIGNED_SHORT) {
- const GLushort *in = (GLushort *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- else {
- const GLubyte *in = (GLubyte *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- }
-}
-
-static void bind_prims( struct gl_context *ctx,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
-
- VB->Primitive = prim;
- VB->PrimitiveCount = nr_prims;
-}
-
-static void unmap_vbos( struct gl_context *ctx,
- struct gl_buffer_object **bo,
- GLuint nr_bo )
-{
- GLuint i;
- for (i = 0; i < nr_bo; i++) {
- ctx->Driver.UnmapBuffer(ctx,
- 0, /* target -- I don't see why this would be needed */
- bo[i]);
- }
-}
-
-
-void _tnl_vbo_draw_prims(struct gl_context *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- if (!index_bounds_valid)
- vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
-
- _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
-}
-
-/* This is the main entrypoint into the slimmed-down software tnl
- * module. In a regular swtnl driver, this can be plugged straight
- * into the vbo->Driver.DrawPrims() callback.
- */
-void _tnl_draw_prims( struct gl_context *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint min_index,
- GLuint max_index)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- const GLuint TEST_SPLIT = 0;
- const GLint max = TEST_SPLIT ? 8 : tnl->vb.Size - MAX_CLIPPED_VERTICES;
- GLint max_basevertex = prim->basevertex;
- GLuint i;
-
- /* Mesa core state should have been validated already */
- assert(ctx->NewState == 0x0);
-
- if (!_mesa_check_conditional_render(ctx))
- return; /* don't draw */
-
- for (i = 1; i < nr_prims; i++)
- max_basevertex = MAX2(max_basevertex, prim[i].basevertex);
-
- if (0)
- {
- printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
- for (i = 0; i < nr_prims; i++)
- printf("prim %d: %s start %d count %d\n", i,
- _mesa_lookup_enum_by_nr(prim[i].mode),
- prim[i].start,
- prim[i].count);
- }
-
- if (min_index) {
- /* We always translate away calls with min_index != 0.
- */
- vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib,
- min_index, max_index,
- _tnl_vbo_draw_prims );
- return;
- }
- else if ((GLint)max_index + max_basevertex > max) {
- /* The software TNL pipeline has a fixed amount of storage for
- * vertices and it is necessary to split incoming drawing commands
- * if they exceed that limit.
- */
- struct split_limits limits;
- limits.max_verts = max;
- limits.max_vb_size = ~0;
- limits.max_indices = ~0;
-
- /* This will split the buffers one way or another and
- * recursively call back into this function.
- */
- vbo_split_prims( ctx, arrays, prim, nr_prims, ib,
- 0, max_index + prim->basevertex,
- _tnl_vbo_draw_prims,
- &limits );
- }
- else {
- /* May need to map a vertex buffer object for every attribute plus
- * one for the index buffer.
- */
- struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1];
- GLuint nr_bo = 0;
- GLuint inst;
-
- for (i = 0; i < nr_prims;) {
- GLuint this_nr_prims;
-
- /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
- * will rebase the elements to the basevertex, and we'll only
- * emit strings of prims with the same basevertex in one draw call.
- */
- for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
- this_nr_prims++) {
- if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
- break;
- }
-
- assert(prim[i].num_instances > 0);
-
- /* Binding inputs may imply mapping some vertex buffer objects.
- * They will need to be unmapped below.
- */
- for (inst = 0; inst < prim[i].num_instances; inst++) {
-
- bind_prims(ctx, &prim[i], this_nr_prims);
- bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1,
- bo, &nr_bo);
- bind_indices(ctx, ib, bo, &nr_bo);
-
- tnl->CurInstance = inst;
- TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx);
-
- unmap_vbos(ctx, bo, nr_bo);
- free_space(ctx);
- }
-
- i += this_nr_prims;
- }
- }
-}
-
+/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/condrender.h" +#include "main/context.h" +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/enums.h" + +#include "t_context.h" +#include "tnl.h" + + + +static GLubyte *get_space(struct gl_context *ctx, GLuint bytes) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLubyte *space = malloc(bytes); + + tnl->block[tnl->nr_blocks++] = space; + return space; +} + + +static void free_space(struct gl_context *ctx) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + GLuint i; + for (i = 0; i < tnl->nr_blocks; i++) + free(tnl->block[i]); + tnl->nr_blocks = 0; +} + + +/* Convert the incoming array to GLfloats. Understands the + * array->Normalized flag and selects the correct conversion method. + */ +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)ptr; \ + for (j = 0; j < sz; j++) { \ + *fptr++ = MACRO(*in); \ + in++; \ + } \ + ptr += input->StrideB; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)ptr; \ + for (j = 0; j < sz; j++) { \ + *fptr++ = (GLfloat)(*in); \ + in++; \ + } \ + ptr += input->StrideB; \ + } \ + } \ +} while (0) + + +/** + * Convert array of BGRA/GLubyte[4] values to RGBA/float[4] + * \param ptr input/ubyte array + * \param fptr output/float array + */ +static void +convert_bgra_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count ) +{ + GLuint i; + assert(input->Normalized); + assert(input->Size == 4); + for (i = 0; i < count; i++) { + const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */ + *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */ + *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */ + *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */ + *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */ + ptr += input->StrideB; + } +} + +static void +convert_half_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count, GLuint sz) +{ + GLuint i, j; + + for (i = 0; i < count; i++) { + GLhalfARB *in = (GLhalfARB *)ptr; + + for (j = 0; j < sz; j++) { + *fptr++ = _mesa_half_to_float(in[j]); + } + ptr += input->StrideB; + } +} + +/** + * \brief Convert fixed-point to floating-point. + * + * In OpenGL, a fixed-point number is a "signed 2's complement 16.16 scaled + * integer" (Table 2.2 of the OpenGL ES 2.0 spec). + * + * If the buffer has the \c normalized flag set, the formula + * \code normalize(x) := (2*x + 1) / (2^16 - 1) \endcode + * is used to map the fixed-point numbers into the range [-1, 1]. + */ +static void +convert_fixed_to_float(const struct gl_client_array *input, + const GLubyte *ptr, GLfloat *fptr, + GLuint count) +{ + GLuint i, j; + const GLint size = input->Size; + + if (input->Normalized) { + for (i = 0; i < count; ++i) { + const GLfixed *in = (GLfixed *) ptr; + for (j = 0; j < size; ++j) { + *fptr++ = (GLfloat) (2 * in[j] + 1) / (GLfloat) ((1 << 16) - 1); + } + ptr += input->StrideB; + } + } else { + for (i = 0; i < count; ++i) { + const GLfixed *in = (GLfixed *) ptr; + for (j = 0; j < size; ++j) { + *fptr++ = in[j] / (GLfloat) (1 << 16); + } + ptr += input->StrideB; + } + } +} + +/* Adjust pointer to point at first requested element, convert to + * floating point, populate VB->AttribPtr[]. + */ +static void _tnl_import_array( struct gl_context *ctx, + GLuint attrib, + GLuint count, + const struct gl_client_array *input, + const GLubyte *ptr ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint stride = input->StrideB; + + if (input->Type != GL_FLOAT) { + const GLuint sz = input->Size; + GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat)); + GLfloat *fptr = (GLfloat *)buf; + + switch (input->Type) { + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + if (input->Format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + convert_bgra_to_float(input, ptr, fptr, count); + } + else { + CONVERT(GLubyte, UBYTE_TO_FLOAT); + } + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_HALF_FLOAT: + convert_half_to_float(input, ptr, fptr, count, sz); + break; + case GL_FIXED: + convert_fixed_to_float(input, ptr, fptr, count); + break; + default: + assert(0); + break; + } + + ptr = buf; + stride = sz * sizeof(GLfloat); + } + + VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib]; + VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr; + VB->AttribPtr[attrib]->start = (GLfloat *)ptr; + VB->AttribPtr[attrib]->count = count; + VB->AttribPtr[attrib]->stride = stride; + VB->AttribPtr[attrib]->size = input->Size; + + /* This should die, but so should the whole GLvector4f concept: + */ + VB->AttribPtr[attrib]->flags = (((1<<input->Size)-1) | + VEC_NOT_WRITEABLE | + (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE)); + + VB->AttribPtr[attrib]->storage = NULL; +} + +#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2) + + +static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx, + const GLvector4f *input, + GLuint count) +{ + const GLubyte *ptr = (const GLubyte *)input->data; + const GLuint stride = input->stride; + GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS); + GLboolean *bptr = space; + GLuint i; + + for (i = 0; i < count; i++) { + *bptr++ = ((GLfloat *)ptr)[0] == 1.0; + ptr += stride; + } + + return space; +} + + +static void bind_inputs( struct gl_context *ctx, + const struct gl_client_array *inputs[], + GLint count, + struct gl_buffer_object **bo, + GLuint *nr_bo ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; + + /* Map all the VBOs + */ + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + const void *ptr; + + if (inputs[i]->BufferObj->Name) { + if (!inputs[i]->BufferObj->Pointer) { + bo[*nr_bo] = inputs[i]->BufferObj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER, + GL_READ_ONLY_ARB, + inputs[i]->BufferObj); + + assert(inputs[i]->BufferObj->Pointer); + } + + ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer, + inputs[i]->Ptr); + } + else + ptr = inputs[i]->Ptr; + + /* Just make sure the array is floating point, otherwise convert to + * temporary storage. + * + * XXX: remove the GLvector4f type at some stage and just use + * client arrays. + */ + _tnl_import_array(ctx, i, count, inputs[i], ptr); + } + + /* We process only the vertices between min & max index: + */ + VB->Count = count; + + /* These should perhaps be part of _TNL_ATTRIB_* */ + VB->BackfaceColorPtr = NULL; + VB->BackfaceIndexPtr = NULL; + VB->BackfaceSecondaryColorPtr = NULL; + + /* Clipping and drawing code still requires this to be a packed + * array of ubytes which can be written into. TODO: Fix and + * remove. + */ + if (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL) + { + VB->EdgeFlag = _tnl_import_edgeflag( ctx, + VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG], + VB->Count ); + } + else { + /* the data previously pointed to by EdgeFlag may have been freed */ + VB->EdgeFlag = NULL; + } +} + + +/* Translate indices to GLuints and store in VB->Elts. + */ +static void bind_indices( struct gl_context *ctx, + const struct _mesa_index_buffer *ib, + struct gl_buffer_object **bo, + GLuint *nr_bo) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; + void *ptr; + + if (!ib) { + VB->Elts = NULL; + return; + } + + if (ib->obj->Name && !ib->obj->Pointer) { + bo[*nr_bo] = ib->obj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER, + GL_READ_ONLY_ARB, + ib->obj); + + assert(ib->obj->Pointer); + } + + ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr); + + if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) { + VB->Elts = (GLuint *) ptr; + } + else { + GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint)); + VB->Elts = elts; + + if (ib->type == GL_UNSIGNED_INT) { + const GLuint *in = (GLuint *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + else if (ib->type == GL_UNSIGNED_SHORT) { + const GLushort *in = (GLushort *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + else { + const GLubyte *in = (GLubyte *)ptr; + for (i = 0; i < ib->count; i++) + *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex; + } + } +} + +static void bind_prims( struct gl_context *ctx, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + + VB->Primitive = prim; + VB->PrimitiveCount = nr_prims; +} + +static void unmap_vbos( struct gl_context *ctx, + struct gl_buffer_object **bo, + GLuint nr_bo ) +{ + GLuint i; + for (i = 0; i < nr_bo; i++) { + ctx->Driver.UnmapBuffer(ctx, + 0, /* target -- I don't see why this would be needed */ + bo[i]); + } +} + + +void _tnl_vbo_draw_prims(struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) +{ + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +/* This is the main entrypoint into the slimmed-down software tnl + * module. In a regular swtnl driver, this can be plugged straight + * into the vbo->Driver.DrawPrims() callback. + */ +void _tnl_draw_prims( struct gl_context *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + TNLcontext *tnl = TNL_CONTEXT(ctx); + const GLuint TEST_SPLIT = 0; + const GLint max = TEST_SPLIT ? 8 : tnl->vb.Size - MAX_CLIPPED_VERTICES; + GLint max_basevertex = prim->basevertex; + GLuint i; + + /* Mesa core state should have been validated already */ + assert(ctx->NewState == 0x0); + + if (!_mesa_check_conditional_render(ctx)) + return; /* don't draw */ + + for (i = 1; i < nr_prims; i++) + max_basevertex = MAX2(max_basevertex, prim[i].basevertex); + + if (0) + { + printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + for (i = 0; i < nr_prims; i++) + printf("prim %d: %s start %d count %d\n", i, + _mesa_lookup_enum_by_nr(prim[i].mode), + prim[i].start, + prim[i].count); + } + + if (min_index) { + /* We always translate away calls with min_index != 0. + */ + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, + min_index, max_index, + _tnl_vbo_draw_prims ); + return; + } + else if ((GLint)max_index + max_basevertex > max) { + /* The software TNL pipeline has a fixed amount of storage for + * vertices and it is necessary to split incoming drawing commands + * if they exceed that limit. + */ + struct split_limits limits; + limits.max_verts = max; + limits.max_vb_size = ~0; + limits.max_indices = ~0; + + /* This will split the buffers one way or another and + * recursively call back into this function. + */ + vbo_split_prims( ctx, arrays, prim, nr_prims, ib, + 0, max_index + prim->basevertex, + _tnl_vbo_draw_prims, + &limits ); + } + else { + /* May need to map a vertex buffer object for every attribute plus + * one for the index buffer. + */ + struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1]; + GLuint nr_bo = 0; + GLuint inst; + + for (i = 0; i < nr_prims;) { + GLuint this_nr_prims; + + /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices + * will rebase the elements to the basevertex, and we'll only + * emit strings of prims with the same basevertex in one draw call. + */ + for (this_nr_prims = 1; i + this_nr_prims < nr_prims; + this_nr_prims++) { + if (prim[i].basevertex != prim[i + this_nr_prims].basevertex) + break; + } + + assert(prim[i].num_instances > 0); + + /* Binding inputs may imply mapping some vertex buffer objects. + * They will need to be unmapped below. + */ + for (inst = 0; inst < prim[i].num_instances; inst++) { + + bind_prims(ctx, &prim[i], this_nr_prims); + bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1, + bo, &nr_bo); + bind_indices(ctx, ib, bo, &nr_bo); + + tnl->CurInstance = inst; + TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx); + + unmap_vbos(ctx, bo, nr_bo); + free_space(ctx); + } + + i += this_nr_prims; + } + } +} + diff --git a/mesalib/src/mesa/vbo/vbo_exec_array.c b/mesalib/src/mesa/vbo/vbo_exec_array.c index 13b54d59c..6749541b7 100644 --- a/mesalib/src/mesa/vbo/vbo_exec_array.c +++ b/mesalib/src/mesa/vbo/vbo_exec_array.c @@ -1,1277 +1,1282 @@ -/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/api_validate.h"
-#include "main/varray.h"
-#include "main/bufferobj.h"
-#include "main/enums.h"
-#include "main/macros.h"
-
-#include "vbo_context.h"
-
-
-/**
- * Compute min and max elements by scanning the index buffer for
- * glDraw[Range]Elements() calls.
- * If primitive restart is enabled, we need to ignore restart
- * indexes when computing min/max.
- */
-void
-vbo_get_minmax_index(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index, GLuint *max_index)
-{
- const GLboolean restart = ctx->Array.PrimitiveRestart;
- const GLuint restartIndex = ctx->Array.RestartIndex;
- const GLuint count = prim->count;
- const void *indices;
- GLuint i;
-
- if (_mesa_is_bufferobj(ib->obj)) {
- const GLvoid *map =
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, ib->obj);
- indices = ADD_POINTERS(map, ib->ptr);
- } else {
- indices = ib->ptr;
- }
-
- switch (ib->type) {
- case GL_UNSIGNED_INT: {
- const GLuint *ui_indices = (const GLuint *)indices;
- GLuint max_ui = 0;
- GLuint min_ui = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] != restartIndex) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- *min_index = min_ui;
- *max_index = max_ui;
- break;
- }
- case GL_UNSIGNED_SHORT: {
- const GLushort *us_indices = (const GLushort *)indices;
- GLuint max_us = 0;
- GLuint min_us = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (us_indices[i] != restartIndex) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- *min_index = min_us;
- *max_index = max_us;
- break;
- }
- case GL_UNSIGNED_BYTE: {
- const GLubyte *ub_indices = (const GLubyte *)indices;
- GLuint max_ub = 0;
- GLuint min_ub = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] != restartIndex) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- *min_index = min_ub;
- *max_index = max_ub;
- break;
- }
- default:
- assert(0);
- break;
- }
-
- if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
- }
-}
-
-
-/**
- * Check that element 'j' of the array has reasonable data.
- * Map VBO if needed.
- * For debugging purposes; not normally used.
- */
-static void
-check_array_data(struct gl_context *ctx, struct gl_client_array *array,
- GLuint attrib, GLuint j)
-{
- if (array->Enabled) {
- const void *data = array->Ptr;
- if (_mesa_is_bufferobj(array->BufferObj)) {
- if (!array->BufferObj->Pointer) {
- /* need to map now */
- array->BufferObj->Pointer =
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, array->BufferObj);
- }
- data = ADD_POINTERS(data, array->BufferObj->Pointer);
- }
- switch (array->Type) {
- case GL_FLOAT:
- {
- GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j);
- GLint k;
- for (k = 0; k < array->Size; k++) {
- if (IS_INF_OR_NAN(f[k]) ||
- f[k] >= 1.0e20 || f[k] <= -1.0e10) {
- printf("Bad array data:\n");
- printf(" Element[%u].%u = %f\n", j, k, f[k]);
- printf(" Array %u at %p\n", attrib, (void* ) array);
- printf(" Type 0x%x, Size %d, Stride %d\n",
- array->Type, array->Size, array->Stride);
- printf(" Address/offset %p in Buffer Object %u\n",
- array->Ptr, array->BufferObj->Name);
- f[k] = 1.0; /* XXX replace the bad value! */
- }
- /*assert(!IS_INF_OR_NAN(f[k]));*/
- }
- }
- break;
- default:
- ;
- }
- }
-}
-
-
-/**
- * Unmap the buffer object referenced by given array, if mapped.
- */
-static void
-unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
-{
- if (array->Enabled &&
- _mesa_is_bufferobj(array->BufferObj) &&
- _mesa_bufferobj_mapped(array->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
- }
-}
-
-
-/**
- * Examine the array's data for NaNs, etc.
- * For debug purposes; not normally used.
- */
-static void
-check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
- const void *elements, GLint basevertex)
-{
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- const void *elemMap;
- GLint i, k;
-
- if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- elemMap = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
- elements = ADD_POINTERS(elements, elemMap);
- }
-
- for (i = 0; i < count; i++) {
- GLuint j;
-
- /* j = element[i] */
- switch (elemType) {
- case GL_UNSIGNED_BYTE:
- j = ((const GLubyte *) elements)[i];
- break;
- case GL_UNSIGNED_SHORT:
- j = ((const GLushort *) elements)[i];
- break;
- case GL_UNSIGNED_INT:
- j = ((const GLuint *) elements)[i];
- break;
- default:
- assert(0);
- }
-
- /* check element j of each enabled array */
- check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j);
- check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j);
- check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j);
- check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j);
- for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
- check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j);
- }
- for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
- check_array_data(ctx, &arrayObj->VertexAttrib[k],
- VERT_ATTRIB_GENERIC0 + k, j);
- }
- }
-
- if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
- }
-
- unmap_array_buffer(ctx, &arrayObj->Vertex);
- unmap_array_buffer(ctx, &arrayObj->Normal);
- unmap_array_buffer(ctx, &arrayObj->Color);
- for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
- unmap_array_buffer(ctx, &arrayObj->TexCoord[k]);
- }
- for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
- unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]);
- }
-}
-
-
-/**
- * Check array data, looking for NaNs, etc.
- */
-static void
-check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count)
-{
- /* TO DO */
-}
-
-
-/**
- * Print info/data for glDrawArrays(), for debugging.
- */
-static void
-print_draw_arrays(struct gl_context *ctx,
- GLenum mode, GLint start, GLsizei count)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- int i;
-
- printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n",
- mode, start, count);
-
- for (i = 0; i < 32; i++) {
- GLuint bufName = exec->array.inputs[i]->BufferObj->Name;
- GLint stride = exec->array.inputs[i]->Stride;
- printf("attr %2d: size %d stride %d enabled %d "
- "ptr %p Bufobj %u\n",
- i,
- exec->array.inputs[i]->Size,
- stride,
- /*exec->array.inputs[i]->Enabled,*/
- exec->array.legacy_array[i]->Enabled,
- exec->array.inputs[i]->Ptr,
- bufName);
-
- if (bufName) {
- struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName);
- GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY_ARB, buf);
- int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
- float *f = (float *) (p + offset);
- int *k = (int *) f;
- int i;
- int n = (count * stride) / 4;
- if (n > 32)
- n = 32;
- printf(" Data at offset %d:\n", offset);
- for (i = 0; i < n; i++) {
- printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]);
- }
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf);
- }
- }
-}
-
-
-/**
- * Bind the VBO executor to the current vertex array object prior
- * to drawing.
- *
- * Just translate the arrayobj into a sane layout.
- */
-static void
-bind_array_obj(struct gl_context *ctx)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- GLuint i;
-
- /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array
- * rather than as individual named arrays. Then this function can
- * go away.
- */
- exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex;
- exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight;
- exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal;
- exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color;
- exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor;
- exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord;
- exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index;
- if (arrayObj->PointSize.Enabled) {
- /* this aliases COLOR_INDEX */
- exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize;
- }
- exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag;
-
- for (i = 0; i < Elements(arrayObj->TexCoord); i++)
- exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i];
-
- for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
- assert(i < Elements(exec->array.generic_array));
- exec->array.generic_array[i] = &arrayObj->VertexAttrib[i];
- }
-
- exec->array.array_obj = arrayObj->Name;
-}
-
-
-/**
- * Set the vbo->exec->inputs[] pointers to point to the enabled
- * vertex arrays. This depends on the current vertex program/shader
- * being executed because of whether or not generic vertex arrays
- * alias the conventional vertex arrays.
- * For arrays that aren't enabled, we set the input[attrib] pointer
- * to point at a zero-stride current value "array".
- */
-static void
-recalculate_input_bindings(struct gl_context *ctx)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- const struct gl_client_array **inputs = &exec->array.inputs[0];
- GLbitfield const_inputs = 0x0;
- GLuint i;
-
- exec->array.program_mode = get_program_mode(ctx);
- exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled;
-
- switch (exec->array.program_mode) {
- case VP_NONE:
- /* When no vertex program is active (or the vertex program is generated
- * from fixed-function state). We put the material values into the
- * generic slots. This is the only situation where material values
- * are available as per-vertex attributes.
- */
- for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- for (i = 0; i < MAT_ATTRIB_MAX; i++) {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
-
- /* Could use just about anything, just to fill in the empty
- * slots:
- */
- for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
- break;
-
- case VP_NV:
- /* NV_vertex_program - attribute arrays alias and override
- * conventional, legacy arrays. No materials, and the generic
- * slots are vacant.
- */
- for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.generic_array[i]->Enabled)
- inputs[i] = exec->array.generic_array[i];
- else if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- /* Could use just about anything, just to fill in the empty
- * slots:
- */
- for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
- inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
- const_inputs |= 1 << i;
- }
- break;
-
- case VP_ARB:
- /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0]
- * attribute array aliases and overrides the legacy position array.
- *
- * Otherwise, legacy attributes available in the legacy slots,
- * generic attributes in the generic slots and materials are not
- * available as per-vertex attributes.
- */
- if (exec->array.generic_array[0]->Enabled)
- inputs[0] = exec->array.generic_array[0];
- else if (exec->array.legacy_array[0]->Enabled)
- inputs[0] = exec->array.legacy_array[0];
- else {
- inputs[0] = &vbo->legacy_currval[0];
- const_inputs |= 1 << 0;
- }
-
- for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
- if (exec->array.generic_array[i]->Enabled)
- inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
- else {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
-
- }
- break;
- }
-
- _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
-}
-
-
-/**
- * Examine the enabled vertex arrays to set the exec->array.inputs[] values.
- * These will point to the arrays to actually use for drawing. Some will
- * be user-provided arrays, other will be zero-stride const-valued arrays.
- * Note that this might set the _NEW_ARRAY dirty flag so state validation
- * must be done after this call.
- */
-static void
-bind_arrays(struct gl_context *ctx)
-{
- bind_array_obj(ctx);
- recalculate_input_bindings(ctx);
-}
-
-
-/**
- * Helper function called by the other DrawArrays() functions below.
- * This is where we handle primitive restart for drawing non-indexed
- * arrays. If primitive restart is enabled, it typically means
- * splitting one DrawArrays() into two.
- */
-static void
-vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
- GLsizei count, GLuint numInstances)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_prim prim[2];
-
- bind_arrays(ctx);
-
- /* Again... because we may have changed the bitmask of per-vertex varying
- * attributes. If we regenerate the fixed-function vertex program now
- * we may be able to prune down the number of vertex attributes which we
- * need in the shader.
- */
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0; /* filled in below */
- prim[0].count = 0; /* filled in below */
- prim[0].indexed = 0;
- prim[0].basevertex = 0;
- prim[0].num_instances = numInstances;
-
- /* Implement the primitive restart index */
- if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) {
- GLuint primCount = 0;
-
- if (ctx->Array.RestartIndex == start) {
- /* special case: RestartIndex at beginning */
- if (count > 1) {
- prim[0].start = start + 1;
- prim[0].count = count - 1;
- primCount = 1;
- }
- }
- else if (ctx->Array.RestartIndex == start + count - 1) {
- /* special case: RestartIndex at end */
- if (count > 1) {
- prim[0].start = start;
- prim[0].count = count - 1;
- primCount = 1;
- }
- }
- else {
- /* general case: RestartIndex in middle, split into two prims */
- prim[0].start = start;
- prim[0].count = ctx->Array.RestartIndex - start;
-
- prim[1] = prim[0];
- prim[1].start = ctx->Array.RestartIndex + 1;
- prim[1].count = count - prim[1].start;
-
- primCount = 2;
- }
-
- if (primCount > 0) {
- /* draw one or two prims */
- vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL,
- GL_TRUE, start, start + count - 1);
- }
- }
- else {
- /* no prim restart */
- prim[0].start = start;
- prim[0].count = count;
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL,
- GL_TRUE, start, start + count - 1);
- }
-}
-
-
-
-/**
- * Called from glDrawArrays when in immediate mode (not display list mode).
- */
-static void GLAPIENTRY
-vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count);
-
- if (!_mesa_validate_DrawArrays( ctx, mode, start, count ))
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDrawArrays")) {
- return;
- }
-
- if (0)
- check_draw_arrays_data(ctx, start, count);
-
- vbo_draw_arrays(ctx, mode, start, count, 1);
-
- if (0)
- print_draw_arrays(ctx, mode, start, count);
-}
-
-
-/**
- * Called from glDrawArraysInstanced when in immediate mode (not
- * display list mode).
- */
-static void GLAPIENTRY
-vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
- GLsizei numInstances)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
-
- if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) {
- return;
- }
-
- if (0)
- check_draw_arrays_data(ctx, start, count);
-
- vbo_draw_arrays(ctx, mode, start, count, numInstances);
-
- if (0)
- print_draw_arrays(ctx, mode, start, count);
-}
-
-
-/**
- * Map GL_ELEMENT_ARRAY_BUFFER and print contents.
- * For debugging.
- */
-static void
-dump_element_buffer(struct gl_context *ctx, GLenum type)
-{
- const GLvoid *map = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
- switch (type) {
- case GL_UNSIGNED_BYTE:
- {
- const GLubyte *us = (const GLubyte *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) {
- printf("%02x ", us[i]);
- if (i % 32 == 31)
- printf("\n");
- }
- printf("\n");
- }
- break;
- case GL_UNSIGNED_SHORT:
- {
- const GLushort *us = (const GLushort *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) {
- printf("%04x ", us[i]);
- if (i % 16 == 15)
- printf("\n");
- }
- printf("\n");
- }
- break;
- case GL_UNSIGNED_INT:
- {
- const GLuint *us = (const GLuint *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) {
- printf("%08x ", us[i]);
- if (i % 8 == 7)
- printf("\n");
- }
- printf("\n");
- }
- break;
- default:
- ;
- }
-
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
-}
-
-
-/**
- * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements.
- * Do the rendering for a glDrawElements or glDrawRangeElements call after
- * we've validated buffer bounds, etc.
- */
-static void
-vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
- GLboolean index_bounds_valid,
- GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices,
- GLint basevertex, GLint numInstances)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_index_buffer ib;
- struct _mesa_prim prim[1];
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) {
- return;
- }
-
- bind_arrays( ctx );
-
- /* check for dirty state again */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- ib.count = count;
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = indices;
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0;
- prim[0].count = count;
- prim[0].indexed = 1;
- prim[0].basevertex = basevertex;
- prim[0].num_instances = numInstances;
-
- /* Need to give special consideration to rendering a range of
- * indices starting somewhere above zero. Typically the
- * application is issuing multiple DrawRangeElements() to draw
- * successive primitives layed out linearly in the vertex arrays.
- * Unless the vertex arrays are all in a VBO (or locked as with
- * CVA), the OpenGL semantics imply that we need to re-read or
- * re-upload the vertex data on each draw call.
- *
- * In the case of hardware tnl, we want to avoid starting the
- * upload at zero, as it will mean every draw call uploads an
- * increasing amount of not-used vertex data. Worse - in the
- * software tnl module, all those vertices might be transformed and
- * lit but never rendered.
- *
- * If we just upload or transform the vertices in start..end,
- * however, the indices will be incorrect.
- *
- * At this level, we don't know exactly what the requirements of
- * the backend are going to be, though it will likely boil down to
- * either:
- *
- * 1) Do nothing, everything is in a VBO and is processed once
- * only.
- *
- * 2) Adjust the indices and vertex arrays so that start becomes
- * zero.
- *
- * Rather than doing anything here, I'll provide a helper function
- * for the latter case elsewhere.
- */
-
- vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib,
- index_bounds_valid, start, end );
-}
-
-
-/**
- * Called by glDrawRangeElementsBaseVertex() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
- GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices,
- GLint basevertex)
-{
- static GLuint warnCount = 0;
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx,
- "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
-
- if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count,
- type, indices, basevertex ))
- return;
-
- /* NOTE: It's important that 'end' is a reasonable value.
- * in _tnl_draw_prims(), we use end to determine how many vertices
- * to transform. If it's too large, we can unnecessarily split prims
- * or we can read/write out of memory in several different places!
- */
-
- /* Catch/fix some potential user errors */
- if (type == GL_UNSIGNED_BYTE) {
- start = MIN2(start, 0xff);
- end = MIN2(end, 0xff);
- }
- else if (type == GL_UNSIGNED_SHORT) {
- start = MIN2(start, 0xffff);
- end = MIN2(end, 0xffff);
- }
-
- if (end >= ctx->Array.ArrayObj->_MaxElement) {
- /* the max element is out of bounds of one or more enabled arrays */
- warnCount++;
-
- if (warnCount < 10) {
- _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, "
- "type 0x%x, indices=%p)\n"
- "\tend is out of bounds (max=%u) "
- "Element Buffer %u (size %d)\n"
- "\tThis should probably be fixed in the application.",
- start, end, count, type, indices,
- ctx->Array.ArrayObj->_MaxElement - 1,
- ctx->Array.ElementArrayBufferObj->Name,
- (int) ctx->Array.ElementArrayBufferObj->Size);
- }
-
- if (0)
- dump_element_buffer(ctx, type);
-
- if (0)
- _mesa_print_arrays(ctx);
-
-#ifdef DEBUG
- /* 'end' was out of bounds, but now let's check the actual array
- * indexes to see if any of them are out of bounds.
- */
- {
- GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
- ctx->Array.ElementArrayBufferObj);
- if (max >= ctx->Array.ArrayObj->_MaxElement) {
- if (warnCount < 10) {
- _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, "
- "count %d, type 0x%x, indices=%p)\n"
- "\tindex=%u is out of bounds (max=%u) "
- "Element Buffer %u (size %d)\n"
- "\tSkipping the glDrawRangeElements() call",
- start, end, count, type, indices, max,
- ctx->Array.ArrayObj->_MaxElement - 1,
- ctx->Array.ElementArrayBufferObj->Name,
- (int) ctx->Array.ElementArrayBufferObj->Size);
- }
- }
- /* XXX we could also find the min index and compare to 'start'
- * to see if start is correct. But it's more likely to get the
- * upper bound wrong.
- */
- }
-#endif
-
- /* Set 'end' to the max possible legal value */
- assert(ctx->Array.ArrayObj->_MaxElement >= 1);
- end = ctx->Array.ArrayObj->_MaxElement - 1;
- }
- else if (0) {
- printf("glDraw[Range]Elements{,BaseVertex}"
- "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, "
- "base %d\n",
- start, end, type, count,
- ctx->Array.ElementArrayBufferObj->Name,
- basevertex);
- }
-
-#if 0
- check_draw_elements_data(ctx, count, type, indices);
-#else
- (void) check_draw_elements_data;
-#endif
-
- vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end,
- count, type, indices, basevertex, 1);
-}
-
-
-/**
- * Called by glDrawRangeElements() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
- GLsizei count, GLenum type, const GLvoid *indices)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx,
- "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices);
-
- vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
- indices, 0);
-}
-
-
-/**
- * Called by glDrawElements() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices);
-
- if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 ))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, 0, 1);
-}
-
-
-/**
- * Called by glDrawElementsBaseVertex() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
-
- if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices,
- basevertex ))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, basevertex, 1);
-}
-
-
-/**
- * Called by glDrawElementsInstanced() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLsizei numInstances)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, numInstances);
-
- if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
- numInstances))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, 0, numInstances);
-}
-
-
-/**
- * Inner support for both _mesa_MultiDrawElements() and
- * _mesa_MultiDrawRangeElements().
- * This does the actual rendering after we've checked array indexes, etc.
- */
-static void
-vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount,
- const GLint *basevertex)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_index_buffer ib;
- struct _mesa_prim *prim;
- unsigned int index_type_size = 0;
- uintptr_t min_index_ptr, max_index_ptr;
- GLboolean fallback = GL_FALSE;
- int i;
-
- if (primcount == 0)
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) {
- return;
- }
-
- prim = calloc(1, primcount * sizeof(*prim));
- if (prim == NULL) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements");
- return;
- }
-
- /* Decide if we can do this all as one set of primitives sharing the
- * same index buffer, or if we have to reset the index pointer per
- * primitive.
- */
- bind_arrays( ctx );
-
- /* check for dirty state again */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- switch (type) {
- case GL_UNSIGNED_INT:
- index_type_size = 4;
- break;
- case GL_UNSIGNED_SHORT:
- index_type_size = 2;
- break;
- case GL_UNSIGNED_BYTE:
- index_type_size = 1;
- break;
- default:
- assert(0);
- }
-
- min_index_ptr = (uintptr_t)indices[0];
- max_index_ptr = 0;
- for (i = 0; i < primcount; i++) {
- min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]);
- max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] +
- index_type_size * count[i]);
- }
-
- /* Check if we can handle this thing as a bunch of index offsets from the
- * same index pointer. If we can't, then we have to fall back to doing
- * a draw_prims per primitive.
- * Check that the difference between each prim's indexes is a multiple of
- * the index/element size.
- */
- if (index_type_size != 1) {
- for (i = 0; i < primcount; i++) {
- if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) {
- fallback = GL_TRUE;
- break;
- }
- }
- }
-
- /* If the index buffer isn't in a VBO, then treating the application's
- * subranges of the index buffer as one large index buffer may lead to
- * us reading unmapped memory.
- */
- if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj))
- fallback = GL_TRUE;
-
- if (!fallback) {
- ib.count = (max_index_ptr - min_index_ptr) / index_type_size;
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = (void *)min_index_ptr;
-
- for (i = 0; i < primcount; i++) {
- prim[i].begin = (i == 0);
- prim[i].end = (i == primcount - 1);
- prim[i].weak = 0;
- prim[i].pad = 0;
- prim[i].mode = mode;
- prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size;
- prim[i].count = count[i];
- prim[i].indexed = 1;
- prim[i].num_instances = 1;
- if (basevertex != NULL)
- prim[i].basevertex = basevertex[i];
- else
- prim[i].basevertex = 0;
- }
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib,
- GL_FALSE, ~0, ~0);
- } else {
- /* render one prim at a time */
- for (i = 0; i < primcount; i++) {
- ib.count = count[i];
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = indices[i];
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0;
- prim[0].count = count[i];
- prim[0].indexed = 1;
- prim[0].num_instances = 1;
- if (basevertex != NULL)
- prim[0].basevertex = basevertex[i];
- else
- prim[0].basevertex = 0;
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib,
- GL_FALSE, ~0, ~0);
- }
- }
-
- free(prim);
-}
-
-
-static void GLAPIENTRY
-vbo_exec_MultiDrawElements(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices,
- GLsizei primcount)
-{
- GET_CURRENT_CONTEXT(ctx);
- GLint i;
-
- ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
-
- for (i = 0; i < primcount; i++) {
- if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
- 0))
- return;
- }
-
- vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
- NULL);
-}
-
-
-static void GLAPIENTRY
-vbo_exec_MultiDrawElementsBaseVertex(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices,
- GLsizei primcount,
- const GLsizei *basevertex)
-{
- GET_CURRENT_CONTEXT(ctx);
- GLint i;
-
- ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
-
- for (i = 0; i < primcount; i++) {
- if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
- basevertex[i]))
- return;
- }
-
- vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
- basevertex);
-}
-
-
-/**
- * Plug in the immediate-mode vertex array drawing commands into the
- * givven vbo_exec_context object.
- */
-void
-vbo_exec_array_init( struct vbo_exec_context *exec )
-{
- exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays;
- exec->vtxfmt.DrawElements = vbo_exec_DrawElements;
- exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements;
- exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements;
- exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex;
- exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex;
- exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex;
- exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced;
- exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced;
-}
-
-
-void
-vbo_exec_array_destroy( struct vbo_exec_context *exec )
-{
- /* nothing to do */
-}
-
-
-
-/**
- * The following functions are only used for OpenGL ES 1/2 support.
- * And some aren't even supported (yet) in ES 1/2.
- */
-
-
-void GLAPIENTRY
-_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count)
-{
- vbo_exec_DrawArrays(mode, first, count);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices)
-{
- vbo_exec_DrawElements(mode, count, type, indices);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count,
- GLenum type, const GLvoid *indices)
-{
- vbo_exec_DrawRangeElements(mode, start, end, count, type, indices);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
- indices, basevertex);
-}
-
-
-void GLAPIENTRY
-_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount)
-{
- vbo_exec_MultiDrawElements(mode, count, type, indices, primcount);
-}
-
-
-void GLAPIENTRY
-_mesa_MultiDrawElementsBaseVertex(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount,
- const GLint *basevertex)
-{
- vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices,
- primcount, basevertex);
-}
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/varray.h" +#include "main/bufferobj.h" +#include "main/enums.h" +#include "main/macros.h" + +#include "vbo_context.h" + + +/** + * Compute min and max elements by scanning the index buffer for + * glDraw[Range]Elements() calls. + * If primitive restart is enabled, we need to ignore restart + * indexes when computing min/max. + */ +void +vbo_get_minmax_index(struct gl_context *ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint *min_index, GLuint *max_index) +{ + const GLboolean restart = ctx->Array.PrimitiveRestart; + const GLuint restartIndex = ctx->Array.RestartIndex; + const GLuint count = prim->count; + const void *indices; + GLuint i; + + if (_mesa_is_bufferobj(ib->obj)) { + const GLvoid *map = + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, ib->obj); + indices = ADD_POINTERS(map, ib->ptr); + } else { + indices = ib->ptr; + } + + switch (ib->type) { + case GL_UNSIGNED_INT: { + const GLuint *ui_indices = (const GLuint *)indices; + GLuint max_ui = 0; + GLuint min_ui = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ui_indices[i] != restartIndex) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (ui_indices[i] > max_ui) max_ui = ui_indices[i]; + if (ui_indices[i] < min_ui) min_ui = ui_indices[i]; + } + } + *min_index = min_ui; + *max_index = max_ui; + break; + } + case GL_UNSIGNED_SHORT: { + const GLushort *us_indices = (const GLushort *)indices; + GLuint max_us = 0; + GLuint min_us = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (us_indices[i] != restartIndex) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = us_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (us_indices[i] > max_us) max_us = us_indices[i]; + if (us_indices[i] < min_us) min_us = us_indices[i]; + } + } + *min_index = min_us; + *max_index = max_us; + break; + } + case GL_UNSIGNED_BYTE: { + const GLubyte *ub_indices = (const GLubyte *)indices; + GLuint max_ub = 0; + GLuint min_ub = ~0U; + if (restart) { + for (i = 0; i < count; i++) { + if (ub_indices[i] != restartIndex) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + } + else { + for (i = 0; i < count; i++) { + if (ub_indices[i] > max_ub) max_ub = ub_indices[i]; + if (ub_indices[i] < min_ub) min_ub = ub_indices[i]; + } + } + *min_index = min_ub; + *max_index = max_ub; + break; + } + default: + assert(0); + break; + } + + if (_mesa_is_bufferobj(ib->obj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj); + } +} + + +/** + * Check that element 'j' of the array has reasonable data. + * Map VBO if needed. + * For debugging purposes; not normally used. + */ +static void +check_array_data(struct gl_context *ctx, struct gl_client_array *array, + GLuint attrib, GLuint j) +{ + if (array->Enabled) { + const void *data = array->Ptr; + if (_mesa_is_bufferobj(array->BufferObj)) { + if (!array->BufferObj->Pointer) { + /* need to map now */ + array->BufferObj->Pointer = + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY, array->BufferObj); + } + data = ADD_POINTERS(data, array->BufferObj->Pointer); + } + switch (array->Type) { + case GL_FLOAT: + { + GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j); + GLint k; + for (k = 0; k < array->Size; k++) { + if (IS_INF_OR_NAN(f[k]) || + f[k] >= 1.0e20 || f[k] <= -1.0e10) { + printf("Bad array data:\n"); + printf(" Element[%u].%u = %f\n", j, k, f[k]); + printf(" Array %u at %p\n", attrib, (void* ) array); + printf(" Type 0x%x, Size %d, Stride %d\n", + array->Type, array->Size, array->Stride); + printf(" Address/offset %p in Buffer Object %u\n", + array->Ptr, array->BufferObj->Name); + f[k] = 1.0; /* XXX replace the bad value! */ + } + /*assert(!IS_INF_OR_NAN(f[k]));*/ + } + } + break; + default: + ; + } + } +} + + +/** + * Unmap the buffer object referenced by given array, if mapped. + */ +static void +unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array) +{ + if (array->Enabled && + _mesa_is_bufferobj(array->BufferObj) && + _mesa_bufferobj_mapped(array->BufferObj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj); + } +} + + +/** + * Examine the array's data for NaNs, etc. + * For debug purposes; not normally used. + */ +static void +check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType, + const void *elements, GLint basevertex) +{ + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + const void *elemMap; + GLint i, k; + + if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { + elemMap = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, + ctx->Array.ElementArrayBufferObj); + elements = ADD_POINTERS(elements, elemMap); + } + + for (i = 0; i < count; i++) { + GLuint j; + + /* j = element[i] */ + switch (elemType) { + case GL_UNSIGNED_BYTE: + j = ((const GLubyte *) elements)[i]; + break; + case GL_UNSIGNED_SHORT: + j = ((const GLushort *) elements)[i]; + break; + case GL_UNSIGNED_INT: + j = ((const GLuint *) elements)[i]; + break; + default: + assert(0); + } + + /* check element j of each enabled array */ + check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j); + check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j); + check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j); + check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j); + for (k = 0; k < Elements(arrayObj->TexCoord); k++) { + check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j); + } + for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { + check_array_data(ctx, &arrayObj->VertexAttrib[k], + VERT_ATTRIB_GENERIC0 + k, j); + } + } + + if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + ctx->Array.ElementArrayBufferObj); + } + + unmap_array_buffer(ctx, &arrayObj->Vertex); + unmap_array_buffer(ctx, &arrayObj->Normal); + unmap_array_buffer(ctx, &arrayObj->Color); + for (k = 0; k < Elements(arrayObj->TexCoord); k++) { + unmap_array_buffer(ctx, &arrayObj->TexCoord[k]); + } + for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) { + unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]); + } +} + + +/** + * Check array data, looking for NaNs, etc. + */ +static void +check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count) +{ + /* TO DO */ +} + + +/** + * Print info/data for glDrawArrays(), for debugging. + */ +static void +print_draw_arrays(struct gl_context *ctx, + GLenum mode, GLint start, GLsizei count) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + int i; + + printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n", + mode, start, count); + + for (i = 0; i < 32; i++) { + GLuint bufName = exec->array.inputs[i]->BufferObj->Name; + GLint stride = exec->array.inputs[i]->Stride; + printf("attr %2d: size %d stride %d enabled %d " + "ptr %p Bufobj %u\n", + i, + exec->array.inputs[i]->Size, + stride, + /*exec->array.inputs[i]->Enabled,*/ + exec->array.legacy_array[i]->Enabled, + exec->array.inputs[i]->Ptr, + bufName); + + if (bufName) { + struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName); + GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY_ARB, buf); + int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr; + float *f = (float *) (p + offset); + int *k = (int *) f; + int i; + int n = (count * stride) / 4; + if (n > 32) + n = 32; + printf(" Data at offset %d:\n", offset); + for (i = 0; i < n; i++) { + printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]); + } + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf); + } + } +} + + +/** + * Bind the VBO executor to the current vertex array object prior + * to drawing. + * + * Just translate the arrayobj into a sane layout. + */ +static void +bind_array_obj(struct gl_context *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct gl_array_object *arrayObj = ctx->Array.ArrayObj; + GLuint i; + + /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array + * rather than as individual named arrays. Then this function can + * go away. + */ + exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex; + exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight; + exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal; + exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color; + exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor; + exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord; + exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index; + if (arrayObj->PointSize.Enabled) { + /* this aliases COLOR_INDEX */ + exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize; + } + exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag; + + for (i = 0; i < Elements(arrayObj->TexCoord); i++) + exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i]; + + for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) { + assert(i < Elements(exec->array.generic_array)); + exec->array.generic_array[i] = &arrayObj->VertexAttrib[i]; + } + + exec->array.array_obj = arrayObj->Name; +} + + +/** + * Set the vbo->exec->inputs[] pointers to point to the enabled + * vertex arrays. This depends on the current vertex program/shader + * being executed because of whether or not generic vertex arrays + * alias the conventional vertex arrays. + * For arrays that aren't enabled, we set the input[attrib] pointer + * to point at a zero-stride current value "array". + */ +static void +recalculate_input_bindings(struct gl_context *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + const struct gl_client_array **inputs = &exec->array.inputs[0]; + GLbitfield const_inputs = 0x0; + GLuint i; + + exec->array.program_mode = get_program_mode(ctx); + exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled; + + switch (exec->array.program_mode) { + case VP_NONE: + /* When no vertex program is active (or the vertex program is generated + * from fixed-function state). We put the material values into the + * generic slots. This is the only situation where material values + * are available as per-vertex attributes. + */ + for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + for (i = 0; i < MAT_ATTRIB_MAX; i++) { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + + /* Could use just about anything, just to fill in the empty + * slots: + */ + for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + break; + + case VP_NV: + /* NV_vertex_program - attribute arrays alias and override + * conventional, legacy arrays. No materials, and the generic + * slots are vacant. + */ + for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.generic_array[i]->Enabled) + inputs[i] = exec->array.generic_array[i]; + else if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + /* Could use just about anything, just to fill in the empty + * slots: + */ + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { + inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; + const_inputs |= 1 << i; + } + break; + + case VP_ARB: + /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0] + * attribute array aliases and overrides the legacy position array. + * + * Otherwise, legacy attributes available in the legacy slots, + * generic attributes in the generic slots and materials are not + * available as per-vertex attributes. + */ + if (exec->array.generic_array[0]->Enabled) + inputs[0] = exec->array.generic_array[0]; + else if (exec->array.legacy_array[0]->Enabled) + inputs[0] = exec->array.legacy_array[0]; + else { + inputs[0] = &vbo->legacy_currval[0]; + const_inputs |= 1 << 0; + } + + for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { + if (exec->array.legacy_array[i]->Enabled) + inputs[i] = exec->array.legacy_array[i]; + else { + inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } + } + + for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) { + if (exec->array.generic_array[i]->Enabled) + inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; + else { + inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + + } + break; + } + + _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); +} + + +/** + * Examine the enabled vertex arrays to set the exec->array.inputs[] values. + * These will point to the arrays to actually use for drawing. Some will + * be user-provided arrays, other will be zero-stride const-valued arrays. + * Note that this might set the _NEW_ARRAY dirty flag so state validation + * must be done after this call. + */ +static void +bind_arrays(struct gl_context *ctx) +{ + if (!ctx->Array.RebindArrays) { + return; + } + + bind_array_obj(ctx); + recalculate_input_bindings(ctx); + ctx->Array.RebindArrays = GL_FALSE; +} + + +/** + * Helper function called by the other DrawArrays() functions below. + * This is where we handle primitive restart for drawing non-indexed + * arrays. If primitive restart is enabled, it typically means + * splitting one DrawArrays() into two. + */ +static void +vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start, + GLsizei count, GLuint numInstances) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_prim prim[2]; + + bind_arrays(ctx); + + /* Again... because we may have changed the bitmask of per-vertex varying + * attributes. If we regenerate the fixed-function vertex program now + * we may be able to prune down the number of vertex attributes which we + * need in the shader. + */ + if (ctx->NewState) + _mesa_update_state(ctx); + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; /* filled in below */ + prim[0].count = 0; /* filled in below */ + prim[0].indexed = 0; + prim[0].basevertex = 0; + prim[0].num_instances = numInstances; + + /* Implement the primitive restart index */ + if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) { + GLuint primCount = 0; + + if (ctx->Array.RestartIndex == start) { + /* special case: RestartIndex at beginning */ + if (count > 1) { + prim[0].start = start + 1; + prim[0].count = count - 1; + primCount = 1; + } + } + else if (ctx->Array.RestartIndex == start + count - 1) { + /* special case: RestartIndex at end */ + if (count > 1) { + prim[0].start = start; + prim[0].count = count - 1; + primCount = 1; + } + } + else { + /* general case: RestartIndex in middle, split into two prims */ + prim[0].start = start; + prim[0].count = ctx->Array.RestartIndex - start; + + prim[1] = prim[0]; + prim[1].start = ctx->Array.RestartIndex + 1; + prim[1].count = count - prim[1].start; + + primCount = 2; + } + + if (primCount > 0) { + /* draw one or two prims */ + vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL, + GL_TRUE, start, start + count - 1); + } + } + else { + /* no prim restart */ + prim[0].start = start; + prim[0].count = count; + + vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL, + GL_TRUE, start, start + count - 1); + } +} + + + +/** + * Called from glDrawArrays when in immediate mode (not display list mode). + */ +static void GLAPIENTRY +vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, count); + + if (!_mesa_validate_DrawArrays( ctx, mode, start, count )) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDrawArrays")) { + return; + } + + if (0) + check_draw_arrays_data(ctx, start, count); + + vbo_draw_arrays(ctx, mode, start, count, 1); + + if (0) + print_draw_arrays(ctx, mode, start, count); +} + + +/** + * Called from glDrawArraysInstanced when in immediate mode (not + * display list mode). + */ +static void GLAPIENTRY +vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count, + GLsizei numInstances) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, count, numInstances); + + if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances)) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) { + return; + } + + if (0) + check_draw_arrays_data(ctx, start, count); + + vbo_draw_arrays(ctx, mode, start, count, numInstances); + + if (0) + print_draw_arrays(ctx, mode, start, count); +} + + +/** + * Map GL_ELEMENT_ARRAY_BUFFER and print contents. + * For debugging. + */ +static void +dump_element_buffer(struct gl_context *ctx, GLenum type) +{ + const GLvoid *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_READ_ONLY, + ctx->Array.ElementArrayBufferObj); + switch (type) { + case GL_UNSIGNED_BYTE: + { + const GLubyte *us = (const GLubyte *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) { + printf("%02x ", us[i]); + if (i % 32 == 31) + printf("\n"); + } + printf("\n"); + } + break; + case GL_UNSIGNED_SHORT: + { + const GLushort *us = (const GLushort *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) { + printf("%04x ", us[i]); + if (i % 16 == 15) + printf("\n"); + } + printf("\n"); + } + break; + case GL_UNSIGNED_INT: + { + const GLuint *us = (const GLuint *) map; + GLint i; + for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) { + printf("%08x ", us[i]); + if (i % 8 == 7) + printf("\n"); + } + printf("\n"); + } + break; + default: + ; + } + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, + ctx->Array.ElementArrayBufferObj); +} + + +/** + * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements. + * Do the rendering for a glDrawElements or glDrawRangeElements call after + * we've validated buffer bounds, etc. + */ +static void +vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode, + GLboolean index_bounds_valid, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, + GLint basevertex, GLint numInstances) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_index_buffer ib; + struct _mesa_prim prim[1]; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) { + return; + } + + bind_arrays( ctx ); + + /* check for dirty state again */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + ib.count = count; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = indices; + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count; + prim[0].indexed = 1; + prim[0].basevertex = basevertex; + prim[0].num_instances = numInstances; + + /* Need to give special consideration to rendering a range of + * indices starting somewhere above zero. Typically the + * application is issuing multiple DrawRangeElements() to draw + * successive primitives layed out linearly in the vertex arrays. + * Unless the vertex arrays are all in a VBO (or locked as with + * CVA), the OpenGL semantics imply that we need to re-read or + * re-upload the vertex data on each draw call. + * + * In the case of hardware tnl, we want to avoid starting the + * upload at zero, as it will mean every draw call uploads an + * increasing amount of not-used vertex data. Worse - in the + * software tnl module, all those vertices might be transformed and + * lit but never rendered. + * + * If we just upload or transform the vertices in start..end, + * however, the indices will be incorrect. + * + * At this level, we don't know exactly what the requirements of + * the backend are going to be, though it will likely boil down to + * either: + * + * 1) Do nothing, everything is in a VBO and is processed once + * only. + * + * 2) Adjust the indices and vertex arrays so that start becomes + * zero. + * + * Rather than doing anything here, I'll provide a helper function + * for the latter case elsewhere. + */ + + vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib, + index_bounds_valid, start, end ); +} + + +/** + * Called by glDrawRangeElementsBaseVertex() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawRangeElementsBaseVertex(GLenum mode, + GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, + GLint basevertex) +{ + static GLuint warnCount = 0; + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, + "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), start, end, count, + _mesa_lookup_enum_by_nr(type), indices, basevertex); + + if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count, + type, indices, basevertex )) + return; + + /* NOTE: It's important that 'end' is a reasonable value. + * in _tnl_draw_prims(), we use end to determine how many vertices + * to transform. If it's too large, we can unnecessarily split prims + * or we can read/write out of memory in several different places! + */ + + /* Catch/fix some potential user errors */ + if (type == GL_UNSIGNED_BYTE) { + start = MIN2(start, 0xff); + end = MIN2(end, 0xff); + } + else if (type == GL_UNSIGNED_SHORT) { + start = MIN2(start, 0xffff); + end = MIN2(end, 0xffff); + } + + if (end >= ctx->Array.ArrayObj->_MaxElement) { + /* the max element is out of bounds of one or more enabled arrays */ + warnCount++; + + if (warnCount < 10) { + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, " + "type 0x%x, indices=%p)\n" + "\tend is out of bounds (max=%u) " + "Element Buffer %u (size %d)\n" + "\tThis should probably be fixed in the application.", + start, end, count, type, indices, + ctx->Array.ArrayObj->_MaxElement - 1, + ctx->Array.ElementArrayBufferObj->Name, + (int) ctx->Array.ElementArrayBufferObj->Size); + } + + if (0) + dump_element_buffer(ctx, type); + + if (0) + _mesa_print_arrays(ctx); + +#ifdef DEBUG + /* 'end' was out of bounds, but now let's check the actual array + * indexes to see if any of them are out of bounds. + */ + { + GLuint max = _mesa_max_buffer_index(ctx, count, type, indices, + ctx->Array.ElementArrayBufferObj); + if (max >= ctx->Array.ArrayObj->_MaxElement) { + if (warnCount < 10) { + _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, " + "count %d, type 0x%x, indices=%p)\n" + "\tindex=%u is out of bounds (max=%u) " + "Element Buffer %u (size %d)\n" + "\tSkipping the glDrawRangeElements() call", + start, end, count, type, indices, max, + ctx->Array.ArrayObj->_MaxElement - 1, + ctx->Array.ElementArrayBufferObj->Name, + (int) ctx->Array.ElementArrayBufferObj->Size); + } + } + /* XXX we could also find the min index and compare to 'start' + * to see if start is correct. But it's more likely to get the + * upper bound wrong. + */ + } +#endif + + /* Set 'end' to the max possible legal value */ + assert(ctx->Array.ArrayObj->_MaxElement >= 1); + end = ctx->Array.ArrayObj->_MaxElement - 1; + } + else if (0) { + printf("glDraw[Range]Elements{,BaseVertex}" + "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, " + "base %d\n", + start, end, type, count, + ctx->Array.ElementArrayBufferObj->Name, + basevertex); + } + +#if 0 + check_draw_elements_data(ctx, count, type, indices); +#else + (void) check_draw_elements_data; +#endif + + vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end, + count, type, indices, basevertex, 1); +} + + +/** + * Called by glDrawRangeElements() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, + "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n", + _mesa_lookup_enum_by_nr(mode), start, end, count, + _mesa_lookup_enum_by_nr(type), indices); + + vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, 0); +} + + +/** + * Called by glDrawElements() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices); + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 )) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, 0, 1); +} + + +/** + * Called by glDrawElementsBaseVertex() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices, basevertex); + + if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, + basevertex )) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, basevertex, 1); +} + + +/** + * Called by glDrawElementsInstanced() in immediate mode. + */ +static void GLAPIENTRY +vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLsizei numInstances) +{ + GET_CURRENT_CONTEXT(ctx); + + if (MESA_VERBOSE & VERBOSE_DRAW) + _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n", + _mesa_lookup_enum_by_nr(mode), count, + _mesa_lookup_enum_by_nr(type), indices, numInstances); + + if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices, + numInstances)) + return; + + vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0, + count, type, indices, 0, numInstances); +} + + +/** + * Inner support for both _mesa_MultiDrawElements() and + * _mesa_MultiDrawRangeElements(). + * This does the actual rendering after we've checked array indexes, etc. + */ +static void +vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount, + const GLint *basevertex) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct _mesa_index_buffer ib; + struct _mesa_prim *prim; + unsigned int index_type_size = 0; + uintptr_t min_index_ptr, max_index_ptr; + GLboolean fallback = GL_FALSE; + int i; + + if (primcount == 0) + return; + + FLUSH_CURRENT( ctx, 0 ); + + if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) { + return; + } + + prim = calloc(1, primcount * sizeof(*prim)); + if (prim == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements"); + return; + } + + /* Decide if we can do this all as one set of primitives sharing the + * same index buffer, or if we have to reset the index pointer per + * primitive. + */ + bind_arrays( ctx ); + + /* check for dirty state again */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + switch (type) { + case GL_UNSIGNED_INT: + index_type_size = 4; + break; + case GL_UNSIGNED_SHORT: + index_type_size = 2; + break; + case GL_UNSIGNED_BYTE: + index_type_size = 1; + break; + default: + assert(0); + } + + min_index_ptr = (uintptr_t)indices[0]; + max_index_ptr = 0; + for (i = 0; i < primcount; i++) { + min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]); + max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] + + index_type_size * count[i]); + } + + /* Check if we can handle this thing as a bunch of index offsets from the + * same index pointer. If we can't, then we have to fall back to doing + * a draw_prims per primitive. + * Check that the difference between each prim's indexes is a multiple of + * the index/element size. + */ + if (index_type_size != 1) { + for (i = 0; i < primcount; i++) { + if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) { + fallback = GL_TRUE; + break; + } + } + } + + /* If the index buffer isn't in a VBO, then treating the application's + * subranges of the index buffer as one large index buffer may lead to + * us reading unmapped memory. + */ + if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) + fallback = GL_TRUE; + + if (!fallback) { + ib.count = (max_index_ptr - min_index_ptr) / index_type_size; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = (void *)min_index_ptr; + + for (i = 0; i < primcount; i++) { + prim[i].begin = (i == 0); + prim[i].end = (i == primcount - 1); + prim[i].weak = 0; + prim[i].pad = 0; + prim[i].mode = mode; + prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size; + prim[i].count = count[i]; + prim[i].indexed = 1; + prim[i].num_instances = 1; + if (basevertex != NULL) + prim[i].basevertex = basevertex[i]; + else + prim[i].basevertex = 0; + } + + vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib, + GL_FALSE, ~0, ~0); + } else { + /* render one prim at a time */ + for (i = 0; i < primcount; i++) { + ib.count = count[i]; + ib.type = type; + ib.obj = ctx->Array.ElementArrayBufferObj; + ib.ptr = indices[i]; + + prim[0].begin = 1; + prim[0].end = 1; + prim[0].weak = 0; + prim[0].pad = 0; + prim[0].mode = mode; + prim[0].start = 0; + prim[0].count = count[i]; + prim[0].indexed = 1; + prim[0].num_instances = 1; + if (basevertex != NULL) + prim[0].basevertex = basevertex[i]; + else + prim[0].basevertex = 0; + + vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib, + GL_FALSE, ~0, ~0); + } + } + + free(prim); +} + + +static void GLAPIENTRY +vbo_exec_MultiDrawElements(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, + GLsizei primcount) +{ + GET_CURRENT_CONTEXT(ctx); + GLint i; + + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); + + for (i = 0; i < primcount; i++) { + if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], + 0)) + return; + } + + vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, + NULL); +} + + +static void GLAPIENTRY +vbo_exec_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, + GLsizei primcount, + const GLsizei *basevertex) +{ + GET_CURRENT_CONTEXT(ctx); + GLint i; + + ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); + + for (i = 0; i < primcount; i++) { + if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i], + basevertex[i])) + return; + } + + vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount, + basevertex); +} + + +/** + * Plug in the immediate-mode vertex array drawing commands into the + * givven vbo_exec_context object. + */ +void +vbo_exec_array_init( struct vbo_exec_context *exec ) +{ + exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays; + exec->vtxfmt.DrawElements = vbo_exec_DrawElements; + exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements; + exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements; + exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex; + exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex; + exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex; + exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced; + exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced; +} + + +void +vbo_exec_array_destroy( struct vbo_exec_context *exec ) +{ + /* nothing to do */ +} + + + +/** + * The following functions are only used for OpenGL ES 1/2 support. + * And some aren't even supported (yet) in ES 1/2. + */ + + +void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) +{ + vbo_exec_DrawArrays(mode, first, count); +} + + +void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + vbo_exec_DrawElements(mode, count, type, indices); +} + + +void GLAPIENTRY +_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex); +} + + +void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, + GLenum type, const GLvoid *indices) +{ + vbo_exec_DrawRangeElements(mode, start, end, count, type, indices); +} + + +void GLAPIENTRY +_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, + GLsizei count, GLenum type, + const GLvoid *indices, GLint basevertex) +{ + vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type, + indices, basevertex); +} + + +void GLAPIENTRY +_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount) +{ + vbo_exec_MultiDrawElements(mode, count, type, indices, primcount); +} + + +void GLAPIENTRY +_mesa_MultiDrawElementsBaseVertex(GLenum mode, + const GLsizei *count, GLenum type, + const GLvoid **indices, GLsizei primcount, + const GLint *basevertex) +{ + vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices, + primcount, basevertex); +} diff --git a/mesalib/src/mesa/vbo/vbo_exec_draw.c b/mesalib/src/mesa/vbo/vbo_exec_draw.c index 048f3d170..f8be83ea8 100644 --- a/mesalib/src/mesa/vbo/vbo_exec_draw.c +++ b/mesalib/src/mesa/vbo/vbo_exec_draw.c @@ -1,420 +1,421 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.2
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/compiler.h"
-#include "main/enums.h"
-#include "main/mfeatures.h"
-#include "main/state.h"
-
-#include "vbo_context.h"
-
-
-#if FEATURE_beginend
-
-
-static void
-vbo_exec_debug_verts( struct vbo_exec_context *exec )
-{
- GLuint count = exec->vtx.vert_count;
- GLuint i;
-
- printf("%s: %u vertices %d primitives, %d vertsize\n",
- __FUNCTION__,
- count,
- exec->vtx.prim_count,
- exec->vtx.vertex_size);
-
- for (i = 0 ; i < exec->vtx.prim_count ; i++) {
- struct _mesa_prim *prim = &exec->vtx.prim[i];
- printf(" prim %d: %s%s %d..%d %s %s\n",
- i,
- _mesa_lookup_prim_by_nr(prim->mode),
- prim->weak ? " (weak)" : "",
- prim->start,
- prim->start + prim->count,
- prim->begin ? "BEGIN" : "(wrap)",
- prim->end ? "END" : "(wrap)");
- }
-}
-
-
-/*
- * NOTE: Need to have calculated primitives by this point -- do it on the fly.
- * NOTE: Old 'parity' issue is gone.
- */
-static GLuint
-vbo_copy_vertices( struct vbo_exec_context *exec )
-{
- GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count;
- GLuint ovf, i;
- GLuint sz = exec->vtx.vertex_size;
- GLfloat *dst = exec->vtx.copied.buffer;
- const GLfloat *src = (exec->vtx.buffer_map +
- exec->vtx.prim[exec->vtx.prim_count-1].start *
- exec->vtx.vertex_size);
-
-
- switch (exec->ctx->Driver.CurrentExecPrimitive) {
- case GL_POINTS:
- return 0;
- case GL_LINES:
- ovf = nr&1;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_TRIANGLES:
- ovf = nr%3;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_QUADS:
- ovf = nr&3;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_LINE_STRIP:
- if (nr == 0) {
- return 0;
- }
- else {
- memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) );
- return 1;
- }
- case GL_LINE_LOOP:
- case GL_TRIANGLE_FAN:
- case GL_POLYGON:
- if (nr == 0) {
- return 0;
- }
- else if (nr == 1) {
- memcpy( dst, src+0, sz * sizeof(GLfloat) );
- return 1;
- }
- else {
- memcpy( dst, src+0, sz * sizeof(GLfloat) );
- memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) );
- return 2;
- }
- case GL_TRIANGLE_STRIP:
- /* no parity issue, but need to make sure the tri is not drawn twice */
- if (nr & 1) {
- exec->vtx.prim[exec->vtx.prim_count-1].count--;
- }
- /* fallthrough */
- case GL_QUAD_STRIP:
- switch (nr) {
- case 0:
- ovf = 0;
- break;
- case 1:
- ovf = 1;
- break;
- default:
- ovf = 2 + (nr & 1);
- break;
- }
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case PRIM_OUTSIDE_BEGIN_END:
- return 0;
- default:
- assert(0);
- return 0;
- }
-}
-
-
-
-/* TODO: populate these as the vertex is defined:
- */
-static void
-vbo_exec_bind_arrays( struct gl_context *ctx )
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct gl_client_array *arrays = exec->vtx.arrays;
- const GLuint count = exec->vtx.vert_count;
- const GLuint *map;
- GLuint attr;
- GLbitfield varying_inputs = 0x0;
-
- /* Install the default (ie Current) attributes first, then overlay
- * all active ones.
- */
- switch (get_program_mode(exec->ctx)) {
- case VP_NONE:
- for (attr = 0; attr < 16; attr++) {
- exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
- }
- for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
- ASSERT(attr + 16 < Elements(exec->vtx.inputs));
- exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr];
- }
- map = vbo->map_vp_none;
- break;
- case VP_NV:
- case VP_ARB:
- /* The aliasing of attributes for NV vertex programs has already
- * occurred. NV vertex programs cannot access material values,
- * nor attributes greater than VERT_ATTRIB_TEX7.
- */
- for (attr = 0; attr < 16; attr++) {
- exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
- ASSERT(attr + 16 < Elements(exec->vtx.inputs));
- exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr];
- }
- map = vbo->map_vp_arb;
-
- /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
- * In that case we effectively need to route the data from
- * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
- */
- if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
- (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
- exec->vtx.inputs[16] = exec->vtx.inputs[0];
- exec->vtx.attrsz[16] = exec->vtx.attrsz[0];
- exec->vtx.attrptr[16] = exec->vtx.attrptr[0];
- exec->vtx.attrsz[0] = 0;
- }
- break;
- default:
- assert(0);
- }
-
- /* Make all active attributes (including edgeflag) available as
- * arrays of floats.
- */
- for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) {
- const GLuint src = map[attr];
-
- if (exec->vtx.attrsz[src]) {
- GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
- (GLbyte *)exec->vtx.vertex;
-
- /* override the default array set above */
- ASSERT(attr < Elements(exec->vtx.inputs));
- ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
- exec->vtx.inputs[attr] = &arrays[attr];
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- /* a real buffer obj: Ptr is an offset, not a pointer*/
- assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */
- assert(offset >= 0);
- arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset;
- }
- else {
- /* Ptr into ordinary app memory */
- arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset;
- }
- arrays[attr].Size = exec->vtx.attrsz[src];
- arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat);
- arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat);
- arrays[attr].Type = GL_FLOAT;
- arrays[attr].Format = GL_RGBA;
- arrays[attr].Enabled = 1;
- _mesa_reference_buffer_object(ctx,
- &arrays[attr].BufferObj,
- exec->vtx.bufferobj);
- arrays[attr]._MaxElement = count; /* ??? */
-
- varying_inputs |= 1 << attr;
- }
- }
-
- _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-}
-
-
-static void
-vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
-{
- GLenum target = GL_ARRAY_BUFFER_ARB;
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- struct gl_context *ctx = exec->ctx;
-
- if (ctx->Driver.FlushMappedBufferRange) {
- GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset;
- GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
-
- if (length)
- ctx->Driver.FlushMappedBufferRange(ctx, target,
- offset, length,
- exec->vtx.bufferobj);
- }
-
- exec->vtx.buffer_used += (exec->vtx.buffer_ptr -
- exec->vtx.buffer_map) * sizeof(float);
-
- assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
- assert(exec->vtx.buffer_ptr != NULL);
-
- ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
- exec->vtx.buffer_map = NULL;
- exec->vtx.buffer_ptr = NULL;
- exec->vtx.max_vert = 0;
- }
-}
-
-
-void
-vbo_exec_vtx_map( struct vbo_exec_context *exec )
-{
- struct gl_context *ctx = exec->ctx;
- const GLenum target = GL_ARRAY_BUFFER_ARB;
- const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
- const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */
- GL_MAP_INVALIDATE_RANGE_BIT |
- GL_MAP_UNSYNCHRONIZED_BIT |
- GL_MAP_FLUSH_EXPLICIT_BIT |
- MESA_MAP_NOWAIT_BIT;
- const GLenum usage = GL_STREAM_DRAW_ARB;
-
- if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
- return;
-
- if (exec->vtx.buffer_map != NULL) {
- assert(0);
- exec->vtx.buffer_map = NULL;
- exec->vtx.buffer_ptr = NULL;
- }
-
- if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
- ctx->Driver.MapBufferRange) {
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBufferRange(ctx,
- target,
- exec->vtx.buffer_used,
- (VBO_VERT_BUFFER_SIZE -
- exec->vtx.buffer_used),
- accessRange,
- exec->vtx.bufferobj);
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- }
-
- if (!exec->vtx.buffer_map) {
- exec->vtx.buffer_used = 0;
-
- ctx->Driver.BufferData(ctx, target,
- VBO_VERT_BUFFER_SIZE,
- NULL, usage, exec->vtx.bufferobj);
-
-
- if (ctx->Driver.MapBufferRange)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
- 0, VBO_VERT_BUFFER_SIZE,
- accessRange,
- exec->vtx.bufferobj);
- if (!exec->vtx.buffer_map)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
- assert(exec->vtx.buffer_map);
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- }
-
- if (0)
- printf("map %d..\n", exec->vtx.buffer_used);
-}
-
-
-
-/**
- * Execute the buffer and save copied verts.
- */
-void
-vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
-{
- if (0)
- vbo_exec_debug_verts( exec );
-
- if (exec->vtx.prim_count &&
- exec->vtx.vert_count) {
-
- exec->vtx.copied.nr = vbo_copy_vertices( exec );
-
- if (exec->vtx.copied.nr != exec->vtx.vert_count) {
- struct gl_context *ctx = exec->ctx;
-
- /* Before the update_state() as this may raise _NEW_ARRAY
- * from _mesa_set_varying_vp_inputs().
- */
- vbo_exec_bind_arrays( ctx );
-
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- vbo_exec_vtx_unmap( exec );
- }
-
- if (0)
- printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count,
- exec->vtx.vert_count);
-
- vbo_context(ctx)->draw_prims( ctx,
- exec->vtx.inputs,
- exec->vtx.prim,
- exec->vtx.prim_count,
- NULL,
- GL_TRUE,
- 0,
- exec->vtx.vert_count - 1);
-
- /* If using a real VBO, get new storage -- unless asked not to.
- */
- if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) {
- vbo_exec_vtx_map( exec );
- }
- }
- }
-
- /* May have to unmap explicitly if we didn't draw:
- */
- if (unmap &&
- _mesa_is_bufferobj(exec->vtx.bufferobj) &&
- exec->vtx.buffer_map) {
- vbo_exec_vtx_unmap( exec );
- }
-
-
- if (unmap || exec->vtx.vertex_size == 0)
- exec->vtx.max_vert = 0;
- else
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
- (exec->vtx.vertex_size * sizeof(GLfloat)));
-
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- exec->vtx.prim_count = 0;
- exec->vtx.vert_count = 0;
-}
-
-
-#endif /* FEATURE_beginend */
+/* + * Mesa 3-D graphics library + * Version: 7.2 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/bufferobj.h" +#include "main/compiler.h" +#include "main/enums.h" +#include "main/mfeatures.h" +#include "main/state.h" + +#include "vbo_context.h" + + +#if FEATURE_beginend + + +static void +vbo_exec_debug_verts( struct vbo_exec_context *exec ) +{ + GLuint count = exec->vtx.vert_count; + GLuint i; + + printf("%s: %u vertices %d primitives, %d vertsize\n", + __FUNCTION__, + count, + exec->vtx.prim_count, + exec->vtx.vertex_size); + + for (i = 0 ; i < exec->vtx.prim_count ; i++) { + struct _mesa_prim *prim = &exec->vtx.prim[i]; + printf(" prim %d: %s%s %d..%d %s %s\n", + i, + _mesa_lookup_prim_by_nr(prim->mode), + prim->weak ? " (weak)" : "", + prim->start, + prim->start + prim->count, + prim->begin ? "BEGIN" : "(wrap)", + prim->end ? "END" : "(wrap)"); + } +} + + +/* + * NOTE: Need to have calculated primitives by this point -- do it on the fly. + * NOTE: Old 'parity' issue is gone. + */ +static GLuint +vbo_copy_vertices( struct vbo_exec_context *exec ) +{ + GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count; + GLuint ovf, i; + GLuint sz = exec->vtx.vertex_size; + GLfloat *dst = exec->vtx.copied.buffer; + const GLfloat *src = (exec->vtx.buffer_map + + exec->vtx.prim[exec->vtx.prim_count-1].start * + exec->vtx.vertex_size); + + + switch (exec->ctx->Driver.CurrentExecPrimitive) { + case GL_POINTS: + return 0; + case GL_LINES: + ovf = nr&1; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_TRIANGLES: + ovf = nr%3; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_QUADS: + ovf = nr&3; + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case GL_LINE_STRIP: + if (nr == 0) { + return 0; + } + else { + memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 1; + } + case GL_LINE_LOOP: + case GL_TRIANGLE_FAN: + case GL_POLYGON: + if (nr == 0) { + return 0; + } + else if (nr == 1) { + memcpy( dst, src+0, sz * sizeof(GLfloat) ); + return 1; + } + else { + memcpy( dst, src+0, sz * sizeof(GLfloat) ); + memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) ); + return 2; + } + case GL_TRIANGLE_STRIP: + /* no parity issue, but need to make sure the tri is not drawn twice */ + if (nr & 1) { + exec->vtx.prim[exec->vtx.prim_count-1].count--; + } + /* fallthrough */ + case GL_QUAD_STRIP: + switch (nr) { + case 0: + ovf = 0; + break; + case 1: + ovf = 1; + break; + default: + ovf = 2 + (nr & 1); + break; + } + for (i = 0 ; i < ovf ; i++) + memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) ); + return i; + case PRIM_OUTSIDE_BEGIN_END: + return 0; + default: + assert(0); + return 0; + } +} + + + +/* TODO: populate these as the vertex is defined: + */ +static void +vbo_exec_bind_arrays( struct gl_context *ctx ) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_exec_context *exec = &vbo->exec; + struct gl_client_array *arrays = exec->vtx.arrays; + const GLuint count = exec->vtx.vert_count; + const GLuint *map; + GLuint attr; + GLbitfield varying_inputs = 0x0; + + /* Install the default (ie Current) attributes first, then overlay + * all active ones. + */ + switch (get_program_mode(exec->ctx)) { + case VP_NONE: + for (attr = 0; attr < 16; attr++) { + exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; + } + for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { + ASSERT(attr + 16 < Elements(exec->vtx.inputs)); + exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr]; + } + map = vbo->map_vp_none; + break; + case VP_NV: + case VP_ARB: + /* The aliasing of attributes for NV vertex programs has already + * occurred. NV vertex programs cannot access material values, + * nor attributes greater than VERT_ATTRIB_TEX7. + */ + for (attr = 0; attr < 16; attr++) { + exec->vtx.inputs[attr] = &vbo->legacy_currval[attr]; + ASSERT(attr + 16 < Elements(exec->vtx.inputs)); + exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr]; + } + map = vbo->map_vp_arb; + + /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. + * In that case we effectively need to route the data from + * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. + */ + if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && + (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { + exec->vtx.inputs[16] = exec->vtx.inputs[0]; + exec->vtx.attrsz[16] = exec->vtx.attrsz[0]; + exec->vtx.attrptr[16] = exec->vtx.attrptr[0]; + exec->vtx.attrsz[0] = 0; + } + break; + default: + assert(0); + } + + /* Make all active attributes (including edgeflag) available as + * arrays of floats. + */ + for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) { + const GLuint src = map[attr]; + + if (exec->vtx.attrsz[src]) { + GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] - + (GLbyte *)exec->vtx.vertex; + + /* override the default array set above */ + ASSERT(attr < Elements(exec->vtx.inputs)); + ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */ + exec->vtx.inputs[attr] = &arrays[attr]; + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + /* a real buffer obj: Ptr is an offset, not a pointer*/ + assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */ + assert(offset >= 0); + arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset; + } + else { + /* Ptr into ordinary app memory */ + arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset; + } + arrays[attr].Size = exec->vtx.attrsz[src]; + arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat); + arrays[attr].Type = GL_FLOAT; + arrays[attr].Format = GL_RGBA; + arrays[attr].Enabled = 1; + _mesa_reference_buffer_object(ctx, + &arrays[attr].BufferObj, + exec->vtx.bufferobj); + arrays[attr]._MaxElement = count; /* ??? */ + + varying_inputs |= 1 << attr; + ctx->NewState |= _NEW_ARRAY; + } + } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); +} + + +static void +vbo_exec_vtx_unmap( struct vbo_exec_context *exec ) +{ + GLenum target = GL_ARRAY_BUFFER_ARB; + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + struct gl_context *ctx = exec->ctx; + + if (ctx->Driver.FlushMappedBufferRange) { + GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset; + GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float); + + if (length) + ctx->Driver.FlushMappedBufferRange(ctx, target, + offset, length, + exec->vtx.bufferobj); + } + + exec->vtx.buffer_used += (exec->vtx.buffer_ptr - + exec->vtx.buffer_map) * sizeof(float); + + assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE); + assert(exec->vtx.buffer_ptr != NULL); + + ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); + exec->vtx.buffer_map = NULL; + exec->vtx.buffer_ptr = NULL; + exec->vtx.max_vert = 0; + } +} + + +void +vbo_exec_vtx_map( struct vbo_exec_context *exec ) +{ + struct gl_context *ctx = exec->ctx; + const GLenum target = GL_ARRAY_BUFFER_ARB; + const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */ + const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */ + GL_MAP_INVALIDATE_RANGE_BIT | + GL_MAP_UNSYNCHRONIZED_BIT | + GL_MAP_FLUSH_EXPLICIT_BIT | + MESA_MAP_NOWAIT_BIT; + const GLenum usage = GL_STREAM_DRAW_ARB; + + if (!_mesa_is_bufferobj(exec->vtx.bufferobj)) + return; + + if (exec->vtx.buffer_map != NULL) { + assert(0); + exec->vtx.buffer_map = NULL; + exec->vtx.buffer_ptr = NULL; + } + + if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 && + ctx->Driver.MapBufferRange) { + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, + target, + exec->vtx.buffer_used, + (VBO_VERT_BUFFER_SIZE - + exec->vtx.buffer_used), + accessRange, + exec->vtx.bufferobj); + exec->vtx.buffer_ptr = exec->vtx.buffer_map; + } + + if (!exec->vtx.buffer_map) { + exec->vtx.buffer_used = 0; + + ctx->Driver.BufferData(ctx, target, + VBO_VERT_BUFFER_SIZE, + NULL, usage, exec->vtx.bufferobj); + + + if (ctx->Driver.MapBufferRange) + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBufferRange(ctx, target, + 0, VBO_VERT_BUFFER_SIZE, + accessRange, + exec->vtx.bufferobj); + if (!exec->vtx.buffer_map) + exec->vtx.buffer_map = + (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj); + assert(exec->vtx.buffer_map); + exec->vtx.buffer_ptr = exec->vtx.buffer_map; + } + + if (0) + printf("map %d..\n", exec->vtx.buffer_used); +} + + + +/** + * Execute the buffer and save copied verts. + */ +void +vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap ) +{ + if (0) + vbo_exec_debug_verts( exec ); + + if (exec->vtx.prim_count && + exec->vtx.vert_count) { + + exec->vtx.copied.nr = vbo_copy_vertices( exec ); + + if (exec->vtx.copied.nr != exec->vtx.vert_count) { + struct gl_context *ctx = exec->ctx; + + /* Before the update_state() as this may raise _NEW_ARRAY + * from _mesa_set_varying_vp_inputs(). + */ + vbo_exec_bind_arrays( ctx ); + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (_mesa_is_bufferobj(exec->vtx.bufferobj)) { + vbo_exec_vtx_unmap( exec ); + } + + if (0) + printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count, + exec->vtx.vert_count); + + vbo_context(ctx)->draw_prims( ctx, + exec->vtx.inputs, + exec->vtx.prim, + exec->vtx.prim_count, + NULL, + GL_TRUE, + 0, + exec->vtx.vert_count - 1); + + /* If using a real VBO, get new storage -- unless asked not to. + */ + if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) { + vbo_exec_vtx_map( exec ); + } + } + } + + /* May have to unmap explicitly if we didn't draw: + */ + if (unmap && + _mesa_is_bufferobj(exec->vtx.bufferobj) && + exec->vtx.buffer_map) { + vbo_exec_vtx_unmap( exec ); + } + + + if (unmap || exec->vtx.vertex_size == 0) + exec->vtx.max_vert = 0; + else + exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / + (exec->vtx.vertex_size * sizeof(GLfloat))); + + exec->vtx.buffer_ptr = exec->vtx.buffer_map; + exec->vtx.prim_count = 0; + exec->vtx.vert_count = 0; +} + + +#endif /* FEATURE_beginend */ diff --git a/mesalib/src/mesa/vbo/vbo_save_draw.c b/mesalib/src/mesa/vbo/vbo_save_draw.c index 6d8dbdb86..634a6d3f8 100644 --- a/mesalib/src/mesa/vbo/vbo_save_draw.c +++ b/mesalib/src/mesa/vbo/vbo_save_draw.c @@ -1,304 +1,305 @@ -/*
- * Mesa 3-D graphics library
- * Version: 7.2
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* Author:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/imports.h"
-#include "main/mfeatures.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/light.h"
-#include "main/state.h"
-
-#include "vbo_context.h"
-
-
-#if FEATURE_dlist
-
-
-/**
- * After playback, copy everything but the position from the
- * last vertex to the saved state
- */
-static void
-_playback_copy_to_current(struct gl_context *ctx,
- const struct vbo_save_vertex_list *node)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- GLfloat vertex[VBO_ATTRIB_MAX * 4];
- GLfloat *data;
- GLuint i, offset;
-
- if (node->current_size == 0)
- return;
-
- if (node->current_data) {
- data = node->current_data;
- }
- else {
- data = vertex;
-
- if (node->count)
- offset = (node->buffer_offset +
- (node->count-1) * node->vertex_size * sizeof(GLfloat));
- else
- offset = node->buffer_offset;
-
- ctx->Driver.GetBufferSubData( ctx, 0, offset,
- node->vertex_size * sizeof(GLfloat),
- data, node->vertex_store->bufferobj );
-
- data += node->attrsz[0]; /* skip vertex position */
- }
-
- for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
- if (node->attrsz[i]) {
- GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
- GLfloat tmp[4];
-
- COPY_CLEAN_4V(tmp,
- node->attrsz[i],
- data);
-
- if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) {
- memcpy(current, tmp, 4 * sizeof(GLfloat));
-
- vbo->currval[i].Size = node->attrsz[i];
-
- if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
- i <= VBO_ATTRIB_LAST_MATERIAL)
- ctx->NewState |= _NEW_LIGHT;
-
- ctx->NewState |= _NEW_CURRENT_ATTRIB;
- }
-
- data += node->attrsz[i];
- }
- }
-
- /* Colormaterial -- this kindof sucks.
- */
- if (ctx->Light.ColorMaterialEnabled) {
- _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]);
- }
-
- /* CurrentExecPrimitive
- */
- if (node->prim_count) {
- const struct _mesa_prim *prim = &node->prim[node->prim_count - 1];
- if (prim->end)
- ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
- else
- ctx->Driver.CurrentExecPrimitive = prim->mode;
- }
-}
-
-
-
-/**
- * Treat the vertex storage as a VBO, define vertex arrays pointing
- * into it:
- */
-static void vbo_bind_vertex_list(struct gl_context *ctx,
- const struct vbo_save_vertex_list *node)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_save_context *save = &vbo->save;
- struct gl_client_array *arrays = save->arrays;
- GLuint buffer_offset = node->buffer_offset;
- const GLuint *map;
- GLuint attr;
- GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */
- GLbitfield varying_inputs = 0x0;
-
- memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
-
- /* Install the default (ie Current) attributes first, then overlay
- * all active ones.
- */
- switch (get_program_mode(ctx)) {
- case VP_NONE:
- for (attr = 0; attr < 16; attr++) {
- save->inputs[attr] = &vbo->legacy_currval[attr];
- }
- for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
- save->inputs[attr + 16] = &vbo->mat_currval[attr];
- }
- map = vbo->map_vp_none;
- break;
- case VP_NV:
- case VP_ARB:
- /* The aliasing of attributes for NV vertex programs has already
- * occurred. NV vertex programs cannot access material values,
- * nor attributes greater than VERT_ATTRIB_TEX7.
- */
- for (attr = 0; attr < 16; attr++) {
- save->inputs[attr] = &vbo->legacy_currval[attr];
- save->inputs[attr + 16] = &vbo->generic_currval[attr];
- }
- map = vbo->map_vp_arb;
-
- /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
- * In that case we effectively need to route the data from
- * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
- */
- if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
- (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
- save->inputs[16] = save->inputs[0];
- node_attrsz[16] = node_attrsz[0];
- node_attrsz[0] = 0;
- }
- break;
- default:
- assert(0);
- }
-
- for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
- const GLuint src = map[attr];
-
- if (node_attrsz[src]) {
- /* override the default array set above */
- save->inputs[attr] = &arrays[attr];
-
- arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset;
- arrays[attr].Size = node->attrsz[src];
- arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat);
- arrays[attr].Stride = node->vertex_size * sizeof(GLfloat);
- arrays[attr].Type = GL_FLOAT;
- arrays[attr].Format = GL_RGBA;
- arrays[attr].Enabled = 1;
- _mesa_reference_buffer_object(ctx,
- &arrays[attr].BufferObj,
- node->vertex_store->bufferobj);
- arrays[attr]._MaxElement = node->count; /* ??? */
-
- assert(arrays[attr].BufferObj->Name);
-
- buffer_offset += node->attrsz[src] * sizeof(GLfloat);
- varying_inputs |= 1<<attr;
- }
- }
-
- _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-}
-
-
-static void
-vbo_save_loopback_vertex_list(struct gl_context *ctx,
- const struct vbo_save_vertex_list *list)
-{
- const char *buffer = ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, /* ? */
- list->vertex_store->bufferobj);
-
- vbo_loopback_vertex_list(ctx,
- (const GLfloat *)(buffer + list->buffer_offset),
- list->attrsz,
- list->prim,
- list->prim_count,
- list->wrap_count,
- list->vertex_size);
-
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- list->vertex_store->bufferobj);
-}
-
-
-/**
- * Execute the buffer and save copied verts.
- * This is called from the display list code when executing
- * a drawing command.
- */
-void
-vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
-{
- const struct vbo_save_vertex_list *node =
- (const struct vbo_save_vertex_list *) data;
- struct vbo_save_context *save = &vbo_context(ctx)->save;
-
- FLUSH_CURRENT(ctx, 0);
-
- if (node->prim_count > 0 && node->count > 0) {
-
- if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END &&
- node->prim[0].begin) {
-
- /* Degenerate case: list is called inside begin/end pair and
- * includes operations such as glBegin or glDrawArrays.
- */
- if (0)
- printf("displaylist recursive begin");
-
- vbo_save_loopback_vertex_list( ctx, node );
- return;
- }
- else if (save->replay_flags) {
- /* Various degnerate cases: translate into immediate mode
- * calls rather than trying to execute in place.
- */
- vbo_save_loopback_vertex_list( ctx, node );
- return;
- }
-
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- /* XXX also need to check if shader enabled, but invalid */
- if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) ||
- (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glBegin (invalid vertex/fragment program)");
- return;
- }
-
- vbo_bind_vertex_list( ctx, node );
-
- /* Again...
- */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- vbo_context(ctx)->draw_prims(ctx,
- save->inputs,
- node->prim,
- node->prim_count,
- NULL,
- GL_TRUE,
- 0, /* Node is a VBO, so this is ok */
- node->count - 1);
- }
-
- /* Copy to current?
- */
- _playback_copy_to_current( ctx, node );
-}
-
-
-#endif /* FEATURE_dlist */
+/* + * Mesa 3-D graphics library + * Version: 7.2 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/imports.h" +#include "main/mfeatures.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/light.h" +#include "main/state.h" + +#include "vbo_context.h" + + +#if FEATURE_dlist + + +/** + * After playback, copy everything but the position from the + * last vertex to the saved state + */ +static void +_playback_copy_to_current(struct gl_context *ctx, + const struct vbo_save_vertex_list *node) +{ + struct vbo_context *vbo = vbo_context(ctx); + GLfloat vertex[VBO_ATTRIB_MAX * 4]; + GLfloat *data; + GLuint i, offset; + + if (node->current_size == 0) + return; + + if (node->current_data) { + data = node->current_data; + } + else { + data = vertex; + + if (node->count) + offset = (node->buffer_offset + + (node->count-1) * node->vertex_size * sizeof(GLfloat)); + else + offset = node->buffer_offset; + + ctx->Driver.GetBufferSubData( ctx, 0, offset, + node->vertex_size * sizeof(GLfloat), + data, node->vertex_store->bufferobj ); + + data += node->attrsz[0]; /* skip vertex position */ + } + + for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { + if (node->attrsz[i]) { + GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; + + COPY_CLEAN_4V(tmp, + node->attrsz[i], + data); + + if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) { + memcpy(current, tmp, 4 * sizeof(GLfloat)); + + vbo->currval[i].Size = node->attrsz[i]; + + if (i >= VBO_ATTRIB_FIRST_MATERIAL && + i <= VBO_ATTRIB_LAST_MATERIAL) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } + + data += node->attrsz[i]; + } + } + + /* Colormaterial -- this kindof sucks. + */ + if (ctx->Light.ColorMaterialEnabled) { + _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]); + } + + /* CurrentExecPrimitive + */ + if (node->prim_count) { + const struct _mesa_prim *prim = &node->prim[node->prim_count - 1]; + if (prim->end) + ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END; + else + ctx->Driver.CurrentExecPrimitive = prim->mode; + } +} + + + +/** + * Treat the vertex storage as a VBO, define vertex arrays pointing + * into it: + */ +static void vbo_bind_vertex_list(struct gl_context *ctx, + const struct vbo_save_vertex_list *node) +{ + struct vbo_context *vbo = vbo_context(ctx); + struct vbo_save_context *save = &vbo->save; + struct gl_client_array *arrays = save->arrays; + GLuint buffer_offset = node->buffer_offset; + const GLuint *map; + GLuint attr; + GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */ + GLbitfield varying_inputs = 0x0; + + memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz)); + + /* Install the default (ie Current) attributes first, then overlay + * all active ones. + */ + switch (get_program_mode(ctx)) { + case VP_NONE: + for (attr = 0; attr < 16; attr++) { + save->inputs[attr] = &vbo->legacy_currval[attr]; + } + for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) { + save->inputs[attr + 16] = &vbo->mat_currval[attr]; + } + map = vbo->map_vp_none; + break; + case VP_NV: + case VP_ARB: + /* The aliasing of attributes for NV vertex programs has already + * occurred. NV vertex programs cannot access material values, + * nor attributes greater than VERT_ATTRIB_TEX7. + */ + for (attr = 0; attr < 16; attr++) { + save->inputs[attr] = &vbo->legacy_currval[attr]; + save->inputs[attr + 16] = &vbo->generic_currval[attr]; + } + map = vbo->map_vp_arb; + + /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read. + * In that case we effectively need to route the data from + * glVertexAttrib(0, val) calls to feed into the GENERIC0 input. + */ + if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 && + (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) { + save->inputs[16] = save->inputs[0]; + node_attrsz[16] = node_attrsz[0]; + node_attrsz[0] = 0; + } + break; + default: + assert(0); + } + + for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { + const GLuint src = map[attr]; + + if (node_attrsz[src]) { + /* override the default array set above */ + save->inputs[attr] = &arrays[attr]; + + arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset; + arrays[attr].Size = node->attrsz[src]; + arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat); + arrays[attr].Stride = node->vertex_size * sizeof(GLfloat); + arrays[attr].Type = GL_FLOAT; + arrays[attr].Format = GL_RGBA; + arrays[attr].Enabled = 1; + _mesa_reference_buffer_object(ctx, + &arrays[attr].BufferObj, + node->vertex_store->bufferobj); + arrays[attr]._MaxElement = node->count; /* ??? */ + + assert(arrays[attr].BufferObj->Name); + + buffer_offset += node->attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<<attr; + ctx->NewState |= _NEW_ARRAY; + } + } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); +} + + +static void +vbo_save_loopback_vertex_list(struct gl_context *ctx, + const struct vbo_save_vertex_list *list) +{ + const char *buffer = ctx->Driver.MapBuffer(ctx, + GL_ARRAY_BUFFER_ARB, + GL_READ_ONLY, /* ? */ + list->vertex_store->bufferobj); + + vbo_loopback_vertex_list(ctx, + (const GLfloat *)(buffer + list->buffer_offset), + list->attrsz, + list->prim, + list->prim_count, + list->wrap_count, + list->vertex_size); + + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, + list->vertex_store->bufferobj); +} + + +/** + * Execute the buffer and save copied verts. + * This is called from the display list code when executing + * a drawing command. + */ +void +vbo_save_playback_vertex_list(struct gl_context *ctx, void *data) +{ + const struct vbo_save_vertex_list *node = + (const struct vbo_save_vertex_list *) data; + struct vbo_save_context *save = &vbo_context(ctx)->save; + + FLUSH_CURRENT(ctx, 0); + + if (node->prim_count > 0 && node->count > 0) { + + if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END && + node->prim[0].begin) { + + /* Degenerate case: list is called inside begin/end pair and + * includes operations such as glBegin or glDrawArrays. + */ + if (0) + printf("displaylist recursive begin"); + + vbo_save_loopback_vertex_list( ctx, node ); + return; + } + else if (save->replay_flags) { + /* Various degnerate cases: translate into immediate mode + * calls rather than trying to execute in place. + */ + vbo_save_loopback_vertex_list( ctx, node ); + return; + } + + if (ctx->NewState) + _mesa_update_state( ctx ); + + /* XXX also need to check if shader enabled, but invalid */ + if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) || + (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glBegin (invalid vertex/fragment program)"); + return; + } + + vbo_bind_vertex_list( ctx, node ); + + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + + vbo_context(ctx)->draw_prims(ctx, + save->inputs, + node->prim, + node->prim_count, + NULL, + GL_TRUE, + 0, /* Node is a VBO, so this is ok */ + node->count - 1); + } + + /* Copy to current? + */ + _playback_copy_to_current( ctx, node ); +} + + +#endif /* FEATURE_dlist */ diff --git a/pixman/Makefile.am b/pixman/Makefile.am index 63b08c1fb..062c58a89 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = pixman test +SUBDIRS = pixman test demos pkgconfigdir=$(libdir)/pkgconfig pkgconfig_DATA=pixman-1.pc diff --git a/pixman/configure.ac b/pixman/configure.ac index ab2ecde1b..5242799bb 100644 --- a/pixman/configure.ac +++ b/pixman/configure.ac @@ -794,6 +794,7 @@ AC_OUTPUT([pixman-1.pc Makefile pixman/Makefile pixman/pixman-version.h + demos/Makefile test/Makefile]) m4_if(m4_eval(pixman_minor % 2), [1], [ diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am new file mode 100644 index 000000000..2dcdfd350 --- /dev/null +++ b/pixman/demos/Makefile.am @@ -0,0 +1,34 @@ +if HAVE_GTK + +AM_CFLAGS = @OPENMP_CFLAGS@ +AM_LDFLAGS = @OPENMP_CFLAGS@ + +LDADD = $(GTK_LIBS) $(top_builddir)/pixman/libpixman-1.la -lm +INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS) + +GTK_UTILS = gtk-utils.c gtk-utils.h + +DEMOS = \ + clip-test \ + clip-in \ + composite-test \ + gradient-test \ + radial-test \ + alpha-test \ + screen-test \ + convolution-test \ + trap-test + +gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) +alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) +composite_test_SOURCES = composite-test.c $(GTK_UTILS) +clip_test_SOURCES = clip-test.c $(GTK_UTILS) +clip_in_SOURCES = clip-in.c $(GTK_UTILS) +trap_test_SOURCES = trap-test.c $(GTK_UTILS) +screen_test_SOURCES = screen-test.c $(GTK_UTILS) +convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) +radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS) + +noinst_PROGRAMS = $(DEMOS) + +endif diff --git a/pixman/test/alpha-test.c b/pixman/demos/alpha-test.c index 92c208142..92c208142 100644 --- a/pixman/test/alpha-test.c +++ b/pixman/demos/alpha-test.c diff --git a/pixman/test/clip-in.c b/pixman/demos/clip-in.c index 51579811f..51579811f 100644 --- a/pixman/test/clip-in.c +++ b/pixman/demos/clip-in.c diff --git a/pixman/test/clip-test.c b/pixman/demos/clip-test.c index aa0df4482..aa0df4482 100644 --- a/pixman/test/clip-test.c +++ b/pixman/demos/clip-test.c diff --git a/pixman/test/composite-test.c b/pixman/demos/composite-test.c index 79d5d5eac..79d5d5eac 100644 --- a/pixman/test/composite-test.c +++ b/pixman/demos/composite-test.c diff --git a/pixman/test/convolution-test.c b/pixman/demos/convolution-test.c index da284af7b..da284af7b 100644 --- a/pixman/test/convolution-test.c +++ b/pixman/demos/convolution-test.c diff --git a/pixman/test/gradient-test.c b/pixman/demos/gradient-test.c index fc84844b0..fc84844b0 100644 --- a/pixman/test/gradient-test.c +++ b/pixman/demos/gradient-test.c diff --git a/pixman/test/gtk-utils.c b/pixman/demos/gtk-utils.c index f45cdc912..f45cdc912 100644 --- a/pixman/test/gtk-utils.c +++ b/pixman/demos/gtk-utils.c diff --git a/pixman/test/gtk-utils.h b/pixman/demos/gtk-utils.h index 2cb13bcf0..2cb13bcf0 100644 --- a/pixman/test/gtk-utils.h +++ b/pixman/demos/gtk-utils.h diff --git a/pixman/test/radial-test.c b/pixman/demos/radial-test.c index 5d716c339..35e90d786 100644 --- a/pixman/test/radial-test.c +++ b/pixman/demos/radial-test.c @@ -1,4 +1,4 @@ -#include "utils.h" +#include "../test/utils.h" #include "gtk-utils.h" #define NUM_GRADIENTS 7 diff --git a/pixman/test/screen-test.c b/pixman/demos/screen-test.c index e69dba3de..e69dba3de 100644 --- a/pixman/test/screen-test.c +++ b/pixman/demos/screen-test.c diff --git a/pixman/test/trap-test.c b/pixman/demos/trap-test.c index 19295e7a5..19295e7a5 100644 --- a/pixman/test/trap-test.c +++ b/pixman/demos/trap-test.c diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h index 372e9f9a8..9b1322b6c 100644 --- a/pixman/pixman/pixman-arm-common.h +++ b/pixman/pixman/pixman-arm-common.h @@ -282,19 +282,20 @@ cputype##_composite_##name (pixman_implementation_t *imp, \ src_type, dst_type) \ void \ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ - int32_t w, \ - dst_type * dst, \ - src_type * src, \ - pixman_fixed_t vx, \ - pixman_fixed_t unit_x);\ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x); \ \ static force_inline void \ scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \ - src_type * ps, \ + const src_type * ps, \ int32_t w, \ pixman_fixed_t vx, \ pixman_fixed_t unit_x, \ - pixman_fixed_t max_vx) \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ { \ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ vx, unit_x);\ @@ -316,4 +317,48 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func) +#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \ + src_type, dst_type) \ +void \ +pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \ + int32_t w, \ + dst_type * dst, \ + const src_type * src, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + const uint8_t * mask); \ + \ +static force_inline void \ +scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \ + dst_type * pd, \ + const src_type * ps, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t zero_src) \ +{ \ + if ((flags & SKIP_ZERO_SRC) && zero_src) \ + return; \ + pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \ + vx, unit_x, \ + mask); \ +} \ + \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ +FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ + scaled_nearest_scanline_##cputype##_##name##_##op,\ + src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + +/* Provide entries for the fast path table */ +#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + #endif diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S index 51533d42f..47daf457c 100644 --- a/pixman/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman/pixman-arm-neon-asm.S @@ -1,2365 +1,2393 @@ -/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains implementations of NEON optimized pixel processing
- * functions. There is no full and detailed tutorial, but some functions
- * (those which are exposing some new or interesting features) are
- * extensively commented and can be used as examples.
- *
- * You may want to have a look at the comments for following functions:
- * - pixman_composite_over_8888_0565_asm_neon
- * - pixman_composite_over_n_8_0565_asm_neon
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
- .text
- .fpu neon
- .arch armv7a
- .object_arch armv4
- .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
- .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
- .arm
- .altmacro
-
-#include "pixman-arm-neon-asm.h"
-
-/* Global configuration options and preferences */
-
-/*
- * The code can optionally make use of unaligned memory accesses to improve
- * performance of handling leading/trailing pixels for each scanline.
- * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
- * example in linux if unaligned memory accesses are not configured to
- * generate.exceptions.
- */
-.set RESPECT_STRICT_ALIGNMENT, 1
-
-/*
- * Set default prefetch type. There is a choice between the following options:
- *
- * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
- * as NOP to workaround some HW bugs or for whatever other reason)
- *
- * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
- * advanced prefetch intruduces heavy overhead)
- *
- * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
- * which can run ARM and NEON instructions simultaneously so that extra ARM
- * instructions do not add (many) extra cycles, but improve prefetch efficiency)
- *
- * Note: some types of function can't support advanced prefetch and fallback
- * to simple one (those which handle 24bpp pixels)
- */
-.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
-
-/* Prefetch distance in pixels for simple prefetch */
-.set PREFETCH_DISTANCE_SIMPLE, 64
-
-/*
- * Implementation of pixman_composite_over_8888_0565_asm_neon
- *
- * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
- * performs OVER compositing operation. Function fast_composite_over_8888_0565
- * from pixman-fast-path.c does the same in C and can be used as a reference.
- *
- * First we need to have some NEON assembly code which can do the actual
- * operation on the pixels and provide it to the template macro.
- *
- * Template macro quite conveniently takes care of emitting all the necessary
- * code for memory reading and writing (including quite tricky cases of
- * handling unaligned leading/trailing pixels), so we only need to deal with
- * the data in NEON registers.
- *
- * NEON registers allocation in general is recommented to be the following:
- * d0, d1, d2, d3 - contain loaded source pixel data
- * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed)
- * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used)
- * d28, d29, d30, d31 - place for storing the result (destination pixels)
- *
- * As can be seen above, four 64-bit NEON registers are used for keeping
- * intermediate pixel data and up to 8 pixels can be processed in one step
- * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
- *
- * This particular function uses the following registers allocation:
- * d0, d1, d2, d3 - contain loaded source pixel data
- * d4, d5 - contain loaded destination pixels (they are needed)
- * d28, d29 - place for storing the result (destination pixels)
- */
-
-/*
- * Step one. We need to have some code to do some arithmetics on pixel data.
- * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
- * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
- * perform all the needed calculations and write the result to {d28, d29}.
- * The rationale for having two macros and not just one will be explained
- * later. In practice, any single monolitic function which does the work can
- * be split into two parts in any arbitrary way without affecting correctness.
- *
- * There is one special trick here too. Common template macro can optionally
- * make our life a bit easier by doing R, G, B, A color components
- * deinterleaving for 32bpp pixel formats (and this feature is used in
- * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
- * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we
- * actually use d0 register for blue channel (a vector of eight 8-bit
- * values), d1 register for green, d2 for red and d3 for alpha. This
- * simple conversion can be also done with a few NEON instructions:
- *
- * Packed to planar conversion:
- * vuzp.8 d0, d1
- * vuzp.8 d2, d3
- * vuzp.8 d1, d3
- * vuzp.8 d0, d2
- *
- * Planar to packed conversion:
- * vzip.8 d0, d2
- * vzip.8 d1, d3
- * vzip.8 d2, d3
- * vzip.8 d0, d1
- *
- * But pixel can be loaded directly in planar format using VLD4.8 NEON
- * instruction. It is 1 cycle slower than VLD1.32, so this is not always
- * desirable, that's why deinterleaving is optional.
- *
- * But anyway, here is the code:
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_head
- /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
- and put data into d6 - red, d7 - green, d30 - blue */
- vshrn.u16 d6, q2, #8
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vsri.u8 d6, d6, #5
- vmvn.8 d3, d3 /* invert source alpha */
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- /* now do alpha blending, storing results in 8-bit planar format
- into d16 - red, d19 - green, d18 - blue */
- vmull.u8 q10, d3, d6
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- vrshr.u16 q13, q10, #8
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- vraddhn.u16 d22, q12, q15
-.endm
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail
- /* ... continue alpha blending */
- vqadd.u8 d16, d2, d20
- vqadd.u8 q9, q0, q11
- /* convert the result to r5g6b5 and store it into {d28, d29} */
- vshll.u8 q14, d16, #8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-/*
- * OK, now we got almost everything that we need. Using the above two
- * macros, the work can be done right. But now we want to optimize
- * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
- * a lot from good code scheduling and software pipelining.
- *
- * Let's construct some code, which will run in the core main loop.
- * Some pseudo-code of the main loop will look like this:
- * head
- * while (...) {
- * tail
- * head
- * }
- * tail
- *
- * It may look a bit weird, but this setup allows to hide instruction
- * latencies better and also utilize dual-issue capability more
- * efficiently (make pairs of load-store and ALU instructions).
- *
- * So what we need now is a '*_tail_head' macro, which will be used
- * in the core main loop. A trivial straightforward implementation
- * of this macro would look like this:
- *
- * pixman_composite_over_8888_0565_process_pixblock_tail
- * vst1.16 {d28, d29}, [DST_W, :128]!
- * vld1.16 {d4, d5}, [DST_R, :128]!
- * vld4.32 {d0, d1, d2, d3}, [SRC]!
- * pixman_composite_over_8888_0565_process_pixblock_head
- * cache_preload 8, 8
- *
- * Now it also got some VLD/VST instructions. We simply can't move from
- * processing one block of pixels to the other one with just arithmetics.
- * The previously processed data needs to be written to memory and new
- * data needs to be fetched. Fortunately, this main loop does not deal
- * with partial leading/trailing pixels and can load/store a full block
- * of pixels in a bulk. Additionally, destination buffer is already
- * 16 bytes aligned here (which is good for performance).
- *
- * New things here are DST_R, DST_W, SRC and MASK identifiers. These
- * are the aliases for ARM registers which are used as pointers for
- * accessing data. We maintain separate pointers for reading and writing
- * destination buffer (DST_R and DST_W).
- *
- * Another new thing is 'cache_preload' macro. It is used for prefetching
- * data into CPU L2 cache and improve performance when dealing with large
- * images which are far larger than cache size. It uses one argument
- * (actually two, but they need to be the same here) - number of pixels
- * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
- * details about this macro. Moreover, if good performance is needed
- * the code from this macro needs to be copied into '*_tail_head' macro
- * and mixed with the rest of code for optimal instructions scheduling.
- * We are actually doing it below.
- *
- * Now after all the explanations, here is the optimized code.
- * Different instruction streams (originaling from '*_head', '*_tail'
- * and 'cache_preload' macro) use different indentation levels for
- * better readability. Actually taking the code from one of these
- * indentation levels and ignoring a few VLD/VST instructions would
- * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
- * macro!
- */
-
-#if 1
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
- vqadd.u8 d16, d2, d20
- vld1.16 {d4, d5}, [DST_R, :128]!
- vqadd.u8 q9, q0, q11
- vshrn.u16 d6, q2, #8
- fetch_src_pixblock
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vshll.u8 q14, d16, #8
- PF add PF_X, PF_X, #8
- vshll.u8 q8, d19, #8
- PF tst PF_CTL, #0xF
- vsri.u8 d6, d6, #5
- PF addne PF_X, PF_X, #8
- vmvn.8 d3, d3
- PF subne PF_CTL, PF_CTL, #1
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- vmull.u8 q10, d3, d6
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vsri.u16 q14, q8, #5
- PF cmp PF_X, ORIG_W
- vshll.u8 q9, d18, #8
- vrshr.u16 q13, q10, #8
- PF subge PF_X, PF_X, ORIG_W
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- PF subges PF_CTL, PF_CTL, #0x10
- vsri.u16 q14, q9, #11
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vraddhn.u16 d22, q12, q15
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-#else
-
-/* If we did not care much about the performance, we would just use this... */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
- pixman_composite_over_8888_0565_process_pixblock_tail
- vst1.16 {d28, d29}, [DST_W, :128]!
- vld1.16 {d4, d5}, [DST_R, :128]!
- fetch_src_pixblock
- pixman_composite_over_8888_0565_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-#endif
-
-/*
- * And now the final part. We are using 'generate_composite_function' macro
- * to put all the stuff together. We are specifying the name of the function
- * which we want to get, number of bits per pixel for the source, mask and
- * destination (0 if unused, like mask in this case). Next come some bit
- * flags:
- * FLAG_DST_READWRITE - tells that the destination buffer is both read
- * and written, for write-only buffer we would use
- * FLAG_DST_WRITEONLY flag instead
- * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
- * and separate color channels for 32bpp format.
- * The next things are:
- * - the number of pixels processed per iteration (8 in this case, because
- * that's the maximum what can fit into four 64-bit NEON registers).
- * - prefetch distance, measured in pixel blocks. In this case it is 5 times
- * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
- * prefetch distance can be selected by running some benchmarks.
- *
- * After that we specify some macros, these are 'default_init',
- * 'default_cleanup' here which are empty (but it is possible to have custom
- * init/cleanup macros to be able to save/restore some extra NEON registers
- * like d8-d15 or do anything else) followed by
- * 'pixman_composite_over_8888_0565_process_pixblock_head',
- * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
- * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
- * which we got implemented above.
- *
- * The last part is the NEON registers allocation scheme.
- */
-generate_composite_function \
- pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_0565_process_pixblock_head, \
- pixman_composite_over_8888_0565_process_pixblock_tail, \
- pixman_composite_over_8888_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_0565_process_pixblock_head
- /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
- and put data into d6 - red, d7 - green, d30 - blue */
- vshrn.u16 d6, q2, #8
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vsri.u8 d6, d6, #5
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- /* now do alpha blending, storing results in 8-bit planar format
- into d16 - red, d19 - green, d18 - blue */
- vmull.u8 q10, d3, d6
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- vrshr.u16 q13, q10, #8
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- vraddhn.u16 d22, q12, q15
-.endm
-
-.macro pixman_composite_over_n_0565_process_pixblock_tail
- /* ... continue alpha blending */
- vqadd.u8 d16, d2, d20
- vqadd.u8 q9, q0, q11
- /* convert the result to r5g6b5 and store it into {d28, d29} */
- vshll.u8 q14, d16, #8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_0565_process_pixblock_tail_head
- pixman_composite_over_n_0565_process_pixblock_tail
- vld1.16 {d4, d5}, [DST_R, :128]!
- vst1.16 {d28, d29}, [DST_W, :128]!
- pixman_composite_over_n_0565_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_over_n_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
- vmvn.8 d3, d3 /* invert source alpha */
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_0565_init, \
- default_cleanup, \
- pixman_composite_over_n_0565_process_pixblock_head, \
- pixman_composite_over_n_0565_process_pixblock_tail, \
- pixman_composite_over_n_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_0565_process_pixblock_head
- vshll.u8 q8, d1, #8
- vshll.u8 q14, d2, #8
- vshll.u8 q9, d0, #8
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
- vsri.u16 q14, q8, #5
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- fetch_src_pixblock
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vsri.u16 q14, q9, #11
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vshll.u8 q8, d1, #8
- vst1.16 {d28, d29}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vshll.u8 q14, d2, #8
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vshll.u8 q9, d0, #8
-.endm
-
-generate_composite_function \
- pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_0565_process_pixblock_head, \
- pixman_composite_src_8888_0565_process_pixblock_tail, \
- pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_8888_process_pixblock_head
- vshrn.u16 d30, q0, #8
- vshrn.u16 d29, q0, #3
- vsli.u16 q0, q0, #5
- vmov.u8 d31, #255
- vsri.u8 d30, d30, #5
- vsri.u8 d29, d29, #6
- vshrn.u16 d28, q0, #2
-.endm
-
-.macro pixman_composite_src_0565_8888_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
- pixman_composite_src_0565_8888_process_pixblock_tail
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- fetch_src_pixblock
- pixman_composite_src_0565_8888_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_8888_process_pixblock_head, \
- pixman_composite_src_0565_8888_process_pixblock_tail, \
- pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_process_pixblock_head
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail_head
- fetch_src_pixblock
- PF add PF_X, PF_X, #32
- PF tst PF_CTL, #0xF
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- PF addne PF_X, PF_X, #32
- PF subne PF_CTL, PF_CTL, #1
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vqadd.u8 q14, q0, q2
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vqadd.u8 q15, q1, q3
-.endm
-
-generate_composite_function \
- pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
- fetch_src_pixblock
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vld1.32 {d4, d5, d6, d7}, [DST_R, :128]!
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vst1.32 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vqadd.u8 q14, q0, q2
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vqadd.u8 q15, q1, q3
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head
- vmvn.8 d24, d3 /* get inverted alpha */
- /* do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- fetch_src_pixblock
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_head, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8888_process_pixblock_head
- pixman_composite_out_reverse_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- fetch_src_pixblock
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8888_process_pixblock_tail_head
- pixman_composite_over_8888_8888_process_pixblock_tail
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- pixman_composite_over_8888_8888_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_over_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8888_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- vld4.8 {d0, d1, d2, d3}, [DST_R, :128]!
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-.macro pixman_composite_over_reverse_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d7[0]}, [DUMMY]
- vdup.8 d4, d7[0]
- vdup.8 d5, d7[1]
- vdup.8 d6, d7[2]
- vdup.8 d7, d7[3]
-.endm
-
-generate_composite_function \
- pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_reverse_n_8888_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 4, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_head
- vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */
- vmull.u8 q1, d24, d9
- vmull.u8 q6, d24, d10
- vmull.u8 q7, d24, d11
- vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */
- vrshr.u16 q9, q1, #8
- vrshr.u16 q10, q6, #8
- vrshr.u16 q11, q7, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q6, q10
- vraddhn.u16 d3, q7, q11
- vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */
- vsri.u8 d7, d7, #6
- vmvn.8 d3, d3
- vshrn.u16 d30, q2, #2
- vmull.u8 q8, d3, d6 /* now do alpha blending */
- vmull.u8 q9, d3, d7
- vmull.u8 q10, d3, d30
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
- /* 3 cycle bubble (after vmull.u8) */
- vrshr.u16 q13, q8, #8
- vrshr.u16 q11, q9, #8
- vrshr.u16 q15, q10, #8
- vraddhn.u16 d16, q8, q13
- vraddhn.u16 d27, q9, q11
- vraddhn.u16 d26, q10, q15
- vqadd.u8 d16, d2, d16
- /* 1 cycle bubble */
- vqadd.u8 q9, q0, q13
- vshll.u8 q14, d16, #8 /* convert to 16bpp */
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- /* 1 cycle bubble */
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
- vld1.16 {d4, d5}, [DST_R, :128]!
- vshrn.u16 d6, q2, #8
- fetch_mask_pixblock
- vshrn.u16 d7, q2, #3
- fetch_src_pixblock
- vmull.u8 q6, d24, d10
- vrshr.u16 q13, q8, #8
- vrshr.u16 q11, q9, #8
- vrshr.u16 q15, q10, #8
- vraddhn.u16 d16, q8, q13
- vraddhn.u16 d27, q9, q11
- vraddhn.u16 d26, q10, q15
- vqadd.u8 d16, d2, d16
- vmull.u8 q1, d24, d9
- vqadd.u8 q9, q0, q13
- vshll.u8 q14, d16, #8
- vmull.u8 q0, d24, d8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vmull.u8 q7, d24, d11
- vsri.u16 q14, q9, #11
-
- cache_preload 8, 8
-
- vsli.u16 q2, q2, #5
- vrshr.u16 q8, q0, #8
- vrshr.u16 q9, q1, #8
- vrshr.u16 q10, q6, #8
- vrshr.u16 q11, q7, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q6, q10
- vraddhn.u16 d3, q7, q11
- vsri.u8 d6, d6, #5
- vsri.u8 d7, d7, #6
- vmvn.8 d3, d3
- vshrn.u16 d30, q2, #2
- vst1.16 {d28, d29}, [DST_W, :128]!
- vmull.u8 q8, d3, d6
- vmull.u8 q9, d3, d7
- vmull.u8 q10, d3, d30
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-/*
- * This function needs a special initialization of solid mask.
- * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
- * offset, split into color components and replicated in d8-d11
- * registers. Additionally, this function needs all the NEON registers,
- * so it has to save d8-d15 registers which are callee saved according
- * to ABI. These registers are restored from 'cleanup' macro. All the
- * other NEON registers are caller saved, so can be clobbered freely
- * without introducing any problems.
- */
-.macro pixman_composite_over_n_8_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_0565_init, \
- pixman_composite_over_n_8_0565_cleanup, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_0565_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vpush {d8-d15}
- vld1.32 {d24[0]}, [DUMMY]
- vdup.8 d24, d24[3]
-.endm
-
-.macro pixman_composite_over_8888_n_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_8888_n_0565_init, \
- pixman_composite_over_8888_n_0565_cleanup, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
- vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- cache_preload 16, 16
-.endm
-
-generate_composite_function \
- pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 16, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_0565_process_pixblock_head, \
- pixman_composite_src_0565_0565_process_pixblock_tail, \
- pixman_composite_src_0565_0565_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail_head
- vst1.8 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #8
- vsli.u64 d0, d0, #16
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
- FLAG_DST_WRITEONLY, \
- 32, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_8_init, \
- pixman_composite_src_n_8_cleanup, \
- pixman_composite_src_n_8_process_pixblock_head, \
- pixman_composite_src_n_8_process_pixblock_tail, \
- pixman_composite_src_n_8_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail_head
- vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #16
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_0565_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 16, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_0565_init, \
- pixman_composite_src_n_0565_cleanup, \
- pixman_composite_src_n_0565_process_pixblock_head, \
- pixman_composite_src_n_0565_process_pixblock_tail, \
- pixman_composite_src_n_0565_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_8888_init, \
- pixman_composite_src_n_8888_cleanup, \
- pixman_composite_src_n_8888_process_pixblock_head, \
- pixman_composite_src_n_8888_process_pixblock_tail, \
- pixman_composite_src_n_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_8888_process_pixblock_head, \
- pixman_composite_src_8888_8888_process_pixblock_tail, \
- pixman_composite_src_8888_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_x888_8888_process_pixblock_head
- vorr q0, q0, q2
- vorr q1, q1, q2
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- vorr q0, q0, q2
- vorr q1, q1, q2
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_x888_8888_init
- vmov.u8 q2, #0xFF
- vshl.u32 q2, q2, #24
-.endm
-
-generate_composite_function \
- pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- pixman_composite_src_x888_8888_init, \
- default_cleanup, \
- pixman_composite_src_x888_8888_process_pixblock_head, \
- pixman_composite_src_x888_8888_process_pixblock_tail, \
- pixman_composite_src_x888_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_head
- /* expecting deinterleaved source data in {d8, d9, d10, d11} */
- /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
- /* and destination data in {d4, d5, d6, d7} */
- /* mask is in d24 (d25, d26, d27 are unused) */
-
- /* in */
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d24, d9
- vmull.u8 q6, d24, d10
- vmull.u8 q7, d24, d11
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vmvn.8 d24, d3 /* get inverted alpha */
- /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
- /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
- pixman_composite_over_n_8_8888_process_pixblock_tail
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- cache_preload 8, 8
- pixman_composite_over_n_8_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_8888_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_8888_init, \
- pixman_composite_over_n_8_8888_cleanup, \
- pixman_composite_over_n_8_8888_process_pixblock_head, \
- pixman_composite_over_n_8_8888_process_pixblock_tail, \
- pixman_composite_over_n_8_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8_process_pixblock_head
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d25, d8
- vmull.u8 q6, d26, d8
- vmull.u8 q7, d27, d8
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vmvn.8 q12, q0
- vmvn.8 q13, q1
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d25, d5
- vmull.u8 q10, d26, d6
- vmull.u8 q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8_8_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_n_8_8_process_pixblock_tail
- fetch_mask_pixblock
- cache_preload 32, 32
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- pixman_composite_over_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d8[0]}, [DUMMY]
- vdup.8 d8, d8[3]
-.endm
-
-.macro pixman_composite_over_n_8_8_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_8_init, \
- pixman_composite_over_n_8_8_cleanup, \
- pixman_composite_over_n_8_8_process_pixblock_head, \
- pixman_composite_over_n_8_8_process_pixblock_tail, \
- pixman_composite_over_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
- /*
- * 'combine_mask_ca' replacement
- *
- * input: solid src (n) in {d8, d9, d10, d11}
- * dest in {d4, d5, d6, d7 }
- * mask in {d24, d25, d26, d27}
- * output: updated src in {d0, d1, d2, d3 }
- * updated mask in {d24, d25, d26, d3 }
- */
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d25, d9
- vmull.u8 q6, d26, d10
- vmull.u8 q7, d27, d11
- vmull.u8 q9, d11, d25
- vmull.u8 q12, d11, d24
- vmull.u8 q13, d11, d26
- vrshr.u16 q8, q0, #8
- vrshr.u16 q10, q1, #8
- vrshr.u16 q11, q6, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q10
- vraddhn.u16 d2, q6, q11
- vrshr.u16 q11, q12, #8
- vrshr.u16 q8, q9, #8
- vrshr.u16 q6, q13, #8
- vrshr.u16 q10, q7, #8
- vraddhn.u16 d24, q12, q11
- vraddhn.u16 d25, q9, q8
- vraddhn.u16 d26, q13, q6
- vraddhn.u16 d3, q7, q10
- /*
- * 'combine_over_ca' replacement
- *
- * output: updated dest in {d28, d29, d30, d31}
- */
- vmvn.8 d24, d24
- vmvn.8 d25, d25
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d25, d5
- vmvn.8 d26, d26
- vmvn.8 d27, d3
- vmull.u8 q10, d26, d6
- vmull.u8 q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
- /* ... continue 'combine_over_ca' replacement */
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q6, q10, #8
- vrshr.u16 q7, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q6, q10
- vraddhn.u16 d31, q7, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q6, q10, #8
- vrshr.u16 q7, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q6, q10
- vraddhn.u16 d31, q7, q11
- fetch_mask_pixblock
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- cache_preload 8, 8
- pixman_composite_over_n_8888_8888_ca_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8888_8888_ca_init, \
- pixman_composite_over_n_8888_8888_ca_cleanup, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_in_n_8_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* and destination data in {d4, d5, d6, d7} */
- vmull.u8 q8, d4, d3
- vmull.u8 q9, d5, d3
- vmull.u8 q10, d6, d3
- vmull.u8 q11, d7, d3
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q8, q14
- vraddhn.u16 d29, q9, q15
- vraddhn.u16 d30, q10, q12
- vraddhn.u16 d31, q11, q13
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail_head
- pixman_composite_in_n_8_process_pixblock_tail
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- cache_preload 32, 32
- pixman_composite_in_n_8_process_pixblock_head
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_in_n_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d3, d3[3]
-.endm
-
-.macro pixman_composite_in_n_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_in_n_8_init, \
- pixman_composite_in_n_8_cleanup, \
- pixman_composite_in_n_8_process_pixblock_head, \
- pixman_composite_in_n_8_process_pixblock_tail, \
- pixman_composite_in_n_8_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-.macro pixman_composite_add_n_8_8_process_pixblock_head
- /* expecting source data in {d8, d9, d10, d11} */
- /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
- /* and destination data in {d4, d5, d6, d7} */
- /* mask is in d24, d25, d26, d27 */
- vmull.u8 q0, d24, d11
- vmull.u8 q1, d25, d11
- vmull.u8 q6, d26, d11
- vmull.u8 q7, d27, d11
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_n_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
- pixman_composite_add_n_8_8_process_pixblock_tail
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- cache_preload 32, 32
- pixman_composite_add_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_n_8_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_add_n_8_8_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_n_8_8_init, \
- pixman_composite_add_n_8_8_cleanup, \
- pixman_composite_add_n_8_8_process_pixblock_head, \
- pixman_composite_add_n_8_8_process_pixblock_tail, \
- pixman_composite_add_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_8_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* mask in {d24, d25, d26, d27} */
- vmull.u8 q8, d24, d0
- vmull.u8 q9, d25, d1
- vmull.u8 q10, d26, d2
- vmull.u8 q11, d27, d3
- vrshr.u16 q0, q8, #8
- vrshr.u16 q1, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q12, q10
- vraddhn.u16 d3, q13, q11
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
- pixman_composite_add_8_8_8_process_pixblock_tail
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- fetch_src_pixblock
- cache_preload 32, 32
- pixman_composite_add_8_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_8_8_8_init
-.endm
-
-.macro pixman_composite_add_8_8_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_8_8_8_init, \
- pixman_composite_add_8_8_8_cleanup, \
- pixman_composite_add_8_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_8_process_pixblock_tail, \
- pixman_composite_add_8_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* mask in {d24, d25, d26, d27} */
- vmull.u8 q8, d27, d0
- vmull.u8 q9, d27, d1
- vmull.u8 q10, d27, d2
- vmull.u8 q11, d27, d3
- /* 1 cycle bubble */
- vrsra.u16 q8, q8, #8
- vrsra.u16 q9, q9, #8
- vrsra.u16 q10, q10, #8
- vrsra.u16 q11, q11, #8
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
- /* 2 cycle bubble */
- vrshrn.u16 d28, q8, #8
- vrshrn.u16 d29, q9, #8
- vrshrn.u16 d30, q10, #8
- vrshrn.u16 d31, q11, #8
- vqadd.u8 q14, q2, q14
- /* 1 cycle bubble */
- vqadd.u8 q15, q3, q15
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
- fetch_src_pixblock
- vrshrn.u16 d28, q8, #8
- fetch_mask_pixblock
- vrshrn.u16 d29, q9, #8
- vmull.u8 q8, d27, d0
- vrshrn.u16 d30, q10, #8
- vmull.u8 q9, d27, d1
- vrshrn.u16 d31, q11, #8
- vmull.u8 q10, d27, d2
- vqadd.u8 q14, q2, q14
- vmull.u8 q11, d27, d3
- vqadd.u8 q15, q3, q15
- vrsra.u16 q8, q8, #8
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrsra.u16 q9, q9, #8
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vrsra.u16 q10, q10, #8
-
- cache_preload 8, 8
-
- vrsra.u16 q11, q11, #8
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-generate_composite_function \
- pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_n_8_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
-.endm
-
-.macro pixman_composite_add_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_n_8_8888_init, \
- pixman_composite_add_n_8_8888_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_n_8888_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vld1.32 {d27[0]}, [DUMMY]
- vdup.8 d27, d27[3]
-.endm
-
-.macro pixman_composite_add_8888_n_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_8888_n_8888_init, \
- pixman_composite_add_8888_n_8888_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* solid mask is in d15 */
-
- /* 'in' */
- vmull.u8 q8, d15, d3
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vrshr.u16 q13, q8, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d3, q8, q13
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
- vmvn.8 d24, d3 /* get inverted alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
- pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_head
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_8888_n_8888_init
- add DUMMY, sp, #48
- vpush {d8-d15}
- vld1.32 {d15[0]}, [DUMMY]
- vdup.8 d15, d15[3]
-.endm
-
-.macro pixman_composite_over_8888_n_8888_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_8888_n_8888_init, \
- pixman_composite_over_8888_n_8888_cleanup, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
- vst3.8 {d0, d1, d2}, [DST_W]!
- fetch_src_pixblock
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0888_0888_process_pixblock_head, \
- pixman_composite_src_0888_0888_process_pixblock_tail, \
- pixman_composite_src_0888_0888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
- vswp d0, d2
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
- vst4.8 {d0, d1, d2, d3}, [DST_W]!
- fetch_src_pixblock
- vswp d0, d2
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_init
- veor d3, d3, d3
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- pixman_composite_src_0888_8888_rev_init, \
- default_cleanup, \
- pixman_composite_src_0888_8888_rev_process_pixblock_head, \
- pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
- pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
- vshll.u8 q8, d1, #8
- vshll.u8 q9, d2, #8
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
- vshll.u8 q14, d0, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
- vshll.u8 q14, d0, #8
- fetch_src_pixblock
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
- vshll.u8 q8, d1, #8
- vst1.16 {d28, d29}, [DST_W, :128]!
- vshll.u8 q9, d2, #8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0888_0565_rev_process_pixblock_head, \
- pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
- pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- vraddhn.u16 d30, q11, q8
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d28, q13, q10
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- fetch_src_pixblock
- vraddhn.u16 d30, q11, q8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d28, q13, q10
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
- pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_pixbuf_8888_process_pixblock_head, \
- pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
- pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- vraddhn.u16 d28, q11, q8
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d30, q13, q10
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- fetch_src_pixblock
- vraddhn.u16 d28, q11, q8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d30, q13, q10
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
- pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q4, d2, d1, d0
- convert_0565_to_x888 q5, d6, d5, d4
- /* source pixel data is in {d0, d1, d2, XX} */
- /* destination pixel data is in {d4, d5, d6, XX} */
- vmvn.8 d7, d15
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vmull.u8 q8, d7, d4
- vmull.u8 q9, d7, d5
- vmull.u8 q13, d7, d6
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
-.endm
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q13, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q13
- vqadd.u8 q0, q0, q14
- vqadd.u8 q1, q1, q15
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
- fetch_mask_pixblock
- pixman_composite_over_0565_8_0565_process_pixblock_tail
- fetch_src_pixblock
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_over_0565_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_0565_8_0565_process_pixblock_head, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_n_0565_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vpush {d8-d15}
- vld1.32 {d15[0]}, [DUMMY]
- vdup.8 d15, d15[3]
-.endm
-
-.macro pixman_composite_over_0565_n_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_0565_n_0565_init, \
- pixman_composite_over_0565_n_0565_cleanup, \
- pixman_composite_over_0565_8_0565_process_pixblock_head, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q4, d2, d1, d0
- convert_0565_to_x888 q5, d6, d5, d4
- /* source pixel data is in {d0, d1, d2, XX} */
- /* destination pixel data is in {d4, d5, d6, XX} */
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
-.endm
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
- vqadd.u8 q0, q0, q2
- vqadd.u8 q1, q1, q3
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
- fetch_mask_pixblock
- pixman_composite_add_0565_8_0565_process_pixblock_tail
- fetch_src_pixblock
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_add_0565_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_add_0565_8_0565_process_pixblock_head, \
- pixman_composite_add_0565_8_0565_process_pixblock_tail, \
- pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q5, d6, d5, d4
- /* destination pixel data is in {d4, d5, d6, xx} */
- vmvn.8 d24, d15 /* get inverted alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
-.endm
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vraddhn.u16 d0, q14, q8
- vraddhn.u16 d1, q15, q9
- vraddhn.u16 d2, q12, q10
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
- fetch_src_pixblock
- pixman_composite_out_reverse_8_0565_process_pixblock_tail
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_out_reverse_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_out_reverse_8_0565_process_pixblock_head, \
- pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
- pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 15, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_0565_process_pixblock_head, \
- pixman_composite_over_8888_0565_process_pixblock_tail, \
- pixman_composite_over_8888_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_0565_process_pixblock_head, \
- pixman_composite_src_8888_0565_process_pixblock_tail, \
- pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_8888_process_pixblock_head, \
- pixman_composite_src_0565_8888_process_pixblock_tail, \
- pixman_composite_src_0565_8888_process_pixblock_tail_head
+/* + * Copyright © 2009 Nokia Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) + */ + +/* + * This file contains implementations of NEON optimized pixel processing + * functions. There is no full and detailed tutorial, but some functions + * (those which are exposing some new or interesting features) are + * extensively commented and can be used as examples. + * + * You may want to have a look at the comments for following functions: + * - pixman_composite_over_8888_0565_asm_neon + * - pixman_composite_over_n_8_0565_asm_neon + */ + +/* Prevent the stack from becoming executable for no reason... */ +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif + + .text + .fpu neon + .arch armv7a + .object_arch armv4 + .eabi_attribute 10, 0 /* suppress Tag_FP_arch */ + .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */ + .arm + .altmacro + +#include "pixman-arm-neon-asm.h" + +/* Global configuration options and preferences */ + +/* + * The code can optionally make use of unaligned memory accesses to improve + * performance of handling leading/trailing pixels for each scanline. + * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for + * example in linux if unaligned memory accesses are not configured to + * generate.exceptions. + */ +.set RESPECT_STRICT_ALIGNMENT, 1 + +/* + * Set default prefetch type. There is a choice between the following options: + * + * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work + * as NOP to workaround some HW bugs or for whatever other reason) + * + * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where + * advanced prefetch intruduces heavy overhead) + * + * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8 + * which can run ARM and NEON instructions simultaneously so that extra ARM + * instructions do not add (many) extra cycles, but improve prefetch efficiency) + * + * Note: some types of function can't support advanced prefetch and fallback + * to simple one (those which handle 24bpp pixels) + */ +.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED + +/* Prefetch distance in pixels for simple prefetch */ +.set PREFETCH_DISTANCE_SIMPLE, 64 + +/* + * Implementation of pixman_composite_over_8888_0565_asm_neon + * + * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and + * performs OVER compositing operation. Function fast_composite_over_8888_0565 + * from pixman-fast-path.c does the same in C and can be used as a reference. + * + * First we need to have some NEON assembly code which can do the actual + * operation on the pixels and provide it to the template macro. + * + * Template macro quite conveniently takes care of emitting all the necessary + * code for memory reading and writing (including quite tricky cases of + * handling unaligned leading/trailing pixels), so we only need to deal with + * the data in NEON registers. + * + * NEON registers allocation in general is recommented to be the following: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed) + * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used) + * d28, d29, d30, d31 - place for storing the result (destination pixels) + * + * As can be seen above, four 64-bit NEON registers are used for keeping + * intermediate pixel data and up to 8 pixels can be processed in one step + * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp). + * + * This particular function uses the following registers allocation: + * d0, d1, d2, d3 - contain loaded source pixel data + * d4, d5 - contain loaded destination pixels (they are needed) + * d28, d29 - place for storing the result (destination pixels) + */ + +/* + * Step one. We need to have some code to do some arithmetics on pixel data. + * This is implemented as a pair of macros: '*_head' and '*_tail'. When used + * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5}, + * perform all the needed calculations and write the result to {d28, d29}. + * The rationale for having two macros and not just one will be explained + * later. In practice, any single monolitic function which does the work can + * be split into two parts in any arbitrary way without affecting correctness. + * + * There is one special trick here too. Common template macro can optionally + * make our life a bit easier by doing R, G, B, A color components + * deinterleaving for 32bpp pixel formats (and this feature is used in + * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that + * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we + * actually use d0 register for blue channel (a vector of eight 8-bit + * values), d1 register for green, d2 for red and d3 for alpha. This + * simple conversion can be also done with a few NEON instructions: + * + * Packed to planar conversion: + * vuzp.8 d0, d1 + * vuzp.8 d2, d3 + * vuzp.8 d1, d3 + * vuzp.8 d0, d2 + * + * Planar to packed conversion: + * vzip.8 d0, d2 + * vzip.8 d1, d3 + * vzip.8 d2, d3 + * vzip.8 d0, d1 + * + * But pixel can be loaded directly in planar format using VLD4.8 NEON + * instruction. It is 1 cycle slower than VLD1.32, so this is not always + * desirable, that's why deinterleaving is optional. + * + * But anyway, here is the code: + */ +.macro pixman_composite_over_8888_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vmvn.8 d3, d3 /* invert source alpha */ + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_8888_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* + * OK, now we got almost everything that we need. Using the above two + * macros, the work can be done right. But now we want to optimize + * it a bit. ARM Cortex-A8 is an in-order core, and benefits really + * a lot from good code scheduling and software pipelining. + * + * Let's construct some code, which will run in the core main loop. + * Some pseudo-code of the main loop will look like this: + * head + * while (...) { + * tail + * head + * } + * tail + * + * It may look a bit weird, but this setup allows to hide instruction + * latencies better and also utilize dual-issue capability more + * efficiently (make pairs of load-store and ALU instructions). + * + * So what we need now is a '*_tail_head' macro, which will be used + * in the core main loop. A trivial straightforward implementation + * of this macro would look like this: + * + * pixman_composite_over_8888_0565_process_pixblock_tail + * vst1.16 {d28, d29}, [DST_W, :128]! + * vld1.16 {d4, d5}, [DST_R, :128]! + * vld4.32 {d0, d1, d2, d3}, [SRC]! + * pixman_composite_over_8888_0565_process_pixblock_head + * cache_preload 8, 8 + * + * Now it also got some VLD/VST instructions. We simply can't move from + * processing one block of pixels to the other one with just arithmetics. + * The previously processed data needs to be written to memory and new + * data needs to be fetched. Fortunately, this main loop does not deal + * with partial leading/trailing pixels and can load/store a full block + * of pixels in a bulk. Additionally, destination buffer is already + * 16 bytes aligned here (which is good for performance). + * + * New things here are DST_R, DST_W, SRC and MASK identifiers. These + * are the aliases for ARM registers which are used as pointers for + * accessing data. We maintain separate pointers for reading and writing + * destination buffer (DST_R and DST_W). + * + * Another new thing is 'cache_preload' macro. It is used for prefetching + * data into CPU L2 cache and improve performance when dealing with large + * images which are far larger than cache size. It uses one argument + * (actually two, but they need to be the same here) - number of pixels + * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some + * details about this macro. Moreover, if good performance is needed + * the code from this macro needs to be copied into '*_tail_head' macro + * and mixed with the rest of code for optimal instructions scheduling. + * We are actually doing it below. + * + * Now after all the explanations, here is the optimized code. + * Different instruction streams (originaling from '*_head', '*_tail' + * and 'cache_preload' macro) use different indentation levels for + * better readability. Actually taking the code from one of these + * indentation levels and ignoring a few VLD/VST instructions would + * result in exactly the code from '*_head', '*_tail' or 'cache_preload' + * macro! + */ + +#if 1 + +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + vqadd.u8 d16, d2, d20 + vld1.16 {d4, d5}, [DST_R, :128]! + vqadd.u8 q9, q0, q11 + vshrn.u16 d6, q2, #8 + fetch_src_pixblock + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vshll.u8 q14, d16, #8 + PF add PF_X, PF_X, #8 + vshll.u8 q8, d19, #8 + PF tst PF_CTL, #0xF + vsri.u8 d6, d6, #5 + PF addne PF_X, PF_X, #8 + vmvn.8 d3, d3 + PF subne PF_CTL, PF_CTL, #1 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + vmull.u8 q10, d3, d6 + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vsri.u16 q14, q8, #5 + PF cmp PF_X, ORIG_W + vshll.u8 q9, d18, #8 + vrshr.u16 q13, q10, #8 + PF subge PF_X, PF_X, ORIG_W + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + PF subges PF_CTL, PF_CTL, #0x10 + vsri.u16 q14, q9, #11 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vraddhn.u16 d22, q12, q15 + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +#else + +/* If we did not care much about the performance, we would just use this... */ +.macro pixman_composite_over_8888_0565_process_pixblock_tail_head + pixman_composite_over_8888_0565_process_pixblock_tail + vst1.16 {d28, d29}, [DST_W, :128]! + vld1.16 {d4, d5}, [DST_R, :128]! + fetch_src_pixblock + pixman_composite_over_8888_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +#endif + +/* + * And now the final part. We are using 'generate_composite_function' macro + * to put all the stuff together. We are specifying the name of the function + * which we want to get, number of bits per pixel for the source, mask and + * destination (0 if unused, like mask in this case). Next come some bit + * flags: + * FLAG_DST_READWRITE - tells that the destination buffer is both read + * and written, for write-only buffer we would use + * FLAG_DST_WRITEONLY flag instead + * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data + * and separate color channels for 32bpp format. + * The next things are: + * - the number of pixels processed per iteration (8 in this case, because + * that's the maximum what can fit into four 64-bit NEON registers). + * - prefetch distance, measured in pixel blocks. In this case it is 5 times + * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal + * prefetch distance can be selected by running some benchmarks. + * + * After that we specify some macros, these are 'default_init', + * 'default_cleanup' here which are empty (but it is possible to have custom + * init/cleanup macros to be able to save/restore some extra NEON registers + * like d8-d15 or do anything else) followed by + * 'pixman_composite_over_8888_0565_process_pixblock_head', + * 'pixman_composite_over_8888_0565_process_pixblock_tail' and + * 'pixman_composite_over_8888_0565_process_pixblock_tail_head' + * which we got implemented above. + * + * The last part is the NEON registers allocation scheme. + */ +generate_composite_function \ + pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_0565_process_pixblock_head + /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format + and put data into d6 - red, d7 - green, d30 - blue */ + vshrn.u16 d6, q2, #8 + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vshrn.u16 d30, q2, #2 + /* now do alpha blending, storing results in 8-bit planar format + into d16 - red, d19 - green, d18 - blue */ + vmull.u8 q10, d3, d6 + vmull.u8 q11, d3, d7 + vmull.u8 q12, d3, d30 + vrshr.u16 q13, q10, #8 + vrshr.u16 q3, q11, #8 + vrshr.u16 q15, q12, #8 + vraddhn.u16 d20, q10, q13 + vraddhn.u16 d23, q11, q3 + vraddhn.u16 d22, q12, q15 +.endm + +.macro pixman_composite_over_n_0565_process_pixblock_tail + /* ... continue alpha blending */ + vqadd.u8 d16, d2, d20 + vqadd.u8 q9, q0, q11 + /* convert the result to r5g6b5 and store it into {d28, d29} */ + vshll.u8 q14, d16, #8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_0565_process_pixblock_tail_head + pixman_composite_over_n_0565_process_pixblock_tail + vld1.16 {d4, d5}, [DST_R, :128]! + vst1.16 {d28, d29}, [DST_W, :128]! + pixman_composite_over_n_0565_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] + vmvn.8 d3, d3 /* invert source alpha */ +.endm + +generate_composite_function \ + pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_0565_init, \ + default_cleanup, \ + pixman_composite_over_n_0565_process_pixblock_head, \ + pixman_composite_over_n_0565_process_pixblock_tail, \ + pixman_composite_over_n_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_0565_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q14, d2, #8 + vshll.u8 q9, d0, #8 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_8888_0565_process_pixblock_tail_head + vsri.u16 q14, q8, #5 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + fetch_src_pixblock + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vsri.u16 q14, q9, #11 + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vshll.u8 q14, d2, #8 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vshll.u8 q9, d0, #8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_src_0565_8888_process_pixblock_head + vshrn.u16 d30, q0, #8 + vshrn.u16 d29, q0, #3 + vsli.u16 q0, q0, #5 + vmov.u8 d31, #255 + vsri.u8 d30, d30, #5 + vsri.u8 d29, d29, #6 + vshrn.u16 d28, q0, #2 +.endm + +.macro pixman_composite_src_0565_8888_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_src_0565_8888_process_pixblock_tail_head + pixman_composite_src_0565_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + fetch_src_pixblock + pixman_composite_src_0565_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_process_pixblock_head + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail +.endm + +.macro pixman_composite_add_8_8_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #32 + PF tst PF_CTL, #0xF + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #32 + PF subne PF_CTL, PF_CTL, #1 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8_8_asm_neon, 8, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vld1.32 {d4, d5, d6, d7}, [DST_R, :128]! + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vst1.32 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + vqadd.u8 q14, q0, q2 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vqadd.u8 q15, q1, q3 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_process_pixblock_tail, \ + pixman_composite_add_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head + vmvn.8 d24, d3 /* get inverted alpha */ + /* do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8888_process_pixblock_head + pixman_composite_out_reverse_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + fetch_src_pixblock + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8888_process_pixblock_tail_head + pixman_composite_over_8888_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_8888_8888_process_pixblock_head + cache_preload 8, 8 +.endm + +.macro pixman_composite_over_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + PF cmp PF_X, ORIG_W + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + vld4.8 {d0, d1, d2, d3}, [DST_R, :128]! + vmvn.8 d22, d3 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d22, d4 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d22, d5 + vmull.u8 q10, d22, d6 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vmull.u8 q11, d22, d7 +.endm + +.macro pixman_composite_over_reverse_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d7[0]}, [DUMMY] + vdup.8 d4, d7[0] + vdup.8 d5, d7[1] + vdup.8 d6, d7[2] + vdup.8 d7, d7[3] +.endm + +generate_composite_function \ + pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_reverse_n_8888_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 4, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_8_0565_process_pixblock_head + vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */ + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */ + vshrn.u16 d7, q2, #3 + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */ + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */ + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vmull.u8 q8, d3, d6 /* now do alpha blending */ + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail + /* 3 cycle bubble (after vmull.u8) */ + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + /* 1 cycle bubble */ + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 /* convert to 16bpp */ + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + /* 1 cycle bubble */ + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head + vld1.16 {d4, d5}, [DST_R, :128]! + vshrn.u16 d6, q2, #8 + fetch_mask_pixblock + vshrn.u16 d7, q2, #3 + fetch_src_pixblock + vmull.u8 q6, d24, d10 + vrshr.u16 q13, q8, #8 + vrshr.u16 q11, q9, #8 + vrshr.u16 q15, q10, #8 + vraddhn.u16 d16, q8, q13 + vraddhn.u16 d27, q9, q11 + vraddhn.u16 d26, q10, q15 + vqadd.u8 d16, d2, d16 + vmull.u8 q1, d24, d9 + vqadd.u8 q9, q0, q13 + vshll.u8 q14, d16, #8 + vmull.u8 q0, d24, d8 + vshll.u8 q8, d19, #8 + vshll.u8 q9, d18, #8 + vsri.u16 q14, q8, #5 + vmull.u8 q7, d24, d11 + vsri.u16 q14, q9, #11 + + cache_preload 8, 8 + + vsli.u16 q2, q2, #5 + vrshr.u16 q8, q0, #8 + vrshr.u16 q9, q1, #8 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q6, q10 + vraddhn.u16 d3, q7, q11 + vsri.u8 d6, d6, #5 + vsri.u8 d7, d7, #6 + vmvn.8 d3, d3 + vshrn.u16 d30, q2, #2 + vst1.16 {d28, d29}, [DST_W, :128]! + vmull.u8 q8, d3, d6 + vmull.u8 q9, d3, d7 + vmull.u8 q10, d3, d30 +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +/* + * This function needs a special initialization of solid mask. + * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET + * offset, split into color components and replicated in d8-d11 + * registers. Additionally, this function needs all the NEON registers, + * so it has to save d8-d15 registers which are callee saved according + * to ABI. These registers are restored from 'cleanup' macro. All the + * other NEON registers are caller saved, so can be clobbered freely + * without introducing any problems. + */ +.macro pixman_composite_over_n_8_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_0565_init, \ + pixman_composite_over_n_8_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d24[0]}, [DUMMY] + vdup.8 d24, d24[3] +.endm + +.macro pixman_composite_over_8888_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_0565_init, \ + pixman_composite_over_8888_n_0565_cleanup, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0565_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_0565_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 16, 16 +.endm + +generate_composite_function \ + pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_0565_process_pixblock_head, \ + pixman_composite_src_0565_0565_process_pixblock_tail, \ + pixman_composite_src_0565_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8_process_pixblock_tail_head + vst1.8 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #8 + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8_init, \ + pixman_composite_src_n_8_cleanup, \ + pixman_composite_src_n_8_process_pixblock_head, \ + pixman_composite_src_n_8_process_pixblock_tail, \ + pixman_composite_src_n_8_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_0565_process_pixblock_head +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_0565_process_pixblock_tail_head + vst1.16 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_0565_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #16 + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_0565_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 16, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_0565_init, \ + pixman_composite_src_n_0565_cleanup, \ + pixman_composite_src_n_0565_process_pixblock_head, \ + pixman_composite_src_n_0565_process_pixblock_tail, \ + pixman_composite_src_n_0565_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_n_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! +.endm + +.macro pixman_composite_src_n_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d0[0]}, [DUMMY] + vsli.u64 d0, d0, #32 + vorr d1, d0, d0 + vorr q1, q0, q0 +.endm + +.macro pixman_composite_src_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 0, /* prefetch distance */ \ + pixman_composite_src_n_8888_init, \ + pixman_composite_src_n_8888_cleanup, \ + pixman_composite_src_n_8888_process_pixblock_head, \ + pixman_composite_src_n_8888_process_pixblock_tail, \ + pixman_composite_src_n_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_8888_8888_process_pixblock_head +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_8888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_8888_process_pixblock_head, \ + pixman_composite_src_8888_8888_process_pixblock_tail, \ + pixman_composite_src_8888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_x888_8888_process_pixblock_head + vorr q0, q0, q2 + vorr q1, q1, q2 +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail +.endm + +.macro pixman_composite_src_x888_8888_process_pixblock_tail_head + vst1.32 {d0, d1, d2, d3}, [DST_W, :128]! + fetch_src_pixblock + vorr q0, q0, q2 + vorr q1, q1, q2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_x888_8888_init + vmov.u8 q2, #0xFF + vshl.u32 q2, q2, #24 +.endm + +generate_composite_function \ + pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_x888_8888_init, \ + default_cleanup, \ + pixman_composite_src_x888_8888_process_pixblock_head, \ + pixman_composite_src_x888_8888_process_pixblock_tail, \ + pixman_composite_src_x888_8888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d24, d9 + vmull.u8 q6, d24, d10 + vmull.u8 q7, d24, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 d24, d3 /* get inverted alpha */ + /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ + /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head + pixman_composite_over_n_8_8888_process_pixblock_tail + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + cache_preload 8, 8 + pixman_composite_over_n_8_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8888_init, \ + pixman_composite_over_n_8_8888_cleanup, \ + pixman_composite_over_n_8_8888_process_pixblock_head, \ + pixman_composite_over_n_8_8888_process_pixblock_tail, \ + pixman_composite_over_n_8_8888_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d8 + vmull.u8 q6, d26, d8 + vmull.u8 q7, d27, d8 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vmvn.8 q12, q0 + vmvn.8 q13, q1 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8_8_process_pixblock_tail_head + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_n_8_8_process_pixblock_tail + fetch_mask_pixblock + cache_preload 32, 32 + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + pixman_composite_over_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_over_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d8[0]}, [DUMMY] + vdup.8 d8, d8[3] +.endm + +.macro pixman_composite_over_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8_8_init, \ + pixman_composite_over_n_8_8_cleanup, \ + pixman_composite_over_n_8_8_process_pixblock_head, \ + pixman_composite_over_n_8_8_process_pixblock_tail, \ + pixman_composite_over_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head + /* + * 'combine_mask_ca' replacement + * + * input: solid src (n) in {d8, d9, d10, d11} + * dest in {d4, d5, d6, d7 } + * mask in {d24, d25, d26, d27} + * output: updated src in {d0, d1, d2, d3 } + * updated mask in {d24, d25, d26, d3 } + */ + vmull.u8 q0, d24, d8 + vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 + vmull.u8 q7, d27, d11 + vmull.u8 q9, d11, d25 + vmull.u8 q12, d11, d24 + vmull.u8 q13, d11, d26 + vrshr.u16 q8, q0, #8 + vrshr.u16 q10, q1, #8 + vrshr.u16 q11, q6, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q10 + vraddhn.u16 d2, q6, q11 + vrshr.u16 q11, q12, #8 + vrshr.u16 q8, q9, #8 + vrshr.u16 q6, q13, #8 + vrshr.u16 q10, q7, #8 + vraddhn.u16 d24, q12, q11 + vraddhn.u16 d25, q9, q8 + vraddhn.u16 d26, q13, q6 + vraddhn.u16 d3, q7, q10 + /* + * 'combine_over_ca' replacement + * + * output: updated dest in {d28, d29, d30, d31} + */ + vmvn.8 d24, d24 + vmvn.8 d25, d25 + vmull.u8 q8, d24, d4 + vmull.u8 q9, d25, d5 + vmvn.8 d26, d26 + vmvn.8 d27, d3 + vmull.u8 q10, d26, d6 + vmull.u8 q11, d27, d7 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail + /* ... continue 'combine_over_ca' replacement */ + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 + fetch_mask_pixblock + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 + cache_preload 8, 8 + pixman_composite_over_n_8888_8888_ca_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_n_8888_8888_ca_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d8, d11[0] + vdup.8 d9, d11[1] + vdup.8 d10, d11[2] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_over_n_8888_8888_ca_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_n_8888_8888_ca_init, \ + pixman_composite_over_n_8888_8888_ca_cleanup, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \ + pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_in_n_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* and destination data in {d4, d5, d6, d7} */ + vmull.u8 q8, d4, d3 + vmull.u8 q9, d5, d3 + vmull.u8 q10, d6, d3 + vmull.u8 q11, d7, d3 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q8, q14 + vraddhn.u16 d29, q9, q15 + vraddhn.u16 d30, q10, q12 + vraddhn.u16 d31, q11, q13 +.endm + +.macro pixman_composite_in_n_8_process_pixblock_tail_head + pixman_composite_in_n_8_process_pixblock_tail + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 32, 32 + pixman_composite_in_n_8_process_pixblock_head + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_in_n_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_in_n_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_in_n_8_asm_neon, 0, 0, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_in_n_8_init, \ + pixman_composite_in_n_8_cleanup, \ + pixman_composite_in_n_8_process_pixblock_head, \ + pixman_composite_in_n_8_process_pixblock_tail, \ + pixman_composite_in_n_8_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +.macro pixman_composite_add_n_8_8_process_pixblock_head + /* expecting source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ + /* and destination data in {d4, d5, d6, d7} */ + /* mask is in d24, d25, d26, d27 */ + vmull.u8 q0, d24, d11 + vmull.u8 q1, d25, d11 + vmull.u8 q6, d26, d11 + vmull.u8 q7, d27, d11 + vrshr.u16 q10, q0, #8 + vrshr.u16 q11, q1, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q13, q7, #8 + vraddhn.u16 d0, q0, q10 + vraddhn.u16 d1, q1, q11 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d3, q7, q13 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_n_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_n_8_8_process_pixblock_tail_head + pixman_composite_add_n_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + cache_preload 32, 32 + pixman_composite_add_n_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vpush {d8-d15} + vld1.32 {d11[0]}, [DUMMY] + vdup.8 d11, d11[3] +.endm + +.macro pixman_composite_add_n_8_8_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8_init, \ + pixman_composite_add_n_8_8_cleanup, \ + pixman_composite_add_n_8_8_process_pixblock_head, \ + pixman_composite_add_n_8_8_process_pixblock_tail, \ + pixman_composite_add_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8_8_8_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d25, d1 + vmull.u8 q10, d26, d2 + vmull.u8 q11, d27, d3 + vrshr.u16 q0, q8, #8 + vrshr.u16 q1, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d0, q0, q8 + vraddhn.u16 d1, q1, q9 + vraddhn.u16 d2, q12, q10 + vraddhn.u16 d3, q13, q11 + vqadd.u8 q14, q0, q2 + vqadd.u8 q15, q1, q3 +.endm + +.macro pixman_composite_add_8_8_8_process_pixblock_tail +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_8_8_8_process_pixblock_tail_head + pixman_composite_add_8_8_8_process_pixblock_tail + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! + vld1.8 {d4, d5, d6, d7}, [DST_R, :128]! + fetch_mask_pixblock + fetch_src_pixblock + cache_preload 32, 32 + pixman_composite_add_8_8_8_process_pixblock_head +.endm + +.macro pixman_composite_add_8_8_8_init +.endm + +.macro pixman_composite_add_8_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \ + FLAG_DST_READWRITE, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8_8_8_init, \ + pixman_composite_add_8_8_8_cleanup, \ + pixman_composite_add_8_8_8_process_pixblock_head, \ + pixman_composite_add_8_8_8_process_pixblock_tail, \ + pixman_composite_add_8_8_8_process_pixblock_tail_head + +/******************************************************************************/ + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* mask in {d24, d25, d26, d27} */ + vmull.u8 q8, d27, d0 + vmull.u8 q9, d27, d1 + vmull.u8 q10, d27, d2 + vmull.u8 q11, d27, d3 + /* 1 cycle bubble */ + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail + /* 2 cycle bubble */ + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 + vqadd.u8 q14, q2, q14 + /* 1 cycle bubble */ + vqadd.u8 q15, q3, q15 +.endm + +.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + fetch_src_pixblock + vrshrn.u16 d28, q8, #8 + fetch_mask_pixblock + vrshrn.u16 d29, q9, #8 + vmull.u8 q8, d27, d0 + vrshrn.u16 d30, q10, #8 + vmull.u8 q9, d27, d1 + vrshrn.u16 d31, q11, #8 + vmull.u8 q10, d27, d2 + vqadd.u8 q14, q2, q14 + vmull.u8 q11, d27, d3 + vqadd.u8 q15, q3, q15 + vrsra.u16 q8, q8, #8 + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrsra.u16 q9, q9, #8 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrsra.u16 q10, q10, #8 + + cache_preload 8, 8 + + vrsra.u16 q11, q11, #8 +.endm + +generate_composite_function \ + pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +generate_composite_function_single_scanline \ + pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head + +/******************************************************************************/ + +generate_composite_function \ + pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_add_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_n_8_8888_init, \ + pixman_composite_add_n_8_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_8888_n_8888_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vld1.32 {d27[0]}, [DUMMY] + vdup.8 d27, d27[3] +.endm + +.macro pixman_composite_add_8888_n_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_add_8888_n_8888_init, \ + pixman_composite_add_8888_n_8888_cleanup, \ + pixman_composite_add_8888_8888_8888_process_pixblock_head, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail, \ + pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 27 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + /* expecting source data in {d0, d1, d2, d3} */ + /* destination data in {d4, d5, d6, d7} */ + /* solid mask is in d15 */ + + /* 'in' */ + vmull.u8 q8, d15, d3 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q13, q8, #8 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d3, q8, q13 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 + vmvn.8 d24, d3 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function_single_scanline \ + pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_8888_n_8888_process_pixblock_head + pixman_composite_out_reverse_8888_n_8888_process_pixblock_head +.endm + +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail + pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +.macro pixman_composite_over_8888_n_8888_init + add DUMMY, sp, #48 + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_8888_n_8888_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_8888_n_8888_init, \ + pixman_composite_over_8888_n_8888_cleanup, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail_head + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +generate_composite_function_single_scanline \ + pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 12 /* mask_basereg */ + +/******************************************************************************/ + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + pixman_composite_over_8888_n_8888_process_pixblock_tail + fetch_src_pixblock + cache_preload 8, 8 + fetch_mask_pixblock + pixman_composite_over_8888_n_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_n_8888_process_pixblock_head, \ + pixman_composite_over_8888_n_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8_8888_process_pixblock_tail_head \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0888_process_pixblock_head +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_0888_process_pixblock_tail_head + vst3.8 {d0, d1, d2}, [DST_W]! + fetch_src_pixblock + cache_preload 8, 8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0888_process_pixblock_head, \ + pixman_composite_src_0888_0888_process_pixblock_tail, \ + pixman_composite_src_0888_0888_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_head + vswp d0, d2 +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail +.endm + +.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head + vst4.8 {d0, d1, d2, d3}, [DST_W]! + fetch_src_pixblock + vswp d0, d2 + cache_preload 8, 8 +.endm + +.macro pixman_composite_src_0888_8888_rev_init + veor d3, d3, d3 +.endm + +generate_composite_function \ + pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + pixman_composite_src_0888_8888_rev_init, \ + default_cleanup, \ + pixman_composite_src_0888_8888_rev_process_pixblock_head, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail, \ + pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \ + 0, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q9, d2, #8 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail + vshll.u8 q14, d0, #8 + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 +.endm + +.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head + vshll.u8 q14, d0, #8 + fetch_src_pixblock + vsri.u16 q14, q8, #5 + vsri.u16 q14, q9, #11 + vshll.u8 q8, d1, #8 + vst1.16 {d28, d29}, [DST_W, :128]! + vshll.u8 q9, d2, #8 +.endm + +generate_composite_function \ + pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \ + FLAG_DST_WRITEONLY, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0888_0565_rev_process_pixblock_head, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail, \ + pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d30, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 +.endm + +.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d30, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d28, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_pixbuf_8888_process_pixblock_head, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + vraddhn.u16 d28, q11, q8 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 +.endm + +.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head + vrshr.u16 q11, q8, #8 + vswp d3, d31 + vrshr.u16 q12, q9, #8 + vrshr.u16 q13, q10, #8 + fetch_src_pixblock + vraddhn.u16 d28, q11, q8 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0xF + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d29, q12, q9 + vraddhn.u16 d30, q13, q10 + vmull.u8 q8, d3, d0 + vmull.u8 q9, d3, d1 + vmull.u8 q10, d3, d2 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + PF cmp PF_X, ORIG_W + PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift] + PF subge PF_X, PF_X, ORIG_W + PF subges PF_CTL, PF_CTL, #0x10 + PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]! +.endm + +generate_composite_function \ + pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 10, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_head, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \ + pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 0, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmvn.8 d7, d15 + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vmull.u8 q8, d7, d4 + vmull.u8 q9, d7, d5 + vmull.u8 q13, d7, d6 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q13, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q13 + vqadd.u8 q0, q0, q14 + vqadd.u8 q1, q1, q15 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_over_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_over_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_over_0565_n_0565_init + add DUMMY, sp, #(ARGS_STACK_OFFSET + 8) + vpush {d8-d15} + vld1.32 {d15[0]}, [DUMMY] + vdup.8 d15, d15[3] +.endm + +.macro pixman_composite_over_0565_n_0565_cleanup + vpop {d8-d15} +.endm + +generate_composite_function \ + pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_over_0565_n_0565_init, \ + pixman_composite_over_0565_n_0565_cleanup, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_add_0565_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q4, d2, d1, d0 + convert_0565_to_x888 q5, d6, d5, d4 + /* source pixel data is in {d0, d1, d2, XX} */ + /* destination pixel data is in {d4, d5, d6, XX} */ + vmull.u8 q6, d15, d2 + vmull.u8 q5, d15, d1 + vmull.u8 q4, d15, d0 + vrshr.u16 q12, q6, #8 + vrshr.u16 q11, q5, #8 + vrshr.u16 q10, q4, #8 + vraddhn.u16 d2, q6, q12 + vraddhn.u16 d1, q5, q11 + vraddhn.u16 d0, q4, q10 +.endm + +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail + vqadd.u8 q0, q0, q2 + vqadd.u8 q1, q1, q3 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head + fetch_mask_pixblock + pixman_composite_add_0565_8_0565_process_pixblock_tail + fetch_src_pixblock + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_add_0565_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_add_0565_8_0565_process_pixblock_head, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail, \ + pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ + +/******************************************************************************/ + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_head + /* mask is in d15 */ + convert_0565_to_x888 q5, d6, d5, d4 + /* destination pixel data is in {d4, d5, d6, xx} */ + vmvn.8 d24, d15 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 +.endm + +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vraddhn.u16 d0, q14, q8 + vraddhn.u16 d1, q15, q9 + vraddhn.u16 d2, q12, q10 + /* 32bpp result is in {d0, d1, d2, XX} */ + convert_8888_to_0565 d2, d1, d0, q14, q15, q3 +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_0565_process_pixblock_tail + vld1.16 {d10, d11}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_0565_process_pixblock_head + vst1.16 {d28, d29}, [DST_W, :128]! +.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_out_reverse_8_0565_process_pixblock_head, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ + pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 15, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_8888_process_pixblock_head, \ + pixman_composite_over_8888_8888_process_pixblock_tail, \ + pixman_composite_over_8888_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_over_8888_0565_process_pixblock_head, \ + pixman_composite_over_8888_0565_process_pixblock_tail, \ + pixman_composite_over_8888_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_8888_0565_process_pixblock_head, \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init, \ + default_cleanup, \ + pixman_composite_src_0565_8888_process_pixblock_head, \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_8888_8_0565_process_pixblock_head, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail, \ + pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 24 /* mask_basereg */ + +generate_composite_function_nearest_scanline \ + pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \ + FLAG_DST_READWRITE, \ + 8, /* number of pixels, processed in a single block */ \ + default_init_need_all_regs, \ + default_cleanup_need_all_regs, \ + pixman_composite_over_0565_8_0565_process_pixblock_head, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail, \ + pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 10, /* dst_r_basereg */ \ + 8, /* src_basereg */ \ + 15 /* mask_basereg */ diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c index 7d6c83775..3e0c0d1c2 100644 --- a/pixman/pixman/pixman-arm-neon.c +++ b/pixman/pixman/pixman-arm-neon.c @@ -122,6 +122,11 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC, PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC, uint16_t, uint32_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565, + OVER, uint32_t, uint16_t) +PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565, + OVER, uint16_t, uint16_t) + void pixman_composite_src_n_8_asm_neon (int32_t w, int32_t h, @@ -332,6 +337,12 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888), SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565), + + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565), + PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c index 4cb8321aa..92f030871 100644 --- a/pixman/pixman/pixman-fast-path.c +++ b/pixman/pixman/pixman-fast-path.c @@ -1,2227 +1,2228 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-static force_inline uint32_t
-fetch_24 (uint8_t *a)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- return (*a << 16) | (*(uint16_t *)(a + 1));
-#else
- return *a | (*(uint16_t *)(a + 1) << 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- return (*(uint16_t *)a << 8) | *(a + 2);
-#else
- return *(uint16_t *)a | (*(a + 2) << 16);
-#endif
- }
-}
-
-static force_inline void
-store_24 (uint8_t *a,
- uint32_t v)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- *a = (uint8_t) (v >> 16);
- *(uint16_t *)(a + 1) = (uint16_t) (v);
-#else
- *a = (uint8_t) (v);
- *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- *(uint16_t *)a = (uint16_t)(v >> 8);
- *(a + 2) = (uint8_t)v;
-#else
- *(uint16_t *)a = (uint16_t)v;
- *(a + 2) = (uint8_t)(v >> 16);
-#endif
- }
-}
-
-static force_inline uint32_t
-over (uint32_t src,
- uint32_t dest)
-{
- uint32_t a = ~src >> 24;
-
- UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
-
- return dest;
-}
-
-static uint32_t
-in (uint32_t x,
- uint8_t y)
-{
- uint16_t a = y;
-
- UN8x4_MUL_UN8 (x, a);
-
- return x;
-}
-
-/*
- * Naming convention:
- *
- * op_src_mask_dest
- */
-static void
-fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line;
- uint32_t *dst, *dst_line;
- uint8_t *mask, *mask_line;
- int src_stride, mask_stride, dst_stride;
- uint8_t m;
- uint32_t s, d;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
- while (w--)
- {
- m = *mask++;
- if (m)
- {
- s = *src | 0xff000000;
-
- if (m == 0xff)
- {
- *dst = s;
- }
- else
- {
- d = in (s, m);
- *dst = over (d, *dst);
- }
- }
- src++;
- dst++;
- }
- }
-}
-
-static void
-fast_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
- uint16_t t;
-
- src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
- srca = src >> 24;
-
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- m = MUL_UN8 (m, srca, t);
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
-
- if (s == 0)
- *dst = 0;
- else if (s != 0xff)
- *dst = MUL_UN8 (s, *dst, t);
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (m)
- {
- d = in (src, m);
- *dst = over (d, *dst);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
-
- if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
-
- *dst = s;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = d;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = fetch_24 (dst);
- d = over (src, d);
- }
- store_24 (dst, d);
- }
- else if (m)
- {
- d = over (in (src, m), fetch_24 (dst));
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- else if (m)
- {
- d = *dst;
- d = over (in (src, m), CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint16_t src16;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- src16 = CONVERT_8888_TO_0565 (src);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- {
- *dst = src16;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- }
- else if (ma)
- {
- d = *dst;
- d = CONVERT_0565_TO_0888 (d);
-
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- uint8_t a;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a == 0xff)
- *dst = s;
- else if (s)
- *dst = over (s, *dst);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- *dst++ = (*src++) | 0xff000000;
- }
-}
-
-#if 0
-static void
-fast_composite_over_8888_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a)
- {
- if (a == 0xff)
- d = s;
- else
- d = over (s, fetch_24 (dst));
-
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-#endif
-
-static void
-fast_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (s)
- {
- if (a == 0xff)
- {
- d = s;
- }
- else
- {
- d = *dst;
- d = over (s, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = CONVERT_8888_TO_0565 (s);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s, d;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xff)
- {
- d = *dst;
- t = d + s;
- s = t | (0 - (t >> 8));
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t s, d;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xffffffff)
- {
- d = *dst;
- if (d)
- UN8x4_ADD_UN8x4 (s, d);
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t src;
- uint8_t sa;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- sa = (src >> 24);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- uint16_t tmp;
- uint16_t a;
- uint32_t m, d;
- uint32_t r;
-
- a = *mask++;
- d = *dst;
-
- m = MUL_UN8 (sa, a, tmp);
- r = ADD_UN8 (m, d, tmp);
-
- *dst++ = r;
- }
- }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define CREATE_BITMASK(n) (0x80000000 >> (n))
-#define UPDATE_BITMASK(n) ((n) >> 1)
-#else
-#define CREATE_BITMASK(n) (1 << (n))
-#define UPDATE_BITMASK(n) ((n) << 1)
-#endif
-
-#define TEST_BIT(p, n) \
- (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
-#define SET_BIT(p, n) \
- do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
-
-static void
-fast_composite_add_1000_1000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
- src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
- dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- /*
- * TODO: improve performance by processing uint32_t data instead
- * of individual bits
- */
- if (TEST_BIT (src, src_x + w))
- SET_BIT (dst, dest_x + w);
- }
- }
-}
-
-static void
-fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = over (src, *dst);
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
- uint32_t d;
- uint16_t src565;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- src565 = CONVERT_8888_TO_0565 (src);
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src565;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- {
- d = over (src, CONVERT_0565_TO_0888 (*dst));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-/*
- * Simple bitblt
- */
-
-static void
-fast_composite_solid_fill (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (dst_image->bits.format == PIXMAN_a1)
- {
- src = src >> 31;
- }
- else if (dst_image->bits.format == PIXMAN_a8)
- {
- src = src >> 24;
- }
- else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
- dst_image->bits.format == PIXMAN_b5g6r5)
- {
- src = CONVERT_8888_TO_0565 (src);
- }
-
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- dest_x, dest_y,
- width, height,
- src);
-}
-
-static void
-fast_composite_src_memcpy (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
- uint32_t n_bytes = width * bpp;
- int dst_stride, src_stride;
- uint8_t *dst;
- uint8_t *src;
-
- src_stride = src_image->bits.rowstride * 4;
- dst_stride = dst_image->bits.rowstride * 4;
-
- src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
- dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
-
- while (height--)
- {
- memcpy (dst, src, n_bytes);
-
- dst += dst_stride;
- src += src_stride;
- }
-}
-
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-
-/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
-static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
- uint16_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
-{
- uint16_t tmp1, tmp2, tmp3, tmp4;
- while ((w -= 4) >= 0)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp3 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp4 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- *dst++ = tmp3;
- *dst++ = tmp4;
- }
- if (w & 2)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- }
- if (w & 1)
- *dst++ = src[pixman_fixed_to_int (vx)];
-}
-
-FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, COVER)
-FAST_NEAREST_MAINLOOP (565_565_none_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, NONE)
-FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, PAD)
-
-static force_inline uint32_t
-fetch_nearest (pixman_repeat_t src_repeat,
- pixman_format_code_t format,
- uint32_t *src, int x, int src_width)
-{
- if (repeat (src_repeat, &x, src_width))
- {
- if (format == PIXMAN_x8r8g8b8)
- return *(src + x) | 0xff000000;
- else
- return *(src + x);
- }
- else
- {
- return 0;
- }
-}
-
-static force_inline void
-combine_over (uint32_t s, uint32_t *dst)
-{
- if (s)
- {
- uint8_t ia = 0xff - (s >> 24);
-
- if (ia)
- UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
- else
- *dst = s;
- }
-}
-
-static force_inline void
-combine_src (uint32_t s, uint32_t *dst)
-{
- *dst = s;
-}
-
-static void
-fast_composite_scaled_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line;
- uint32_t *src_line;
- int dst_stride, src_stride;
- int src_width, src_height;
- pixman_repeat_t src_repeat;
- pixman_fixed_t unit_x, unit_y;
- pixman_format_code_t src_format;
- pixman_vector_t v;
- pixman_fixed_t vy;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space
- */
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (src_image->common.transform, &v))
- return;
-
- unit_x = src_image->common.transform->matrix[0][0];
- unit_y = src_image->common.transform->matrix[1][1];
-
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
- v.vector[0] -= pixman_fixed_e;
- v.vector[1] -= pixman_fixed_e;
-
- src_height = src_image->bits.height;
- src_width = src_image->bits.width;
- src_repeat = src_image->common.repeat;
- src_format = src_image->bits.format;
-
- vy = v.vector[1];
- while (height--)
- {
- pixman_fixed_t vx = v.vector[0];
- int y = pixman_fixed_to_int (vy);
- uint32_t *dst = dst_line;
-
- dst_line += dst_stride;
-
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point to source space */
- vy += unit_y;
-
- if (!repeat (src_repeat, &y, src_height))
- {
- if (op == PIXMAN_OP_SRC)
- memset (dst, 0, sizeof (*dst) * width);
- }
- else
- {
- int w = width;
-
- uint32_t *src = src_line + y * src_stride;
-
- while (w >= 2)
- {
- uint32_t s1, s2;
- int x1, x2;
-
- x1 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- x2 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- w -= 2;
-
- s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
- s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
-
- if (op == PIXMAN_OP_OVER)
- {
- combine_over (s1, dst++);
- combine_over (s2, dst++);
- }
- else
- {
- combine_src (s1, dst++);
- combine_src (s2, dst++);
- }
- }
-
- while (w--)
- {
- uint32_t s;
- int x;
-
- x = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- s = fetch_nearest (src_repeat, src_format, src, x, src_width);
-
- if (op == PIXMAN_OP_OVER)
- combine_over (s, dst++);
- else
- combine_src (s, dst++);
- }
- }
- }
-}
-
-#define CACHE_LINE_SIZE 64
-
-#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
- \
-static void \
-blt_rotated_90_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + (h - y - 1); \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_270_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + src_stride * (w - 1) + y; \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s -= src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_90_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src, \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- src += leading_pixels * src_stride; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * x, \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src + W * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-blt_rotated_270_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src + src_stride * (W - leading_pixels), \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- src += trailing_pixels * src_stride; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * (W - x - TILE_SIZE), \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src - trailing_pixels * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = -src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
- src_y_t = src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-} \
- \
-static void \
-fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- src_y_t = -src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-}
-
-FAST_SIMPLE_ROTATE (8, uint8_t)
-FAST_SIMPLE_ROTATE (565, uint16_t)
-FAST_SIMPLE_ROTATE (8888, uint32_t)
-
-static const pixman_fast_path_t c_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
- PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
-
-#define NEAREST_FAST_PATH(op,s,d) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest, \
- }
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
-
-#define SIMPLE_ROTATE_FLAGS(angle) \
- (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_SAMPLES_COVER_CLIP | \
- FAST_PATH_STANDARD_FLAGS)
-
-#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_90_##suffix, \
- }, \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_270_##suffix, \
- }
-
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
-
- { PIXMAN_OP_NONE },
-};
-
-#ifdef WORDS_BIGENDIAN
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
-#else
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
-#endif
-
-static force_inline void
-pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
-{
- if (offs)
- {
- int leading_pixels = 32 - offs;
- if (leading_pixels >= width)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, offs);
- else
- *dst &= ~A1_FILL_MASK (width, offs);
- return;
- }
- else
- {
- if (v)
- *dst++ |= A1_FILL_MASK (leading_pixels, offs);
- else
- *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
- width -= leading_pixels;
- }
- }
- while (width >= 32)
- {
- if (v)
- *dst++ = 0xFFFFFFFF;
- else
- *dst++ = 0;
- width -= 32;
- }
- if (width > 0)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, 0);
- else
- *dst &= ~A1_FILL_MASK (width, 0);
- }
-}
-
-static void
-pixman_fill1 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- uint32_t *dst = bits + y * stride + (x >> 5);
- int offs = x & 31;
-
- if (xor & 1)
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 1);
- dst += stride;
- }
- }
- else
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 0);
- dst += stride;
- }
- }
-}
-
-static void
-pixman_fill8 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int byte_stride = stride * (int) sizeof (uint32_t);
- uint8_t *dst = (uint8_t *) bits;
- uint8_t v = xor & 0xff;
- int i;
-
- dst = dst + y * byte_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += byte_stride;
- }
-}
-
-static void
-pixman_fill16 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int short_stride =
- (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
- uint16_t *dst = (uint16_t *)bits;
- uint16_t v = xor & 0xffff;
- int i;
-
- dst = dst + y * short_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += short_stride;
- }
-}
-
-static void
-pixman_fill32 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int i;
-
- bits = bits + y * stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- bits[i] = xor;
-
- bits += stride;
- }
-}
-
-static pixman_bool_t
-fast_path_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- switch (bpp)
- {
- case 1:
- pixman_fill1 (bits, stride, x, y, width, height, xor);
- break;
-
- case 8:
- pixman_fill8 (bits, stride, x, y, width, height, xor);
- break;
-
- case 16:
- pixman_fill16 (bits, stride, x, y, width, height, xor);
- break;
-
- case 32:
- pixman_fill32 (bits, stride, x, y, width, height, xor);
- break;
-
- default:
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
- break;
- }
-
- return TRUE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
-
- imp->fill = fast_path_fill;
-
- return imp;
-}
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif +#include <string.h> +#include <stdlib.h> +#include "pixman-private.h" +#include "pixman-combine32.h" +#include "pixman-fast-path.h" + +static force_inline uint32_t +fetch_24 (uint8_t *a) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + return (*a << 16) | (*(uint16_t *)(a + 1)); +#else + return *a | (*(uint16_t *)(a + 1) << 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + return (*(uint16_t *)a << 8) | *(a + 2); +#else + return *(uint16_t *)a | (*(a + 2) << 16); +#endif + } +} + +static force_inline void +store_24 (uint8_t *a, + uint32_t v) +{ + if (((unsigned long)a) & 1) + { +#ifdef WORDS_BIGENDIAN + *a = (uint8_t) (v >> 16); + *(uint16_t *)(a + 1) = (uint16_t) (v); +#else + *a = (uint8_t) (v); + *(uint16_t *)(a + 1) = (uint16_t) (v >> 8); +#endif + } + else + { +#ifdef WORDS_BIGENDIAN + *(uint16_t *)a = (uint16_t)(v >> 8); + *(a + 2) = (uint8_t)v; +#else + *(uint16_t *)a = (uint16_t)v; + *(a + 2) = (uint8_t)(v >> 16); +#endif + } +} + +static force_inline uint32_t +over (uint32_t src, + uint32_t dest) +{ + uint32_t a = ~src >> 24; + + UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); + + return dest; +} + +static uint32_t +in (uint32_t x, + uint8_t y) +{ + uint16_t a = y; + + UN8x4_MUL_UN8 (x, a); + + return x; +} + +/* + * Naming convention: + * + * op_src_mask_dest + */ +static void +fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *src, *src_line; + uint32_t *dst, *dst_line; + uint8_t *mask, *mask_line; + int src_stride, mask_stride, dst_stride; + uint8_t m; + uint32_t s, d; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + src = src_line; + src_line += src_stride; + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + + w = width; + while (w--) + { + m = *mask++; + if (m) + { + s = *src | 0xff000000; + + if (m == 0xff) + { + *dst = s; + } + else + { + d = in (s, m); + *dst = over (d, *dst); + } + } + src++; + dst++; + } + } +} + +static void +fast_composite_in_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + uint16_t t; + + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); + + srca = src >> 24; + + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + m = MUL_UN8 (m, srca, t); + + if (m == 0) + *dst = 0; + else if (m != 0xff) + *dst = MUL_UN8 (m, *dst, t); + + dst++; + } + } + } +} + +static void +fast_composite_in_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dest_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + + if (s == 0) + *dst = 0; + else if (s != 0xff) + *dst = MUL_UN8 (s, *dst, t); + + dst++; + } + } +} + +static void +fast_composite_over_n_8_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst_line, *dst, d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (m) + { + d = in (src, m); + *dst = over (d, *dst); + } + dst++; + } + } +} + +static void +fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + + if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d); + + *dst = s; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint32_t *dst_line, *dst, d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + *dst = src; + else + *dst = over (src, *dst); + } + else if (ma) + { + d = *dst; + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = d; + } + + dst++; + } + } +} + +static void +fast_composite_over_n_8_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint8_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = fetch_24 (dst); + d = over (src, d); + } + store_24 (dst, d); + } + else if (m) + { + d = over (in (src, m), fetch_24 (dst)); + store_24 (dst, d); + } + dst += 3; + } + } +} + +static void +fast_composite_over_n_8_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst_line, *dst; + uint32_t d; + uint8_t *mask_line, *mask, m; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + m = *mask++; + if (m == 0xff) + { + if (srca == 0xff) + { + d = src; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + else if (m) + { + d = *dst; + d = over (in (src, m), CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca, s; + uint16_t src16; + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *mask_line, *mask, ma; + int dst_stride, mask_stride; + int32_t w; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + srca = src >> 24; + if (src == 0) + return; + + src16 = CONVERT_8888_TO_0565 (src); + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + ma = *mask++; + if (ma == 0xffffffff) + { + if (srca == 0xff) + { + *dst = src16; + } + else + { + d = *dst; + d = over (src, CONVERT_0565_TO_0888 (d)); + *dst = CONVERT_8888_TO_0565 (d); + } + } + else if (ma) + { + d = *dst; + d = CONVERT_0565_TO_0888 (d); + + s = src; + + UN8x4_MUL_UN8x4 (s, ma); + UN8x4_MUL_UN8 (ma, srca); + ma = ~ma; + UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s); + + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_over_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + uint8_t a; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a == 0xff) + *dst = s; + else if (s) + *dst = over (s, *dst); + dst++; + } + } +} + +static void +fast_composite_src_x888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + *dst++ = (*src++) | 0xff000000; + } +} + +#if 0 +static void +fast_composite_over_8888_0888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (a) + { + if (a == 0xff) + d = s; + else + d = over (s, fetch_24 (dst)); + + store_24 (dst, d); + } + dst += 3; + } + } +} +#endif + +static void +fast_composite_over_8888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t d; + uint32_t *src_line, *src, s; + uint8_t a; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + a = s >> 24; + if (s) + { + if (a == 0xff) + { + d = s; + } + else + { + d = *dst; + d = over (s, CONVERT_0565_TO_0888 (d)); + } + *dst = CONVERT_8888_TO_0565 (d); + } + dst++; + } + } +} + +static void +fast_composite_src_x888_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint16_t *dst_line, *dst; + uint32_t *src_line, *src, s; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + *dst = CONVERT_8888_TO_0565 (s); + dst++; + } + } +} + +static void +fast_composite_add_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint8_t s, d; + uint16_t t; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xff) + { + d = *dst; + t = d + s; + s = t | (0 - (t >> 8)); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_8888_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + uint32_t s, d; + + PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + s = *src++; + if (s) + { + if (s != 0xffffffff) + { + d = *dst; + if (d) + UN8x4_ADD_UN8x4 (s, d); + } + *dst = s; + } + dst++; + } + } +} + +static void +fast_composite_add_n_8_8 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint8_t *dst_line, *dst; + uint8_t *mask_line, *mask; + int dst_stride, mask_stride; + int32_t w; + uint32_t src; + uint8_t sa; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + sa = (src >> 24); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + while (w--) + { + uint16_t tmp; + uint16_t a; + uint32_t m, d; + uint32_t r; + + a = *mask++; + d = *dst; + + m = MUL_UN8 (sa, a, tmp); + r = ADD_UN8 (m, d, tmp); + + *dst++ = r; + } + } +} + +#ifdef WORDS_BIGENDIAN +#define CREATE_BITMASK(n) (0x80000000 >> (n)) +#define UPDATE_BITMASK(n) ((n) >> 1) +#else +#define CREATE_BITMASK(n) (1 << (n)) +#define UPDATE_BITMASK(n) ((n) << 1) +#endif + +#define TEST_BIT(p, n) \ + (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) +#define SET_BIT(p, n) \ + do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); + +static void +fast_composite_add_1000_1000 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line, *dst; + uint32_t *src_line, *src; + int dst_stride, src_stride; + int32_t w; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, + src_stride, src_line, 1); + PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + dst_stride, dst_line, 1); + + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + src = src_line; + src_line += src_stride; + w = width; + + while (w--) + { + /* + * TODO: improve performance by processing uint32_t data instead + * of individual bits + */ + if (TEST_BIT (src, src_x + w)) + SET_BIT (dst, dest_x + w); + } + } +} + +static void +fast_composite_over_n_1_8888 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint32_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = over (src, *dst); + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +static void +fast_composite_over_n_1_0565 (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src, srca; + uint16_t *dst, *dst_line; + uint32_t *mask, *mask_line; + int mask_stride, dst_stride; + uint32_t bitcache, bitmask; + int32_t w; + uint32_t d; + uint16_t src565; + + if (width <= 0) + return; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + srca = src >> 24; + if (src == 0) + return; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, + dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, + mask_stride, mask_line, 1); + mask_line += mask_x >> 5; + + if (srca == 0xff) + { + src565 = CONVERT_8888_TO_0565 (src); + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + *dst = src565; + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } + else + { + while (height--) + { + dst = dst_line; + dst_line += dst_stride; + mask = mask_line; + mask_line += mask_stride; + w = width; + + bitcache = *mask++; + bitmask = CREATE_BITMASK (mask_x & 31); + + while (w--) + { + if (bitmask == 0) + { + bitcache = *mask++; + bitmask = CREATE_BITMASK (0); + } + if (bitcache & bitmask) + { + d = over (src, CONVERT_0565_TO_0888 (*dst)); + *dst = CONVERT_8888_TO_0565 (d); + } + bitmask = UPDATE_BITMASK (bitmask); + dst++; + } + } + } +} + +/* + * Simple bitblt + */ + +static void +fast_composite_solid_fill (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t src; + + src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + + if (dst_image->bits.format == PIXMAN_a1) + { + src = src >> 31; + } + else if (dst_image->bits.format == PIXMAN_a8) + { + src = src >> 24; + } + else if (dst_image->bits.format == PIXMAN_r5g6b5 || + dst_image->bits.format == PIXMAN_b5g6r5) + { + src = CONVERT_8888_TO_0565 (src); + } + + pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dst_image->bits.format), + dest_x, dest_y, + width, height, + src); +} + +static void +fast_composite_src_memcpy (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; + uint32_t n_bytes = width * bpp; + int dst_stride, src_stride; + uint8_t *dst; + uint8_t *src; + + src_stride = src_image->bits.rowstride * 4; + dst_stride = dst_image->bits.rowstride * 4; + + src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; + dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + + while (height--) + { + memcpy (dst, src, n_bytes); + + dst += dst_stride; + src += src_stride; + } +} + +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) +FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) +FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) +FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) +FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) +FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) +FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) +FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL) + +/* Use more unrolling for src_0565_0565 because it is typically CPU bound */ +static force_inline void +scaled_nearest_scanline_565_565_SRC (uint16_t * dst, + const uint16_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) +{ + uint16_t tmp1, tmp2, tmp3, tmp4; + while ((w -= 4) >= 0) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + *dst++ = tmp3; + *dst++ = tmp4; + } + if (w & 2) + { + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + *dst++ = tmp1; + *dst++ = tmp2; + } + if (w & 1) + *dst++ = src[pixman_fixed_to_int (vx)]; +} + +FAST_NEAREST_MAINLOOP (565_565_cover_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, COVER) +FAST_NEAREST_MAINLOOP (565_565_none_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, NONE) +FAST_NEAREST_MAINLOOP (565_565_pad_SRC, + scaled_nearest_scanline_565_565_SRC, + uint16_t, uint16_t, PAD) + +static force_inline uint32_t +fetch_nearest (pixman_repeat_t src_repeat, + pixman_format_code_t format, + uint32_t *src, int x, int src_width) +{ + if (repeat (src_repeat, &x, src_width)) + { + if (format == PIXMAN_x8r8g8b8) + return *(src + x) | 0xff000000; + else + return *(src + x); + } + else + { + return 0; + } +} + +static force_inline void +combine_over (uint32_t s, uint32_t *dst) +{ + if (s) + { + uint8_t ia = 0xff - (s >> 24); + + if (ia) + UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s); + else + *dst = s; + } +} + +static force_inline void +combine_src (uint32_t s, uint32_t *dst) +{ + *dst = s; +} + +static void +fast_composite_scaled_nearest (pixman_implementation_t *imp, + pixman_op_t op, + pixman_image_t * src_image, + pixman_image_t * mask_image, + pixman_image_t * dst_image, + int32_t src_x, + int32_t src_y, + int32_t mask_x, + int32_t mask_y, + int32_t dest_x, + int32_t dest_y, + int32_t width, + int32_t height) +{ + uint32_t *dst_line; + uint32_t *src_line; + int dst_stride, src_stride; + int src_width, src_height; + pixman_repeat_t src_repeat; + pixman_fixed_t unit_x, unit_y; + pixman_format_code_t src_format; + pixman_vector_t v; + pixman_fixed_t vy; + + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be + * transformed from destination space to source space + */ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); + + /* reference point is the center of the pixel */ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; + v.vector[2] = pixman_fixed_1; + + if (!pixman_transform_point_3d (src_image->common.transform, &v)) + return; + + unit_x = src_image->common.transform->matrix[0][0]; + unit_y = src_image->common.transform->matrix[1][1]; + + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ + v.vector[0] -= pixman_fixed_e; + v.vector[1] -= pixman_fixed_e; + + src_height = src_image->bits.height; + src_width = src_image->bits.width; + src_repeat = src_image->common.repeat; + src_format = src_image->bits.format; + + vy = v.vector[1]; + while (height--) + { + pixman_fixed_t vx = v.vector[0]; + int y = pixman_fixed_to_int (vy); + uint32_t *dst = dst_line; + + dst_line += dst_stride; + + /* adjust the y location by a unit vector in the y direction + * this is equivalent to transforming y+1 of the destination point to source space */ + vy += unit_y; + + if (!repeat (src_repeat, &y, src_height)) + { + if (op == PIXMAN_OP_SRC) + memset (dst, 0, sizeof (*dst) * width); + } + else + { + int w = width; + + uint32_t *src = src_line + y * src_stride; + + while (w >= 2) + { + uint32_t s1, s2; + int x1, x2; + + x1 = pixman_fixed_to_int (vx); + vx += unit_x; + + x2 = pixman_fixed_to_int (vx); + vx += unit_x; + + w -= 2; + + s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width); + s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width); + + if (op == PIXMAN_OP_OVER) + { + combine_over (s1, dst++); + combine_over (s2, dst++); + } + else + { + combine_src (s1, dst++); + combine_src (s2, dst++); + } + } + + while (w--) + { + uint32_t s; + int x; + + x = pixman_fixed_to_int (vx); + vx += unit_x; + + s = fetch_nearest (src_repeat, src_format, src, x, src_width); + + if (op == PIXMAN_OP_OVER) + combine_over (s, dst++); + else + combine_src (s, dst++); + } + } + } +} + +#define CACHE_LINE_SIZE 64 + +#define FAST_SIMPLE_ROTATE(suffix, pix_type) \ + \ +static void \ +blt_rotated_90_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + (h - y - 1); \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s += src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_270_trivial_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int w, \ + int h) \ +{ \ + int x, y; \ + for (y = 0; y < h; y++) \ + { \ + const pix_type *s = src + src_stride * (w - 1) + y; \ + pix_type *d = dst + dst_stride * y; \ + for (x = 0; x < w; x++) \ + { \ + *d++ = *s; \ + s -= src_stride; \ + } \ + } \ +} \ + \ +static void \ +blt_rotated_90_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src, \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + src += leading_pixels * src_stride; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * x, \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_90_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src + W * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +blt_rotated_270_##suffix (pix_type *dst, \ + int dst_stride, \ + const pix_type *src, \ + int src_stride, \ + int W, \ + int H) \ +{ \ + int x; \ + int leading_pixels = 0, trailing_pixels = 0; \ + const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \ + \ + /* \ + * split processing into handling destination as TILE_SIZExH cache line \ + * aligned vertical stripes (optimistically assuming that destination \ + * stride is a multiple of cache line, if not - it will be just a bit \ + * slower) \ + */ \ + \ + if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \ + { \ + leading_pixels = TILE_SIZE - (((uintptr_t)dst & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (leading_pixels > W) \ + leading_pixels = W; \ + \ + /* unaligned leading part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst, \ + dst_stride, \ + src + src_stride * (W - leading_pixels), \ + src_stride, \ + leading_pixels, \ + H); \ + \ + dst += leading_pixels; \ + W -= leading_pixels; \ + } \ + \ + if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \ + { \ + trailing_pixels = (((uintptr_t)(dst + W) & \ + (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \ + if (trailing_pixels > W) \ + trailing_pixels = W; \ + W -= trailing_pixels; \ + src += trailing_pixels * src_stride; \ + } \ + \ + for (x = 0; x < W; x += TILE_SIZE) \ + { \ + /* aligned middle part TILE_SIZExH */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + x, \ + dst_stride, \ + src + src_stride * (W - x - TILE_SIZE), \ + src_stride, \ + TILE_SIZE, \ + H); \ + } \ + \ + if (trailing_pixels) \ + { \ + /* unaligned trailing part NxH (where N < TILE_SIZE) */ \ + blt_rotated_270_trivial_##suffix ( \ + dst + W, \ + dst_stride, \ + src - trailing_pixels * src_stride, \ + src_stride, \ + trailing_pixels, \ + H); \ + } \ +} \ + \ +static void \ +fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = -src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ + src_y_t = src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} \ + \ +static void \ +fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dest_x, \ + int32_t dest_y, \ + int32_t width, \ + int32_t height) \ +{ \ + pix_type *dst_line; \ + pix_type *src_line; \ + int dst_stride, src_stride; \ + int src_x_t, src_y_t; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + dst_stride, dst_line, 1); \ + src_x_t = src_y + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[0][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e); \ + src_y_t = -src_x + pixman_fixed_to_int ( \ + src_image->common.transform->matrix[1][2] + \ + pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ + PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ + src_stride, src_line, 1); \ + blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \ + width, height); \ +} + +FAST_SIMPLE_ROTATE (8, uint8_t) +FAST_SIMPLE_ROTATE (565, uint16_t) +FAST_SIMPLE_ROTATE (8888, uint32_t) + +static const pixman_fast_path_t c_fast_paths[] = +{ + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), + PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), + PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), + PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), + PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy), + PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565), + PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8), + PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), + SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), + + SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), + + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), + +#define NEAREST_FAST_PATH(op,s,d) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest, \ + } + + NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8), + + NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), + NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), + NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), + +#define SIMPLE_ROTATE_FLAGS(angle) \ + (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_SAMPLES_COVER_CLIP | \ + FAST_PATH_STANDARD_FLAGS) + +#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_90_##suffix, \ + }, \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_rotate_270_##suffix, \ + } + + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), + SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), + SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + + { PIXMAN_OP_NONE }, +}; + +#ifdef WORDS_BIGENDIAN +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) +#else +#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) +#endif + +static force_inline void +pixman_fill1_line (uint32_t *dst, int offs, int width, int v) +{ + if (offs) + { + int leading_pixels = 32 - offs; + if (leading_pixels >= width) + { + if (v) + *dst |= A1_FILL_MASK (width, offs); + else + *dst &= ~A1_FILL_MASK (width, offs); + return; + } + else + { + if (v) + *dst++ |= A1_FILL_MASK (leading_pixels, offs); + else + *dst++ &= ~A1_FILL_MASK (leading_pixels, offs); + width -= leading_pixels; + } + } + while (width >= 32) + { + if (v) + *dst++ = 0xFFFFFFFF; + else + *dst++ = 0; + width -= 32; + } + if (width > 0) + { + if (v) + *dst |= A1_FILL_MASK (width, 0); + else + *dst &= ~A1_FILL_MASK (width, 0); + } +} + +static void +pixman_fill1 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + uint32_t *dst = bits + y * stride + (x >> 5); + int offs = x & 31; + + if (xor & 1) + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 1); + dst += stride; + } + } + else + { + while (height--) + { + pixman_fill1_line (dst, offs, width, 0); + dst += stride; + } + } +} + +static void +pixman_fill8 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int byte_stride = stride * (int) sizeof (uint32_t); + uint8_t *dst = (uint8_t *) bits; + uint8_t v = xor & 0xff; + int i; + + dst = dst + y * byte_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += byte_stride; + } +} + +static void +pixman_fill16 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int short_stride = + (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); + uint16_t *dst = (uint16_t *)bits; + uint16_t v = xor & 0xffff; + int i; + + dst = dst + y * short_stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + dst[i] = v; + + dst += short_stride; + } +} + +static void +pixman_fill32 (uint32_t *bits, + int stride, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + int i; + + bits = bits + y * stride + x; + + while (height--) + { + for (i = 0; i < width; ++i) + bits[i] = xor; + + bits += stride; + } +} + +static pixman_bool_t +fast_path_fill (pixman_implementation_t *imp, + uint32_t * bits, + int stride, + int bpp, + int x, + int y, + int width, + int height, + uint32_t xor) +{ + switch (bpp) + { + case 1: + pixman_fill1 (bits, stride, x, y, width, height, xor); + break; + + case 8: + pixman_fill8 (bits, stride, x, y, width, height, xor); + break; + + case 16: + pixman_fill16 (bits, stride, x, y, width, height, xor); + break; + + case 32: + pixman_fill32 (bits, stride, x, y, width, height, xor); + break; + + default: + return _pixman_implementation_fill ( + imp->delegate, bits, stride, bpp, x, y, width, height, xor); + break; + } + + return TRUE; +} + +pixman_implementation_t * +_pixman_implementation_create_fast_path (pixman_implementation_t *fallback) +{ + pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths); + + imp->fill = fast_path_fill; + + return imp; +} diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h index bb7032d86..d08122293 100644 --- a/pixman/pixman/pixman-fast-path.h +++ b/pixman/pixman/pixman-fast-path.h @@ -1,449 +1,590 @@ -/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifndef PIXMAN_FAST_PATH_H__
-#define PIXMAN_FAST_PATH_H__
-
-#include "pixman-private.h"
-
-#define PIXMAN_REPEAT_COVER -1
-
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
-
-/*
- * For each scanline fetched from source image with PAD repeat:
- * - calculate how many pixels need to be padded on the left side
- * - calculate how many pixels need to be padded on the right side
- * - update width to only count pixels which are fetched from the image
- * All this information is returned via 'width', 'left_pad', 'right_pad'
- * arguments. The code is assuming that 'unit_x' is positive.
- *
- * Note: 64-bit math is used in order to avoid potential overflows, which
- * is probably excessive in many cases. This particular function
- * may need its own correctness test and performance tuning.
- */
-static force_inline void
-pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * width,
- int32_t * left_pad,
- int32_t * right_pad)
-{
- int64_t max_vx = (int64_t) source_image_width << 16;
- int64_t tmp;
- if (vx < 0)
- {
- tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
- if (tmp > *width)
- {
- *left_pad = *width;
- *width = 0;
- }
- else
- {
- *left_pad = (int32_t) tmp;
- *width -= (int32_t) tmp;
- }
- }
- else
- {
- *left_pad = 0;
- }
- tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
- if (tmp < 0)
- {
- *right_pad = *width;
- *width = 0;
- }
- else if (tmp >= *width)
- {
- *right_pad = 0;
- }
- else
- {
- *right_pad = *width - (int32_t) tmp;
- *width = (int32_t) tmp;
- }
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instructions latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static force_inline void \
-scanline_func_name (dst_type_t *dst, \
- src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
-{ \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int x1, x2; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
-}
-
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
-static void \
-fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- src_type_t *src_first_line; \
- int y; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_fixed_t max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, right_pad; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- int src_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
- &width, &left_pad, &right_pad); \
- vx += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (dst, src, left_pad, 0, 0, 0); \
- } \
- if (width > 0) \
- { \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- static src_type_t zero[1] = { 0 }; \
- if (y < 0 || y >= src_image->bits.height) \
- { \
- scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0); \
- continue; \
- } \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (dst, zero, left_pad, 0, 0, 0); \
- } \
- if (width > 0) \
- { \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0); \
- } \
- } \
- else \
- { \
- src = src_first_line + src_stride * y; \
- scanline_func (dst, src, width, vx, unit_x, max_vx); \
- } \
- } \
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
- FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
- OP, repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP, \
- scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- src_type_t, dst_type_t, repeat_mode)
-
-
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
-#endif
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. + */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. + * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. */ +#define GET_0565_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s1 = src[x1]; \ + \ + x2 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s2 = src[x2]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ + a2 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + } \ + \ + if (w & 1) \ + { \ + x1 = vx >> 16; \ + s1 = src[x1]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + } \ + else /* PIXMAN_OP_SRC */ \ + { \ + *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + } \ +} + +#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ +static void \ +fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \ + pixman_op_t op, \ + pixman_image_t * src_image, \ + pixman_image_t * mask_image, \ + pixman_image_t * dst_image, \ + int32_t src_x, \ + int32_t src_y, \ + int32_t mask_x, \ + int32_t mask_y, \ + int32_t dst_x, \ + int32_t dst_y, \ + int32_t width, \ + int32_t height) \ +{ \ + dst_type_t *dst_line; \ + mask_type_t *mask_line; \ + src_type_t *src_first_line; \ + int y; \ + pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ + pixman_fixed_t max_vy; \ + pixman_vector_t v; \ + pixman_fixed_t vx, vy; \ + pixman_fixed_t unit_x, unit_y; \ + int32_t left_pad, right_pad; \ + \ + src_type_t *src; \ + dst_type_t *dst; \ + mask_type_t solid_mask; \ + const mask_type_t *mask = &solid_mask; \ + int src_stride, mask_stride, dst_stride; \ + \ + PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \ + if (have_mask) \ + { \ + if (mask_is_solid) \ + solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \ + else \ + PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \ + mask_stride, mask_line, 1); \ + } \ + /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ + * transformed from destination space to source space */ \ + PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ + \ + /* reference point is the center of the pixel */ \ + v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ + v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ + v.vector[2] = pixman_fixed_1; \ + \ + if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ + return; \ + \ + unit_x = src_image->common.transform->matrix[0][0]; \ + unit_y = src_image->common.transform->matrix[1][1]; \ + \ + /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \ + v.vector[0] -= pixman_fixed_e; \ + v.vector[1] -= pixman_fixed_e; \ + \ + vx = v.vector[0]; \ + vy = v.vector[1]; \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* Clamp repeating positions inside the actual samples */ \ + max_vx = src_image->bits.width << 16; \ + max_vy = src_image->bits.height << 16; \ + \ + repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + } \ + \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ + PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \ + &width, &left_pad, &right_pad); \ + vx += left_pad * unit_x; \ + } \ + \ + while (--height >= 0) \ + { \ + dst = dst_line; \ + dst_line += dst_stride; \ + if (have_mask && !mask_is_solid) \ + { \ + mask = mask_line; \ + mask_line += mask_stride; \ + } \ + \ + y = vy >> 16; \ + vy += unit_y; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ + { \ + repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, src + src_image->bits.width - 1, \ + right_pad, 0, 0, 0, FALSE); \ + } \ + } \ + else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ + { \ + static const src_type_t zero[1] = { 0 }; \ + if (y < 0 || y >= src_image->bits.height) \ + { \ + scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \ + continue; \ + } \ + src = src_first_line + src_stride * y; \ + if (left_pad > 0) \ + { \ + scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \ + } \ + if (width > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad), \ + dst + left_pad, src, width, vx, unit_x, 0, FALSE); \ + } \ + if (right_pad > 0) \ + { \ + scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \ + dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \ + } \ + } \ + else \ + { \ + src = src_first_line + src_stride * y; \ + scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \ + } \ + } \ +} + +/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ +#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) \ + FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \ + dst_type_t, repeat_mode, have_mask, mask_is_solid) + +#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + static force_inline void \ + scanline_func##scale_func_name##_wrapper ( \ + const uint8_t *mask, \ + dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ + { \ + scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \ + } \ + FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \ + src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE) + +#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \ + repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \ + dst_type_t, repeat_mode) + +#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ + FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \ + OP, repeat_mode) \ + FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \ + scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \ + src_type_t, dst_type_t, repeat_mode) + + +#define SCALED_NEAREST_FLAGS \ + (FAST_PATH_SCALE_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NEAREST_FILTER | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT) + +#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_null, 0, \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NORMAL_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_PAD_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + (SCALED_NEAREST_FLAGS | \ + FAST_PATH_NONE_REPEAT | \ + FAST_PATH_X_UNIT_POSITIVE), \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ + } + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ + { PIXMAN_OP_ ## op, \ + PIXMAN_ ## s, \ + SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \ + PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ + PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ + fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ + } + +/* Prefer the use of 'cover' variant, because it is faster */ +#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ + SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) + +#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) + +#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) + +#endif diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c index 91adc0560..2e135e2fe 100644 --- a/pixman/pixman/pixman-sse2.c +++ b/pixman/pixman/pixman-sse2.c @@ -5800,7 +5800,8 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, - pixman_fixed_t max_vx) + pixman_fixed_t max_vx, + pixman_bool_t fully_transparent_src) { uint32_t s, d; const uint32_t* pm = NULL; @@ -5809,6 +5810,9 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, __m128i xmm_src_lo, xmm_src_hi; __m128i xmm_alpha_lo, xmm_alpha_hi; + if (fully_transparent_src) + return; + /* Align dst on a 16-byte boundary */ while (w && ((unsigned long)pd & 15)) { @@ -5894,6 +5898,119 @@ FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER, scaled_nearest_scanline_sse2_8888_8888_OVER, uint32_t, uint32_t, PAD) +static force_inline void +scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, + uint32_t * dst, + const uint32_t * src, + int32_t w, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + pixman_fixed_t max_vx, + pixman_bool_t zero_src) +{ + __m128i xmm_mask; + __m128i xmm_src, xmm_src_lo, xmm_src_hi; + __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; + __m128i xmm_alpha_lo, xmm_alpha_hi; + + if (zero_src || (*mask >> 24) == 0) + return; + + xmm_mask = create_mask_16_128 (*mask >> 24); + + while (w && (unsigned long)dst & 15) + { + uint32_t s = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + if (s) + { + uint32_t d = *dst; + + __m64 ms = unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 dest = _mm_movepi64_pi64 (xmm_mask); + __m64 alpha_dst = unpack_32_1x64 (d); + + *dst = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &dest, &alpha_dst)); + } + dst++; + w--; + } + + while (w >= 4) + { + uint32_t tmp1, tmp2, tmp3, tmp4; + + tmp1 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp2 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp3 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + tmp4 = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); + + if (!is_zero (xmm_src)) + { + xmm_dst = load_128_aligned ((__m128i*)dst); + + unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); + unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); + expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi); + + in_over_2x128 (&xmm_src_lo, &xmm_src_hi, + &xmm_alpha_lo, &xmm_alpha_hi, + &xmm_mask, &xmm_mask, + &xmm_dst_lo, &xmm_dst_hi); + + save_128_aligned ( + (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); + } + + dst += 4; + w -= 4; + } + + while (w) + { + uint32_t s = src[pixman_fixed_to_int (vx)]; + vx += unit_x; + + if (s) + { + uint32_t d = *dst; + + __m64 ms = unpack_32_1x64 (s); + __m64 alpha = expand_alpha_1x64 (ms); + __m64 mask = _mm_movepi64_pi64 (xmm_mask); + __m64 dest = unpack_32_1x64 (d); + + *dst = pack_1x64_32 ( + in_over_1x64 (&ms, &alpha, &mask, &dest)); + } + + dst++; + w--; + } + + _mm_empty (); +} + +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE) +FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER, + scaled_nearest_scanline_sse2_8888_n_8888_OVER, + uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE) + static const pixman_fast_path_t sse2_fast_paths[] = { /* PIXMAN_OP_OVER */ @@ -5990,6 +6107,11 @@ static const pixman_fast_path_t sse2_fast_paths[] = SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888), SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888), + SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888), + { PIXMAN_OP_NONE }, }; diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am index 8d8471d1c..3ce466eec 100644 --- a/pixman/test/Makefile.am +++ b/pixman/test/Makefile.am @@ -1,7 +1,6 @@ AM_CFLAGS = @OPENMP_CFLAGS@ -AM_LDFLAGS = @OPENMP_CFLAGS@ - -TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la -lm +AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@ +LDADD = $(top_builddir)/pixman/libpixman-1.la -lm INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman TESTPROGRAMS = \ @@ -22,121 +21,24 @@ TESTPROGRAMS = \ affine-test \ composite -a1_trap_test_LDADD = $(TEST_LDADD) -a1_trap_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -fetch_test_LDADD = $(TEST_LDADD) -fetch_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -trap_crasher_LDADD = $(TEST_LDADD) -trap_crasher_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -oob_test_LDADD = $(TEST_LDADD) -oob_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -scaling_crash_test_LDADD = $(TEST_LDADD) -scaling_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -region_translate_test_LDADD = $(TEST_LDADD) -region_translate_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ - -pdf_op_test_LDADD = $(TEST_LDADD) -pdf_op_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h - -region_test_LDADD = $(TEST_LDADD) -region_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ region_test_SOURCES = region-test.c utils.c utils.h - -blitters_test_LDADD = $(TEST_LDADD) -blitters_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ blitters_test_SOURCES = blitters-test.c utils.c utils.h - -scaling_test_LDADD = $(TEST_LDADD) -scaling_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ scaling_test_SOURCES = scaling-test.c utils.c utils.h - -affine_test_LDADD = $(TEST_LDADD) -affine_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ affine_test_SOURCES = affine-test.c utils.c utils.h - -alphamap_LDADD = $(TEST_LDADD) -alphamap_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ alphamap_SOURCES = alphamap.c utils.c utils.h - -alpha_loop_LDADD = $(TEST_LDADD) -alpha_loop_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h - -composite_LDADD = $(TEST_LDADD) -composite_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ composite_SOURCES = composite.c utils.c utils.h - -gradient_crash_test_LDADD = $(TEST_LDADD) -gradient_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h - -stress_test_LDADD = $(TEST_LDADD) -stress_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ stress_test_SOURCES = stress-test.c utils.c utils.h -# GTK using test programs - -if HAVE_GTK - -GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS) -GTK_UTILS = gtk-utils.c gtk-utils.h - -TESTPROGRAMS_GTK = \ - clip-test \ - clip-in \ - composite-test \ - gradient-test \ - radial-test \ - alpha-test \ - screen-test \ - convolution-test \ - trap-test - -INCLUDES += $(GTK_CFLAGS) - -gradient_test_LDADD = $(GTK_LDADD) -gradient_test_SOURCES = gradient-test.c $(GTK_UTILS) - -radial_test_LDADD = $(GTK_LDADD) -radial_test_SOURCES = radial-test.c utils.c utils.h $(GTK_UTILS) - -alpha_test_LDADD = $(GTK_LDADD) -alpha_test_SOURCES = alpha-test.c $(GTK_UTILS) - -composite_test_LDADD = $(GTK_LDADD) -composite_test_SOURCES = composite-test.c $(GTK_UTILS) - -clip_test_LDADD = $(GTK_LDADD) -clip_test_SOURCES = clip-test.c $(GTK_UTILS) - -clip_in_LDADD = $(GTK_LDADD) -clip_in_SOURCES = clip-in.c $(GTK_UTILS) - -trap_test_LDADD = $(GTK_LDADD) -trap_test_SOURCES = trap-test.c $(GTK_UTILS) - -screen_test_LDADD = $(GTK_LDADD) -screen_test_SOURCES = screen-test.c $(GTK_UTILS) - -convolution_test_LDADD = $(GTK_LDADD) -convolution_test_SOURCES = convolution-test.c $(GTK_UTILS) - -endif - # Benchmarks BENCHMARKS = \ lowlevel-blt-bench lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h -lowlevel_blt_bench_LDADD = $(TEST_LDADD) -noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK) $(BENCHMARKS) +noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS) TESTS = $(TESTPROGRAMS) diff --git a/pixman/test/scaling-test.c b/pixman/test/scaling-test.c index 7b78017a3..dbb9d39b0 100644 --- a/pixman/test/scaling-test.c +++ b/pixman/test/scaling-test.c @@ -1,250 +1,368 @@ -/*
- * Test program, which can detect some problems with nearest neighbour
- * and bilinear scaling in pixman. Testing is done by running lots
- * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8
- * and r5g6b5 color formats.
- *
- * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in
- * the case of test failure.
- */
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "utils.h"
-
-#define MAX_SRC_WIDTH 16
-#define MAX_SRC_HEIGHT 16
-#define MAX_DST_WIDTH 16
-#define MAX_DST_HEIGHT 16
-#define MAX_STRIDE 4
-
-/*
- * Composite operation with pseudorandom images
- */
-uint32_t
-test_composite (int testnum,
- int verbose)
-{
- int i;
- pixman_image_t * src_img;
- pixman_image_t * dst_img;
- pixman_transform_t transform;
- pixman_region16_t clip;
- int src_width, src_height;
- int dst_width, dst_height;
- int src_stride, dst_stride;
- int src_x, src_y;
- int dst_x, dst_y;
- int src_bpp;
- int dst_bpp;
- int w, h;
- pixman_fixed_t scale_x = 65536, scale_y = 65536;
- pixman_fixed_t translate_x = 0, translate_y = 0;
- pixman_op_t op;
- pixman_repeat_t repeat = PIXMAN_REPEAT_NONE;
- pixman_format_code_t src_fmt, dst_fmt;
- uint32_t * srcbuf;
- uint32_t * dstbuf;
- uint32_t crc32;
- FLOAT_REGS_CORRUPTION_DETECTOR_START ();
-
- lcg_srand (testnum);
-
- src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
- dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
- op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
-
- src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
- src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
- dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
- dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
- src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
- dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
-
- if (src_stride & 3)
- src_stride += 2;
-
- if (dst_stride & 3)
- dst_stride += 2;
-
- src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
- src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
- dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
- dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
- w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
- h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
-
- srcbuf = (uint32_t *)malloc (src_stride * src_height);
- dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
-
- for (i = 0; i < src_stride * src_height; i++)
- *((uint8_t *)srcbuf + i) = lcg_rand_n (256);
-
- for (i = 0; i < dst_stride * dst_height; i++)
- *((uint8_t *)dstbuf + i) = lcg_rand_n (256);
-
- src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
- PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
-
- dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
- PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
-
- src_img = pixman_image_create_bits (
- src_fmt, src_width, src_height, srcbuf, src_stride);
-
- dst_img = pixman_image_create_bits (
- dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
-
- image_endian_swap (src_img, src_bpp * 8);
- image_endian_swap (dst_img, dst_bpp * 8);
-
- if (lcg_rand_n (8) > 0)
- {
- scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
- scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
- translate_x = lcg_rand_N (65536);
- translate_y = lcg_rand_N (65536);
- pixman_transform_init_scale (&transform, scale_x, scale_y);
- pixman_transform_translate (&transform, NULL, translate_x, translate_y);
- pixman_image_set_transform (src_img, &transform);
- }
-
- switch (lcg_rand_n (4))
- {
- case 0:
- repeat = PIXMAN_REPEAT_NONE;
- break;
-
- case 1:
- repeat = PIXMAN_REPEAT_NORMAL;
- break;
-
- case 2:
- repeat = PIXMAN_REPEAT_PAD;
- break;
-
- case 3:
- repeat = PIXMAN_REPEAT_REFLECT;
- break;
-
- default:
- break;
- }
- pixman_image_set_repeat (src_img, repeat);
-
- if (lcg_rand_n (2))
- pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
- else
- pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
-
- if (verbose)
- {
- printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
- printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
- op, scale_x, scale_y, repeat);
- printf ("translate_x=%d, translate_y=%d\n",
- translate_x, translate_y);
- printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
- src_width, src_height, dst_width, dst_height);
- printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
- src_x, src_y, dst_x, dst_y);
- printf ("w=%d, h=%d\n", w, h);
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
-
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (src_width);
- clip_boxes[i].y1 = lcg_rand_n (src_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("source clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
-
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (src_img, &clip);
- pixman_image_set_source_clipping (src_img, 1);
- pixman_region_fini (&clip);
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (dst_width);
- clip_boxes[i].y1 = lcg_rand_n (dst_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("destination clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (dst_img, &clip);
- pixman_region_fini (&clip);
- }
-
- pixman_image_composite (op, src_img, NULL, dst_img,
- src_x, src_y, 0, 0, dst_x, dst_y, w, h);
-
- if (dst_fmt == PIXMAN_x8r8g8b8)
- {
- /* ignore unused part */
- for (i = 0; i < dst_stride * dst_height / 4; i++)
- dstbuf[i] &= 0xFFFFFF;
- }
-
- image_endian_swap (dst_img, dst_bpp * 8);
-
- if (verbose)
- {
- int j;
-
- for (i = 0; i < dst_height; i++)
- {
- for (j = 0; j < dst_stride; j++)
- printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-
- printf ("\n");
- }
- }
-
- pixman_image_unref (src_img);
- pixman_image_unref (dst_img);
-
- crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
- free (srcbuf);
- free (dstbuf);
-
- FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
- return crc32;
-}
-
-int
-main (int argc, const char *argv[])
-{
- pixman_disable_out_of_bounds_workaround ();
-
- return fuzzer_test_main("scaling", 8000000, 0x7F1AB59F,
- test_composite, argc, argv);
-}
+/* + * Test program, which can detect some problems with nearest neighbour + * and bilinear scaling in pixman. Testing is done by running lots + * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8 + * and r5g6b5 color formats. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. + */ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include "utils.h" + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 8 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 8 +#define MAX_STRIDE 4 + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ + int i; + pixman_image_t * src_img; + pixman_image_t * mask_img; + pixman_image_t * dst_img; + pixman_transform_t transform; + pixman_region16_t clip; + int src_width, src_height; + int mask_width, mask_height; + int dst_width, dst_height; + int src_stride, mask_stride, dst_stride; + int src_x, src_y; + int mask_x, mask_y; + int dst_x, dst_y; + int src_bpp; + int mask_bpp = 1; + int dst_bpp; + int w, h; + pixman_fixed_t scale_x = 65536, scale_y = 65536; + pixman_fixed_t translate_x = 0, translate_y = 0; + pixman_fixed_t mask_scale_x = 65536, mask_scale_y = 65536; + pixman_fixed_t mask_translate_x = 0, mask_translate_y = 0; + pixman_op_t op; + pixman_repeat_t repeat = PIXMAN_REPEAT_NONE; + pixman_repeat_t mask_repeat = PIXMAN_REPEAT_NONE; + pixman_format_code_t src_fmt, dst_fmt; + uint32_t * srcbuf; + uint32_t * dstbuf; + uint32_t * maskbuf; + uint32_t crc32; + FLOAT_REGS_CORRUPTION_DETECTOR_START (); + + lcg_srand (testnum); + + src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; + dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4; + switch (lcg_rand_n (3)) + { + case 0: + op = PIXMAN_OP_SRC; + break; + case 1: + op = PIXMAN_OP_OVER; + break; + default: + op = PIXMAN_OP_ADD; + break; + } + + src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + + if (lcg_rand_n (2)) + { + mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + } + else + { + mask_width = mask_height = 1; + } + + dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1; + dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1; + src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp; + dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp; + + if (src_stride & 3) + src_stride += 2; + + if (mask_stride & 1) + mask_stride += 1; + if (mask_stride & 2) + mask_stride += 2; + + if (dst_stride & 3) + dst_stride += 2; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2); + mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2); + dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2); + dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2); + w = lcg_rand_n (dst_width * 3 / 2 - dst_x); + h = lcg_rand_n (dst_height * 3 / 2 - dst_y); + + srcbuf = (uint32_t *)malloc (src_stride * src_height); + maskbuf = (uint32_t *)malloc (mask_stride * mask_height); + dstbuf = (uint32_t *)malloc (dst_stride * dst_height); + + for (i = 0; i < src_stride * src_height; i++) + *((uint8_t *)srcbuf + i) = lcg_rand_n (256); + + for (i = 0; i < mask_stride * mask_height; i++) + *((uint8_t *)maskbuf + i) = lcg_rand_n (256); + + for (i = 0; i < dst_stride * dst_height; i++) + *((uint8_t *)dstbuf + i) = lcg_rand_n (256); + + src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ? + PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; + + src_img = pixman_image_create_bits ( + src_fmt, src_width, src_height, srcbuf, src_stride); + + mask_img = pixman_image_create_bits ( + PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride); + + dst_img = pixman_image_create_bits ( + dst_fmt, dst_width, dst_height, dstbuf, dst_stride); + + image_endian_swap (src_img, src_bpp * 8); + image_endian_swap (dst_img, dst_bpp * 8); + + if (lcg_rand_n (4) > 0) + { + scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); + scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); + translate_x = lcg_rand_N (65536); + translate_y = lcg_rand_N (65536); + pixman_transform_init_scale (&transform, scale_x, scale_y); + pixman_transform_translate (&transform, NULL, translate_x, translate_y); + pixman_image_set_transform (src_img, &transform); + } + + if (lcg_rand_n (2) > 0) + { + mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5); + mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5); + mask_translate_x = lcg_rand_N (65536); + mask_translate_y = lcg_rand_N (65536); + pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y); + pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y); + pixman_image_set_transform (mask_img, &transform); + } + + switch (lcg_rand_n (4)) + { + case 0: + mask_repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + mask_repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + mask_repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + mask_repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (mask_img, mask_repeat); + + switch (lcg_rand_n (4)) + { + case 0: + repeat = PIXMAN_REPEAT_NONE; + break; + + case 1: + repeat = PIXMAN_REPEAT_NORMAL; + break; + + case 2: + repeat = PIXMAN_REPEAT_PAD; + break; + + case 3: + repeat = PIXMAN_REPEAT_REFLECT; + break; + + default: + break; + } + pixman_image_set_repeat (src_img, repeat); + + if (lcg_rand_n (2)) + pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (lcg_rand_n (2)) + pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); + + if (verbose) + { + printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt); + printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n", + op, scale_x, scale_y, repeat); + printf ("translate_x=%d, translate_y=%d\n", + translate_x, translate_y); + printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n", + src_width, src_height, dst_width, dst_height); + printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n", + src_x, src_y, dst_x, dst_y); + printf ("w=%d, h=%d\n", w, h); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("source clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (src_img, &clip); + pixman_image_set_source_clipping (src_img, 1); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (mask_width); + clip_boxes[i].y1 = lcg_rand_n (mask_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("mask clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (mask_img, &clip); + pixman_image_set_source_clipping (mask_img, 1); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + int n = lcg_rand_n (2) + 1; + for (i = 0; i < n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (dst_width); + clip_boxes[i].y1 = lcg_rand_n (dst_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1); + + if (verbose) + { + printf ("destination clip box: [%d,%d-%d,%d]\n", + clip_boxes[i].x1, clip_boxes[i].y1, + clip_boxes[i].x2, clip_boxes[i].y2); + } + } + pixman_region_init_rects (&clip, clip_boxes, n); + pixman_image_set_clip_region (dst_img, &clip); + pixman_region_fini (&clip); + } + + if (lcg_rand_n (2) == 0) + pixman_image_composite (op, src_img, NULL, dst_img, + src_x, src_y, 0, 0, dst_x, dst_y, w, h); + else + pixman_image_composite (op, src_img, mask_img, dst_img, + src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); + + if (dst_fmt == PIXMAN_x8r8g8b8) + { + /* ignore unused part */ + for (i = 0; i < dst_stride * dst_height / 4; i++) + dstbuf[i] &= 0xFFFFFF; + } + + image_endian_swap (dst_img, dst_bpp * 8); + + if (verbose) + { + int j; + + for (i = 0; i < dst_height; i++) + { + for (j = 0; j < dst_stride; j++) + printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j)); + + printf ("\n"); + } + } + + pixman_image_unref (src_img); + pixman_image_unref (mask_img); + pixman_image_unref (dst_img); + + crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); + free (srcbuf); + free (maskbuf); + free (dstbuf); + + FLOAT_REGS_CORRUPTION_DETECTOR_FINISH (); + return crc32; +} + +int +main (int argc, const char *argv[]) +{ + pixman_disable_out_of_bounds_workaround (); + + return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2, + test_composite, argc, argv); +} |