aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mesalib/src/glsl/ir_constant_expression.cpp58
-rw-r--r--mesalib/src/glsl/linker.cpp2
-rw-r--r--mesalib/src/mesa/main/mtypes.h6739
-rw-r--r--mesalib/src/mesa/main/state.c1466
-rw-r--r--mesalib/src/mesa/state_tracker/st_atom_blend.c601
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_bitmap.c1783
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c915
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_clear.c1122
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_drawpixels.c2739
-rw-r--r--mesalib/src/mesa/state_tracker/st_cb_drawtex.c611
-rw-r--r--mesalib/src/mesa/state_tracker/st_context.c5
-rw-r--r--mesalib/src/mesa/state_tracker/st_context.h535
-rw-r--r--mesalib/src/mesa/state_tracker/st_draw.c1537
-rw-r--r--mesalib/src/mesa/state_tracker/st_draw_feedback.c1
-rw-r--r--mesalib/src/mesa/state_tracker/st_gen_mipmap.c846
-rw-r--r--mesalib/src/mesa/tnl/t_draw.c1028
-rw-r--r--mesalib/src/mesa/vbo/vbo_exec_array.c2559
-rw-r--r--mesalib/src/mesa/vbo/vbo_exec_draw.c841
-rw-r--r--mesalib/src/mesa/vbo/vbo_save_draw.c609
-rw-r--r--pixman/Makefile.am2
-rw-r--r--pixman/configure.ac1
-rw-r--r--pixman/demos/Makefile.am34
-rw-r--r--pixman/demos/alpha-test.c (renamed from pixman/test/alpha-test.c)0
-rw-r--r--pixman/demos/clip-in.c (renamed from pixman/test/clip-in.c)0
-rw-r--r--pixman/demos/clip-test.c (renamed from pixman/test/clip-test.c)0
-rw-r--r--pixman/demos/composite-test.c (renamed from pixman/test/composite-test.c)0
-rw-r--r--pixman/demos/convolution-test.c (renamed from pixman/test/convolution-test.c)0
-rw-r--r--pixman/demos/gradient-test.c (renamed from pixman/test/gradient-test.c)0
-rw-r--r--pixman/demos/gtk-utils.c (renamed from pixman/test/gtk-utils.c)0
-rw-r--r--pixman/demos/gtk-utils.h (renamed from pixman/test/gtk-utils.h)0
-rw-r--r--pixman/demos/radial-test.c (renamed from pixman/test/radial-test.c)2
-rw-r--r--pixman/demos/screen-test.c (renamed from pixman/test/screen-test.c)0
-rw-r--r--pixman/demos/trap-test.c (renamed from pixman/test/trap-test.c)0
-rw-r--r--pixman/pixman/pixman-arm-common.h59
-rw-r--r--pixman/pixman/pixman-arm-neon-asm.S4758
-rw-r--r--pixman/pixman/pixman-arm-neon.c11
-rw-r--r--pixman/pixman/pixman-fast-path.c4455
-rw-r--r--pixman/pixman/pixman-fast-path.h1039
-rw-r--r--pixman/pixman/pixman-sse2.c124
-rw-r--r--pixman/test/Makefile.am104
-rw-r--r--pixman/test/scaling-test.c618
41 files changed, 17829 insertions, 17375 deletions
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 2841fb350..2a3084896 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -288,24 +288,20 @@ ir_expression::constant_expression_value()
break;
case ir_unop_rcp:
- /* FINISHME: Emit warning when division-by-zero is detected. */
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
switch (this->type->base_type) {
case GLSL_TYPE_UINT:
- if (op[0]->value.u[c] == 0.0)
- return NULL;
- data.u[c] = 1 / op[0]->value.u[c];
+ if (op[0]->value.u[c] != 0.0)
+ data.u[c] = 1 / op[0]->value.u[c];
break;
case GLSL_TYPE_INT:
- if (op[0]->value.i[c] == 0.0)
- return NULL;
- data.i[c] = 1 / op[0]->value.i[c];
+ if (op[0]->value.i[c] != 0.0)
+ data.i[c] = 1 / op[0]->value.i[c];
break;
case GLSL_TYPE_FLOAT:
- if (op[0]->value.f[c] == 0.0)
- return NULL;
- data.f[c] = 1.0F / op[0]->value.f[c];
+ if (op[0]->value.f[c] != 0.0)
+ data.f[c] = 1.0F / op[0]->value.f[c];
break;
default:
assert(0);
@@ -314,13 +310,9 @@ ir_expression::constant_expression_value()
break;
case ir_unop_rsq:
- /* FINISHME: Emit warning when division-by-zero is detected. */
assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
for (unsigned c = 0; c < op[0]->type->components(); c++) {
- float s = sqrtf(op[0]->value.f[c]);
- if (s == 0)
- return NULL;
- data.f[c] = 1.0F / s;
+ data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
}
break;
@@ -523,18 +515,20 @@ ir_expression::constant_expression_value()
switch (op[0]->type->base_type) {
case GLSL_TYPE_UINT:
- if (op[1]->value.u[c1] == 0)
- return NULL;
- data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
+ if (op[1]->value.u[c1] == 0) {
+ data.u[c] = 0;
+ } else {
+ data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
+ }
break;
case GLSL_TYPE_INT:
- if (op[1]->value.i[c1] == 0)
- return NULL;
- data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
+ if (op[1]->value.i[c1] == 0) {
+ data.i[c] = 0;
+ } else {
+ data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
+ }
break;
case GLSL_TYPE_FLOAT:
- if (op[1]->value.f[c1] == 0)
- return NULL;
data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
break;
default:
@@ -552,18 +546,20 @@ ir_expression::constant_expression_value()
switch (op[0]->type->base_type) {
case GLSL_TYPE_UINT:
- if (op[1]->value.u[c1] == 0)
- return NULL;
- data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
+ if (op[1]->value.u[c1] == 0) {
+ data.u[c] = 0;
+ } else {
+ data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
+ }
break;
case GLSL_TYPE_INT:
- if (op[1]->value.i[c1] == 0)
- return NULL;
- data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
+ if (op[1]->value.i[c1] == 0) {
+ data.i[c] = 0;
+ } else {
+ data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
+ }
break;
case GLSL_TYPE_FLOAT:
- if (op[1]->value.f[c1] == 0)
- return NULL;
/* We don't use fmod because it rounds toward zero; GLSL specifies
* the use of floor.
*/
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 46cd1950c..6c003bb02 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -926,7 +926,7 @@ link_intrastage_shaders(void *mem_ctx,
if (var->type->is_array() && (var->type->length == 0)) {
const glsl_type *type =
glsl_type::get_array_instance(var->type->fields.array,
- var->max_array_access);
+ var->max_array_access + 1);
assert(type != NULL);
var->type = type;
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index 020595bd0..b5966dffe 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -1,3369 +1,3370 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.7
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file mtypes.h
- * Main Mesa data structures.
- *
- * Please try to mark derived values with a leading underscore ('_').
- */
-
-#ifndef MTYPES_H
-#define MTYPES_H
-
-
-#include "main/glheader.h"
-#include "main/config.h"
-#include "main/mfeatures.h"
-#include "glapi/glapi.h"
-#include "math/m_matrix.h" /* GLmatrix */
-#include "main/simple_list.h" /* struct simple_node */
-#include "main/formats.h" /* MESA_FORMAT_COUNT */
-
-
-/**
- * Color channel data type.
- */
-#if CHAN_BITS == 8
- typedef GLubyte GLchan;
-#define CHAN_MAX 255
-#define CHAN_MAXF 255.0F
-#define CHAN_TYPE GL_UNSIGNED_BYTE
-#elif CHAN_BITS == 16
- typedef GLushort GLchan;
-#define CHAN_MAX 65535
-#define CHAN_MAXF 65535.0F
-#define CHAN_TYPE GL_UNSIGNED_SHORT
-#elif CHAN_BITS == 32
- typedef GLfloat GLchan;
-#define CHAN_MAX 1.0
-#define CHAN_MAXF 1.0F
-#define CHAN_TYPE GL_FLOAT
-#else
-#error "illegal number of color channel bits"
-#endif
-
-
-/**
- * Stencil buffer data type.
- */
-#if STENCIL_BITS==8
- typedef GLubyte GLstencil;
-#elif STENCIL_BITS==16
- typedef GLushort GLstencil;
-#else
-# error "illegal number of stencil bits"
-#endif
-
-
-/**
- * \name 64-bit extension of GLbitfield.
- */
-/*@{*/
-typedef GLuint64 GLbitfield64;
-
-/** Set a single bit */
-#define BITFIELD64_BIT(b) (1ULL << (b))
-
-
-/**
- * \name Some forward type declarations
- */
-/*@{*/
-struct _mesa_HashTable;
-struct gl_attrib_node;
-struct gl_list_extensions;
-struct gl_meta_state;
-struct gl_pixelstore_attrib;
-struct gl_program_cache;
-struct gl_texture_format;
-struct gl_texture_image;
-struct gl_texture_object;
-struct gl_context;
-struct st_context;
-/*@}*/
-
-
-/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
-#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1)
-#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
-#define PRIM_UNKNOWN (GL_POLYGON+3)
-
-
-/**
- * Shader stages. Note that these will become 5 with tessellation.
- * These MUST have the same values as gallium's PIPE_SHADER_*
- */
-typedef enum
-{
- MESA_SHADER_VERTEX = 0,
- MESA_SHADER_FRAGMENT = 1,
- MESA_SHADER_GEOMETRY = 2,
- MESA_SHADER_TYPES = 3
-} gl_shader_type;
-
-
-
-/**
- * Indexes for vertex program attributes.
- * GL_NV_vertex_program aliases generic attributes over the conventional
- * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
- * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
- * generic attributes are distinct/separate).
- */
-typedef enum
-{
- VERT_ATTRIB_POS = 0,
- VERT_ATTRIB_WEIGHT = 1,
- VERT_ATTRIB_NORMAL = 2,
- VERT_ATTRIB_COLOR0 = 3,
- VERT_ATTRIB_COLOR1 = 4,
- VERT_ATTRIB_FOG = 5,
- VERT_ATTRIB_COLOR_INDEX = 6,
- VERT_ATTRIB_POINT_SIZE = 6, /*alias*/
- VERT_ATTRIB_EDGEFLAG = 7,
- VERT_ATTRIB_TEX0 = 8,
- VERT_ATTRIB_TEX1 = 9,
- VERT_ATTRIB_TEX2 = 10,
- VERT_ATTRIB_TEX3 = 11,
- VERT_ATTRIB_TEX4 = 12,
- VERT_ATTRIB_TEX5 = 13,
- VERT_ATTRIB_TEX6 = 14,
- VERT_ATTRIB_TEX7 = 15,
- VERT_ATTRIB_GENERIC0 = 16,
- VERT_ATTRIB_GENERIC1 = 17,
- VERT_ATTRIB_GENERIC2 = 18,
- VERT_ATTRIB_GENERIC3 = 19,
- VERT_ATTRIB_GENERIC4 = 20,
- VERT_ATTRIB_GENERIC5 = 21,
- VERT_ATTRIB_GENERIC6 = 22,
- VERT_ATTRIB_GENERIC7 = 23,
- VERT_ATTRIB_GENERIC8 = 24,
- VERT_ATTRIB_GENERIC9 = 25,
- VERT_ATTRIB_GENERIC10 = 26,
- VERT_ATTRIB_GENERIC11 = 27,
- VERT_ATTRIB_GENERIC12 = 28,
- VERT_ATTRIB_GENERIC13 = 29,
- VERT_ATTRIB_GENERIC14 = 30,
- VERT_ATTRIB_GENERIC15 = 31,
- VERT_ATTRIB_MAX = 32
-} gl_vert_attrib;
-
-/**
- * Bitflags for vertex attributes.
- * These are used in bitfields in many places.
- */
-/*@{*/
-#define VERT_BIT_POS (1 << VERT_ATTRIB_POS)
-#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT)
-#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL)
-#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0)
-#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1)
-#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG)
-#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX)
-#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG)
-#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0)
-#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1)
-#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2)
-#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3)
-#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4)
-#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5)
-#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6)
-#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7)
-#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0)
-#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1)
-#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2)
-#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3)
-#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4)
-#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5)
-#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6)
-#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7)
-#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8)
-#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9)
-#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10)
-#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11)
-#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12)
-#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13)
-#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14)
-#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15)
-
-#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u)))
-#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g)))
-/*@}*/
-
-
-/**
- * Indexes for vertex program result attributes
- */
-typedef enum
-{
- VERT_RESULT_HPOS = 0,
- VERT_RESULT_COL0 = 1,
- VERT_RESULT_COL1 = 2,
- VERT_RESULT_FOGC = 3,
- VERT_RESULT_TEX0 = 4,
- VERT_RESULT_TEX1 = 5,
- VERT_RESULT_TEX2 = 6,
- VERT_RESULT_TEX3 = 7,
- VERT_RESULT_TEX4 = 8,
- VERT_RESULT_TEX5 = 9,
- VERT_RESULT_TEX6 = 10,
- VERT_RESULT_TEX7 = 11,
- VERT_RESULT_PSIZ = 12,
- VERT_RESULT_BFC0 = 13,
- VERT_RESULT_BFC1 = 14,
- VERT_RESULT_EDGE = 15,
- VERT_RESULT_VAR0 = 16, /**< shader varying */
- VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING)
-} gl_vert_result;
-
-
-/*********************************************/
-
-/**
- * Indexes for geometry program attributes.
- */
-typedef enum
-{
- GEOM_ATTRIB_POSITION = 0,
- GEOM_ATTRIB_COLOR0 = 1,
- GEOM_ATTRIB_COLOR1 = 2,
- GEOM_ATTRIB_SECONDARY_COLOR0 = 3,
- GEOM_ATTRIB_SECONDARY_COLOR1 = 4,
- GEOM_ATTRIB_FOG_FRAG_COORD = 5,
- GEOM_ATTRIB_POINT_SIZE = 6,
- GEOM_ATTRIB_CLIP_VERTEX = 7,
- GEOM_ATTRIB_PRIMITIVE_ID = 8,
- GEOM_ATTRIB_TEX_COORD = 9,
-
- GEOM_ATTRIB_VAR0 = 16,
- GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING)
-} gl_geom_attrib;
-
-/**
- * Bitflags for geometry attributes.
- * These are used in bitfields in many places.
- */
-/*@{*/
-#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0)
-#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1)
-#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0)
-#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1)
-#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD)
-#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD)
-#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION)
-#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE)
-#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX)
-#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID)
-#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0)
-
-#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g)))
-/*@}*/
-
-
-/**
- * Indexes for geometry program result attributes
- */
-typedef enum
-{
- GEOM_RESULT_POS = 0,
- GEOM_RESULT_COL0 = 1,
- GEOM_RESULT_COL1 = 2,
- GEOM_RESULT_SCOL0 = 3,
- GEOM_RESULT_SCOL1 = 4,
- GEOM_RESULT_FOGC = 5,
- GEOM_RESULT_TEX0 = 6,
- GEOM_RESULT_TEX1 = 7,
- GEOM_RESULT_TEX2 = 8,
- GEOM_RESULT_TEX3 = 9,
- GEOM_RESULT_TEX4 = 10,
- GEOM_RESULT_TEX5 = 11,
- GEOM_RESULT_TEX6 = 12,
- GEOM_RESULT_TEX7 = 13,
- GEOM_RESULT_PSIZ = 14,
- GEOM_RESULT_CLPV = 15,
- GEOM_RESULT_PRID = 16,
- GEOM_RESULT_LAYR = 17,
- GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */
- /* ### we need to -2 because var0 is 18 instead 16 like in the others */
- GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2)
-} gl_geom_result;
-
-
-/**
- * Indexes for fragment program input attributes.
- */
-typedef enum
-{
- FRAG_ATTRIB_WPOS = 0,
- FRAG_ATTRIB_COL0 = 1,
- FRAG_ATTRIB_COL1 = 2,
- FRAG_ATTRIB_FOGC = 3,
- FRAG_ATTRIB_TEX0 = 4,
- FRAG_ATTRIB_TEX1 = 5,
- FRAG_ATTRIB_TEX2 = 6,
- FRAG_ATTRIB_TEX3 = 7,
- FRAG_ATTRIB_TEX4 = 8,
- FRAG_ATTRIB_TEX5 = 9,
- FRAG_ATTRIB_TEX6 = 10,
- FRAG_ATTRIB_TEX7 = 11,
- FRAG_ATTRIB_FACE = 12, /**< front/back face */
- FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */
- FRAG_ATTRIB_VAR0 = 14, /**< shader varying */
- FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)
-} gl_frag_attrib;
-
-/**
- * Bitflags for fragment program input attributes.
- */
-/*@{*/
-#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS)
-#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0)
-#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1)
-#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC)
-#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE)
-#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC)
-#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0)
-#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1)
-#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2)
-#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3)
-#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4)
-#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5)
-#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6)
-#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7)
-#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0)
-
-#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U))
-#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V))
-
-#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \
- FRAG_BIT_TEX1| \
- FRAG_BIT_TEX2| \
- FRAG_BIT_TEX3| \
- FRAG_BIT_TEX4| \
- FRAG_BIT_TEX5| \
- FRAG_BIT_TEX6| \
- FRAG_BIT_TEX7)
-/*@}*/
-
-
-/**
- * Fragment program results
- */
-typedef enum
-{
- FRAG_RESULT_DEPTH = 0,
- FRAG_RESULT_STENCIL = 1,
- FRAG_RESULT_COLOR = 2,
- FRAG_RESULT_DATA0 = 3,
- FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
-} gl_frag_result;
-
-
-/**
- * Indexes for all renderbuffers
- */
-typedef enum
-{
- /* the four standard color buffers */
- BUFFER_FRONT_LEFT,
- BUFFER_BACK_LEFT,
- BUFFER_FRONT_RIGHT,
- BUFFER_BACK_RIGHT,
- BUFFER_DEPTH,
- BUFFER_STENCIL,
- BUFFER_ACCUM,
- /* optional aux buffer */
- BUFFER_AUX0,
- /* generic renderbuffers */
- BUFFER_COLOR0,
- BUFFER_COLOR1,
- BUFFER_COLOR2,
- BUFFER_COLOR3,
- BUFFER_COLOR4,
- BUFFER_COLOR5,
- BUFFER_COLOR6,
- BUFFER_COLOR7,
- BUFFER_COUNT
-} gl_buffer_index;
-
-/**
- * Bit flags for all renderbuffers
- */
-#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT)
-#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT)
-#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT)
-#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT)
-#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0)
-#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1)
-#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2)
-#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3)
-#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH)
-#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL)
-#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM)
-#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0)
-#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1)
-#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2)
-#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3)
-#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4)
-#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5)
-#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6)
-#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7)
-
-/**
- * Mask of all the color buffer bits (but not accum).
- */
-#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \
- BUFFER_BIT_BACK_LEFT | \
- BUFFER_BIT_FRONT_RIGHT | \
- BUFFER_BIT_BACK_RIGHT | \
- BUFFER_BIT_AUX0 | \
- BUFFER_BIT_COLOR0 | \
- BUFFER_BIT_COLOR1 | \
- BUFFER_BIT_COLOR2 | \
- BUFFER_BIT_COLOR3 | \
- BUFFER_BIT_COLOR4 | \
- BUFFER_BIT_COLOR5 | \
- BUFFER_BIT_COLOR6 | \
- BUFFER_BIT_COLOR7)
-
-
-/**
- * Framebuffer configuration (aka visual / pixelformat)
- * Note: some of these fields should be boolean, but it appears that
- * code in drivers/dri/common/util.c requires int-sized fields.
- */
-struct gl_config
-{
- GLboolean rgbMode;
- GLboolean floatMode;
- GLboolean colorIndexMode; /* XXX is this used anywhere? */
- GLuint doubleBufferMode;
- GLuint stereoMode;
-
- GLboolean haveAccumBuffer;
- GLboolean haveDepthBuffer;
- GLboolean haveStencilBuffer;
-
- GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */
- GLuint redMask, greenMask, blueMask, alphaMask;
- GLint rgbBits; /* total bits for rgb */
- GLint indexBits; /* total bits for colorindex */
-
- GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits;
- GLint depthBits;
- GLint stencilBits;
-
- GLint numAuxBuffers;
-
- GLint level;
-
- /* EXT_visual_rating / GLX 1.2 */
- GLint visualRating;
-
- /* EXT_visual_info / GLX 1.2 */
- GLint transparentPixel;
- /* colors are floats scaled to ints */
- GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha;
- GLint transparentIndex;
-
- /* ARB_multisample / SGIS_multisample */
- GLint sampleBuffers;
- GLint samples;
-
- /* SGIX_pbuffer / GLX 1.3 */
- GLint maxPbufferWidth;
- GLint maxPbufferHeight;
- GLint maxPbufferPixels;
- GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */
- GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */
-
- /* OML_swap_method */
- GLint swapMethod;
-
- /* EXT_texture_from_pixmap */
- GLint bindToTextureRgb;
- GLint bindToTextureRgba;
- GLint bindToMipmapTexture;
- GLint bindToTextureTargets;
- GLint yInverted;
-
- /* EXT_framebuffer_sRGB */
- GLint sRGBCapable;
-};
-
-
-/**
- * Data structure for color tables
- */
-struct gl_color_table
-{
- GLenum InternalFormat; /**< The user-specified format */
- GLenum _BaseFormat; /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */
- GLuint Size; /**< number of entries in table */
- GLfloat *TableF; /**< Color table, floating point values */
- GLubyte *TableUB; /**< Color table, ubyte values */
- GLubyte RedSize;
- GLubyte GreenSize;
- GLubyte BlueSize;
- GLubyte AlphaSize;
- GLubyte LuminanceSize;
- GLubyte IntensitySize;
-};
-
-
-/**
- * \name Bit flags used for updating material values.
- */
-/*@{*/
-#define MAT_ATTRIB_FRONT_AMBIENT 0
-#define MAT_ATTRIB_BACK_AMBIENT 1
-#define MAT_ATTRIB_FRONT_DIFFUSE 2
-#define MAT_ATTRIB_BACK_DIFFUSE 3
-#define MAT_ATTRIB_FRONT_SPECULAR 4
-#define MAT_ATTRIB_BACK_SPECULAR 5
-#define MAT_ATTRIB_FRONT_EMISSION 6
-#define MAT_ATTRIB_BACK_EMISSION 7
-#define MAT_ATTRIB_FRONT_SHININESS 8
-#define MAT_ATTRIB_BACK_SHININESS 9
-#define MAT_ATTRIB_FRONT_INDEXES 10
-#define MAT_ATTRIB_BACK_INDEXES 11
-#define MAT_ATTRIB_MAX 12
-
-#define MAT_ATTRIB_AMBIENT(f) (MAT_ATTRIB_FRONT_AMBIENT+(f))
-#define MAT_ATTRIB_DIFFUSE(f) (MAT_ATTRIB_FRONT_DIFFUSE+(f))
-#define MAT_ATTRIB_SPECULAR(f) (MAT_ATTRIB_FRONT_SPECULAR+(f))
-#define MAT_ATTRIB_EMISSION(f) (MAT_ATTRIB_FRONT_EMISSION+(f))
-#define MAT_ATTRIB_SHININESS(f)(MAT_ATTRIB_FRONT_SHININESS+(f))
-#define MAT_ATTRIB_INDEXES(f) (MAT_ATTRIB_FRONT_INDEXES+(f))
-
-#define MAT_INDEX_AMBIENT 0
-#define MAT_INDEX_DIFFUSE 1
-#define MAT_INDEX_SPECULAR 2
-
-#define MAT_BIT_FRONT_AMBIENT (1<<MAT_ATTRIB_FRONT_AMBIENT)
-#define MAT_BIT_BACK_AMBIENT (1<<MAT_ATTRIB_BACK_AMBIENT)
-#define MAT_BIT_FRONT_DIFFUSE (1<<MAT_ATTRIB_FRONT_DIFFUSE)
-#define MAT_BIT_BACK_DIFFUSE (1<<MAT_ATTRIB_BACK_DIFFUSE)
-#define MAT_BIT_FRONT_SPECULAR (1<<MAT_ATTRIB_FRONT_SPECULAR)
-#define MAT_BIT_BACK_SPECULAR (1<<MAT_ATTRIB_BACK_SPECULAR)
-#define MAT_BIT_FRONT_EMISSION (1<<MAT_ATTRIB_FRONT_EMISSION)
-#define MAT_BIT_BACK_EMISSION (1<<MAT_ATTRIB_BACK_EMISSION)
-#define MAT_BIT_FRONT_SHININESS (1<<MAT_ATTRIB_FRONT_SHININESS)
-#define MAT_BIT_BACK_SHININESS (1<<MAT_ATTRIB_BACK_SHININESS)
-#define MAT_BIT_FRONT_INDEXES (1<<MAT_ATTRIB_FRONT_INDEXES)
-#define MAT_BIT_BACK_INDEXES (1<<MAT_ATTRIB_BACK_INDEXES)
-
-
-#define FRONT_MATERIAL_BITS (MAT_BIT_FRONT_EMISSION | \
- MAT_BIT_FRONT_AMBIENT | \
- MAT_BIT_FRONT_DIFFUSE | \
- MAT_BIT_FRONT_SPECULAR | \
- MAT_BIT_FRONT_SHININESS | \
- MAT_BIT_FRONT_INDEXES)
-
-#define BACK_MATERIAL_BITS (MAT_BIT_BACK_EMISSION | \
- MAT_BIT_BACK_AMBIENT | \
- MAT_BIT_BACK_DIFFUSE | \
- MAT_BIT_BACK_SPECULAR | \
- MAT_BIT_BACK_SHININESS | \
- MAT_BIT_BACK_INDEXES)
-
-#define ALL_MATERIAL_BITS (FRONT_MATERIAL_BITS | BACK_MATERIAL_BITS)
-/*@}*/
-
-
-#define EXP_TABLE_SIZE 512 /**< Specular exponent lookup table sizes */
-#define SHINE_TABLE_SIZE 256 /**< Material shininess lookup table sizes */
-
-/**
- * Material shininess lookup table.
- */
-struct gl_shine_tab
-{
- struct gl_shine_tab *next, *prev;
- GLfloat tab[SHINE_TABLE_SIZE+1];
- GLfloat shininess;
- GLuint refcount;
-};
-
-
-/**
- * Light source state.
- */
-struct gl_light
-{
- struct gl_light *next; /**< double linked list with sentinel */
- struct gl_light *prev;
-
- GLfloat Ambient[4]; /**< ambient color */
- GLfloat Diffuse[4]; /**< diffuse color */
- GLfloat Specular[4]; /**< specular color */
- GLfloat EyePosition[4]; /**< position in eye coordinates */
- GLfloat SpotDirection[4]; /**< spotlight direction in eye coordinates */
- GLfloat SpotExponent;
- GLfloat SpotCutoff; /**< in degrees */
- GLfloat _CosCutoffNeg; /**< = cos(SpotCutoff) */
- GLfloat _CosCutoff; /**< = MAX(0, cos(SpotCutoff)) */
- GLfloat ConstantAttenuation;
- GLfloat LinearAttenuation;
- GLfloat QuadraticAttenuation;
- GLboolean Enabled; /**< On/off flag */
-
- /**
- * \name Derived fields
- */
- /*@{*/
- GLbitfield _Flags; /**< State */
-
- GLfloat _Position[4]; /**< position in eye/obj coordinates */
- GLfloat _VP_inf_norm[3]; /**< Norm direction to infinite light */
- GLfloat _h_inf_norm[3]; /**< Norm( _VP_inf_norm + <0,0,1> ) */
- GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */
- GLfloat _VP_inf_spot_attenuation;
-
- GLfloat _SpotExpTable[EXP_TABLE_SIZE][2]; /**< to replace a pow() call */
- GLfloat _MatAmbient[2][3]; /**< material ambient * light ambient */
- GLfloat _MatDiffuse[2][3]; /**< material diffuse * light diffuse */
- GLfloat _MatSpecular[2][3]; /**< material spec * light specular */
- GLfloat _dli; /**< CI diffuse light intensity */
- GLfloat _sli; /**< CI specular light intensity */
- /*@}*/
-};
-
-
-/**
- * Light model state.
- */
-struct gl_lightmodel
-{
- GLfloat Ambient[4]; /**< ambient color */
- GLboolean LocalViewer; /**< Local (or infinite) view point? */
- GLboolean TwoSide; /**< Two (or one) sided lighting? */
- GLenum ColorControl; /**< either GL_SINGLE_COLOR
- * or GL_SEPARATE_SPECULAR_COLOR */
-};
-
-
-/**
- * Material state.
- */
-struct gl_material
-{
- GLfloat Attrib[MAT_ATTRIB_MAX][4];
-};
-
-
-/**
- * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT)
- */
-struct gl_accum_attrib
-{
- GLfloat ClearColor[4]; /**< Accumulation buffer clear color */
-};
-
-
-/**
- * Color buffer attribute group (GL_COLOR_BUFFER_BIT).
- */
-struct gl_colorbuffer_attrib
-{
- GLuint ClearIndex; /**< Index to use for glClear */
- GLclampf ClearColor[4]; /**< Color to use for glClear */
-
- GLuint IndexMask; /**< Color index write mask */
- GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */
-
- GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */
-
- /**
- * \name alpha testing
- */
- /*@{*/
- GLboolean AlphaEnabled; /**< Alpha test enabled flag */
- GLenum AlphaFunc; /**< Alpha test function */
- GLclampf AlphaRef; /**< Alpha reference value */
- /*@}*/
-
- /**
- * \name Blending
- */
- /*@{*/
- GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */
- GLfloat BlendColor[4]; /**< Blending color */
- struct
- {
- GLenum SrcRGB; /**< RGB blend source term */
- GLenum DstRGB; /**< RGB blend dest term */
- GLenum SrcA; /**< Alpha blend source term */
- GLenum DstA; /**< Alpha blend dest term */
- GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */
- GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */
- } Blend[MAX_DRAW_BUFFERS];
- /** Are the blend func terms currently different for each buffer/target? */
- GLboolean _BlendFuncPerBuffer;
- /** Are the blend equations currently different for each buffer/target? */
- GLboolean _BlendEquationPerBuffer;
- /*@}*/
-
- /**
- * \name Logic op
- */
- /*@{*/
- GLenum LogicOp; /**< Logic operator */
- GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */
- GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */
- GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */
- /*@}*/
-
- GLboolean DitherFlag; /**< Dither enable flag */
-
- GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
- GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
-
- GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */
-};
-
-
-/**
- * Current attribute group (GL_CURRENT_BIT).
- */
-struct gl_current_attrib
-{
- /**
- * \name Current vertex attributes.
- * \note Values are valid only after FLUSH_VERTICES has been called.
- * \note Index and Edgeflag current values are stored as floats in the
- * SIX and SEVEN attribute slots.
- */
- GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */
-
- /**
- * \name Current raster position attributes (always valid).
- * \note This set of attributes is very similar to the SWvertex struct.
- */
- /*@{*/
- GLfloat RasterPos[4];
- GLfloat RasterDistance;
- GLfloat RasterColor[4];
- GLfloat RasterSecondaryColor[4];
- GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4];
- GLboolean RasterPosValid;
- /*@}*/
-};
-
-
-/**
- * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT).
- */
-struct gl_depthbuffer_attrib
-{
- GLenum Func; /**< Function for depth buffer compare */
- GLclampd Clear; /**< Value to clear depth buffer to */
- GLboolean Test; /**< Depth buffering enabled flag */
- GLboolean Mask; /**< Depth buffer writable? */
- GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */
- GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */
-};
-
-
-/**
- * Evaluator attribute group (GL_EVAL_BIT).
- */
-struct gl_eval_attrib
-{
- /**
- * \name Enable bits
- */
- /*@{*/
- GLboolean Map1Color4;
- GLboolean Map1Index;
- GLboolean Map1Normal;
- GLboolean Map1TextureCoord1;
- GLboolean Map1TextureCoord2;
- GLboolean Map1TextureCoord3;
- GLboolean Map1TextureCoord4;
- GLboolean Map1Vertex3;
- GLboolean Map1Vertex4;
- GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */
- GLboolean Map2Color4;
- GLboolean Map2Index;
- GLboolean Map2Normal;
- GLboolean Map2TextureCoord1;
- GLboolean Map2TextureCoord2;
- GLboolean Map2TextureCoord3;
- GLboolean Map2TextureCoord4;
- GLboolean Map2Vertex3;
- GLboolean Map2Vertex4;
- GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */
- GLboolean AutoNormal;
- /*@}*/
-
- /**
- * \name Map Grid endpoints and divisions and calculated du values
- */
- /*@{*/
- GLint MapGrid1un;
- GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du;
- GLint MapGrid2un, MapGrid2vn;
- GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du;
- GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv;
- /*@}*/
-};
-
-
-/**
- * Fog attribute group (GL_FOG_BIT).
- */
-struct gl_fog_attrib
-{
- GLboolean Enabled; /**< Fog enabled flag */
- GLfloat Color[4]; /**< Fog color */
- GLfloat Density; /**< Density >= 0.0 */
- GLfloat Start; /**< Start distance in eye coords */
- GLfloat End; /**< End distance in eye coords */
- GLfloat Index; /**< Fog index */
- GLenum Mode; /**< Fog mode */
- GLboolean ColorSumEnabled;
- GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */
- GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */
-};
-
-
-/**
- * \brief Layout qualifiers for gl_FragDepth.
- *
- * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
- * a layout qualifier.
- *
- * \see enum ir_depth_layout
- */
-enum gl_frag_depth_layout {
- FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
- FRAG_DEPTH_LAYOUT_ANY,
- FRAG_DEPTH_LAYOUT_GREATER,
- FRAG_DEPTH_LAYOUT_LESS,
- FRAG_DEPTH_LAYOUT_UNCHANGED
-};
-
-
-/**
- * Hint attribute group (GL_HINT_BIT).
- *
- * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE.
- */
-struct gl_hint_attrib
-{
- GLenum PerspectiveCorrection;
- GLenum PointSmooth;
- GLenum LineSmooth;
- GLenum PolygonSmooth;
- GLenum Fog;
- GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */
- GLenum TextureCompression; /**< GL_ARB_texture_compression */
- GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */
- GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */
-};
-
-/**
- * Light state flags.
- */
-/*@{*/
-#define LIGHT_SPOT 0x1
-#define LIGHT_LOCAL_VIEWER 0x2
-#define LIGHT_POSITIONAL 0x4
-#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER)
-/*@}*/
-
-
-/**
- * Lighting attribute group (GL_LIGHT_BIT).
- */
-struct gl_light_attrib
-{
- struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */
- struct gl_lightmodel Model; /**< Lighting model */
-
- /**
- * Must flush FLUSH_VERTICES before referencing:
- */
- /*@{*/
- struct gl_material Material; /**< Includes front & back values */
- /*@}*/
-
- GLboolean Enabled; /**< Lighting enabled flag */
- GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */
- GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */
- GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */
- GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */
- GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */
- GLboolean ColorMaterialEnabled;
- GLenum ClampVertexColor;
-
- struct gl_light EnabledList; /**< List sentinel */
-
- /**
- * Derived state for optimizations:
- */
- /*@{*/
- GLboolean _NeedEyeCoords;
- GLboolean _NeedVertices; /**< Use fast shader? */
- GLbitfield _Flags; /**< LIGHT_* flags, see above */
- GLfloat _BaseColor[2][3];
- /*@}*/
-};
-
-
-/**
- * Line attribute group (GL_LINE_BIT).
- */
-struct gl_line_attrib
-{
- GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */
- GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */
- GLushort StipplePattern; /**< Stipple pattern */
- GLint StippleFactor; /**< Stipple repeat factor */
- GLfloat Width; /**< Line width */
-};
-
-
-/**
- * Display list attribute group (GL_LIST_BIT).
- */
-struct gl_list_attrib
-{
- GLuint ListBase;
-};
-
-
-/**
- * Multisample attribute group (GL_MULTISAMPLE_BIT).
- */
-struct gl_multisample_attrib
-{
- GLboolean Enabled;
- GLboolean _Enabled; /**< true if Enabled and multisample buffer */
- GLboolean SampleAlphaToCoverage;
- GLboolean SampleAlphaToOne;
- GLboolean SampleCoverage;
- GLfloat SampleCoverageValue;
- GLboolean SampleCoverageInvert;
-};
-
-
-/**
- * A pixelmap (see glPixelMap)
- */
-struct gl_pixelmap
-{
- GLint Size;
- GLfloat Map[MAX_PIXEL_MAP_TABLE];
- GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */
-};
-
-
-/**
- * Collection of all pixelmaps
- */
-struct gl_pixelmaps
-{
- struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */
- struct gl_pixelmap GtoG;
- struct gl_pixelmap BtoB;
- struct gl_pixelmap AtoA;
- struct gl_pixelmap ItoR;
- struct gl_pixelmap ItoG;
- struct gl_pixelmap ItoB;
- struct gl_pixelmap ItoA;
- struct gl_pixelmap ItoI;
- struct gl_pixelmap StoS;
-};
-
-
-/**
- * Pixel attribute group (GL_PIXEL_MODE_BIT).
- */
-struct gl_pixel_attrib
-{
- GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */
-
- /*--- Begin Pixel Transfer State ---*/
- /* Fields are in the order in which they're applied... */
-
- /** Scale & Bias (index shift, offset) */
- /*@{*/
- GLfloat RedBias, RedScale;
- GLfloat GreenBias, GreenScale;
- GLfloat BlueBias, BlueScale;
- GLfloat AlphaBias, AlphaScale;
- GLfloat DepthBias, DepthScale;
- GLint IndexShift, IndexOffset;
- /*@}*/
-
- /* Pixel Maps */
- /* Note: actual pixel maps are not part of this attrib group */
- GLboolean MapColorFlag;
- GLboolean MapStencilFlag;
-
- /*--- End Pixel Transfer State ---*/
-
- /** glPixelZoom */
- GLfloat ZoomX, ZoomY;
-
- /** GL_SGI_texture_color_table */
- GLfloat TextureColorTableScale[4]; /**< RGBA */
- GLfloat TextureColorTableBias[4]; /**< RGBA */
-};
-
-
-/**
- * Point attribute group (GL_POINT_BIT).
- */
-struct gl_point_attrib
-{
- GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */
- GLfloat Size; /**< User-specified point size */
- GLfloat Params[3]; /**< GL_EXT_point_parameters */
- GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */
- GLfloat Threshold; /**< GL_EXT_point_parameters */
- GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */
- GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */
- GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/
- GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */
- GLenum SpriteOrigin; /**< GL_ARB_point_sprite */
-};
-
-
-/**
- * Polygon attribute group (GL_POLYGON_BIT).
- */
-struct gl_polygon_attrib
-{
- GLenum FrontFace; /**< Either GL_CW or GL_CCW */
- GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
- GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
- GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */
- GLboolean CullFlag; /**< Culling on/off flag */
- GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */
- GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */
- GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */
- GLfloat OffsetFactor; /**< Polygon offset factor, from user */
- GLfloat OffsetUnits; /**< Polygon offset units, from user */
- GLboolean OffsetPoint; /**< Offset in GL_POINT mode */
- GLboolean OffsetLine; /**< Offset in GL_LINE mode */
- GLboolean OffsetFill; /**< Offset in GL_FILL mode */
-};
-
-
-/**
- * Scissor attributes (GL_SCISSOR_BIT).
- */
-struct gl_scissor_attrib
-{
- GLboolean Enabled; /**< Scissor test enabled? */
- GLint X, Y; /**< Lower left corner of box */
- GLsizei Width, Height; /**< Size of box */
-};
-
-
-/**
- * Stencil attribute group (GL_STENCIL_BUFFER_BIT).
- *
- * Three sets of stencil data are tracked so that OpenGL 2.0,
- * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported
- * simultaneously. In each of the stencil state arrays, element 0 corresponds
- * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 /
- * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the
- * GL_EXT_stencil_two_side GL_BACK state.
- *
- * The derived value \c _BackFace is either 1 or 2 depending on whether or
- * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled.
- *
- * The derived value \c _TestTwoSide is set when the front-face and back-face
- * stencil state are different.
- */
-struct gl_stencil_attrib
-{
- GLboolean Enabled; /**< Enabled flag */
- GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */
- GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */
- GLboolean _Enabled; /**< Enabled and stencil buffer present */
- GLboolean _TestTwoSide;
- GLubyte _BackFace; /**< Current back stencil state (1 or 2) */
- GLenum Function[3]; /**< Stencil function */
- GLenum FailFunc[3]; /**< Fail function */
- GLenum ZPassFunc[3]; /**< Depth buffer pass function */
- GLenum ZFailFunc[3]; /**< Depth buffer fail function */
- GLint Ref[3]; /**< Reference value */
- GLuint ValueMask[3]; /**< Value mask */
- GLuint WriteMask[3]; /**< Write mask */
- GLuint Clear; /**< Clear value */
-};
-
-
-/**
- * An index for each type of texture object. These correspond to the GL
- * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc.
- * Note: the order is from highest priority to lowest priority.
- */
-typedef enum
-{
- TEXTURE_2D_ARRAY_INDEX,
- TEXTURE_1D_ARRAY_INDEX,
- TEXTURE_CUBE_INDEX,
- TEXTURE_3D_INDEX,
- TEXTURE_RECT_INDEX,
- TEXTURE_2D_INDEX,
- TEXTURE_1D_INDEX,
- NUM_TEXTURE_TARGETS
-} gl_texture_index;
-
-
-/**
- * Bit flags for each type of texture object
- * Used for Texture.Unit[]._ReallyEnabled flags.
- */
-/*@{*/
-#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX)
-#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX)
-#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX)
-#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX)
-#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX)
-#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX)
-#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX)
-/*@}*/
-
-
-/**
- * TexGenEnabled flags.
- */
-/*@{*/
-#define S_BIT 1
-#define T_BIT 2
-#define R_BIT 4
-#define Q_BIT 8
-#define STR_BITS (S_BIT | T_BIT | R_BIT)
-/*@}*/
-
-
-/**
- * Bit flag versions of the corresponding GL_ constants.
- */
-/*@{*/
-#define TEXGEN_SPHERE_MAP 0x1
-#define TEXGEN_OBJ_LINEAR 0x2
-#define TEXGEN_EYE_LINEAR 0x4
-#define TEXGEN_REFLECTION_MAP_NV 0x8
-#define TEXGEN_NORMAL_MAP_NV 0x10
-
-#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \
- TEXGEN_REFLECTION_MAP_NV | \
- TEXGEN_NORMAL_MAP_NV)
-#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \
- TEXGEN_REFLECTION_MAP_NV | \
- TEXGEN_NORMAL_MAP_NV | \
- TEXGEN_EYE_LINEAR)
-/*@}*/
-
-
-
-/** Tex-gen enabled for texture unit? */
-#define ENABLE_TEXGEN(unit) (1 << (unit))
-
-/** Non-identity texture matrix for texture unit? */
-#define ENABLE_TEXMAT(unit) (1 << (unit))
-
-
-/**
- * Texel fetch function prototype. We use texel fetch functions to
- * extract RGBA, color indexes and depth components out of 1D, 2D and 3D
- * texture images. These functions help to isolate us from the gritty
- * details of all the various texture image encodings.
- *
- * \param texImage texture image.
- * \param col texel column.
- * \param row texel row.
- * \param img texel image level/layer.
- * \param texelOut output texel (up to 4 GLchans)
- */
-typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- GLchan *texelOut );
-
-/**
- * As above, but returns floats.
- * Used for depth component images and for upcoming signed/float
- * texture images.
- */
-typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- GLfloat *texelOut );
-
-
-typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage,
- GLint col, GLint row, GLint img,
- const void *texel);
-
-
-/**
- * Texture image state. Describes the dimensions of a texture image,
- * the texel format and pointers to Texel Fetch functions.
- */
-struct gl_texture_image
-{
- GLint InternalFormat; /**< Internal format as given by the user */
- GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA,
- * GL_LUMINANCE, GL_LUMINANCE_ALPHA,
- * GL_INTENSITY, GL_COLOR_INDEX,
- * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT
- * only. Used for choosing TexEnv arithmetic.
- */
- GLuint TexFormat; /**< The actual format: MESA_FORMAT_x */
-
- GLuint Border; /**< 0 or 1 */
- GLuint Width; /**< = 2^WidthLog2 + 2*Border */
- GLuint Height; /**< = 2^HeightLog2 + 2*Border */
- GLuint Depth; /**< = 2^DepthLog2 + 2*Border */
- GLuint Width2; /**< = Width - 2*Border */
- GLuint Height2; /**< = Height - 2*Border */
- GLuint Depth2; /**< = Depth - 2*Border */
- GLuint WidthLog2; /**< = log2(Width2) */
- GLuint HeightLog2; /**< = log2(Height2) */
- GLuint DepthLog2; /**< = log2(Depth2) */
- GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */
- GLfloat WidthScale; /**< used for mipmap LOD computation */
- GLfloat HeightScale; /**< used for mipmap LOD computation */
- GLfloat DepthScale; /**< used for mipmap LOD computation */
- GLboolean IsClientData; /**< Data owned by client? */
- GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */
-
- struct gl_texture_object *TexObject; /**< Pointer back to parent object */
-
- FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */
- FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */
-
- GLuint RowStride; /**< Padded width in units of texels */
- GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to
- each 2D slice in 'Data', in texels */
- GLvoid *Data; /**< Image data, accessed via FetchTexel() */
-
- /**
- * \name For device driver:
- */
- /*@{*/
- void *DriverData; /**< Arbitrary device driver data */
- /*@}*/
-};
-
-
-/**
- * Indexes for cube map faces.
- */
-typedef enum
-{
- FACE_POS_X = 0,
- FACE_NEG_X = 1,
- FACE_POS_Y = 2,
- FACE_NEG_Y = 3,
- FACE_POS_Z = 4,
- FACE_NEG_Z = 5,
- MAX_FACES = 6
-} gl_face_index;
-
-
-/**
- * Texture object state. Contains the array of mipmap images, border color,
- * wrap modes, filter modes, shadow/texcompare state, and the per-texture
- * color palette.
- */
-struct gl_texture_object
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- GLint RefCount; /**< reference count */
- GLuint Name; /**< the user-visible texture object ID */
- GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */
- GLfloat Priority; /**< in [0,1] */
- union {
- GLfloat f[4];
- GLuint ui[4];
- GLint i[4];
- } BorderColor; /**< Interpreted according to texture format */
- GLenum WrapS; /**< S-axis texture image wrap mode */
- GLenum WrapT; /**< T-axis texture image wrap mode */
- GLenum WrapR; /**< R-axis texture image wrap mode */
- GLenum MinFilter; /**< minification filter */
- GLenum MagFilter; /**< magnification filter */
- GLfloat MinLod; /**< min lambda, OpenGL 1.2 */
- GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */
- GLfloat LodBias; /**< OpenGL 1.4 */
- GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */
- GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */
- GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
- GLenum CompareMode; /**< GL_ARB_shadow */
- GLenum CompareFunc; /**< GL_ARB_shadow */
- GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */
- GLenum DepthMode; /**< GL_ARB_depth_texture */
- GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */
- GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */
- GLint CropRect[4]; /**< GL_OES_draw_texture */
- GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */
- GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */
- GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */
- GLboolean _Complete; /**< Is texture object complete? */
- GLboolean _RenderToTexture; /**< Any rendering to this texture? */
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
- GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */
-
- /** Actual texture images, indexed by [cube face] and [mipmap level] */
- struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
-
- /** GL_EXT_paletted_texture */
- struct gl_color_table Palette;
-
- /**
- * \name For device driver.
- * Note: instead of attaching driver data to this pointer, it's preferable
- * to instead use this struct as a base class for your own texture object
- * class. Driver->NewTextureObject() can be used to implement the
- * allocation.
- */
- void *DriverData; /**< Arbitrary device driver data */
-};
-
-
-/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */
-#define MAX_COMBINER_TERMS 4
-
-
-/**
- * Texture combine environment state.
- */
-struct gl_tex_env_combine_state
-{
- GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
- GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
- /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */
- GLenum SourceRGB[MAX_COMBINER_TERMS];
- GLenum SourceA[MAX_COMBINER_TERMS];
- /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */
- GLenum OperandRGB[MAX_COMBINER_TERMS];
- GLenum OperandA[MAX_COMBINER_TERMS];
- GLuint ScaleShiftRGB; /**< 0, 1 or 2 */
- GLuint ScaleShiftA; /**< 0, 1 or 2 */
- GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */
- GLuint _NumArgsA; /**< Number of inputs used for the A combiner */
-};
-
-
-/**
- * Texture coord generation state.
- */
-struct gl_texgen
-{
- GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */
- GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */
- GLfloat ObjectPlane[4];
- GLfloat EyePlane[4];
-};
-
-
-/**
- * Texture unit state. Contains enable flags, texture environment/function/
- * combiners, texgen state, pointers to current texture objects and
- * post-filter color tables.
- */
-struct gl_texture_unit
-{
- GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */
- GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */
-
- GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */
- GLfloat EnvColor[4];
-
- struct gl_texgen GenS;
- struct gl_texgen GenT;
- struct gl_texgen GenR;
- struct gl_texgen GenQ;
- GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */
- GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */
-
- GLfloat LodBias; /**< for biasing mipmap levels */
- GLenum BumpTarget;
- GLfloat RotMatrix[4]; /* 2x2 matrix */
-
- /**
- * \name GL_EXT_texture_env_combine
- */
- struct gl_tex_env_combine_state Combine;
-
- /**
- * Derived state based on \c EnvMode and the \c BaseFormat of the
- * currently enabled texture.
- */
- struct gl_tex_env_combine_state _EnvMode;
-
- /**
- * Currently enabled combiner state. This will point to either
- * \c Combine or \c _EnvMode.
- */
- struct gl_tex_env_combine_state *_CurrentCombine;
-
- /** Current texture object pointers */
- struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS];
-
- /** Points to highest priority, complete and enabled texture object */
- struct gl_texture_object *_Current;
-
- /** GL_SGI_texture_color_table */
- /*@{*/
- struct gl_color_table ColorTable;
- struct gl_color_table ProxyColorTable;
- GLboolean ColorTableEnabled;
- /*@}*/
-};
-
-
-/**
- * Texture attribute group (GL_TEXTURE_BIT).
- */
-struct gl_texture_attrib
-{
- GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */
- struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
-
- struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS];
-
- /** GL_ARB_seamless_cubemap */
- GLboolean CubeMapSeamless;
-
- /** GL_EXT_shared_texture_palette */
- GLboolean SharedPalette;
- struct gl_color_table Palette;
-
- /** Texture units/samplers used by vertex or fragment texturing */
- GLbitfield _EnabledUnits;
-
- /** Texture coord units/sets used for fragment texturing */
- GLbitfield _EnabledCoordUnits;
-
- /** Texture coord units that have texgen enabled */
- GLbitfield _TexGenEnabled;
-
- /** Texture coord units that have non-identity matrices */
- GLbitfield _TexMatEnabled;
-
- /** Bitwise-OR of all Texture.Unit[i]._GenFlags */
- GLbitfield _GenFlags;
-};
-
-
-/**
- * Transformation attribute group (GL_TRANSFORM_BIT).
- */
-struct gl_transform_attrib
-{
- GLenum MatrixMode; /**< Matrix mode */
- GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */
- GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */
- GLbitfield ClipPlanesEnabled; /**< on/off bitmask */
- GLboolean Normalize; /**< Normalize all normals? */
- GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */
- GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */
- GLboolean DepthClamp; /**< GL_ARB_depth_clamp */
-
- GLfloat CullEyePos[4];
- GLfloat CullObjPos[4];
-};
-
-
-/**
- * Viewport attribute group (GL_VIEWPORT_BIT).
- */
-struct gl_viewport_attrib
-{
- GLint X, Y; /**< position */
- GLsizei Width, Height; /**< size */
- GLfloat Near, Far; /**< Depth buffer range */
- GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */
-};
-
-
-/**
- * GL_ARB_vertex/pixel_buffer_object buffer object
- */
-struct gl_buffer_object
-{
- _glthread_Mutex Mutex;
- GLint RefCount;
- GLuint Name;
- GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */
- GLsizeiptrARB Size; /**< Size of buffer storage in bytes */
- GLubyte *Data; /**< Location of storage either in RAM or VRAM. */
- /** Fields describing a mapped buffer */
- /*@{*/
- GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */
- GLvoid *Pointer; /**< User-space address of mapping */
- GLintptr Offset; /**< Mapped offset */
- GLsizeiptr Length; /**< Mapped length */
- /*@}*/
- GLboolean Written; /**< Ever written to? (for debugging) */
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
-};
-
-
-/**
- * Client pixel packing/unpacking attributes
- */
-struct gl_pixelstore_attrib
-{
- GLint Alignment;
- GLint RowLength;
- GLint SkipPixels;
- GLint SkipRows;
- GLint ImageHeight;
- GLint SkipImages;
- GLboolean SwapBytes;
- GLboolean LsbFirst;
- GLboolean ClientStorage; /**< GL_APPLE_client_storage */
- GLboolean Invert; /**< GL_MESA_pack_invert */
- struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */
-};
-
-
-/**
- * Client vertex array attributes
- */
-struct gl_client_array
-{
- GLint Size; /**< components per element (1,2,3,4) */
- GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */
- GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */
- GLsizei Stride; /**< user-specified stride */
- GLsizei StrideB; /**< actual stride in bytes */
- const GLubyte *Ptr; /**< Points to array data */
- GLboolean Enabled; /**< Enabled flag is a boolean */
- GLboolean Normalized; /**< GL_ARB_vertex_program */
- GLboolean Integer; /**< Integer-valued? */
- GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */
- GLuint _ElementSize; /**< size of each element in bytes */
-
- struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */
- GLuint _MaxElement; /**< max element index into array buffer + 1 */
-};
-
-
-/**
- * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object
- * extension, but a nice encapsulation in any case.
- */
-struct gl_array_object
-{
- /** Name of the array object as received from glGenVertexArrayAPPLE. */
- GLuint Name;
-
- GLint RefCount;
- _glthread_Mutex Mutex;
- GLboolean VBOonly; /**< require all arrays to live in VBOs? */
-
- /** Conventional vertex arrays */
- /*@{*/
- struct gl_client_array Vertex;
- struct gl_client_array Weight;
- struct gl_client_array Normal;
- struct gl_client_array Color;
- struct gl_client_array SecondaryColor;
- struct gl_client_array FogCoord;
- struct gl_client_array Index;
- struct gl_client_array EdgeFlag;
- struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS];
- struct gl_client_array PointSize;
- /*@}*/
-
- /**
- * Generic arrays for vertex programs/shaders.
- * For NV vertex programs, these attributes alias and take priority
- * over the conventional attribs above. For ARB vertex programs and
- * GLSL vertex shaders, these attributes are separate.
- */
- struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS];
-
- /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */
- GLbitfield _Enabled;
-
- /**
- * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs
- * we can determine the max legal (in bounds) glDrawElements array index.
- */
- GLuint _MaxElement;
-};
-
-
-/**
- * Vertex array state
- */
-struct gl_array_attrib
-{
- /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */
- struct gl_array_object *ArrayObj;
-
- /** The default vertex array object */
- struct gl_array_object *DefaultArrayObj;
-
- /** Array objects (GL_ARB/APPLE_vertex_array_object) */
- struct _mesa_HashTable *Objects;
-
- GLint ActiveTexture; /**< Client Active Texture */
- GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */
- GLuint LockCount; /**< GL_EXT_compiled_vertex_array */
-
- /** GL 3.1 (slightly different from GL_NV_primitive_restart) */
- GLboolean PrimitiveRestart;
- GLuint RestartIndex;
-
- GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */
-
- /* GL_ARB_vertex_buffer_object */
- struct gl_buffer_object *ArrayBufferObj;
- struct gl_buffer_object *ElementArrayBufferObj;
-};
-
-
-/**
- * Feedback buffer state
- */
-struct gl_feedback
-{
- GLenum Type;
- GLbitfield _Mask; /**< FB_* bits */
- GLfloat *Buffer;
- GLuint BufferSize;
- GLuint Count;
-};
-
-
-/**
- * Selection buffer state
- */
-struct gl_selection
-{
- GLuint *Buffer; /**< selection buffer */
- GLuint BufferSize; /**< size of the selection buffer */
- GLuint BufferCount; /**< number of values in the selection buffer */
- GLuint Hits; /**< number of records in the selection buffer */
- GLuint NameStackDepth; /**< name stack depth */
- GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */
- GLboolean HitFlag; /**< hit flag */
- GLfloat HitMinZ; /**< minimum hit depth */
- GLfloat HitMaxZ; /**< maximum hit depth */
-};
-
-
-/**
- * 1-D Evaluator control points
- */
-struct gl_1d_map
-{
- GLuint Order; /**< Number of control points */
- GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */
- GLfloat *Points; /**< Points to contiguous control points */
-};
-
-
-/**
- * 2-D Evaluator control points
- */
-struct gl_2d_map
-{
- GLuint Uorder; /**< Number of control points in U dimension */
- GLuint Vorder; /**< Number of control points in V dimension */
- GLfloat u1, u2, du;
- GLfloat v1, v2, dv;
- GLfloat *Points; /**< Points to contiguous control points */
-};
-
-
-/**
- * All evaluator control point state
- */
-struct gl_evaluators
-{
- /**
- * \name 1-D maps
- */
- /*@{*/
- struct gl_1d_map Map1Vertex3;
- struct gl_1d_map Map1Vertex4;
- struct gl_1d_map Map1Index;
- struct gl_1d_map Map1Color4;
- struct gl_1d_map Map1Normal;
- struct gl_1d_map Map1Texture1;
- struct gl_1d_map Map1Texture2;
- struct gl_1d_map Map1Texture3;
- struct gl_1d_map Map1Texture4;
- struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */
- /*@}*/
-
- /**
- * \name 2-D maps
- */
- /*@{*/
- struct gl_2d_map Map2Vertex3;
- struct gl_2d_map Map2Vertex4;
- struct gl_2d_map Map2Index;
- struct gl_2d_map Map2Color4;
- struct gl_2d_map Map2Normal;
- struct gl_2d_map Map2Texture1;
- struct gl_2d_map Map2Texture2;
- struct gl_2d_map Map2Texture3;
- struct gl_2d_map Map2Texture4;
- struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */
- /*@}*/
-};
-
-
-/**
- * Names of the various vertex/fragment program register files, etc.
- *
- * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c)
- * All values should fit in a 4-bit field.
- *
- * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM,
- * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to
- * be "uniform" variables since they can only be set outside glBegin/End.
- * They're also all stored in the same Parameters array.
- */
-typedef enum
-{
- PROGRAM_TEMPORARY, /**< machine->Temporary[] */
- PROGRAM_INPUT, /**< machine->Inputs[] */
- PROGRAM_OUTPUT, /**< machine->Outputs[] */
- PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */
- PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */
- PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */
- PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */
- PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */
- PROGRAM_CONSTANT, /**< gl_program->Parameters[] */
- PROGRAM_UNIFORM, /**< gl_program->Parameters[] */
- PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */
- PROGRAM_ADDRESS, /**< machine->AddressReg */
- PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
- PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
- PROGRAM_UNDEFINED, /**< Invalid/TBD value */
- PROGRAM_FILE_MAX
-} gl_register_file;
-
-
-/**
- * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be
- * one of these values.
- */
-typedef enum
-{
- SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */
- SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */
- SYSTEM_VALUE_MAX /**< Number of values */
-} gl_system_value;
-
-
-/** Vertex and fragment instructions */
-struct prog_instruction;
-struct gl_program_parameter_list;
-struct gl_uniform_list;
-
-
-/**
- * Base class for any kind of program object
- */
-struct gl_program
-{
- GLuint Id;
- GLubyte *String; /**< Null-terminated program text */
- GLint RefCount;
- GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */
- GLenum Format; /**< String encoding format */
- GLboolean Resident;
-
- struct prog_instruction *Instructions;
-
- GLbitfield InputsRead; /**< Bitmask of which input regs are read */
- GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
- GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */
- GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */
- GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */
- GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */
- GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */
- GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
-
-
- /** Named parameters, constants, etc. from program text */
- struct gl_program_parameter_list *Parameters;
- /** Numbered local parameters */
- GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4];
-
- /** Vertex/fragment shader varying vars */
- struct gl_program_parameter_list *Varying;
- /** Vertex program user-defined attributes */
- struct gl_program_parameter_list *Attributes;
-
- /** Map from sampler unit to texture unit (set by glUniform1i()) */
- GLubyte SamplerUnits[MAX_SAMPLERS];
- /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */
- gl_texture_index SamplerTargets[MAX_SAMPLERS];
-
- /** Bitmask of which register files are read/written with indirect
- * addressing. Mask of (1 << PROGRAM_x) bits.
- */
- GLbitfield IndirectRegisterFiles;
-
- /** Logical counts */
- /*@{*/
- GLuint NumInstructions;
- GLuint NumTemporaries;
- GLuint NumParameters;
- GLuint NumAttributes;
- GLuint NumAddressRegs;
- GLuint NumAluInstructions;
- GLuint NumTexInstructions;
- GLuint NumTexIndirections;
- /*@}*/
- /** Native, actual h/w counts */
- /*@{*/
- GLuint NumNativeInstructions;
- GLuint NumNativeTemporaries;
- GLuint NumNativeParameters;
- GLuint NumNativeAttributes;
- GLuint NumNativeAddressRegs;
- GLuint NumNativeAluInstructions;
- GLuint NumNativeTexInstructions;
- GLuint NumNativeTexIndirections;
- /*@}*/
-};
-
-
-/** Vertex program object */
-struct gl_vertex_program
-{
- struct gl_program Base; /**< base class */
- GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? */
- GLboolean IsPositionInvariant;
-};
-
-
-/** Geometry program object */
-struct gl_geometry_program
-{
- struct gl_program Base; /**< base class */
-
- GLint VerticesOut;
- GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
- GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
- GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
-};
-
-
-/** Fragment program object */
-struct gl_fragment_program
-{
- struct gl_program Base; /**< base class */
- GLenum FogOption;
- GLboolean UsesKill; /**< shader uses KIL instruction */
- GLboolean OriginUpperLeft;
- GLboolean PixelCenterInteger;
- enum gl_frag_depth_layout FragDepthLayout;
-};
-
-
-/**
- * State common to vertex and fragment programs.
- */
-struct gl_program_state
-{
- GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */
- const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */
-};
-
-
-/**
- * Context state for vertex programs.
- */
-struct gl_vertex_program_state
-{
- GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */
- GLboolean _Enabled; /**< Enabled and _valid_ user program? */
- GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */
- GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */
- struct gl_vertex_program *Current; /**< User-bound vertex program */
-
- /** Currently enabled and valid vertex program (including internal
- * programs, user-defined vertex programs and GLSL vertex shaders).
- * This is the program we must use when rendering.
- */
- struct gl_vertex_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /* For GL_NV_vertex_program only: */
- GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4];
- GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4];
-
- /** Should fixed-function T&L be implemented with a vertex prog? */
- GLboolean _MaintainTnlProgram;
-
- /** Program to emulate fixed-function T&L (see above) */
- struct gl_vertex_program *_TnlProgram;
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-
- GLboolean _Overriden;
-};
-
-
-/**
- * Context state for geometry programs.
- */
-struct gl_geometry_program_state
-{
- GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */
- GLboolean _Enabled; /**< Enabled and valid program? */
- struct gl_geometry_program *Current; /**< user-bound geometry program */
-
- /** Currently enabled and valid program (including internal programs
- * and compiled shader programs).
- */
- struct gl_geometry_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-};
-
-/**
- * Context state for fragment programs.
- */
-struct gl_fragment_program_state
-{
- GLboolean Enabled; /**< User-set fragment program enable flag */
- GLboolean _Enabled; /**< Enabled and _valid_ user program? */
- struct gl_fragment_program *Current; /**< User-bound fragment program */
-
- /** Currently enabled and valid fragment program (including internal
- * programs, user-defined fragment programs and GLSL fragment shaders).
- * This is the program we must use when rendering.
- */
- struct gl_fragment_program *_Current;
-
- GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
-
- /** Should fixed-function texturing be implemented with a fragment prog? */
- GLboolean _MaintainTexEnvProgram;
-
- /** Program to emulate fixed-function texture env/combine (see above) */
- struct gl_fragment_program *_TexEnvProgram;
-
- /** Cache of fixed-function programs */
- struct gl_program_cache *Cache;
-};
-
-
-/**
- * ATI_fragment_shader runtime state
- */
-#define ATI_FS_INPUT_PRIMARY 0
-#define ATI_FS_INPUT_SECONDARY 1
-
-struct atifs_instruction;
-struct atifs_setupinst;
-
-/**
- * ATI fragment shader
- */
-struct ati_fragment_shader
-{
- GLuint Id;
- GLint RefCount;
- struct atifs_instruction *Instructions[2];
- struct atifs_setupinst *SetupInst[2];
- GLfloat Constants[8][4];
- GLbitfield LocalConstDef; /**< Indicates which constants have been set */
- GLubyte numArithInstr[2];
- GLubyte regsAssigned[2];
- GLubyte NumPasses; /**< 1 or 2 */
- GLubyte cur_pass;
- GLubyte last_optype;
- GLboolean interpinp1;
- GLboolean isValid;
- GLuint swizzlerq;
-};
-
-/**
- * Context state for GL_ATI_fragment_shader
- */
-struct gl_ati_fragment_shader_state
-{
- GLboolean Enabled;
- GLboolean _Enabled; /**< enabled and valid shader? */
- GLboolean Compiling;
- GLfloat GlobalConstants[8][4];
- struct ati_fragment_shader *Current;
-};
-
-
-/**
- * Occlusion/timer query object.
- */
-struct gl_query_object
-{
- GLenum Target; /**< The query target, when active */
- GLuint Id; /**< hash table ID/name */
- GLuint64EXT Result; /**< the counter */
- GLboolean Active; /**< inside Begin/EndQuery */
- GLboolean Ready; /**< result is ready? */
-};
-
-
-/**
- * Context state for query objects.
- */
-struct gl_query_state
-{
- struct _mesa_HashTable *QueryObjects;
- struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */
- struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */
-
- /** GL_NV_conditional_render */
- struct gl_query_object *CondRenderQuery;
-
- /** GL_EXT_transform_feedback */
- struct gl_query_object *PrimitivesGenerated;
- struct gl_query_object *PrimitivesWritten;
-
- /** GL_ARB_timer_query */
- struct gl_query_object *TimeElapsed;
-
- GLenum CondRenderMode;
-};
-
-
-/** Sync object state */
-struct gl_sync_object {
- struct simple_node link;
- GLenum Type; /**< GL_SYNC_FENCE */
- GLuint Name; /**< Fence name */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending; /**< Object was deleted while there were still
- * live references (e.g., sync not yet finished)
- */
- GLenum SyncCondition;
- GLbitfield Flags; /**< Flags passed to glFenceSync */
- GLuint StatusFlag:1; /**< Has the sync object been signaled? */
-};
-
-
-/** Set by #pragma directives */
-struct gl_sl_pragmas
-{
- GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */
- GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */
- GLboolean Optimize; /**< defaults on */
- GLboolean Debug; /**< defaults off */
-};
-
-
-/**
- * A GLSL vertex or fragment shader object.
- */
-struct gl_shader
-{
- GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) */
- GLuint Name; /**< AKA the handle */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending;
- GLboolean CompileStatus;
- const GLchar *Source; /**< Source code string */
- GLuint SourceChecksum; /**< for debug/logging purposes */
- struct gl_program *Program; /**< Post-compile assembly code */
- GLchar *InfoLog;
- struct gl_sl_pragmas Pragmas;
-
- unsigned Version; /**< GLSL version used for linking */
-
- struct exec_list *ir;
- struct glsl_symbol_table *symbols;
-
- /** Shaders containing built-in functions that are used for linking. */
- struct gl_shader *builtins_to_link[16];
- unsigned num_builtins_to_link;
-};
-
-
-/**
- * A GLSL program object.
- * Basically a linked collection of vertex and fragment shaders.
- */
-struct gl_shader_program
-{
- GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */
- GLuint Name; /**< aka handle or ID */
- GLint RefCount; /**< Reference count */
- GLboolean DeletePending;
-
- GLuint NumShaders; /**< number of attached shaders */
- struct gl_shader **Shaders; /**< List of attached the shaders */
-
- /** User-defined attribute bindings (glBindAttribLocation) */
- struct gl_program_parameter_list *Attributes;
-
- /** Transform feedback varyings */
- struct {
- GLenum BufferMode;
- GLuint NumVarying;
- GLchar **VaryingNames; /**< Array [NumVarying] of char * */
- } TransformFeedback;
-
- /** Geometry shader state - copied into gl_geometry_program at link time */
- struct {
- GLint VerticesOut;
- GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
- GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
- GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
- } Geom;
-
- /* post-link info: */
- struct gl_vertex_program *VertexProgram; /**< Linked vertex program */
- struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */
- struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */
- struct gl_uniform_list *Uniforms;
- struct gl_program_parameter_list *Varying;
- GLboolean LinkStatus; /**< GL_LINK_STATUS */
- GLboolean Validated;
- GLboolean _Used; /**< Ever used for drawing? */
- GLchar *InfoLog;
-
- unsigned Version; /**< GLSL version used for linking */
-
- /**
- * Per-stage shaders resulting from the first stage of linking.
- *
- * Set of linked shaders for this program. The array is accessed using the
- * \c MESA_SHADER_* defines. Entries for non-existent stages will be
- * \c NULL.
- */
- struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES];
-};
-
-
-#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */
-#define GLSL_LOG 0x2 /**< Write shaders to files */
-#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */
-#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */
-#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */
-#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */
-#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */
-#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */
-
-
-/**
- * Context state for GLSL vertex/fragment shaders.
- */
-struct gl_shader_state
-{
- /**
- * Programs used for rendering
- *
- * There is a separate program set for each shader stage. If
- * GL_EXT_separate_shader_objects is not supported, each of these must point
- * to \c NULL or to the same program.
- */
- struct gl_shader_program *CurrentVertexProgram;
- struct gl_shader_program *CurrentGeometryProgram;
- struct gl_shader_program *CurrentFragmentProgram;
-
- /**
- * Program used by glUniform calls.
- *
- * Explicitly set by \c glUseProgram and \c glActiveProgramEXT.
- */
- struct gl_shader_program *ActiveProgram;
-
- void *MemPool;
-
- GLbitfield Flags; /**< Mask of GLSL_x flags */
-};
-
-/**
- * Compiler options for a single GLSL shaders type
- */
-struct gl_shader_compiler_options
-{
- /** Driver-selectable options: */
- GLboolean EmitCondCodes; /**< Use condition codes? */
- GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */
- /**
- * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't
- * support control flow.
- */
- GLboolean EmitNoIfs;
- GLboolean EmitNoLoops;
- GLboolean EmitNoFunctions;
- GLboolean EmitNoCont; /**< Emit CONT opcode? */
- GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */
- GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */
- GLboolean EmitNoPow; /**< Emit POW opcodes? */
-
- /**
- * \name Forms of indirect addressing the driver cannot do.
- */
- /*@{*/
- GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */
- GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */
- GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */
- GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
- /*@}*/
-
- GLuint MaxUnrollIterations;
-
- struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */
-};
-
-/**
- * Transform feedback object state
- */
-struct gl_transform_feedback_object
-{
- GLuint Name; /**< AKA the object ID */
- GLint RefCount;
- GLboolean Active; /**< Is transform feedback enabled? */
- GLboolean Paused; /**< Is transform feedback paused? */
-
- /** The feedback buffers */
- GLuint BufferNames[MAX_FEEDBACK_ATTRIBS];
- struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS];
-
- /** Start of feedback data in dest buffer */
- GLintptr Offset[MAX_FEEDBACK_ATTRIBS];
- /** Max data to put into dest buffer (in bytes) */
- GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS];
-};
-
-
-/**
- * Context state for transform feedback.
- */
-struct gl_transform_feedback
-{
- GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */
-
- GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */
-
- /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */
- struct gl_buffer_object *CurrentBuffer;
-
- /** The table of all transform feedback objects */
- struct _mesa_HashTable *Objects;
-
- /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */
- struct gl_transform_feedback_object *CurrentObject;
-
- /** The default xform-fb object (Name==0) */
- struct gl_transform_feedback_object *DefaultObject;
-};
-
-
-
-/**
- * State which can be shared by multiple contexts:
- */
-struct gl_shared_state
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- GLint RefCount; /**< Reference count */
- struct _mesa_HashTable *DisplayList; /**< Display lists hash table */
- struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */
-
- /** Default texture objects (shared by all texture units) */
- struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS];
-
- /** Fallback texture used when a bound texture is incomplete */
- struct gl_texture_object *FallbackTex;
-
- /**
- * \name Thread safety and statechange notification for texture
- * objects.
- *
- * \todo Improve the granularity of locking.
- */
- /*@{*/
- _glthread_Mutex TexMutex; /**< texobj thread safety */
- GLuint TextureStateStamp; /**< state notification for shared tex */
- /*@}*/
-
- /** Default buffer object for vertex arrays that aren't in VBOs */
- struct gl_buffer_object *NullBufferObj;
-
- /**
- * \name Vertex/geometry/fragment programs
- */
- /*@{*/
- struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
- struct gl_vertex_program *DefaultVertexProgram;
- struct gl_fragment_program *DefaultFragmentProgram;
- struct gl_geometry_program *DefaultGeometryProgram;
- /*@}*/
-
- /* GL_ATI_fragment_shader */
- struct _mesa_HashTable *ATIShaders;
- struct ati_fragment_shader *DefaultFragmentShader;
-
- struct _mesa_HashTable *BufferObjects;
-
- /** Table of both gl_shader and gl_shader_program objects */
- struct _mesa_HashTable *ShaderObjects;
-
- /* GL_EXT_framebuffer_object */
- struct _mesa_HashTable *RenderBuffers;
- struct _mesa_HashTable *FrameBuffers;
-
- /* GL_ARB_sync */
- struct simple_node SyncObjects;
-
- void *DriverData; /**< Device driver shared state */
-};
-
-
-
-
-/**
- * A renderbuffer stores colors or depth values or stencil values.
- * A framebuffer object will have a collection of these.
- * Data are read/written to the buffer with a handful of Get/Put functions.
- *
- * Instances of this object are allocated with the Driver's NewRenderbuffer
- * hook. Drivers will likely wrap this class inside a driver-specific
- * class to simulate inheritance.
- */
-struct gl_renderbuffer
-{
-#define RB_MAGIC 0xaabbccdd
- int Magic; /** XXX TEMPORARY DEBUG INFO */
- _glthread_Mutex Mutex; /**< for thread safety */
- GLuint ClassID; /**< Useful for drivers */
- GLuint Name;
- GLint RefCount;
- GLuint Width, Height;
- GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
-
- GLenum InternalFormat; /**< The user-specified format */
- GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or
- GL_STENCIL_INDEX. */
- GLuint Format; /**< The actual format: MESA_FORMAT_x */
-
- GLubyte NumSamples;
-
- GLenum DataType; /**< Type of values passed to the Get/Put functions */
- GLvoid *Data; /**< This may not be used by some kinds of RBs */
-
- /* Used to wrap one renderbuffer around another: */
- struct gl_renderbuffer *Wrapped;
-
- /* Delete this renderbuffer */
- void (*Delete)(struct gl_renderbuffer *rb);
-
- /* Allocate new storage for this renderbuffer */
- GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLenum internalFormat,
- GLuint width, GLuint height);
-
- /* Lock/Unlock are called before/after calling the Get/Put functions.
- * Not sure this is the right place for these yet.
- void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
- void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
- */
-
- /* Return a pointer to the element/pixel at (x,y).
- * Should return NULL if the buffer memory can't be directly addressed.
- */
- void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLint x, GLint y);
-
- /* Get/Read a row of values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, void *values);
-
- /* Get/Read values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- const GLint x[], const GLint y[], void *values);
-
- /* Put/Write a row of values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *values, const GLubyte *mask);
-
- /* Put/Write a row of RGB values. This is a special-case routine that's
- * only used for RGBA renderbuffers when the source data is GL_RGB. That's
- * a common case for glDrawPixels and some triangle routines.
- * The values will be of format GL_RGB and type DataType.
- */
- void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *values, const GLubyte *mask);
-
-
- /* Put/Write a row of identical values.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- GLint x, GLint y, const void *value, const GLubyte *mask);
-
- /* Put/Write values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
- const GLint x[], const GLint y[], const void *values,
- const GLubyte *mask);
- /* Put/Write identical values at arbitrary locations.
- * The values will be of format _BaseFormat and type DataType.
- */
- void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb,
- GLuint count, const GLint x[], const GLint y[],
- const void *value, const GLubyte *mask);
-};
-
-
-/**
- * A renderbuffer attachment points to either a texture object (and specifies
- * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer.
- */
-struct gl_renderbuffer_attachment
-{
- GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */
- GLboolean Complete;
-
- /**
- * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the
- * application supplied renderbuffer object.
- */
- struct gl_renderbuffer *Renderbuffer;
-
- /**
- * If \c Type is \c GL_TEXTURE, this stores a pointer to the application
- * supplied texture object.
- */
- struct gl_texture_object *Texture;
- GLuint TextureLevel; /**< Attached mipmap level. */
- GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */
- GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D
- * and 2D array textures */
-};
-
-
-/**
- * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc).
- * In C++ terms, think of this as a base class from which device drivers
- * will make derived classes.
- */
-struct gl_framebuffer
-{
- _glthread_Mutex Mutex; /**< for thread safety */
- /**
- * If zero, this is a window system framebuffer. If non-zero, this
- * is a FBO framebuffer; note that for some devices (i.e. those with
- * a natural pixel coordinate system for FBOs that differs from the
- * OpenGL/Mesa coordinate system), this means that the viewport,
- * polygon face orientation, and polygon stipple will have to be inverted.
- */
- GLuint Name;
-
- GLint RefCount;
- GLboolean DeletePending;
-
- /**
- * The framebuffer's visual. Immutable if this is a window system buffer.
- * Computed from attachments if user-made FBO.
- */
- struct gl_config Visual;
-
- GLboolean Initialized;
-
- GLuint Width, Height; /**< size of frame buffer in pixels */
-
- /** \name Drawing bounds (Intersection of buffer size and scissor box) */
- /*@{*/
- GLint _Xmin, _Xmax; /**< inclusive */
- GLint _Ymin, _Ymax; /**< exclusive */
- /*@}*/
-
- /** \name Derived Z buffer stuff */
- /*@{*/
- GLuint _DepthMax; /**< Max depth buffer value */
- GLfloat _DepthMaxF; /**< Float max depth buffer value */
- GLfloat _MRD; /**< minimum resolvable difference in Z values */
- /*@}*/
-
- /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */
- GLenum _Status;
-
- /** Integer color values */
- GLboolean _IntegerColor;
-
- /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */
- struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT];
-
- /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER
- * attribute group and GL_PIXEL attribute group, respectively.
- */
- GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS];
- GLenum ColorReadBuffer;
-
- /** Computed from ColorDraw/ReadBuffer above */
- GLuint _NumColorDrawBuffers;
- GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */
- GLint _ColorReadBufferIndex; /* -1 = None */
- struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS];
- struct gl_renderbuffer *_ColorReadBuffer;
-
- /** The Actual depth/stencil buffers to use. May be wrappers around the
- * depth/stencil buffers attached above. */
- struct gl_renderbuffer *_DepthBuffer;
- struct gl_renderbuffer *_StencilBuffer;
-
- /** Delete this framebuffer */
- void (*Delete)(struct gl_framebuffer *fb);
-};
-
-
-/**
- * Precision info for shader datatypes. See glGetShaderPrecisionFormat().
- */
-struct gl_precision
-{
- GLushort RangeMin; /**< min value exponent */
- GLushort RangeMax; /**< max value exponent */
- GLushort Precision; /**< number of mantissa bits */
-};
-
-
-/**
- * Limits for vertex and fragment programs/shaders.
- */
-struct gl_program_constants
-{
- /* logical limits */
- GLuint MaxInstructions;
- GLuint MaxAluInstructions;
- GLuint MaxTexInstructions;
- GLuint MaxTexIndirections;
- GLuint MaxAttribs;
- GLuint MaxTemps;
- GLuint MaxAddressRegs;
- GLuint MaxParameters;
- GLuint MaxLocalParams;
- GLuint MaxEnvParams;
- /* native/hardware limits */
- GLuint MaxNativeInstructions;
- GLuint MaxNativeAluInstructions;
- GLuint MaxNativeTexInstructions;
- GLuint MaxNativeTexIndirections;
- GLuint MaxNativeAttribs;
- GLuint MaxNativeTemps;
- GLuint MaxNativeAddressRegs;
- GLuint MaxNativeParameters;
- /* For shaders */
- GLuint MaxUniformComponents;
- /* GL_ARB_geometry_shader4 */
- GLuint MaxGeometryTextureImageUnits;
- GLuint MaxGeometryVaryingComponents;
- GLuint MaxVertexVaryingComponents;
- GLuint MaxGeometryUniformComponents;
- GLuint MaxGeometryOutputVertices;
- GLuint MaxGeometryTotalOutputComponents;
- /* ES 2.0 and GL_ARB_ES2_compatibility */
- struct gl_precision LowFloat, MediumFloat, HighFloat;
- struct gl_precision LowInt, MediumInt, HighInt;
-};
-
-
-/**
- * Constants which may be overridden by device driver during context creation
- * but are never changed after that.
- */
-struct gl_constants
-{
- GLint MaxTextureMbytes; /**< Max memory per image, in MB */
- GLint MaxTextureLevels; /**< Max mipmap levels. */
- GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */
- GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */
- GLint MaxArrayTextureLayers; /**< Max layers in array textures */
- GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixes */
- GLuint MaxTextureCoordUnits;
- GLuint MaxTextureImageUnits;
- GLuint MaxVertexTextureImageUnits;
- GLuint MaxCombinedTextureImageUnits;
- GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */
- GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
- GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */
-
- GLuint MaxArrayLockSize;
-
- GLint SubPixelBits;
-
- GLfloat MinPointSize, MaxPointSize; /**< aliased */
- GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */
- GLfloat PointSizeGranularity;
- GLfloat MinLineWidth, MaxLineWidth; /**< aliased */
- GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */
- GLfloat LineWidthGranularity;
-
- GLuint MaxColorTableSize;
-
- GLuint MaxClipPlanes;
- GLuint MaxLights;
- GLfloat MaxShininess; /**< GL_NV_light_max_exponent */
- GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */
-
- GLuint MaxViewportWidth, MaxViewportHeight;
-
- struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */
- struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */
- struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */
- GLuint MaxProgramMatrices;
- GLuint MaxProgramMatrixStackDepth;
-
- /** vertex array / buffer object bounds checking */
- GLboolean CheckArrayBounds;
-
- GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */
-
- GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */
- GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */
- GLuint MaxSamples; /**< GL_ARB_framebuffer_object */
-
- GLuint MaxVarying; /**< Number of float[4] varying parameters */
-
- GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */
-
- /** Which texture units support GL_ATI_envmap_bumpmap as targets */
- GLbitfield SupportedBumpUnits;
-
- /**
- * Maximum amount of time, measured in nanseconds, that the server can wait.
- */
- GLuint64 MaxServerWaitTimeout;
-
- /** GL_EXT_provoking_vertex */
- GLboolean QuadsFollowProvokingVertexConvention;
-
- /** OpenGL version 3.0 */
- GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */
-
- /** OpenGL version 3.2 */
- GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */
-
- /** GL_EXT_transform_feedback */
- GLuint MaxTransformFeedbackSeparateAttribs;
- GLuint MaxTransformFeedbackSeparateComponents;
- GLuint MaxTransformFeedbackInterleavedComponents;
-
- /** GL_EXT_gpu_shader4 */
- GLint MinProgramTexelOffset, MaxProgramTexelOffset;
-
- /* GL_EXT_framebuffer_sRGB */
- GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */
-};
-
-
-/**
- * Enable flag for each OpenGL extension. Different device drivers will
- * enable different extensions at runtime.
- */
-struct gl_extensions
-{
- GLboolean dummy; /* don't remove this! */
- GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */
- GLboolean dummy_false; /* Set false by _mesa_init_extensions(). */
- GLboolean ARB_ES2_compatibility;
- GLboolean ARB_blend_func_extended;
- GLboolean ARB_copy_buffer;
- GLboolean ARB_depth_buffer_float;
- GLboolean ARB_depth_clamp;
- GLboolean ARB_depth_texture;
- GLboolean ARB_draw_buffers;
- GLboolean ARB_draw_buffers_blend;
- GLboolean ARB_draw_elements_base_vertex;
- GLboolean ARB_draw_instanced;
- GLboolean ARB_fragment_coord_conventions;
- GLboolean ARB_fragment_program;
- GLboolean ARB_fragment_program_shadow;
- GLboolean ARB_fragment_shader;
- GLboolean ARB_framebuffer_object;
- GLboolean ARB_explicit_attrib_location;
- GLboolean ARB_geometry_shader4;
- GLboolean ARB_half_float_pixel;
- GLboolean ARB_half_float_vertex;
- GLboolean ARB_instanced_arrays;
- GLboolean ARB_map_buffer_range;
- GLboolean ARB_multisample;
- GLboolean ARB_multitexture;
- GLboolean ARB_occlusion_query;
- GLboolean ARB_occlusion_query2;
- GLboolean ARB_point_sprite;
- GLboolean ARB_sampler_objects;
- GLboolean ARB_seamless_cube_map;
- GLboolean ARB_shader_objects;
- GLboolean ARB_shader_stencil_export;
- GLboolean ARB_shading_language_100;
- GLboolean ARB_shadow;
- GLboolean ARB_shadow_ambient;
- GLboolean ARB_sync;
- GLboolean ARB_texture_border_clamp;
- GLboolean ARB_texture_buffer_object;
- GLboolean ARB_texture_compression;
- GLboolean ARB_texture_compression_rgtc;
- GLboolean ARB_texture_cube_map;
- GLboolean ARB_texture_env_combine;
- GLboolean ARB_texture_env_crossbar;
- GLboolean ARB_texture_env_dot3;
- GLboolean ARB_texture_float;
- GLboolean ARB_texture_mirrored_repeat;
- GLboolean ARB_texture_multisample;
- GLboolean ARB_texture_non_power_of_two;
- GLboolean ARB_texture_rg;
- GLboolean ARB_texture_rgb10_a2ui;
- GLboolean ARB_timer_query;
- GLboolean ARB_transform_feedback2;
- GLboolean ARB_transpose_matrix;
- GLboolean ARB_uniform_buffer_object;
- GLboolean ARB_vertex_array_object;
- GLboolean ARB_vertex_buffer_object;
- GLboolean ARB_vertex_program;
- GLboolean ARB_vertex_shader;
- GLboolean ARB_vertex_type_2_10_10_10_rev;
- GLboolean ARB_window_pos;
- GLboolean EXT_abgr;
- GLboolean EXT_bgra;
- GLboolean EXT_blend_color;
- GLboolean EXT_blend_equation_separate;
- GLboolean EXT_blend_func_separate;
- GLboolean EXT_blend_logic_op;
- GLboolean EXT_blend_minmax;
- GLboolean EXT_blend_subtract;
- GLboolean EXT_clip_volume_hint;
- GLboolean EXT_compiled_vertex_array;
- GLboolean EXT_copy_texture;
- GLboolean EXT_depth_bounds_test;
- GLboolean EXT_draw_buffers2;
- GLboolean EXT_draw_range_elements;
- GLboolean EXT_fog_coord;
- GLboolean EXT_framebuffer_blit;
- GLboolean EXT_framebuffer_multisample;
- GLboolean EXT_framebuffer_object;
- GLboolean EXT_framebuffer_sRGB;
- GLboolean EXT_gpu_program_parameters;
- GLboolean EXT_gpu_shader4;
- GLboolean EXT_multi_draw_arrays;
- GLboolean EXT_paletted_texture;
- GLboolean EXT_packed_depth_stencil;
- GLboolean EXT_packed_float;
- GLboolean EXT_packed_pixels;
- GLboolean EXT_pixel_buffer_object;
- GLboolean EXT_point_parameters;
- GLboolean EXT_polygon_offset;
- GLboolean EXT_provoking_vertex;
- GLboolean EXT_rescale_normal;
- GLboolean EXT_shadow_funcs;
- GLboolean EXT_secondary_color;
- GLboolean EXT_separate_shader_objects;
- GLboolean EXT_separate_specular_color;
- GLboolean EXT_shared_texture_palette;
- GLboolean EXT_stencil_wrap;
- GLboolean EXT_stencil_two_side;
- GLboolean EXT_subtexture;
- GLboolean EXT_texture;
- GLboolean EXT_texture_object;
- GLboolean EXT_texture3D;
- GLboolean EXT_texture_array;
- GLboolean EXT_texture_compression_s3tc;
- GLboolean EXT_texture_env_add;
- GLboolean EXT_texture_env_combine;
- GLboolean EXT_texture_env_dot3;
- GLboolean EXT_texture_filter_anisotropic;
- GLboolean EXT_texture_integer;
- GLboolean EXT_texture_lod_bias;
- GLboolean EXT_texture_mirror_clamp;
- GLboolean EXT_texture_shared_exponent;
- GLboolean EXT_texture_sRGB;
- GLboolean EXT_texture_sRGB_decode;
- GLboolean EXT_texture_swizzle;
- GLboolean EXT_transform_feedback;
- GLboolean EXT_timer_query;
- GLboolean EXT_vertex_array;
- GLboolean EXT_vertex_array_bgra;
- GLboolean EXT_vertex_array_set;
- GLboolean OES_standard_derivatives;
- /* vendor extensions */
- GLboolean AMD_conservative_depth;
- GLboolean APPLE_client_storage;
- GLboolean APPLE_packed_pixels;
- GLboolean APPLE_vertex_array_object;
- GLboolean APPLE_object_purgeable;
- GLboolean ATI_envmap_bumpmap;
- GLboolean ATI_texture_mirror_once;
- GLboolean ATI_texture_env_combine3;
- GLboolean ATI_fragment_shader;
- GLboolean ATI_separate_stencil;
- GLboolean IBM_rasterpos_clip;
- GLboolean IBM_multimode_draw_arrays;
- GLboolean MESA_pack_invert;
- GLboolean MESA_resize_buffers;
- GLboolean MESA_ycbcr_texture;
- GLboolean MESA_texture_array;
- GLboolean MESA_texture_signed_rgba;
- GLboolean NV_blend_square;
- GLboolean NV_conditional_render;
- GLboolean NV_fragment_program;
- GLboolean NV_fragment_program_option;
- GLboolean NV_light_max_exponent;
- GLboolean NV_point_sprite;
- GLboolean NV_primitive_restart;
- GLboolean NV_texgen_reflection;
- GLboolean NV_texture_env_combine4;
- GLboolean NV_texture_rectangle;
- GLboolean NV_vertex_program;
- GLboolean NV_vertex_program1_1;
- GLboolean OES_read_format;
- GLboolean SGI_texture_color_table;
- GLboolean SGIS_generate_mipmap;
- GLboolean SGIS_texture_edge_clamp;
- GLboolean SGIS_texture_lod;
- GLboolean TDFX_texture_compression_FXT1;
- GLboolean S3_s3tc;
- GLboolean OES_EGL_image;
- GLboolean OES_draw_texture;
- GLboolean EXT_texture_format_BGRA8888;
- GLboolean extension_sentinel;
- /** The extension string */
- const GLubyte *String;
- /** Number of supported extensions */
- GLuint Count;
-};
-
-
-/**
- * A stack of matrices (projection, modelview, color, texture, etc).
- */
-struct gl_matrix_stack
-{
- GLmatrix *Top; /**< points into Stack */
- GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */
- GLuint Depth; /**< 0 <= Depth < MaxDepth */
- GLuint MaxDepth; /**< size of Stack[] array */
- GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */
-};
-
-
-/**
- * \name Bits for image transfer operations
- * \sa __struct gl_contextRec::ImageTransferState.
- */
-/*@{*/
-#define IMAGE_SCALE_BIAS_BIT 0x1
-#define IMAGE_SHIFT_OFFSET_BIT 0x2
-#define IMAGE_MAP_COLOR_BIT 0x4
-#define IMAGE_CLAMP_BIT 0x800
-
-
-/** Pixel Transfer ops */
-#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \
- IMAGE_SHIFT_OFFSET_BIT | \
- IMAGE_MAP_COLOR_BIT)
-
-/**
- * \name Bits to indicate what state has changed.
- */
-/*@{*/
-#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */
-#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */
-#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */
-#define _NEW_COLOR (1 << 3) /**< gl_context::Color */
-#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */
-#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */
-#define _NEW_FOG (1 << 6) /**< gl_context::Fog */
-#define _NEW_HINT (1 << 7) /**< gl_context::Hint */
-#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */
-#define _NEW_LINE (1 << 9) /**< gl_context::Line */
-#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */
-#define _NEW_POINT (1 << 11) /**< gl_context::Point */
-#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */
-#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */
-#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */
-#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */
-#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */
-#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */
-#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */
-#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */
-#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */
-#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */
-#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */
-#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */
-#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */
-#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */
-#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */
-#define _NEW_PROGRAM_CONSTANTS (1 << 27)
-#define _NEW_BUFFER_OBJECT (1 << 28)
-#define _NEW_ALL ~0
-/*@}*/
-
-
-/**
- * \name Bits to track array state changes
- *
- * Also used to summarize array enabled.
- */
-/*@{*/
-#define _NEW_ARRAY_VERTEX VERT_BIT_POS
-#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT
-#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL
-#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0
-#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1
-#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG
-#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX
-#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG
-#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */
-#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0
-#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1
-#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2
-#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3
-#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4
-#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5
-#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6
-#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7
-#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */
-#define _NEW_ARRAY_ALL 0xffffffff
-
-
-#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i))
-#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i))
-/*@}*/
-
-
-
-/**
- * \name A bunch of flags that we think might be useful to drivers.
- *
- * Set in the __struct gl_contextRec::_TriangleCaps bitfield.
- */
-/*@{*/
-#define DD_FLATSHADE 0x1
-#define DD_SEPARATE_SPECULAR 0x2
-#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */
-#define DD_TRI_LIGHT_TWOSIDE 0x8
-#define DD_TRI_UNFILLED 0x10
-#define DD_TRI_SMOOTH 0x20
-#define DD_TRI_STIPPLE 0x40
-#define DD_TRI_OFFSET 0x80
-#define DD_LINE_SMOOTH 0x100
-#define DD_LINE_STIPPLE 0x200
-#define DD_POINT_SMOOTH 0x400
-#define DD_POINT_ATTEN 0x800
-#define DD_TRI_TWOSTENCIL 0x1000
-/*@}*/
-
-
-/**
- * \name Define the state changes under which each of these bits might change
- */
-/*@{*/
-#define _DD_NEW_FLATSHADE _NEW_LIGHT
-#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM)
-#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON
-#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT
-#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON
-#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON
-#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON
-#define _DD_NEW_TRI_OFFSET _NEW_POLYGON
-#define _DD_NEW_LINE_SMOOTH _NEW_LINE
-#define _DD_NEW_LINE_STIPPLE _NEW_LINE
-#define _DD_NEW_LINE_WIDTH _NEW_LINE
-#define _DD_NEW_POINT_SMOOTH _NEW_POINT
-#define _DD_NEW_POINT_SIZE _NEW_POINT
-#define _DD_NEW_POINT_ATTEN _NEW_POINT
-/*@}*/
-
-
-/**
- * Composite state flags
- */
-/*@{*/
-#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \
- _NEW_TEXTURE | \
- _NEW_POINT | \
- _NEW_PROGRAM | \
- _NEW_MODELVIEW)
-
-#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \
- _NEW_TEXTURE)
-
-#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL)
-/*@}*/
-
-
-
-
-/* This has to be included here. */
-#include "dd.h"
-
-
-/**
- * Display list flags.
- * Strictly this is a tnl-private concept, but it doesn't seem
- * worthwhile adding a tnl private structure just to hold this one bit
- * of information:
- */
-#define DLIST_DANGLING_REFS 0x1
-
-
-/** Opaque declaration of display list payload data type */
-union gl_dlist_node;
-
-
-/**
- * Provide a location where information about a display list can be
- * collected. Could be extended with driverPrivate structures,
- * etc. in the future.
- */
-struct gl_display_list
-{
- GLuint Name;
- GLbitfield Flags; /**< DLIST_x flags */
- /** The dlist commands are in a linked list of nodes */
- union gl_dlist_node *Head;
-};
-
-
-/**
- * State used during display list compilation and execution.
- */
-struct gl_dlist_state
-{
- GLuint CallDepth; /**< Current recursion calling depth */
-
- struct gl_display_list *CurrentList; /**< List currently being compiled */
- union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */
- GLuint CurrentPos; /**< Index into current block of nodes */
-
- GLvertexformat ListVtxfmt;
-
- GLubyte ActiveAttribSize[VERT_ATTRIB_MAX];
- GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4];
-
- GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX];
- GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4];
-
- GLubyte ActiveIndex;
- GLfloat CurrentIndex;
-
- GLubyte ActiveEdgeFlag;
- GLboolean CurrentEdgeFlag;
-
- struct {
- /* State known to have been set by the currently-compiling display
- * list. Used to eliminate some redundant state changes.
- */
- GLenum ShadeModel;
- } Current;
-};
-
-
-/**
- * Enum for the OpenGL APIs we know about and may support.
- */
-typedef enum
-{
- API_OPENGL,
- API_OPENGLES,
- API_OPENGLES2
-} gl_api;
-
-
-/**
- * Mesa rendering context.
- *
- * This is the central context data structure for Mesa. Almost all
- * OpenGL state is contained in this structure.
- * Think of this as a base class from which device drivers will derive
- * sub classes.
- *
- * The struct gl_context typedef names this structure.
- */
-struct gl_context
-{
- /** State possibly shared with other contexts in the address space */
- struct gl_shared_state *Shared;
-
- /** \name API function pointer tables */
- /*@{*/
- gl_api API;
- struct _glapi_table *Save; /**< Display list save functions */
- struct _glapi_table *Exec; /**< Execute functions */
- struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */
- /*@}*/
-
- struct gl_config Visual;
- struct gl_framebuffer *DrawBuffer; /**< buffer for writing */
- struct gl_framebuffer *ReadBuffer; /**< buffer for reading */
- struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */
- struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */
-
- /**
- * Device driver function pointer table
- */
- struct dd_function_table Driver;
-
- void *DriverCtx; /**< Points to device driver context/state */
-
- /** Core/Driver constants */
- struct gl_constants Const;
-
- /** \name The various 4x4 matrix stacks */
- /*@{*/
- struct gl_matrix_stack ModelviewMatrixStack;
- struct gl_matrix_stack ProjectionMatrixStack;
- struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS];
- struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES];
- struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */
- /*@}*/
-
- /** Combined modelview and projection matrix */
- GLmatrix _ModelProjectMatrix;
-
- /** \name Display lists */
- struct gl_dlist_state ListState;
-
- GLboolean ExecuteFlag; /**< Execute GL commands? */
- GLboolean CompileFlag; /**< Compile GL commands into display list? */
-
- /** Extension information */
- struct gl_extensions Extensions;
-
- /** Version info */
- GLuint VersionMajor, VersionMinor;
- char *VersionString;
-
- /** \name State attribute stack (for glPush/PopAttrib) */
- /*@{*/
- GLuint AttribStackDepth;
- struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH];
- /*@}*/
-
- /** \name Renderer attribute groups
- *
- * We define a struct for each attribute group to make pushing and popping
- * attributes easy. Also it's a good organization.
- */
- /*@{*/
- struct gl_accum_attrib Accum; /**< Accum buffer attributes */
- struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */
- struct gl_current_attrib Current; /**< Current attributes */
- struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */
- struct gl_eval_attrib Eval; /**< Eval attributes */
- struct gl_fog_attrib Fog; /**< Fog attributes */
- struct gl_hint_attrib Hint; /**< Hint attributes */
- struct gl_light_attrib Light; /**< Light attributes */
- struct gl_line_attrib Line; /**< Line attributes */
- struct gl_list_attrib List; /**< List attributes */
- struct gl_multisample_attrib Multisample;
- struct gl_pixel_attrib Pixel; /**< Pixel attributes */
- struct gl_point_attrib Point; /**< Point attributes */
- struct gl_polygon_attrib Polygon; /**< Polygon attributes */
- GLuint PolygonStipple[32]; /**< Polygon stipple */
- struct gl_scissor_attrib Scissor; /**< Scissor attributes */
- struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */
- struct gl_texture_attrib Texture; /**< Texture attributes */
- struct gl_transform_attrib Transform; /**< Transformation attributes */
- struct gl_viewport_attrib Viewport; /**< Viewport attributes */
- /*@}*/
-
- /** \name Client attribute stack */
- /*@{*/
- GLuint ClientAttribStackDepth;
- struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH];
- /*@}*/
-
- /** \name Client attribute groups */
- /*@{*/
- struct gl_array_attrib Array; /**< Vertex arrays */
- struct gl_pixelstore_attrib Pack; /**< Pixel packing */
- struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */
- struct gl_pixelstore_attrib DefaultPacking; /**< Default params */
- /*@}*/
-
- /** \name Other assorted state (not pushed/popped on attribute stack) */
- /*@{*/
- struct gl_pixelmaps PixelMaps;
-
- struct gl_evaluators EvalMap; /**< All evaluators */
- struct gl_feedback Feedback; /**< Feedback */
- struct gl_selection Select; /**< Selection */
-
- struct gl_program_state Program; /**< general program state */
- struct gl_vertex_program_state VertexProgram;
- struct gl_fragment_program_state FragmentProgram;
- struct gl_geometry_program_state GeometryProgram;
- struct gl_ati_fragment_shader_state ATIFragmentShader;
-
- struct gl_shader_state Shader; /**< GLSL shader object state */
- struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES];
-
- struct gl_query_state Query; /**< occlusion, timer queries */
-
- struct gl_transform_feedback TransformFeedback;
-
- struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
- struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
- /*@}*/
-
- struct gl_meta_state *Meta; /**< for "meta" operations */
-
- /* GL_EXT_framebuffer_object */
- struct gl_renderbuffer *CurrentRenderbuffer;
-
- GLenum ErrorValue; /**< Last error code */
-
- /**
- * Recognize and silence repeated error debug messages in buggy apps.
- */
- const char *ErrorDebugFmtString;
- GLuint ErrorDebugCount;
-
- GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
- GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
-
- GLboolean ViewportInitialized; /**< has viewport size been initialized? */
-
- GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
-
- /** \name Derived state */
- /*@{*/
- /** Bitwise-or of DD_* flags. Note that this bitfield may be used before
- * state validation so they need to always be current.
- */
- GLbitfield _TriangleCaps;
- GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */
- GLfloat _EyeZDir[3];
- GLfloat _ModelViewInvScale;
- GLboolean _NeedEyeCoords;
- GLboolean _ForceEyeCoords;
-
- GLuint TextureStateTimestamp; /**< detect changes to shared state */
-
- struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */
- struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */
- /**@}*/
-
- struct gl_list_extensions *ListExt; /**< driver dlist extensions */
-
- /** \name For debugging/development only */
- /*@{*/
- GLboolean FirstTimeCurrent;
- /*@}*/
-
- /** Dither disable via MESA_NO_DITHER env var */
- GLboolean NoDither;
-
- /** software compression/decompression supported or not */
- GLboolean Mesa_DXTn;
-
- GLboolean TextureFormatSupported[MESA_FORMAT_COUNT];
-
- /**
- * Use dp4 (rather than mul/mad) instructions for position
- * transformation?
- */
- GLboolean mvp_with_dp4;
-
- /**
- * \name Hooks for module contexts.
- *
- * These will eventually live in the driver or elsewhere.
- */
- /*@{*/
- void *swrast_context;
- void *swsetup_context;
- void *swtnl_context;
- void *swtnl_im;
- struct st_context *st;
- void *aelt_context;
- /*@}*/
-};
-
-
-#ifdef DEBUG
-extern int MESA_VERBOSE;
-extern int MESA_DEBUG_FLAGS;
-# define MESA_FUNCTION __FUNCTION__
-#else
-# define MESA_VERBOSE 0
-# define MESA_DEBUG_FLAGS 0
-# define MESA_FUNCTION "a function"
-# ifndef NDEBUG
-# define NDEBUG
-# endif
-#endif
-
-
-/** The MESA_VERBOSE var is a bitmask of these flags */
-enum _verbose
-{
- VERBOSE_VARRAY = 0x0001,
- VERBOSE_TEXTURE = 0x0002,
- VERBOSE_MATERIAL = 0x0004,
- VERBOSE_PIPELINE = 0x0008,
- VERBOSE_DRIVER = 0x0010,
- VERBOSE_STATE = 0x0020,
- VERBOSE_API = 0x0040,
- VERBOSE_DISPLAY_LIST = 0x0100,
- VERBOSE_LIGHTING = 0x0200,
- VERBOSE_PRIMS = 0x0400,
- VERBOSE_VERTS = 0x0800,
- VERBOSE_DISASSEM = 0x1000,
- VERBOSE_DRAW = 0x2000,
- VERBOSE_SWAPBUFFERS = 0x4000
-};
-
-
-/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */
-enum _debug
-{
- DEBUG_ALWAYS_FLUSH = 0x1
-};
-
-
-
-#endif /* MTYPES_H */
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.7
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mtypes.h
+ * Main Mesa data structures.
+ *
+ * Please try to mark derived values with a leading underscore ('_').
+ */
+
+#ifndef MTYPES_H
+#define MTYPES_H
+
+
+#include "main/glheader.h"
+#include "main/config.h"
+#include "main/mfeatures.h"
+#include "glapi/glapi.h"
+#include "math/m_matrix.h" /* GLmatrix */
+#include "main/simple_list.h" /* struct simple_node */
+#include "main/formats.h" /* MESA_FORMAT_COUNT */
+
+
+/**
+ * Color channel data type.
+ */
+#if CHAN_BITS == 8
+ typedef GLubyte GLchan;
+#define CHAN_MAX 255
+#define CHAN_MAXF 255.0F
+#define CHAN_TYPE GL_UNSIGNED_BYTE
+#elif CHAN_BITS == 16
+ typedef GLushort GLchan;
+#define CHAN_MAX 65535
+#define CHAN_MAXF 65535.0F
+#define CHAN_TYPE GL_UNSIGNED_SHORT
+#elif CHAN_BITS == 32
+ typedef GLfloat GLchan;
+#define CHAN_MAX 1.0
+#define CHAN_MAXF 1.0F
+#define CHAN_TYPE GL_FLOAT
+#else
+#error "illegal number of color channel bits"
+#endif
+
+
+/**
+ * Stencil buffer data type.
+ */
+#if STENCIL_BITS==8
+ typedef GLubyte GLstencil;
+#elif STENCIL_BITS==16
+ typedef GLushort GLstencil;
+#else
+# error "illegal number of stencil bits"
+#endif
+
+
+/**
+ * \name 64-bit extension of GLbitfield.
+ */
+/*@{*/
+typedef GLuint64 GLbitfield64;
+
+/** Set a single bit */
+#define BITFIELD64_BIT(b) (1ULL << (b))
+
+
+/**
+ * \name Some forward type declarations
+ */
+/*@{*/
+struct _mesa_HashTable;
+struct gl_attrib_node;
+struct gl_list_extensions;
+struct gl_meta_state;
+struct gl_pixelstore_attrib;
+struct gl_program_cache;
+struct gl_texture_format;
+struct gl_texture_image;
+struct gl_texture_object;
+struct gl_context;
+struct st_context;
+/*@}*/
+
+
+/** Extra draw modes beyond GL_POINTS, GL_TRIANGLE_FAN, etc */
+#define PRIM_OUTSIDE_BEGIN_END (GL_POLYGON+1)
+#define PRIM_INSIDE_UNKNOWN_PRIM (GL_POLYGON+2)
+#define PRIM_UNKNOWN (GL_POLYGON+3)
+
+
+/**
+ * Shader stages. Note that these will become 5 with tessellation.
+ * These MUST have the same values as gallium's PIPE_SHADER_*
+ */
+typedef enum
+{
+ MESA_SHADER_VERTEX = 0,
+ MESA_SHADER_FRAGMENT = 1,
+ MESA_SHADER_GEOMETRY = 2,
+ MESA_SHADER_TYPES = 3
+} gl_shader_type;
+
+
+
+/**
+ * Indexes for vertex program attributes.
+ * GL_NV_vertex_program aliases generic attributes over the conventional
+ * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
+ * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
+ * generic attributes are distinct/separate).
+ */
+typedef enum
+{
+ VERT_ATTRIB_POS = 0,
+ VERT_ATTRIB_WEIGHT = 1,
+ VERT_ATTRIB_NORMAL = 2,
+ VERT_ATTRIB_COLOR0 = 3,
+ VERT_ATTRIB_COLOR1 = 4,
+ VERT_ATTRIB_FOG = 5,
+ VERT_ATTRIB_COLOR_INDEX = 6,
+ VERT_ATTRIB_POINT_SIZE = 6, /*alias*/
+ VERT_ATTRIB_EDGEFLAG = 7,
+ VERT_ATTRIB_TEX0 = 8,
+ VERT_ATTRIB_TEX1 = 9,
+ VERT_ATTRIB_TEX2 = 10,
+ VERT_ATTRIB_TEX3 = 11,
+ VERT_ATTRIB_TEX4 = 12,
+ VERT_ATTRIB_TEX5 = 13,
+ VERT_ATTRIB_TEX6 = 14,
+ VERT_ATTRIB_TEX7 = 15,
+ VERT_ATTRIB_GENERIC0 = 16,
+ VERT_ATTRIB_GENERIC1 = 17,
+ VERT_ATTRIB_GENERIC2 = 18,
+ VERT_ATTRIB_GENERIC3 = 19,
+ VERT_ATTRIB_GENERIC4 = 20,
+ VERT_ATTRIB_GENERIC5 = 21,
+ VERT_ATTRIB_GENERIC6 = 22,
+ VERT_ATTRIB_GENERIC7 = 23,
+ VERT_ATTRIB_GENERIC8 = 24,
+ VERT_ATTRIB_GENERIC9 = 25,
+ VERT_ATTRIB_GENERIC10 = 26,
+ VERT_ATTRIB_GENERIC11 = 27,
+ VERT_ATTRIB_GENERIC12 = 28,
+ VERT_ATTRIB_GENERIC13 = 29,
+ VERT_ATTRIB_GENERIC14 = 30,
+ VERT_ATTRIB_GENERIC15 = 31,
+ VERT_ATTRIB_MAX = 32
+} gl_vert_attrib;
+
+/**
+ * Bitflags for vertex attributes.
+ * These are used in bitfields in many places.
+ */
+/*@{*/
+#define VERT_BIT_POS (1 << VERT_ATTRIB_POS)
+#define VERT_BIT_WEIGHT (1 << VERT_ATTRIB_WEIGHT)
+#define VERT_BIT_NORMAL (1 << VERT_ATTRIB_NORMAL)
+#define VERT_BIT_COLOR0 (1 << VERT_ATTRIB_COLOR0)
+#define VERT_BIT_COLOR1 (1 << VERT_ATTRIB_COLOR1)
+#define VERT_BIT_FOG (1 << VERT_ATTRIB_FOG)
+#define VERT_BIT_COLOR_INDEX (1 << VERT_ATTRIB_COLOR_INDEX)
+#define VERT_BIT_EDGEFLAG (1 << VERT_ATTRIB_EDGEFLAG)
+#define VERT_BIT_TEX0 (1 << VERT_ATTRIB_TEX0)
+#define VERT_BIT_TEX1 (1 << VERT_ATTRIB_TEX1)
+#define VERT_BIT_TEX2 (1 << VERT_ATTRIB_TEX2)
+#define VERT_BIT_TEX3 (1 << VERT_ATTRIB_TEX3)
+#define VERT_BIT_TEX4 (1 << VERT_ATTRIB_TEX4)
+#define VERT_BIT_TEX5 (1 << VERT_ATTRIB_TEX5)
+#define VERT_BIT_TEX6 (1 << VERT_ATTRIB_TEX6)
+#define VERT_BIT_TEX7 (1 << VERT_ATTRIB_TEX7)
+#define VERT_BIT_GENERIC0 (1 << VERT_ATTRIB_GENERIC0)
+#define VERT_BIT_GENERIC1 (1 << VERT_ATTRIB_GENERIC1)
+#define VERT_BIT_GENERIC2 (1 << VERT_ATTRIB_GENERIC2)
+#define VERT_BIT_GENERIC3 (1 << VERT_ATTRIB_GENERIC3)
+#define VERT_BIT_GENERIC4 (1 << VERT_ATTRIB_GENERIC4)
+#define VERT_BIT_GENERIC5 (1 << VERT_ATTRIB_GENERIC5)
+#define VERT_BIT_GENERIC6 (1 << VERT_ATTRIB_GENERIC6)
+#define VERT_BIT_GENERIC7 (1 << VERT_ATTRIB_GENERIC7)
+#define VERT_BIT_GENERIC8 (1 << VERT_ATTRIB_GENERIC8)
+#define VERT_BIT_GENERIC9 (1 << VERT_ATTRIB_GENERIC9)
+#define VERT_BIT_GENERIC10 (1 << VERT_ATTRIB_GENERIC10)
+#define VERT_BIT_GENERIC11 (1 << VERT_ATTRIB_GENERIC11)
+#define VERT_BIT_GENERIC12 (1 << VERT_ATTRIB_GENERIC12)
+#define VERT_BIT_GENERIC13 (1 << VERT_ATTRIB_GENERIC13)
+#define VERT_BIT_GENERIC14 (1 << VERT_ATTRIB_GENERIC14)
+#define VERT_BIT_GENERIC15 (1 << VERT_ATTRIB_GENERIC15)
+
+#define VERT_BIT_TEX(u) (1 << (VERT_ATTRIB_TEX0 + (u)))
+#define VERT_BIT_GENERIC(g) (1 << (VERT_ATTRIB_GENERIC0 + (g)))
+/*@}*/
+
+
+/**
+ * Indexes for vertex program result attributes
+ */
+typedef enum
+{
+ VERT_RESULT_HPOS = 0,
+ VERT_RESULT_COL0 = 1,
+ VERT_RESULT_COL1 = 2,
+ VERT_RESULT_FOGC = 3,
+ VERT_RESULT_TEX0 = 4,
+ VERT_RESULT_TEX1 = 5,
+ VERT_RESULT_TEX2 = 6,
+ VERT_RESULT_TEX3 = 7,
+ VERT_RESULT_TEX4 = 8,
+ VERT_RESULT_TEX5 = 9,
+ VERT_RESULT_TEX6 = 10,
+ VERT_RESULT_TEX7 = 11,
+ VERT_RESULT_PSIZ = 12,
+ VERT_RESULT_BFC0 = 13,
+ VERT_RESULT_BFC1 = 14,
+ VERT_RESULT_EDGE = 15,
+ VERT_RESULT_VAR0 = 16, /**< shader varying */
+ VERT_RESULT_MAX = (VERT_RESULT_VAR0 + MAX_VARYING)
+} gl_vert_result;
+
+
+/*********************************************/
+
+/**
+ * Indexes for geometry program attributes.
+ */
+typedef enum
+{
+ GEOM_ATTRIB_POSITION = 0,
+ GEOM_ATTRIB_COLOR0 = 1,
+ GEOM_ATTRIB_COLOR1 = 2,
+ GEOM_ATTRIB_SECONDARY_COLOR0 = 3,
+ GEOM_ATTRIB_SECONDARY_COLOR1 = 4,
+ GEOM_ATTRIB_FOG_FRAG_COORD = 5,
+ GEOM_ATTRIB_POINT_SIZE = 6,
+ GEOM_ATTRIB_CLIP_VERTEX = 7,
+ GEOM_ATTRIB_PRIMITIVE_ID = 8,
+ GEOM_ATTRIB_TEX_COORD = 9,
+
+ GEOM_ATTRIB_VAR0 = 16,
+ GEOM_ATTRIB_MAX = (GEOM_ATTRIB_VAR0 + MAX_VARYING)
+} gl_geom_attrib;
+
+/**
+ * Bitflags for geometry attributes.
+ * These are used in bitfields in many places.
+ */
+/*@{*/
+#define GEOM_BIT_COLOR0 (1 << GEOM_ATTRIB_COLOR0)
+#define GEOM_BIT_COLOR1 (1 << GEOM_ATTRIB_COLOR1)
+#define GEOM_BIT_SCOLOR0 (1 << GEOM_ATTRIB_SECONDARY_COLOR0)
+#define GEOM_BIT_SCOLOR1 (1 << GEOM_ATTRIB_SECONDARY_COLOR1)
+#define GEOM_BIT_TEX_COORD (1 << GEOM_ATTRIB_TEX_COORD)
+#define GEOM_BIT_FOG_COORD (1 << GEOM_ATTRIB_FOG_FRAG_COORD)
+#define GEOM_BIT_POSITION (1 << GEOM_ATTRIB_POSITION)
+#define GEOM_BIT_POINT_SIDE (1 << GEOM_ATTRIB_POINT_SIZE)
+#define GEOM_BIT_CLIP_VERTEX (1 << GEOM_ATTRIB_CLIP_VERTEX)
+#define GEOM_BIT_PRIM_ID (1 << GEOM_ATTRIB_PRIMITIVE_ID)
+#define GEOM_BIT_VAR0 (1 << GEOM_ATTRIB_VAR0)
+
+#define GEOM_BIT_VAR(g) (1 << (GEOM_BIT_VAR0 + (g)))
+/*@}*/
+
+
+/**
+ * Indexes for geometry program result attributes
+ */
+typedef enum
+{
+ GEOM_RESULT_POS = 0,
+ GEOM_RESULT_COL0 = 1,
+ GEOM_RESULT_COL1 = 2,
+ GEOM_RESULT_SCOL0 = 3,
+ GEOM_RESULT_SCOL1 = 4,
+ GEOM_RESULT_FOGC = 5,
+ GEOM_RESULT_TEX0 = 6,
+ GEOM_RESULT_TEX1 = 7,
+ GEOM_RESULT_TEX2 = 8,
+ GEOM_RESULT_TEX3 = 9,
+ GEOM_RESULT_TEX4 = 10,
+ GEOM_RESULT_TEX5 = 11,
+ GEOM_RESULT_TEX6 = 12,
+ GEOM_RESULT_TEX7 = 13,
+ GEOM_RESULT_PSIZ = 14,
+ GEOM_RESULT_CLPV = 15,
+ GEOM_RESULT_PRID = 16,
+ GEOM_RESULT_LAYR = 17,
+ GEOM_RESULT_VAR0 = 18, /**< shader varying, should really be 16 */
+ /* ### we need to -2 because var0 is 18 instead 16 like in the others */
+ GEOM_RESULT_MAX = (GEOM_RESULT_VAR0 + MAX_VARYING - 2)
+} gl_geom_result;
+
+
+/**
+ * Indexes for fragment program input attributes.
+ */
+typedef enum
+{
+ FRAG_ATTRIB_WPOS = 0,
+ FRAG_ATTRIB_COL0 = 1,
+ FRAG_ATTRIB_COL1 = 2,
+ FRAG_ATTRIB_FOGC = 3,
+ FRAG_ATTRIB_TEX0 = 4,
+ FRAG_ATTRIB_TEX1 = 5,
+ FRAG_ATTRIB_TEX2 = 6,
+ FRAG_ATTRIB_TEX3 = 7,
+ FRAG_ATTRIB_TEX4 = 8,
+ FRAG_ATTRIB_TEX5 = 9,
+ FRAG_ATTRIB_TEX6 = 10,
+ FRAG_ATTRIB_TEX7 = 11,
+ FRAG_ATTRIB_FACE = 12, /**< front/back face */
+ FRAG_ATTRIB_PNTC = 13, /**< sprite/point coord */
+ FRAG_ATTRIB_VAR0 = 14, /**< shader varying */
+ FRAG_ATTRIB_MAX = (FRAG_ATTRIB_VAR0 + MAX_VARYING)
+} gl_frag_attrib;
+
+/**
+ * Bitflags for fragment program input attributes.
+ */
+/*@{*/
+#define FRAG_BIT_WPOS (1 << FRAG_ATTRIB_WPOS)
+#define FRAG_BIT_COL0 (1 << FRAG_ATTRIB_COL0)
+#define FRAG_BIT_COL1 (1 << FRAG_ATTRIB_COL1)
+#define FRAG_BIT_FOGC (1 << FRAG_ATTRIB_FOGC)
+#define FRAG_BIT_FACE (1 << FRAG_ATTRIB_FACE)
+#define FRAG_BIT_PNTC (1 << FRAG_ATTRIB_PNTC)
+#define FRAG_BIT_TEX0 (1 << FRAG_ATTRIB_TEX0)
+#define FRAG_BIT_TEX1 (1 << FRAG_ATTRIB_TEX1)
+#define FRAG_BIT_TEX2 (1 << FRAG_ATTRIB_TEX2)
+#define FRAG_BIT_TEX3 (1 << FRAG_ATTRIB_TEX3)
+#define FRAG_BIT_TEX4 (1 << FRAG_ATTRIB_TEX4)
+#define FRAG_BIT_TEX5 (1 << FRAG_ATTRIB_TEX5)
+#define FRAG_BIT_TEX6 (1 << FRAG_ATTRIB_TEX6)
+#define FRAG_BIT_TEX7 (1 << FRAG_ATTRIB_TEX7)
+#define FRAG_BIT_VAR0 (1 << FRAG_ATTRIB_VAR0)
+
+#define FRAG_BIT_TEX(U) (FRAG_BIT_TEX0 << (U))
+#define FRAG_BIT_VAR(V) (FRAG_BIT_VAR0 << (V))
+
+#define FRAG_BITS_TEX_ANY (FRAG_BIT_TEX0| \
+ FRAG_BIT_TEX1| \
+ FRAG_BIT_TEX2| \
+ FRAG_BIT_TEX3| \
+ FRAG_BIT_TEX4| \
+ FRAG_BIT_TEX5| \
+ FRAG_BIT_TEX6| \
+ FRAG_BIT_TEX7)
+/*@}*/
+
+
+/**
+ * Fragment program results
+ */
+typedef enum
+{
+ FRAG_RESULT_DEPTH = 0,
+ FRAG_RESULT_STENCIL = 1,
+ FRAG_RESULT_COLOR = 2,
+ FRAG_RESULT_DATA0 = 3,
+ FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+} gl_frag_result;
+
+
+/**
+ * Indexes for all renderbuffers
+ */
+typedef enum
+{
+ /* the four standard color buffers */
+ BUFFER_FRONT_LEFT,
+ BUFFER_BACK_LEFT,
+ BUFFER_FRONT_RIGHT,
+ BUFFER_BACK_RIGHT,
+ BUFFER_DEPTH,
+ BUFFER_STENCIL,
+ BUFFER_ACCUM,
+ /* optional aux buffer */
+ BUFFER_AUX0,
+ /* generic renderbuffers */
+ BUFFER_COLOR0,
+ BUFFER_COLOR1,
+ BUFFER_COLOR2,
+ BUFFER_COLOR3,
+ BUFFER_COLOR4,
+ BUFFER_COLOR5,
+ BUFFER_COLOR6,
+ BUFFER_COLOR7,
+ BUFFER_COUNT
+} gl_buffer_index;
+
+/**
+ * Bit flags for all renderbuffers
+ */
+#define BUFFER_BIT_FRONT_LEFT (1 << BUFFER_FRONT_LEFT)
+#define BUFFER_BIT_BACK_LEFT (1 << BUFFER_BACK_LEFT)
+#define BUFFER_BIT_FRONT_RIGHT (1 << BUFFER_FRONT_RIGHT)
+#define BUFFER_BIT_BACK_RIGHT (1 << BUFFER_BACK_RIGHT)
+#define BUFFER_BIT_AUX0 (1 << BUFFER_AUX0)
+#define BUFFER_BIT_AUX1 (1 << BUFFER_AUX1)
+#define BUFFER_BIT_AUX2 (1 << BUFFER_AUX2)
+#define BUFFER_BIT_AUX3 (1 << BUFFER_AUX3)
+#define BUFFER_BIT_DEPTH (1 << BUFFER_DEPTH)
+#define BUFFER_BIT_STENCIL (1 << BUFFER_STENCIL)
+#define BUFFER_BIT_ACCUM (1 << BUFFER_ACCUM)
+#define BUFFER_BIT_COLOR0 (1 << BUFFER_COLOR0)
+#define BUFFER_BIT_COLOR1 (1 << BUFFER_COLOR1)
+#define BUFFER_BIT_COLOR2 (1 << BUFFER_COLOR2)
+#define BUFFER_BIT_COLOR3 (1 << BUFFER_COLOR3)
+#define BUFFER_BIT_COLOR4 (1 << BUFFER_COLOR4)
+#define BUFFER_BIT_COLOR5 (1 << BUFFER_COLOR5)
+#define BUFFER_BIT_COLOR6 (1 << BUFFER_COLOR6)
+#define BUFFER_BIT_COLOR7 (1 << BUFFER_COLOR7)
+
+/**
+ * Mask of all the color buffer bits (but not accum).
+ */
+#define BUFFER_BITS_COLOR (BUFFER_BIT_FRONT_LEFT | \
+ BUFFER_BIT_BACK_LEFT | \
+ BUFFER_BIT_FRONT_RIGHT | \
+ BUFFER_BIT_BACK_RIGHT | \
+ BUFFER_BIT_AUX0 | \
+ BUFFER_BIT_COLOR0 | \
+ BUFFER_BIT_COLOR1 | \
+ BUFFER_BIT_COLOR2 | \
+ BUFFER_BIT_COLOR3 | \
+ BUFFER_BIT_COLOR4 | \
+ BUFFER_BIT_COLOR5 | \
+ BUFFER_BIT_COLOR6 | \
+ BUFFER_BIT_COLOR7)
+
+
+/**
+ * Framebuffer configuration (aka visual / pixelformat)
+ * Note: some of these fields should be boolean, but it appears that
+ * code in drivers/dri/common/util.c requires int-sized fields.
+ */
+struct gl_config
+{
+ GLboolean rgbMode;
+ GLboolean floatMode;
+ GLboolean colorIndexMode; /* XXX is this used anywhere? */
+ GLuint doubleBufferMode;
+ GLuint stereoMode;
+
+ GLboolean haveAccumBuffer;
+ GLboolean haveDepthBuffer;
+ GLboolean haveStencilBuffer;
+
+ GLint redBits, greenBits, blueBits, alphaBits; /* bits per comp */
+ GLuint redMask, greenMask, blueMask, alphaMask;
+ GLint rgbBits; /* total bits for rgb */
+ GLint indexBits; /* total bits for colorindex */
+
+ GLint accumRedBits, accumGreenBits, accumBlueBits, accumAlphaBits;
+ GLint depthBits;
+ GLint stencilBits;
+
+ GLint numAuxBuffers;
+
+ GLint level;
+
+ /* EXT_visual_rating / GLX 1.2 */
+ GLint visualRating;
+
+ /* EXT_visual_info / GLX 1.2 */
+ GLint transparentPixel;
+ /* colors are floats scaled to ints */
+ GLint transparentRed, transparentGreen, transparentBlue, transparentAlpha;
+ GLint transparentIndex;
+
+ /* ARB_multisample / SGIS_multisample */
+ GLint sampleBuffers;
+ GLint samples;
+
+ /* SGIX_pbuffer / GLX 1.3 */
+ GLint maxPbufferWidth;
+ GLint maxPbufferHeight;
+ GLint maxPbufferPixels;
+ GLint optimalPbufferWidth; /* Only for SGIX_pbuffer. */
+ GLint optimalPbufferHeight; /* Only for SGIX_pbuffer. */
+
+ /* OML_swap_method */
+ GLint swapMethod;
+
+ /* EXT_texture_from_pixmap */
+ GLint bindToTextureRgb;
+ GLint bindToTextureRgba;
+ GLint bindToMipmapTexture;
+ GLint bindToTextureTargets;
+ GLint yInverted;
+
+ /* EXT_framebuffer_sRGB */
+ GLint sRGBCapable;
+};
+
+
+/**
+ * Data structure for color tables
+ */
+struct gl_color_table
+{
+ GLenum InternalFormat; /**< The user-specified format */
+ GLenum _BaseFormat; /**< GL_ALPHA, GL_RGBA, GL_RGB, etc */
+ GLuint Size; /**< number of entries in table */
+ GLfloat *TableF; /**< Color table, floating point values */
+ GLubyte *TableUB; /**< Color table, ubyte values */
+ GLubyte RedSize;
+ GLubyte GreenSize;
+ GLubyte BlueSize;
+ GLubyte AlphaSize;
+ GLubyte LuminanceSize;
+ GLubyte IntensitySize;
+};
+
+
+/**
+ * \name Bit flags used for updating material values.
+ */
+/*@{*/
+#define MAT_ATTRIB_FRONT_AMBIENT 0
+#define MAT_ATTRIB_BACK_AMBIENT 1
+#define MAT_ATTRIB_FRONT_DIFFUSE 2
+#define MAT_ATTRIB_BACK_DIFFUSE 3
+#define MAT_ATTRIB_FRONT_SPECULAR 4
+#define MAT_ATTRIB_BACK_SPECULAR 5
+#define MAT_ATTRIB_FRONT_EMISSION 6
+#define MAT_ATTRIB_BACK_EMISSION 7
+#define MAT_ATTRIB_FRONT_SHININESS 8
+#define MAT_ATTRIB_BACK_SHININESS 9
+#define MAT_ATTRIB_FRONT_INDEXES 10
+#define MAT_ATTRIB_BACK_INDEXES 11
+#define MAT_ATTRIB_MAX 12
+
+#define MAT_ATTRIB_AMBIENT(f) (MAT_ATTRIB_FRONT_AMBIENT+(f))
+#define MAT_ATTRIB_DIFFUSE(f) (MAT_ATTRIB_FRONT_DIFFUSE+(f))
+#define MAT_ATTRIB_SPECULAR(f) (MAT_ATTRIB_FRONT_SPECULAR+(f))
+#define MAT_ATTRIB_EMISSION(f) (MAT_ATTRIB_FRONT_EMISSION+(f))
+#define MAT_ATTRIB_SHININESS(f)(MAT_ATTRIB_FRONT_SHININESS+(f))
+#define MAT_ATTRIB_INDEXES(f) (MAT_ATTRIB_FRONT_INDEXES+(f))
+
+#define MAT_INDEX_AMBIENT 0
+#define MAT_INDEX_DIFFUSE 1
+#define MAT_INDEX_SPECULAR 2
+
+#define MAT_BIT_FRONT_AMBIENT (1<<MAT_ATTRIB_FRONT_AMBIENT)
+#define MAT_BIT_BACK_AMBIENT (1<<MAT_ATTRIB_BACK_AMBIENT)
+#define MAT_BIT_FRONT_DIFFUSE (1<<MAT_ATTRIB_FRONT_DIFFUSE)
+#define MAT_BIT_BACK_DIFFUSE (1<<MAT_ATTRIB_BACK_DIFFUSE)
+#define MAT_BIT_FRONT_SPECULAR (1<<MAT_ATTRIB_FRONT_SPECULAR)
+#define MAT_BIT_BACK_SPECULAR (1<<MAT_ATTRIB_BACK_SPECULAR)
+#define MAT_BIT_FRONT_EMISSION (1<<MAT_ATTRIB_FRONT_EMISSION)
+#define MAT_BIT_BACK_EMISSION (1<<MAT_ATTRIB_BACK_EMISSION)
+#define MAT_BIT_FRONT_SHININESS (1<<MAT_ATTRIB_FRONT_SHININESS)
+#define MAT_BIT_BACK_SHININESS (1<<MAT_ATTRIB_BACK_SHININESS)
+#define MAT_BIT_FRONT_INDEXES (1<<MAT_ATTRIB_FRONT_INDEXES)
+#define MAT_BIT_BACK_INDEXES (1<<MAT_ATTRIB_BACK_INDEXES)
+
+
+#define FRONT_MATERIAL_BITS (MAT_BIT_FRONT_EMISSION | \
+ MAT_BIT_FRONT_AMBIENT | \
+ MAT_BIT_FRONT_DIFFUSE | \
+ MAT_BIT_FRONT_SPECULAR | \
+ MAT_BIT_FRONT_SHININESS | \
+ MAT_BIT_FRONT_INDEXES)
+
+#define BACK_MATERIAL_BITS (MAT_BIT_BACK_EMISSION | \
+ MAT_BIT_BACK_AMBIENT | \
+ MAT_BIT_BACK_DIFFUSE | \
+ MAT_BIT_BACK_SPECULAR | \
+ MAT_BIT_BACK_SHININESS | \
+ MAT_BIT_BACK_INDEXES)
+
+#define ALL_MATERIAL_BITS (FRONT_MATERIAL_BITS | BACK_MATERIAL_BITS)
+/*@}*/
+
+
+#define EXP_TABLE_SIZE 512 /**< Specular exponent lookup table sizes */
+#define SHINE_TABLE_SIZE 256 /**< Material shininess lookup table sizes */
+
+/**
+ * Material shininess lookup table.
+ */
+struct gl_shine_tab
+{
+ struct gl_shine_tab *next, *prev;
+ GLfloat tab[SHINE_TABLE_SIZE+1];
+ GLfloat shininess;
+ GLuint refcount;
+};
+
+
+/**
+ * Light source state.
+ */
+struct gl_light
+{
+ struct gl_light *next; /**< double linked list with sentinel */
+ struct gl_light *prev;
+
+ GLfloat Ambient[4]; /**< ambient color */
+ GLfloat Diffuse[4]; /**< diffuse color */
+ GLfloat Specular[4]; /**< specular color */
+ GLfloat EyePosition[4]; /**< position in eye coordinates */
+ GLfloat SpotDirection[4]; /**< spotlight direction in eye coordinates */
+ GLfloat SpotExponent;
+ GLfloat SpotCutoff; /**< in degrees */
+ GLfloat _CosCutoffNeg; /**< = cos(SpotCutoff) */
+ GLfloat _CosCutoff; /**< = MAX(0, cos(SpotCutoff)) */
+ GLfloat ConstantAttenuation;
+ GLfloat LinearAttenuation;
+ GLfloat QuadraticAttenuation;
+ GLboolean Enabled; /**< On/off flag */
+
+ /**
+ * \name Derived fields
+ */
+ /*@{*/
+ GLbitfield _Flags; /**< State */
+
+ GLfloat _Position[4]; /**< position in eye/obj coordinates */
+ GLfloat _VP_inf_norm[3]; /**< Norm direction to infinite light */
+ GLfloat _h_inf_norm[3]; /**< Norm( _VP_inf_norm + <0,0,1> ) */
+ GLfloat _NormSpotDirection[4]; /**< normalized spotlight direction */
+ GLfloat _VP_inf_spot_attenuation;
+
+ GLfloat _SpotExpTable[EXP_TABLE_SIZE][2]; /**< to replace a pow() call */
+ GLfloat _MatAmbient[2][3]; /**< material ambient * light ambient */
+ GLfloat _MatDiffuse[2][3]; /**< material diffuse * light diffuse */
+ GLfloat _MatSpecular[2][3]; /**< material spec * light specular */
+ GLfloat _dli; /**< CI diffuse light intensity */
+ GLfloat _sli; /**< CI specular light intensity */
+ /*@}*/
+};
+
+
+/**
+ * Light model state.
+ */
+struct gl_lightmodel
+{
+ GLfloat Ambient[4]; /**< ambient color */
+ GLboolean LocalViewer; /**< Local (or infinite) view point? */
+ GLboolean TwoSide; /**< Two (or one) sided lighting? */
+ GLenum ColorControl; /**< either GL_SINGLE_COLOR
+ * or GL_SEPARATE_SPECULAR_COLOR */
+};
+
+
+/**
+ * Material state.
+ */
+struct gl_material
+{
+ GLfloat Attrib[MAT_ATTRIB_MAX][4];
+};
+
+
+/**
+ * Accumulation buffer attribute group (GL_ACCUM_BUFFER_BIT)
+ */
+struct gl_accum_attrib
+{
+ GLfloat ClearColor[4]; /**< Accumulation buffer clear color */
+};
+
+
+/**
+ * Color buffer attribute group (GL_COLOR_BUFFER_BIT).
+ */
+struct gl_colorbuffer_attrib
+{
+ GLuint ClearIndex; /**< Index to use for glClear */
+ GLclampf ClearColor[4]; /**< Color to use for glClear */
+
+ GLuint IndexMask; /**< Color index write mask */
+ GLubyte ColorMask[MAX_DRAW_BUFFERS][4];/**< Each flag is 0xff or 0x0 */
+
+ GLenum DrawBuffer[MAX_DRAW_BUFFERS]; /**< Which buffer to draw into */
+
+ /**
+ * \name alpha testing
+ */
+ /*@{*/
+ GLboolean AlphaEnabled; /**< Alpha test enabled flag */
+ GLenum AlphaFunc; /**< Alpha test function */
+ GLclampf AlphaRef; /**< Alpha reference value */
+ /*@}*/
+
+ /**
+ * \name Blending
+ */
+ /*@{*/
+ GLbitfield BlendEnabled; /**< Per-buffer blend enable flags */
+ GLfloat BlendColor[4]; /**< Blending color */
+ struct
+ {
+ GLenum SrcRGB; /**< RGB blend source term */
+ GLenum DstRGB; /**< RGB blend dest term */
+ GLenum SrcA; /**< Alpha blend source term */
+ GLenum DstA; /**< Alpha blend dest term */
+ GLenum EquationRGB; /**< GL_ADD, GL_SUBTRACT, etc. */
+ GLenum EquationA; /**< GL_ADD, GL_SUBTRACT, etc. */
+ } Blend[MAX_DRAW_BUFFERS];
+ /** Are the blend func terms currently different for each buffer/target? */
+ GLboolean _BlendFuncPerBuffer;
+ /** Are the blend equations currently different for each buffer/target? */
+ GLboolean _BlendEquationPerBuffer;
+ /*@}*/
+
+ /**
+ * \name Logic op
+ */
+ /*@{*/
+ GLenum LogicOp; /**< Logic operator */
+ GLboolean IndexLogicOpEnabled; /**< Color index logic op enabled flag */
+ GLboolean ColorLogicOpEnabled; /**< RGBA logic op enabled flag */
+ GLboolean _LogicOpEnabled; /**< RGBA logic op + EXT_blend_logic_op enabled flag */
+ /*@}*/
+
+ GLboolean DitherFlag; /**< Dither enable flag */
+
+ GLenum ClampFragmentColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
+ GLenum ClampReadColor; /**< GL_TRUE, GL_FALSE or GL_FIXED_ONLY_ARB */
+
+ GLboolean sRGBEnabled; /**< Framebuffer sRGB blending/updating requested */
+};
+
+
+/**
+ * Current attribute group (GL_CURRENT_BIT).
+ */
+struct gl_current_attrib
+{
+ /**
+ * \name Current vertex attributes.
+ * \note Values are valid only after FLUSH_VERTICES has been called.
+ * \note Index and Edgeflag current values are stored as floats in the
+ * SIX and SEVEN attribute slots.
+ */
+ GLfloat Attrib[VERT_ATTRIB_MAX][4]; /**< Position, color, texcoords, etc */
+
+ /**
+ * \name Current raster position attributes (always valid).
+ * \note This set of attributes is very similar to the SWvertex struct.
+ */
+ /*@{*/
+ GLfloat RasterPos[4];
+ GLfloat RasterDistance;
+ GLfloat RasterColor[4];
+ GLfloat RasterSecondaryColor[4];
+ GLfloat RasterTexCoords[MAX_TEXTURE_COORD_UNITS][4];
+ GLboolean RasterPosValid;
+ /*@}*/
+};
+
+
+/**
+ * Depth buffer attribute group (GL_DEPTH_BUFFER_BIT).
+ */
+struct gl_depthbuffer_attrib
+{
+ GLenum Func; /**< Function for depth buffer compare */
+ GLclampd Clear; /**< Value to clear depth buffer to */
+ GLboolean Test; /**< Depth buffering enabled flag */
+ GLboolean Mask; /**< Depth buffer writable? */
+ GLboolean BoundsTest; /**< GL_EXT_depth_bounds_test */
+ GLfloat BoundsMin, BoundsMax;/**< GL_EXT_depth_bounds_test */
+};
+
+
+/**
+ * Evaluator attribute group (GL_EVAL_BIT).
+ */
+struct gl_eval_attrib
+{
+ /**
+ * \name Enable bits
+ */
+ /*@{*/
+ GLboolean Map1Color4;
+ GLboolean Map1Index;
+ GLboolean Map1Normal;
+ GLboolean Map1TextureCoord1;
+ GLboolean Map1TextureCoord2;
+ GLboolean Map1TextureCoord3;
+ GLboolean Map1TextureCoord4;
+ GLboolean Map1Vertex3;
+ GLboolean Map1Vertex4;
+ GLboolean Map1Attrib[16]; /* GL_NV_vertex_program */
+ GLboolean Map2Color4;
+ GLboolean Map2Index;
+ GLboolean Map2Normal;
+ GLboolean Map2TextureCoord1;
+ GLboolean Map2TextureCoord2;
+ GLboolean Map2TextureCoord3;
+ GLboolean Map2TextureCoord4;
+ GLboolean Map2Vertex3;
+ GLboolean Map2Vertex4;
+ GLboolean Map2Attrib[16]; /* GL_NV_vertex_program */
+ GLboolean AutoNormal;
+ /*@}*/
+
+ /**
+ * \name Map Grid endpoints and divisions and calculated du values
+ */
+ /*@{*/
+ GLint MapGrid1un;
+ GLfloat MapGrid1u1, MapGrid1u2, MapGrid1du;
+ GLint MapGrid2un, MapGrid2vn;
+ GLfloat MapGrid2u1, MapGrid2u2, MapGrid2du;
+ GLfloat MapGrid2v1, MapGrid2v2, MapGrid2dv;
+ /*@}*/
+};
+
+
+/**
+ * Fog attribute group (GL_FOG_BIT).
+ */
+struct gl_fog_attrib
+{
+ GLboolean Enabled; /**< Fog enabled flag */
+ GLfloat Color[4]; /**< Fog color */
+ GLfloat Density; /**< Density >= 0.0 */
+ GLfloat Start; /**< Start distance in eye coords */
+ GLfloat End; /**< End distance in eye coords */
+ GLfloat Index; /**< Fog index */
+ GLenum Mode; /**< Fog mode */
+ GLboolean ColorSumEnabled;
+ GLenum FogCoordinateSource; /**< GL_EXT_fog_coord */
+ GLfloat _Scale; /**< (End == Start) ? 1.0 : 1.0 / (End - Start) */
+};
+
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
+ * a layout qualifier.
+ *
+ * \see enum ir_depth_layout
+ */
+enum gl_frag_depth_layout {
+ FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
+ FRAG_DEPTH_LAYOUT_ANY,
+ FRAG_DEPTH_LAYOUT_GREATER,
+ FRAG_DEPTH_LAYOUT_LESS,
+ FRAG_DEPTH_LAYOUT_UNCHANGED
+};
+
+
+/**
+ * Hint attribute group (GL_HINT_BIT).
+ *
+ * Values are always one of GL_FASTEST, GL_NICEST, or GL_DONT_CARE.
+ */
+struct gl_hint_attrib
+{
+ GLenum PerspectiveCorrection;
+ GLenum PointSmooth;
+ GLenum LineSmooth;
+ GLenum PolygonSmooth;
+ GLenum Fog;
+ GLenum ClipVolumeClipping; /**< GL_EXT_clip_volume_hint */
+ GLenum TextureCompression; /**< GL_ARB_texture_compression */
+ GLenum GenerateMipmap; /**< GL_SGIS_generate_mipmap */
+ GLenum FragmentShaderDerivative; /**< GL_ARB_fragment_shader */
+};
+
+/**
+ * Light state flags.
+ */
+/*@{*/
+#define LIGHT_SPOT 0x1
+#define LIGHT_LOCAL_VIEWER 0x2
+#define LIGHT_POSITIONAL 0x4
+#define LIGHT_NEED_VERTICES (LIGHT_POSITIONAL|LIGHT_LOCAL_VIEWER)
+/*@}*/
+
+
+/**
+ * Lighting attribute group (GL_LIGHT_BIT).
+ */
+struct gl_light_attrib
+{
+ struct gl_light Light[MAX_LIGHTS]; /**< Array of light sources */
+ struct gl_lightmodel Model; /**< Lighting model */
+
+ /**
+ * Must flush FLUSH_VERTICES before referencing:
+ */
+ /*@{*/
+ struct gl_material Material; /**< Includes front & back values */
+ /*@}*/
+
+ GLboolean Enabled; /**< Lighting enabled flag */
+ GLenum ShadeModel; /**< GL_FLAT or GL_SMOOTH */
+ GLenum ProvokingVertex; /**< GL_EXT_provoking_vertex */
+ GLenum ColorMaterialFace; /**< GL_FRONT, BACK or FRONT_AND_BACK */
+ GLenum ColorMaterialMode; /**< GL_AMBIENT, GL_DIFFUSE, etc */
+ GLbitfield ColorMaterialBitmask; /**< bitmask formed from Face and Mode */
+ GLboolean ColorMaterialEnabled;
+ GLenum ClampVertexColor;
+
+ struct gl_light EnabledList; /**< List sentinel */
+
+ /**
+ * Derived state for optimizations:
+ */
+ /*@{*/
+ GLboolean _NeedEyeCoords;
+ GLboolean _NeedVertices; /**< Use fast shader? */
+ GLbitfield _Flags; /**< LIGHT_* flags, see above */
+ GLfloat _BaseColor[2][3];
+ /*@}*/
+};
+
+
+/**
+ * Line attribute group (GL_LINE_BIT).
+ */
+struct gl_line_attrib
+{
+ GLboolean SmoothFlag; /**< GL_LINE_SMOOTH enabled? */
+ GLboolean StippleFlag; /**< GL_LINE_STIPPLE enabled? */
+ GLushort StipplePattern; /**< Stipple pattern */
+ GLint StippleFactor; /**< Stipple repeat factor */
+ GLfloat Width; /**< Line width */
+};
+
+
+/**
+ * Display list attribute group (GL_LIST_BIT).
+ */
+struct gl_list_attrib
+{
+ GLuint ListBase;
+};
+
+
+/**
+ * Multisample attribute group (GL_MULTISAMPLE_BIT).
+ */
+struct gl_multisample_attrib
+{
+ GLboolean Enabled;
+ GLboolean _Enabled; /**< true if Enabled and multisample buffer */
+ GLboolean SampleAlphaToCoverage;
+ GLboolean SampleAlphaToOne;
+ GLboolean SampleCoverage;
+ GLfloat SampleCoverageValue;
+ GLboolean SampleCoverageInvert;
+};
+
+
+/**
+ * A pixelmap (see glPixelMap)
+ */
+struct gl_pixelmap
+{
+ GLint Size;
+ GLfloat Map[MAX_PIXEL_MAP_TABLE];
+ GLubyte Map8[MAX_PIXEL_MAP_TABLE]; /**< converted to 8-bit color */
+};
+
+
+/**
+ * Collection of all pixelmaps
+ */
+struct gl_pixelmaps
+{
+ struct gl_pixelmap RtoR; /**< i.e. GL_PIXEL_MAP_R_TO_R */
+ struct gl_pixelmap GtoG;
+ struct gl_pixelmap BtoB;
+ struct gl_pixelmap AtoA;
+ struct gl_pixelmap ItoR;
+ struct gl_pixelmap ItoG;
+ struct gl_pixelmap ItoB;
+ struct gl_pixelmap ItoA;
+ struct gl_pixelmap ItoI;
+ struct gl_pixelmap StoS;
+};
+
+
+/**
+ * Pixel attribute group (GL_PIXEL_MODE_BIT).
+ */
+struct gl_pixel_attrib
+{
+ GLenum ReadBuffer; /**< source buffer for glRead/CopyPixels() */
+
+ /*--- Begin Pixel Transfer State ---*/
+ /* Fields are in the order in which they're applied... */
+
+ /** Scale & Bias (index shift, offset) */
+ /*@{*/
+ GLfloat RedBias, RedScale;
+ GLfloat GreenBias, GreenScale;
+ GLfloat BlueBias, BlueScale;
+ GLfloat AlphaBias, AlphaScale;
+ GLfloat DepthBias, DepthScale;
+ GLint IndexShift, IndexOffset;
+ /*@}*/
+
+ /* Pixel Maps */
+ /* Note: actual pixel maps are not part of this attrib group */
+ GLboolean MapColorFlag;
+ GLboolean MapStencilFlag;
+
+ /*--- End Pixel Transfer State ---*/
+
+ /** glPixelZoom */
+ GLfloat ZoomX, ZoomY;
+
+ /** GL_SGI_texture_color_table */
+ GLfloat TextureColorTableScale[4]; /**< RGBA */
+ GLfloat TextureColorTableBias[4]; /**< RGBA */
+};
+
+
+/**
+ * Point attribute group (GL_POINT_BIT).
+ */
+struct gl_point_attrib
+{
+ GLboolean SmoothFlag; /**< True if GL_POINT_SMOOTH is enabled */
+ GLfloat Size; /**< User-specified point size */
+ GLfloat Params[3]; /**< GL_EXT_point_parameters */
+ GLfloat MinSize, MaxSize; /**< GL_EXT_point_parameters */
+ GLfloat Threshold; /**< GL_EXT_point_parameters */
+ GLboolean _Attenuated; /**< True if Params != [1, 0, 0] */
+ GLboolean PointSprite; /**< GL_NV/ARB_point_sprite */
+ GLboolean CoordReplace[MAX_TEXTURE_COORD_UNITS]; /**< GL_ARB_point_sprite*/
+ GLenum SpriteRMode; /**< GL_NV_point_sprite (only!) */
+ GLenum SpriteOrigin; /**< GL_ARB_point_sprite */
+};
+
+
+/**
+ * Polygon attribute group (GL_POLYGON_BIT).
+ */
+struct gl_polygon_attrib
+{
+ GLenum FrontFace; /**< Either GL_CW or GL_CCW */
+ GLenum FrontMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
+ GLenum BackMode; /**< Either GL_POINT, GL_LINE or GL_FILL */
+ GLboolean _FrontBit; /**< 0=GL_CCW, 1=GL_CW */
+ GLboolean CullFlag; /**< Culling on/off flag */
+ GLboolean SmoothFlag; /**< True if GL_POLYGON_SMOOTH is enabled */
+ GLboolean StippleFlag; /**< True if GL_POLYGON_STIPPLE is enabled */
+ GLenum CullFaceMode; /**< Culling mode GL_FRONT or GL_BACK */
+ GLfloat OffsetFactor; /**< Polygon offset factor, from user */
+ GLfloat OffsetUnits; /**< Polygon offset units, from user */
+ GLboolean OffsetPoint; /**< Offset in GL_POINT mode */
+ GLboolean OffsetLine; /**< Offset in GL_LINE mode */
+ GLboolean OffsetFill; /**< Offset in GL_FILL mode */
+};
+
+
+/**
+ * Scissor attributes (GL_SCISSOR_BIT).
+ */
+struct gl_scissor_attrib
+{
+ GLboolean Enabled; /**< Scissor test enabled? */
+ GLint X, Y; /**< Lower left corner of box */
+ GLsizei Width, Height; /**< Size of box */
+};
+
+
+/**
+ * Stencil attribute group (GL_STENCIL_BUFFER_BIT).
+ *
+ * Three sets of stencil data are tracked so that OpenGL 2.0,
+ * GL_EXT_stencil_two_side, and GL_ATI_separate_stencil can all be supported
+ * simultaneously. In each of the stencil state arrays, element 0 corresponds
+ * to GL_FRONT. Element 1 corresponds to the OpenGL 2.0 /
+ * GL_ATI_separate_stencil GL_BACK state. Element 2 corresponds to the
+ * GL_EXT_stencil_two_side GL_BACK state.
+ *
+ * The derived value \c _BackFace is either 1 or 2 depending on whether or
+ * not GL_STENCIL_TEST_TWO_SIDE_EXT is enabled.
+ *
+ * The derived value \c _TestTwoSide is set when the front-face and back-face
+ * stencil state are different.
+ */
+struct gl_stencil_attrib
+{
+ GLboolean Enabled; /**< Enabled flag */
+ GLboolean TestTwoSide; /**< GL_EXT_stencil_two_side */
+ GLubyte ActiveFace; /**< GL_EXT_stencil_two_side (0 or 2) */
+ GLboolean _Enabled; /**< Enabled and stencil buffer present */
+ GLboolean _TestTwoSide;
+ GLubyte _BackFace; /**< Current back stencil state (1 or 2) */
+ GLenum Function[3]; /**< Stencil function */
+ GLenum FailFunc[3]; /**< Fail function */
+ GLenum ZPassFunc[3]; /**< Depth buffer pass function */
+ GLenum ZFailFunc[3]; /**< Depth buffer fail function */
+ GLint Ref[3]; /**< Reference value */
+ GLuint ValueMask[3]; /**< Value mask */
+ GLuint WriteMask[3]; /**< Write mask */
+ GLuint Clear; /**< Clear value */
+};
+
+
+/**
+ * An index for each type of texture object. These correspond to the GL
+ * texture target enums, such as GL_TEXTURE_2D, GL_TEXTURE_CUBE_MAP, etc.
+ * Note: the order is from highest priority to lowest priority.
+ */
+typedef enum
+{
+ TEXTURE_2D_ARRAY_INDEX,
+ TEXTURE_1D_ARRAY_INDEX,
+ TEXTURE_CUBE_INDEX,
+ TEXTURE_3D_INDEX,
+ TEXTURE_RECT_INDEX,
+ TEXTURE_2D_INDEX,
+ TEXTURE_1D_INDEX,
+ NUM_TEXTURE_TARGETS
+} gl_texture_index;
+
+
+/**
+ * Bit flags for each type of texture object
+ * Used for Texture.Unit[]._ReallyEnabled flags.
+ */
+/*@{*/
+#define TEXTURE_2D_ARRAY_BIT (1 << TEXTURE_2D_ARRAY_INDEX)
+#define TEXTURE_1D_ARRAY_BIT (1 << TEXTURE_1D_ARRAY_INDEX)
+#define TEXTURE_CUBE_BIT (1 << TEXTURE_CUBE_INDEX)
+#define TEXTURE_3D_BIT (1 << TEXTURE_3D_INDEX)
+#define TEXTURE_RECT_BIT (1 << TEXTURE_RECT_INDEX)
+#define TEXTURE_2D_BIT (1 << TEXTURE_2D_INDEX)
+#define TEXTURE_1D_BIT (1 << TEXTURE_1D_INDEX)
+/*@}*/
+
+
+/**
+ * TexGenEnabled flags.
+ */
+/*@{*/
+#define S_BIT 1
+#define T_BIT 2
+#define R_BIT 4
+#define Q_BIT 8
+#define STR_BITS (S_BIT | T_BIT | R_BIT)
+/*@}*/
+
+
+/**
+ * Bit flag versions of the corresponding GL_ constants.
+ */
+/*@{*/
+#define TEXGEN_SPHERE_MAP 0x1
+#define TEXGEN_OBJ_LINEAR 0x2
+#define TEXGEN_EYE_LINEAR 0x4
+#define TEXGEN_REFLECTION_MAP_NV 0x8
+#define TEXGEN_NORMAL_MAP_NV 0x10
+
+#define TEXGEN_NEED_NORMALS (TEXGEN_SPHERE_MAP | \
+ TEXGEN_REFLECTION_MAP_NV | \
+ TEXGEN_NORMAL_MAP_NV)
+#define TEXGEN_NEED_EYE_COORD (TEXGEN_SPHERE_MAP | \
+ TEXGEN_REFLECTION_MAP_NV | \
+ TEXGEN_NORMAL_MAP_NV | \
+ TEXGEN_EYE_LINEAR)
+/*@}*/
+
+
+
+/** Tex-gen enabled for texture unit? */
+#define ENABLE_TEXGEN(unit) (1 << (unit))
+
+/** Non-identity texture matrix for texture unit? */
+#define ENABLE_TEXMAT(unit) (1 << (unit))
+
+
+/**
+ * Texel fetch function prototype. We use texel fetch functions to
+ * extract RGBA, color indexes and depth components out of 1D, 2D and 3D
+ * texture images. These functions help to isolate us from the gritty
+ * details of all the various texture image encodings.
+ *
+ * \param texImage texture image.
+ * \param col texel column.
+ * \param row texel row.
+ * \param img texel image level/layer.
+ * \param texelOut output texel (up to 4 GLchans)
+ */
+typedef void (*FetchTexelFuncC)( const struct gl_texture_image *texImage,
+ GLint col, GLint row, GLint img,
+ GLchan *texelOut );
+
+/**
+ * As above, but returns floats.
+ * Used for depth component images and for upcoming signed/float
+ * texture images.
+ */
+typedef void (*FetchTexelFuncF)( const struct gl_texture_image *texImage,
+ GLint col, GLint row, GLint img,
+ GLfloat *texelOut );
+
+
+typedef void (*StoreTexelFunc)(struct gl_texture_image *texImage,
+ GLint col, GLint row, GLint img,
+ const void *texel);
+
+
+/**
+ * Texture image state. Describes the dimensions of a texture image,
+ * the texel format and pointers to Texel Fetch functions.
+ */
+struct gl_texture_image
+{
+ GLint InternalFormat; /**< Internal format as given by the user */
+ GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_ALPHA,
+ * GL_LUMINANCE, GL_LUMINANCE_ALPHA,
+ * GL_INTENSITY, GL_COLOR_INDEX,
+ * GL_DEPTH_COMPONENT or GL_DEPTH_STENCIL_EXT
+ * only. Used for choosing TexEnv arithmetic.
+ */
+ gl_format TexFormat; /**< The actual texture memory format */
+
+ GLuint Border; /**< 0 or 1 */
+ GLuint Width; /**< = 2^WidthLog2 + 2*Border */
+ GLuint Height; /**< = 2^HeightLog2 + 2*Border */
+ GLuint Depth; /**< = 2^DepthLog2 + 2*Border */
+ GLuint Width2; /**< = Width - 2*Border */
+ GLuint Height2; /**< = Height - 2*Border */
+ GLuint Depth2; /**< = Depth - 2*Border */
+ GLuint WidthLog2; /**< = log2(Width2) */
+ GLuint HeightLog2; /**< = log2(Height2) */
+ GLuint DepthLog2; /**< = log2(Depth2) */
+ GLuint MaxLog2; /**< = MAX(WidthLog2, HeightLog2) */
+ GLfloat WidthScale; /**< used for mipmap LOD computation */
+ GLfloat HeightScale; /**< used for mipmap LOD computation */
+ GLfloat DepthScale; /**< used for mipmap LOD computation */
+ GLboolean IsClientData; /**< Data owned by client? */
+ GLboolean _IsPowerOfTwo; /**< Are all dimensions powers of two? */
+
+ struct gl_texture_object *TexObject; /**< Pointer back to parent object */
+
+ FetchTexelFuncC FetchTexelc; /**< GLchan texel fetch function pointer */
+ FetchTexelFuncF FetchTexelf; /**< Float texel fetch function pointer */
+
+ GLuint RowStride; /**< Padded width in units of texels */
+ GLuint *ImageOffsets; /**< if 3D texture: array [Depth] of offsets to
+ each 2D slice in 'Data', in texels */
+ GLvoid *Data; /**< Image data, accessed via FetchTexel() */
+
+ /**
+ * \name For device driver:
+ */
+ /*@{*/
+ void *DriverData; /**< Arbitrary device driver data */
+ /*@}*/
+};
+
+
+/**
+ * Indexes for cube map faces.
+ */
+typedef enum
+{
+ FACE_POS_X = 0,
+ FACE_NEG_X = 1,
+ FACE_POS_Y = 2,
+ FACE_NEG_Y = 3,
+ FACE_POS_Z = 4,
+ FACE_NEG_Z = 5,
+ MAX_FACES = 6
+} gl_face_index;
+
+
+/**
+ * Texture object state. Contains the array of mipmap images, border color,
+ * wrap modes, filter modes, shadow/texcompare state, and the per-texture
+ * color palette.
+ */
+struct gl_texture_object
+{
+ _glthread_Mutex Mutex; /**< for thread safety */
+ GLint RefCount; /**< reference count */
+ GLuint Name; /**< the user-visible texture object ID */
+ GLenum Target; /**< GL_TEXTURE_1D, GL_TEXTURE_2D, etc. */
+ GLfloat Priority; /**< in [0,1] */
+ union {
+ GLfloat f[4];
+ GLuint ui[4];
+ GLint i[4];
+ } BorderColor; /**< Interpreted according to texture format */
+ GLenum WrapS; /**< S-axis texture image wrap mode */
+ GLenum WrapT; /**< T-axis texture image wrap mode */
+ GLenum WrapR; /**< R-axis texture image wrap mode */
+ GLenum MinFilter; /**< minification filter */
+ GLenum MagFilter; /**< magnification filter */
+ GLfloat MinLod; /**< min lambda, OpenGL 1.2 */
+ GLfloat MaxLod; /**< max lambda, OpenGL 1.2 */
+ GLfloat LodBias; /**< OpenGL 1.4 */
+ GLint BaseLevel; /**< min mipmap level, OpenGL 1.2 */
+ GLint MaxLevel; /**< max mipmap level, OpenGL 1.2 */
+ GLfloat MaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
+ GLenum CompareMode; /**< GL_ARB_shadow */
+ GLenum CompareFunc; /**< GL_ARB_shadow */
+ GLfloat CompareFailValue; /**< GL_ARB_shadow_ambient */
+ GLenum DepthMode; /**< GL_ARB_depth_texture */
+ GLint _MaxLevel; /**< actual max mipmap level (q in the spec) */
+ GLfloat _MaxLambda; /**< = _MaxLevel - BaseLevel (q - b in spec) */
+ GLint CropRect[4]; /**< GL_OES_draw_texture */
+ GLenum Swizzle[4]; /**< GL_EXT_texture_swizzle */
+ GLuint _Swizzle; /**< same as Swizzle, but SWIZZLE_* format */
+ GLboolean GenerateMipmap; /**< GL_SGIS_generate_mipmap */
+ GLboolean _Complete; /**< Is texture object complete? */
+ GLboolean _RenderToTexture; /**< Any rendering to this texture? */
+ GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
+ GLenum sRGBDecode; /**< GL_DECODE_EXT or GL_SKIP_DECODE_EXT */
+
+ /** Actual texture images, indexed by [cube face] and [mipmap level] */
+ struct gl_texture_image *Image[MAX_FACES][MAX_TEXTURE_LEVELS];
+
+ /** GL_EXT_paletted_texture */
+ struct gl_color_table Palette;
+
+ /**
+ * \name For device driver.
+ * Note: instead of attaching driver data to this pointer, it's preferable
+ * to instead use this struct as a base class for your own texture object
+ * class. Driver->NewTextureObject() can be used to implement the
+ * allocation.
+ */
+ void *DriverData; /**< Arbitrary device driver data */
+};
+
+
+/** Up to four combiner sources are possible with GL_NV_texture_env_combine4 */
+#define MAX_COMBINER_TERMS 4
+
+
+/**
+ * Texture combine environment state.
+ */
+struct gl_tex_env_combine_state
+{
+ GLenum ModeRGB; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
+ GLenum ModeA; /**< GL_REPLACE, GL_DECAL, GL_ADD, etc. */
+ /** Source terms: GL_PRIMARY_COLOR, GL_TEXTURE, etc */
+ GLenum SourceRGB[MAX_COMBINER_TERMS];
+ GLenum SourceA[MAX_COMBINER_TERMS];
+ /** Source operands: GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR, etc */
+ GLenum OperandRGB[MAX_COMBINER_TERMS];
+ GLenum OperandA[MAX_COMBINER_TERMS];
+ GLuint ScaleShiftRGB; /**< 0, 1 or 2 */
+ GLuint ScaleShiftA; /**< 0, 1 or 2 */
+ GLuint _NumArgsRGB; /**< Number of inputs used for the RGB combiner */
+ GLuint _NumArgsA; /**< Number of inputs used for the A combiner */
+};
+
+
+/**
+ * Texture coord generation state.
+ */
+struct gl_texgen
+{
+ GLenum Mode; /**< GL_EYE_LINEAR, GL_SPHERE_MAP, etc */
+ GLbitfield _ModeBit; /**< TEXGEN_x bit corresponding to Mode */
+ GLfloat ObjectPlane[4];
+ GLfloat EyePlane[4];
+};
+
+
+/**
+ * Texture unit state. Contains enable flags, texture environment/function/
+ * combiners, texgen state, pointers to current texture objects and
+ * post-filter color tables.
+ */
+struct gl_texture_unit
+{
+ GLbitfield Enabled; /**< bitmask of TEXTURE_*_BIT flags */
+ GLbitfield _ReallyEnabled; /**< 0 or exactly one of TEXTURE_*_BIT flags */
+
+ GLenum EnvMode; /**< GL_MODULATE, GL_DECAL, GL_BLEND, etc. */
+ GLfloat EnvColor[4];
+
+ struct gl_texgen GenS;
+ struct gl_texgen GenT;
+ struct gl_texgen GenR;
+ struct gl_texgen GenQ;
+ GLbitfield TexGenEnabled; /**< Bitwise-OR of [STRQ]_BIT values */
+ GLbitfield _GenFlags; /**< Bitwise-OR of Gen[STRQ]._ModeBit */
+
+ GLfloat LodBias; /**< for biasing mipmap levels */
+ GLenum BumpTarget;
+ GLfloat RotMatrix[4]; /* 2x2 matrix */
+
+ /**
+ * \name GL_EXT_texture_env_combine
+ */
+ struct gl_tex_env_combine_state Combine;
+
+ /**
+ * Derived state based on \c EnvMode and the \c BaseFormat of the
+ * currently enabled texture.
+ */
+ struct gl_tex_env_combine_state _EnvMode;
+
+ /**
+ * Currently enabled combiner state. This will point to either
+ * \c Combine or \c _EnvMode.
+ */
+ struct gl_tex_env_combine_state *_CurrentCombine;
+
+ /** Current texture object pointers */
+ struct gl_texture_object *CurrentTex[NUM_TEXTURE_TARGETS];
+
+ /** Points to highest priority, complete and enabled texture object */
+ struct gl_texture_object *_Current;
+
+ /** GL_SGI_texture_color_table */
+ /*@{*/
+ struct gl_color_table ColorTable;
+ struct gl_color_table ProxyColorTable;
+ GLboolean ColorTableEnabled;
+ /*@}*/
+};
+
+
+/**
+ * Texture attribute group (GL_TEXTURE_BIT).
+ */
+struct gl_texture_attrib
+{
+ GLuint CurrentUnit; /**< GL_ACTIVE_TEXTURE */
+ struct gl_texture_unit Unit[MAX_COMBINED_TEXTURE_IMAGE_UNITS];
+
+ struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS];
+
+ /** GL_ARB_seamless_cubemap */
+ GLboolean CubeMapSeamless;
+
+ /** GL_EXT_shared_texture_palette */
+ GLboolean SharedPalette;
+ struct gl_color_table Palette;
+
+ /** Texture units/samplers used by vertex or fragment texturing */
+ GLbitfield _EnabledUnits;
+
+ /** Texture coord units/sets used for fragment texturing */
+ GLbitfield _EnabledCoordUnits;
+
+ /** Texture coord units that have texgen enabled */
+ GLbitfield _TexGenEnabled;
+
+ /** Texture coord units that have non-identity matrices */
+ GLbitfield _TexMatEnabled;
+
+ /** Bitwise-OR of all Texture.Unit[i]._GenFlags */
+ GLbitfield _GenFlags;
+};
+
+
+/**
+ * Transformation attribute group (GL_TRANSFORM_BIT).
+ */
+struct gl_transform_attrib
+{
+ GLenum MatrixMode; /**< Matrix mode */
+ GLfloat EyeUserPlane[MAX_CLIP_PLANES][4]; /**< User clip planes */
+ GLfloat _ClipUserPlane[MAX_CLIP_PLANES][4]; /**< derived */
+ GLbitfield ClipPlanesEnabled; /**< on/off bitmask */
+ GLboolean Normalize; /**< Normalize all normals? */
+ GLboolean RescaleNormals; /**< GL_EXT_rescale_normal */
+ GLboolean RasterPositionUnclipped; /**< GL_IBM_rasterpos_clip */
+ GLboolean DepthClamp; /**< GL_ARB_depth_clamp */
+
+ GLfloat CullEyePos[4];
+ GLfloat CullObjPos[4];
+};
+
+
+/**
+ * Viewport attribute group (GL_VIEWPORT_BIT).
+ */
+struct gl_viewport_attrib
+{
+ GLint X, Y; /**< position */
+ GLsizei Width, Height; /**< size */
+ GLfloat Near, Far; /**< Depth buffer range */
+ GLmatrix _WindowMap; /**< Mapping transformation as a matrix. */
+};
+
+
+/**
+ * GL_ARB_vertex/pixel_buffer_object buffer object
+ */
+struct gl_buffer_object
+{
+ _glthread_Mutex Mutex;
+ GLint RefCount;
+ GLuint Name;
+ GLenum Usage; /**< GL_STREAM_DRAW_ARB, GL_STREAM_READ_ARB, etc. */
+ GLsizeiptrARB Size; /**< Size of buffer storage in bytes */
+ GLubyte *Data; /**< Location of storage either in RAM or VRAM. */
+ /** Fields describing a mapped buffer */
+ /*@{*/
+ GLbitfield AccessFlags; /**< Mask of GL_MAP_x_BIT flags */
+ GLvoid *Pointer; /**< User-space address of mapping */
+ GLintptr Offset; /**< Mapped offset */
+ GLsizeiptr Length; /**< Mapped length */
+ /*@}*/
+ GLboolean Written; /**< Ever written to? (for debugging) */
+ GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
+};
+
+
+/**
+ * Client pixel packing/unpacking attributes
+ */
+struct gl_pixelstore_attrib
+{
+ GLint Alignment;
+ GLint RowLength;
+ GLint SkipPixels;
+ GLint SkipRows;
+ GLint ImageHeight;
+ GLint SkipImages;
+ GLboolean SwapBytes;
+ GLboolean LsbFirst;
+ GLboolean ClientStorage; /**< GL_APPLE_client_storage */
+ GLboolean Invert; /**< GL_MESA_pack_invert */
+ struct gl_buffer_object *BufferObj; /**< GL_ARB_pixel_buffer_object */
+};
+
+
+/**
+ * Client vertex array attributes
+ */
+struct gl_client_array
+{
+ GLint Size; /**< components per element (1,2,3,4) */
+ GLenum Type; /**< datatype: GL_FLOAT, GL_INT, etc */
+ GLenum Format; /**< default: GL_RGBA, but may be GL_BGRA */
+ GLsizei Stride; /**< user-specified stride */
+ GLsizei StrideB; /**< actual stride in bytes */
+ const GLubyte *Ptr; /**< Points to array data */
+ GLboolean Enabled; /**< Enabled flag is a boolean */
+ GLboolean Normalized; /**< GL_ARB_vertex_program */
+ GLboolean Integer; /**< Integer-valued? */
+ GLuint InstanceDivisor; /**< GL_ARB_instanced_arrays */
+ GLuint _ElementSize; /**< size of each element in bytes */
+
+ struct gl_buffer_object *BufferObj;/**< GL_ARB_vertex_buffer_object */
+ GLuint _MaxElement; /**< max element index into array buffer + 1 */
+};
+
+
+/**
+ * Collection of vertex arrays. Defined by the GL_APPLE_vertex_array_object
+ * extension, but a nice encapsulation in any case.
+ */
+struct gl_array_object
+{
+ /** Name of the array object as received from glGenVertexArrayAPPLE. */
+ GLuint Name;
+
+ GLint RefCount;
+ _glthread_Mutex Mutex;
+ GLboolean VBOonly; /**< require all arrays to live in VBOs? */
+
+ /** Conventional vertex arrays */
+ /*@{*/
+ struct gl_client_array Vertex;
+ struct gl_client_array Weight;
+ struct gl_client_array Normal;
+ struct gl_client_array Color;
+ struct gl_client_array SecondaryColor;
+ struct gl_client_array FogCoord;
+ struct gl_client_array Index;
+ struct gl_client_array EdgeFlag;
+ struct gl_client_array TexCoord[MAX_TEXTURE_COORD_UNITS];
+ struct gl_client_array PointSize;
+ /*@}*/
+
+ /**
+ * Generic arrays for vertex programs/shaders.
+ * For NV vertex programs, these attributes alias and take priority
+ * over the conventional attribs above. For ARB vertex programs and
+ * GLSL vertex shaders, these attributes are separate.
+ */
+ struct gl_client_array VertexAttrib[MAX_VERTEX_GENERIC_ATTRIBS];
+
+ /** Mask of _NEW_ARRAY_* values indicating which arrays are enabled */
+ GLbitfield _Enabled;
+
+ /**
+ * Min of all enabled arrays' _MaxElement. When arrays reside inside VBOs
+ * we can determine the max legal (in bounds) glDrawElements array index.
+ */
+ GLuint _MaxElement;
+};
+
+
+/**
+ * Vertex array state
+ */
+struct gl_array_attrib
+{
+ /** Currently bound array object. See _mesa_BindVertexArrayAPPLE() */
+ struct gl_array_object *ArrayObj;
+
+ /** The default vertex array object */
+ struct gl_array_object *DefaultArrayObj;
+
+ /** Array objects (GL_ARB/APPLE_vertex_array_object) */
+ struct _mesa_HashTable *Objects;
+
+ GLint ActiveTexture; /**< Client Active Texture */
+ GLuint LockFirst; /**< GL_EXT_compiled_vertex_array */
+ GLuint LockCount; /**< GL_EXT_compiled_vertex_array */
+
+ /** GL 3.1 (slightly different from GL_NV_primitive_restart) */
+ GLboolean PrimitiveRestart;
+ GLuint RestartIndex;
+
+ GLbitfield NewState; /**< mask of _NEW_ARRAY_* values */
+ GLboolean RebindArrays; /**< whether the VBO module should rebind arrays */
+
+ /* GL_ARB_vertex_buffer_object */
+ struct gl_buffer_object *ArrayBufferObj;
+ struct gl_buffer_object *ElementArrayBufferObj;
+};
+
+
+/**
+ * Feedback buffer state
+ */
+struct gl_feedback
+{
+ GLenum Type;
+ GLbitfield _Mask; /**< FB_* bits */
+ GLfloat *Buffer;
+ GLuint BufferSize;
+ GLuint Count;
+};
+
+
+/**
+ * Selection buffer state
+ */
+struct gl_selection
+{
+ GLuint *Buffer; /**< selection buffer */
+ GLuint BufferSize; /**< size of the selection buffer */
+ GLuint BufferCount; /**< number of values in the selection buffer */
+ GLuint Hits; /**< number of records in the selection buffer */
+ GLuint NameStackDepth; /**< name stack depth */
+ GLuint NameStack[MAX_NAME_STACK_DEPTH]; /**< name stack */
+ GLboolean HitFlag; /**< hit flag */
+ GLfloat HitMinZ; /**< minimum hit depth */
+ GLfloat HitMaxZ; /**< maximum hit depth */
+};
+
+
+/**
+ * 1-D Evaluator control points
+ */
+struct gl_1d_map
+{
+ GLuint Order; /**< Number of control points */
+ GLfloat u1, u2, du; /**< u1, u2, 1.0/(u2-u1) */
+ GLfloat *Points; /**< Points to contiguous control points */
+};
+
+
+/**
+ * 2-D Evaluator control points
+ */
+struct gl_2d_map
+{
+ GLuint Uorder; /**< Number of control points in U dimension */
+ GLuint Vorder; /**< Number of control points in V dimension */
+ GLfloat u1, u2, du;
+ GLfloat v1, v2, dv;
+ GLfloat *Points; /**< Points to contiguous control points */
+};
+
+
+/**
+ * All evaluator control point state
+ */
+struct gl_evaluators
+{
+ /**
+ * \name 1-D maps
+ */
+ /*@{*/
+ struct gl_1d_map Map1Vertex3;
+ struct gl_1d_map Map1Vertex4;
+ struct gl_1d_map Map1Index;
+ struct gl_1d_map Map1Color4;
+ struct gl_1d_map Map1Normal;
+ struct gl_1d_map Map1Texture1;
+ struct gl_1d_map Map1Texture2;
+ struct gl_1d_map Map1Texture3;
+ struct gl_1d_map Map1Texture4;
+ struct gl_1d_map Map1Attrib[16]; /**< GL_NV_vertex_program */
+ /*@}*/
+
+ /**
+ * \name 2-D maps
+ */
+ /*@{*/
+ struct gl_2d_map Map2Vertex3;
+ struct gl_2d_map Map2Vertex4;
+ struct gl_2d_map Map2Index;
+ struct gl_2d_map Map2Color4;
+ struct gl_2d_map Map2Normal;
+ struct gl_2d_map Map2Texture1;
+ struct gl_2d_map Map2Texture2;
+ struct gl_2d_map Map2Texture3;
+ struct gl_2d_map Map2Texture4;
+ struct gl_2d_map Map2Attrib[16]; /**< GL_NV_vertex_program */
+ /*@}*/
+};
+
+
+/**
+ * Names of the various vertex/fragment program register files, etc.
+ *
+ * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c)
+ * All values should fit in a 4-bit field.
+ *
+ * NOTE: PROGRAM_ENV_PARAM, PROGRAM_STATE_VAR, PROGRAM_NAMED_PARAM,
+ * PROGRAM_CONSTANT, and PROGRAM_UNIFORM can all be considered to
+ * be "uniform" variables since they can only be set outside glBegin/End.
+ * They're also all stored in the same Parameters array.
+ */
+typedef enum
+{
+ PROGRAM_TEMPORARY, /**< machine->Temporary[] */
+ PROGRAM_INPUT, /**< machine->Inputs[] */
+ PROGRAM_OUTPUT, /**< machine->Outputs[] */
+ PROGRAM_VARYING, /**< machine->Inputs[]/Outputs[] */
+ PROGRAM_LOCAL_PARAM, /**< gl_program->LocalParams[] */
+ PROGRAM_ENV_PARAM, /**< gl_program->Parameters[] */
+ PROGRAM_STATE_VAR, /**< gl_program->Parameters[] */
+ PROGRAM_NAMED_PARAM, /**< gl_program->Parameters[] */
+ PROGRAM_CONSTANT, /**< gl_program->Parameters[] */
+ PROGRAM_UNIFORM, /**< gl_program->Parameters[] */
+ PROGRAM_WRITE_ONLY, /**< A dummy, write-only register */
+ PROGRAM_ADDRESS, /**< machine->AddressReg */
+ PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
+ PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
+ PROGRAM_UNDEFINED, /**< Invalid/TBD value */
+ PROGRAM_FILE_MAX
+} gl_register_file;
+
+
+/**
+ * If the register file is PROGRAM_SYSTEM_VALUE, the register index will be
+ * one of these values.
+ */
+typedef enum
+{
+ SYSTEM_VALUE_FRONT_FACE, /**< Fragment shader only (not done yet) */
+ SYSTEM_VALUE_INSTANCE_ID, /**< Vertex shader only */
+ SYSTEM_VALUE_MAX /**< Number of values */
+} gl_system_value;
+
+
+/** Vertex and fragment instructions */
+struct prog_instruction;
+struct gl_program_parameter_list;
+struct gl_uniform_list;
+
+
+/**
+ * Base class for any kind of program object
+ */
+struct gl_program
+{
+ GLuint Id;
+ GLubyte *String; /**< Null-terminated program text */
+ GLint RefCount;
+ GLenum Target; /**< GL_VERTEX/FRAGMENT_PROGRAM_ARB, GL_FRAGMENT_PROGRAM_NV */
+ GLenum Format; /**< String encoding format */
+ GLboolean Resident;
+
+ struct prog_instruction *Instructions;
+
+ GLbitfield InputsRead; /**< Bitmask of which input regs are read */
+ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
+ GLbitfield SystemValuesRead; /**< Bitmask of SYSTEM_VALUE_x inputs used */
+ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */
+ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */
+ GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */
+ GLbitfield SamplersUsed; /**< Bitfield of which samplers are used */
+ GLbitfield ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+
+ /** Named parameters, constants, etc. from program text */
+ struct gl_program_parameter_list *Parameters;
+ /** Numbered local parameters */
+ GLfloat LocalParams[MAX_PROGRAM_LOCAL_PARAMS][4];
+
+ /** Vertex/fragment shader varying vars */
+ struct gl_program_parameter_list *Varying;
+ /** Vertex program user-defined attributes */
+ struct gl_program_parameter_list *Attributes;
+
+ /** Map from sampler unit to texture unit (set by glUniform1i()) */
+ GLubyte SamplerUnits[MAX_SAMPLERS];
+ /** Which texture target is being sampled (TEXTURE_1D/2D/3D/etc_INDEX) */
+ gl_texture_index SamplerTargets[MAX_SAMPLERS];
+
+ /** Bitmask of which register files are read/written with indirect
+ * addressing. Mask of (1 << PROGRAM_x) bits.
+ */
+ GLbitfield IndirectRegisterFiles;
+
+ /** Logical counts */
+ /*@{*/
+ GLuint NumInstructions;
+ GLuint NumTemporaries;
+ GLuint NumParameters;
+ GLuint NumAttributes;
+ GLuint NumAddressRegs;
+ GLuint NumAluInstructions;
+ GLuint NumTexInstructions;
+ GLuint NumTexIndirections;
+ /*@}*/
+ /** Native, actual h/w counts */
+ /*@{*/
+ GLuint NumNativeInstructions;
+ GLuint NumNativeTemporaries;
+ GLuint NumNativeParameters;
+ GLuint NumNativeAttributes;
+ GLuint NumNativeAddressRegs;
+ GLuint NumNativeAluInstructions;
+ GLuint NumNativeTexInstructions;
+ GLuint NumNativeTexIndirections;
+ /*@}*/
+};
+
+
+/** Vertex program object */
+struct gl_vertex_program
+{
+ struct gl_program Base; /**< base class */
+ GLboolean IsNVProgram; /**< is this a GL_NV_vertex_program program? */
+ GLboolean IsPositionInvariant;
+};
+
+
+/** Geometry program object */
+struct gl_geometry_program
+{
+ struct gl_program Base; /**< base class */
+
+ GLint VerticesOut;
+ GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
+ GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
+ GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
+};
+
+
+/** Fragment program object */
+struct gl_fragment_program
+{
+ struct gl_program Base; /**< base class */
+ GLenum FogOption;
+ GLboolean UsesKill; /**< shader uses KIL instruction */
+ GLboolean OriginUpperLeft;
+ GLboolean PixelCenterInteger;
+ enum gl_frag_depth_layout FragDepthLayout;
+};
+
+
+/**
+ * State common to vertex and fragment programs.
+ */
+struct gl_program_state
+{
+ GLint ErrorPos; /* GL_PROGRAM_ERROR_POSITION_ARB/NV */
+ const char *ErrorString; /* GL_PROGRAM_ERROR_STRING_ARB/NV */
+};
+
+
+/**
+ * Context state for vertex programs.
+ */
+struct gl_vertex_program_state
+{
+ GLboolean Enabled; /**< User-set GL_VERTEX_PROGRAM_ARB/NV flag */
+ GLboolean _Enabled; /**< Enabled and _valid_ user program? */
+ GLboolean PointSizeEnabled; /**< GL_VERTEX_PROGRAM_POINT_SIZE_ARB/NV */
+ GLboolean TwoSideEnabled; /**< GL_VERTEX_PROGRAM_TWO_SIDE_ARB/NV */
+ struct gl_vertex_program *Current; /**< User-bound vertex program */
+
+ /** Currently enabled and valid vertex program (including internal
+ * programs, user-defined vertex programs and GLSL vertex shaders).
+ * This is the program we must use when rendering.
+ */
+ struct gl_vertex_program *_Current;
+
+ GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
+
+ /* For GL_NV_vertex_program only: */
+ GLenum TrackMatrix[MAX_PROGRAM_ENV_PARAMS / 4];
+ GLenum TrackMatrixTransform[MAX_PROGRAM_ENV_PARAMS / 4];
+
+ /** Should fixed-function T&L be implemented with a vertex prog? */
+ GLboolean _MaintainTnlProgram;
+
+ /** Program to emulate fixed-function T&L (see above) */
+ struct gl_vertex_program *_TnlProgram;
+
+ /** Cache of fixed-function programs */
+ struct gl_program_cache *Cache;
+
+ GLboolean _Overriden;
+};
+
+
+/**
+ * Context state for geometry programs.
+ */
+struct gl_geometry_program_state
+{
+ GLboolean Enabled; /**< GL_ARB_GEOMETRY_SHADER4 */
+ GLboolean _Enabled; /**< Enabled and valid program? */
+ struct gl_geometry_program *Current; /**< user-bound geometry program */
+
+ /** Currently enabled and valid program (including internal programs
+ * and compiled shader programs).
+ */
+ struct gl_geometry_program *_Current;
+
+ GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
+
+ /** Cache of fixed-function programs */
+ struct gl_program_cache *Cache;
+};
+
+/**
+ * Context state for fragment programs.
+ */
+struct gl_fragment_program_state
+{
+ GLboolean Enabled; /**< User-set fragment program enable flag */
+ GLboolean _Enabled; /**< Enabled and _valid_ user program? */
+ struct gl_fragment_program *Current; /**< User-bound fragment program */
+
+ /** Currently enabled and valid fragment program (including internal
+ * programs, user-defined fragment programs and GLSL fragment shaders).
+ * This is the program we must use when rendering.
+ */
+ struct gl_fragment_program *_Current;
+
+ GLfloat Parameters[MAX_PROGRAM_ENV_PARAMS][4]; /**< Env params */
+
+ /** Should fixed-function texturing be implemented with a fragment prog? */
+ GLboolean _MaintainTexEnvProgram;
+
+ /** Program to emulate fixed-function texture env/combine (see above) */
+ struct gl_fragment_program *_TexEnvProgram;
+
+ /** Cache of fixed-function programs */
+ struct gl_program_cache *Cache;
+};
+
+
+/**
+ * ATI_fragment_shader runtime state
+ */
+#define ATI_FS_INPUT_PRIMARY 0
+#define ATI_FS_INPUT_SECONDARY 1
+
+struct atifs_instruction;
+struct atifs_setupinst;
+
+/**
+ * ATI fragment shader
+ */
+struct ati_fragment_shader
+{
+ GLuint Id;
+ GLint RefCount;
+ struct atifs_instruction *Instructions[2];
+ struct atifs_setupinst *SetupInst[2];
+ GLfloat Constants[8][4];
+ GLbitfield LocalConstDef; /**< Indicates which constants have been set */
+ GLubyte numArithInstr[2];
+ GLubyte regsAssigned[2];
+ GLubyte NumPasses; /**< 1 or 2 */
+ GLubyte cur_pass;
+ GLubyte last_optype;
+ GLboolean interpinp1;
+ GLboolean isValid;
+ GLuint swizzlerq;
+};
+
+/**
+ * Context state for GL_ATI_fragment_shader
+ */
+struct gl_ati_fragment_shader_state
+{
+ GLboolean Enabled;
+ GLboolean _Enabled; /**< enabled and valid shader? */
+ GLboolean Compiling;
+ GLfloat GlobalConstants[8][4];
+ struct ati_fragment_shader *Current;
+};
+
+
+/**
+ * Occlusion/timer query object.
+ */
+struct gl_query_object
+{
+ GLenum Target; /**< The query target, when active */
+ GLuint Id; /**< hash table ID/name */
+ GLuint64EXT Result; /**< the counter */
+ GLboolean Active; /**< inside Begin/EndQuery */
+ GLboolean Ready; /**< result is ready? */
+};
+
+
+/**
+ * Context state for query objects.
+ */
+struct gl_query_state
+{
+ struct _mesa_HashTable *QueryObjects;
+ struct gl_query_object *CurrentOcclusionObject; /* GL_ARB_occlusion_query */
+ struct gl_query_object *CurrentTimerObject; /* GL_EXT_timer_query */
+
+ /** GL_NV_conditional_render */
+ struct gl_query_object *CondRenderQuery;
+
+ /** GL_EXT_transform_feedback */
+ struct gl_query_object *PrimitivesGenerated;
+ struct gl_query_object *PrimitivesWritten;
+
+ /** GL_ARB_timer_query */
+ struct gl_query_object *TimeElapsed;
+
+ GLenum CondRenderMode;
+};
+
+
+/** Sync object state */
+struct gl_sync_object {
+ struct simple_node link;
+ GLenum Type; /**< GL_SYNC_FENCE */
+ GLuint Name; /**< Fence name */
+ GLint RefCount; /**< Reference count */
+ GLboolean DeletePending; /**< Object was deleted while there were still
+ * live references (e.g., sync not yet finished)
+ */
+ GLenum SyncCondition;
+ GLbitfield Flags; /**< Flags passed to glFenceSync */
+ GLuint StatusFlag:1; /**< Has the sync object been signaled? */
+};
+
+
+/** Set by #pragma directives */
+struct gl_sl_pragmas
+{
+ GLboolean IgnoreOptimize; /**< ignore #pragma optimize(on/off) ? */
+ GLboolean IgnoreDebug; /**< ignore #pragma debug(on/off) ? */
+ GLboolean Optimize; /**< defaults on */
+ GLboolean Debug; /**< defaults off */
+};
+
+
+/**
+ * A GLSL vertex or fragment shader object.
+ */
+struct gl_shader
+{
+ GLenum Type; /**< GL_FRAGMENT_SHADER || GL_VERTEX_SHADER || GL_GEOMETRY_SHADER_ARB (first field!) */
+ GLuint Name; /**< AKA the handle */
+ GLint RefCount; /**< Reference count */
+ GLboolean DeletePending;
+ GLboolean CompileStatus;
+ const GLchar *Source; /**< Source code string */
+ GLuint SourceChecksum; /**< for debug/logging purposes */
+ struct gl_program *Program; /**< Post-compile assembly code */
+ GLchar *InfoLog;
+ struct gl_sl_pragmas Pragmas;
+
+ unsigned Version; /**< GLSL version used for linking */
+
+ struct exec_list *ir;
+ struct glsl_symbol_table *symbols;
+
+ /** Shaders containing built-in functions that are used for linking. */
+ struct gl_shader *builtins_to_link[16];
+ unsigned num_builtins_to_link;
+};
+
+
+/**
+ * A GLSL program object.
+ * Basically a linked collection of vertex and fragment shaders.
+ */
+struct gl_shader_program
+{
+ GLenum Type; /**< Always GL_SHADER_PROGRAM (internal token) */
+ GLuint Name; /**< aka handle or ID */
+ GLint RefCount; /**< Reference count */
+ GLboolean DeletePending;
+
+ GLuint NumShaders; /**< number of attached shaders */
+ struct gl_shader **Shaders; /**< List of attached the shaders */
+
+ /** User-defined attribute bindings (glBindAttribLocation) */
+ struct gl_program_parameter_list *Attributes;
+
+ /** Transform feedback varyings */
+ struct {
+ GLenum BufferMode;
+ GLuint NumVarying;
+ GLchar **VaryingNames; /**< Array [NumVarying] of char * */
+ } TransformFeedback;
+
+ /** Geometry shader state - copied into gl_geometry_program at link time */
+ struct {
+ GLint VerticesOut;
+ GLenum InputType; /**< GL_POINTS, GL_LINES, GL_LINES_ADJACENCY_ARB,
+ GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
+ GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
+ } Geom;
+
+ /* post-link info: */
+ struct gl_vertex_program *VertexProgram; /**< Linked vertex program */
+ struct gl_fragment_program *FragmentProgram; /**< Linked fragment prog */
+ struct gl_geometry_program *GeometryProgram; /**< Linked geometry prog */
+ struct gl_uniform_list *Uniforms;
+ struct gl_program_parameter_list *Varying;
+ GLboolean LinkStatus; /**< GL_LINK_STATUS */
+ GLboolean Validated;
+ GLboolean _Used; /**< Ever used for drawing? */
+ GLchar *InfoLog;
+
+ unsigned Version; /**< GLSL version used for linking */
+
+ /**
+ * Per-stage shaders resulting from the first stage of linking.
+ *
+ * Set of linked shaders for this program. The array is accessed using the
+ * \c MESA_SHADER_* defines. Entries for non-existent stages will be
+ * \c NULL.
+ */
+ struct gl_shader *_LinkedShaders[MESA_SHADER_TYPES];
+};
+
+
+#define GLSL_DUMP 0x1 /**< Dump shaders to stdout */
+#define GLSL_LOG 0x2 /**< Write shaders to files */
+#define GLSL_OPT 0x4 /**< Force optimizations (override pragmas) */
+#define GLSL_NO_OPT 0x8 /**< Force no optimizations (override pragmas) */
+#define GLSL_UNIFORMS 0x10 /**< Print glUniform calls */
+#define GLSL_NOP_VERT 0x20 /**< Force no-op vertex shaders */
+#define GLSL_NOP_FRAG 0x40 /**< Force no-op fragment shaders */
+#define GLSL_USE_PROG 0x80 /**< Log glUseProgram calls */
+
+
+/**
+ * Context state for GLSL vertex/fragment shaders.
+ */
+struct gl_shader_state
+{
+ /**
+ * Programs used for rendering
+ *
+ * There is a separate program set for each shader stage. If
+ * GL_EXT_separate_shader_objects is not supported, each of these must point
+ * to \c NULL or to the same program.
+ */
+ struct gl_shader_program *CurrentVertexProgram;
+ struct gl_shader_program *CurrentGeometryProgram;
+ struct gl_shader_program *CurrentFragmentProgram;
+
+ /**
+ * Program used by glUniform calls.
+ *
+ * Explicitly set by \c glUseProgram and \c glActiveProgramEXT.
+ */
+ struct gl_shader_program *ActiveProgram;
+
+ void *MemPool;
+
+ GLbitfield Flags; /**< Mask of GLSL_x flags */
+};
+
+/**
+ * Compiler options for a single GLSL shaders type
+ */
+struct gl_shader_compiler_options
+{
+ /** Driver-selectable options: */
+ GLboolean EmitCondCodes; /**< Use condition codes? */
+ GLboolean EmitNVTempInitialization; /**< 0-fill NV temp registers */
+ /**
+ * Attempts to flatten all ir_if (OPCODE_IF) for GPUs that can't
+ * support control flow.
+ */
+ GLboolean EmitNoIfs;
+ GLboolean EmitNoLoops;
+ GLboolean EmitNoFunctions;
+ GLboolean EmitNoCont; /**< Emit CONT opcode? */
+ GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */
+ GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */
+ GLboolean EmitNoPow; /**< Emit POW opcodes? */
+
+ /**
+ * \name Forms of indirect addressing the driver cannot do.
+ */
+ /*@{*/
+ GLboolean EmitNoIndirectInput; /**< No indirect addressing of inputs */
+ GLboolean EmitNoIndirectOutput; /**< No indirect addressing of outputs */
+ GLboolean EmitNoIndirectTemp; /**< No indirect addressing of temps */
+ GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
+ /*@}*/
+
+ GLuint MaxUnrollIterations;
+
+ struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */
+};
+
+/**
+ * Transform feedback object state
+ */
+struct gl_transform_feedback_object
+{
+ GLuint Name; /**< AKA the object ID */
+ GLint RefCount;
+ GLboolean Active; /**< Is transform feedback enabled? */
+ GLboolean Paused; /**< Is transform feedback paused? */
+
+ /** The feedback buffers */
+ GLuint BufferNames[MAX_FEEDBACK_ATTRIBS];
+ struct gl_buffer_object *Buffers[MAX_FEEDBACK_ATTRIBS];
+
+ /** Start of feedback data in dest buffer */
+ GLintptr Offset[MAX_FEEDBACK_ATTRIBS];
+ /** Max data to put into dest buffer (in bytes) */
+ GLsizeiptr Size[MAX_FEEDBACK_ATTRIBS];
+};
+
+
+/**
+ * Context state for transform feedback.
+ */
+struct gl_transform_feedback
+{
+ GLenum Mode; /**< GL_POINTS, GL_LINES or GL_TRIANGLES */
+
+ GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */
+
+ /** The general binding point (GL_TRANSFORM_FEEDBACK_BUFFER) */
+ struct gl_buffer_object *CurrentBuffer;
+
+ /** The table of all transform feedback objects */
+ struct _mesa_HashTable *Objects;
+
+ /** The current xform-fb object (GL_TRANSFORM_FEEDBACK_BINDING) */
+ struct gl_transform_feedback_object *CurrentObject;
+
+ /** The default xform-fb object (Name==0) */
+ struct gl_transform_feedback_object *DefaultObject;
+};
+
+
+
+/**
+ * State which can be shared by multiple contexts:
+ */
+struct gl_shared_state
+{
+ _glthread_Mutex Mutex; /**< for thread safety */
+ GLint RefCount; /**< Reference count */
+ struct _mesa_HashTable *DisplayList; /**< Display lists hash table */
+ struct _mesa_HashTable *TexObjects; /**< Texture objects hash table */
+
+ /** Default texture objects (shared by all texture units) */
+ struct gl_texture_object *DefaultTex[NUM_TEXTURE_TARGETS];
+
+ /** Fallback texture used when a bound texture is incomplete */
+ struct gl_texture_object *FallbackTex;
+
+ /**
+ * \name Thread safety and statechange notification for texture
+ * objects.
+ *
+ * \todo Improve the granularity of locking.
+ */
+ /*@{*/
+ _glthread_Mutex TexMutex; /**< texobj thread safety */
+ GLuint TextureStateStamp; /**< state notification for shared tex */
+ /*@}*/
+
+ /** Default buffer object for vertex arrays that aren't in VBOs */
+ struct gl_buffer_object *NullBufferObj;
+
+ /**
+ * \name Vertex/geometry/fragment programs
+ */
+ /*@{*/
+ struct _mesa_HashTable *Programs; /**< All vertex/fragment programs */
+ struct gl_vertex_program *DefaultVertexProgram;
+ struct gl_fragment_program *DefaultFragmentProgram;
+ struct gl_geometry_program *DefaultGeometryProgram;
+ /*@}*/
+
+ /* GL_ATI_fragment_shader */
+ struct _mesa_HashTable *ATIShaders;
+ struct ati_fragment_shader *DefaultFragmentShader;
+
+ struct _mesa_HashTable *BufferObjects;
+
+ /** Table of both gl_shader and gl_shader_program objects */
+ struct _mesa_HashTable *ShaderObjects;
+
+ /* GL_EXT_framebuffer_object */
+ struct _mesa_HashTable *RenderBuffers;
+ struct _mesa_HashTable *FrameBuffers;
+
+ /* GL_ARB_sync */
+ struct simple_node SyncObjects;
+
+ void *DriverData; /**< Device driver shared state */
+};
+
+
+
+
+/**
+ * A renderbuffer stores colors or depth values or stencil values.
+ * A framebuffer object will have a collection of these.
+ * Data are read/written to the buffer with a handful of Get/Put functions.
+ *
+ * Instances of this object are allocated with the Driver's NewRenderbuffer
+ * hook. Drivers will likely wrap this class inside a driver-specific
+ * class to simulate inheritance.
+ */
+struct gl_renderbuffer
+{
+#define RB_MAGIC 0xaabbccdd
+ int Magic; /** XXX TEMPORARY DEBUG INFO */
+ _glthread_Mutex Mutex; /**< for thread safety */
+ GLuint ClassID; /**< Useful for drivers */
+ GLuint Name;
+ GLint RefCount;
+ GLuint Width, Height;
+ GLboolean Purgeable; /**< Is the buffer purgeable under memory pressure? */
+
+ GLenum InternalFormat; /**< The user-specified format */
+ GLenum _BaseFormat; /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or
+ GL_STENCIL_INDEX. */
+ gl_format Format; /**< The actual renderbuffer memory format */
+
+ GLubyte NumSamples;
+
+ GLenum DataType; /**< Type of values passed to the Get/Put functions */
+ GLvoid *Data; /**< This may not be used by some kinds of RBs */
+
+ /* Used to wrap one renderbuffer around another: */
+ struct gl_renderbuffer *Wrapped;
+
+ /* Delete this renderbuffer */
+ void (*Delete)(struct gl_renderbuffer *rb);
+
+ /* Allocate new storage for this renderbuffer */
+ GLboolean (*AllocStorage)(struct gl_context *ctx, struct gl_renderbuffer *rb,
+ GLenum internalFormat,
+ GLuint width, GLuint height);
+
+ /* Lock/Unlock are called before/after calling the Get/Put functions.
+ * Not sure this is the right place for these yet.
+ void (*Lock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
+ void (*Unlock)(struct gl_context *ctx, struct gl_renderbuffer *rb);
+ */
+
+ /* Return a pointer to the element/pixel at (x,y).
+ * Should return NULL if the buffer memory can't be directly addressed.
+ */
+ void *(*GetPointer)(struct gl_context *ctx, struct gl_renderbuffer *rb,
+ GLint x, GLint y);
+
+ /* Get/Read a row of values.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*GetRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ GLint x, GLint y, void *values);
+
+ /* Get/Read values at arbitrary locations.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*GetValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ const GLint x[], const GLint y[], void *values);
+
+ /* Put/Write a row of values.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*PutRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ GLint x, GLint y, const void *values, const GLubyte *mask);
+
+ /* Put/Write a row of RGB values. This is a special-case routine that's
+ * only used for RGBA renderbuffers when the source data is GL_RGB. That's
+ * a common case for glDrawPixels and some triangle routines.
+ * The values will be of format GL_RGB and type DataType.
+ */
+ void (*PutRowRGB)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ GLint x, GLint y, const void *values, const GLubyte *mask);
+
+
+ /* Put/Write a row of identical values.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*PutMonoRow)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ GLint x, GLint y, const void *value, const GLubyte *mask);
+
+ /* Put/Write values at arbitrary locations.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*PutValues)(struct gl_context *ctx, struct gl_renderbuffer *rb, GLuint count,
+ const GLint x[], const GLint y[], const void *values,
+ const GLubyte *mask);
+ /* Put/Write identical values at arbitrary locations.
+ * The values will be of format _BaseFormat and type DataType.
+ */
+ void (*PutMonoValues)(struct gl_context *ctx, struct gl_renderbuffer *rb,
+ GLuint count, const GLint x[], const GLint y[],
+ const void *value, const GLubyte *mask);
+};
+
+
+/**
+ * A renderbuffer attachment points to either a texture object (and specifies
+ * a mipmap level, cube face or 3D texture slice) or points to a renderbuffer.
+ */
+struct gl_renderbuffer_attachment
+{
+ GLenum Type; /**< \c GL_NONE or \c GL_TEXTURE or \c GL_RENDERBUFFER_EXT */
+ GLboolean Complete;
+
+ /**
+ * If \c Type is \c GL_RENDERBUFFER_EXT, this stores a pointer to the
+ * application supplied renderbuffer object.
+ */
+ struct gl_renderbuffer *Renderbuffer;
+
+ /**
+ * If \c Type is \c GL_TEXTURE, this stores a pointer to the application
+ * supplied texture object.
+ */
+ struct gl_texture_object *Texture;
+ GLuint TextureLevel; /**< Attached mipmap level. */
+ GLuint CubeMapFace; /**< 0 .. 5, for cube map textures. */
+ GLuint Zoffset; /**< Slice for 3D textures, or layer for both 1D
+ * and 2D array textures */
+};
+
+
+/**
+ * A framebuffer is a collection of renderbuffers (color, depth, stencil, etc).
+ * In C++ terms, think of this as a base class from which device drivers
+ * will make derived classes.
+ */
+struct gl_framebuffer
+{
+ _glthread_Mutex Mutex; /**< for thread safety */
+ /**
+ * If zero, this is a window system framebuffer. If non-zero, this
+ * is a FBO framebuffer; note that for some devices (i.e. those with
+ * a natural pixel coordinate system for FBOs that differs from the
+ * OpenGL/Mesa coordinate system), this means that the viewport,
+ * polygon face orientation, and polygon stipple will have to be inverted.
+ */
+ GLuint Name;
+
+ GLint RefCount;
+ GLboolean DeletePending;
+
+ /**
+ * The framebuffer's visual. Immutable if this is a window system buffer.
+ * Computed from attachments if user-made FBO.
+ */
+ struct gl_config Visual;
+
+ GLboolean Initialized;
+
+ GLuint Width, Height; /**< size of frame buffer in pixels */
+
+ /** \name Drawing bounds (Intersection of buffer size and scissor box) */
+ /*@{*/
+ GLint _Xmin, _Xmax; /**< inclusive */
+ GLint _Ymin, _Ymax; /**< exclusive */
+ /*@}*/
+
+ /** \name Derived Z buffer stuff */
+ /*@{*/
+ GLuint _DepthMax; /**< Max depth buffer value */
+ GLfloat _DepthMaxF; /**< Float max depth buffer value */
+ GLfloat _MRD; /**< minimum resolvable difference in Z values */
+ /*@}*/
+
+ /** One of the GL_FRAMEBUFFER_(IN)COMPLETE_* tokens */
+ GLenum _Status;
+
+ /** Integer color values */
+ GLboolean _IntegerColor;
+
+ /** Array of all renderbuffer attachments, indexed by BUFFER_* tokens. */
+ struct gl_renderbuffer_attachment Attachment[BUFFER_COUNT];
+
+ /* In unextended OpenGL these vars are part of the GL_COLOR_BUFFER
+ * attribute group and GL_PIXEL attribute group, respectively.
+ */
+ GLenum ColorDrawBuffer[MAX_DRAW_BUFFERS];
+ GLenum ColorReadBuffer;
+
+ /** Computed from ColorDraw/ReadBuffer above */
+ GLuint _NumColorDrawBuffers;
+ GLint _ColorDrawBufferIndexes[MAX_DRAW_BUFFERS]; /**< BUFFER_x or -1 */
+ GLint _ColorReadBufferIndex; /* -1 = None */
+ struct gl_renderbuffer *_ColorDrawBuffers[MAX_DRAW_BUFFERS];
+ struct gl_renderbuffer *_ColorReadBuffer;
+
+ /** The Actual depth/stencil buffers to use. May be wrappers around the
+ * depth/stencil buffers attached above. */
+ struct gl_renderbuffer *_DepthBuffer;
+ struct gl_renderbuffer *_StencilBuffer;
+
+ /** Delete this framebuffer */
+ void (*Delete)(struct gl_framebuffer *fb);
+};
+
+
+/**
+ * Precision info for shader datatypes. See glGetShaderPrecisionFormat().
+ */
+struct gl_precision
+{
+ GLushort RangeMin; /**< min value exponent */
+ GLushort RangeMax; /**< max value exponent */
+ GLushort Precision; /**< number of mantissa bits */
+};
+
+
+/**
+ * Limits for vertex and fragment programs/shaders.
+ */
+struct gl_program_constants
+{
+ /* logical limits */
+ GLuint MaxInstructions;
+ GLuint MaxAluInstructions;
+ GLuint MaxTexInstructions;
+ GLuint MaxTexIndirections;
+ GLuint MaxAttribs;
+ GLuint MaxTemps;
+ GLuint MaxAddressRegs;
+ GLuint MaxParameters;
+ GLuint MaxLocalParams;
+ GLuint MaxEnvParams;
+ /* native/hardware limits */
+ GLuint MaxNativeInstructions;
+ GLuint MaxNativeAluInstructions;
+ GLuint MaxNativeTexInstructions;
+ GLuint MaxNativeTexIndirections;
+ GLuint MaxNativeAttribs;
+ GLuint MaxNativeTemps;
+ GLuint MaxNativeAddressRegs;
+ GLuint MaxNativeParameters;
+ /* For shaders */
+ GLuint MaxUniformComponents;
+ /* GL_ARB_geometry_shader4 */
+ GLuint MaxGeometryTextureImageUnits;
+ GLuint MaxGeometryVaryingComponents;
+ GLuint MaxVertexVaryingComponents;
+ GLuint MaxGeometryUniformComponents;
+ GLuint MaxGeometryOutputVertices;
+ GLuint MaxGeometryTotalOutputComponents;
+ /* ES 2.0 and GL_ARB_ES2_compatibility */
+ struct gl_precision LowFloat, MediumFloat, HighFloat;
+ struct gl_precision LowInt, MediumInt, HighInt;
+};
+
+
+/**
+ * Constants which may be overridden by device driver during context creation
+ * but are never changed after that.
+ */
+struct gl_constants
+{
+ GLint MaxTextureMbytes; /**< Max memory per image, in MB */
+ GLint MaxTextureLevels; /**< Max mipmap levels. */
+ GLint Max3DTextureLevels; /**< Max mipmap levels for 3D textures */
+ GLint MaxCubeTextureLevels; /**< Max mipmap levels for cube textures */
+ GLint MaxArrayTextureLayers; /**< Max layers in array textures */
+ GLint MaxTextureRectSize; /**< Max rectangle texture size, in pixes */
+ GLuint MaxTextureCoordUnits;
+ GLuint MaxTextureImageUnits;
+ GLuint MaxVertexTextureImageUnits;
+ GLuint MaxCombinedTextureImageUnits;
+ GLuint MaxTextureUnits; /**< = MIN(CoordUnits, ImageUnits) */
+ GLfloat MaxTextureMaxAnisotropy; /**< GL_EXT_texture_filter_anisotropic */
+ GLfloat MaxTextureLodBias; /**< GL_EXT_texture_lod_bias */
+
+ GLuint MaxArrayLockSize;
+
+ GLint SubPixelBits;
+
+ GLfloat MinPointSize, MaxPointSize; /**< aliased */
+ GLfloat MinPointSizeAA, MaxPointSizeAA; /**< antialiased */
+ GLfloat PointSizeGranularity;
+ GLfloat MinLineWidth, MaxLineWidth; /**< aliased */
+ GLfloat MinLineWidthAA, MaxLineWidthAA; /**< antialiased */
+ GLfloat LineWidthGranularity;
+
+ GLuint MaxColorTableSize;
+
+ GLuint MaxClipPlanes;
+ GLuint MaxLights;
+ GLfloat MaxShininess; /**< GL_NV_light_max_exponent */
+ GLfloat MaxSpotExponent; /**< GL_NV_light_max_exponent */
+
+ GLuint MaxViewportWidth, MaxViewportHeight;
+
+ struct gl_program_constants VertexProgram; /**< GL_ARB_vertex_program */
+ struct gl_program_constants FragmentProgram; /**< GL_ARB_fragment_program */
+ struct gl_program_constants GeometryProgram; /**< GL_ARB_geometry_shader4 */
+ GLuint MaxProgramMatrices;
+ GLuint MaxProgramMatrixStackDepth;
+
+ /** vertex array / buffer object bounds checking */
+ GLboolean CheckArrayBounds;
+
+ GLuint MaxDrawBuffers; /**< GL_ARB_draw_buffers */
+
+ GLuint MaxColorAttachments; /**< GL_EXT_framebuffer_object */
+ GLuint MaxRenderbufferSize; /**< GL_EXT_framebuffer_object */
+ GLuint MaxSamples; /**< GL_ARB_framebuffer_object */
+
+ GLuint MaxVarying; /**< Number of float[4] varying parameters */
+
+ GLuint GLSLVersion; /**< GLSL version supported (ex: 120 = 1.20) */
+
+ /** Which texture units support GL_ATI_envmap_bumpmap as targets */
+ GLbitfield SupportedBumpUnits;
+
+ /**
+ * Maximum amount of time, measured in nanseconds, that the server can wait.
+ */
+ GLuint64 MaxServerWaitTimeout;
+
+ /** GL_EXT_provoking_vertex */
+ GLboolean QuadsFollowProvokingVertexConvention;
+
+ /** OpenGL version 3.0 */
+ GLbitfield ContextFlags; /**< Ex: GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT */
+
+ /** OpenGL version 3.2 */
+ GLbitfield ProfileMask; /**< Mask of CONTEXT_x_PROFILE_BIT */
+
+ /** GL_EXT_transform_feedback */
+ GLuint MaxTransformFeedbackSeparateAttribs;
+ GLuint MaxTransformFeedbackSeparateComponents;
+ GLuint MaxTransformFeedbackInterleavedComponents;
+
+ /** GL_EXT_gpu_shader4 */
+ GLint MinProgramTexelOffset, MaxProgramTexelOffset;
+
+ /* GL_EXT_framebuffer_sRGB */
+ GLboolean sRGBCapable; /* can enable sRGB blend/update on FBOs */
+};
+
+
+/**
+ * Enable flag for each OpenGL extension. Different device drivers will
+ * enable different extensions at runtime.
+ */
+struct gl_extensions
+{
+ GLboolean dummy; /* don't remove this! */
+ GLboolean dummy_true; /* Set true by _mesa_init_extensions(). */
+ GLboolean dummy_false; /* Set false by _mesa_init_extensions(). */
+ GLboolean ARB_ES2_compatibility;
+ GLboolean ARB_blend_func_extended;
+ GLboolean ARB_copy_buffer;
+ GLboolean ARB_depth_buffer_float;
+ GLboolean ARB_depth_clamp;
+ GLboolean ARB_depth_texture;
+ GLboolean ARB_draw_buffers;
+ GLboolean ARB_draw_buffers_blend;
+ GLboolean ARB_draw_elements_base_vertex;
+ GLboolean ARB_draw_instanced;
+ GLboolean ARB_fragment_coord_conventions;
+ GLboolean ARB_fragment_program;
+ GLboolean ARB_fragment_program_shadow;
+ GLboolean ARB_fragment_shader;
+ GLboolean ARB_framebuffer_object;
+ GLboolean ARB_explicit_attrib_location;
+ GLboolean ARB_geometry_shader4;
+ GLboolean ARB_half_float_pixel;
+ GLboolean ARB_half_float_vertex;
+ GLboolean ARB_instanced_arrays;
+ GLboolean ARB_map_buffer_range;
+ GLboolean ARB_multisample;
+ GLboolean ARB_multitexture;
+ GLboolean ARB_occlusion_query;
+ GLboolean ARB_occlusion_query2;
+ GLboolean ARB_point_sprite;
+ GLboolean ARB_sampler_objects;
+ GLboolean ARB_seamless_cube_map;
+ GLboolean ARB_shader_objects;
+ GLboolean ARB_shader_stencil_export;
+ GLboolean ARB_shading_language_100;
+ GLboolean ARB_shadow;
+ GLboolean ARB_shadow_ambient;
+ GLboolean ARB_sync;
+ GLboolean ARB_texture_border_clamp;
+ GLboolean ARB_texture_buffer_object;
+ GLboolean ARB_texture_compression;
+ GLboolean ARB_texture_compression_rgtc;
+ GLboolean ARB_texture_cube_map;
+ GLboolean ARB_texture_env_combine;
+ GLboolean ARB_texture_env_crossbar;
+ GLboolean ARB_texture_env_dot3;
+ GLboolean ARB_texture_float;
+ GLboolean ARB_texture_mirrored_repeat;
+ GLboolean ARB_texture_multisample;
+ GLboolean ARB_texture_non_power_of_two;
+ GLboolean ARB_texture_rg;
+ GLboolean ARB_texture_rgb10_a2ui;
+ GLboolean ARB_timer_query;
+ GLboolean ARB_transform_feedback2;
+ GLboolean ARB_transpose_matrix;
+ GLboolean ARB_uniform_buffer_object;
+ GLboolean ARB_vertex_array_object;
+ GLboolean ARB_vertex_buffer_object;
+ GLboolean ARB_vertex_program;
+ GLboolean ARB_vertex_shader;
+ GLboolean ARB_vertex_type_2_10_10_10_rev;
+ GLboolean ARB_window_pos;
+ GLboolean EXT_abgr;
+ GLboolean EXT_bgra;
+ GLboolean EXT_blend_color;
+ GLboolean EXT_blend_equation_separate;
+ GLboolean EXT_blend_func_separate;
+ GLboolean EXT_blend_logic_op;
+ GLboolean EXT_blend_minmax;
+ GLboolean EXT_blend_subtract;
+ GLboolean EXT_clip_volume_hint;
+ GLboolean EXT_compiled_vertex_array;
+ GLboolean EXT_copy_texture;
+ GLboolean EXT_depth_bounds_test;
+ GLboolean EXT_draw_buffers2;
+ GLboolean EXT_draw_range_elements;
+ GLboolean EXT_fog_coord;
+ GLboolean EXT_framebuffer_blit;
+ GLboolean EXT_framebuffer_multisample;
+ GLboolean EXT_framebuffer_object;
+ GLboolean EXT_framebuffer_sRGB;
+ GLboolean EXT_gpu_program_parameters;
+ GLboolean EXT_gpu_shader4;
+ GLboolean EXT_multi_draw_arrays;
+ GLboolean EXT_paletted_texture;
+ GLboolean EXT_packed_depth_stencil;
+ GLboolean EXT_packed_float;
+ GLboolean EXT_packed_pixels;
+ GLboolean EXT_pixel_buffer_object;
+ GLboolean EXT_point_parameters;
+ GLboolean EXT_polygon_offset;
+ GLboolean EXT_provoking_vertex;
+ GLboolean EXT_rescale_normal;
+ GLboolean EXT_shadow_funcs;
+ GLboolean EXT_secondary_color;
+ GLboolean EXT_separate_shader_objects;
+ GLboolean EXT_separate_specular_color;
+ GLboolean EXT_shared_texture_palette;
+ GLboolean EXT_stencil_wrap;
+ GLboolean EXT_stencil_two_side;
+ GLboolean EXT_subtexture;
+ GLboolean EXT_texture;
+ GLboolean EXT_texture_object;
+ GLboolean EXT_texture3D;
+ GLboolean EXT_texture_array;
+ GLboolean EXT_texture_compression_s3tc;
+ GLboolean EXT_texture_env_add;
+ GLboolean EXT_texture_env_combine;
+ GLboolean EXT_texture_env_dot3;
+ GLboolean EXT_texture_filter_anisotropic;
+ GLboolean EXT_texture_integer;
+ GLboolean EXT_texture_lod_bias;
+ GLboolean EXT_texture_mirror_clamp;
+ GLboolean EXT_texture_shared_exponent;
+ GLboolean EXT_texture_sRGB;
+ GLboolean EXT_texture_sRGB_decode;
+ GLboolean EXT_texture_swizzle;
+ GLboolean EXT_transform_feedback;
+ GLboolean EXT_timer_query;
+ GLboolean EXT_vertex_array;
+ GLboolean EXT_vertex_array_bgra;
+ GLboolean EXT_vertex_array_set;
+ GLboolean OES_standard_derivatives;
+ /* vendor extensions */
+ GLboolean AMD_conservative_depth;
+ GLboolean APPLE_client_storage;
+ GLboolean APPLE_packed_pixels;
+ GLboolean APPLE_vertex_array_object;
+ GLboolean APPLE_object_purgeable;
+ GLboolean ATI_envmap_bumpmap;
+ GLboolean ATI_texture_mirror_once;
+ GLboolean ATI_texture_env_combine3;
+ GLboolean ATI_fragment_shader;
+ GLboolean ATI_separate_stencil;
+ GLboolean IBM_rasterpos_clip;
+ GLboolean IBM_multimode_draw_arrays;
+ GLboolean MESA_pack_invert;
+ GLboolean MESA_resize_buffers;
+ GLboolean MESA_ycbcr_texture;
+ GLboolean MESA_texture_array;
+ GLboolean MESA_texture_signed_rgba;
+ GLboolean NV_blend_square;
+ GLboolean NV_conditional_render;
+ GLboolean NV_fragment_program;
+ GLboolean NV_fragment_program_option;
+ GLboolean NV_light_max_exponent;
+ GLboolean NV_point_sprite;
+ GLboolean NV_primitive_restart;
+ GLboolean NV_texgen_reflection;
+ GLboolean NV_texture_env_combine4;
+ GLboolean NV_texture_rectangle;
+ GLboolean NV_vertex_program;
+ GLboolean NV_vertex_program1_1;
+ GLboolean OES_read_format;
+ GLboolean SGI_texture_color_table;
+ GLboolean SGIS_generate_mipmap;
+ GLboolean SGIS_texture_edge_clamp;
+ GLboolean SGIS_texture_lod;
+ GLboolean TDFX_texture_compression_FXT1;
+ GLboolean S3_s3tc;
+ GLboolean OES_EGL_image;
+ GLboolean OES_draw_texture;
+ GLboolean EXT_texture_format_BGRA8888;
+ GLboolean extension_sentinel;
+ /** The extension string */
+ const GLubyte *String;
+ /** Number of supported extensions */
+ GLuint Count;
+};
+
+
+/**
+ * A stack of matrices (projection, modelview, color, texture, etc).
+ */
+struct gl_matrix_stack
+{
+ GLmatrix *Top; /**< points into Stack */
+ GLmatrix *Stack; /**< array [MaxDepth] of GLmatrix */
+ GLuint Depth; /**< 0 <= Depth < MaxDepth */
+ GLuint MaxDepth; /**< size of Stack[] array */
+ GLuint DirtyFlag; /**< _NEW_MODELVIEW or _NEW_PROJECTION, for example */
+};
+
+
+/**
+ * \name Bits for image transfer operations
+ * \sa __struct gl_contextRec::ImageTransferState.
+ */
+/*@{*/
+#define IMAGE_SCALE_BIAS_BIT 0x1
+#define IMAGE_SHIFT_OFFSET_BIT 0x2
+#define IMAGE_MAP_COLOR_BIT 0x4
+#define IMAGE_CLAMP_BIT 0x800
+
+
+/** Pixel Transfer ops */
+#define IMAGE_BITS (IMAGE_SCALE_BIAS_BIT | \
+ IMAGE_SHIFT_OFFSET_BIT | \
+ IMAGE_MAP_COLOR_BIT)
+
+/**
+ * \name Bits to indicate what state has changed.
+ */
+/*@{*/
+#define _NEW_MODELVIEW (1 << 0) /**< gl_context::ModelView */
+#define _NEW_PROJECTION (1 << 1) /**< gl_context::Projection */
+#define _NEW_TEXTURE_MATRIX (1 << 2) /**< gl_context::TextureMatrix */
+#define _NEW_COLOR (1 << 3) /**< gl_context::Color */
+#define _NEW_DEPTH (1 << 4) /**< gl_context::Depth */
+#define _NEW_EVAL (1 << 5) /**< gl_context::Eval, EvalMap */
+#define _NEW_FOG (1 << 6) /**< gl_context::Fog */
+#define _NEW_HINT (1 << 7) /**< gl_context::Hint */
+#define _NEW_LIGHT (1 << 8) /**< gl_context::Light */
+#define _NEW_LINE (1 << 9) /**< gl_context::Line */
+#define _NEW_PIXEL (1 << 10) /**< gl_context::Pixel */
+#define _NEW_POINT (1 << 11) /**< gl_context::Point */
+#define _NEW_POLYGON (1 << 12) /**< gl_context::Polygon */
+#define _NEW_POLYGONSTIPPLE (1 << 13) /**< gl_context::PolygonStipple */
+#define _NEW_SCISSOR (1 << 14) /**< gl_context::Scissor */
+#define _NEW_STENCIL (1 << 15) /**< gl_context::Stencil */
+#define _NEW_TEXTURE (1 << 16) /**< gl_context::Texture */
+#define _NEW_TRANSFORM (1 << 17) /**< gl_context::Transform */
+#define _NEW_VIEWPORT (1 << 18) /**< gl_context::Viewport */
+#define _NEW_PACKUNPACK (1 << 19) /**< gl_context::Pack, Unpack */
+#define _NEW_ARRAY (1 << 20) /**< gl_context::Array */
+#define _NEW_RENDERMODE (1 << 21) /**< gl_context::RenderMode, etc */
+#define _NEW_BUFFERS (1 << 22) /**< gl_context::Visual, DrawBuffer, */
+#define _NEW_CURRENT_ATTRIB (1 << 23) /**< gl_context::Current */
+#define _NEW_MULTISAMPLE (1 << 24) /**< gl_context::Multisample */
+#define _NEW_TRACK_MATRIX (1 << 25) /**< gl_context::VertexProgram */
+#define _NEW_PROGRAM (1 << 26) /**< New program/shader state */
+#define _NEW_PROGRAM_CONSTANTS (1 << 27)
+#define _NEW_BUFFER_OBJECT (1 << 28)
+#define _NEW_ALL ~0
+/*@}*/
+
+
+/**
+ * \name Bits to track array state changes
+ *
+ * Also used to summarize array enabled.
+ */
+/*@{*/
+#define _NEW_ARRAY_VERTEX VERT_BIT_POS
+#define _NEW_ARRAY_WEIGHT VERT_BIT_WEIGHT
+#define _NEW_ARRAY_NORMAL VERT_BIT_NORMAL
+#define _NEW_ARRAY_COLOR0 VERT_BIT_COLOR0
+#define _NEW_ARRAY_COLOR1 VERT_BIT_COLOR1
+#define _NEW_ARRAY_FOGCOORD VERT_BIT_FOG
+#define _NEW_ARRAY_INDEX VERT_BIT_COLOR_INDEX
+#define _NEW_ARRAY_EDGEFLAG VERT_BIT_EDGEFLAG
+#define _NEW_ARRAY_POINT_SIZE VERT_BIT_COLOR_INDEX /* aliased */
+#define _NEW_ARRAY_TEXCOORD_0 VERT_BIT_TEX0
+#define _NEW_ARRAY_TEXCOORD_1 VERT_BIT_TEX1
+#define _NEW_ARRAY_TEXCOORD_2 VERT_BIT_TEX2
+#define _NEW_ARRAY_TEXCOORD_3 VERT_BIT_TEX3
+#define _NEW_ARRAY_TEXCOORD_4 VERT_BIT_TEX4
+#define _NEW_ARRAY_TEXCOORD_5 VERT_BIT_TEX5
+#define _NEW_ARRAY_TEXCOORD_6 VERT_BIT_TEX6
+#define _NEW_ARRAY_TEXCOORD_7 VERT_BIT_TEX7
+#define _NEW_ARRAY_ATTRIB_0 VERT_BIT_GENERIC0 /* start at bit 16 */
+#define _NEW_ARRAY_ALL 0xffffffff
+
+
+#define _NEW_ARRAY_TEXCOORD(i) (_NEW_ARRAY_TEXCOORD_0 << (i))
+#define _NEW_ARRAY_ATTRIB(i) (_NEW_ARRAY_ATTRIB_0 << (i))
+/*@}*/
+
+
+
+/**
+ * \name A bunch of flags that we think might be useful to drivers.
+ *
+ * Set in the __struct gl_contextRec::_TriangleCaps bitfield.
+ */
+/*@{*/
+#define DD_FLATSHADE 0x1
+#define DD_SEPARATE_SPECULAR 0x2
+#define DD_TRI_CULL_FRONT_BACK 0x4 /* special case on some hw */
+#define DD_TRI_LIGHT_TWOSIDE 0x8
+#define DD_TRI_UNFILLED 0x10
+#define DD_TRI_SMOOTH 0x20
+#define DD_TRI_STIPPLE 0x40
+#define DD_TRI_OFFSET 0x80
+#define DD_LINE_SMOOTH 0x100
+#define DD_LINE_STIPPLE 0x200
+#define DD_POINT_SMOOTH 0x400
+#define DD_POINT_ATTEN 0x800
+#define DD_TRI_TWOSTENCIL 0x1000
+/*@}*/
+
+
+/**
+ * \name Define the state changes under which each of these bits might change
+ */
+/*@{*/
+#define _DD_NEW_FLATSHADE _NEW_LIGHT
+#define _DD_NEW_SEPARATE_SPECULAR (_NEW_LIGHT | _NEW_FOG | _NEW_PROGRAM)
+#define _DD_NEW_TRI_CULL_FRONT_BACK _NEW_POLYGON
+#define _DD_NEW_TRI_LIGHT_TWOSIDE _NEW_LIGHT
+#define _DD_NEW_TRI_UNFILLED _NEW_POLYGON
+#define _DD_NEW_TRI_SMOOTH _NEW_POLYGON
+#define _DD_NEW_TRI_STIPPLE _NEW_POLYGON
+#define _DD_NEW_TRI_OFFSET _NEW_POLYGON
+#define _DD_NEW_LINE_SMOOTH _NEW_LINE
+#define _DD_NEW_LINE_STIPPLE _NEW_LINE
+#define _DD_NEW_LINE_WIDTH _NEW_LINE
+#define _DD_NEW_POINT_SMOOTH _NEW_POINT
+#define _DD_NEW_POINT_SIZE _NEW_POINT
+#define _DD_NEW_POINT_ATTEN _NEW_POINT
+/*@}*/
+
+
+/**
+ * Composite state flags
+ */
+/*@{*/
+#define _MESA_NEW_NEED_EYE_COORDS (_NEW_LIGHT | \
+ _NEW_TEXTURE | \
+ _NEW_POINT | \
+ _NEW_PROGRAM | \
+ _NEW_MODELVIEW)
+
+#define _MESA_NEW_NEED_NORMALS (_NEW_LIGHT | \
+ _NEW_TEXTURE)
+
+#define _MESA_NEW_TRANSFER_STATE (_NEW_PIXEL)
+/*@}*/
+
+
+
+
+/* This has to be included here. */
+#include "dd.h"
+
+
+/**
+ * Display list flags.
+ * Strictly this is a tnl-private concept, but it doesn't seem
+ * worthwhile adding a tnl private structure just to hold this one bit
+ * of information:
+ */
+#define DLIST_DANGLING_REFS 0x1
+
+
+/** Opaque declaration of display list payload data type */
+union gl_dlist_node;
+
+
+/**
+ * Provide a location where information about a display list can be
+ * collected. Could be extended with driverPrivate structures,
+ * etc. in the future.
+ */
+struct gl_display_list
+{
+ GLuint Name;
+ GLbitfield Flags; /**< DLIST_x flags */
+ /** The dlist commands are in a linked list of nodes */
+ union gl_dlist_node *Head;
+};
+
+
+/**
+ * State used during display list compilation and execution.
+ */
+struct gl_dlist_state
+{
+ GLuint CallDepth; /**< Current recursion calling depth */
+
+ struct gl_display_list *CurrentList; /**< List currently being compiled */
+ union gl_dlist_node *CurrentBlock; /**< Pointer to current block of nodes */
+ GLuint CurrentPos; /**< Index into current block of nodes */
+
+ GLvertexformat ListVtxfmt;
+
+ GLubyte ActiveAttribSize[VERT_ATTRIB_MAX];
+ GLfloat CurrentAttrib[VERT_ATTRIB_MAX][4];
+
+ GLubyte ActiveMaterialSize[MAT_ATTRIB_MAX];
+ GLfloat CurrentMaterial[MAT_ATTRIB_MAX][4];
+
+ GLubyte ActiveIndex;
+ GLfloat CurrentIndex;
+
+ GLubyte ActiveEdgeFlag;
+ GLboolean CurrentEdgeFlag;
+
+ struct {
+ /* State known to have been set by the currently-compiling display
+ * list. Used to eliminate some redundant state changes.
+ */
+ GLenum ShadeModel;
+ } Current;
+};
+
+
+/**
+ * Enum for the OpenGL APIs we know about and may support.
+ */
+typedef enum
+{
+ API_OPENGL,
+ API_OPENGLES,
+ API_OPENGLES2
+} gl_api;
+
+
+/**
+ * Mesa rendering context.
+ *
+ * This is the central context data structure for Mesa. Almost all
+ * OpenGL state is contained in this structure.
+ * Think of this as a base class from which device drivers will derive
+ * sub classes.
+ *
+ * The struct gl_context typedef names this structure.
+ */
+struct gl_context
+{
+ /** State possibly shared with other contexts in the address space */
+ struct gl_shared_state *Shared;
+
+ /** \name API function pointer tables */
+ /*@{*/
+ gl_api API;
+ struct _glapi_table *Save; /**< Display list save functions */
+ struct _glapi_table *Exec; /**< Execute functions */
+ struct _glapi_table *CurrentDispatch; /**< == Save or Exec !! */
+ /*@}*/
+
+ struct gl_config Visual;
+ struct gl_framebuffer *DrawBuffer; /**< buffer for writing */
+ struct gl_framebuffer *ReadBuffer; /**< buffer for reading */
+ struct gl_framebuffer *WinSysDrawBuffer; /**< set with MakeCurrent */
+ struct gl_framebuffer *WinSysReadBuffer; /**< set with MakeCurrent */
+
+ /**
+ * Device driver function pointer table
+ */
+ struct dd_function_table Driver;
+
+ void *DriverCtx; /**< Points to device driver context/state */
+
+ /** Core/Driver constants */
+ struct gl_constants Const;
+
+ /** \name The various 4x4 matrix stacks */
+ /*@{*/
+ struct gl_matrix_stack ModelviewMatrixStack;
+ struct gl_matrix_stack ProjectionMatrixStack;
+ struct gl_matrix_stack TextureMatrixStack[MAX_TEXTURE_UNITS];
+ struct gl_matrix_stack ProgramMatrixStack[MAX_PROGRAM_MATRICES];
+ struct gl_matrix_stack *CurrentStack; /**< Points to one of the above stacks */
+ /*@}*/
+
+ /** Combined modelview and projection matrix */
+ GLmatrix _ModelProjectMatrix;
+
+ /** \name Display lists */
+ struct gl_dlist_state ListState;
+
+ GLboolean ExecuteFlag; /**< Execute GL commands? */
+ GLboolean CompileFlag; /**< Compile GL commands into display list? */
+
+ /** Extension information */
+ struct gl_extensions Extensions;
+
+ /** Version info */
+ GLuint VersionMajor, VersionMinor;
+ char *VersionString;
+
+ /** \name State attribute stack (for glPush/PopAttrib) */
+ /*@{*/
+ GLuint AttribStackDepth;
+ struct gl_attrib_node *AttribStack[MAX_ATTRIB_STACK_DEPTH];
+ /*@}*/
+
+ /** \name Renderer attribute groups
+ *
+ * We define a struct for each attribute group to make pushing and popping
+ * attributes easy. Also it's a good organization.
+ */
+ /*@{*/
+ struct gl_accum_attrib Accum; /**< Accum buffer attributes */
+ struct gl_colorbuffer_attrib Color; /**< Color buffer attributes */
+ struct gl_current_attrib Current; /**< Current attributes */
+ struct gl_depthbuffer_attrib Depth; /**< Depth buffer attributes */
+ struct gl_eval_attrib Eval; /**< Eval attributes */
+ struct gl_fog_attrib Fog; /**< Fog attributes */
+ struct gl_hint_attrib Hint; /**< Hint attributes */
+ struct gl_light_attrib Light; /**< Light attributes */
+ struct gl_line_attrib Line; /**< Line attributes */
+ struct gl_list_attrib List; /**< List attributes */
+ struct gl_multisample_attrib Multisample;
+ struct gl_pixel_attrib Pixel; /**< Pixel attributes */
+ struct gl_point_attrib Point; /**< Point attributes */
+ struct gl_polygon_attrib Polygon; /**< Polygon attributes */
+ GLuint PolygonStipple[32]; /**< Polygon stipple */
+ struct gl_scissor_attrib Scissor; /**< Scissor attributes */
+ struct gl_stencil_attrib Stencil; /**< Stencil buffer attributes */
+ struct gl_texture_attrib Texture; /**< Texture attributes */
+ struct gl_transform_attrib Transform; /**< Transformation attributes */
+ struct gl_viewport_attrib Viewport; /**< Viewport attributes */
+ /*@}*/
+
+ /** \name Client attribute stack */
+ /*@{*/
+ GLuint ClientAttribStackDepth;
+ struct gl_attrib_node *ClientAttribStack[MAX_CLIENT_ATTRIB_STACK_DEPTH];
+ /*@}*/
+
+ /** \name Client attribute groups */
+ /*@{*/
+ struct gl_array_attrib Array; /**< Vertex arrays */
+ struct gl_pixelstore_attrib Pack; /**< Pixel packing */
+ struct gl_pixelstore_attrib Unpack; /**< Pixel unpacking */
+ struct gl_pixelstore_attrib DefaultPacking; /**< Default params */
+ /*@}*/
+
+ /** \name Other assorted state (not pushed/popped on attribute stack) */
+ /*@{*/
+ struct gl_pixelmaps PixelMaps;
+
+ struct gl_evaluators EvalMap; /**< All evaluators */
+ struct gl_feedback Feedback; /**< Feedback */
+ struct gl_selection Select; /**< Selection */
+
+ struct gl_program_state Program; /**< general program state */
+ struct gl_vertex_program_state VertexProgram;
+ struct gl_fragment_program_state FragmentProgram;
+ struct gl_geometry_program_state GeometryProgram;
+ struct gl_ati_fragment_shader_state ATIFragmentShader;
+
+ struct gl_shader_state Shader; /**< GLSL shader object state */
+ struct gl_shader_compiler_options ShaderCompilerOptions[MESA_SHADER_TYPES];
+
+ struct gl_query_state Query; /**< occlusion, timer queries */
+
+ struct gl_transform_feedback TransformFeedback;
+
+ struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
+ struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
+ /*@}*/
+
+ struct gl_meta_state *Meta; /**< for "meta" operations */
+
+ /* GL_EXT_framebuffer_object */
+ struct gl_renderbuffer *CurrentRenderbuffer;
+
+ GLenum ErrorValue; /**< Last error code */
+
+ /**
+ * Recognize and silence repeated error debug messages in buggy apps.
+ */
+ const char *ErrorDebugFmtString;
+ GLuint ErrorDebugCount;
+
+ GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
+ GLbitfield NewState; /**< bitwise-or of _NEW_* flags */
+
+ GLboolean ViewportInitialized; /**< has viewport size been initialized? */
+
+ GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */
+
+ /** \name Derived state */
+ /*@{*/
+ /** Bitwise-or of DD_* flags. Note that this bitfield may be used before
+ * state validation so they need to always be current.
+ */
+ GLbitfield _TriangleCaps;
+ GLbitfield _ImageTransferState;/**< bitwise-or of IMAGE_*_BIT flags */
+ GLfloat _EyeZDir[3];
+ GLfloat _ModelViewInvScale;
+ GLboolean _NeedEyeCoords;
+ GLboolean _ForceEyeCoords;
+
+ GLuint TextureStateTimestamp; /**< detect changes to shared state */
+
+ struct gl_shine_tab *_ShineTable[2]; /**< Active shine tables */
+ struct gl_shine_tab *_ShineTabList; /**< MRU list of inactive shine tables */
+ /**@}*/
+
+ struct gl_list_extensions *ListExt; /**< driver dlist extensions */
+
+ /** \name For debugging/development only */
+ /*@{*/
+ GLboolean FirstTimeCurrent;
+ /*@}*/
+
+ /** Dither disable via MESA_NO_DITHER env var */
+ GLboolean NoDither;
+
+ /** software compression/decompression supported or not */
+ GLboolean Mesa_DXTn;
+
+ GLboolean TextureFormatSupported[MESA_FORMAT_COUNT];
+
+ /**
+ * Use dp4 (rather than mul/mad) instructions for position
+ * transformation?
+ */
+ GLboolean mvp_with_dp4;
+
+ /**
+ * \name Hooks for module contexts.
+ *
+ * These will eventually live in the driver or elsewhere.
+ */
+ /*@{*/
+ void *swrast_context;
+ void *swsetup_context;
+ void *swtnl_context;
+ void *swtnl_im;
+ struct st_context *st;
+ void *aelt_context;
+ /*@}*/
+};
+
+
+#ifdef DEBUG
+extern int MESA_VERBOSE;
+extern int MESA_DEBUG_FLAGS;
+# define MESA_FUNCTION __FUNCTION__
+#else
+# define MESA_VERBOSE 0
+# define MESA_DEBUG_FLAGS 0
+# define MESA_FUNCTION "a function"
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+
+
+/** The MESA_VERBOSE var is a bitmask of these flags */
+enum _verbose
+{
+ VERBOSE_VARRAY = 0x0001,
+ VERBOSE_TEXTURE = 0x0002,
+ VERBOSE_MATERIAL = 0x0004,
+ VERBOSE_PIPELINE = 0x0008,
+ VERBOSE_DRIVER = 0x0010,
+ VERBOSE_STATE = 0x0020,
+ VERBOSE_API = 0x0040,
+ VERBOSE_DISPLAY_LIST = 0x0100,
+ VERBOSE_LIGHTING = 0x0200,
+ VERBOSE_PRIMS = 0x0400,
+ VERBOSE_VERTS = 0x0800,
+ VERBOSE_DISASSEM = 0x1000,
+ VERBOSE_DRAW = 0x2000,
+ VERBOSE_SWAPBUFFERS = 0x4000
+};
+
+
+/** The MESA_DEBUG_FLAGS var is a bitmask of these flags */
+enum _debug
+{
+ DEBUG_ALWAYS_FLUSH = 0x1
+};
+
+
+
+#endif /* MTYPES_H */
diff --git a/mesalib/src/mesa/main/state.c b/mesalib/src/mesa/main/state.c
index c07e2c380..502c42929 100644
--- a/mesalib/src/mesa/main/state.c
+++ b/mesalib/src/mesa/main/state.c
@@ -1,732 +1,734 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.3
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * \file state.c
- * State management.
- *
- * This file manages recalculation of derived values in struct gl_context.
- */
-
-
-#include "glheader.h"
-#include "mtypes.h"
-#include "context.h"
-#include "debug.h"
-#include "macros.h"
-#include "ffvertex_prog.h"
-#include "framebuffer.h"
-#include "light.h"
-#include "matrix.h"
-#include "pixel.h"
-#include "program/program.h"
-#include "program/prog_parameter.h"
-#include "state.h"
-#include "stencil.h"
-#include "texenvprogram.h"
-#include "texobj.h"
-#include "texstate.h"
-
-
-static void
-update_separate_specular(struct gl_context *ctx)
-{
- if (NEED_SECONDARY_COLOR(ctx))
- ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
- else
- ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR;
-}
-
-
-/**
- * Compute the index of the last array element that can be safely accessed
- * in a vertex array. We can really only do this when the array lives in
- * a VBO.
- * The array->_MaxElement field will be updated.
- * Later in glDrawArrays/Elements/etc we can do some bounds checking.
- */
-static void
-compute_max_element(struct gl_client_array *array)
-{
- assert(array->Enabled);
- if (array->BufferObj->Name) {
- GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr;
- GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size;
-
- if (offset < obj_size) {
- array->_MaxElement = (obj_size - offset +
- array->StrideB -
- array->_ElementSize) / array->StrideB;
- } else {
- array->_MaxElement = 0;
- }
- }
- else {
- /* user-space array, no idea how big it is */
- array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */
- }
-}
-
-
-/**
- * Helper for update_arrays().
- * \return min(current min, array->_MaxElement).
- */
-static GLuint
-update_min(GLuint min, struct gl_client_array *array)
-{
- compute_max_element(array);
- return MIN2(min, array->_MaxElement);
-}
-
-
-/**
- * Update ctx->Array._MaxElement (the max legal index into all enabled arrays).
- * Need to do this upon new array state or new buffer object state.
- */
-static void
-update_arrays( struct gl_context *ctx )
-{
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- GLuint i, min = ~0;
-
- /* find min of _MaxElement values for all enabled arrays */
-
- /* 0 */
- if (ctx->VertexProgram._Current
- && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]);
- }
- else if (arrayObj->Vertex.Enabled) {
- min = update_min(min, &arrayObj->Vertex);
- }
-
- /* 1 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]);
- }
- /* no conventional vertex weight array */
-
- /* 2 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]);
- }
- else if (arrayObj->Normal.Enabled) {
- min = update_min(min, &arrayObj->Normal);
- }
-
- /* 3 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]);
- }
- else if (arrayObj->Color.Enabled) {
- min = update_min(min, &arrayObj->Color);
- }
-
- /* 4 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]);
- }
- else if (arrayObj->SecondaryColor.Enabled) {
- min = update_min(min, &arrayObj->SecondaryColor);
- }
-
- /* 5 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]);
- }
- else if (arrayObj->FogCoord.Enabled) {
- min = update_min(min, &arrayObj->FogCoord);
- }
-
- /* 6 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]);
- }
- else if (arrayObj->Index.Enabled) {
- min = update_min(min, &arrayObj->Index);
- }
-
- /* 7 */
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]);
- }
-
- /* 8..15 */
- for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) {
- if (ctx->VertexProgram._Enabled
- && arrayObj->VertexAttrib[i].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[i]);
- }
- else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits
- && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) {
- min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]);
- }
- }
-
- /* 16..31 */
- if (ctx->VertexProgram._Current) {
- for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
- if (arrayObj->VertexAttrib[i].Enabled) {
- min = update_min(min, &arrayObj->VertexAttrib[i]);
- }
- }
- }
-
- if (arrayObj->EdgeFlag.Enabled) {
- min = update_min(min, &arrayObj->EdgeFlag);
- }
-
- /* _MaxElement is one past the last legal array element */
- arrayObj->_MaxElement = min;
-}
-
-
-/**
- * Update the following fields:
- * ctx->VertexProgram._Enabled
- * ctx->FragmentProgram._Enabled
- * ctx->ATIFragmentShader._Enabled
- * This needs to be done before texture state validation.
- */
-static void
-update_program_enables(struct gl_context *ctx)
-{
- /* These _Enabled flags indicate if the program is enabled AND valid. */
- ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled
- && ctx->VertexProgram.Current->Base.Instructions;
- ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled
- && ctx->FragmentProgram.Current->Base.Instructions;
- ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled
- && ctx->ATIFragmentShader.Current->Instructions[0];
-}
-
-
-/**
- * Update vertex/fragment program state. In particular, update these fields:
- * ctx->VertexProgram._Current
- * ctx->VertexProgram._TnlProgram,
- * These point to the highest priority enabled vertex/fragment program or are
- * NULL if fixed-function processing is to be done.
- *
- * This function needs to be called after texture state validation in case
- * we're generating a fragment program from fixed-function texture state.
- *
- * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex
- * or fragment program is being used.
- */
-static GLbitfield
-update_program(struct gl_context *ctx)
-{
- const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram;
- const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram;
- const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram;
- const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
- const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
- const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
- GLbitfield new_state = 0x0;
-
- /*
- * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current
- * pointers to the programs that should be used for rendering. If either
- * is NULL, use fixed-function code paths.
- *
- * These programs may come from several sources. The priority is as
- * follows:
- * 1. OpenGL 2.0/ARB vertex/fragment shaders
- * 2. ARB/NV vertex/fragment programs
- * 3. Programs derived from fixed-function state.
- *
- * Note: it's possible for a vertex shader to get used with a fragment
- * program (and vice versa) here, but in practice that shouldn't ever
- * come up, or matter.
- */
-
- if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) {
- /* Use shader programs */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- fsProg->FragmentProgram);
- }
- else if (ctx->FragmentProgram._Enabled) {
- /* use user-defined vertex program */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- ctx->FragmentProgram.Current);
- }
- else if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- /* Use fragment program generated from fixed-function state.
- */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
- _mesa_get_fixed_func_fragment_program(ctx));
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram,
- ctx->FragmentProgram._Current);
- }
- else {
- /* no fragment program */
- _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL);
- }
-
- if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) {
- /* Use shader programs */
- _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current,
- gsProg->GeometryProgram);
- } else {
- /* no fragment program */
- _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
- }
-
- /* Examine vertex program after fragment program as
- * _mesa_get_fixed_func_vertex_program() needs to know active
- * fragprog inputs.
- */
- if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) {
- /* Use shader programs */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- vsProg->VertexProgram);
- }
- else if (ctx->VertexProgram._Enabled) {
- /* use user-defined vertex program */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- ctx->VertexProgram.Current);
- }
- else if (ctx->VertexProgram._MaintainTnlProgram) {
- /* Use vertex program generated from fixed-function state.
- */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
- _mesa_get_fixed_func_vertex_program(ctx));
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram,
- ctx->VertexProgram._Current);
- }
- else {
- /* no vertex program */
- _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
- }
-
- /* Let the driver know what's happening:
- */
- if (ctx->FragmentProgram._Current != prevFP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
- (struct gl_program *) ctx->FragmentProgram._Current);
- }
- }
-
- if (ctx->GeometryProgram._Current != prevGP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM,
- (struct gl_program *) ctx->GeometryProgram._Current);
- }
- }
-
- if (ctx->VertexProgram._Current != prevVP) {
- new_state |= _NEW_PROGRAM;
- if (ctx->Driver.BindProgram) {
- ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
- (struct gl_program *) ctx->VertexProgram._Current);
- }
- }
-
- return new_state;
-}
-
-
-/**
- * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0.
- */
-static GLbitfield
-update_program_constants(struct gl_context *ctx)
-{
- GLbitfield new_state = 0x0;
-
- if (ctx->FragmentProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->FragmentProgram._Current->Base.Parameters;
- if (params && params->StateFlags & ctx->NewState) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- if (ctx->GeometryProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->GeometryProgram._Current->Base.Parameters;
- /*FIXME: StateFlags is always 0 because we have unnamed constant
- * not state changes */
- if (params /*&& params->StateFlags & ctx->NewState*/) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- if (ctx->VertexProgram._Current) {
- const struct gl_program_parameter_list *params =
- ctx->VertexProgram._Current->Base.Parameters;
- if (params && params->StateFlags & ctx->NewState) {
- new_state |= _NEW_PROGRAM_CONSTANTS;
- }
- }
-
- return new_state;
-}
-
-
-
-
-static void
-update_viewport_matrix(struct gl_context *ctx)
-{
- const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
-
- ASSERT(depthMax > 0);
-
- /* Compute scale and bias values. This is really driver-specific
- * and should be maintained elsewhere if at all.
- * NOTE: RasterPos uses this.
- */
- _math_matrix_viewport(&ctx->Viewport._WindowMap,
- ctx->Viewport.X, ctx->Viewport.Y,
- ctx->Viewport.Width, ctx->Viewport.Height,
- ctx->Viewport.Near, ctx->Viewport.Far,
- depthMax);
-}
-
-
-/**
- * Update derived multisample state.
- */
-static void
-update_multisample(struct gl_context *ctx)
-{
- ctx->Multisample._Enabled = GL_FALSE;
- if (ctx->Multisample.Enabled &&
- ctx->DrawBuffer &&
- ctx->DrawBuffer->Visual.sampleBuffers)
- ctx->Multisample._Enabled = GL_TRUE;
-}
-
-
-/**
- * Update derived color/blend/logicop state.
- */
-static void
-update_color(struct gl_context *ctx)
-{
- /* This is needed to support 1.1's RGB logic ops AND
- * 1.0's blending logicops.
- */
- ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx);
-}
-
-
-/*
- * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET
- * in ctx->_TriangleCaps if needed.
- */
-static void
-update_polygon(struct gl_context *ctx)
-{
- ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET);
-
- if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
-
- if ( ctx->Polygon.OffsetPoint
- || ctx->Polygon.OffsetLine
- || ctx->Polygon.OffsetFill)
- ctx->_TriangleCaps |= DD_TRI_OFFSET;
-}
-
-
-/**
- * Update the ctx->_TriangleCaps bitfield.
- * XXX that bitfield should really go away someday!
- * This function must be called after other update_*() functions since
- * there are dependencies on some other derived values.
- */
-#if 0
-static void
-update_tricaps(struct gl_context *ctx, GLbitfield new_state)
-{
- ctx->_TriangleCaps = 0;
-
- /*
- * Points
- */
- if (1/*new_state & _NEW_POINT*/) {
- if (ctx->Point.SmoothFlag)
- ctx->_TriangleCaps |= DD_POINT_SMOOTH;
- if (ctx->Point._Attenuated)
- ctx->_TriangleCaps |= DD_POINT_ATTEN;
- }
-
- /*
- * Lines
- */
- if (1/*new_state & _NEW_LINE*/) {
- if (ctx->Line.SmoothFlag)
- ctx->_TriangleCaps |= DD_LINE_SMOOTH;
- if (ctx->Line.StippleFlag)
- ctx->_TriangleCaps |= DD_LINE_STIPPLE;
- }
-
- /*
- * Polygons
- */
- if (1/*new_state & _NEW_POLYGON*/) {
- if (ctx->Polygon.SmoothFlag)
- ctx->_TriangleCaps |= DD_TRI_SMOOTH;
- if (ctx->Polygon.StippleFlag)
- ctx->_TriangleCaps |= DD_TRI_STIPPLE;
- if (ctx->Polygon.FrontMode != GL_FILL
- || ctx->Polygon.BackMode != GL_FILL)
- ctx->_TriangleCaps |= DD_TRI_UNFILLED;
- if (ctx->Polygon.CullFlag
- && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
- ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
- if (ctx->Polygon.OffsetPoint ||
- ctx->Polygon.OffsetLine ||
- ctx->Polygon.OffsetFill)
- ctx->_TriangleCaps |= DD_TRI_OFFSET;
- }
-
- /*
- * Lighting and shading
- */
- if (ctx->Light.Enabled && ctx->Light.Model.TwoSide)
- ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE;
- if (ctx->Light.ShadeModel == GL_FLAT)
- ctx->_TriangleCaps |= DD_FLATSHADE;
- if (NEED_SECONDARY_COLOR(ctx))
- ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
-
- /*
- * Stencil
- */
- if (ctx->Stencil._TestTwoSide)
- ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL;
-}
-#endif
-
-
-/**
- * Compute derived GL state.
- * If __struct gl_contextRec::NewState is non-zero then this function \b must
- * be called before rendering anything.
- *
- * Calls dd_function_table::UpdateState to perform any internal state
- * management necessary.
- *
- * \sa _mesa_update_modelview_project(), _mesa_update_texture(),
- * _mesa_update_buffer_bounds(),
- * _mesa_update_lighting() and _mesa_update_tnl_spaces().
- */
-void
-_mesa_update_state_locked( struct gl_context *ctx )
-{
- GLbitfield new_state = ctx->NewState;
- GLbitfield prog_flags = _NEW_PROGRAM;
- GLbitfield new_prog_state = 0x0;
-
- if (new_state == _NEW_CURRENT_ATTRIB)
- goto out;
-
- if (MESA_VERBOSE & VERBOSE_STATE)
- _mesa_print_state("_mesa_update_state", new_state);
-
- /* Determine which state flags effect vertex/fragment program state */
- if (ctx->FragmentProgram._MaintainTexEnvProgram) {
- prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG |
- _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE |
- _NEW_PROGRAM);
- }
- if (ctx->VertexProgram._MaintainTnlProgram) {
- prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
- _NEW_TRANSFORM | _NEW_POINT |
- _NEW_FOG | _NEW_LIGHT |
- _MESA_NEW_NEED_EYE_COORDS);
- }
-
- /*
- * Now update derived state info
- */
-
- if (new_state & prog_flags)
- update_program_enables( ctx );
-
- if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
- _mesa_update_modelview_project( ctx, new_state );
-
- if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX))
- _mesa_update_texture( ctx, new_state );
-
- if (new_state & _NEW_BUFFERS)
- _mesa_update_framebuffer(ctx);
-
- if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
- _mesa_update_draw_buffer_bounds( ctx );
-
- if (new_state & _NEW_POLYGON)
- update_polygon( ctx );
-
- if (new_state & _NEW_LIGHT)
- _mesa_update_lighting( ctx );
-
- if (new_state & (_NEW_STENCIL | _NEW_BUFFERS))
- _mesa_update_stencil( ctx );
-
- if (new_state & _MESA_NEW_TRANSFER_STATE)
- _mesa_update_pixel( ctx, new_state );
-
- if (new_state & _DD_NEW_SEPARATE_SPECULAR)
- update_separate_specular( ctx );
-
- if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT))
- update_viewport_matrix(ctx);
-
- if (new_state & _NEW_MULTISAMPLE)
- update_multisample( ctx );
-
- if (new_state & _NEW_COLOR)
- update_color( ctx );
-
-#if 0
- if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT
- | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR))
- update_tricaps( ctx, new_state );
-#endif
-
- /* ctx->_NeedEyeCoords is now up to date.
- *
- * If the truth value of this variable has changed, update for the
- * new lighting space and recompute the positions of lights and the
- * normal transform.
- *
- * If the lighting space hasn't changed, may still need to recompute
- * light positions & normal transforms for other reasons.
- */
- if (new_state & _MESA_NEW_NEED_EYE_COORDS)
- _mesa_update_tnl_spaces( ctx, new_state );
-
- if (new_state & prog_flags) {
- /* When we generate programs from fixed-function vertex/fragment state
- * this call may generate/bind a new program. If so, we need to
- * propogate the _NEW_PROGRAM flag to the driver.
- */
- new_prog_state |= update_program( ctx );
- }
-
- if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT))
- update_arrays( ctx );
-
- out:
- new_prog_state |= update_program_constants(ctx);
-
- /*
- * Give the driver a chance to act upon the new_state flags.
- * The driver might plug in different span functions, for example.
- * Also, this is where the driver can invalidate the state of any
- * active modules (such as swrast_setup, swrast, tnl, etc).
- *
- * Set ctx->NewState to zero to avoid recursion if
- * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?)
- */
- new_state = ctx->NewState | new_prog_state;
- ctx->NewState = 0;
- ctx->Driver.UpdateState(ctx, new_state);
- ctx->Array.NewState = 0;
-}
-
-
-/* This is the usual entrypoint for state updates:
- */
-void
-_mesa_update_state( struct gl_context *ctx )
-{
- _mesa_lock_context_textures(ctx);
- _mesa_update_state_locked(ctx);
- _mesa_unlock_context_textures(ctx);
-}
-
-
-
-
-/**
- * Want to figure out which fragment program inputs are actually
- * constant/current values from ctx->Current. These should be
- * referenced as a tracked state variable rather than a fragment
- * program input, to save the overhead of putting a constant value in
- * every submitted vertex, transferring it to hardware, interpolating
- * it across the triangle, etc...
- *
- * When there is a VP bound, just use vp->outputs. But when we're
- * generating vp from fixed function state, basically want to
- * calculate:
- *
- * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) |
- * potential_vp_outputs )
- *
- * Where potential_vp_outputs is calculated by looking at enabled
- * texgen, etc.
- *
- * The generated fragment program should then only declare inputs that
- * may vary or otherwise differ from the ctx->Current values.
- * Otherwise, the fp should track them as state values instead.
- */
-void
-_mesa_set_varying_vp_inputs( struct gl_context *ctx,
- GLbitfield varying_inputs )
-{
- if (ctx->varying_vp_inputs != varying_inputs) {
- ctx->varying_vp_inputs = varying_inputs;
- ctx->NewState |= _NEW_ARRAY;
- /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/
- }
-}
-
-
-/**
- * Used by drivers to tell core Mesa that the driver is going to
- * install/ use its own vertex program. In particular, this will
- * prevent generated fragment programs from using state vars instead
- * of ordinary varyings/inputs.
- */
-void
-_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag)
-{
- if (ctx->VertexProgram._Overriden != flag) {
- ctx->VertexProgram._Overriden = flag;
-
- /* Set one of the bits which will trigger fragment program
- * regeneration:
- */
- ctx->NewState |= _NEW_PROGRAM;
- }
-}
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.3
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file state.c
+ * State management.
+ *
+ * This file manages recalculation of derived values in struct gl_context.
+ */
+
+
+#include "glheader.h"
+#include "mtypes.h"
+#include "context.h"
+#include "debug.h"
+#include "macros.h"
+#include "ffvertex_prog.h"
+#include "framebuffer.h"
+#include "light.h"
+#include "matrix.h"
+#include "pixel.h"
+#include "program/program.h"
+#include "program/prog_parameter.h"
+#include "state.h"
+#include "stencil.h"
+#include "texenvprogram.h"
+#include "texobj.h"
+#include "texstate.h"
+
+
+static void
+update_separate_specular(struct gl_context *ctx)
+{
+ if (NEED_SECONDARY_COLOR(ctx))
+ ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
+ else
+ ctx->_TriangleCaps &= ~DD_SEPARATE_SPECULAR;
+}
+
+
+/**
+ * Compute the index of the last array element that can be safely accessed
+ * in a vertex array. We can really only do this when the array lives in
+ * a VBO.
+ * The array->_MaxElement field will be updated.
+ * Later in glDrawArrays/Elements/etc we can do some bounds checking.
+ */
+static void
+compute_max_element(struct gl_client_array *array)
+{
+ assert(array->Enabled);
+ if (array->BufferObj->Name) {
+ GLsizeiptrARB offset = (GLsizeiptrARB) array->Ptr;
+ GLsizeiptrARB obj_size = (GLsizeiptrARB) array->BufferObj->Size;
+
+ if (offset < obj_size) {
+ array->_MaxElement = (obj_size - offset +
+ array->StrideB -
+ array->_ElementSize) / array->StrideB;
+ } else {
+ array->_MaxElement = 0;
+ }
+ }
+ else {
+ /* user-space array, no idea how big it is */
+ array->_MaxElement = 2 * 1000 * 1000 * 1000; /* just a big number */
+ }
+}
+
+
+/**
+ * Helper for update_arrays().
+ * \return min(current min, array->_MaxElement).
+ */
+static GLuint
+update_min(GLuint min, struct gl_client_array *array)
+{
+ compute_max_element(array);
+ return MIN2(min, array->_MaxElement);
+}
+
+
+/**
+ * Update ctx->Array._MaxElement (the max legal index into all enabled arrays).
+ * Need to do this upon new array state or new buffer object state.
+ */
+static void
+update_arrays( struct gl_context *ctx )
+{
+ struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
+ GLuint i, min = ~0;
+
+ /* find min of _MaxElement values for all enabled arrays */
+
+ /* 0 */
+ if (ctx->VertexProgram._Current
+ && arrayObj->VertexAttrib[VERT_ATTRIB_POS].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_POS]);
+ }
+ else if (arrayObj->Vertex.Enabled) {
+ min = update_min(min, &arrayObj->Vertex);
+ }
+
+ /* 1 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_WEIGHT]);
+ }
+ /* no conventional vertex weight array */
+
+ /* 2 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_NORMAL]);
+ }
+ else if (arrayObj->Normal.Enabled) {
+ min = update_min(min, &arrayObj->Normal);
+ }
+
+ /* 3 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR0]);
+ }
+ else if (arrayObj->Color.Enabled) {
+ min = update_min(min, &arrayObj->Color);
+ }
+
+ /* 4 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR1]);
+ }
+ else if (arrayObj->SecondaryColor.Enabled) {
+ min = update_min(min, &arrayObj->SecondaryColor);
+ }
+
+ /* 5 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_FOG].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_FOG]);
+ }
+ else if (arrayObj->FogCoord.Enabled) {
+ min = update_min(min, &arrayObj->FogCoord);
+ }
+
+ /* 6 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_COLOR_INDEX]);
+ }
+ else if (arrayObj->Index.Enabled) {
+ min = update_min(min, &arrayObj->Index);
+ }
+
+ /* 7 */
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[VERT_ATTRIB_EDGEFLAG]);
+ }
+
+ /* 8..15 */
+ for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) {
+ if (ctx->VertexProgram._Enabled
+ && arrayObj->VertexAttrib[i].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[i]);
+ }
+ else if (i - VERT_ATTRIB_TEX0 < ctx->Const.MaxTextureCoordUnits
+ && arrayObj->TexCoord[i - VERT_ATTRIB_TEX0].Enabled) {
+ min = update_min(min, &arrayObj->TexCoord[i - VERT_ATTRIB_TEX0]);
+ }
+ }
+
+ /* 16..31 */
+ if (ctx->VertexProgram._Current) {
+ for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
+ if (arrayObj->VertexAttrib[i].Enabled) {
+ min = update_min(min, &arrayObj->VertexAttrib[i]);
+ }
+ }
+ }
+
+ if (arrayObj->EdgeFlag.Enabled) {
+ min = update_min(min, &arrayObj->EdgeFlag);
+ }
+
+ /* _MaxElement is one past the last legal array element */
+ arrayObj->_MaxElement = min;
+}
+
+
+/**
+ * Update the following fields:
+ * ctx->VertexProgram._Enabled
+ * ctx->FragmentProgram._Enabled
+ * ctx->ATIFragmentShader._Enabled
+ * This needs to be done before texture state validation.
+ */
+static void
+update_program_enables(struct gl_context *ctx)
+{
+ /* These _Enabled flags indicate if the program is enabled AND valid. */
+ ctx->VertexProgram._Enabled = ctx->VertexProgram.Enabled
+ && ctx->VertexProgram.Current->Base.Instructions;
+ ctx->FragmentProgram._Enabled = ctx->FragmentProgram.Enabled
+ && ctx->FragmentProgram.Current->Base.Instructions;
+ ctx->ATIFragmentShader._Enabled = ctx->ATIFragmentShader.Enabled
+ && ctx->ATIFragmentShader.Current->Instructions[0];
+}
+
+
+/**
+ * Update vertex/fragment program state. In particular, update these fields:
+ * ctx->VertexProgram._Current
+ * ctx->VertexProgram._TnlProgram,
+ * These point to the highest priority enabled vertex/fragment program or are
+ * NULL if fixed-function processing is to be done.
+ *
+ * This function needs to be called after texture state validation in case
+ * we're generating a fragment program from fixed-function texture state.
+ *
+ * \return bitfield which will indicate _NEW_PROGRAM state if a new vertex
+ * or fragment program is being used.
+ */
+static GLbitfield
+update_program(struct gl_context *ctx)
+{
+ const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram;
+ const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram;
+ const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram;
+ const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
+ const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
+ const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
+ GLbitfield new_state = 0x0;
+
+ /*
+ * Set the ctx->VertexProgram._Current and ctx->FragmentProgram._Current
+ * pointers to the programs that should be used for rendering. If either
+ * is NULL, use fixed-function code paths.
+ *
+ * These programs may come from several sources. The priority is as
+ * follows:
+ * 1. OpenGL 2.0/ARB vertex/fragment shaders
+ * 2. ARB/NV vertex/fragment programs
+ * 3. Programs derived from fixed-function state.
+ *
+ * Note: it's possible for a vertex shader to get used with a fragment
+ * program (and vice versa) here, but in practice that shouldn't ever
+ * come up, or matter.
+ */
+
+ if (fsProg && fsProg->LinkStatus && fsProg->FragmentProgram) {
+ /* Use shader programs */
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
+ fsProg->FragmentProgram);
+ }
+ else if (ctx->FragmentProgram._Enabled) {
+ /* use user-defined vertex program */
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
+ ctx->FragmentProgram.Current);
+ }
+ else if (ctx->FragmentProgram._MaintainTexEnvProgram) {
+ /* Use fragment program generated from fixed-function state.
+ */
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
+ _mesa_get_fixed_func_fragment_program(ctx));
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram,
+ ctx->FragmentProgram._Current);
+ }
+ else {
+ /* no fragment program */
+ _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL);
+ }
+
+ if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) {
+ /* Use shader programs */
+ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current,
+ gsProg->GeometryProgram);
+ } else {
+ /* no fragment program */
+ _mesa_reference_geomprog(ctx, &ctx->GeometryProgram._Current, NULL);
+ }
+
+ /* Examine vertex program after fragment program as
+ * _mesa_get_fixed_func_vertex_program() needs to know active
+ * fragprog inputs.
+ */
+ if (vsProg && vsProg->LinkStatus && vsProg->VertexProgram) {
+ /* Use shader programs */
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
+ vsProg->VertexProgram);
+ }
+ else if (ctx->VertexProgram._Enabled) {
+ /* use user-defined vertex program */
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
+ ctx->VertexProgram.Current);
+ }
+ else if (ctx->VertexProgram._MaintainTnlProgram) {
+ /* Use vertex program generated from fixed-function state.
+ */
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current,
+ _mesa_get_fixed_func_vertex_program(ctx));
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._TnlProgram,
+ ctx->VertexProgram._Current);
+ }
+ else {
+ /* no vertex program */
+ _mesa_reference_vertprog(ctx, &ctx->VertexProgram._Current, NULL);
+ }
+
+ /* Let the driver know what's happening:
+ */
+ if (ctx->FragmentProgram._Current != prevFP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
+ (struct gl_program *) ctx->FragmentProgram._Current);
+ }
+ }
+
+ if (ctx->GeometryProgram._Current != prevGP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, MESA_GEOMETRY_PROGRAM,
+ (struct gl_program *) ctx->GeometryProgram._Current);
+ }
+ }
+
+ if (ctx->VertexProgram._Current != prevVP) {
+ new_state |= _NEW_PROGRAM;
+ if (ctx->Driver.BindProgram) {
+ ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+ (struct gl_program *) ctx->VertexProgram._Current);
+ }
+ }
+
+ return new_state;
+}
+
+
+/**
+ * Examine shader constants and return either _NEW_PROGRAM_CONSTANTS or 0.
+ */
+static GLbitfield
+update_program_constants(struct gl_context *ctx)
+{
+ GLbitfield new_state = 0x0;
+
+ if (ctx->FragmentProgram._Current) {
+ const struct gl_program_parameter_list *params =
+ ctx->FragmentProgram._Current->Base.Parameters;
+ if (params && params->StateFlags & ctx->NewState) {
+ new_state |= _NEW_PROGRAM_CONSTANTS;
+ }
+ }
+
+ if (ctx->GeometryProgram._Current) {
+ const struct gl_program_parameter_list *params =
+ ctx->GeometryProgram._Current->Base.Parameters;
+ /*FIXME: StateFlags is always 0 because we have unnamed constant
+ * not state changes */
+ if (params /*&& params->StateFlags & ctx->NewState*/) {
+ new_state |= _NEW_PROGRAM_CONSTANTS;
+ }
+ }
+
+ if (ctx->VertexProgram._Current) {
+ const struct gl_program_parameter_list *params =
+ ctx->VertexProgram._Current->Base.Parameters;
+ if (params && params->StateFlags & ctx->NewState) {
+ new_state |= _NEW_PROGRAM_CONSTANTS;
+ }
+ }
+
+ return new_state;
+}
+
+
+
+
+static void
+update_viewport_matrix(struct gl_context *ctx)
+{
+ const GLfloat depthMax = ctx->DrawBuffer->_DepthMaxF;
+
+ ASSERT(depthMax > 0);
+
+ /* Compute scale and bias values. This is really driver-specific
+ * and should be maintained elsewhere if at all.
+ * NOTE: RasterPos uses this.
+ */
+ _math_matrix_viewport(&ctx->Viewport._WindowMap,
+ ctx->Viewport.X, ctx->Viewport.Y,
+ ctx->Viewport.Width, ctx->Viewport.Height,
+ ctx->Viewport.Near, ctx->Viewport.Far,
+ depthMax);
+}
+
+
+/**
+ * Update derived multisample state.
+ */
+static void
+update_multisample(struct gl_context *ctx)
+{
+ ctx->Multisample._Enabled = GL_FALSE;
+ if (ctx->Multisample.Enabled &&
+ ctx->DrawBuffer &&
+ ctx->DrawBuffer->Visual.sampleBuffers)
+ ctx->Multisample._Enabled = GL_TRUE;
+}
+
+
+/**
+ * Update derived color/blend/logicop state.
+ */
+static void
+update_color(struct gl_context *ctx)
+{
+ /* This is needed to support 1.1's RGB logic ops AND
+ * 1.0's blending logicops.
+ */
+ ctx->Color._LogicOpEnabled = RGBA_LOGICOP_ENABLED(ctx);
+}
+
+
+/*
+ * Check polygon state and set DD_TRI_CULL_FRONT_BACK and/or DD_TRI_OFFSET
+ * in ctx->_TriangleCaps if needed.
+ */
+static void
+update_polygon(struct gl_context *ctx)
+{
+ ctx->_TriangleCaps &= ~(DD_TRI_CULL_FRONT_BACK | DD_TRI_OFFSET);
+
+ if (ctx->Polygon.CullFlag && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
+
+ if ( ctx->Polygon.OffsetPoint
+ || ctx->Polygon.OffsetLine
+ || ctx->Polygon.OffsetFill)
+ ctx->_TriangleCaps |= DD_TRI_OFFSET;
+}
+
+
+/**
+ * Update the ctx->_TriangleCaps bitfield.
+ * XXX that bitfield should really go away someday!
+ * This function must be called after other update_*() functions since
+ * there are dependencies on some other derived values.
+ */
+#if 0
+static void
+update_tricaps(struct gl_context *ctx, GLbitfield new_state)
+{
+ ctx->_TriangleCaps = 0;
+
+ /*
+ * Points
+ */
+ if (1/*new_state & _NEW_POINT*/) {
+ if (ctx->Point.SmoothFlag)
+ ctx->_TriangleCaps |= DD_POINT_SMOOTH;
+ if (ctx->Point._Attenuated)
+ ctx->_TriangleCaps |= DD_POINT_ATTEN;
+ }
+
+ /*
+ * Lines
+ */
+ if (1/*new_state & _NEW_LINE*/) {
+ if (ctx->Line.SmoothFlag)
+ ctx->_TriangleCaps |= DD_LINE_SMOOTH;
+ if (ctx->Line.StippleFlag)
+ ctx->_TriangleCaps |= DD_LINE_STIPPLE;
+ }
+
+ /*
+ * Polygons
+ */
+ if (1/*new_state & _NEW_POLYGON*/) {
+ if (ctx->Polygon.SmoothFlag)
+ ctx->_TriangleCaps |= DD_TRI_SMOOTH;
+ if (ctx->Polygon.StippleFlag)
+ ctx->_TriangleCaps |= DD_TRI_STIPPLE;
+ if (ctx->Polygon.FrontMode != GL_FILL
+ || ctx->Polygon.BackMode != GL_FILL)
+ ctx->_TriangleCaps |= DD_TRI_UNFILLED;
+ if (ctx->Polygon.CullFlag
+ && ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+ ctx->_TriangleCaps |= DD_TRI_CULL_FRONT_BACK;
+ if (ctx->Polygon.OffsetPoint ||
+ ctx->Polygon.OffsetLine ||
+ ctx->Polygon.OffsetFill)
+ ctx->_TriangleCaps |= DD_TRI_OFFSET;
+ }
+
+ /*
+ * Lighting and shading
+ */
+ if (ctx->Light.Enabled && ctx->Light.Model.TwoSide)
+ ctx->_TriangleCaps |= DD_TRI_LIGHT_TWOSIDE;
+ if (ctx->Light.ShadeModel == GL_FLAT)
+ ctx->_TriangleCaps |= DD_FLATSHADE;
+ if (NEED_SECONDARY_COLOR(ctx))
+ ctx->_TriangleCaps |= DD_SEPARATE_SPECULAR;
+
+ /*
+ * Stencil
+ */
+ if (ctx->Stencil._TestTwoSide)
+ ctx->_TriangleCaps |= DD_TRI_TWOSTENCIL;
+}
+#endif
+
+
+/**
+ * Compute derived GL state.
+ * If __struct gl_contextRec::NewState is non-zero then this function \b must
+ * be called before rendering anything.
+ *
+ * Calls dd_function_table::UpdateState to perform any internal state
+ * management necessary.
+ *
+ * \sa _mesa_update_modelview_project(), _mesa_update_texture(),
+ * _mesa_update_buffer_bounds(),
+ * _mesa_update_lighting() and _mesa_update_tnl_spaces().
+ */
+void
+_mesa_update_state_locked( struct gl_context *ctx )
+{
+ GLbitfield new_state = ctx->NewState;
+ GLbitfield prog_flags = _NEW_PROGRAM;
+ GLbitfield new_prog_state = 0x0;
+
+ if (new_state == _NEW_CURRENT_ATTRIB)
+ goto out;
+
+ if (MESA_VERBOSE & VERBOSE_STATE)
+ _mesa_print_state("_mesa_update_state", new_state);
+
+ /* Determine which state flags effect vertex/fragment program state */
+ if (ctx->FragmentProgram._MaintainTexEnvProgram) {
+ prog_flags |= (_NEW_BUFFERS | _NEW_TEXTURE | _NEW_FOG |
+ _NEW_ARRAY | _NEW_LIGHT | _NEW_POINT | _NEW_RENDERMODE |
+ _NEW_PROGRAM);
+ }
+ if (ctx->VertexProgram._MaintainTnlProgram) {
+ prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX |
+ _NEW_TRANSFORM | _NEW_POINT |
+ _NEW_FOG | _NEW_LIGHT |
+ _MESA_NEW_NEED_EYE_COORDS);
+ }
+
+ /*
+ * Now update derived state info
+ */
+
+ if (new_state & prog_flags)
+ update_program_enables( ctx );
+
+ if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+ _mesa_update_modelview_project( ctx, new_state );
+
+ if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX))
+ _mesa_update_texture( ctx, new_state );
+
+ if (new_state & _NEW_BUFFERS)
+ _mesa_update_framebuffer(ctx);
+
+ if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
+ _mesa_update_draw_buffer_bounds( ctx );
+
+ if (new_state & _NEW_POLYGON)
+ update_polygon( ctx );
+
+ if (new_state & _NEW_LIGHT)
+ _mesa_update_lighting( ctx );
+
+ if (new_state & (_NEW_STENCIL | _NEW_BUFFERS))
+ _mesa_update_stencil( ctx );
+
+ if (new_state & _MESA_NEW_TRANSFER_STATE)
+ _mesa_update_pixel( ctx, new_state );
+
+ if (new_state & _DD_NEW_SEPARATE_SPECULAR)
+ update_separate_specular( ctx );
+
+ if (new_state & (_NEW_BUFFERS | _NEW_VIEWPORT))
+ update_viewport_matrix(ctx);
+
+ if (new_state & _NEW_MULTISAMPLE)
+ update_multisample( ctx );
+
+ if (new_state & _NEW_COLOR)
+ update_color( ctx );
+
+#if 0
+ if (new_state & (_NEW_POINT | _NEW_LINE | _NEW_POLYGON | _NEW_LIGHT
+ | _NEW_STENCIL | _DD_NEW_SEPARATE_SPECULAR))
+ update_tricaps( ctx, new_state );
+#endif
+
+ /* ctx->_NeedEyeCoords is now up to date.
+ *
+ * If the truth value of this variable has changed, update for the
+ * new lighting space and recompute the positions of lights and the
+ * normal transform.
+ *
+ * If the lighting space hasn't changed, may still need to recompute
+ * light positions & normal transforms for other reasons.
+ */
+ if (new_state & _MESA_NEW_NEED_EYE_COORDS)
+ _mesa_update_tnl_spaces( ctx, new_state );
+
+ if (new_state & prog_flags) {
+ /* When we generate programs from fixed-function vertex/fragment state
+ * this call may generate/bind a new program. If so, we need to
+ * propogate the _NEW_PROGRAM flag to the driver.
+ */
+ new_prog_state |= update_program( ctx );
+ }
+
+ if (new_state & (_NEW_ARRAY | _NEW_PROGRAM | _NEW_BUFFER_OBJECT))
+ update_arrays( ctx );
+
+ out:
+ new_prog_state |= update_program_constants(ctx);
+
+ /*
+ * Give the driver a chance to act upon the new_state flags.
+ * The driver might plug in different span functions, for example.
+ * Also, this is where the driver can invalidate the state of any
+ * active modules (such as swrast_setup, swrast, tnl, etc).
+ *
+ * Set ctx->NewState to zero to avoid recursion if
+ * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?)
+ */
+ new_state = ctx->NewState | new_prog_state;
+ ctx->NewState = 0;
+ ctx->Driver.UpdateState(ctx, new_state);
+ ctx->Array.NewState = 0;
+ if (!ctx->Array.RebindArrays)
+ ctx->Array.RebindArrays = (new_state & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
+}
+
+
+/* This is the usual entrypoint for state updates:
+ */
+void
+_mesa_update_state( struct gl_context *ctx )
+{
+ _mesa_lock_context_textures(ctx);
+ _mesa_update_state_locked(ctx);
+ _mesa_unlock_context_textures(ctx);
+}
+
+
+
+
+/**
+ * Want to figure out which fragment program inputs are actually
+ * constant/current values from ctx->Current. These should be
+ * referenced as a tracked state variable rather than a fragment
+ * program input, to save the overhead of putting a constant value in
+ * every submitted vertex, transferring it to hardware, interpolating
+ * it across the triangle, etc...
+ *
+ * When there is a VP bound, just use vp->outputs. But when we're
+ * generating vp from fixed function state, basically want to
+ * calculate:
+ *
+ * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) |
+ * potential_vp_outputs )
+ *
+ * Where potential_vp_outputs is calculated by looking at enabled
+ * texgen, etc.
+ *
+ * The generated fragment program should then only declare inputs that
+ * may vary or otherwise differ from the ctx->Current values.
+ * Otherwise, the fp should track them as state values instead.
+ */
+void
+_mesa_set_varying_vp_inputs( struct gl_context *ctx,
+ GLbitfield varying_inputs )
+{
+ if (ctx->varying_vp_inputs != varying_inputs) {
+ ctx->varying_vp_inputs = varying_inputs;
+ ctx->NewState |= _NEW_ARRAY;
+ /*printf("%s %x\n", __FUNCTION__, varying_inputs);*/
+ }
+}
+
+
+/**
+ * Used by drivers to tell core Mesa that the driver is going to
+ * install/ use its own vertex program. In particular, this will
+ * prevent generated fragment programs from using state vars instead
+ * of ordinary varyings/inputs.
+ */
+void
+_mesa_set_vp_override(struct gl_context *ctx, GLboolean flag)
+{
+ if (ctx->VertexProgram._Overriden != flag) {
+ ctx->VertexProgram._Overriden = flag;
+
+ /* Set one of the bits which will trigger fragment program
+ * regeneration:
+ */
+ ctx->NewState |= _NEW_PROGRAM;
+ }
+}
diff --git a/mesalib/src/mesa/state_tracker/st_atom_blend.c b/mesalib/src/mesa/state_tracker/st_atom_blend.c
index 26bb3dab9..fb1c7a4ef 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_blend.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_blend.c
@@ -1,299 +1,302 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-
-#include "st_context.h"
-#include "st_atom.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "cso_cache/cso_context.h"
-
-#include "main/macros.h"
-
-/**
- * Convert GLenum blend tokens to pipe tokens.
- * Both blend factors and blend funcs are accepted.
- */
-static GLuint
-translate_blend(GLenum blend)
-{
- switch (blend) {
- /* blend functions */
- case GL_FUNC_ADD:
- return PIPE_BLEND_ADD;
- case GL_FUNC_SUBTRACT:
- return PIPE_BLEND_SUBTRACT;
- case GL_FUNC_REVERSE_SUBTRACT:
- return PIPE_BLEND_REVERSE_SUBTRACT;
- case GL_MIN:
- return PIPE_BLEND_MIN;
- case GL_MAX:
- return PIPE_BLEND_MAX;
-
- /* blend factors */
- case GL_ONE:
- return PIPE_BLENDFACTOR_ONE;
- case GL_SRC_COLOR:
- return PIPE_BLENDFACTOR_SRC_COLOR;
- case GL_SRC_ALPHA:
- return PIPE_BLENDFACTOR_SRC_ALPHA;
- case GL_DST_ALPHA:
- return PIPE_BLENDFACTOR_DST_ALPHA;
- case GL_DST_COLOR:
- return PIPE_BLENDFACTOR_DST_COLOR;
- case GL_SRC_ALPHA_SATURATE:
- return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
- case GL_CONSTANT_COLOR:
- return PIPE_BLENDFACTOR_CONST_COLOR;
- case GL_CONSTANT_ALPHA:
- return PIPE_BLENDFACTOR_CONST_ALPHA;
- /*
- return PIPE_BLENDFACTOR_SRC1_COLOR;
- return PIPE_BLENDFACTOR_SRC1_ALPHA;
- */
- case GL_ZERO:
- return PIPE_BLENDFACTOR_ZERO;
- case GL_ONE_MINUS_SRC_COLOR:
- return PIPE_BLENDFACTOR_INV_SRC_COLOR;
- case GL_ONE_MINUS_SRC_ALPHA:
- return PIPE_BLENDFACTOR_INV_SRC_ALPHA;
- case GL_ONE_MINUS_DST_COLOR:
- return PIPE_BLENDFACTOR_INV_DST_COLOR;
- case GL_ONE_MINUS_DST_ALPHA:
- return PIPE_BLENDFACTOR_INV_DST_ALPHA;
- case GL_ONE_MINUS_CONSTANT_COLOR:
- return PIPE_BLENDFACTOR_INV_CONST_COLOR;
- case GL_ONE_MINUS_CONSTANT_ALPHA:
- return PIPE_BLENDFACTOR_INV_CONST_ALPHA;
- /*
- return PIPE_BLENDFACTOR_INV_SRC1_COLOR;
- return PIPE_BLENDFACTOR_INV_SRC1_ALPHA;
- */
- default:
- assert("invalid GL token in translate_blend()" == NULL);
- return 0;
- }
-}
-
-
-/**
- * Convert GLenum logicop tokens to pipe tokens.
- */
-static GLuint
-translate_logicop(GLenum logicop)
-{
- switch (logicop) {
- case GL_CLEAR:
- return PIPE_LOGICOP_CLEAR;
- case GL_NOR:
- return PIPE_LOGICOP_NOR;
- case GL_AND_INVERTED:
- return PIPE_LOGICOP_AND_INVERTED;
- case GL_COPY_INVERTED:
- return PIPE_LOGICOP_COPY_INVERTED;
- case GL_AND_REVERSE:
- return PIPE_LOGICOP_AND_REVERSE;
- case GL_INVERT:
- return PIPE_LOGICOP_INVERT;
- case GL_XOR:
- return PIPE_LOGICOP_XOR;
- case GL_NAND:
- return PIPE_LOGICOP_NAND;
- case GL_AND:
- return PIPE_LOGICOP_AND;
- case GL_EQUIV:
- return PIPE_LOGICOP_EQUIV;
- case GL_NOOP:
- return PIPE_LOGICOP_NOOP;
- case GL_OR_INVERTED:
- return PIPE_LOGICOP_OR_INVERTED;
- case GL_COPY:
- return PIPE_LOGICOP_COPY;
- case GL_OR_REVERSE:
- return PIPE_LOGICOP_OR_REVERSE;
- case GL_OR:
- return PIPE_LOGICOP_OR;
- case GL_SET:
- return PIPE_LOGICOP_SET;
- default:
- assert("invalid GL token in translate_logicop()" == NULL);
- return 0;
- }
-}
-
-/**
- * Figure out if colormasks are different per rt.
- */
-static GLboolean
-colormask_per_rt(struct gl_context *ctx)
-{
- /* a bit suboptimal have to compare lots of values */
- unsigned i;
- for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) {
- if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-/**
- * Figure out if blend enables/state are different per rt.
- */
-static GLboolean
-blend_per_rt(struct gl_context *ctx)
-{
- if (ctx->Color.BlendEnabled &&
- (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) {
- /* This can only happen if GL_EXT_draw_buffers2 is enabled */
- return GL_TRUE;
- }
- if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) {
- /* this can only happen if GL_ARB_draw_buffers_blend is enabled */
- return GL_TRUE;
- }
- return GL_FALSE;
-}
-
-static void
-update_blend( struct st_context *st )
-{
- struct pipe_blend_state *blend = &st->state.blend;
- unsigned num_state = 1;
- unsigned i;
-
- memset(blend, 0, sizeof(*blend));
-
- if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) {
- num_state = st->ctx->Const.MaxDrawBuffers;
- blend->independent_blend_enable = 1;
- }
- /* Note it is impossible to correctly deal with EXT_blend_logic_op and
- EXT_draw_buffers2/EXT_blend_equation_separate at the same time.
- These combinations would require support for per-rt logicop enables
- and separate alpha/rgb logicop/blend support respectively. Neither
- possible in gallium nor most hardware. Assume these combinations
- don't happen. */
- if (st->ctx->Color.ColorLogicOpEnabled ||
- (st->ctx->Color.BlendEnabled &&
- st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) {
- /* logicop enabled */
- blend->logicop_enable = 1;
- blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp);
- }
- else if (st->ctx->Color.BlendEnabled) {
- /* blending enabled */
- for (i = 0; i < num_state; i++) {
-
- blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1;
-
- blend->rt[i].rgb_func =
- translate_blend(st->ctx->Color.Blend[i].EquationRGB);
-
- if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN ||
- st->ctx->Color.Blend[i].EquationRGB == GL_MAX) {
- /* Min/max are special */
- blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
- }
- else {
- blend->rt[i].rgb_src_factor =
- translate_blend(st->ctx->Color.Blend[i].SrcRGB);
- blend->rt[i].rgb_dst_factor =
- translate_blend(st->ctx->Color.Blend[i].DstRGB);
- }
-
- blend->rt[i].alpha_func =
- translate_blend(st->ctx->Color.Blend[i].EquationA);
-
- if (st->ctx->Color.Blend[i].EquationA == GL_MIN ||
- st->ctx->Color.Blend[i].EquationA == GL_MAX) {
- /* Min/max are special */
- blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
- }
- else {
- blend->rt[i].alpha_src_factor =
- translate_blend(st->ctx->Color.Blend[i].SrcA);
- blend->rt[i].alpha_dst_factor =
- translate_blend(st->ctx->Color.Blend[i].DstA);
- }
- }
- }
- else {
- /* no blending / logicop */
- }
-
- /* Colormask - maybe reverse these bits? */
- for (i = 0; i < num_state; i++) {
- if (st->ctx->Color.ColorMask[i][0])
- blend->rt[i].colormask |= PIPE_MASK_R;
- if (st->ctx->Color.ColorMask[i][1])
- blend->rt[i].colormask |= PIPE_MASK_G;
- if (st->ctx->Color.ColorMask[i][2])
- blend->rt[i].colormask |= PIPE_MASK_B;
- if (st->ctx->Color.ColorMask[i][3])
- blend->rt[i].colormask |= PIPE_MASK_A;
- }
-
- if (st->ctx->Color.DitherFlag)
- blend->dither = 1;
-
- if (st->ctx->Multisample.Enabled) {
- /* unlike in gallium/d3d10 these operations are only performed
- if msaa is enabled */
- if (st->ctx->Multisample.SampleAlphaToCoverage)
- blend->alpha_to_coverage = 1;
- if (st->ctx->Multisample.SampleAlphaToOne)
- blend->alpha_to_one = 1;
- }
-
- cso_set_blend(st->cso_context, blend);
-
- {
- struct pipe_blend_color bc;
- COPY_4FV(bc.color, st->ctx->Color.BlendColor);
- cso_set_blend_color(st->cso_context, &bc);
- }
-}
-
-
-const struct st_tracked_state st_update_blend = {
- "st_update_blend", /* name */
- { /* dirty */
- (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */
- 0, /* st */
- },
- update_blend, /* update */
-};
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+
+#include "st_context.h"
+#include "st_atom.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "cso_cache/cso_context.h"
+
+#include "main/macros.h"
+
+/**
+ * Convert GLenum blend tokens to pipe tokens.
+ * Both blend factors and blend funcs are accepted.
+ */
+static GLuint
+translate_blend(GLenum blend)
+{
+ switch (blend) {
+ /* blend functions */
+ case GL_FUNC_ADD:
+ return PIPE_BLEND_ADD;
+ case GL_FUNC_SUBTRACT:
+ return PIPE_BLEND_SUBTRACT;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ return PIPE_BLEND_REVERSE_SUBTRACT;
+ case GL_MIN:
+ return PIPE_BLEND_MIN;
+ case GL_MAX:
+ return PIPE_BLEND_MAX;
+
+ /* blend factors */
+ case GL_ONE:
+ return PIPE_BLENDFACTOR_ONE;
+ case GL_SRC_COLOR:
+ return PIPE_BLENDFACTOR_SRC_COLOR;
+ case GL_SRC_ALPHA:
+ return PIPE_BLENDFACTOR_SRC_ALPHA;
+ case GL_DST_ALPHA:
+ return PIPE_BLENDFACTOR_DST_ALPHA;
+ case GL_DST_COLOR:
+ return PIPE_BLENDFACTOR_DST_COLOR;
+ case GL_SRC_ALPHA_SATURATE:
+ return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
+ case GL_CONSTANT_COLOR:
+ return PIPE_BLENDFACTOR_CONST_COLOR;
+ case GL_CONSTANT_ALPHA:
+ return PIPE_BLENDFACTOR_CONST_ALPHA;
+ /*
+ return PIPE_BLENDFACTOR_SRC1_COLOR;
+ return PIPE_BLENDFACTOR_SRC1_ALPHA;
+ */
+ case GL_ZERO:
+ return PIPE_BLENDFACTOR_ZERO;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return PIPE_BLENDFACTOR_INV_SRC_COLOR;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+ case GL_ONE_MINUS_DST_COLOR:
+ return PIPE_BLENDFACTOR_INV_DST_COLOR;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return PIPE_BLENDFACTOR_INV_DST_ALPHA;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return PIPE_BLENDFACTOR_INV_CONST_COLOR;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return PIPE_BLENDFACTOR_INV_CONST_ALPHA;
+ /*
+ return PIPE_BLENDFACTOR_INV_SRC1_COLOR;
+ return PIPE_BLENDFACTOR_INV_SRC1_ALPHA;
+ */
+ default:
+ assert("invalid GL token in translate_blend()" == NULL);
+ return 0;
+ }
+}
+
+
+/**
+ * Convert GLenum logicop tokens to pipe tokens.
+ */
+static GLuint
+translate_logicop(GLenum logicop)
+{
+ switch (logicop) {
+ case GL_CLEAR:
+ return PIPE_LOGICOP_CLEAR;
+ case GL_NOR:
+ return PIPE_LOGICOP_NOR;
+ case GL_AND_INVERTED:
+ return PIPE_LOGICOP_AND_INVERTED;
+ case GL_COPY_INVERTED:
+ return PIPE_LOGICOP_COPY_INVERTED;
+ case GL_AND_REVERSE:
+ return PIPE_LOGICOP_AND_REVERSE;
+ case GL_INVERT:
+ return PIPE_LOGICOP_INVERT;
+ case GL_XOR:
+ return PIPE_LOGICOP_XOR;
+ case GL_NAND:
+ return PIPE_LOGICOP_NAND;
+ case GL_AND:
+ return PIPE_LOGICOP_AND;
+ case GL_EQUIV:
+ return PIPE_LOGICOP_EQUIV;
+ case GL_NOOP:
+ return PIPE_LOGICOP_NOOP;
+ case GL_OR_INVERTED:
+ return PIPE_LOGICOP_OR_INVERTED;
+ case GL_COPY:
+ return PIPE_LOGICOP_COPY;
+ case GL_OR_REVERSE:
+ return PIPE_LOGICOP_OR_REVERSE;
+ case GL_OR:
+ return PIPE_LOGICOP_OR;
+ case GL_SET:
+ return PIPE_LOGICOP_SET;
+ default:
+ assert("invalid GL token in translate_logicop()" == NULL);
+ return 0;
+ }
+}
+
+/**
+ * Figure out if colormasks are different per rt.
+ */
+static GLboolean
+colormask_per_rt(struct gl_context *ctx)
+{
+ /* a bit suboptimal have to compare lots of values */
+ unsigned i;
+ for (i = 1; i < ctx->Const.MaxDrawBuffers; i++) {
+ if (memcmp(ctx->Color.ColorMask[0], ctx->Color.ColorMask[i], 4)) {
+ return GL_TRUE;
+ }
+ }
+ return GL_FALSE;
+}
+
+/**
+ * Figure out if blend enables/state are different per rt.
+ */
+static GLboolean
+blend_per_rt(struct gl_context *ctx)
+{
+ if (ctx->Color.BlendEnabled &&
+ (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) {
+ /* This can only happen if GL_EXT_draw_buffers2 is enabled */
+ return GL_TRUE;
+ }
+ if (ctx->Color._BlendFuncPerBuffer || ctx->Color._BlendEquationPerBuffer) {
+ /* this can only happen if GL_ARB_draw_buffers_blend is enabled */
+ return GL_TRUE;
+ }
+ return GL_FALSE;
+}
+
+static void
+update_blend( struct st_context *st )
+{
+ struct pipe_blend_state *blend = &st->state.blend;
+ unsigned num_state = 1;
+ unsigned i, j;
+
+ memset(blend, 0, sizeof(*blend));
+
+ if (blend_per_rt(st->ctx) || colormask_per_rt(st->ctx)) {
+ num_state = st->ctx->Const.MaxDrawBuffers;
+ blend->independent_blend_enable = 1;
+ }
+ /* Note it is impossible to correctly deal with EXT_blend_logic_op and
+ EXT_draw_buffers2/EXT_blend_equation_separate at the same time.
+ These combinations would require support for per-rt logicop enables
+ and separate alpha/rgb logicop/blend support respectively. Neither
+ possible in gallium nor most hardware. Assume these combinations
+ don't happen. */
+ if (st->ctx->Color.ColorLogicOpEnabled ||
+ (st->ctx->Color.BlendEnabled &&
+ st->ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) {
+ /* logicop enabled */
+ blend->logicop_enable = 1;
+ blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp);
+ }
+ else if (st->ctx->Color.BlendEnabled) {
+ /* blending enabled */
+ for (i = 0, j = 0; i < num_state; i++) {
+
+ blend->rt[i].blend_enable = (st->ctx->Color.BlendEnabled >> i) & 0x1;
+
+ if (st->ctx->Extensions.ARB_draw_buffers_blend)
+ j = i;
+
+ blend->rt[i].rgb_func =
+ translate_blend(st->ctx->Color.Blend[j].EquationRGB);
+
+ if (st->ctx->Color.Blend[i].EquationRGB == GL_MIN ||
+ st->ctx->Color.Blend[i].EquationRGB == GL_MAX) {
+ /* Min/max are special */
+ blend->rt[i].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend->rt[i].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+ }
+ else {
+ blend->rt[i].rgb_src_factor =
+ translate_blend(st->ctx->Color.Blend[j].SrcRGB);
+ blend->rt[i].rgb_dst_factor =
+ translate_blend(st->ctx->Color.Blend[j].DstRGB);
+ }
+
+ blend->rt[i].alpha_func =
+ translate_blend(st->ctx->Color.Blend[j].EquationA);
+
+ if (st->ctx->Color.Blend[i].EquationA == GL_MIN ||
+ st->ctx->Color.Blend[i].EquationA == GL_MAX) {
+ /* Min/max are special */
+ blend->rt[i].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend->rt[i].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+ }
+ else {
+ blend->rt[i].alpha_src_factor =
+ translate_blend(st->ctx->Color.Blend[j].SrcA);
+ blend->rt[i].alpha_dst_factor =
+ translate_blend(st->ctx->Color.Blend[j].DstA);
+ }
+ }
+ }
+ else {
+ /* no blending / logicop */
+ }
+
+ /* Colormask - maybe reverse these bits? */
+ for (i = 0; i < num_state; i++) {
+ if (st->ctx->Color.ColorMask[i][0])
+ blend->rt[i].colormask |= PIPE_MASK_R;
+ if (st->ctx->Color.ColorMask[i][1])
+ blend->rt[i].colormask |= PIPE_MASK_G;
+ if (st->ctx->Color.ColorMask[i][2])
+ blend->rt[i].colormask |= PIPE_MASK_B;
+ if (st->ctx->Color.ColorMask[i][3])
+ blend->rt[i].colormask |= PIPE_MASK_A;
+ }
+
+ if (st->ctx->Color.DitherFlag)
+ blend->dither = 1;
+
+ if (st->ctx->Multisample.Enabled) {
+ /* unlike in gallium/d3d10 these operations are only performed
+ if msaa is enabled */
+ if (st->ctx->Multisample.SampleAlphaToCoverage)
+ blend->alpha_to_coverage = 1;
+ if (st->ctx->Multisample.SampleAlphaToOne)
+ blend->alpha_to_one = 1;
+ }
+
+ cso_set_blend(st->cso_context, blend);
+
+ {
+ struct pipe_blend_color bc;
+ COPY_4FV(bc.color, st->ctx->Color.BlendColor);
+ cso_set_blend_color(st->cso_context, &bc);
+ }
+}
+
+
+const struct st_tracked_state st_update_blend = {
+ "st_update_blend", /* name */
+ { /* dirty */
+ (_NEW_COLOR | _NEW_MULTISAMPLE), /* XXX _NEW_BLEND someday? */ /* mesa */
+ 0, /* st */
+ },
+ update_blend, /* update */
+};
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
index ddd130a81..0ea567155 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
@@ -1,890 +1,893 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Brian Paul
- */
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_atom_constbuf.h"
-#include "st_program.h"
-#include "st_cb_bitmap.h"
-#include "st_texture.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_inlines.h"
-#include "util/u_draw_quad.h"
-#include "util/u_simple_shaders.h"
-#include "program/prog_instruction.h"
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_drawpix
-
-/**
- * glBitmaps are drawn as textured quads. The user's bitmap pattern
- * is stored in a texture image. An alpha8 texture format is used.
- * The fragment shader samples a bit (texel) from the texture, then
- * discards the fragment if the bit is off.
- *
- * Note that we actually store the inverse image of the bitmap to
- * simplify the fragment program. An "on" bit gets stored as texel=0x0
- * and an "off" bit is stored as texel=0xff. Then we kill the
- * fragment if the negated texel value is less than zero.
- */
-
-
-/**
- * The bitmap cache attempts to accumulate multiple glBitmap calls in a
- * buffer which is then rendered en mass upon a flush, state change, etc.
- * A wide, short buffer is used to target the common case of a series
- * of glBitmap calls being used to draw text.
- */
-static GLboolean UseBitmapCache = GL_TRUE;
-
-
-#define BITMAP_CACHE_WIDTH 512
-#define BITMAP_CACHE_HEIGHT 32
-
-struct bitmap_cache
-{
- /** Window pos to render the cached image */
- GLint xpos, ypos;
- /** Bounds of region used in window coords */
- GLint xmin, ymin, xmax, ymax;
-
- GLfloat color[4];
-
- /** Bitmap's Z position */
- GLfloat zpos;
-
- struct pipe_resource *texture;
- struct pipe_transfer *trans;
-
- GLboolean empty;
-
- /** An I8 texture image: */
- ubyte *buffer;
-};
-
-
-/** Epsilon for Z comparisons */
-#define Z_EPSILON 1e-06
-
-
-/**
- * Make fragment program for glBitmap:
- * Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
- struct st_context *st = st_context(ctx);
- struct st_fragment_program *stfp;
- struct gl_program *p;
- GLuint ic = 0;
-
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = 3;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
- p->Instructions[ic].DstReg.Index = 0;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = samplerIndex;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
-
- /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
- p->Instructions[ic].Opcode = OPCODE_KIL;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
- if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
- p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
- p->Instructions[ic].SrcReg[0].Index = 0;
- p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
- ic++;
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = FRAG_BIT_TEX0;
- p->OutputsWritten = 0x0;
- p->SamplersUsed = (1 << samplerIndex);
-
- stfp = (struct st_fragment_program *) p;
- stfp->Base.UsesKill = GL_TRUE;
-
- return stfp;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
- int i;
- for (i = 0; i < 32; i++) {
- if ((bitfield & (1 << i)) == 0) {
- return i;
- }
- }
- return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st current context
- * \param fpIn the incoming fragment program
- * \param fpOut the new fragment program which does fragment culling
- * \param bitmap_sampler sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut,
- GLuint *bitmap_sampler)
-{
- struct st_fragment_program *bitmap_prog;
- struct gl_program *newProg;
- uint sampler;
-
- /*
- * Generate new program which is the user-defined program prefixed
- * with the bitmap sampler/kill instructions.
- */
- sampler = find_free_bit(fpIn->Base.SamplersUsed);
- bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
- newProg = _mesa_combine_programs(st->ctx,
- &bitmap_prog->Base.Base,
- &fpIn->Base);
- /* done with this after combining */
- st_reference_fragprog(st, &bitmap_prog, NULL);
-
-#if 0
- {
- printf("Combined bitmap program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
- }
-#endif
-
- /* return results */
- *fpOut = (struct gl_fragment_program *) newProg;
- *bitmap_sampler = sampler;
-}
-
-
-/**
- * Copy user-provide bitmap bits into texture buffer, expanding
- * bits into texels.
- * "On" bits will set texels to 0x0.
- * "Off" bits will not modify texels.
- * Note that the image is actually going to be upside down in
- * the texture. We deal with that with texcoords.
- */
-static void
-unpack_bitmap(struct st_context *st,
- GLint px, GLint py, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap,
- ubyte *destBuffer, uint destStride)
-{
- destBuffer += py * destStride + px;
-
- _mesa_expand_bitmap(width, height, unpack, bitmap,
- destBuffer, destStride, 0x0);
-}
-
-
-/**
- * Create a texture which represents a bitmap image.
- */
-static struct pipe_resource *
-make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_transfer *transfer;
- ubyte *dest;
- struct pipe_resource *pt;
-
- /* PBO source... */
- bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
- if (!bitmap) {
- return NULL;
- }
-
- /**
- * Create texture to hold bitmap pattern.
- */
- pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
- 0, width, height, 1, 1,
- PIPE_BIND_SAMPLER_VIEW);
- if (!pt) {
- _mesa_unmap_pbo_source(ctx, unpack);
- return NULL;
- }
-
- transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
- PIPE_TRANSFER_WRITE,
- 0, 0, width, height);
-
- dest = pipe_transfer_map(pipe, transfer);
-
- /* Put image into texture transfer */
- memset(dest, 0xff, height * transfer->stride);
- unpack_bitmap(st, 0, 0, width, height, unpack, bitmap,
- dest, transfer->stride);
-
- _mesa_unmap_pbo_source(ctx, unpack);
-
- /* Release transfer */
- pipe_transfer_unmap(pipe, transfer);
- pipe->transfer_destroy(pipe, transfer);
-
- return pt;
-}
-
-static GLuint
-setup_bitmap_vertex_data(struct st_context *st, bool normalized,
- int x, int y, int width, int height,
- float z, const float color[4])
-{
- struct pipe_context *pipe = st->pipe;
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat)fb->Width;
- const GLfloat fb_height = (GLfloat)fb->Height;
- const GLfloat x0 = (GLfloat)x;
- const GLfloat x1 = (GLfloat)(x + width);
- const GLfloat y0 = (GLfloat)y;
- const GLfloat y1 = (GLfloat)(y + height);
- GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
- GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
- const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
- const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
- const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
- GLuint i;
-
- if(!normalized)
- {
- sRight = width;
- tBot = height;
- }
-
- /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
- * no_flush) updates to buffers where we know there is no conflict
- * with previous data. Currently using max_slots > 1 will cause
- * synchronous rendering if the driver flushes its command buffers
- * between one bitmap and the next. Our flush hook below isn't
- * sufficient to catch this as the driver doesn't tell us when it
- * flushes its own command buffers. Until this gets fixed, pay the
- * price of allocating a new buffer for each bitmap cache-flush to
- * avoid synchronous rendering.
- */
- if (st->bitmap.vbuf_slot >= max_slots) {
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf_slot = 0;
- }
-
- if (!st->bitmap.vbuf) {
- st->bitmap.vbuf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- max_slots *
- sizeof(st->bitmap.vertices));
- }
-
- /* Positions are in clip coords since we need to do clipping in case
- * the bitmap quad goes beyond the window bounds.
- */
- st->bitmap.vertices[0][0][0] = clip_x0;
- st->bitmap.vertices[0][0][1] = clip_y0;
- st->bitmap.vertices[0][2][0] = sLeft;
- st->bitmap.vertices[0][2][1] = tTop;
-
- st->bitmap.vertices[1][0][0] = clip_x1;
- st->bitmap.vertices[1][0][1] = clip_y0;
- st->bitmap.vertices[1][2][0] = sRight;
- st->bitmap.vertices[1][2][1] = tTop;
-
- st->bitmap.vertices[2][0][0] = clip_x1;
- st->bitmap.vertices[2][0][1] = clip_y1;
- st->bitmap.vertices[2][2][0] = sRight;
- st->bitmap.vertices[2][2][1] = tBot;
-
- st->bitmap.vertices[3][0][0] = clip_x0;
- st->bitmap.vertices[3][0][1] = clip_y1;
- st->bitmap.vertices[3][2][0] = sLeft;
- st->bitmap.vertices[3][2][1] = tBot;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- st->bitmap.vertices[i][0][2] = z;
- st->bitmap.vertices[i][0][3] = 1.0;
- st->bitmap.vertices[i][1][0] = color[0];
- st->bitmap.vertices[i][1][1] = color[1];
- st->bitmap.vertices[i][1][2] = color[2];
- st->bitmap.vertices[i][1][3] = color[3];
- st->bitmap.vertices[i][2][2] = 0.0; /*R*/
- st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
- }
-
- /* put vertex data into vbuf */
- pipe_buffer_write_nooverlap(st->pipe,
- st->bitmap.vbuf,
- st->bitmap.vbuf_slot
- * sizeof(st->bitmap.vertices),
- sizeof st->bitmap.vertices,
- st->bitmap.vertices);
-
- return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
-}
-
-
-
-/**
- * Render a glBitmap by drawing a textured quad
- */
-static void
-draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
- GLsizei width, GLsizei height,
- struct pipe_sampler_view *sv,
- const GLfloat *color)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = st->cso_context;
- struct st_fp_variant *fpv;
- struct st_fp_variant_key key;
- GLuint maxSize;
- GLuint offset;
-
- memset(&key, 0, sizeof(key));
- key.st = st;
- key.bitmap = GL_TRUE;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- /* As an optimization, Mesa's fragment programs will sometimes get the
- * primary color from a statevar/constant rather than a varying variable.
- * when that's the case, we need to ensure that we use the 'color'
- * parameter and not the current attribute color (which may have changed
- * through glRasterPos and state validation.
- * So, we force the proper color here. Not elegant, but it works.
- */
- {
- GLfloat colorSave[4];
- COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
- COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
- COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
- }
-
-
- /* limit checks */
- /* XXX if the bitmap is larger than the max texture size, break
- * it up into chunks.
- */
- maxSize = 1 << (pipe->screen->get_param(pipe->screen,
- PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
- assert(width <= (GLsizei)maxSize);
- assert(height <= (GLsizei)maxSize);
-
- cso_save_rasterizer(cso);
- cso_save_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_viewport(cso);
- cso_save_fragment_shader(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
-
- /* rasterizer state: just scissor */
- st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
- cso_set_rasterizer(cso, &st->bitmap.rasterizer);
-
- /* fragment shader state: TEX lookup program */
- cso_set_fragment_shader_handle(cso, fpv->driver_shader);
-
- /* vertex shader state: position + texcoord pass-through */
- cso_set_vertex_shader_handle(cso, st->bitmap.vs);
-
- /* user samplers, plus our bitmap sampler */
- {
- struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
- uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
- uint i;
- for (i = 0; i < st->state.num_samplers; i++) {
- samplers[i] = &st->state.samplers[i];
- }
- samplers[fpv->bitmap_sampler] =
- &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
- cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
- }
-
- /* user textures, plus the bitmap texture */
- {
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
- uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
- memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
- sampler_views[fpv->bitmap_sampler] = sv;
- cso_set_fragment_sampler_views(cso, num, sampler_views);
- }
-
- /* viewport state: viewport matching window dims */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- const GLfloat width = (GLfloat)fb->Width;
- const GLfloat height = (GLfloat)fb->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 0.5f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.5f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
-
- /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
-
- /* draw textured quad */
- offset = setup_bitmap_vertex_data(st,
- sv->texture->target != PIPE_TEXTURE_RECT,
- x, y, width, height, z, color);
-
- util_draw_vertex_buffer(pipe, st->bitmap.vbuf, offset,
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 3); /* attribs/vert */
-
-
- /* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_viewport(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
-}
-
-
-static void
-reset_cache(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
- cache->empty = GL_TRUE;
-
- cache->xmin = 1000000;
- cache->xmax = -1000000;
- cache->ymin = 1000000;
- cache->ymax = -1000000;
-
- if (cache->trans) {
- pipe->transfer_destroy(pipe, cache->trans);
- cache->trans = NULL;
- }
-
- assert(!cache->texture);
-
- /* allocate a new texture */
- cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
- st->bitmap.tex_format, 0,
- BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
- 1, 1,
- PIPE_BIND_SAMPLER_VIEW);
-}
-
-
-/** Print bitmap image to stdout (debug) */
-static void
-print_cache(const struct bitmap_cache *cache)
-{
- int i, j, k;
-
- for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) {
- k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1);
- for (j = 0; j < BITMAP_CACHE_WIDTH; j++) {
- if (cache->buffer[k])
- printf("X");
- else
- printf(" ");
- k++;
- }
- printf("\n");
- }
-}
-
-
-/**
- * Create gallium pipe_transfer object for the bitmap cache.
- */
-static void
-create_cache_trans(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (cache->trans)
- return;
-
- /* Map the texture transfer.
- * Subsequent glBitmap calls will write into the texture image.
- */
- cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0,
- BITMAP_CACHE_WIDTH,
- BITMAP_CACHE_HEIGHT);
- cache->buffer = pipe_transfer_map(pipe, cache->trans);
-
- /* init image to all 0xff */
- memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
-}
-
-
-/**
- * If there's anything in the bitmap cache, draw/flush it now.
- */
-void
-st_flush_bitmap_cache(struct st_context *st)
-{
- if (!st->bitmap.cache->empty) {
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (st->ctx->DrawBuffer) {
- struct pipe_context *pipe = st->pipe;
- struct pipe_sampler_view *sv;
-
- assert(cache->xmin <= cache->xmax);
-
-/* printf("flush size %d x %d at %d, %d\n",
- cache->xmax - cache->xmin,
- cache->ymax - cache->ymin,
- cache->xpos, cache->ypos);
-*/
-
- /* The texture transfer has been mapped until now.
- * So unmap and release the texture transfer before drawing.
- */
- if (cache->trans) {
- if (0)
- print_cache(cache);
- pipe_transfer_unmap(pipe, cache->trans);
- cache->buffer = NULL;
-
- pipe->transfer_destroy(pipe, cache->trans);
- cache->trans = NULL;
- }
-
- sv = st_create_texture_sampler_view(st->pipe, cache->texture);
- if (sv) {
- draw_bitmap_quad(st->ctx,
- cache->xpos,
- cache->ypos,
- cache->zpos,
- BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
- sv,
- cache->color);
-
- pipe_sampler_view_reference(&sv, NULL);
- }
- }
-
- /* release/free the texture */
- pipe_resource_reference(&cache->texture, NULL);
-
- reset_cache(st);
- }
-}
-
-
-/**
- * Flush bitmap cache and release vertex buffer.
- */
-void
-st_flush_bitmap( struct st_context *st )
-{
- st_flush_bitmap_cache(st);
-
- /* Release vertex buffer to avoid synchronous rendering if we were
- * to map it in the next frame.
- */
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf_slot = 0;
-}
-
-
-/**
- * Try to accumulate this glBitmap call in the bitmap cache.
- * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
- */
-static GLboolean
-accum_bitmap(struct st_context *st,
- GLint x, GLint y, GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack,
- const GLubyte *bitmap )
-{
- struct bitmap_cache *cache = st->bitmap.cache;
- int px = -999, py = -999;
- const GLfloat z = st->ctx->Current.RasterPos[2];
-
- if (width > BITMAP_CACHE_WIDTH ||
- height > BITMAP_CACHE_HEIGHT)
- return GL_FALSE; /* too big to cache */
-
- if (!cache->empty) {
- px = x - cache->xpos; /* pos in buffer */
- py = y - cache->ypos;
- if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
- py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
- !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
- ((fabs(z - cache->zpos) > Z_EPSILON))) {
- /* This bitmap would extend beyond cache bounds, or the bitmap
- * color is changing
- * so flush and continue.
- */
- st_flush_bitmap_cache(st);
- }
- }
-
- if (cache->empty) {
- /* Initialize. Center bitmap vertically in the buffer. */
- px = 0;
- py = (BITMAP_CACHE_HEIGHT - height) / 2;
- cache->xpos = x;
- cache->ypos = y - py;
- cache->zpos = z;
- cache->empty = GL_FALSE;
- COPY_4FV(cache->color, st->ctx->Current.RasterColor);
- }
-
- assert(px != -999);
- assert(py != -999);
-
- if (x < cache->xmin)
- cache->xmin = x;
- if (y < cache->ymin)
- cache->ymin = y;
- if (x + width > cache->xmax)
- cache->xmax = x + width;
- if (y + height > cache->ymax)
- cache->ymax = y + height;
-
- /* create the transfer if needed */
- create_cache_trans(st);
-
- unpack_bitmap(st, px, py, width, height, unpack, bitmap,
- cache->buffer, BITMAP_CACHE_WIDTH);
-
- return GL_TRUE; /* accumulated */
-}
-
-
-
-/**
- * Called via ctx->Driver.Bitmap()
- */
-static void
-st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height,
- const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
-{
- struct st_context *st = st_context(ctx);
- struct pipe_resource *pt;
-
- if (width == 0 || height == 0)
- return;
-
- st_validate_state(st);
-
- if (!st->bitmap.vs) {
- /* create pass-through vertex shader now */
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_COLOR,
- TGSI_SEMANTIC_GENERIC };
- const uint semantic_indexes[] = { 0, 0, 0 };
- st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
- semantic_names,
- semantic_indexes);
- }
-
- if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
- return;
-
- pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
- if (pt) {
- struct pipe_sampler_view *sv =
- st_create_texture_sampler_view(st->pipe, pt);
-
- assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
-
- if (sv) {
- draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height, sv,
- st->ctx->Current.RasterColor);
-
- pipe_sampler_view_reference(&sv, NULL);
- }
-
- /* release/free the texture */
- pipe_resource_reference(&pt, NULL);
- }
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap_functions(struct dd_function_table *functions)
-{
- functions->Bitmap = st_Bitmap;
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap(struct st_context *st)
-{
- struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
-
- /* init sampler state once */
- memset(sampler, 0, sizeof(*sampler));
- sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
- sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
- sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
- sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- st->bitmap.samplers[1] = *sampler;
- st->bitmap.samplers[1].normalized_coords = 1;
-
- /* init baseline rasterizer state once */
- memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
- st->bitmap.rasterizer.gl_rasterization_rules = 1;
-
- /* find a usable texture format */
- if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
- }
- else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
- }
- else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
- PIPE_TEXTURE_2D, 0,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
- }
- else {
- /* XXX support more formats */
- assert(0);
- }
-
- /* alloc bitmap cache object */
- st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
-
- reset_cache(st);
-}
-
-
-/** Per-context tear-down */
-void
-st_destroy_bitmap(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct bitmap_cache *cache = st->bitmap.cache;
-
- if (st->bitmap.vs) {
- cso_delete_vertex_shader(st->cso_context, st->bitmap.vs);
- st->bitmap.vs = NULL;
- }
-
- if (st->bitmap.vbuf) {
- pipe_resource_reference(&st->bitmap.vbuf, NULL);
- st->bitmap.vbuf = NULL;
- }
-
- if (cache) {
- if (cache->trans) {
- pipe_transfer_unmap(pipe, cache->trans);
- pipe->transfer_destroy(pipe, cache->trans);
- }
- pipe_resource_reference(&st->bitmap.cache->texture, NULL);
- free(st->bitmap.cache);
- st->bitmap.cache = NULL;
- }
-}
-
-#endif /* FEATURE_drawpix */
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Brian Paul
+ */
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/bufferobj.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "program/program.h"
+#include "program/prog_print.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_atom_constbuf.h"
+#include "st_program.h"
+#include "st_cb_bitmap.h"
+#include "st_texture.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+#include "util/u_draw_quad.h"
+#include "util/u_simple_shaders.h"
+#include "program/prog_instruction.h"
+#include "cso_cache/cso_context.h"
+
+
+#if FEATURE_drawpix
+
+/**
+ * glBitmaps are drawn as textured quads. The user's bitmap pattern
+ * is stored in a texture image. An alpha8 texture format is used.
+ * The fragment shader samples a bit (texel) from the texture, then
+ * discards the fragment if the bit is off.
+ *
+ * Note that we actually store the inverse image of the bitmap to
+ * simplify the fragment program. An "on" bit gets stored as texel=0x0
+ * and an "off" bit is stored as texel=0xff. Then we kill the
+ * fragment if the negated texel value is less than zero.
+ */
+
+
+/**
+ * The bitmap cache attempts to accumulate multiple glBitmap calls in a
+ * buffer which is then rendered en mass upon a flush, state change, etc.
+ * A wide, short buffer is used to target the common case of a series
+ * of glBitmap calls being used to draw text.
+ */
+static GLboolean UseBitmapCache = GL_TRUE;
+
+
+#define BITMAP_CACHE_WIDTH 512
+#define BITMAP_CACHE_HEIGHT 32
+
+struct bitmap_cache
+{
+ /** Window pos to render the cached image */
+ GLint xpos, ypos;
+ /** Bounds of region used in window coords */
+ GLint xmin, ymin, xmax, ymax;
+
+ GLfloat color[4];
+
+ /** Bitmap's Z position */
+ GLfloat zpos;
+
+ struct pipe_resource *texture;
+ struct pipe_transfer *trans;
+
+ GLboolean empty;
+
+ /** An I8 texture image: */
+ ubyte *buffer;
+};
+
+
+/** Epsilon for Z comparisons */
+#define Z_EPSILON 1e-06
+
+
+/**
+ * Make fragment program for glBitmap:
+ * Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ */
+static struct st_fragment_program *
+make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_fragment_program *stfp;
+ struct gl_program *p;
+ GLuint ic = 0;
+
+ p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+ if (!p)
+ return NULL;
+
+ p->NumInstructions = 3;
+
+ p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
+ if (!p->Instructions) {
+ ctx->Driver.DeleteProgram(ctx, p);
+ return NULL;
+ }
+ _mesa_init_instructions(p->Instructions, p->NumInstructions);
+
+ /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+ p->Instructions[ic].Opcode = OPCODE_TEX;
+ p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
+ p->Instructions[ic].DstReg.Index = 0;
+ p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+ p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+ p->Instructions[ic].TexSrcUnit = samplerIndex;
+ p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+ ic++;
+
+ /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
+ p->Instructions[ic].Opcode = OPCODE_KIL;
+ p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
+
+ if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+ p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
+
+ p->Instructions[ic].SrcReg[0].Index = 0;
+ p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
+ ic++;
+
+ /* END; */
+ p->Instructions[ic++].Opcode = OPCODE_END;
+
+ assert(ic == p->NumInstructions);
+
+ p->InputsRead = FRAG_BIT_TEX0;
+ p->OutputsWritten = 0x0;
+ p->SamplersUsed = (1 << samplerIndex);
+
+ stfp = (struct st_fragment_program *) p;
+ stfp->Base.UsesKill = GL_TRUE;
+
+ return stfp;
+}
+
+
+static int
+find_free_bit(uint bitfield)
+{
+ int i;
+ for (i = 0; i < 32; i++) {
+ if ((bitfield & (1 << i)) == 0) {
+ return i;
+ }
+ }
+ return -1;
+}
+
+
+/**
+ * Combine basic bitmap fragment program with the user-defined program.
+ * \param st current context
+ * \param fpIn the incoming fragment program
+ * \param fpOut the new fragment program which does fragment culling
+ * \param bitmap_sampler sampler number for the bitmap texture
+ */
+void
+st_make_bitmap_fragment_program(struct st_context *st,
+ struct gl_fragment_program *fpIn,
+ struct gl_fragment_program **fpOut,
+ GLuint *bitmap_sampler)
+{
+ struct st_fragment_program *bitmap_prog;
+ struct gl_program *newProg;
+ uint sampler;
+
+ /*
+ * Generate new program which is the user-defined program prefixed
+ * with the bitmap sampler/kill instructions.
+ */
+ sampler = find_free_bit(fpIn->Base.SamplersUsed);
+ bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+
+ newProg = _mesa_combine_programs(st->ctx,
+ &bitmap_prog->Base.Base,
+ &fpIn->Base);
+ /* done with this after combining */
+ st_reference_fragprog(st, &bitmap_prog, NULL);
+
+#if 0
+ {
+ printf("Combined bitmap program:\n");
+ _mesa_print_program(newProg);
+ printf("InputsRead: 0x%x\n", newProg->InputsRead);
+ printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
+ _mesa_print_parameter_list(newProg->Parameters);
+ }
+#endif
+
+ /* return results */
+ *fpOut = (struct gl_fragment_program *) newProg;
+ *bitmap_sampler = sampler;
+}
+
+
+/**
+ * Copy user-provide bitmap bits into texture buffer, expanding
+ * bits into texels.
+ * "On" bits will set texels to 0x0.
+ * "Off" bits will not modify texels.
+ * Note that the image is actually going to be upside down in
+ * the texture. We deal with that with texcoords.
+ */
+static void
+unpack_bitmap(struct st_context *st,
+ GLint px, GLint py, GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap,
+ ubyte *destBuffer, uint destStride)
+{
+ destBuffer += py * destStride + px;
+
+ _mesa_expand_bitmap(width, height, unpack, bitmap,
+ destBuffer, destStride, 0x0);
+}
+
+
+/**
+ * Create a texture which represents a bitmap image.
+ */
+static struct pipe_resource *
+make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_transfer *transfer;
+ ubyte *dest;
+ struct pipe_resource *pt;
+
+ /* PBO source... */
+ bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
+ if (!bitmap) {
+ return NULL;
+ }
+
+ /**
+ * Create texture to hold bitmap pattern.
+ */
+ pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
+ 0, width, height, 1, 1,
+ PIPE_BIND_SAMPLER_VIEW);
+ if (!pt) {
+ _mesa_unmap_pbo_source(ctx, unpack);
+ return NULL;
+ }
+
+ transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
+ PIPE_TRANSFER_WRITE,
+ 0, 0, width, height);
+
+ dest = pipe_transfer_map(pipe, transfer);
+
+ /* Put image into texture transfer */
+ memset(dest, 0xff, height * transfer->stride);
+ unpack_bitmap(st, 0, 0, width, height, unpack, bitmap,
+ dest, transfer->stride);
+
+ _mesa_unmap_pbo_source(ctx, unpack);
+
+ /* Release transfer */
+ pipe_transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
+
+ return pt;
+}
+
+static GLuint
+setup_bitmap_vertex_data(struct st_context *st, bool normalized,
+ int x, int y, int width, int height,
+ float z, const float color[4])
+{
+ struct pipe_context *pipe = st->pipe;
+ const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const GLfloat fb_width = (GLfloat)fb->Width;
+ const GLfloat fb_height = (GLfloat)fb->Height;
+ const GLfloat x0 = (GLfloat)x;
+ const GLfloat x1 = (GLfloat)(x + width);
+ const GLfloat y0 = (GLfloat)y;
+ const GLfloat y1 = (GLfloat)(y + height);
+ GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
+ GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
+ const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
+ const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
+ const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
+ const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
+ const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
+ GLuint i;
+
+ if(!normalized)
+ {
+ sRight = width;
+ tBot = height;
+ }
+
+ /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
+ * no_flush) updates to buffers where we know there is no conflict
+ * with previous data. Currently using max_slots > 1 will cause
+ * synchronous rendering if the driver flushes its command buffers
+ * between one bitmap and the next. Our flush hook below isn't
+ * sufficient to catch this as the driver doesn't tell us when it
+ * flushes its own command buffers. Until this gets fixed, pay the
+ * price of allocating a new buffer for each bitmap cache-flush to
+ * avoid synchronous rendering.
+ */
+ if (st->bitmap.vbuf_slot >= max_slots) {
+ pipe_resource_reference(&st->bitmap.vbuf, NULL);
+ st->bitmap.vbuf_slot = 0;
+ }
+
+ if (!st->bitmap.vbuf) {
+ st->bitmap.vbuf = pipe_buffer_create(pipe->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
+ max_slots *
+ sizeof(st->bitmap.vertices));
+ }
+
+ /* Positions are in clip coords since we need to do clipping in case
+ * the bitmap quad goes beyond the window bounds.
+ */
+ st->bitmap.vertices[0][0][0] = clip_x0;
+ st->bitmap.vertices[0][0][1] = clip_y0;
+ st->bitmap.vertices[0][2][0] = sLeft;
+ st->bitmap.vertices[0][2][1] = tTop;
+
+ st->bitmap.vertices[1][0][0] = clip_x1;
+ st->bitmap.vertices[1][0][1] = clip_y0;
+ st->bitmap.vertices[1][2][0] = sRight;
+ st->bitmap.vertices[1][2][1] = tTop;
+
+ st->bitmap.vertices[2][0][0] = clip_x1;
+ st->bitmap.vertices[2][0][1] = clip_y1;
+ st->bitmap.vertices[2][2][0] = sRight;
+ st->bitmap.vertices[2][2][1] = tBot;
+
+ st->bitmap.vertices[3][0][0] = clip_x0;
+ st->bitmap.vertices[3][0][1] = clip_y1;
+ st->bitmap.vertices[3][2][0] = sLeft;
+ st->bitmap.vertices[3][2][1] = tBot;
+
+ /* same for all verts: */
+ for (i = 0; i < 4; i++) {
+ st->bitmap.vertices[i][0][2] = z;
+ st->bitmap.vertices[i][0][3] = 1.0;
+ st->bitmap.vertices[i][1][0] = color[0];
+ st->bitmap.vertices[i][1][1] = color[1];
+ st->bitmap.vertices[i][1][2] = color[2];
+ st->bitmap.vertices[i][1][3] = color[3];
+ st->bitmap.vertices[i][2][2] = 0.0; /*R*/
+ st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
+ }
+
+ /* put vertex data into vbuf */
+ pipe_buffer_write_nooverlap(st->pipe,
+ st->bitmap.vbuf,
+ st->bitmap.vbuf_slot
+ * sizeof(st->bitmap.vertices),
+ sizeof st->bitmap.vertices,
+ st->bitmap.vertices);
+
+ return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
+}
+
+
+
+/**
+ * Render a glBitmap by drawing a textured quad
+ */
+static void
+draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
+ GLsizei width, GLsizei height,
+ struct pipe_sampler_view *sv,
+ const GLfloat *color)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct cso_context *cso = st->cso_context;
+ struct st_fp_variant *fpv;
+ struct st_fp_variant_key key;
+ GLuint maxSize;
+ GLuint offset;
+
+ memset(&key, 0, sizeof(key));
+ key.st = st;
+ key.bitmap = GL_TRUE;
+
+ fpv = st_get_fp_variant(st, st->fp, &key);
+
+ /* As an optimization, Mesa's fragment programs will sometimes get the
+ * primary color from a statevar/constant rather than a varying variable.
+ * when that's the case, we need to ensure that we use the 'color'
+ * parameter and not the current attribute color (which may have changed
+ * through glRasterPos and state validation.
+ * So, we force the proper color here. Not elegant, but it works.
+ */
+ {
+ GLfloat colorSave[4];
+ COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
+ COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+ COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
+ }
+
+
+ /* limit checks */
+ /* XXX if the bitmap is larger than the max texture size, break
+ * it up into chunks.
+ */
+ maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+ PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
+ assert(width <= (GLsizei)maxSize);
+ assert(height <= (GLsizei)maxSize);
+
+ cso_save_rasterizer(cso);
+ cso_save_samplers(cso);
+ cso_save_fragment_sampler_views(cso);
+ cso_save_viewport(cso);
+ cso_save_fragment_shader(cso);
+ cso_save_vertex_shader(cso);
+ cso_save_vertex_elements(cso);
+ cso_save_vertex_buffers(cso);
+
+ /* rasterizer state: just scissor */
+ st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
+ cso_set_rasterizer(cso, &st->bitmap.rasterizer);
+
+ /* fragment shader state: TEX lookup program */
+ cso_set_fragment_shader_handle(cso, fpv->driver_shader);
+
+ /* vertex shader state: position + texcoord pass-through */
+ cso_set_vertex_shader_handle(cso, st->bitmap.vs);
+
+ /* user samplers, plus our bitmap sampler */
+ {
+ struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
+ uint i;
+ for (i = 0; i < st->state.num_samplers; i++) {
+ samplers[i] = &st->state.samplers[i];
+ }
+ samplers[fpv->bitmap_sampler] =
+ &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
+ cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
+ }
+
+ /* user textures, plus the bitmap texture */
+ {
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
+ memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
+ sampler_views[fpv->bitmap_sampler] = sv;
+ cso_set_fragment_sampler_views(cso, num, sampler_views);
+ }
+
+ /* viewport state: viewport matching window dims */
+ {
+ const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
+ const GLfloat width = (GLfloat)fb->Width;
+ const GLfloat height = (GLfloat)fb->Height;
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * width;
+ vp.scale[1] = height * (invert ? -0.5f : 0.5f);
+ vp.scale[2] = 0.5f;
+ vp.scale[3] = 1.0f;
+ vp.translate[0] = 0.5f * width;
+ vp.translate[1] = 0.5f * height;
+ vp.translate[2] = 0.5f;
+ vp.translate[3] = 0.0f;
+ cso_set_viewport(cso, &vp);
+ }
+
+ cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
+ /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
+ z = z * 2.0 - 1.0;
+
+ /* draw textured quad */
+ offset = setup_bitmap_vertex_data(st,
+ sv->texture->target != PIPE_TEXTURE_RECT,
+ x, y, width, height, z, color);
+
+ util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset,
+ PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ 3); /* attribs/vert */
+
+
+ /* restore state */
+ cso_restore_rasterizer(cso);
+ cso_restore_samplers(cso);
+ cso_restore_fragment_sampler_views(cso);
+ cso_restore_viewport(cso);
+ cso_restore_fragment_shader(cso);
+ cso_restore_vertex_shader(cso);
+ cso_restore_vertex_elements(cso);
+ cso_restore_vertex_buffers(cso);
+}
+
+
+static void
+reset_cache(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct bitmap_cache *cache = st->bitmap.cache;
+
+ /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
+ cache->empty = GL_TRUE;
+
+ cache->xmin = 1000000;
+ cache->xmax = -1000000;
+ cache->ymin = 1000000;
+ cache->ymax = -1000000;
+
+ if (cache->trans) {
+ pipe->transfer_destroy(pipe, cache->trans);
+ cache->trans = NULL;
+ }
+
+ assert(!cache->texture);
+
+ /* allocate a new texture */
+ cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
+ st->bitmap.tex_format, 0,
+ BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
+ 1, 1,
+ PIPE_BIND_SAMPLER_VIEW);
+}
+
+
+/** Print bitmap image to stdout (debug) */
+static void
+print_cache(const struct bitmap_cache *cache)
+{
+ int i, j, k;
+
+ for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) {
+ k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1);
+ for (j = 0; j < BITMAP_CACHE_WIDTH; j++) {
+ if (cache->buffer[k])
+ printf("X");
+ else
+ printf(" ");
+ k++;
+ }
+ printf("\n");
+ }
+}
+
+
+/**
+ * Create gallium pipe_transfer object for the bitmap cache.
+ */
+static void
+create_cache_trans(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct bitmap_cache *cache = st->bitmap.cache;
+
+ if (cache->trans)
+ return;
+
+ /* Map the texture transfer.
+ * Subsequent glBitmap calls will write into the texture image.
+ */
+ cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ BITMAP_CACHE_WIDTH,
+ BITMAP_CACHE_HEIGHT);
+ cache->buffer = pipe_transfer_map(pipe, cache->trans);
+
+ /* init image to all 0xff */
+ memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
+}
+
+
+/**
+ * If there's anything in the bitmap cache, draw/flush it now.
+ */
+void
+st_flush_bitmap_cache(struct st_context *st)
+{
+ if (!st->bitmap.cache->empty) {
+ struct bitmap_cache *cache = st->bitmap.cache;
+
+ if (st->ctx->DrawBuffer) {
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_sampler_view *sv;
+
+ assert(cache->xmin <= cache->xmax);
+
+/* printf("flush size %d x %d at %d, %d\n",
+ cache->xmax - cache->xmin,
+ cache->ymax - cache->ymin,
+ cache->xpos, cache->ypos);
+*/
+
+ /* The texture transfer has been mapped until now.
+ * So unmap and release the texture transfer before drawing.
+ */
+ if (cache->trans) {
+ if (0)
+ print_cache(cache);
+ pipe_transfer_unmap(pipe, cache->trans);
+ cache->buffer = NULL;
+
+ pipe->transfer_destroy(pipe, cache->trans);
+ cache->trans = NULL;
+ }
+
+ sv = st_create_texture_sampler_view(st->pipe, cache->texture);
+ if (sv) {
+ draw_bitmap_quad(st->ctx,
+ cache->xpos,
+ cache->ypos,
+ cache->zpos,
+ BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
+ sv,
+ cache->color);
+
+ pipe_sampler_view_reference(&sv, NULL);
+ }
+ }
+
+ /* release/free the texture */
+ pipe_resource_reference(&cache->texture, NULL);
+
+ reset_cache(st);
+ }
+}
+
+
+/**
+ * Flush bitmap cache and release vertex buffer.
+ */
+void
+st_flush_bitmap( struct st_context *st )
+{
+ st_flush_bitmap_cache(st);
+
+ /* Release vertex buffer to avoid synchronous rendering if we were
+ * to map it in the next frame.
+ */
+ pipe_resource_reference(&st->bitmap.vbuf, NULL);
+ st->bitmap.vbuf_slot = 0;
+}
+
+
+/**
+ * Try to accumulate this glBitmap call in the bitmap cache.
+ * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
+ */
+static GLboolean
+accum_bitmap(struct st_context *st,
+ GLint x, GLint y, GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLubyte *bitmap )
+{
+ struct bitmap_cache *cache = st->bitmap.cache;
+ int px = -999, py = -999;
+ const GLfloat z = st->ctx->Current.RasterPos[2];
+
+ if (width > BITMAP_CACHE_WIDTH ||
+ height > BITMAP_CACHE_HEIGHT)
+ return GL_FALSE; /* too big to cache */
+
+ if (!cache->empty) {
+ px = x - cache->xpos; /* pos in buffer */
+ py = y - cache->ypos;
+ if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
+ py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
+ !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
+ ((fabs(z - cache->zpos) > Z_EPSILON))) {
+ /* This bitmap would extend beyond cache bounds, or the bitmap
+ * color is changing
+ * so flush and continue.
+ */
+ st_flush_bitmap_cache(st);
+ }
+ }
+
+ if (cache->empty) {
+ /* Initialize. Center bitmap vertically in the buffer. */
+ px = 0;
+ py = (BITMAP_CACHE_HEIGHT - height) / 2;
+ cache->xpos = x;
+ cache->ypos = y - py;
+ cache->zpos = z;
+ cache->empty = GL_FALSE;
+ COPY_4FV(cache->color, st->ctx->Current.RasterColor);
+ }
+
+ assert(px != -999);
+ assert(py != -999);
+
+ if (x < cache->xmin)
+ cache->xmin = x;
+ if (y < cache->ymin)
+ cache->ymin = y;
+ if (x + width > cache->xmax)
+ cache->xmax = x + width;
+ if (y + height > cache->ymax)
+ cache->ymax = y + height;
+
+ /* create the transfer if needed */
+ create_cache_trans(st);
+
+ unpack_bitmap(st, px, py, width, height, unpack, bitmap,
+ cache->buffer, BITMAP_CACHE_WIDTH);
+
+ return GL_TRUE; /* accumulated */
+}
+
+
+
+/**
+ * Called via ctx->Driver.Bitmap()
+ */
+static void
+st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height,
+ const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_resource *pt;
+
+ if (width == 0 || height == 0)
+ return;
+
+ st_validate_state(st);
+
+ if (!st->bitmap.vs) {
+ /* create pass-through vertex shader now */
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_COLOR,
+ TGSI_SEMANTIC_GENERIC };
+ const uint semantic_indexes[] = { 0, 0, 0 };
+ st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
+ semantic_names,
+ semantic_indexes);
+ }
+
+ if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
+ return;
+
+ pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
+ if (pt) {
+ struct pipe_sampler_view *sv =
+ st_create_texture_sampler_view(st->pipe, pt);
+
+ assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
+
+ if (sv) {
+ draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
+ width, height, sv,
+ st->ctx->Current.RasterColor);
+
+ pipe_sampler_view_reference(&sv, NULL);
+ }
+
+ /* release/free the texture */
+ pipe_resource_reference(&pt, NULL);
+ }
+}
+
+
+/** Per-context init */
+void
+st_init_bitmap_functions(struct dd_function_table *functions)
+{
+ functions->Bitmap = st_Bitmap;
+}
+
+
+/** Per-context init */
+void
+st_init_bitmap(struct st_context *st)
+{
+ struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+
+ /* init sampler state once */
+ memset(sampler, 0, sizeof(*sampler));
+ sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
+ sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
+ sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
+ sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ st->bitmap.samplers[1] = *sampler;
+ st->bitmap.samplers[1].normalized_coords = 1;
+
+ /* init baseline rasterizer state once */
+ memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
+ st->bitmap.rasterizer.gl_rasterization_rules = 1;
+
+ /* find a usable texture format */
+ if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0)) {
+ st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
+ }
+ else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0)) {
+ st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
+ }
+ else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
+ PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_SAMPLER_VIEW, 0)) {
+ st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
+ }
+ else {
+ /* XXX support more formats */
+ assert(0);
+ }
+
+ /* alloc bitmap cache object */
+ st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
+
+ reset_cache(st);
+}
+
+
+/** Per-context tear-down */
+void
+st_destroy_bitmap(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct bitmap_cache *cache = st->bitmap.cache;
+
+ if (st->bitmap.vs) {
+ cso_delete_vertex_shader(st->cso_context, st->bitmap.vs);
+ st->bitmap.vs = NULL;
+ }
+
+ if (st->bitmap.vbuf) {
+ pipe_resource_reference(&st->bitmap.vbuf, NULL);
+ st->bitmap.vbuf = NULL;
+ }
+
+ if (cache) {
+ if (cache->trans) {
+ pipe_transfer_unmap(pipe, cache->trans);
+ pipe->transfer_destroy(pipe, cache->trans);
+ }
+ pipe_resource_reference(&st->bitmap.cache->texture, NULL);
+ free(st->bitmap.cache);
+ st->bitmap.cache = NULL;
+ }
+}
+
+#endif /* FEATURE_drawpix */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
index ba8a8cf89..12528f49f 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -1,447 +1,468 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * Functions for pixel buffer objects and vertex/element buffer objects.
- */
-
-
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "main/arrayobj.h"
-#include "main/bufferobj.h"
-
-#include "st_context.h"
-#include "st_cb_bufferobjects.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-
-
-/**
- * There is some duplication between mesa's bufferobjects and our
- * bufmgr buffers. Both have an integer handle and a hashtable to
- * lookup an opaque structure. It would be nice if the handles and
- * internal structure where somehow shared.
- */
-static struct gl_buffer_object *
-st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target)
-{
- struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object);
-
- if (!st_obj)
- return NULL;
-
- _mesa_initialize_buffer_object(&st_obj->Base, name, target);
-
- return &st_obj->Base;
-}
-
-
-
-/**
- * Deallocate/free a vertex/pixel buffer object.
- * Called via glDeleteBuffersARB().
- */
-static void
-st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- assert(obj->RefCount == 0);
- assert(st_obj->transfer == NULL);
-
- if (st_obj->buffer)
- pipe_resource_reference(&st_obj->buffer, NULL);
-
- free(st_obj);
-}
-
-
-
-/**
- * Replace data in a subrange of buffer object. If the data range
- * specified by size + offset extends beyond the end of the buffer or
- * if data is NULL, no copy is performed.
- * Called via glBufferSubDataARB().
- */
-static void
-st_bufferobj_subdata(struct gl_context *ctx,
- GLenum target,
- GLintptrARB offset,
- GLsizeiptrARB size,
- const GLvoid * data, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* we may be called from VBO code, so double-check params here */
- ASSERT(offset >= 0);
- ASSERT(size >= 0);
- ASSERT(offset + size <= obj->Size);
-
- if (!size)
- return;
-
- /*
- * According to ARB_vertex_buffer_object specification, if data is null,
- * then the contents of the buffer object's data store is undefined. We just
- * ignore, and leave it unchanged.
- */
- if (!data)
- return;
-
- /* Now that transfers are per-context, we don't have to figure out
- * flushing here. Usually drivers won't need to flush in this case
- * even if the buffer is currently referenced by hardware - they
- * just queue the upload as dma rather than mapping the underlying
- * buffer directly.
- */
- pipe_buffer_write(st_context(ctx)->pipe,
- st_obj->buffer,
- offset, size, data);
-}
-
-
-/**
- * Called via glGetBufferSubDataARB().
- */
-static void
-st_bufferobj_get_subdata(struct gl_context *ctx,
- GLenum target,
- GLintptrARB offset,
- GLsizeiptrARB size,
- GLvoid * data, struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* we may be called from VBO code, so double-check params here */
- ASSERT(offset >= 0);
- ASSERT(size >= 0);
- ASSERT(offset + size <= obj->Size);
-
- if (!size)
- return;
-
- pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer,
- offset, size, data);
-}
-
-
-/**
- * Allocate space for and store data in a buffer object. Any data that was
- * previously stored in the buffer object is lost. If data is NULL,
- * memory will be allocated, but no copy will occur.
- * Called via ctx->Driver.BufferData().
- * \return GL_TRUE for success, GL_FALSE if out of memory
- */
-static GLboolean
-st_bufferobj_data(struct gl_context *ctx,
- GLenum target,
- GLsizeiptrARB size,
- const GLvoid * data,
- GLenum usage,
- struct gl_buffer_object *obj)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- unsigned buffer_usage;
-
- st_obj->Base.Size = size;
- st_obj->Base.Usage = usage;
-
- switch(target) {
- case GL_PIXEL_PACK_BUFFER_ARB:
- case GL_PIXEL_UNPACK_BUFFER_ARB:
- buffer_usage = PIPE_BIND_RENDER_TARGET;
- break;
- case GL_ARRAY_BUFFER_ARB:
- buffer_usage = PIPE_BIND_VERTEX_BUFFER;
- break;
- case GL_ELEMENT_ARRAY_BUFFER_ARB:
- buffer_usage = PIPE_BIND_INDEX_BUFFER;
- break;
- default:
- buffer_usage = 0;
- }
-
- pipe_resource_reference( &st_obj->buffer, NULL );
-
- if (size != 0) {
- st_obj->buffer = pipe_buffer_create(pipe->screen, buffer_usage, size);
-
- if (!st_obj->buffer) {
- return GL_FALSE;
- }
-
- if (data)
- pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0,
- size, data);
- return GL_TRUE;
- }
-
- return GL_TRUE;
-}
-
-
-/**
- * Dummy data whose's pointer is used for zero size buffers or ranges.
- */
-static long st_bufferobj_zero_length = 0;
-
-
-
-/**
- * Called via glMapBufferARB().
- */
-static void *
-st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
- struct gl_buffer_object *obj)
-{
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- uint flags;
-
- switch (access) {
- case GL_WRITE_ONLY:
- flags = PIPE_TRANSFER_WRITE;
- break;
- case GL_READ_ONLY:
- flags = PIPE_TRANSFER_READ;
- break;
- case GL_READ_WRITE:
- default:
- flags = PIPE_TRANSFER_READ_WRITE;
- break;
- }
-
- /* Handle zero-size buffers here rather than in drivers */
- if (obj->Size == 0) {
- obj->Pointer = &st_bufferobj_zero_length;
- }
- else {
- obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
- st_obj->buffer,
- flags,
- &st_obj->transfer);
- }
-
- if (obj->Pointer) {
- obj->Offset = 0;
- obj->Length = obj->Size;
- }
- return obj->Pointer;
-}
-
-
-/**
- * Called via glMapBufferRange().
- */
-static void *
-st_bufferobj_map_range(struct gl_context *ctx, GLenum target,
- GLintptr offset, GLsizeiptr length, GLbitfield access,
- struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
- enum pipe_transfer_usage flags = 0x0;
-
- if (access & GL_MAP_WRITE_BIT)
- flags |= PIPE_TRANSFER_WRITE;
-
- if (access & GL_MAP_READ_BIT)
- flags |= PIPE_TRANSFER_READ;
-
- if (access & GL_MAP_FLUSH_EXPLICIT_BIT)
- flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
-
- if (access & GL_MAP_INVALIDATE_RANGE_BIT)
- flags |= PIPE_TRANSFER_DISCARD;
-
- if (access & GL_MAP_INVALIDATE_BUFFER_BIT)
- flags |= PIPE_TRANSFER_DISCARD;
-
- if (access & GL_MAP_UNSYNCHRONIZED_BIT)
- flags |= PIPE_TRANSFER_UNSYNCHRONIZED;
-
- /* ... other flags ...
- */
-
- if (access & MESA_MAP_NOWAIT_BIT)
- flags |= PIPE_TRANSFER_DONTBLOCK;
-
- assert(offset >= 0);
- assert(length >= 0);
- assert(offset < obj->Size);
- assert(offset + length <= obj->Size);
-
- /*
- * We go out of way here to hide the degenerate yet valid case of zero
- * length range from the pipe driver.
- */
- if (!length) {
- obj->Pointer = &st_bufferobj_zero_length;
- }
- else {
- obj->Pointer = pipe_buffer_map_range(pipe,
- st_obj->buffer,
- offset, length,
- flags,
- &st_obj->transfer);
- if (obj->Pointer) {
- obj->Pointer = (ubyte *) obj->Pointer + offset;
- }
- }
-
- if (obj->Pointer) {
- obj->Offset = offset;
- obj->Length = length;
- obj->AccessFlags = access;
- }
-
- return obj->Pointer;
-}
-
-
-static void
-st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
- GLintptr offset, GLsizeiptr length,
- struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- /* Subrange is relative to mapped range */
- assert(offset >= 0);
- assert(length >= 0);
- assert(offset + length <= obj->Length);
- assert(obj->Pointer);
-
- if (!length)
- return;
-
- pipe_buffer_flush_mapped_range(pipe, st_obj->transfer,
- obj->Offset + offset, length);
-}
-
-
-/**
- * Called via glUnmapBufferARB().
- */
-static GLboolean
-st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *st_obj = st_buffer_object(obj);
-
- if (obj->Length)
- pipe_buffer_unmap(pipe, st_obj->transfer);
-
- st_obj->transfer = NULL;
- obj->Pointer = NULL;
- obj->Offset = 0;
- obj->Length = 0;
- return GL_TRUE;
-}
-
-
-/**
- * Called via glCopyBufferSubData().
- */
-static void
-st_copy_buffer_subdata(struct gl_context *ctx,
- struct gl_buffer_object *src,
- struct gl_buffer_object *dst,
- GLintptr readOffset, GLintptr writeOffset,
- GLsizeiptr size)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct st_buffer_object *srcObj = st_buffer_object(src);
- struct st_buffer_object *dstObj = st_buffer_object(dst);
- struct pipe_transfer *src_transfer;
- struct pipe_transfer *dst_transfer;
- ubyte *srcPtr, *dstPtr;
-
- if(!size)
- return;
-
- /* buffer should not already be mapped */
- assert(!src->Pointer);
- assert(!dst->Pointer);
-
- srcPtr = (ubyte *) pipe_buffer_map_range(pipe,
- srcObj->buffer,
- readOffset, size,
- PIPE_TRANSFER_READ,
- &src_transfer);
-
- dstPtr = (ubyte *) pipe_buffer_map_range(pipe,
- dstObj->buffer,
- writeOffset, size,
- PIPE_TRANSFER_WRITE,
- &dst_transfer);
-
- if (srcPtr && dstPtr)
- memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
-
- pipe_buffer_unmap(pipe, src_transfer);
- pipe_buffer_unmap(pipe, dst_transfer);
-}
-
-
-/* TODO: if buffer wasn't created with appropriate usage flags, need
- * to recreate it now and copy contents -- or possibly create a
- * gallium entrypoint to extend the usage flags and let the driver
- * decide if a copy is necessary.
- */
-void
-st_bufferobj_validate_usage(struct st_context *st,
- struct st_buffer_object *obj,
- unsigned usage)
-{
-}
-
-
-void
-st_init_bufferobject_functions(struct dd_function_table *functions)
-{
- functions->NewBufferObject = st_bufferobj_alloc;
- functions->DeleteBuffer = st_bufferobj_free;
- functions->BufferData = st_bufferobj_data;
- functions->BufferSubData = st_bufferobj_subdata;
- functions->GetBufferSubData = st_bufferobj_get_subdata;
- functions->MapBuffer = st_bufferobj_map;
- functions->MapBufferRange = st_bufferobj_map_range;
- functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
- functions->UnmapBuffer = st_bufferobj_unmap;
- functions->CopyBufferSubData = st_copy_buffer_subdata;
-
- /* For GL_APPLE_vertex_array_object */
- functions->NewArrayObject = _mesa_new_array_object;
- functions->DeleteArrayObject = _mesa_delete_array_object;
-}
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Functions for pixel buffer objects and vertex/element buffer objects.
+ */
+
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/arrayobj.h"
+#include "main/bufferobj.h"
+
+#include "st_context.h"
+#include "st_cb_bufferobjects.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+
+
+/**
+ * There is some duplication between mesa's bufferobjects and our
+ * bufmgr buffers. Both have an integer handle and a hashtable to
+ * lookup an opaque structure. It would be nice if the handles and
+ * internal structure where somehow shared.
+ */
+static struct gl_buffer_object *
+st_bufferobj_alloc(struct gl_context *ctx, GLuint name, GLenum target)
+{
+ struct st_buffer_object *st_obj = ST_CALLOC_STRUCT(st_buffer_object);
+
+ if (!st_obj)
+ return NULL;
+
+ _mesa_initialize_buffer_object(&st_obj->Base, name, target);
+
+ return &st_obj->Base;
+}
+
+
+
+/**
+ * Deallocate/free a vertex/pixel buffer object.
+ * Called via glDeleteBuffersARB().
+ */
+static void
+st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
+{
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+ assert(obj->RefCount == 0);
+ assert(st_obj->transfer == NULL);
+
+ if (st_obj->buffer)
+ pipe_resource_reference(&st_obj->buffer, NULL);
+
+ free(st_obj);
+}
+
+
+
+/**
+ * Replace data in a subrange of buffer object. If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void
+st_bufferobj_subdata(struct gl_context *ctx,
+ GLenum target,
+ GLintptrARB offset,
+ GLsizeiptrARB size,
+ const GLvoid * data, struct gl_buffer_object *obj)
+{
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+ /* we may be called from VBO code, so double-check params here */
+ ASSERT(offset >= 0);
+ ASSERT(size >= 0);
+ ASSERT(offset + size <= obj->Size);
+
+ if (!size)
+ return;
+
+ /*
+ * According to ARB_vertex_buffer_object specification, if data is null,
+ * then the contents of the buffer object's data store is undefined. We just
+ * ignore, and leave it unchanged.
+ */
+ if (!data)
+ return;
+
+ /* Now that transfers are per-context, we don't have to figure out
+ * flushing here. Usually drivers won't need to flush in this case
+ * even if the buffer is currently referenced by hardware - they
+ * just queue the upload as dma rather than mapping the underlying
+ * buffer directly.
+ */
+ pipe_buffer_write(st_context(ctx)->pipe,
+ st_obj->buffer,
+ offset, size, data);
+}
+
+
+/**
+ * Called via glGetBufferSubDataARB().
+ */
+static void
+st_bufferobj_get_subdata(struct gl_context *ctx,
+ GLenum target,
+ GLintptrARB offset,
+ GLsizeiptrARB size,
+ GLvoid * data, struct gl_buffer_object *obj)
+{
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+ /* we may be called from VBO code, so double-check params here */
+ ASSERT(offset >= 0);
+ ASSERT(size >= 0);
+ ASSERT(offset + size <= obj->Size);
+
+ if (!size)
+ return;
+
+ pipe_buffer_read(st_context(ctx)->pipe, st_obj->buffer,
+ offset, size, data);
+}
+
+
+/**
+ * Allocate space for and store data in a buffer object. Any data that was
+ * previously stored in the buffer object is lost. If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+st_bufferobj_data(struct gl_context *ctx,
+ GLenum target,
+ GLsizeiptrARB size,
+ const GLvoid * data,
+ GLenum usage,
+ struct gl_buffer_object *obj)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+ unsigned bind, pipe_usage;
+
+ st_obj->Base.Size = size;
+ st_obj->Base.Usage = usage;
+
+ switch(target) {
+ case GL_PIXEL_PACK_BUFFER_ARB:
+ case GL_PIXEL_UNPACK_BUFFER_ARB:
+ bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+ break;
+ case GL_ARRAY_BUFFER_ARB:
+ bind = PIPE_BIND_VERTEX_BUFFER;
+ break;
+ case GL_ELEMENT_ARRAY_BUFFER_ARB:
+ bind = PIPE_BIND_INDEX_BUFFER;
+ break;
+ default:
+ bind = 0;
+ }
+
+ switch (usage) {
+ case GL_STATIC_DRAW:
+ case GL_STATIC_READ:
+ case GL_STATIC_COPY:
+ pipe_usage = PIPE_USAGE_STATIC;
+ break;
+ case GL_DYNAMIC_DRAW:
+ case GL_DYNAMIC_READ:
+ case GL_DYNAMIC_COPY:
+ pipe_usage = PIPE_USAGE_DYNAMIC;
+ break;
+ case GL_STREAM_DRAW:
+ case GL_STREAM_READ:
+ case GL_STREAM_COPY:
+ pipe_usage = PIPE_USAGE_STREAM;
+ break;
+ default:
+ pipe_usage = PIPE_USAGE_DEFAULT;
+ }
+
+ pipe_resource_reference( &st_obj->buffer, NULL );
+
+ if (size != 0) {
+ st_obj->buffer = pipe_buffer_create(pipe->screen, bind,
+ pipe_usage, size);
+
+ if (!st_obj->buffer) {
+ return GL_FALSE;
+ }
+
+ if (data)
+ pipe_buffer_write(st_context(ctx)->pipe, st_obj->buffer, 0,
+ size, data);
+ return GL_TRUE;
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Dummy data whose's pointer is used for zero size buffers or ranges.
+ */
+static long st_bufferobj_zero_length = 0;
+
+
+
+/**
+ * Called via glMapBufferARB().
+ */
+static void *
+st_bufferobj_map(struct gl_context *ctx, GLenum target, GLenum access,
+ struct gl_buffer_object *obj)
+{
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+ uint flags;
+
+ switch (access) {
+ case GL_WRITE_ONLY:
+ flags = PIPE_TRANSFER_WRITE;
+ break;
+ case GL_READ_ONLY:
+ flags = PIPE_TRANSFER_READ;
+ break;
+ case GL_READ_WRITE:
+ default:
+ flags = PIPE_TRANSFER_READ_WRITE;
+ break;
+ }
+
+ /* Handle zero-size buffers here rather than in drivers */
+ if (obj->Size == 0) {
+ obj->Pointer = &st_bufferobj_zero_length;
+ }
+ else {
+ obj->Pointer = pipe_buffer_map(st_context(ctx)->pipe,
+ st_obj->buffer,
+ flags,
+ &st_obj->transfer);
+ }
+
+ if (obj->Pointer) {
+ obj->Offset = 0;
+ obj->Length = obj->Size;
+ }
+ return obj->Pointer;
+}
+
+
+/**
+ * Called via glMapBufferRange().
+ */
+static void *
+st_bufferobj_map_range(struct gl_context *ctx, GLenum target,
+ GLintptr offset, GLsizeiptr length, GLbitfield access,
+ struct gl_buffer_object *obj)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+ enum pipe_transfer_usage flags = 0x0;
+
+ if (access & GL_MAP_WRITE_BIT)
+ flags |= PIPE_TRANSFER_WRITE;
+
+ if (access & GL_MAP_READ_BIT)
+ flags |= PIPE_TRANSFER_READ;
+
+ if (access & GL_MAP_FLUSH_EXPLICIT_BIT)
+ flags |= PIPE_TRANSFER_FLUSH_EXPLICIT;
+
+ if (access & GL_MAP_INVALIDATE_RANGE_BIT)
+ flags |= PIPE_TRANSFER_DISCARD;
+
+ if (access & GL_MAP_INVALIDATE_BUFFER_BIT)
+ flags |= PIPE_TRANSFER_DISCARD;
+
+ if (access & GL_MAP_UNSYNCHRONIZED_BIT)
+ flags |= PIPE_TRANSFER_UNSYNCHRONIZED;
+
+ /* ... other flags ...
+ */
+
+ if (access & MESA_MAP_NOWAIT_BIT)
+ flags |= PIPE_TRANSFER_DONTBLOCK;
+
+ assert(offset >= 0);
+ assert(length >= 0);
+ assert(offset < obj->Size);
+ assert(offset + length <= obj->Size);
+
+ /*
+ * We go out of way here to hide the degenerate yet valid case of zero
+ * length range from the pipe driver.
+ */
+ if (!length) {
+ obj->Pointer = &st_bufferobj_zero_length;
+ }
+ else {
+ obj->Pointer = pipe_buffer_map_range(pipe,
+ st_obj->buffer,
+ offset, length,
+ flags,
+ &st_obj->transfer);
+ if (obj->Pointer) {
+ obj->Pointer = (ubyte *) obj->Pointer + offset;
+ }
+ }
+
+ if (obj->Pointer) {
+ obj->Offset = offset;
+ obj->Length = length;
+ obj->AccessFlags = access;
+ }
+
+ return obj->Pointer;
+}
+
+
+static void
+st_bufferobj_flush_mapped_range(struct gl_context *ctx, GLenum target,
+ GLintptr offset, GLsizeiptr length,
+ struct gl_buffer_object *obj)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+ /* Subrange is relative to mapped range */
+ assert(offset >= 0);
+ assert(length >= 0);
+ assert(offset + length <= obj->Length);
+ assert(obj->Pointer);
+
+ if (!length)
+ return;
+
+ pipe_buffer_flush_mapped_range(pipe, st_obj->transfer,
+ obj->Offset + offset, length);
+}
+
+
+/**
+ * Called via glUnmapBufferARB().
+ */
+static GLboolean
+st_bufferobj_unmap(struct gl_context *ctx, GLenum target, struct gl_buffer_object *obj)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct st_buffer_object *st_obj = st_buffer_object(obj);
+
+ if (obj->Length)
+ pipe_buffer_unmap(pipe, st_obj->transfer);
+
+ st_obj->transfer = NULL;
+ obj->Pointer = NULL;
+ obj->Offset = 0;
+ obj->Length = 0;
+ return GL_TRUE;
+}
+
+
+/**
+ * Called via glCopyBufferSubData().
+ */
+static void
+st_copy_buffer_subdata(struct gl_context *ctx,
+ struct gl_buffer_object *src,
+ struct gl_buffer_object *dst,
+ GLintptr readOffset, GLintptr writeOffset,
+ GLsizeiptr size)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct st_buffer_object *srcObj = st_buffer_object(src);
+ struct st_buffer_object *dstObj = st_buffer_object(dst);
+ struct pipe_transfer *src_transfer;
+ struct pipe_transfer *dst_transfer;
+ ubyte *srcPtr, *dstPtr;
+
+ if(!size)
+ return;
+
+ /* buffer should not already be mapped */
+ assert(!src->Pointer);
+ assert(!dst->Pointer);
+
+ srcPtr = (ubyte *) pipe_buffer_map_range(pipe,
+ srcObj->buffer,
+ readOffset, size,
+ PIPE_TRANSFER_READ,
+ &src_transfer);
+
+ dstPtr = (ubyte *) pipe_buffer_map_range(pipe,
+ dstObj->buffer,
+ writeOffset, size,
+ PIPE_TRANSFER_WRITE,
+ &dst_transfer);
+
+ if (srcPtr && dstPtr)
+ memcpy(dstPtr + writeOffset, srcPtr + readOffset, size);
+
+ pipe_buffer_unmap(pipe, src_transfer);
+ pipe_buffer_unmap(pipe, dst_transfer);
+}
+
+
+/* TODO: if buffer wasn't created with appropriate usage flags, need
+ * to recreate it now and copy contents -- or possibly create a
+ * gallium entrypoint to extend the usage flags and let the driver
+ * decide if a copy is necessary.
+ */
+void
+st_bufferobj_validate_usage(struct st_context *st,
+ struct st_buffer_object *obj,
+ unsigned usage)
+{
+}
+
+
+void
+st_init_bufferobject_functions(struct dd_function_table *functions)
+{
+ functions->NewBufferObject = st_bufferobj_alloc;
+ functions->DeleteBuffer = st_bufferobj_free;
+ functions->BufferData = st_bufferobj_data;
+ functions->BufferSubData = st_bufferobj_subdata;
+ functions->GetBufferSubData = st_bufferobj_get_subdata;
+ functions->MapBuffer = st_bufferobj_map;
+ functions->MapBufferRange = st_bufferobj_map_range;
+ functions->FlushMappedBufferRange = st_bufferobj_flush_mapped_range;
+ functions->UnmapBuffer = st_bufferobj_unmap;
+ functions->CopyBufferSubData = st_copy_buffer_subdata;
+
+ /* For GL_APPLE_vertex_array_object */
+ functions->NewArrayObject = _mesa_new_array_object;
+ functions->DeleteArrayObject = _mesa_delete_array_object;
+}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c
index 3e27be271..d2e0cd73c 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_clear.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c
@@ -1,559 +1,563 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- * Michel Dänzer
- */
-
-#include "main/glheader.h"
-#include "main/formats.h"
-#include "main/macros.h"
-#include "program/prog_instruction.h"
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_accum.h"
-#include "st_cb_clear.h"
-#include "st_cb_fbo.h"
-#include "st_program.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
-#include "util/u_simple_shaders.h"
-#include "util/u_draw_quad.h"
-
-#include "cso_cache/cso_context.h"
-
-
-/**
- * Do per-context initialization for glClear.
- */
-void
-st_init_clear(struct st_context *st)
-{
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *pscreen = st->pipe->screen;
-
- memset(&st->clear, 0, sizeof(st->clear));
-
- st->clear.raster.gl_rasterization_rules = 1;
- st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE);
-
- /* fragment shader state: color pass-through program */
- st->clear.fs = util_make_fragment_passthrough_shader(pipe);
-
- /* vertex shader state: color/position pass-through */
- {
- const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
- TGSI_SEMANTIC_COLOR };
- const uint semantic_indexes[] = { 0, 0 };
- st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2,
- semantic_names,
- semantic_indexes);
- }
-}
-
-
-/**
- * Free per-context state for glClear.
- */
-void
-st_destroy_clear(struct st_context *st)
-{
- if (st->clear.fs) {
- cso_delete_fragment_shader(st->cso_context, st->clear.fs);
- st->clear.fs = NULL;
- }
- if (st->clear.vs) {
- cso_delete_vertex_shader(st->cso_context, st->clear.vs);
- st->clear.vs = NULL;
- }
- if (st->clear.vbuf) {
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf = NULL;
- }
-}
-
-
-/**
- * Draw a screen-aligned quadrilateral.
- * Coords are clip coords with y=0=bottom.
- */
-static void
-draw_quad(struct st_context *st,
- float x0, float y0, float x1, float y1, GLfloat z,
- const GLfloat color[4])
-{
- struct pipe_context *pipe = st->pipe;
-
- /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
- * no_flush) updates to buffers where we know there is no conflict
- * with previous data. Currently using max_slots > 1 will cause
- * synchronous rendering if the driver flushes its command buffers
- * between one bitmap and the next. Our flush hook below isn't
- * sufficient to catch this as the driver doesn't tell us when it
- * flushes its own command buffers. Until this gets fixed, pay the
- * price of allocating a new buffer for each bitmap cache-flush to
- * avoid synchronous rendering.
- */
- const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */
- GLuint i;
-
- if (st->clear.vbuf_slot >= max_slots) {
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf_slot = 0;
- }
-
- if (!st->clear.vbuf) {
- st->clear.vbuf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- max_slots * sizeof(st->clear.vertices));
- }
-
- /* positions */
- st->clear.vertices[0][0][0] = x0;
- st->clear.vertices[0][0][1] = y0;
-
- st->clear.vertices[1][0][0] = x1;
- st->clear.vertices[1][0][1] = y0;
-
- st->clear.vertices[2][0][0] = x1;
- st->clear.vertices[2][0][1] = y1;
-
- st->clear.vertices[3][0][0] = x0;
- st->clear.vertices[3][0][1] = y1;
-
- /* same for all verts: */
- for (i = 0; i < 4; i++) {
- st->clear.vertices[i][0][2] = z;
- st->clear.vertices[i][0][3] = 1.0;
- st->clear.vertices[i][1][0] = color[0];
- st->clear.vertices[i][1][1] = color[1];
- st->clear.vertices[i][1][2] = color[2];
- st->clear.vertices[i][1][3] = color[3];
- }
-
- /* put vertex data into vbuf */
- pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf,
- st->clear.vbuf_slot
- * sizeof(st->clear.vertices),
- sizeof(st->clear.vertices),
- st->clear.vertices);
-
- /* draw */
- util_draw_vertex_buffer(pipe,
- st->clear.vbuf,
- st->clear.vbuf_slot * sizeof(st->clear.vertices),
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- 2); /* attribs/vert */
-
- /* Increment slot */
- st->clear.vbuf_slot++;
-}
-
-
-
-/**
- * Do glClear by drawing a quadrilateral.
- * The vertices of the quad will be computed from the
- * ctx->DrawBuffer->_X/Ymin/max fields.
- */
-static void
-clear_with_quad(struct gl_context *ctx,
- GLboolean color, GLboolean depth, GLboolean stencil)
-{
- struct st_context *st = st_context(ctx);
- const struct gl_framebuffer *fb = ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f;
- const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f;
- const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f;
- const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f;
-
- /*
- printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__,
- color ? "color, " : "",
- depth ? "depth, " : "",
- stencil ? "stencil" : "",
- x0, y0,
- x1, y1);
- */
-
- cso_save_blend(st->cso_context);
- cso_save_stencil_ref(st->cso_context);
- cso_save_depth_stencil_alpha(st->cso_context);
- cso_save_rasterizer(st->cso_context);
- cso_save_viewport(st->cso_context);
- cso_save_clip(st->cso_context);
- cso_save_fragment_shader(st->cso_context);
- cso_save_vertex_shader(st->cso_context);
- cso_save_vertex_elements(st->cso_context);
-
- /* blend state: RGBA masking */
- {
- struct pipe_blend_state blend;
- memset(&blend, 0, sizeof(blend));
- blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
- blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
- blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
- if (color) {
- if (ctx->Color.ColorMask[0][0])
- blend.rt[0].colormask |= PIPE_MASK_R;
- if (ctx->Color.ColorMask[0][1])
- blend.rt[0].colormask |= PIPE_MASK_G;
- if (ctx->Color.ColorMask[0][2])
- blend.rt[0].colormask |= PIPE_MASK_B;
- if (ctx->Color.ColorMask[0][3])
- blend.rt[0].colormask |= PIPE_MASK_A;
- if (st->ctx->Color.DitherFlag)
- blend.dither = 1;
- }
- cso_set_blend(st->cso_context, &blend);
- }
-
- /* depth_stencil state: always pass/set to ref value */
- {
- struct pipe_depth_stencil_alpha_state depth_stencil;
- memset(&depth_stencil, 0, sizeof(depth_stencil));
- if (depth) {
- depth_stencil.depth.enabled = 1;
- depth_stencil.depth.writemask = 1;
- depth_stencil.depth.func = PIPE_FUNC_ALWAYS;
- }
-
- if (stencil) {
- struct pipe_stencil_ref stencil_ref;
- memset(&stencil_ref, 0, sizeof(stencil_ref));
- depth_stencil.stencil[0].enabled = 1;
- depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS;
- depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
- depth_stencil.stencil[0].valuemask = 0xff;
- depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
- stencil_ref.ref_value[0] = ctx->Stencil.Clear;
- cso_set_stencil_ref(st->cso_context, &stencil_ref);
- }
-
- cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
- }
-
- cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
-
- cso_set_rasterizer(st->cso_context, &st->clear.raster);
-
- /* viewport state: viewport matching window dims */
- {
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * fb_width;
- vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 1.0f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * fb_width;
- vp.translate[1] = 0.5f * fb_height;
- vp.translate[2] = 0.0f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(st->cso_context, &vp);
- }
-
- cso_set_clip(st->cso_context, &st->clear.clip);
- cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
- cso_set_vertex_shader_handle(st->cso_context, st->clear.vs);
-
- /* draw quad matching scissor rect (XXX verify coord round-off) */
- draw_quad(st, x0, y0, x1, y1,
- (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor);
-
- /* Restore pipe state */
- cso_restore_blend(st->cso_context);
- cso_restore_stencil_ref(st->cso_context);
- cso_restore_depth_stencil_alpha(st->cso_context);
- cso_restore_rasterizer(st->cso_context);
- cso_restore_viewport(st->cso_context);
- cso_restore_clip(st->cso_context);
- cso_restore_fragment_shader(st->cso_context);
- cso_restore_vertex_shader(st->cso_context);
- cso_restore_vertex_elements(st->cso_context);
-}
-
-
-/**
- * Determine if we need to clear the depth buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (!ctx->Color.ColorMask[0][0] ||
- !ctx->Color.ColorMask[0][1] ||
- !ctx->Color.ColorMask[0][2] ||
- !ctx->Color.ColorMask[0][3])
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the combiend depth/stencil buffer by
- * drawing a quad.
- */
-static INLINE GLboolean
-check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
-{
- const GLuint stencilMax = 0xff;
- GLboolean maskStencil
- = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
-
- assert(rb->Format == MESA_FORMAT_S8 ||
- rb->Format == MESA_FORMAT_Z24_S8 ||
- rb->Format == MESA_FORMAT_S8_Z24);
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (maskStencil)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the depth buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
- boolean ds_separate)
-{
- const struct st_renderbuffer *strb = st_renderbuffer(rb);
- const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-/**
- * Determine if we need to clear the stencil buffer by drawing a quad.
- */
-static INLINE GLboolean
-check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
- boolean ds_separate)
-{
- const struct st_renderbuffer *strb = st_renderbuffer(rb);
- const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
- const GLuint stencilMax = 0xff;
- const GLboolean maskStencil
- = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
-
- assert(rb->Format == MESA_FORMAT_S8 ||
- rb->Format == MESA_FORMAT_Z24_S8 ||
- rb->Format == MESA_FORMAT_S8_Z24);
-
- if (maskStencil)
- return GL_TRUE;
-
- if (ctx->Scissor.Enabled &&
- (ctx->Scissor.X != 0 ||
- ctx->Scissor.Y != 0 ||
- ctx->Scissor.Width < rb->Width ||
- ctx->Scissor.Height < rb->Height))
- return GL_TRUE;
-
- /* This is correct, but it is necessary to look at the depth clear
- * value held in the surface when it comes time to issue the clear,
- * rather than taking depth and stencil clear values from the
- * current state.
- */
- if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0)
- return GL_TRUE;
-
- return GL_FALSE;
-}
-
-
-
-/**
- * Called when we need to flush.
- */
-void
-st_flush_clear(struct st_context *st)
-{
- /* Release vertex buffer to avoid synchronous rendering if we were
- * to map it in the next frame.
- */
- pipe_resource_reference(&st->clear.vbuf, NULL);
- st->clear.vbuf_slot = 0;
-}
-
-
-
-/**
- * Called via ctx->Driver.Clear()
- */
-static void
-st_Clear(struct gl_context *ctx, GLbitfield mask)
-{
- static const GLbitfield BUFFER_BITS_DS
- = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
- struct st_context *st = st_context(ctx);
- struct gl_renderbuffer *depthRb
- = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
- struct gl_renderbuffer *stencilRb
- = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
- GLbitfield quad_buffers = 0x0;
- GLbitfield clear_buffers = 0x0;
- GLuint i;
-
- /* This makes sure the pipe has the latest scissor, etc values */
- st_validate_state( st );
-
- if (mask & BUFFER_BITS_COLOR) {
- for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
- GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
-
- if (mask & (1 << b)) {
- struct gl_renderbuffer *rb
- = ctx->DrawBuffer->Attachment[b].Renderbuffer;
- struct st_renderbuffer *strb = st_renderbuffer(rb);
-
- if (!strb || !strb->surface)
- continue;
-
- if (check_clear_color_with_quad( ctx, rb ))
- quad_buffers |= PIPE_CLEAR_COLOR;
- else
- clear_buffers |= PIPE_CLEAR_COLOR;
- }
- }
- }
-
- if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) {
- /* clearing combined depth + stencil */
- struct st_renderbuffer *strb = st_renderbuffer(depthRb);
-
- if (strb->surface) {
- if (check_clear_depth_stencil_with_quad(ctx, depthRb))
- quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- else
- clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- }
- }
- else {
- /* separate depth/stencil clears */
- /* I don't think truly separate buffers are actually possible in gallium or hw? */
- if (mask & BUFFER_BIT_DEPTH) {
- struct st_renderbuffer *strb = st_renderbuffer(depthRb);
-
- if (strb->surface) {
- if (check_clear_depth_with_quad(ctx, depthRb,
- st->clear.enable_ds_separate))
- quad_buffers |= PIPE_CLEAR_DEPTH;
- else
- clear_buffers |= PIPE_CLEAR_DEPTH;
- }
- }
- if (mask & BUFFER_BIT_STENCIL) {
- struct st_renderbuffer *strb = st_renderbuffer(stencilRb);
-
- if (strb->surface) {
- if (check_clear_stencil_with_quad(ctx, stencilRb,
- st->clear.enable_ds_separate))
- quad_buffers |= PIPE_CLEAR_STENCIL;
- else
- clear_buffers |= PIPE_CLEAR_STENCIL;
- }
- }
- }
-
- /*
- * If we're going to use clear_with_quad() for any reason, use it for
- * everything possible.
- */
- if (quad_buffers) {
- quad_buffers |= clear_buffers;
- clear_with_quad(ctx,
- quad_buffers & PIPE_CLEAR_COLOR,
- quad_buffers & PIPE_CLEAR_DEPTH,
- quad_buffers & PIPE_CLEAR_STENCIL);
- } else if (clear_buffers) {
- /* driver cannot know it can clear everything if the buffer
- * is a combined depth/stencil buffer but this wasn't actually
- * required from the visual. Hence fix this up to avoid potential
- * read-modify-write in the driver.
- */
- if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) &&
- ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
- (depthRb == stencilRb) &&
- (ctx->DrawBuffer->Visual.depthBits == 0 ||
- ctx->DrawBuffer->Visual.stencilBits == 0))
- clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
- st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor,
- ctx->Depth.Clear, ctx->Stencil.Clear);
- }
- if (mask & BUFFER_BIT_ACCUM)
- st_clear_accum_buffer(ctx,
- ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
-}
-
-
-void
-st_init_clear_functions(struct dd_function_table *functions)
-{
- functions->Clear = st_Clear;
-}
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ * Michel Dänzer
+ */
+
+#include "main/glheader.h"
+#include "main/formats.h"
+#include "main/macros.h"
+#include "program/prog_instruction.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_cb_accum.h"
+#include "st_cb_clear.h"
+#include "st_cb_fbo.h"
+#include "st_program.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_simple_shaders.h"
+#include "util/u_draw_quad.h"
+
+#include "cso_cache/cso_context.h"
+
+
+/**
+ * Do per-context initialization for glClear.
+ */
+void
+st_init_clear(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *pscreen = st->pipe->screen;
+
+ memset(&st->clear, 0, sizeof(st->clear));
+
+ st->clear.raster.gl_rasterization_rules = 1;
+ st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE);
+
+ /* fragment shader state: color pass-through program */
+ st->clear.fs = util_make_fragment_passthrough_shader(pipe);
+
+ /* vertex shader state: color/position pass-through */
+ {
+ const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+ TGSI_SEMANTIC_COLOR };
+ const uint semantic_indexes[] = { 0, 0 };
+ st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2,
+ semantic_names,
+ semantic_indexes);
+ }
+}
+
+
+/**
+ * Free per-context state for glClear.
+ */
+void
+st_destroy_clear(struct st_context *st)
+{
+ if (st->clear.fs) {
+ cso_delete_fragment_shader(st->cso_context, st->clear.fs);
+ st->clear.fs = NULL;
+ }
+ if (st->clear.vs) {
+ cso_delete_vertex_shader(st->cso_context, st->clear.vs);
+ st->clear.vs = NULL;
+ }
+ if (st->clear.vbuf) {
+ pipe_resource_reference(&st->clear.vbuf, NULL);
+ st->clear.vbuf = NULL;
+ }
+}
+
+
+/**
+ * Draw a screen-aligned quadrilateral.
+ * Coords are clip coords with y=0=bottom.
+ */
+static void
+draw_quad(struct st_context *st,
+ float x0, float y0, float x1, float y1, GLfloat z,
+ const GLfloat color[4])
+{
+ struct pipe_context *pipe = st->pipe;
+
+ /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
+ * no_flush) updates to buffers where we know there is no conflict
+ * with previous data. Currently using max_slots > 1 will cause
+ * synchronous rendering if the driver flushes its command buffers
+ * between one bitmap and the next. Our flush hook below isn't
+ * sufficient to catch this as the driver doesn't tell us when it
+ * flushes its own command buffers. Until this gets fixed, pay the
+ * price of allocating a new buffer for each bitmap cache-flush to
+ * avoid synchronous rendering.
+ */
+ const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */
+ GLuint i;
+
+ if (st->clear.vbuf_slot >= max_slots) {
+ pipe_resource_reference(&st->clear.vbuf, NULL);
+ st->clear.vbuf_slot = 0;
+ }
+
+ if (!st->clear.vbuf) {
+ st->clear.vbuf = pipe_buffer_create(pipe->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
+ max_slots * sizeof(st->clear.vertices));
+ }
+
+ /* positions */
+ st->clear.vertices[0][0][0] = x0;
+ st->clear.vertices[0][0][1] = y0;
+
+ st->clear.vertices[1][0][0] = x1;
+ st->clear.vertices[1][0][1] = y0;
+
+ st->clear.vertices[2][0][0] = x1;
+ st->clear.vertices[2][0][1] = y1;
+
+ st->clear.vertices[3][0][0] = x0;
+ st->clear.vertices[3][0][1] = y1;
+
+ /* same for all verts: */
+ for (i = 0; i < 4; i++) {
+ st->clear.vertices[i][0][2] = z;
+ st->clear.vertices[i][0][3] = 1.0;
+ st->clear.vertices[i][1][0] = color[0];
+ st->clear.vertices[i][1][1] = color[1];
+ st->clear.vertices[i][1][2] = color[2];
+ st->clear.vertices[i][1][3] = color[3];
+ }
+
+ /* put vertex data into vbuf */
+ pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf,
+ st->clear.vbuf_slot
+ * sizeof(st->clear.vertices),
+ sizeof(st->clear.vertices),
+ st->clear.vertices);
+
+ /* draw */
+ util_draw_vertex_buffer(pipe,
+ st->cso_context,
+ st->clear.vbuf,
+ st->clear.vbuf_slot * sizeof(st->clear.vertices),
+ PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ 2); /* attribs/vert */
+
+ /* Increment slot */
+ st->clear.vbuf_slot++;
+}
+
+
+
+/**
+ * Do glClear by drawing a quadrilateral.
+ * The vertices of the quad will be computed from the
+ * ctx->DrawBuffer->_X/Ymin/max fields.
+ */
+static void
+clear_with_quad(struct gl_context *ctx,
+ GLboolean color, GLboolean depth, GLboolean stencil)
+{
+ struct st_context *st = st_context(ctx);
+ const struct gl_framebuffer *fb = ctx->DrawBuffer;
+ const GLfloat fb_width = (GLfloat) fb->Width;
+ const GLfloat fb_height = (GLfloat) fb->Height;
+ const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f;
+ const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f;
+ const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f;
+ const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f;
+
+ /*
+ printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__,
+ color ? "color, " : "",
+ depth ? "depth, " : "",
+ stencil ? "stencil" : "",
+ x0, y0,
+ x1, y1);
+ */
+
+ cso_save_blend(st->cso_context);
+ cso_save_stencil_ref(st->cso_context);
+ cso_save_depth_stencil_alpha(st->cso_context);
+ cso_save_rasterizer(st->cso_context);
+ cso_save_viewport(st->cso_context);
+ cso_save_clip(st->cso_context);
+ cso_save_fragment_shader(st->cso_context);
+ cso_save_vertex_shader(st->cso_context);
+ cso_save_vertex_elements(st->cso_context);
+ cso_save_vertex_buffers(st->cso_context);
+
+ /* blend state: RGBA masking */
+ {
+ struct pipe_blend_state blend;
+ memset(&blend, 0, sizeof(blend));
+ blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+ blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+ if (color) {
+ if (ctx->Color.ColorMask[0][0])
+ blend.rt[0].colormask |= PIPE_MASK_R;
+ if (ctx->Color.ColorMask[0][1])
+ blend.rt[0].colormask |= PIPE_MASK_G;
+ if (ctx->Color.ColorMask[0][2])
+ blend.rt[0].colormask |= PIPE_MASK_B;
+ if (ctx->Color.ColorMask[0][3])
+ blend.rt[0].colormask |= PIPE_MASK_A;
+ if (st->ctx->Color.DitherFlag)
+ blend.dither = 1;
+ }
+ cso_set_blend(st->cso_context, &blend);
+ }
+
+ /* depth_stencil state: always pass/set to ref value */
+ {
+ struct pipe_depth_stencil_alpha_state depth_stencil;
+ memset(&depth_stencil, 0, sizeof(depth_stencil));
+ if (depth) {
+ depth_stencil.depth.enabled = 1;
+ depth_stencil.depth.writemask = 1;
+ depth_stencil.depth.func = PIPE_FUNC_ALWAYS;
+ }
+
+ if (stencil) {
+ struct pipe_stencil_ref stencil_ref;
+ memset(&stencil_ref, 0, sizeof(stencil_ref));
+ depth_stencil.stencil[0].enabled = 1;
+ depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS;
+ depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
+ depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+ depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
+ depth_stencil.stencil[0].valuemask = 0xff;
+ depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
+ stencil_ref.ref_value[0] = ctx->Stencil.Clear;
+ cso_set_stencil_ref(st->cso_context, &stencil_ref);
+ }
+
+ cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
+ }
+
+ cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
+
+ cso_set_rasterizer(st->cso_context, &st->clear.raster);
+
+ /* viewport state: viewport matching window dims */
+ {
+ const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * fb_width;
+ vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
+ vp.scale[2] = 1.0f;
+ vp.scale[3] = 1.0f;
+ vp.translate[0] = 0.5f * fb_width;
+ vp.translate[1] = 0.5f * fb_height;
+ vp.translate[2] = 0.0f;
+ vp.translate[3] = 0.0f;
+ cso_set_viewport(st->cso_context, &vp);
+ }
+
+ cso_set_clip(st->cso_context, &st->clear.clip);
+ cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
+ cso_set_vertex_shader_handle(st->cso_context, st->clear.vs);
+
+ /* draw quad matching scissor rect (XXX verify coord round-off) */
+ draw_quad(st, x0, y0, x1, y1,
+ (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor);
+
+ /* Restore pipe state */
+ cso_restore_blend(st->cso_context);
+ cso_restore_stencil_ref(st->cso_context);
+ cso_restore_depth_stencil_alpha(st->cso_context);
+ cso_restore_rasterizer(st->cso_context);
+ cso_restore_viewport(st->cso_context);
+ cso_restore_clip(st->cso_context);
+ cso_restore_fragment_shader(st->cso_context);
+ cso_restore_vertex_shader(st->cso_context);
+ cso_restore_vertex_elements(st->cso_context);
+ cso_restore_vertex_buffers(st->cso_context);
+}
+
+
+/**
+ * Determine if we need to clear the depth buffer by drawing a quad.
+ */
+static INLINE GLboolean
+check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
+{
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
+ return GL_TRUE;
+
+ if (!ctx->Color.ColorMask[0][0] ||
+ !ctx->Color.ColorMask[0][1] ||
+ !ctx->Color.ColorMask[0][2] ||
+ !ctx->Color.ColorMask[0][3])
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+
+/**
+ * Determine if we need to clear the combiend depth/stencil buffer by
+ * drawing a quad.
+ */
+static INLINE GLboolean
+check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
+{
+ const GLuint stencilMax = 0xff;
+ GLboolean maskStencil
+ = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
+
+ assert(rb->Format == MESA_FORMAT_S8 ||
+ rb->Format == MESA_FORMAT_Z24_S8 ||
+ rb->Format == MESA_FORMAT_S8_Z24);
+
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
+ return GL_TRUE;
+
+ if (maskStencil)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+
+/**
+ * Determine if we need to clear the depth buffer by drawing a quad.
+ */
+static INLINE GLboolean
+check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
+ boolean ds_separate)
+{
+ const struct st_renderbuffer *strb = st_renderbuffer(rb);
+ const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
+
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
+ return GL_TRUE;
+
+ if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+
+/**
+ * Determine if we need to clear the stencil buffer by drawing a quad.
+ */
+static INLINE GLboolean
+check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
+ boolean ds_separate)
+{
+ const struct st_renderbuffer *strb = st_renderbuffer(rb);
+ const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format);
+ const GLuint stencilMax = 0xff;
+ const GLboolean maskStencil
+ = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax;
+
+ assert(rb->Format == MESA_FORMAT_S8 ||
+ rb->Format == MESA_FORMAT_Z24_S8 ||
+ rb->Format == MESA_FORMAT_S8_Z24);
+
+ if (maskStencil)
+ return GL_TRUE;
+
+ if (ctx->Scissor.Enabled &&
+ (ctx->Scissor.X != 0 ||
+ ctx->Scissor.Y != 0 ||
+ ctx->Scissor.Width < rb->Width ||
+ ctx->Scissor.Height < rb->Height))
+ return GL_TRUE;
+
+ /* This is correct, but it is necessary to look at the depth clear
+ * value held in the surface when it comes time to issue the clear,
+ * rather than taking depth and stencil clear values from the
+ * current state.
+ */
+ if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+
+
+/**
+ * Called when we need to flush.
+ */
+void
+st_flush_clear(struct st_context *st)
+{
+ /* Release vertex buffer to avoid synchronous rendering if we were
+ * to map it in the next frame.
+ */
+ pipe_resource_reference(&st->clear.vbuf, NULL);
+ st->clear.vbuf_slot = 0;
+}
+
+
+
+/**
+ * Called via ctx->Driver.Clear()
+ */
+static void
+st_Clear(struct gl_context *ctx, GLbitfield mask)
+{
+ static const GLbitfield BUFFER_BITS_DS
+ = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+ struct st_context *st = st_context(ctx);
+ struct gl_renderbuffer *depthRb
+ = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+ struct gl_renderbuffer *stencilRb
+ = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+ GLbitfield quad_buffers = 0x0;
+ GLbitfield clear_buffers = 0x0;
+ GLuint i;
+
+ /* This makes sure the pipe has the latest scissor, etc values */
+ st_validate_state( st );
+
+ if (mask & BUFFER_BITS_COLOR) {
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
+
+ if (mask & (1 << b)) {
+ struct gl_renderbuffer *rb
+ = ctx->DrawBuffer->Attachment[b].Renderbuffer;
+ struct st_renderbuffer *strb = st_renderbuffer(rb);
+
+ if (!strb || !strb->surface)
+ continue;
+
+ if (check_clear_color_with_quad( ctx, rb ))
+ quad_buffers |= PIPE_CLEAR_COLOR;
+ else
+ clear_buffers |= PIPE_CLEAR_COLOR;
+ }
+ }
+ }
+
+ if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) {
+ /* clearing combined depth + stencil */
+ struct st_renderbuffer *strb = st_renderbuffer(depthRb);
+
+ if (strb->surface) {
+ if (check_clear_depth_stencil_with_quad(ctx, depthRb))
+ quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
+ else
+ clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
+ }
+ }
+ else {
+ /* separate depth/stencil clears */
+ /* I don't think truly separate buffers are actually possible in gallium or hw? */
+ if (mask & BUFFER_BIT_DEPTH) {
+ struct st_renderbuffer *strb = st_renderbuffer(depthRb);
+
+ if (strb->surface) {
+ if (check_clear_depth_with_quad(ctx, depthRb,
+ st->clear.enable_ds_separate))
+ quad_buffers |= PIPE_CLEAR_DEPTH;
+ else
+ clear_buffers |= PIPE_CLEAR_DEPTH;
+ }
+ }
+ if (mask & BUFFER_BIT_STENCIL) {
+ struct st_renderbuffer *strb = st_renderbuffer(stencilRb);
+
+ if (strb->surface) {
+ if (check_clear_stencil_with_quad(ctx, stencilRb,
+ st->clear.enable_ds_separate))
+ quad_buffers |= PIPE_CLEAR_STENCIL;
+ else
+ clear_buffers |= PIPE_CLEAR_STENCIL;
+ }
+ }
+ }
+
+ /*
+ * If we're going to use clear_with_quad() for any reason, use it for
+ * everything possible.
+ */
+ if (quad_buffers) {
+ quad_buffers |= clear_buffers;
+ clear_with_quad(ctx,
+ quad_buffers & PIPE_CLEAR_COLOR,
+ quad_buffers & PIPE_CLEAR_DEPTH,
+ quad_buffers & PIPE_CLEAR_STENCIL);
+ } else if (clear_buffers) {
+ /* driver cannot know it can clear everything if the buffer
+ * is a combined depth/stencil buffer but this wasn't actually
+ * required from the visual. Hence fix this up to avoid potential
+ * read-modify-write in the driver.
+ */
+ if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) &&
+ ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
+ (depthRb == stencilRb) &&
+ (ctx->DrawBuffer->Visual.depthBits == 0 ||
+ ctx->DrawBuffer->Visual.stencilBits == 0))
+ clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
+ st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor,
+ ctx->Depth.Clear, ctx->Stencil.Clear);
+ }
+ if (mask & BUFFER_BIT_ACCUM)
+ st_clear_accum_buffer(ctx,
+ ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
+}
+
+
+void
+st_init_clear_functions(struct dd_function_table *functions)
+{
+ functions->Clear = st_Clear;
+}
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
index 56c7e8581..07527002b 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -1,1368 +1,1371 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Brian Paul
- */
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "main/mtypes.h"
-#include "main/pack.h"
-#include "main/texformat.h"
-#include "main/texstore.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-#include "program/prog_instruction.h"
-
-#include "st_atom.h"
-#include "st_atom_constbuf.h"
-#include "st_cb_drawpixels.h"
-#include "st_cb_readpixels.h"
-#include "st_cb_fbo.h"
-#include "st_context.h"
-#include "st_debug.h"
-#include "st_format.h"
-#include "st_program.h"
-#include "st_texture.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "tgsi/tgsi_ureg.h"
-#include "util/u_draw_quad.h"
-#include "util/u_format.h"
-#include "util/u_inlines.h"
-#include "util/u_math.h"
-#include "util/u_tile.h"
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_drawpix
-
-/**
- * Check if the given program is:
- * 0: MOVE result.color, fragment.color;
- * 1: END;
- */
-static GLboolean
-is_passthrough_program(const struct gl_fragment_program *prog)
-{
- if (prog->Base.NumInstructions == 2) {
- const struct prog_instruction *inst = prog->Base.Instructions;
- if (inst[0].Opcode == OPCODE_MOV &&
- inst[1].Opcode == OPCODE_END &&
- inst[0].DstReg.File == PROGRAM_OUTPUT &&
- inst[0].DstReg.Index == FRAG_RESULT_COLOR &&
- inst[0].DstReg.WriteMask == WRITEMASK_XYZW &&
- inst[0].SrcReg[0].File == PROGRAM_INPUT &&
- inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 &&
- inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) {
- return GL_TRUE;
- }
- }
- return GL_FALSE;
-}
-
-
-
-/**
- * Make fragment shader for glDraw/CopyPixels. This shader is made
- * by combining the pixel transfer shader with the user-defined shader.
- * \param fpIn the current/incoming fragment program
- * \param fpOut returns the combined fragment program
- */
-void
-st_make_drawpix_fragment_program(struct st_context *st,
- struct gl_fragment_program *fpIn,
- struct gl_fragment_program **fpOut)
-{
- struct gl_program *newProg;
-
- if (is_passthrough_program(fpIn)) {
- newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
- &st->pixel_xfer.program->Base);
- }
- else {
-#if 0
- /* debug */
- printf("Base program:\n");
- _mesa_print_program(&fpIn->Base);
- printf("DrawPix program:\n");
- _mesa_print_program(&st->pixel_xfer.program->Base.Base);
-#endif
- newProg = _mesa_combine_programs(st->ctx,
- &st->pixel_xfer.program->Base.Base,
- &fpIn->Base);
- }
-
-#if 0
- /* debug */
- printf("Combined DrawPixels program:\n");
- _mesa_print_program(newProg);
- printf("InputsRead: 0x%x\n", newProg->InputsRead);
- printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
- _mesa_print_parameter_list(newProg->Parameters);
-#endif
-
- *fpOut = (struct gl_fragment_program *) newProg;
-}
-
-
-/**
- * Create fragment program that does a TEX() instruction to get a Z and/or
- * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
- * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
- * Pass fragment color through as-is.
- * \return pointer to the gl_fragment program
- */
-struct gl_fragment_program *
-st_make_drawpix_z_stencil_program(struct st_context *st,
- GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct gl_context *ctx = st->ctx;
- struct gl_program *p;
- struct gl_fragment_program *fp;
- GLuint ic = 0;
- const GLuint shaderIndex = write_depth * 2 + write_stencil;
-
- assert(shaderIndex < Elements(st->drawpix.shaders));
-
- if (st->drawpix.shaders[shaderIndex]) {
- /* already have the proper shader */
- return st->drawpix.shaders[shaderIndex];
- }
-
- /*
- * Create shader now
- */
- p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
- if (!p)
- return NULL;
-
- p->NumInstructions = write_depth ? 2 : 1;
- p->NumInstructions += write_stencil ? 1 : 0;
-
- p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
- if (!p->Instructions) {
- ctx->Driver.DeleteProgram(ctx, p);
- return NULL;
- }
- _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
- if (write_depth) {
- /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = 0;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- }
-
- if (write_stencil) {
- /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */
- p->Instructions[ic].Opcode = OPCODE_TEX;
- p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
- p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
- p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
- p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
- p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
- p->Instructions[ic].TexSrcUnit = 1;
- p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
- ic++;
- }
-
- /* END; */
- p->Instructions[ic++].Opcode = OPCODE_END;
-
- assert(ic == p->NumInstructions);
-
- p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0;
- p->OutputsWritten = 0;
- if (write_depth)
- p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH);
- if (write_stencil)
- p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL);
-
- p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
- if (write_stencil)
- p->SamplersUsed |= 1 << 1;
-
- fp = (struct gl_fragment_program *) p;
-
- /* save the new shader */
- st->drawpix.shaders[shaderIndex] = fp;
-
- return fp;
-}
-
-
-/**
- * Create a simple vertex shader that just passes through the
- * vertex position and texcoord (and optionally, color).
- */
-static void *
-make_passthrough_vertex_shader(struct st_context *st,
- GLboolean passColor)
-{
- if (!st->drawpix.vert_shaders[passColor]) {
- struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
-
- if (ureg == NULL)
- return NULL;
-
- /* MOV result.pos, vertex.pos; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
- ureg_DECL_vs_input( ureg, 0 ));
-
- /* MOV result.texcoord0, vertex.attr[1]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ),
- ureg_DECL_vs_input( ureg, 1 ));
-
- if (passColor) {
- /* MOV result.color0, vertex.attr[2]; */
- ureg_MOV(ureg,
- ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
- ureg_DECL_vs_input( ureg, 2 ));
- }
-
- ureg_END( ureg );
-
- st->drawpix.vert_shaders[passColor] =
- ureg_create_shader_and_destroy( ureg, st->pipe );
- }
-
- return st->drawpix.vert_shaders[passColor];
-}
-
-
-/**
- * Return a texture base format for drawing/copying an image
- * of the given format.
- */
-static GLenum
-base_format(GLenum format)
-{
- switch (format) {
- case GL_DEPTH_COMPONENT:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH_STENCIL:
- return GL_DEPTH_STENCIL;
- case GL_STENCIL_INDEX:
- return GL_STENCIL_INDEX;
- default:
- return GL_RGBA;
- }
-}
-
-
-/**
- * Return a texture internalFormat for drawing/copying an image
- * of the given format and type.
- */
-static GLenum
-internal_format(GLenum format, GLenum type)
-{
- switch (format) {
- case GL_DEPTH_COMPONENT:
- return GL_DEPTH_COMPONENT;
- case GL_DEPTH_STENCIL:
- return GL_DEPTH_STENCIL;
- case GL_STENCIL_INDEX:
- return GL_STENCIL_INDEX;
- default:
- if (_mesa_is_integer_format(format)) {
- switch (type) {
- case GL_BYTE:
- return GL_RGBA8I;
- case GL_UNSIGNED_BYTE:
- return GL_RGBA8UI;
- case GL_SHORT:
- return GL_RGBA16I;
- case GL_UNSIGNED_SHORT:
- return GL_RGBA16UI;
- case GL_INT:
- return GL_RGBA32I;
- case GL_UNSIGNED_INT:
- return GL_RGBA32UI;
- default:
- assert(0 && "Unexpected type in internal_format()");
- return GL_RGBA_INTEGER;
- }
- }
- else {
- return GL_RGBA;
- }
- }
-}
-
-
-/**
- * Create a temporary texture to hold an image of the given size.
- * If width, height are not POT and the driver only handles POT textures,
- * allocate the next larger size of texture that is POT.
- */
-static struct pipe_resource *
-alloc_texture(struct st_context *st, GLsizei width, GLsizei height,
- enum pipe_format texFormat)
-{
- struct pipe_resource *pt;
-
- pt = st_texture_create(st, st->internal_target, texFormat, 0,
- width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-
- return pt;
-}
-
-
-/**
- * Make texture containing an image for glDrawPixels image.
- * If 'pixels' is NULL, leave the texture image data undefined.
- */
-static struct pipe_resource *
-make_texture(struct st_context *st,
- GLsizei width, GLsizei height, GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct gl_context *ctx = st->ctx;
- struct pipe_context *pipe = st->pipe;
- gl_format mformat;
- struct pipe_resource *pt;
- enum pipe_format pipeFormat;
- GLuint cpp;
- GLenum baseFormat, intFormat;
-
- baseFormat = base_format(format);
- intFormat = internal_format(format, type);
-
- mformat = st_ChooseTextureFormat_renderable(ctx, intFormat,
- format, type, GL_FALSE);
- assert(mformat);
-
- pipeFormat = st_mesa_format_to_pipe_format(mformat);
- assert(pipeFormat);
- cpp = util_format_get_blocksize(pipeFormat);
-
- pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
- if (!pixels)
- return NULL;
-
- /* alloc temporary texture */
- pt = alloc_texture(st, width, height, pipeFormat);
- if (!pt) {
- _mesa_unmap_pbo_source(ctx, unpack);
- return NULL;
- }
-
- {
- struct pipe_transfer *transfer;
- static const GLuint dstImageOffsets = 0;
- GLboolean success;
- GLubyte *dest;
- const GLbitfield imageTransferStateSave = ctx->_ImageTransferState;
-
- /* we'll do pixel transfer in a fragment shader */
- ctx->_ImageTransferState = 0x0;
-
- transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
- PIPE_TRANSFER_WRITE, 0, 0,
- width, height);
-
- /* map texture transfer */
- dest = pipe_transfer_map(pipe, transfer);
-
-
- /* Put image into texture transfer.
- * Note that the image is actually going to be upside down in
- * the texture. We deal with that with texcoords.
- */
- success = _mesa_texstore(ctx, 2, /* dims */
- baseFormat, /* baseInternalFormat */
- mformat, /* gl_format */
- dest, /* dest */
- 0, 0, 0, /* dstX/Y/Zoffset */
- transfer->stride, /* dstRowStride, bytes */
- &dstImageOffsets, /* dstImageOffsets */
- width, height, 1, /* size */
- format, type, /* src format/type */
- pixels, /* data source */
- unpack);
-
- /* unmap */
- pipe_transfer_unmap(pipe, transfer);
- pipe->transfer_destroy(pipe, transfer);
-
- assert(success);
-
- /* restore */
- ctx->_ImageTransferState = imageTransferStateSave;
- }
-
- _mesa_unmap_pbo_source(ctx, unpack);
-
- return pt;
-}
-
-
-/**
- * Draw quad with texcoords and optional color.
- * Coords are gallium window coords with y=0=top.
- * \param color may be null
- * \param invertTex if true, flip texcoords vertically
- */
-static void
-draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
- GLfloat x1, GLfloat y1, const GLfloat *color,
- GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */
-
- /* setup vertex data */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat) fb->Width;
- const GLfloat fb_height = (GLfloat) fb->Height;
- const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f;
- const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;
- const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f;
- const GLfloat sLeft = 0.0f, sRight = maxXcoord;
- const GLfloat tTop = invertTex ? maxYcoord : 0.0f;
- const GLfloat tBot = invertTex ? 0.0f : maxYcoord;
- GLuint i;
-
- /* upper-left */
- verts[0][0][0] = clip_x0; /* v[0].attr[0].x */
- verts[0][0][1] = clip_y0; /* v[0].attr[0].y */
-
- /* upper-right */
- verts[1][0][0] = clip_x1;
- verts[1][0][1] = clip_y0;
-
- /* lower-right */
- verts[2][0][0] = clip_x1;
- verts[2][0][1] = clip_y1;
-
- /* lower-left */
- verts[3][0][0] = clip_x0;
- verts[3][0][1] = clip_y1;
-
- verts[0][1][0] = sLeft; /* v[0].attr[1].S */
- verts[0][1][1] = tTop; /* v[0].attr[1].T */
- verts[1][1][0] = sRight;
- verts[1][1][1] = tTop;
- verts[2][1][0] = sRight;
- verts[2][1][1] = tBot;
- verts[3][1][0] = sLeft;
- verts[3][1][1] = tBot;
-
- /* same for all verts: */
- if (color) {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /* v[i].attr[0].z */
- verts[i][0][3] = 1.0f; /* v[i].attr[0].w */
- verts[i][2][0] = color[0]; /* v[i].attr[2].r */
- verts[i][2][1] = color[1]; /* v[i].attr[2].g */
- verts[i][2][2] = color[2]; /* v[i].attr[2].b */
- verts[i][2][3] = color[3]; /* v[i].attr[2].a */
- verts[i][1][2] = 0.0f; /* v[i].attr[1].R */
- verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */
- }
- }
- else {
- for (i = 0; i < 4; i++) {
- verts[i][0][2] = z; /*Z*/
- verts[i][0][3] = 1.0f; /*W*/
- verts[i][1][2] = 0.0f; /*R*/
- verts[i][1][3] = 1.0f; /*Q*/
- }
- }
- }
-
- {
- struct pipe_resource *buf;
-
- /* allocate/load buffer object with vertex data */
- buf = pipe_buffer_create(pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- sizeof(verts));
- pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts);
-
- util_draw_vertex_buffer(pipe, buf, 0,
- PIPE_PRIM_QUADS,
- 4, /* verts */
- 3); /* attribs/vert */
- pipe_resource_reference(&buf, NULL);
- }
-}
-
-
-
-static void
-draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
- GLsizei width, GLsizei height,
- GLfloat zoomX, GLfloat zoomY,
- struct pipe_sampler_view **sv,
- int num_sampler_view,
- void *driver_vp,
- void *driver_fp,
- const GLfloat *color,
- GLboolean invertTex,
- GLboolean write_depth, GLboolean write_stencil)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = st->cso_context;
- GLfloat x0, y0, x1, y1;
- GLsizei maxSize;
- boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT;
-
- /* limit checks */
- /* XXX if DrawPixels image is larger than max texture size, break
- * it up into chunks.
- */
- maxSize = 1 << (pipe->screen->get_param(pipe->screen,
- PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
- assert(width <= maxSize);
- assert(height <= maxSize);
-
- cso_save_rasterizer(cso);
- cso_save_viewport(cso);
- cso_save_samplers(cso);
- cso_save_fragment_sampler_views(cso);
- cso_save_fragment_shader(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
- if (write_stencil) {
- cso_save_depth_stencil_alpha(cso);
- cso_save_blend(cso);
- }
-
- /* rasterizer state: just scissor */
- {
- struct pipe_rasterizer_state rasterizer;
- memset(&rasterizer, 0, sizeof(rasterizer));
- rasterizer.gl_rasterization_rules = 1;
- rasterizer.scissor = ctx->Scissor.Enabled;
- cso_set_rasterizer(cso, &rasterizer);
- }
-
- if (write_stencil) {
- /* Stencil writing bypasses the normal fragment pipeline to
- * disable color writing and set stencil test to always pass.
- */
- struct pipe_depth_stencil_alpha_state dsa;
- struct pipe_blend_state blend;
-
- /* depth/stencil */
- memset(&dsa, 0, sizeof(dsa));
- dsa.stencil[0].enabled = 1;
- dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
- dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
- dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
- if (write_depth) {
- /* writing depth+stencil: depth test always passes */
- dsa.depth.enabled = 1;
- dsa.depth.writemask = ctx->Depth.Mask;
- dsa.depth.func = PIPE_FUNC_ALWAYS;
- }
- cso_set_depth_stencil_alpha(cso, &dsa);
-
- /* blend (colormask) */
- memset(&blend, 0, sizeof(blend));
- cso_set_blend(cso, &blend);
- }
-
- /* fragment shader state: TEX lookup program */
- cso_set_fragment_shader_handle(cso, driver_fp);
-
- /* vertex shader state: position + texcoord pass-through */
- cso_set_vertex_shader_handle(cso, driver_vp);
-
-
- /* texture sampling state: */
- {
- struct pipe_sampler_state sampler;
- memset(&sampler, 0, sizeof(sampler));
- sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
- sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
- sampler.wrap_r = PIPE_TEX_WRAP_CLAMP;
- sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
- sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
- sampler.normalized_coords = normalized;
-
- cso_single_sampler(cso, 0, &sampler);
- if (num_sampler_view > 1) {
- cso_single_sampler(cso, 1, &sampler);
- }
- cso_single_sampler_done(cso);
- }
-
- /* viewport state: viewport matching window dims */
- {
- const float w = (float) ctx->DrawBuffer->Width;
- const float h = (float) ctx->DrawBuffer->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * w;
- vp.scale[1] = -0.5f * h;
- vp.scale[2] = 0.5f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * w;
- vp.translate[1] = 0.5f * h;
- vp.translate[2] = 0.5f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
- cso_set_vertex_elements(cso, 3, st->velems_util_draw);
-
- /* texture state: */
- cso_set_fragment_sampler_views(cso, num_sampler_view, sv);
-
- /* Compute Gallium window coords (y=0=top) with pixel zoom.
- * Recall that these coords are transformed by the current
- * vertex shader and viewport transformation.
- */
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
- y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
- invertTex = !invertTex;
- }
-
- x0 = (GLfloat) x;
- x1 = x + width * ctx->Pixel.ZoomX;
- y0 = (GLfloat) y;
- y1 = y + height * ctx->Pixel.ZoomY;
-
- /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
- z = z * 2.0 - 1.0;
-
- draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
- normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
- normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
-
- /* restore state */
- cso_restore_rasterizer(cso);
- cso_restore_viewport(cso);
- cso_restore_samplers(cso);
- cso_restore_fragment_sampler_views(cso);
- cso_restore_fragment_shader(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
- if (write_stencil) {
- cso_restore_depth_stencil_alpha(cso);
- cso_restore_blend(cso);
- }
-}
-
-
-/**
- * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we
- * can't use a fragment shader to write stencil values.
- */
-static void
-draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height, GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack,
- const GLvoid *pixels)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct st_renderbuffer *strb;
- enum pipe_transfer_usage usage;
- struct pipe_transfer *pt;
- const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
- GLint skipPixels;
- ubyte *stmap;
- struct gl_pixelstore_attrib clippedUnpack = *unpack;
-
- if (!zoom) {
- if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
- &clippedUnpack)) {
- /* totally clipped */
- return;
- }
- }
-
- strb = st_renderbuffer(ctx->DrawBuffer->
- Attachment[BUFFER_STENCIL].Renderbuffer);
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- y = ctx->DrawBuffer->Height - y - height;
- }
-
- if(format != GL_DEPTH_STENCIL &&
- util_format_get_component_bits(strb->format,
- UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
- usage = PIPE_TRANSFER_READ_WRITE;
- else
- usage = PIPE_TRANSFER_WRITE;
-
- pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0,
- usage, x, y,
- width, height);
-
- stmap = pipe_transfer_map(pipe, pt);
-
- pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
- assert(pixels);
-
- /* if width > MAX_WIDTH, have to process image in chunks */
- skipPixels = 0;
- while (skipPixels < width) {
- const GLint spanX = skipPixels;
- const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
- GLint row;
- for (row = 0; row < height; row++) {
- GLubyte sValues[MAX_WIDTH];
- GLuint zValues[MAX_WIDTH];
- GLenum destType = GL_UNSIGNED_BYTE;
- const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels,
- width, height,
- format, type,
- row, skipPixels);
- _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues,
- type, source, &clippedUnpack,
- ctx->_ImageTransferState);
-
- if (format == GL_DEPTH_STENCIL) {
- _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues,
- (1 << 24) - 1, type, source,
- &clippedUnpack);
- }
-
- if (zoom) {
- _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with "
- "zoom not complete");
- }
-
- {
- GLint spanY;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- spanY = height - row - 1;
- }
- else {
- spanY = row;
- }
-
- /* now pack the stencil (and Z) values in the dest format */
- switch (pt->resource->format) {
- case PIPE_FORMAT_S8_USCALED:
- {
- ubyte *dest = stmap + spanY * pt->stride + spanX;
- assert(usage == PIPE_TRANSFER_WRITE);
- memcpy(dest, sValues, spanWidth);
- }
- break;
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- if (format == GL_DEPTH_STENCIL) {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = zValues[k] | (sValues[k] << 24);
- }
- }
- else {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24);
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- if (format == GL_DEPTH_STENCIL) {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff);
- }
- }
- else {
- uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
- GLint k;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (k = 0; k < spanWidth; k++) {
- dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff);
- }
- }
- break;
- default:
- assert(0);
- }
- }
- }
- skipPixels += spanWidth;
- }
-
- _mesa_unmap_pbo_source(ctx, &clippedUnpack);
-
- /* unmap the stencil buffer */
- pipe_transfer_unmap(pipe, pt);
- pipe->transfer_destroy(pipe, pt);
-}
-
-
-/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for RGBA data.
- */
-static struct st_fp_variant *
-get_color_fp_variant(struct st_context *st)
-{
- struct gl_context *ctx = st->ctx;
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
- ctx->Pixel.RedScale != 1.0 ||
- ctx->Pixel.GreenBias != 0.0 ||
- ctx->Pixel.GreenScale != 1.0 ||
- ctx->Pixel.BlueBias != 0.0 ||
- ctx->Pixel.BlueScale != 1.0 ||
- ctx->Pixel.AlphaBias != 0.0 ||
- ctx->Pixel.AlphaScale != 1.0);
- key.pixelMaps = ctx->Pixel.MapColorFlag;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
- * Get fragment program variant for a glDrawPixels or glCopyPixels
- * command for depth/stencil data.
- */
-static struct st_fp_variant *
-get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth,
- GLboolean write_stencil)
-{
- struct st_fp_variant_key key;
- struct st_fp_variant *fpv;
-
- memset(&key, 0, sizeof(key));
-
- key.st = st;
- key.drawpixels = 1;
- key.drawpixels_z = write_depth;
- key.drawpixels_stencil = write_stencil;
-
- fpv = st_get_fp_variant(st, st->fp, &key);
-
- return fpv;
-}
-
-
-/**
- * Called via ctx->Driver.DrawPixels()
- */
-static void
-st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
- GLsizei width, GLsizei height,
- GLenum format, GLenum type,
- const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels)
-{
- void *driver_vp, *driver_fp;
- struct st_context *st = st_context(ctx);
- const GLfloat *color;
- struct pipe_context *pipe = st->pipe;
- GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
- struct pipe_sampler_view *sv[2];
- int num_sampler_view = 1;
- enum pipe_format stencil_format = PIPE_FORMAT_NONE;
- struct st_fp_variant *fpv;
-
- if (format == GL_DEPTH_STENCIL)
- write_stencil = write_depth = GL_TRUE;
- else if (format == GL_STENCIL_INDEX)
- write_stencil = GL_TRUE;
- else if (format == GL_DEPTH_COMPONENT)
- write_depth = GL_TRUE;
-
- if (write_stencil) {
- enum pipe_format tex_format;
- /* can we write to stencil if not fallback */
- if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT))
- goto stencil_fallback;
-
- tex_format = st_choose_format(st->pipe->screen, base_format(format),
- PIPE_TEXTURE_2D,
- 0, PIPE_BIND_SAMPLER_VIEW);
- if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
- stencil_format = PIPE_FORMAT_X24S8_USCALED;
- else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM)
- stencil_format = PIPE_FORMAT_S8X24_USCALED;
- else
- stencil_format = PIPE_FORMAT_S8_USCALED;
- if (stencil_format == PIPE_FORMAT_NONE)
- goto stencil_fallback;
- }
-
- /* Mesa state should be up to date by now */
- assert(ctx->NewState == 0x0);
-
- st_validate_state(st);
-
- /*
- * Get vertex/fragment shaders
- */
- if (write_depth || write_stencil) {
- fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil);
-
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
-
- color = ctx->Current.RasterColor;
- }
- else {
- fpv = get_color_fp_variant(st);
-
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
-
- color = NULL;
- if (st->pixel_xfer.pixelmap_enabled) {
- sv[1] = st->pixel_xfer.pixelmap_sampler_view;
- num_sampler_view++;
- }
- }
-
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
- /* draw with textured quad */
- {
- struct pipe_resource *pt
- = make_texture(st, width, height, format, type, unpack, pixels);
- if (pt) {
- sv[0] = st_create_texture_sampler_view(st->pipe, pt);
-
- if (sv[0]) {
- if (write_stencil) {
- sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
- stencil_format);
- num_sampler_view++;
- }
-
- draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
- width, height,
- ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
- sv,
- num_sampler_view,
- driver_vp,
- driver_fp,
- color, GL_FALSE, write_depth, write_stencil);
- pipe_sampler_view_reference(&sv[0], NULL);
- if (num_sampler_view > 1)
- pipe_sampler_view_reference(&sv[1], NULL);
- }
- pipe_resource_reference(&pt, NULL);
- }
- }
- return;
-
-stencil_fallback:
- draw_stencil_pixels(ctx, x, y, width, height, format, type,
- unpack, pixels);
-}
-
-
-
-/**
- * Software fallback for glCopyPixels(GL_STENCIL).
- */
-static void
-copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty)
-{
- struct st_renderbuffer *rbDraw;
- struct pipe_context *pipe = st_context(ctx)->pipe;
- enum pipe_transfer_usage usage;
- struct pipe_transfer *ptDraw;
- ubyte *drawMap;
- ubyte *buffer;
- int i;
-
- buffer = malloc(width * height * sizeof(ubyte));
- if (!buffer) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)");
- return;
- }
-
- /* Get the dest renderbuffer. If there's a wrapper, use the
- * underlying renderbuffer.
- */
- rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
- if (rbDraw->Base.Wrapped)
- rbDraw = st_renderbuffer(rbDraw->Base.Wrapped);
-
- /* this will do stencil pixel transfer ops */
- st_read_stencil_pixels(ctx, srcx, srcy, width, height,
- GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
- &ctx->DefaultPacking, buffer);
-
- if (0) {
- /* debug code: dump stencil values */
- GLint row, col;
- for (row = 0; row < height; row++) {
- printf("%3d: ", row);
- for (col = 0; col < width; col++) {
- printf("%02x ", buffer[col + row * width]);
- }
- printf("\n");
- }
- }
-
- if (util_format_get_component_bits(rbDraw->format,
- UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
- usage = PIPE_TRANSFER_READ_WRITE;
- else
- usage = PIPE_TRANSFER_WRITE;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- dsty = rbDraw->Base.Height - dsty - height;
- }
-
- ptDraw = pipe_get_transfer(st_context(ctx)->pipe,
- rbDraw->texture, 0, 0,
- usage, dstx, dsty,
- width, height);
-
- assert(util_format_get_blockwidth(ptDraw->resource->format) == 1);
- assert(util_format_get_blockheight(ptDraw->resource->format) == 1);
-
- /* map the stencil buffer */
- drawMap = pipe_transfer_map(pipe, ptDraw);
-
- /* draw */
- /* XXX PixelZoom not handled yet */
- for (i = 0; i < height; i++) {
- ubyte *dst;
- const ubyte *src;
- int y;
-
- y = i;
-
- if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
- y = height - y - 1;
- }
-
- dst = drawMap + y * ptDraw->stride;
- src = buffer + i * width;
-
- switch (ptDraw->resource->format) {
- case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
- {
- uint *dst4 = (uint *) dst;
- int j;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (j = 0; j < width; j++) {
- *dst4 = (*dst4 & 0xffffff) | (src[j] << 24);
- dst4++;
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
- {
- uint *dst4 = (uint *) dst;
- int j;
- assert(usage == PIPE_TRANSFER_READ_WRITE);
- for (j = 0; j < width; j++) {
- *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff);
- dst4++;
- }
- }
- break;
- case PIPE_FORMAT_S8_USCALED:
- assert(usage == PIPE_TRANSFER_WRITE);
- memcpy(dst, src, width);
- break;
- default:
- assert(0);
- }
- }
-
- free(buffer);
-
- /* unmap the stencil buffer */
- pipe_transfer_unmap(pipe, ptDraw);
- pipe->transfer_destroy(pipe, ptDraw);
-}
-
-
-static void
-st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
- GLsizei width, GLsizei height,
- GLint dstx, GLint dsty, GLenum type)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
- struct st_renderbuffer *rbRead;
- void *driver_vp, *driver_fp;
- struct pipe_resource *pt;
- struct pipe_sampler_view *sv[2];
- int num_sampler_view = 1;
- GLfloat *color;
- enum pipe_format srcFormat, texFormat;
- GLboolean invertTex = GL_FALSE;
- GLint readX, readY, readW, readH;
- GLuint sample_count;
- struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
- struct st_fp_variant *fpv;
-
- st_validate_state(st);
-
- if (type == GL_STENCIL) {
- /* can't use texturing to do stencil */
- copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
- return;
- }
-
- /*
- * Get vertex/fragment shaders
- */
- if (type == GL_COLOR) {
- rbRead = st_get_color_read_renderbuffer(ctx);
- color = NULL;
-
- fpv = get_color_fp_variant(st);
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
-
- if (st->pixel_xfer.pixelmap_enabled) {
- sv[1] = st->pixel_xfer.pixelmap_sampler_view;
- num_sampler_view++;
- }
- }
- else {
- assert(type == GL_DEPTH);
- rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
- color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
-
- fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
- driver_fp = fpv->driver_shader;
-
- driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
- }
-
- /* update fragment program constants */
- st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-
-
- if (rbRead->Base.Wrapped)
- rbRead = st_renderbuffer(rbRead->Base.Wrapped);
-
- sample_count = rbRead->texture->nr_samples;
- /* I believe this would be legal, presumably would need to do a resolve
- for color, and for depth/stencil spec says to just use one of the
- depth/stencil samples per pixel? Need some transfer clarifications. */
- assert(sample_count < 2);
-
- srcFormat = rbRead->texture->format;
-
- if (screen->is_format_supported(screen, srcFormat, st->internal_target,
- sample_count,
- PIPE_BIND_SAMPLER_VIEW, 0)) {
- texFormat = srcFormat;
- }
- else {
- /* srcFormat can't be used as a texture format */
- if (type == GL_DEPTH) {
- texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT,
- st->internal_target, sample_count,
- PIPE_BIND_DEPTH_STENCIL);
- assert(texFormat != PIPE_FORMAT_NONE);
- }
- else {
- /* default color format */
- texFormat = st_choose_format(screen, GL_RGBA, st->internal_target,
- sample_count, PIPE_BIND_SAMPLER_VIEW);
- assert(texFormat != PIPE_FORMAT_NONE);
- }
- }
-
- /* Invert src region if needed */
- if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
- srcy = ctx->ReadBuffer->Height - srcy - height;
- invertTex = !invertTex;
- }
-
- /* Clip the read region against the src buffer bounds.
- * We'll still allocate a temporary buffer/texture for the original
- * src region size but we'll only read the region which is on-screen.
- * This may mean that we draw garbage pixels into the dest region, but
- * that's expected.
- */
- readX = srcx;
- readY = srcy;
- readW = width;
- readH = height;
- _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack);
- readW = MAX2(0, readW);
- readH = MAX2(0, readH);
-
- /* alloc temporary texture */
- pt = alloc_texture(st, width, height, texFormat);
- if (!pt)
- return;
-
- sv[0] = st_create_texture_sampler_view(st->pipe, pt);
- if (!sv[0]) {
- pipe_resource_reference(&pt, NULL);
- return;
- }
-
- /* Make temporary texture which is a copy of the src region.
- */
- if (srcFormat == texFormat) {
- struct pipe_box src_box;
- u_box_2d(readX, readY, readW, readH, &src_box);
- /* copy source framebuffer surface into mipmap/texture */
- pipe->resource_copy_region(pipe,
- pt, /* dest tex */
- 0,
- pack.SkipPixels, pack.SkipRows, 0, /* dest pos */
- rbRead->texture, /* src tex */
- 0,
- &src_box);
-
- }
- else {
- /* CPU-based fallback/conversion */
- struct pipe_transfer *ptRead =
- pipe_get_transfer(st->pipe, rbRead->texture, 0, 0,
- PIPE_TRANSFER_READ,
- readX, readY, readW, readH);
- struct pipe_transfer *ptTex;
- enum pipe_transfer_usage transfer_usage;
-
- if (ST_DEBUG & DEBUG_FALLBACK)
- debug_printf("%s: fallback processing\n", __FUNCTION__);
-
- if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format))
- transfer_usage = PIPE_TRANSFER_READ_WRITE;
- else
- transfer_usage = PIPE_TRANSFER_WRITE;
-
- ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage,
- 0, 0, width, height);
-
- /* copy image from ptRead surface to ptTex surface */
- if (type == GL_COLOR) {
- /* alternate path using get/put_tile() */
- GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
- enum pipe_format readFormat, drawFormat;
- readFormat = util_format_linear(rbRead->texture->format);
- drawFormat = util_format_linear(pt->format);
- pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH,
- readFormat, buf);
- pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
- readW, readH, drawFormat, buf);
- free(buf);
- }
- else {
- /* GL_DEPTH */
- GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
- pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf);
- pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
- readW, readH, buf);
- free(buf);
- }
-
- pipe->transfer_destroy(pipe, ptRead);
- pipe->transfer_destroy(pipe, ptTex);
- }
-
- /* OK, the texture 'pt' contains the src image/pixels. Now draw a
- * textured quad with that texture.
- */
- draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
- width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
- sv,
- num_sampler_view,
- driver_vp,
- driver_fp,
- color, invertTex, GL_FALSE, GL_FALSE);
-
- pipe_resource_reference(&pt, NULL);
- pipe_sampler_view_reference(&sv[0], NULL);
-}
-
-
-
-void st_init_drawpixels_functions(struct dd_function_table *functions)
-{
- functions->DrawPixels = st_DrawPixels;
- functions->CopyPixels = st_CopyPixels;
-}
-
-
-void
-st_destroy_drawpix(struct st_context *st)
-{
- GLuint i;
-
- for (i = 0; i < Elements(st->drawpix.shaders); i++) {
- if (st->drawpix.shaders[i])
- _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL);
- }
-
- st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
- if (st->drawpix.vert_shaders[0])
- ureg_free_tokens(st->drawpix.vert_shaders[0]);
- if (st->drawpix.vert_shaders[1])
- ureg_free_tokens(st->drawpix.vert_shaders[1]);
-}
-
-#endif /* FEATURE_drawpix */
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Brian Paul
+ */
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/bufferobj.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "main/mtypes.h"
+#include "main/pack.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "program/program.h"
+#include "program/prog_print.h"
+#include "program/prog_instruction.h"
+
+#include "st_atom.h"
+#include "st_atom_constbuf.h"
+#include "st_cb_drawpixels.h"
+#include "st_cb_readpixels.h"
+#include "st_cb_fbo.h"
+#include "st_context.h"
+#include "st_debug.h"
+#include "st_format.h"
+#include "st_program.h"
+#include "st_texture.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_ureg.h"
+#include "util/u_draw_quad.h"
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_math.h"
+#include "util/u_tile.h"
+#include "cso_cache/cso_context.h"
+
+
+#if FEATURE_drawpix
+
+/**
+ * Check if the given program is:
+ * 0: MOVE result.color, fragment.color;
+ * 1: END;
+ */
+static GLboolean
+is_passthrough_program(const struct gl_fragment_program *prog)
+{
+ if (prog->Base.NumInstructions == 2) {
+ const struct prog_instruction *inst = prog->Base.Instructions;
+ if (inst[0].Opcode == OPCODE_MOV &&
+ inst[1].Opcode == OPCODE_END &&
+ inst[0].DstReg.File == PROGRAM_OUTPUT &&
+ inst[0].DstReg.Index == FRAG_RESULT_COLOR &&
+ inst[0].DstReg.WriteMask == WRITEMASK_XYZW &&
+ inst[0].SrcReg[0].File == PROGRAM_INPUT &&
+ inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 &&
+ inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) {
+ return GL_TRUE;
+ }
+ }
+ return GL_FALSE;
+}
+
+
+
+/**
+ * Make fragment shader for glDraw/CopyPixels. This shader is made
+ * by combining the pixel transfer shader with the user-defined shader.
+ * \param fpIn the current/incoming fragment program
+ * \param fpOut returns the combined fragment program
+ */
+void
+st_make_drawpix_fragment_program(struct st_context *st,
+ struct gl_fragment_program *fpIn,
+ struct gl_fragment_program **fpOut)
+{
+ struct gl_program *newProg;
+
+ if (is_passthrough_program(fpIn)) {
+ newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
+ &st->pixel_xfer.program->Base);
+ }
+ else {
+#if 0
+ /* debug */
+ printf("Base program:\n");
+ _mesa_print_program(&fpIn->Base);
+ printf("DrawPix program:\n");
+ _mesa_print_program(&st->pixel_xfer.program->Base.Base);
+#endif
+ newProg = _mesa_combine_programs(st->ctx,
+ &st->pixel_xfer.program->Base.Base,
+ &fpIn->Base);
+ }
+
+#if 0
+ /* debug */
+ printf("Combined DrawPixels program:\n");
+ _mesa_print_program(newProg);
+ printf("InputsRead: 0x%x\n", newProg->InputsRead);
+ printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
+ _mesa_print_parameter_list(newProg->Parameters);
+#endif
+
+ *fpOut = (struct gl_fragment_program *) newProg;
+}
+
+
+/**
+ * Create fragment program that does a TEX() instruction to get a Z and/or
+ * stencil value value, then writes to FRAG_RESULT_DEPTH/FRAG_RESULT_STENCIL.
+ * Used for glDrawPixels(GL_DEPTH_COMPONENT / GL_STENCIL_INDEX).
+ * Pass fragment color through as-is.
+ * \return pointer to the gl_fragment program
+ */
+struct gl_fragment_program *
+st_make_drawpix_z_stencil_program(struct st_context *st,
+ GLboolean write_depth,
+ GLboolean write_stencil)
+{
+ struct gl_context *ctx = st->ctx;
+ struct gl_program *p;
+ struct gl_fragment_program *fp;
+ GLuint ic = 0;
+ const GLuint shaderIndex = write_depth * 2 + write_stencil;
+
+ assert(shaderIndex < Elements(st->drawpix.shaders));
+
+ if (st->drawpix.shaders[shaderIndex]) {
+ /* already have the proper shader */
+ return st->drawpix.shaders[shaderIndex];
+ }
+
+ /*
+ * Create shader now
+ */
+ p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+ if (!p)
+ return NULL;
+
+ p->NumInstructions = write_depth ? 2 : 1;
+ p->NumInstructions += write_stencil ? 1 : 0;
+
+ p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
+ if (!p->Instructions) {
+ ctx->Driver.DeleteProgram(ctx, p);
+ return NULL;
+ }
+ _mesa_init_instructions(p->Instructions, p->NumInstructions);
+
+ if (write_depth) {
+ /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
+ p->Instructions[ic].Opcode = OPCODE_TEX;
+ p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
+ p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
+ p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
+ p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+ p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+ p->Instructions[ic].TexSrcUnit = 0;
+ p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+ ic++;
+ }
+
+ if (write_stencil) {
+ /* TEX result.stencil, fragment.texcoord[0], texture[0], 2D; */
+ p->Instructions[ic].Opcode = OPCODE_TEX;
+ p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
+ p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
+ p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
+ p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+ p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+ p->Instructions[ic].TexSrcUnit = 1;
+ p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+ ic++;
+ }
+
+ /* END; */
+ p->Instructions[ic++].Opcode = OPCODE_END;
+
+ assert(ic == p->NumInstructions);
+
+ p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0;
+ p->OutputsWritten = 0;
+ if (write_depth)
+ p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH);
+ if (write_stencil)
+ p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL);
+
+ p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */
+ if (write_stencil)
+ p->SamplersUsed |= 1 << 1;
+
+ fp = (struct gl_fragment_program *) p;
+
+ /* save the new shader */
+ st->drawpix.shaders[shaderIndex] = fp;
+
+ return fp;
+}
+
+
+/**
+ * Create a simple vertex shader that just passes through the
+ * vertex position and texcoord (and optionally, color).
+ */
+static void *
+make_passthrough_vertex_shader(struct st_context *st,
+ GLboolean passColor)
+{
+ if (!st->drawpix.vert_shaders[passColor]) {
+ struct ureg_program *ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
+
+ if (ureg == NULL)
+ return NULL;
+
+ /* MOV result.pos, vertex.pos; */
+ ureg_MOV(ureg,
+ ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, 0 ),
+ ureg_DECL_vs_input( ureg, 0 ));
+
+ /* MOV result.texcoord0, vertex.attr[1]; */
+ ureg_MOV(ureg,
+ ureg_DECL_output( ureg, TGSI_SEMANTIC_GENERIC, 0 ),
+ ureg_DECL_vs_input( ureg, 1 ));
+
+ if (passColor) {
+ /* MOV result.color0, vertex.attr[2]; */
+ ureg_MOV(ureg,
+ ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ),
+ ureg_DECL_vs_input( ureg, 2 ));
+ }
+
+ ureg_END( ureg );
+
+ st->drawpix.vert_shaders[passColor] =
+ ureg_create_shader_and_destroy( ureg, st->pipe );
+ }
+
+ return st->drawpix.vert_shaders[passColor];
+}
+
+
+/**
+ * Return a texture base format for drawing/copying an image
+ * of the given format.
+ */
+static GLenum
+base_format(GLenum format)
+{
+ switch (format) {
+ case GL_DEPTH_COMPONENT:
+ return GL_DEPTH_COMPONENT;
+ case GL_DEPTH_STENCIL:
+ return GL_DEPTH_STENCIL;
+ case GL_STENCIL_INDEX:
+ return GL_STENCIL_INDEX;
+ default:
+ return GL_RGBA;
+ }
+}
+
+
+/**
+ * Return a texture internalFormat for drawing/copying an image
+ * of the given format and type.
+ */
+static GLenum
+internal_format(GLenum format, GLenum type)
+{
+ switch (format) {
+ case GL_DEPTH_COMPONENT:
+ return GL_DEPTH_COMPONENT;
+ case GL_DEPTH_STENCIL:
+ return GL_DEPTH_STENCIL;
+ case GL_STENCIL_INDEX:
+ return GL_STENCIL_INDEX;
+ default:
+ if (_mesa_is_integer_format(format)) {
+ switch (type) {
+ case GL_BYTE:
+ return GL_RGBA8I;
+ case GL_UNSIGNED_BYTE:
+ return GL_RGBA8UI;
+ case GL_SHORT:
+ return GL_RGBA16I;
+ case GL_UNSIGNED_SHORT:
+ return GL_RGBA16UI;
+ case GL_INT:
+ return GL_RGBA32I;
+ case GL_UNSIGNED_INT:
+ return GL_RGBA32UI;
+ default:
+ assert(0 && "Unexpected type in internal_format()");
+ return GL_RGBA_INTEGER;
+ }
+ }
+ else {
+ return GL_RGBA;
+ }
+ }
+}
+
+
+/**
+ * Create a temporary texture to hold an image of the given size.
+ * If width, height are not POT and the driver only handles POT textures,
+ * allocate the next larger size of texture that is POT.
+ */
+static struct pipe_resource *
+alloc_texture(struct st_context *st, GLsizei width, GLsizei height,
+ enum pipe_format texFormat)
+{
+ struct pipe_resource *pt;
+
+ pt = st_texture_create(st, st->internal_target, texFormat, 0,
+ width, height, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+
+ return pt;
+}
+
+
+/**
+ * Make texture containing an image for glDrawPixels image.
+ * If 'pixels' is NULL, leave the texture image data undefined.
+ */
+static struct pipe_resource *
+make_texture(struct st_context *st,
+ GLsizei width, GLsizei height, GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
+{
+ struct gl_context *ctx = st->ctx;
+ struct pipe_context *pipe = st->pipe;
+ gl_format mformat;
+ struct pipe_resource *pt;
+ enum pipe_format pipeFormat;
+ GLuint cpp;
+ GLenum baseFormat, intFormat;
+
+ baseFormat = base_format(format);
+ intFormat = internal_format(format, type);
+
+ mformat = st_ChooseTextureFormat_renderable(ctx, intFormat,
+ format, type, GL_FALSE);
+ assert(mformat);
+
+ pipeFormat = st_mesa_format_to_pipe_format(mformat);
+ assert(pipeFormat);
+ cpp = util_format_get_blocksize(pipeFormat);
+
+ pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
+ if (!pixels)
+ return NULL;
+
+ /* alloc temporary texture */
+ pt = alloc_texture(st, width, height, pipeFormat);
+ if (!pt) {
+ _mesa_unmap_pbo_source(ctx, unpack);
+ return NULL;
+ }
+
+ {
+ struct pipe_transfer *transfer;
+ static const GLuint dstImageOffsets = 0;
+ GLboolean success;
+ GLubyte *dest;
+ const GLbitfield imageTransferStateSave = ctx->_ImageTransferState;
+
+ /* we'll do pixel transfer in a fragment shader */
+ ctx->_ImageTransferState = 0x0;
+
+ transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ width, height);
+
+ /* map texture transfer */
+ dest = pipe_transfer_map(pipe, transfer);
+
+
+ /* Put image into texture transfer.
+ * Note that the image is actually going to be upside down in
+ * the texture. We deal with that with texcoords.
+ */
+ success = _mesa_texstore(ctx, 2, /* dims */
+ baseFormat, /* baseInternalFormat */
+ mformat, /* gl_format */
+ dest, /* dest */
+ 0, 0, 0, /* dstX/Y/Zoffset */
+ transfer->stride, /* dstRowStride, bytes */
+ &dstImageOffsets, /* dstImageOffsets */
+ width, height, 1, /* size */
+ format, type, /* src format/type */
+ pixels, /* data source */
+ unpack);
+
+ /* unmap */
+ pipe_transfer_unmap(pipe, transfer);
+ pipe->transfer_destroy(pipe, transfer);
+
+ assert(success);
+
+ /* restore */
+ ctx->_ImageTransferState = imageTransferStateSave;
+ }
+
+ _mesa_unmap_pbo_source(ctx, unpack);
+
+ return pt;
+}
+
+
+/**
+ * Draw quad with texcoords and optional color.
+ * Coords are gallium window coords with y=0=top.
+ * \param color may be null
+ * \param invertTex if true, flip texcoords vertically
+ */
+static void
+draw_quad(struct gl_context *ctx, GLfloat x0, GLfloat y0, GLfloat z,
+ GLfloat x1, GLfloat y1, const GLfloat *color,
+ GLboolean invertTex, GLfloat maxXcoord, GLfloat maxYcoord)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */
+
+ /* setup vertex data */
+ {
+ const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const GLfloat fb_width = (GLfloat) fb->Width;
+ const GLfloat fb_height = (GLfloat) fb->Height;
+ const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f;
+ const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f;
+ const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f;
+ const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f;
+ const GLfloat sLeft = 0.0f, sRight = maxXcoord;
+ const GLfloat tTop = invertTex ? maxYcoord : 0.0f;
+ const GLfloat tBot = invertTex ? 0.0f : maxYcoord;
+ GLuint i;
+
+ /* upper-left */
+ verts[0][0][0] = clip_x0; /* v[0].attr[0].x */
+ verts[0][0][1] = clip_y0; /* v[0].attr[0].y */
+
+ /* upper-right */
+ verts[1][0][0] = clip_x1;
+ verts[1][0][1] = clip_y0;
+
+ /* lower-right */
+ verts[2][0][0] = clip_x1;
+ verts[2][0][1] = clip_y1;
+
+ /* lower-left */
+ verts[3][0][0] = clip_x0;
+ verts[3][0][1] = clip_y1;
+
+ verts[0][1][0] = sLeft; /* v[0].attr[1].S */
+ verts[0][1][1] = tTop; /* v[0].attr[1].T */
+ verts[1][1][0] = sRight;
+ verts[1][1][1] = tTop;
+ verts[2][1][0] = sRight;
+ verts[2][1][1] = tBot;
+ verts[3][1][0] = sLeft;
+ verts[3][1][1] = tBot;
+
+ /* same for all verts: */
+ if (color) {
+ for (i = 0; i < 4; i++) {
+ verts[i][0][2] = z; /* v[i].attr[0].z */
+ verts[i][0][3] = 1.0f; /* v[i].attr[0].w */
+ verts[i][2][0] = color[0]; /* v[i].attr[2].r */
+ verts[i][2][1] = color[1]; /* v[i].attr[2].g */
+ verts[i][2][2] = color[2]; /* v[i].attr[2].b */
+ verts[i][2][3] = color[3]; /* v[i].attr[2].a */
+ verts[i][1][2] = 0.0f; /* v[i].attr[1].R */
+ verts[i][1][3] = 1.0f; /* v[i].attr[1].Q */
+ }
+ }
+ else {
+ for (i = 0; i < 4; i++) {
+ verts[i][0][2] = z; /*Z*/
+ verts[i][0][3] = 1.0f; /*W*/
+ verts[i][1][2] = 0.0f; /*R*/
+ verts[i][1][3] = 1.0f; /*Q*/
+ }
+ }
+ }
+
+ {
+ struct pipe_resource *buf;
+
+ /* allocate/load buffer object with vertex data */
+ buf = pipe_buffer_create(pipe->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STATIC,
+ sizeof(verts));
+ pipe_buffer_write(st->pipe, buf, 0, sizeof(verts), verts);
+
+ util_draw_vertex_buffer(pipe, st->cso_context, buf, 0,
+ PIPE_PRIM_QUADS,
+ 4, /* verts */
+ 3); /* attribs/vert */
+ pipe_resource_reference(&buf, NULL);
+ }
+}
+
+
+
+static void
+draw_textured_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
+ GLsizei width, GLsizei height,
+ GLfloat zoomX, GLfloat zoomY,
+ struct pipe_sampler_view **sv,
+ int num_sampler_view,
+ void *driver_vp,
+ void *driver_fp,
+ const GLfloat *color,
+ GLboolean invertTex,
+ GLboolean write_depth, GLboolean write_stencil)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct cso_context *cso = st->cso_context;
+ GLfloat x0, y0, x1, y1;
+ GLsizei maxSize;
+ boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT;
+
+ /* limit checks */
+ /* XXX if DrawPixels image is larger than max texture size, break
+ * it up into chunks.
+ */
+ maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+ PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
+ assert(width <= maxSize);
+ assert(height <= maxSize);
+
+ cso_save_rasterizer(cso);
+ cso_save_viewport(cso);
+ cso_save_samplers(cso);
+ cso_save_fragment_sampler_views(cso);
+ cso_save_fragment_shader(cso);
+ cso_save_vertex_shader(cso);
+ cso_save_vertex_elements(cso);
+ cso_save_vertex_buffers(cso);
+ if (write_stencil) {
+ cso_save_depth_stencil_alpha(cso);
+ cso_save_blend(cso);
+ }
+
+ /* rasterizer state: just scissor */
+ {
+ struct pipe_rasterizer_state rasterizer;
+ memset(&rasterizer, 0, sizeof(rasterizer));
+ rasterizer.gl_rasterization_rules = 1;
+ rasterizer.scissor = ctx->Scissor.Enabled;
+ cso_set_rasterizer(cso, &rasterizer);
+ }
+
+ if (write_stencil) {
+ /* Stencil writing bypasses the normal fragment pipeline to
+ * disable color writing and set stencil test to always pass.
+ */
+ struct pipe_depth_stencil_alpha_state dsa;
+ struct pipe_blend_state blend;
+
+ /* depth/stencil */
+ memset(&dsa, 0, sizeof(dsa));
+ dsa.stencil[0].enabled = 1;
+ dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
+ dsa.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
+ dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+ if (write_depth) {
+ /* writing depth+stencil: depth test always passes */
+ dsa.depth.enabled = 1;
+ dsa.depth.writemask = ctx->Depth.Mask;
+ dsa.depth.func = PIPE_FUNC_ALWAYS;
+ }
+ cso_set_depth_stencil_alpha(cso, &dsa);
+
+ /* blend (colormask) */
+ memset(&blend, 0, sizeof(blend));
+ cso_set_blend(cso, &blend);
+ }
+
+ /* fragment shader state: TEX lookup program */
+ cso_set_fragment_shader_handle(cso, driver_fp);
+
+ /* vertex shader state: position + texcoord pass-through */
+ cso_set_vertex_shader_handle(cso, driver_vp);
+
+
+ /* texture sampling state: */
+ {
+ struct pipe_sampler_state sampler;
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP;
+ sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+ sampler.normalized_coords = normalized;
+
+ cso_single_sampler(cso, 0, &sampler);
+ if (num_sampler_view > 1) {
+ cso_single_sampler(cso, 1, &sampler);
+ }
+ cso_single_sampler_done(cso);
+ }
+
+ /* viewport state: viewport matching window dims */
+ {
+ const float w = (float) ctx->DrawBuffer->Width;
+ const float h = (float) ctx->DrawBuffer->Height;
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * w;
+ vp.scale[1] = -0.5f * h;
+ vp.scale[2] = 0.5f;
+ vp.scale[3] = 1.0f;
+ vp.translate[0] = 0.5f * w;
+ vp.translate[1] = 0.5f * h;
+ vp.translate[2] = 0.5f;
+ vp.translate[3] = 0.0f;
+ cso_set_viewport(cso, &vp);
+ }
+
+ cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
+ /* texture state: */
+ cso_set_fragment_sampler_views(cso, num_sampler_view, sv);
+
+ /* Compute Gallium window coords (y=0=top) with pixel zoom.
+ * Recall that these coords are transformed by the current
+ * vertex shader and viewport transformation.
+ */
+ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_BOTTOM) {
+ y = ctx->DrawBuffer->Height - (int) (y + height * ctx->Pixel.ZoomY);
+ invertTex = !invertTex;
+ }
+
+ x0 = (GLfloat) x;
+ x1 = x + width * ctx->Pixel.ZoomX;
+ y0 = (GLfloat) y;
+ y1 = y + height * ctx->Pixel.ZoomY;
+
+ /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
+ z = z * 2.0 - 1.0;
+
+ draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
+ normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
+ normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
+
+ /* restore state */
+ cso_restore_rasterizer(cso);
+ cso_restore_viewport(cso);
+ cso_restore_samplers(cso);
+ cso_restore_fragment_sampler_views(cso);
+ cso_restore_fragment_shader(cso);
+ cso_restore_vertex_shader(cso);
+ cso_restore_vertex_elements(cso);
+ cso_restore_vertex_buffers(cso);
+ if (write_stencil) {
+ cso_restore_depth_stencil_alpha(cso);
+ cso_restore_blend(cso);
+ }
+}
+
+
+/**
+ * Software fallback to do glDrawPixels(GL_STENCIL_INDEX) when we
+ * can't use a fragment shader to write stencil values.
+ */
+static void
+draw_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height, GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack,
+ const GLvoid *pixels)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct st_renderbuffer *strb;
+ enum pipe_transfer_usage usage;
+ struct pipe_transfer *pt;
+ const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+ GLint skipPixels;
+ ubyte *stmap;
+ struct gl_pixelstore_attrib clippedUnpack = *unpack;
+
+ if (!zoom) {
+ if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
+ &clippedUnpack)) {
+ /* totally clipped */
+ return;
+ }
+ }
+
+ strb = st_renderbuffer(ctx->DrawBuffer->
+ Attachment[BUFFER_STENCIL].Renderbuffer);
+
+ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+ y = ctx->DrawBuffer->Height - y - height;
+ }
+
+ if(format != GL_DEPTH_STENCIL &&
+ util_format_get_component_bits(strb->format,
+ UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
+ usage = PIPE_TRANSFER_READ_WRITE;
+ else
+ usage = PIPE_TRANSFER_WRITE;
+
+ pt = pipe_get_transfer(st_context(ctx)->pipe, strb->texture, 0, 0,
+ usage, x, y,
+ width, height);
+
+ stmap = pipe_transfer_map(pipe, pt);
+
+ pixels = _mesa_map_pbo_source(ctx, &clippedUnpack, pixels);
+ assert(pixels);
+
+ /* if width > MAX_WIDTH, have to process image in chunks */
+ skipPixels = 0;
+ while (skipPixels < width) {
+ const GLint spanX = skipPixels;
+ const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+ GLint row;
+ for (row = 0; row < height; row++) {
+ GLubyte sValues[MAX_WIDTH];
+ GLuint zValues[MAX_WIDTH];
+ GLenum destType = GL_UNSIGNED_BYTE;
+ const GLvoid *source = _mesa_image_address2d(&clippedUnpack, pixels,
+ width, height,
+ format, type,
+ row, skipPixels);
+ _mesa_unpack_stencil_span(ctx, spanWidth, destType, sValues,
+ type, source, &clippedUnpack,
+ ctx->_ImageTransferState);
+
+ if (format == GL_DEPTH_STENCIL) {
+ _mesa_unpack_depth_span(ctx, spanWidth, GL_UNSIGNED_INT, zValues,
+ (1 << 24) - 1, type, source,
+ &clippedUnpack);
+ }
+
+ if (zoom) {
+ _mesa_problem(ctx, "Gallium glDrawPixels(GL_STENCIL) with "
+ "zoom not complete");
+ }
+
+ {
+ GLint spanY;
+
+ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+ spanY = height - row - 1;
+ }
+ else {
+ spanY = row;
+ }
+
+ /* now pack the stencil (and Z) values in the dest format */
+ switch (pt->resource->format) {
+ case PIPE_FORMAT_S8_USCALED:
+ {
+ ubyte *dest = stmap + spanY * pt->stride + spanX;
+ assert(usage == PIPE_TRANSFER_WRITE);
+ memcpy(dest, sValues, spanWidth);
+ }
+ break;
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ if (format == GL_DEPTH_STENCIL) {
+ uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+ GLint k;
+ assert(usage == PIPE_TRANSFER_WRITE);
+ for (k = 0; k < spanWidth; k++) {
+ dest[k] = zValues[k] | (sValues[k] << 24);
+ }
+ }
+ else {
+ uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+ GLint k;
+ assert(usage == PIPE_TRANSFER_READ_WRITE);
+ for (k = 0; k < spanWidth; k++) {
+ dest[k] = (dest[k] & 0xffffff) | (sValues[k] << 24);
+ }
+ }
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ if (format == GL_DEPTH_STENCIL) {
+ uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+ GLint k;
+ assert(usage == PIPE_TRANSFER_WRITE);
+ for (k = 0; k < spanWidth; k++) {
+ dest[k] = (zValues[k] << 8) | (sValues[k] & 0xff);
+ }
+ }
+ else {
+ uint *dest = (uint *) (stmap + spanY * pt->stride + spanX*4);
+ GLint k;
+ assert(usage == PIPE_TRANSFER_READ_WRITE);
+ for (k = 0; k < spanWidth; k++) {
+ dest[k] = (dest[k] & 0xffffff00) | (sValues[k] & 0xff);
+ }
+ }
+ break;
+ default:
+ assert(0);
+ }
+ }
+ }
+ skipPixels += spanWidth;
+ }
+
+ _mesa_unmap_pbo_source(ctx, &clippedUnpack);
+
+ /* unmap the stencil buffer */
+ pipe_transfer_unmap(pipe, pt);
+ pipe->transfer_destroy(pipe, pt);
+}
+
+
+/**
+ * Get fragment program variant for a glDrawPixels or glCopyPixels
+ * command for RGBA data.
+ */
+static struct st_fp_variant *
+get_color_fp_variant(struct st_context *st)
+{
+ struct gl_context *ctx = st->ctx;
+ struct st_fp_variant_key key;
+ struct st_fp_variant *fpv;
+
+ memset(&key, 0, sizeof(key));
+
+ key.st = st;
+ key.drawpixels = 1;
+ key.scaleAndBias = (ctx->Pixel.RedBias != 0.0 ||
+ ctx->Pixel.RedScale != 1.0 ||
+ ctx->Pixel.GreenBias != 0.0 ||
+ ctx->Pixel.GreenScale != 1.0 ||
+ ctx->Pixel.BlueBias != 0.0 ||
+ ctx->Pixel.BlueScale != 1.0 ||
+ ctx->Pixel.AlphaBias != 0.0 ||
+ ctx->Pixel.AlphaScale != 1.0);
+ key.pixelMaps = ctx->Pixel.MapColorFlag;
+
+ fpv = st_get_fp_variant(st, st->fp, &key);
+
+ return fpv;
+}
+
+
+/**
+ * Get fragment program variant for a glDrawPixels or glCopyPixels
+ * command for depth/stencil data.
+ */
+static struct st_fp_variant *
+get_depth_stencil_fp_variant(struct st_context *st, GLboolean write_depth,
+ GLboolean write_stencil)
+{
+ struct st_fp_variant_key key;
+ struct st_fp_variant *fpv;
+
+ memset(&key, 0, sizeof(key));
+
+ key.st = st;
+ key.drawpixels = 1;
+ key.drawpixels_z = write_depth;
+ key.drawpixels_stencil = write_stencil;
+
+ fpv = st_get_fp_variant(st, st->fp, &key);
+
+ return fpv;
+}
+
+
+/**
+ * Called via ctx->Driver.DrawPixels()
+ */
+static void
+st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height,
+ GLenum format, GLenum type,
+ const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels)
+{
+ void *driver_vp, *driver_fp;
+ struct st_context *st = st_context(ctx);
+ const GLfloat *color;
+ struct pipe_context *pipe = st->pipe;
+ GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
+ struct pipe_sampler_view *sv[2];
+ int num_sampler_view = 1;
+ enum pipe_format stencil_format = PIPE_FORMAT_NONE;
+ struct st_fp_variant *fpv;
+
+ if (format == GL_DEPTH_STENCIL)
+ write_stencil = write_depth = GL_TRUE;
+ else if (format == GL_STENCIL_INDEX)
+ write_stencil = GL_TRUE;
+ else if (format == GL_DEPTH_COMPONENT)
+ write_depth = GL_TRUE;
+
+ if (write_stencil) {
+ enum pipe_format tex_format;
+ /* can we write to stencil if not fallback */
+ if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT))
+ goto stencil_fallback;
+
+ tex_format = st_choose_format(st->pipe->screen, base_format(format),
+ PIPE_TEXTURE_2D,
+ 0, PIPE_BIND_SAMPLER_VIEW);
+ if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
+ stencil_format = PIPE_FORMAT_X24S8_USCALED;
+ else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM)
+ stencil_format = PIPE_FORMAT_S8X24_USCALED;
+ else
+ stencil_format = PIPE_FORMAT_S8_USCALED;
+ if (stencil_format == PIPE_FORMAT_NONE)
+ goto stencil_fallback;
+ }
+
+ /* Mesa state should be up to date by now */
+ assert(ctx->NewState == 0x0);
+
+ st_validate_state(st);
+
+ /*
+ * Get vertex/fragment shaders
+ */
+ if (write_depth || write_stencil) {
+ fpv = get_depth_stencil_fp_variant(st, write_depth, write_stencil);
+
+ driver_fp = fpv->driver_shader;
+
+ driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
+
+ color = ctx->Current.RasterColor;
+ }
+ else {
+ fpv = get_color_fp_variant(st);
+
+ driver_fp = fpv->driver_shader;
+
+ driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
+
+ color = NULL;
+ if (st->pixel_xfer.pixelmap_enabled) {
+ sv[1] = st->pixel_xfer.pixelmap_sampler_view;
+ num_sampler_view++;
+ }
+ }
+
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+
+ /* draw with textured quad */
+ {
+ struct pipe_resource *pt
+ = make_texture(st, width, height, format, type, unpack, pixels);
+ if (pt) {
+ sv[0] = st_create_texture_sampler_view(st->pipe, pt);
+
+ if (sv[0]) {
+ if (write_stencil) {
+ sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
+ stencil_format);
+ num_sampler_view++;
+ }
+
+ draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
+ width, height,
+ ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
+ sv,
+ num_sampler_view,
+ driver_vp,
+ driver_fp,
+ color, GL_FALSE, write_depth, write_stencil);
+ pipe_sampler_view_reference(&sv[0], NULL);
+ if (num_sampler_view > 1)
+ pipe_sampler_view_reference(&sv[1], NULL);
+ }
+ pipe_resource_reference(&pt, NULL);
+ }
+ }
+ return;
+
+stencil_fallback:
+ draw_stencil_pixels(ctx, x, y, width, height, format, type,
+ unpack, pixels);
+}
+
+
+
+/**
+ * Software fallback for glCopyPixels(GL_STENCIL).
+ */
+static void
+copy_stencil_pixels(struct gl_context *ctx, GLint srcx, GLint srcy,
+ GLsizei width, GLsizei height,
+ GLint dstx, GLint dsty)
+{
+ struct st_renderbuffer *rbDraw;
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ enum pipe_transfer_usage usage;
+ struct pipe_transfer *ptDraw;
+ ubyte *drawMap;
+ ubyte *buffer;
+ int i;
+
+ buffer = malloc(width * height * sizeof(ubyte));
+ if (!buffer) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)");
+ return;
+ }
+
+ /* Get the dest renderbuffer. If there's a wrapper, use the
+ * underlying renderbuffer.
+ */
+ rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
+ if (rbDraw->Base.Wrapped)
+ rbDraw = st_renderbuffer(rbDraw->Base.Wrapped);
+
+ /* this will do stencil pixel transfer ops */
+ st_read_stencil_pixels(ctx, srcx, srcy, width, height,
+ GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
+ &ctx->DefaultPacking, buffer);
+
+ if (0) {
+ /* debug code: dump stencil values */
+ GLint row, col;
+ for (row = 0; row < height; row++) {
+ printf("%3d: ", row);
+ for (col = 0; col < width; col++) {
+ printf("%02x ", buffer[col + row * width]);
+ }
+ printf("\n");
+ }
+ }
+
+ if (util_format_get_component_bits(rbDraw->format,
+ UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
+ usage = PIPE_TRANSFER_READ_WRITE;
+ else
+ usage = PIPE_TRANSFER_WRITE;
+
+ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+ dsty = rbDraw->Base.Height - dsty - height;
+ }
+
+ ptDraw = pipe_get_transfer(st_context(ctx)->pipe,
+ rbDraw->texture, 0, 0,
+ usage, dstx, dsty,
+ width, height);
+
+ assert(util_format_get_blockwidth(ptDraw->resource->format) == 1);
+ assert(util_format_get_blockheight(ptDraw->resource->format) == 1);
+
+ /* map the stencil buffer */
+ drawMap = pipe_transfer_map(pipe, ptDraw);
+
+ /* draw */
+ /* XXX PixelZoom not handled yet */
+ for (i = 0; i < height; i++) {
+ ubyte *dst;
+ const ubyte *src;
+ int y;
+
+ y = i;
+
+ if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+ y = height - y - 1;
+ }
+
+ dst = drawMap + y * ptDraw->stride;
+ src = buffer + i * width;
+
+ switch (ptDraw->resource->format) {
+ case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
+ {
+ uint *dst4 = (uint *) dst;
+ int j;
+ assert(usage == PIPE_TRANSFER_READ_WRITE);
+ for (j = 0; j < width; j++) {
+ *dst4 = (*dst4 & 0xffffff) | (src[j] << 24);
+ dst4++;
+ }
+ }
+ break;
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ {
+ uint *dst4 = (uint *) dst;
+ int j;
+ assert(usage == PIPE_TRANSFER_READ_WRITE);
+ for (j = 0; j < width; j++) {
+ *dst4 = (*dst4 & 0xffffff00) | (src[j] & 0xff);
+ dst4++;
+ }
+ }
+ break;
+ case PIPE_FORMAT_S8_USCALED:
+ assert(usage == PIPE_TRANSFER_WRITE);
+ memcpy(dst, src, width);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ free(buffer);
+
+ /* unmap the stencil buffer */
+ pipe_transfer_unmap(pipe, ptDraw);
+ pipe->transfer_destroy(pipe, ptDraw);
+}
+
+
+static void
+st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
+ GLsizei width, GLsizei height,
+ GLint dstx, GLint dsty, GLenum type)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct st_renderbuffer *rbRead;
+ void *driver_vp, *driver_fp;
+ struct pipe_resource *pt;
+ struct pipe_sampler_view *sv[2];
+ int num_sampler_view = 1;
+ GLfloat *color;
+ enum pipe_format srcFormat, texFormat;
+ GLboolean invertTex = GL_FALSE;
+ GLint readX, readY, readW, readH;
+ GLuint sample_count;
+ struct gl_pixelstore_attrib pack = ctx->DefaultPacking;
+ struct st_fp_variant *fpv;
+
+ st_validate_state(st);
+
+ if (type == GL_STENCIL) {
+ /* can't use texturing to do stencil */
+ copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
+ return;
+ }
+
+ /*
+ * Get vertex/fragment shaders
+ */
+ if (type == GL_COLOR) {
+ rbRead = st_get_color_read_renderbuffer(ctx);
+ color = NULL;
+
+ fpv = get_color_fp_variant(st);
+ driver_fp = fpv->driver_shader;
+
+ driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
+
+ if (st->pixel_xfer.pixelmap_enabled) {
+ sv[1] = st->pixel_xfer.pixelmap_sampler_view;
+ num_sampler_view++;
+ }
+ }
+ else {
+ assert(type == GL_DEPTH);
+ rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
+ color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+
+ fpv = get_depth_stencil_fp_variant(st, GL_TRUE, GL_FALSE);
+ driver_fp = fpv->driver_shader;
+
+ driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
+ }
+
+ /* update fragment program constants */
+ st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+
+
+ if (rbRead->Base.Wrapped)
+ rbRead = st_renderbuffer(rbRead->Base.Wrapped);
+
+ sample_count = rbRead->texture->nr_samples;
+ /* I believe this would be legal, presumably would need to do a resolve
+ for color, and for depth/stencil spec says to just use one of the
+ depth/stencil samples per pixel? Need some transfer clarifications. */
+ assert(sample_count < 2);
+
+ srcFormat = rbRead->texture->format;
+
+ if (screen->is_format_supported(screen, srcFormat, st->internal_target,
+ sample_count,
+ PIPE_BIND_SAMPLER_VIEW, 0)) {
+ texFormat = srcFormat;
+ }
+ else {
+ /* srcFormat can't be used as a texture format */
+ if (type == GL_DEPTH) {
+ texFormat = st_choose_format(screen, GL_DEPTH_COMPONENT,
+ st->internal_target, sample_count,
+ PIPE_BIND_DEPTH_STENCIL);
+ assert(texFormat != PIPE_FORMAT_NONE);
+ }
+ else {
+ /* default color format */
+ texFormat = st_choose_format(screen, GL_RGBA, st->internal_target,
+ sample_count, PIPE_BIND_SAMPLER_VIEW);
+ assert(texFormat != PIPE_FORMAT_NONE);
+ }
+ }
+
+ /* Invert src region if needed */
+ if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+ srcy = ctx->ReadBuffer->Height - srcy - height;
+ invertTex = !invertTex;
+ }
+
+ /* Clip the read region against the src buffer bounds.
+ * We'll still allocate a temporary buffer/texture for the original
+ * src region size but we'll only read the region which is on-screen.
+ * This may mean that we draw garbage pixels into the dest region, but
+ * that's expected.
+ */
+ readX = srcx;
+ readY = srcy;
+ readW = width;
+ readH = height;
+ _mesa_clip_readpixels(ctx, &readX, &readY, &readW, &readH, &pack);
+ readW = MAX2(0, readW);
+ readH = MAX2(0, readH);
+
+ /* alloc temporary texture */
+ pt = alloc_texture(st, width, height, texFormat);
+ if (!pt)
+ return;
+
+ sv[0] = st_create_texture_sampler_view(st->pipe, pt);
+ if (!sv[0]) {
+ pipe_resource_reference(&pt, NULL);
+ return;
+ }
+
+ /* Make temporary texture which is a copy of the src region.
+ */
+ if (srcFormat == texFormat) {
+ struct pipe_box src_box;
+ u_box_2d(readX, readY, readW, readH, &src_box);
+ /* copy source framebuffer surface into mipmap/texture */
+ pipe->resource_copy_region(pipe,
+ pt, /* dest tex */
+ 0,
+ pack.SkipPixels, pack.SkipRows, 0, /* dest pos */
+ rbRead->texture, /* src tex */
+ 0,
+ &src_box);
+
+ }
+ else {
+ /* CPU-based fallback/conversion */
+ struct pipe_transfer *ptRead =
+ pipe_get_transfer(st->pipe, rbRead->texture, 0, 0,
+ PIPE_TRANSFER_READ,
+ readX, readY, readW, readH);
+ struct pipe_transfer *ptTex;
+ enum pipe_transfer_usage transfer_usage;
+
+ if (ST_DEBUG & DEBUG_FALLBACK)
+ debug_printf("%s: fallback processing\n", __FUNCTION__);
+
+ if (type == GL_DEPTH && util_format_is_depth_and_stencil(pt->format))
+ transfer_usage = PIPE_TRANSFER_READ_WRITE;
+ else
+ transfer_usage = PIPE_TRANSFER_WRITE;
+
+ ptTex = pipe_get_transfer(st->pipe, pt, 0, 0, transfer_usage,
+ 0, 0, width, height);
+
+ /* copy image from ptRead surface to ptTex surface */
+ if (type == GL_COLOR) {
+ /* alternate path using get/put_tile() */
+ GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+ enum pipe_format readFormat, drawFormat;
+ readFormat = util_format_linear(rbRead->texture->format);
+ drawFormat = util_format_linear(pt->format);
+ pipe_get_tile_rgba_format(pipe, ptRead, readX, readY, readW, readH,
+ readFormat, buf);
+ pipe_put_tile_rgba_format(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
+ readW, readH, drawFormat, buf);
+ free(buf);
+ }
+ else {
+ /* GL_DEPTH */
+ GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
+ pipe_get_tile_z(pipe, ptRead, readX, readY, readW, readH, buf);
+ pipe_put_tile_z(pipe, ptTex, pack.SkipPixels, pack.SkipRows,
+ readW, readH, buf);
+ free(buf);
+ }
+
+ pipe->transfer_destroy(pipe, ptRead);
+ pipe->transfer_destroy(pipe, ptTex);
+ }
+
+ /* OK, the texture 'pt' contains the src image/pixels. Now draw a
+ * textured quad with that texture.
+ */
+ draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
+ width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
+ sv,
+ num_sampler_view,
+ driver_vp,
+ driver_fp,
+ color, invertTex, GL_FALSE, GL_FALSE);
+
+ pipe_resource_reference(&pt, NULL);
+ pipe_sampler_view_reference(&sv[0], NULL);
+}
+
+
+
+void st_init_drawpixels_functions(struct dd_function_table *functions)
+{
+ functions->DrawPixels = st_DrawPixels;
+ functions->CopyPixels = st_CopyPixels;
+}
+
+
+void
+st_destroy_drawpix(struct st_context *st)
+{
+ GLuint i;
+
+ for (i = 0; i < Elements(st->drawpix.shaders); i++) {
+ if (st->drawpix.shaders[i])
+ _mesa_reference_fragprog(st->ctx, &st->drawpix.shaders[i], NULL);
+ }
+
+ st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
+ if (st->drawpix.vert_shaders[0])
+ ureg_free_tokens(st->drawpix.vert_shaders[0]);
+ if (st->drawpix.vert_shaders[1])
+ ureg_free_tokens(st->drawpix.vert_shaders[1]);
+}
+
+#endif /* FEATURE_drawpix */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
index 5976f1048..86ceb9d78 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
@@ -1,304 +1,307 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- **************************************************************************/
-
-
-/**
- * Implementation of glDrawTex() for GL_OES_draw_tex
- */
-
-
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_drawtex.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_draw_quad.h"
-#include "util/u_simple_shaders.h"
-
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_OES_draw_texture
-
-
-struct cached_shader
-{
- void *handle;
-
- uint num_attribs;
- uint semantic_names[2 + MAX_TEXTURE_UNITS];
- uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
-};
-
-#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS)
-
-/**
- * Simple linear list cache.
- * Most of the time there'll only be one cached shader.
- */
-static struct cached_shader CachedShaders[MAX_SHADERS];
-static GLuint NumCachedShaders = 0;
-
-
-static void *
-lookup_shader(struct pipe_context *pipe,
- uint num_attribs,
- const uint *semantic_names,
- const uint *semantic_indexes)
-{
- GLuint i, j;
-
- /* look for existing shader with same attributes */
- for (i = 0; i < NumCachedShaders; i++) {
- if (CachedShaders[i].num_attribs == num_attribs) {
- GLboolean match = GL_TRUE;
- for (j = 0; j < num_attribs; j++) {
- if (semantic_names[j] != CachedShaders[i].semantic_names[j] ||
- semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) {
- match = GL_FALSE;
- break;
- }
- }
- if (match)
- return CachedShaders[i].handle;
- }
- }
-
- /* not found - create new one now */
- if (NumCachedShaders >= MAX_SHADERS) {
- return NULL;
- }
-
- CachedShaders[i].num_attribs = num_attribs;
- for (j = 0; j < num_attribs; j++) {
- CachedShaders[i].semantic_names[j] = semantic_names[j];
- CachedShaders[i].semantic_indexes[j] = semantic_indexes[j];
- }
-
- CachedShaders[i].handle =
- util_make_vertex_passthrough_shader(pipe,
- num_attribs,
- semantic_names,
- semantic_indexes);
- NumCachedShaders++;
-
- return CachedShaders[i].handle;
-}
-
-static void
-st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
- GLfloat width, GLfloat height)
-{
- struct st_context *st = ctx->st;
- struct pipe_context *pipe = st->pipe;
- struct cso_context *cso = ctx->st->cso_context;
- struct pipe_resource *vbuffer;
- struct pipe_transfer *vbuffer_transfer;
- GLuint i, numTexCoords, numAttribs;
- GLboolean emitColor;
- uint semantic_names[2 + MAX_TEXTURE_UNITS];
- uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
- struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
- GLbitfield inputs = VERT_BIT_POS;
-
- st_validate_state(st);
-
- /* determine if we need vertex color */
- if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
- emitColor = GL_TRUE;
- else
- emitColor = GL_FALSE;
-
- /* determine how many enabled sets of texcoords */
- numTexCoords = 0;
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
- inputs |= VERT_BIT_TEX(i);
- numTexCoords++;
- }
- }
-
- /* total number of attributes per vertex */
- numAttribs = 1 + emitColor + numTexCoords;
-
-
- /* create the vertex buffer */
- vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
- numAttribs * 4 * 4 * sizeof(GLfloat));
-
- /* load vertex buffer */
- {
-#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \
- do { \
- GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \
- assert(k < 4 * 4 * numAttribs); \
- vbuf[k + 0] = X; \
- vbuf[k + 1] = Y; \
- vbuf[k + 2] = Z; \
- vbuf[k + 3] = W; \
- } while (0)
-
- const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
- GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer,
- PIPE_TRANSFER_WRITE,
- &vbuffer_transfer);
- GLuint attr;
-
- z = CLAMP(z, 0.0f, 1.0f);
-
- /* positions (in clip coords) */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLfloat fb_width = (GLfloat)fb->Width;
- const GLfloat fb_height = (GLfloat)fb->Height;
-
- const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
- const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
- const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
-
- SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */
- SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */
- SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */
- SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */
-
- semantic_names[0] = TGSI_SEMANTIC_POSITION;
- semantic_indexes[0] = 0;
- }
-
- /* colors */
- if (emitColor) {
- const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
- SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]);
- SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
- semantic_names[1] = TGSI_SEMANTIC_COLOR;
- semantic_indexes[1] = 0;
- attr = 2;
- }
- else {
- attr = 1;
- }
-
- /* texcoords */
- for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
- if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
- struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
- struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
- const GLfloat wt = (GLfloat) img->Width;
- const GLfloat ht = (GLfloat) img->Height;
- const GLfloat s0 = obj->CropRect[0] / wt;
- const GLfloat t0 = obj->CropRect[1] / ht;
- const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt;
- const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;
-
- /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
- SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */
- SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */
- SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */
- SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */
-
- semantic_names[attr] = TGSI_SEMANTIC_GENERIC;
- semantic_indexes[attr] = 0;
-
- attr++;
- }
- }
-
- pipe_buffer_unmap(pipe, vbuffer_transfer);
-
-#undef SET_ATTRIB
- }
-
-
- cso_save_viewport(cso);
- cso_save_vertex_shader(cso);
- cso_save_vertex_elements(cso);
-
- {
- void *vs = lookup_shader(pipe, numAttribs,
- semantic_names, semantic_indexes);
- cso_set_vertex_shader_handle(cso, vs);
- }
-
- for (i = 0; i < numAttribs; i++) {
- velements[i].src_offset = i * 4 * sizeof(float);
- velements[i].instance_divisor = 0;
- velements[i].vertex_buffer_index = 0;
- velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
- }
- cso_set_vertex_elements(cso, numAttribs, velements);
-
- /* viewport state: viewport matching window dims */
- {
- const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
- const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
- const GLfloat width = (GLfloat)fb->Width;
- const GLfloat height = (GLfloat)fb->Height;
- struct pipe_viewport_state vp;
- vp.scale[0] = 0.5f * width;
- vp.scale[1] = height * (invert ? -0.5f : 0.5f);
- vp.scale[2] = 1.0f;
- vp.scale[3] = 1.0f;
- vp.translate[0] = 0.5f * width;
- vp.translate[1] = 0.5f * height;
- vp.translate[2] = 0.0f;
- vp.translate[3] = 0.0f;
- cso_set_viewport(cso, &vp);
- }
-
-
- util_draw_vertex_buffer(pipe, vbuffer,
- 0, /* offset */
- PIPE_PRIM_TRIANGLE_FAN,
- 4, /* verts */
- numAttribs); /* attribs/vert */
-
-
- pipe_resource_reference(&vbuffer, NULL);
-
- /* restore state */
- cso_restore_viewport(cso);
- cso_restore_vertex_shader(cso);
- cso_restore_vertex_elements(cso);
-}
-
-
-void
-st_init_drawtex_functions(struct dd_function_table *functions)
-{
- functions->DrawTex = st_DrawTex;
-}
-
-
-/**
- * Free any cached shaders
- */
-void
-st_destroy_drawtex(struct st_context *st)
-{
- GLuint i;
- for (i = 0; i < NumCachedShaders; i++) {
- cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle);
- }
- NumCachedShaders = 0;
-}
-
-
-#endif /* FEATURE_OES_draw_texture */
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ **************************************************************************/
+
+
+/**
+ * Implementation of glDrawTex() for GL_OES_draw_tex
+ */
+
+
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "program/program.h"
+#include "program/prog_print.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_cb_drawtex.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_draw_quad.h"
+#include "util/u_simple_shaders.h"
+
+#include "cso_cache/cso_context.h"
+
+
+#if FEATURE_OES_draw_texture
+
+
+struct cached_shader
+{
+ void *handle;
+
+ uint num_attribs;
+ uint semantic_names[2 + MAX_TEXTURE_UNITS];
+ uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
+};
+
+#define MAX_SHADERS (2 * MAX_TEXTURE_UNITS)
+
+/**
+ * Simple linear list cache.
+ * Most of the time there'll only be one cached shader.
+ */
+static struct cached_shader CachedShaders[MAX_SHADERS];
+static GLuint NumCachedShaders = 0;
+
+
+static void *
+lookup_shader(struct pipe_context *pipe,
+ uint num_attribs,
+ const uint *semantic_names,
+ const uint *semantic_indexes)
+{
+ GLuint i, j;
+
+ /* look for existing shader with same attributes */
+ for (i = 0; i < NumCachedShaders; i++) {
+ if (CachedShaders[i].num_attribs == num_attribs) {
+ GLboolean match = GL_TRUE;
+ for (j = 0; j < num_attribs; j++) {
+ if (semantic_names[j] != CachedShaders[i].semantic_names[j] ||
+ semantic_indexes[j] != CachedShaders[i].semantic_indexes[j]) {
+ match = GL_FALSE;
+ break;
+ }
+ }
+ if (match)
+ return CachedShaders[i].handle;
+ }
+ }
+
+ /* not found - create new one now */
+ if (NumCachedShaders >= MAX_SHADERS) {
+ return NULL;
+ }
+
+ CachedShaders[i].num_attribs = num_attribs;
+ for (j = 0; j < num_attribs; j++) {
+ CachedShaders[i].semantic_names[j] = semantic_names[j];
+ CachedShaders[i].semantic_indexes[j] = semantic_indexes[j];
+ }
+
+ CachedShaders[i].handle =
+ util_make_vertex_passthrough_shader(pipe,
+ num_attribs,
+ semantic_names,
+ semantic_indexes);
+ NumCachedShaders++;
+
+ return CachedShaders[i].handle;
+}
+
+static void
+st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
+ GLfloat width, GLfloat height)
+{
+ struct st_context *st = ctx->st;
+ struct pipe_context *pipe = st->pipe;
+ struct cso_context *cso = ctx->st->cso_context;
+ struct pipe_resource *vbuffer;
+ struct pipe_transfer *vbuffer_transfer;
+ GLuint i, numTexCoords, numAttribs;
+ GLboolean emitColor;
+ uint semantic_names[2 + MAX_TEXTURE_UNITS];
+ uint semantic_indexes[2 + MAX_TEXTURE_UNITS];
+ struct pipe_vertex_element velements[2 + MAX_TEXTURE_UNITS];
+ GLbitfield inputs = VERT_BIT_POS;
+
+ st_validate_state(st);
+
+ /* determine if we need vertex color */
+ if (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_COL0)
+ emitColor = GL_TRUE;
+ else
+ emitColor = GL_FALSE;
+
+ /* determine how many enabled sets of texcoords */
+ numTexCoords = 0;
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
+ inputs |= VERT_BIT_TEX(i);
+ numTexCoords++;
+ }
+ }
+
+ /* total number of attributes per vertex */
+ numAttribs = 1 + emitColor + numTexCoords;
+
+
+ /* create the vertex buffer */
+ vbuffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
+ PIPE_USAGE_STREAM,
+ numAttribs * 4 * 4 * sizeof(GLfloat));
+
+ /* load vertex buffer */
+ {
+#define SET_ATTRIB(VERT, ATTR, X, Y, Z, W) \
+ do { \
+ GLuint k = (((VERT) * numAttribs + (ATTR)) * 4); \
+ assert(k < 4 * 4 * numAttribs); \
+ vbuf[k + 0] = X; \
+ vbuf[k + 1] = Y; \
+ vbuf[k + 2] = Z; \
+ vbuf[k + 3] = W; \
+ } while (0)
+
+ const GLfloat x0 = x, y0 = y, x1 = x + width, y1 = y + height;
+ GLfloat *vbuf = (GLfloat *) pipe_buffer_map(pipe, vbuffer,
+ PIPE_TRANSFER_WRITE,
+ &vbuffer_transfer);
+ GLuint attr;
+
+ z = CLAMP(z, 0.0f, 1.0f);
+
+ /* positions (in clip coords) */
+ {
+ const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const GLfloat fb_width = (GLfloat)fb->Width;
+ const GLfloat fb_height = (GLfloat)fb->Height;
+
+ const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
+ const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
+ const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
+ const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
+
+ SET_ATTRIB(0, 0, clip_x0, clip_y0, z, 1.0f); /* lower left */
+ SET_ATTRIB(1, 0, clip_x1, clip_y0, z, 1.0f); /* lower right */
+ SET_ATTRIB(2, 0, clip_x1, clip_y1, z, 1.0f); /* upper right */
+ SET_ATTRIB(3, 0, clip_x0, clip_y1, z, 1.0f); /* upper left */
+
+ semantic_names[0] = TGSI_SEMANTIC_POSITION;
+ semantic_indexes[0] = 0;
+ }
+
+ /* colors */
+ if (emitColor) {
+ const GLfloat *c = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+ SET_ATTRIB(0, 1, c[0], c[1], c[2], c[3]);
+ SET_ATTRIB(1, 1, c[0], c[1], c[2], c[3]);
+ SET_ATTRIB(2, 1, c[0], c[1], c[2], c[3]);
+ SET_ATTRIB(3, 1, c[0], c[1], c[2], c[3]);
+ semantic_names[1] = TGSI_SEMANTIC_COLOR;
+ semantic_indexes[1] = 0;
+ attr = 2;
+ }
+ else {
+ attr = 1;
+ }
+
+ /* texcoords */
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_2D_BIT) {
+ struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
+ struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
+ const GLfloat wt = (GLfloat) img->Width;
+ const GLfloat ht = (GLfloat) img->Height;
+ const GLfloat s0 = obj->CropRect[0] / wt;
+ const GLfloat t0 = obj->CropRect[1] / ht;
+ const GLfloat s1 = (obj->CropRect[0] + obj->CropRect[2]) / wt;
+ const GLfloat t1 = (obj->CropRect[1] + obj->CropRect[3]) / ht;
+
+ /*printf("crop texcoords: %g, %g .. %g, %g\n", s0, t0, s1, t1);*/
+ SET_ATTRIB(0, attr, s0, t0, 0.0f, 1.0f); /* lower left */
+ SET_ATTRIB(1, attr, s1, t0, 0.0f, 1.0f); /* lower right */
+ SET_ATTRIB(2, attr, s1, t1, 0.0f, 1.0f); /* upper right */
+ SET_ATTRIB(3, attr, s0, t1, 0.0f, 1.0f); /* upper left */
+
+ semantic_names[attr] = TGSI_SEMANTIC_GENERIC;
+ semantic_indexes[attr] = 0;
+
+ attr++;
+ }
+ }
+
+ pipe_buffer_unmap(pipe, vbuffer_transfer);
+
+#undef SET_ATTRIB
+ }
+
+
+ cso_save_viewport(cso);
+ cso_save_vertex_shader(cso);
+ cso_save_vertex_elements(cso);
+ cso_save_vertex_buffers(cso);
+
+ {
+ void *vs = lookup_shader(pipe, numAttribs,
+ semantic_names, semantic_indexes);
+ cso_set_vertex_shader_handle(cso, vs);
+ }
+
+ for (i = 0; i < numAttribs; i++) {
+ velements[i].src_offset = i * 4 * sizeof(float);
+ velements[i].instance_divisor = 0;
+ velements[i].vertex_buffer_index = 0;
+ velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ }
+ cso_set_vertex_elements(cso, numAttribs, velements);
+
+ /* viewport state: viewport matching window dims */
+ {
+ const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+ const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
+ const GLfloat width = (GLfloat)fb->Width;
+ const GLfloat height = (GLfloat)fb->Height;
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * width;
+ vp.scale[1] = height * (invert ? -0.5f : 0.5f);
+ vp.scale[2] = 1.0f;
+ vp.scale[3] = 1.0f;
+ vp.translate[0] = 0.5f * width;
+ vp.translate[1] = 0.5f * height;
+ vp.translate[2] = 0.0f;
+ vp.translate[3] = 0.0f;
+ cso_set_viewport(cso, &vp);
+ }
+
+
+ util_draw_vertex_buffer(pipe, cso, vbuffer,
+ 0, /* offset */
+ PIPE_PRIM_TRIANGLE_FAN,
+ 4, /* verts */
+ numAttribs); /* attribs/vert */
+
+
+ pipe_resource_reference(&vbuffer, NULL);
+
+ /* restore state */
+ cso_restore_viewport(cso);
+ cso_restore_vertex_shader(cso);
+ cso_restore_vertex_elements(cso);
+ cso_restore_vertex_buffers(cso);
+}
+
+
+void
+st_init_drawtex_functions(struct dd_function_table *functions)
+{
+ functions->DrawTex = st_DrawTex;
+}
+
+
+/**
+ * Free any cached shaders
+ */
+void
+st_destroy_drawtex(struct st_context *st)
+{
+ GLuint i;
+ for (i = 0; i < NumCachedShaders; i++) {
+ cso_delete_vertex_shader(st->cso_context, CachedShaders[i].handle);
+ }
+ NumCachedShaders = 0;
+}
+
+
+#endif /* FEATURE_OES_draw_texture */
diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c
index dccbff3c1..7a19f35bb 100644
--- a/mesalib/src/mesa/state_tracker/st_context.c
+++ b/mesalib/src/mesa/state_tracker/st_context.c
@@ -203,6 +203,11 @@ static void st_destroy_context_priv( struct st_context *st )
st_destroy_drawpix(st);
st_destroy_drawtex(st);
+ /* Unreference any user vertex buffers. */
+ for (i = 0; i < st->num_user_vbs; i++) {
+ pipe_resource_reference(&st->user_vb[i], NULL);
+ }
+
for (i = 0; i < Elements(st->state.sampler_views); i++) {
pipe_sampler_view_reference(&st->state.sampler_views[i], NULL);
}
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h
index 492ee600e..77765f023 100644
--- a/mesalib/src/mesa/state_tracker/st_context.h
+++ b/mesalib/src/mesa/state_tracker/st_context.h
@@ -1,265 +1,270 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef ST_CONTEXT_H
-#define ST_CONTEXT_H
-
-#include "main/mtypes.h"
-#include "pipe/p_state.h"
-#include "state_tracker/st_api.h"
-
-struct bitmap_cache;
-struct blit_state;
-struct dd_function_table;
-struct draw_context;
-struct draw_stage;
-struct gen_mipmap_state;
-struct st_context;
-struct st_fragment_program;
-
-
-#define ST_NEW_MESA 0x1 /* Mesa state has changed */
-#define ST_NEW_FRAGMENT_PROGRAM 0x2
-#define ST_NEW_VERTEX_PROGRAM 0x4
-#define ST_NEW_FRAMEBUFFER 0x8
-#define ST_NEW_EDGEFLAGS_DATA 0x10
-#define ST_NEW_GEOMETRY_PROGRAM 0x20
-
-
-struct st_state_flags {
- GLuint mesa;
- GLuint st;
-};
-
-struct st_tracked_state {
- const char *name;
- struct st_state_flags dirty;
- void (*update)( struct st_context *st );
-};
-
-
-
-struct st_context
-{
- struct st_context_iface iface;
-
- struct gl_context *ctx;
-
- struct pipe_context *pipe;
-
- struct draw_context *draw; /**< For selection/feedback/rastpos only */
- struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */
- struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */
- struct draw_stage *rastpos_stage; /**< For glRasterPos */
-
-
- /* On old libGL's for linux we need to invalidate the drawables
- * on glViewpport calls, this is set via a option.
- */
- boolean invalidate_on_gl_viewport;
-
- /* Some state is contained in constant objects.
- * Other state is just parameter values.
- */
- struct {
- struct pipe_blend_state blend;
- struct pipe_depth_stencil_alpha_state depth_stencil;
- struct pipe_rasterizer_state rasterizer;
- struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS];
- struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS];
- struct pipe_clip_state clip;
- struct {
- void *ptr;
- unsigned size;
- } constants[PIPE_SHADER_TYPES];
- struct pipe_framebuffer_state framebuffer;
- struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
- struct pipe_scissor_state scissor;
- struct pipe_viewport_state viewport;
- unsigned sample_mask;
-
- GLuint num_samplers;
- GLuint num_textures;
-
- GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */
- } state;
-
- char vendor[100];
- char renderer[100];
-
- struct st_state_flags dirty;
-
- GLboolean missing_textures;
- GLboolean vertdata_edgeflags;
-
- /** Mapping from VERT_RESULT_x to post-transformed vertex slot */
- const GLuint *vertex_result_to_slot;
-
- struct st_vertex_program *vp; /**< Currently bound vertex program */
- struct st_fragment_program *fp; /**< Currently bound fragment program */
- struct st_geometry_program *gp; /**< Currently bound geometry program */
-
- struct st_vp_variant *vp_variant;
- struct st_fp_variant *fp_variant;
- struct st_gp_variant *gp_variant;
-
- struct gl_texture_object *default_texture;
-
- struct {
- struct gl_program_cache *cache;
- struct st_fragment_program *program; /**< cur pixel transfer prog */
- GLuint xfer_prog_sn; /**< pixel xfer program serial no. */
- GLuint user_prog_sn; /**< user fragment program serial no. */
- struct st_fragment_program *combined_prog;
- GLuint combined_prog_sn;
- struct pipe_resource *pixelmap_texture;
- struct pipe_sampler_view *pixelmap_sampler_view;
- boolean pixelmap_enabled; /**< use the pixelmap texture? */
- } pixel_xfer;
-
- /** for glBitmap */
- struct {
- struct pipe_rasterizer_state rasterizer;
- struct pipe_sampler_state samplers[2];
- enum pipe_format tex_format;
- void *vs;
- float vertices[4][3][4]; /**< vertex pos + color + texcoord */
- struct pipe_resource *vbuf;
- unsigned vbuf_slot; /* next free slot in vbuf */
- struct bitmap_cache *cache;
- } bitmap;
-
- /** for glDraw/CopyPixels */
- struct {
- struct gl_fragment_program *shaders[4];
- void *vert_shaders[2]; /**< ureg shaders */
- } drawpix;
-
- /** for glClear */
- struct {
- struct pipe_rasterizer_state raster;
- struct pipe_viewport_state viewport;
- struct pipe_clip_state clip;
- void *vs;
- void *fs;
- float vertices[4][2][4]; /**< vertex pos + color */
- struct pipe_resource *vbuf;
- unsigned vbuf_slot;
- boolean enable_ds_separate;
- } clear;
-
- /** used for anything using util_draw_vertex_buffer */
- struct pipe_vertex_element velems_util_draw[3];
-
- void *passthrough_fs; /**< simple pass-through frag shader */
-
- enum pipe_texture_target internal_target;
- struct gen_mipmap_state *gen_mipmap;
- struct blit_state *blit;
-
- struct cso_context *cso_context;
-
- int force_msaa;
- void *winsys_drawable_handle;
-};
-
-
-/* Need this so that we can implement Mesa callbacks in this module.
- */
-static INLINE struct st_context *st_context(struct gl_context *ctx)
-{
- return ctx->st;
-}
-
-
-/**
- * Wrapper for struct gl_framebuffer.
- * This is an opaque type to the outside world.
- */
-struct st_framebuffer
-{
- struct gl_framebuffer Base;
- void *Private;
-
- struct st_framebuffer_iface *iface;
- enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
- unsigned num_statts;
- int32_t revalidate;
-};
-
-
-extern void st_init_driver_functions(struct dd_function_table *functions);
-
-void st_invalidate_state(struct gl_context * ctx, GLuint new_state);
-
-
-
-#define Y_0_TOP 1
-#define Y_0_BOTTOM 2
-
-static INLINE GLuint
-st_fb_orientation(const struct gl_framebuffer *fb)
-{
- if (fb && fb->Name == 0) {
- /* Drawing into a window (on-screen buffer).
- *
- * Negate Y scale to flip image vertically.
- * The NDC Y coords prior to viewport transformation are in the range
- * [y=-1=bottom, y=1=top]
- * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where
- * H is the window height.
- * Use the viewport transformation to invert Y.
- */
- return Y_0_TOP;
- }
- else {
- /* Drawing into user-created FBO (very likely a texture).
- *
- * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering.
- */
- return Y_0_BOTTOM;
- }
-}
-
-
-/** clear-alloc a struct-sized object, with casting */
-#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
-
-
-extern int
-st_get_msaa(void);
-
-extern struct st_context *
-st_create_context(gl_api api, struct pipe_context *pipe,
- const struct gl_config *visual,
- struct st_context *share);
-
-extern void
-st_destroy_context(struct st_context *st);
-
-
-#endif
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef ST_CONTEXT_H
+#define ST_CONTEXT_H
+
+#include "main/mtypes.h"
+#include "pipe/p_state.h"
+#include "state_tracker/st_api.h"
+
+struct bitmap_cache;
+struct blit_state;
+struct dd_function_table;
+struct draw_context;
+struct draw_stage;
+struct gen_mipmap_state;
+struct st_context;
+struct st_fragment_program;
+
+
+#define ST_NEW_MESA 0x1 /* Mesa state has changed */
+#define ST_NEW_FRAGMENT_PROGRAM 0x2
+#define ST_NEW_VERTEX_PROGRAM 0x4
+#define ST_NEW_FRAMEBUFFER 0x8
+#define ST_NEW_EDGEFLAGS_DATA 0x10
+#define ST_NEW_GEOMETRY_PROGRAM 0x20
+
+
+struct st_state_flags {
+ GLuint mesa;
+ GLuint st;
+};
+
+struct st_tracked_state {
+ const char *name;
+ struct st_state_flags dirty;
+ void (*update)( struct st_context *st );
+};
+
+
+
+struct st_context
+{
+ struct st_context_iface iface;
+
+ struct gl_context *ctx;
+
+ struct pipe_context *pipe;
+
+ struct draw_context *draw; /**< For selection/feedback/rastpos only */
+ struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */
+ struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */
+ struct draw_stage *rastpos_stage; /**< For glRasterPos */
+
+
+ /* On old libGL's for linux we need to invalidate the drawables
+ * on glViewpport calls, this is set via a option.
+ */
+ boolean invalidate_on_gl_viewport;
+
+ /* Some state is contained in constant objects.
+ * Other state is just parameter values.
+ */
+ struct {
+ struct pipe_blend_state blend;
+ struct pipe_depth_stencil_alpha_state depth_stencil;
+ struct pipe_rasterizer_state rasterizer;
+ struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS];
+ struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS];
+ struct pipe_clip_state clip;
+ struct {
+ void *ptr;
+ unsigned size;
+ } constants[PIPE_SHADER_TYPES];
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ unsigned sample_mask;
+
+ GLuint num_samplers;
+ GLuint num_textures;
+
+ GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */
+ } state;
+
+ char vendor[100];
+ char renderer[100];
+
+ struct st_state_flags dirty;
+
+ GLboolean missing_textures;
+ GLboolean vertdata_edgeflags;
+
+ /** Mapping from VERT_RESULT_x to post-transformed vertex slot */
+ const GLuint *vertex_result_to_slot;
+
+ struct st_vertex_program *vp; /**< Currently bound vertex program */
+ struct st_fragment_program *fp; /**< Currently bound fragment program */
+ struct st_geometry_program *gp; /**< Currently bound geometry program */
+
+ struct st_vp_variant *vp_variant;
+ struct st_fp_variant *fp_variant;
+ struct st_gp_variant *gp_variant;
+
+ struct gl_texture_object *default_texture;
+
+ struct {
+ struct gl_program_cache *cache;
+ struct st_fragment_program *program; /**< cur pixel transfer prog */
+ GLuint xfer_prog_sn; /**< pixel xfer program serial no. */
+ GLuint user_prog_sn; /**< user fragment program serial no. */
+ struct st_fragment_program *combined_prog;
+ GLuint combined_prog_sn;
+ struct pipe_resource *pixelmap_texture;
+ struct pipe_sampler_view *pixelmap_sampler_view;
+ boolean pixelmap_enabled; /**< use the pixelmap texture? */
+ } pixel_xfer;
+
+ /** for glBitmap */
+ struct {
+ struct pipe_rasterizer_state rasterizer;
+ struct pipe_sampler_state samplers[2];
+ enum pipe_format tex_format;
+ void *vs;
+ float vertices[4][3][4]; /**< vertex pos + color + texcoord */
+ struct pipe_resource *vbuf;
+ unsigned vbuf_slot; /* next free slot in vbuf */
+ struct bitmap_cache *cache;
+ } bitmap;
+
+ /** for glDraw/CopyPixels */
+ struct {
+ struct gl_fragment_program *shaders[4];
+ void *vert_shaders[2]; /**< ureg shaders */
+ } drawpix;
+
+ /** for glClear */
+ struct {
+ struct pipe_rasterizer_state raster;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+ void *vs;
+ void *fs;
+ float vertices[4][2][4]; /**< vertex pos + color */
+ struct pipe_resource *vbuf;
+ unsigned vbuf_slot;
+ boolean enable_ds_separate;
+ } clear;
+
+ /** used for anything using util_draw_vertex_buffer */
+ struct pipe_vertex_element velems_util_draw[3];
+
+ void *passthrough_fs; /**< simple pass-through frag shader */
+
+ enum pipe_texture_target internal_target;
+ struct gen_mipmap_state *gen_mipmap;
+ struct blit_state *blit;
+
+ struct cso_context *cso_context;
+
+ int force_msaa;
+ void *winsys_drawable_handle;
+
+ /* User vertex buffers. */
+ struct pipe_resource *user_vb[PIPE_MAX_ATTRIBS];
+ unsigned user_vb_stride[PIPE_MAX_ATTRIBS];
+ unsigned num_user_vbs;
+};
+
+
+/* Need this so that we can implement Mesa callbacks in this module.
+ */
+static INLINE struct st_context *st_context(struct gl_context *ctx)
+{
+ return ctx->st;
+}
+
+
+/**
+ * Wrapper for struct gl_framebuffer.
+ * This is an opaque type to the outside world.
+ */
+struct st_framebuffer
+{
+ struct gl_framebuffer Base;
+ void *Private;
+
+ struct st_framebuffer_iface *iface;
+ enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
+ unsigned num_statts;
+ int32_t revalidate;
+};
+
+
+extern void st_init_driver_functions(struct dd_function_table *functions);
+
+void st_invalidate_state(struct gl_context * ctx, GLuint new_state);
+
+
+
+#define Y_0_TOP 1
+#define Y_0_BOTTOM 2
+
+static INLINE GLuint
+st_fb_orientation(const struct gl_framebuffer *fb)
+{
+ if (fb && fb->Name == 0) {
+ /* Drawing into a window (on-screen buffer).
+ *
+ * Negate Y scale to flip image vertically.
+ * The NDC Y coords prior to viewport transformation are in the range
+ * [y=-1=bottom, y=1=top]
+ * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where
+ * H is the window height.
+ * Use the viewport transformation to invert Y.
+ */
+ return Y_0_TOP;
+ }
+ else {
+ /* Drawing into user-created FBO (very likely a texture).
+ *
+ * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering.
+ */
+ return Y_0_BOTTOM;
+ }
+}
+
+
+/** clear-alloc a struct-sized object, with casting */
+#define ST_CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T))
+
+
+extern int
+st_get_msaa(void);
+
+extern struct st_context *
+st_create_context(gl_api api, struct pipe_context *pipe,
+ const struct gl_config *visual,
+ struct st_context *share);
+
+extern void
+st_destroy_context(struct st_context *st);
+
+
+#endif
diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c
index 19466ea44..830e3e3c1 100644
--- a/mesalib/src/mesa/state_tracker/st_draw.c
+++ b/mesalib/src/mesa/state_tracker/st_draw.c
@@ -1,788 +1,749 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * This file implements the st_draw_vbo() function which is called from
- * Mesa's VBO module. All point/line/triangle rendering is done through
- * this function whether the user called glBegin/End, glDrawArrays,
- * glDrawElements, glEvalMesh, or glCalList, etc.
- *
- * We basically convert the VBO's vertex attribute/array information into
- * Gallium vertex state, bind the vertex buffer objects and call
- * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays().
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/prog_uniform.h"
-
-#include "vbo/vbo.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_bufferobjects.h"
-#include "st_draw.h"
-#include "st_program.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_prim.h"
-#include "util/u_draw_quad.h"
-#include "draw/draw_context.h"
-#include "cso_cache/cso_context.h"
-
-
-static GLuint double_types[4] = {
- PIPE_FORMAT_R64_FLOAT,
- PIPE_FORMAT_R64G64_FLOAT,
- PIPE_FORMAT_R64G64B64_FLOAT,
- PIPE_FORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[4] = {
- PIPE_FORMAT_R32_FLOAT,
- PIPE_FORMAT_R32G32_FLOAT,
- PIPE_FORMAT_R32G32B32_FLOAT,
- PIPE_FORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint half_float_types[4] = {
- PIPE_FORMAT_R16_FLOAT,
- PIPE_FORMAT_R16G16_FLOAT,
- PIPE_FORMAT_R16G16B16_FLOAT,
- PIPE_FORMAT_R16G16B16A16_FLOAT
-};
-
-static GLuint uint_types_norm[4] = {
- PIPE_FORMAT_R32_UNORM,
- PIPE_FORMAT_R32G32_UNORM,
- PIPE_FORMAT_R32G32B32_UNORM,
- PIPE_FORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[4] = {
- PIPE_FORMAT_R32_USCALED,
- PIPE_FORMAT_R32G32_USCALED,
- PIPE_FORMAT_R32G32B32_USCALED,
- PIPE_FORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint int_types_norm[4] = {
- PIPE_FORMAT_R32_SNORM,
- PIPE_FORMAT_R32G32_SNORM,
- PIPE_FORMAT_R32G32B32_SNORM,
- PIPE_FORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[4] = {
- PIPE_FORMAT_R32_SSCALED,
- PIPE_FORMAT_R32G32_SSCALED,
- PIPE_FORMAT_R32G32B32_SSCALED,
- PIPE_FORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint ushort_types_norm[4] = {
- PIPE_FORMAT_R16_UNORM,
- PIPE_FORMAT_R16G16_UNORM,
- PIPE_FORMAT_R16G16B16_UNORM,
- PIPE_FORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[4] = {
- PIPE_FORMAT_R16_USCALED,
- PIPE_FORMAT_R16G16_USCALED,
- PIPE_FORMAT_R16G16B16_USCALED,
- PIPE_FORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint short_types_norm[4] = {
- PIPE_FORMAT_R16_SNORM,
- PIPE_FORMAT_R16G16_SNORM,
- PIPE_FORMAT_R16G16B16_SNORM,
- PIPE_FORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[4] = {
- PIPE_FORMAT_R16_SSCALED,
- PIPE_FORMAT_R16G16_SSCALED,
- PIPE_FORMAT_R16G16B16_SSCALED,
- PIPE_FORMAT_R16G16B16A16_SSCALED
-};
-
-static GLuint ubyte_types_norm[4] = {
- PIPE_FORMAT_R8_UNORM,
- PIPE_FORMAT_R8G8_UNORM,
- PIPE_FORMAT_R8G8B8_UNORM,
- PIPE_FORMAT_R8G8B8A8_UNORM
-};
-
-static GLuint ubyte_types_scale[4] = {
- PIPE_FORMAT_R8_USCALED,
- PIPE_FORMAT_R8G8_USCALED,
- PIPE_FORMAT_R8G8B8_USCALED,
- PIPE_FORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint byte_types_norm[4] = {
- PIPE_FORMAT_R8_SNORM,
- PIPE_FORMAT_R8G8_SNORM,
- PIPE_FORMAT_R8G8B8_SNORM,
- PIPE_FORMAT_R8G8B8A8_SNORM
-};
-
-static GLuint byte_types_scale[4] = {
- PIPE_FORMAT_R8_SSCALED,
- PIPE_FORMAT_R8G8_SSCALED,
- PIPE_FORMAT_R8G8B8_SSCALED,
- PIPE_FORMAT_R8G8B8A8_SSCALED
-};
-
-static GLuint fixed_types[4] = {
- PIPE_FORMAT_R32_FIXED,
- PIPE_FORMAT_R32G32_FIXED,
- PIPE_FORMAT_R32G32B32_FIXED,
- PIPE_FORMAT_R32G32B32A32_FIXED
-};
-
-
-
-/**
- * Return a PIPE_FORMAT_x for the given GL datatype and size.
- */
-GLuint
-st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
- GLboolean normalized)
-{
- assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
- type == GL_FIXED || type == GL_HALF_FLOAT);
- assert(size >= 1);
- assert(size <= 4);
- assert(format == GL_RGBA || format == GL_BGRA);
-
- if (format == GL_BGRA) {
- /* this is an odd-ball case */
- assert(type == GL_UNSIGNED_BYTE);
- assert(normalized);
- return PIPE_FORMAT_B8G8R8A8_UNORM;
- }
-
- if (normalized) {
- switch (type) {
- case GL_DOUBLE: return double_types[size-1];
- case GL_FLOAT: return float_types[size-1];
- case GL_HALF_FLOAT: return half_float_types[size-1];
- case GL_INT: return int_types_norm[size-1];
- case GL_SHORT: return short_types_norm[size-1];
- case GL_BYTE: return byte_types_norm[size-1];
- case GL_UNSIGNED_INT: return uint_types_norm[size-1];
- case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1];
- case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
- case GL_FIXED: return fixed_types[size-1];
- default: assert(0); return 0;
- }
- }
- else {
- switch (type) {
- case GL_DOUBLE: return double_types[size-1];
- case GL_FLOAT: return float_types[size-1];
- case GL_HALF_FLOAT: return half_float_types[size-1];
- case GL_INT: return int_types_scale[size-1];
- case GL_SHORT: return short_types_scale[size-1];
- case GL_BYTE: return byte_types_scale[size-1];
- case GL_UNSIGNED_INT: return uint_types_scale[size-1];
- case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1];
- case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
- case GL_FIXED: return fixed_types[size-1];
- default: assert(0); return 0;
- }
- }
- return 0; /* silence compiler warning */
-}
-
-
-
-
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- * \param userSpace returns whether the arrays are in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLboolean *userSpace)
-{
- GLuint attr;
- const struct gl_buffer_object *firstBufObj = NULL;
- GLint firstStride = -1;
- GLuint num_client_arrays = 0;
- const GLubyte *client_addr = NULL;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
- const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
-
- if (firstStride < 0) {
- firstStride = stride;
- }
- else if (firstStride != stride) {
- return GL_FALSE;
- }
-
- if (!bufObj || !bufObj->Name) {
- num_client_arrays++;
- /* Try to detect if the client-space arrays are
- * "close" to each other.
- */
- if (!client_addr) {
- client_addr = arrays[mesaAttr]->Ptr;
- }
- else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) {
- /* arrays start too far apart */
- return GL_FALSE;
- }
- }
- else if (!firstBufObj) {
- firstBufObj = bufObj;
- }
- else if (bufObj != firstBufObj) {
- return GL_FALSE;
- }
- }
-
- *userSpace = (num_client_arrays == vpv->num_inputs);
- /* debug_printf("user space: %s (%d arrays, %d inputs)\n",
- (int)*userSpace ? "Yes" : "No", num_client_arrays, vp->num_inputs); */
-
- return GL_TRUE;
-}
-
-
-/**
- * Compute the memory range occupied by the arrays.
- */
-static void
-get_arrays_bounds(const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- const GLubyte **low, const GLubyte **high)
-{
- const GLubyte *low_addr = NULL;
- const GLubyte *high_addr = NULL;
- GLuint attr;
-
- /* debug_printf("get_arrays_bounds: Handling %u attrs\n", vpv->num_inputs); */
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- const GLint stride = arrays[mesaAttr]->StrideB;
- const GLubyte *start = arrays[mesaAttr]->Ptr;
- const unsigned sz = (arrays[mesaAttr]->Size *
- _mesa_sizeof_type(arrays[mesaAttr]->Type));
- const GLubyte *end = start + (max_index * stride) + sz;
-
- /* debug_printf("attr %u: stride %d size %u start %p end %p\n",
- attr, stride, sz, start, end); */
-
- if (attr == 0) {
- low_addr = start;
- high_addr = end;
- }
- else {
- low_addr = MIN2(low_addr, start);
- high_addr = MAX2(high_addr, end);
- }
- }
-
- *low = low_addr;
- *high = high_addr;
-}
-
-
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct gl_context *ctx,
- const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean userSpace,
- struct pipe_vertex_buffer *vbuffer,
- struct pipe_vertex_element velements[])
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLuint attr;
- const GLubyte *offset0 = NULL;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- GLsizei stride = arrays[mesaAttr]->StrideB;
-
- /*printf("stobj %u = %p\n", attr, (void*)stobj);*/
-
- if (attr == 0) {
- const GLubyte *low, *high;
-
- get_arrays_bounds(vp, vpv, arrays, max_index, &low, &high);
- /* debug_printf("buffer range: %p %p range %d max index %u\n",
- low, high, high - low, max_index); */
-
- offset0 = low;
- if (userSpace) {
- vbuffer->buffer =
- pipe_user_buffer_create(pipe->screen, (void *) low, high - low,
- PIPE_BIND_VERTEX_BUFFER);
- vbuffer->buffer_offset = 0;
- }
- else {
- vbuffer->buffer = NULL;
- pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
- vbuffer->buffer_offset = pointer_to_offset(low);
- }
- vbuffer->stride = stride; /* in bytes */
- vbuffer->max_index = max_index;
- }
-
- /*
- if (arrays[mesaAttr]->InstanceDivisor)
- vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
- else
- vbuffer[attr].max_index = max_index;
- */
-
- velements[attr].src_offset =
- (unsigned) (arrays[mesaAttr]->Ptr - offset0);
- velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
- velements[attr].vertex_buffer_index = 0;
- velements[attr].src_format =
- st_pipe_vertex_format(arrays[mesaAttr]->Type,
- arrays[mesaAttr]->Size,
- arrays[mesaAttr]->Format,
- arrays[mesaAttr]->Normalized);
- assert(velements[attr].src_format);
- }
-}
-
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer returns vertex buffer info
- * \param velements returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct gl_context *ctx,
- const struct st_vertex_program *vp,
- const struct st_vp_variant *vpv,
- const struct gl_client_array **arrays,
- GLuint max_index,
- GLboolean *userSpace,
- struct pipe_vertex_buffer vbuffer[],
- struct pipe_vertex_element velements[])
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- GLuint attr;
-
- for (attr = 0; attr < vpv->num_inputs; attr++) {
- const GLuint mesaAttr = vp->index_to_input[attr];
- struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
- GLsizei stride = arrays[mesaAttr]->StrideB;
-
- *userSpace = GL_FALSE;
-
- if (bufobj && bufobj->Name) {
- /* Attribute data is in a VBO.
- * Recall that for VBOs, the gl_client_array->Ptr field is
- * really an offset from the start of the VBO, not a pointer.
- */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- assert(stobj->buffer);
- /*printf("stobj %u = %p\n", attr, (void*) stobj);*/
-
- vbuffer[attr].buffer = NULL;
- pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
- vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
- }
- else {
- /* attribute data is in user-space memory, not a VBO */
- uint bytes;
- /*printf("user-space array %d stride %d\n", attr, stride);*/
-
- *userSpace = GL_TRUE;
-
- /* wrap user data */
- if (arrays[mesaAttr]->Ptr) {
- /* user's vertex array */
- if (arrays[mesaAttr]->StrideB) {
- bytes = arrays[mesaAttr]->StrideB * (max_index + 1);
- }
- else {
- bytes = arrays[mesaAttr]->Size
- * _mesa_sizeof_type(arrays[mesaAttr]->Type);
- }
- vbuffer[attr].buffer =
- pipe_user_buffer_create(pipe->screen,
- (void *) arrays[mesaAttr]->Ptr, bytes,
- PIPE_BIND_VERTEX_BUFFER);
- }
- else {
- /* no array, use ctx->Current.Attrib[] value */
- bytes = sizeof(ctx->Current.Attrib[0]);
- vbuffer[attr].buffer =
- pipe_user_buffer_create(pipe->screen,
- (void *) ctx->Current.Attrib[mesaAttr],
- bytes,
- PIPE_BIND_VERTEX_BUFFER);
- stride = 0;
- }
-
- vbuffer[attr].buffer_offset = 0;
- }
-
- assert(velements[attr].src_offset <= 2048); /* 11-bit field */
-
- /* common-case setup */
- vbuffer[attr].stride = stride; /* in bytes */
- if (arrays[mesaAttr]->InstanceDivisor)
- vbuffer[attr].max_index = arrays[mesaAttr]->_MaxElement;
- else
- vbuffer[attr].max_index = max_index;
-
- velements[attr].src_offset = 0;
- velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
- velements[attr].vertex_buffer_index = attr;
- velements[attr].src_format
- = st_pipe_vertex_format(arrays[mesaAttr]->Type,
- arrays[mesaAttr]->Size,
- arrays[mesaAttr]->Format,
- arrays[mesaAttr]->Normalized);
- assert(velements[attr].src_format);
- }
-}
-
-
-static void
-setup_index_buffer(struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- struct pipe_index_buffer *ibuffer)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
-
- memset(ibuffer, 0, sizeof(*ibuffer));
- if (ib) {
- struct gl_buffer_object *bufobj = ib->obj;
-
- switch (ib->type) {
- case GL_UNSIGNED_INT:
- ibuffer->index_size = 4;
- break;
- case GL_UNSIGNED_SHORT:
- ibuffer->index_size = 2;
- break;
- case GL_UNSIGNED_BYTE:
- ibuffer->index_size = 1;
- break;
- default:
- assert(0);
- return;
- }
-
- /* get/create the index buffer object */
- if (bufobj && bufobj->Name) {
- /* elements/indexes are in a real VBO */
- struct st_buffer_object *stobj = st_buffer_object(bufobj);
- pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
- ibuffer->offset = pointer_to_offset(ib->ptr);
- }
- else {
- /* element/indicies are in user space memory */
- ibuffer->buffer =
- pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
- ib->count * ibuffer->index_size,
- PIPE_BIND_INDEX_BUFFER);
- }
- }
-}
-
-/**
- * Prior to drawing, check that any uniforms referenced by the
- * current shader have been set. If a uniform has not been set,
- * issue a warning.
- */
-static void
-check_uniforms(struct gl_context *ctx)
-{
- struct gl_shader_program *shProg[3] = {
- ctx->Shader.CurrentVertexProgram,
- ctx->Shader.CurrentGeometryProgram,
- ctx->Shader.CurrentFragmentProgram,
- };
- unsigned j;
-
- for (j = 0; j < 3; j++) {
- unsigned i;
-
- if (shProg[j] == NULL || !shProg[j]->LinkStatus)
- continue;
-
- for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) {
- const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i];
- if (!u->Initialized) {
- _mesa_warning(ctx,
- "Using shader with uninitialized uniform: %s",
- u->Name);
- }
- }
- }
-}
-
-
-/**
- * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
- * the corresponding Gallium type.
- */
-static unsigned
-translate_prim(const struct gl_context *ctx, unsigned prim)
-{
- /* GL prims should match Gallium prims, spot-check a few */
- assert(GL_POINTS == PIPE_PRIM_POINTS);
- assert(GL_QUADS == PIPE_PRIM_QUADS);
- assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
-
- /* Avoid quadstrips if it's easy to do so:
- * Note: it's imporant to do the correct trimming if we change the prim type!
- * We do that wherever this function is called.
- */
- if (prim == GL_QUAD_STRIP &&
- ctx->Light.ShadeModel != GL_FLAT &&
- ctx->Polygon.FrontMode == GL_FILL &&
- ctx->Polygon.BackMode == GL_FILL)
- prim = GL_TRIANGLE_STRIP;
-
- return prim;
-}
-
-
-
-/**
- * This function gets plugged into the VBO module and is called when
- * we have something to render.
- * Basically, translate the information into the format expected by gallium.
- */
-void
-st_draw_vbo(struct gl_context *ctx,
- const struct gl_client_array **arrays,
- const struct _mesa_prim *prims,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- struct st_context *st = st_context(ctx);
- struct pipe_context *pipe = st->pipe;
- const struct st_vertex_program *vp;
- const struct st_vp_variant *vpv;
- struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
- GLuint attr;
- struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
- unsigned num_vbuffers, num_velements;
- struct pipe_index_buffer ibuffer;
- GLboolean userSpace = GL_FALSE;
- GLboolean vertDataEdgeFlags;
- struct pipe_draw_info info;
- unsigned i;
-
- /* Mesa core state should have been validated already */
- assert(ctx->NewState == 0x0);
-
- /* Gallium probably doesn't want this in some cases. */
- if (!index_bounds_valid)
- if (!vbo_all_varyings_in_vbos(arrays))
- vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
-
- /* sanity check for pointer arithmetic below */
- assert(sizeof(arrays[0]->Ptr[0]) == 1);
-
- vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
- arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
- if (vertDataEdgeFlags != st->vertdata_edgeflags) {
- st->vertdata_edgeflags = vertDataEdgeFlags;
- st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
- }
-
- st_validate_state(st);
-
- /* must get these after state validation! */
- vp = st->vp;
- vpv = st->vp_variant;
-
-#if 0
- if (MESA_VERBOSE & VERBOSE_GLSL) {
- check_uniforms(ctx);
- }
-#else
- (void) check_uniforms;
-#endif
-
- memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
- /*
- * Setup the vbuffer[] and velements[] arrays.
- */
- if (is_interleaved_arrays(vp, vpv, arrays, &userSpace)) {
- /*printf("Draw interleaved\n");*/
- setup_interleaved_attribs(ctx, vp, vpv, arrays, max_index, userSpace,
- vbuffer, velements);
- num_vbuffers = 1;
- num_velements = vpv->num_inputs;
- if (num_velements == 0)
- num_vbuffers = 0;
- }
- else {
- /*printf("Draw non-interleaved\n");*/
- setup_non_interleaved_attribs(ctx, vp, vpv, arrays, max_index,
- &userSpace, vbuffer, velements);
- num_vbuffers = vpv->num_inputs;
- num_velements = vpv->num_inputs;
- }
-
-#if 0
- {
- GLuint i;
- for (i = 0; i < num_vbuffers; i++) {
- printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride);
- printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index);
- printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset);
- printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer);
- }
- for (i = 0; i < num_velements; i++) {
- printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index);
- printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset);
- printf("vlements[%d].format = %s\n", i, util_format_name(velements[i].src_format));
- }
- }
-#endif
-
- pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer);
- cso_set_vertex_elements(st->cso_context, num_velements, velements);
-
- setup_index_buffer(ctx, ib, &ibuffer);
- pipe->set_index_buffer(pipe, &ibuffer);
-
- util_draw_init_info(&info);
- if (ib) {
- info.indexed = TRUE;
- if (min_index != ~0 && max_index != ~0) {
- info.min_index = min_index;
- info.max_index = max_index;
- }
- }
-
- info.primitive_restart = st->ctx->Array.PrimitiveRestart;
- info.restart_index = st->ctx->Array.RestartIndex;
-
- /* do actual drawing */
- for (i = 0; i < nr_prims; i++) {
- info.mode = translate_prim( ctx, prims[i].mode );
- info.start = prims[i].start;
- info.count = prims[i].count;
- info.instance_count = prims[i].num_instances;
- info.index_bias = prims[i].basevertex;
- if (!ib) {
- info.min_index = info.start;
- info.max_index = info.start + info.count - 1;
- }
-
- if (u_trim_pipe_prim(info.mode, &info.count))
- pipe->draw_vbo(pipe, &info);
- }
-
- pipe_resource_reference(&ibuffer.buffer, NULL);
-
- /* unreference buffers (frees wrapped user-space buffer objects) */
- for (attr = 0; attr < num_vbuffers; attr++) {
- pipe_resource_reference(&vbuffer[attr].buffer, NULL);
- assert(!vbuffer[attr].buffer);
- }
-
- if (userSpace)
- {
- pipe->set_vertex_buffers(pipe, 0, NULL);
- }
-}
-
-
-void st_init_draw( struct st_context *st )
-{
- struct gl_context *ctx = st->ctx;
-
- vbo_set_draw_func(ctx, st_draw_vbo);
-
-#if FEATURE_feedback || FEATURE_rastpos
- st->draw = draw_create(st->pipe); /* for selection/feedback */
-
- /* Disable draw options that might convert points/lines to tris, etc.
- * as that would foul-up feedback/selection mode.
- */
- draw_wide_line_threshold(st->draw, 1000.0f);
- draw_wide_point_threshold(st->draw, 1000.0f);
- draw_enable_line_stipple(st->draw, FALSE);
- draw_enable_point_sprites(st->draw, FALSE);
-#endif
-}
-
-
-void st_destroy_draw( struct st_context *st )
-{
-#if FEATURE_feedback || FEATURE_rastpos
- draw_destroy(st->draw);
-#endif
-}
-
-
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * This file implements the st_draw_vbo() function which is called from
+ * Mesa's VBO module. All point/line/triangle rendering is done through
+ * this function whether the user called glBegin/End, glDrawArrays,
+ * glDrawElements, glEvalMesh, or glCalList, etc.
+ *
+ * We basically convert the VBO's vertex attribute/array information into
+ * Gallium vertex state, bind the vertex buffer objects and call
+ * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays().
+ *
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "program/prog_uniform.h"
+
+#include "vbo/vbo.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_cb_bufferobjects.h"
+#include "st_draw.h"
+#include "st_program.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_prim.h"
+#include "util/u_draw_quad.h"
+#include "draw/draw_context.h"
+#include "cso_cache/cso_context.h"
+
+
+static GLuint double_types[4] = {
+ PIPE_FORMAT_R64_FLOAT,
+ PIPE_FORMAT_R64G64_FLOAT,
+ PIPE_FORMAT_R64G64B64_FLOAT,
+ PIPE_FORMAT_R64G64B64A64_FLOAT
+};
+
+static GLuint float_types[4] = {
+ PIPE_FORMAT_R32_FLOAT,
+ PIPE_FORMAT_R32G32_FLOAT,
+ PIPE_FORMAT_R32G32B32_FLOAT,
+ PIPE_FORMAT_R32G32B32A32_FLOAT
+};
+
+static GLuint half_float_types[4] = {
+ PIPE_FORMAT_R16_FLOAT,
+ PIPE_FORMAT_R16G16_FLOAT,
+ PIPE_FORMAT_R16G16B16_FLOAT,
+ PIPE_FORMAT_R16G16B16A16_FLOAT
+};
+
+static GLuint uint_types_norm[4] = {
+ PIPE_FORMAT_R32_UNORM,
+ PIPE_FORMAT_R32G32_UNORM,
+ PIPE_FORMAT_R32G32B32_UNORM,
+ PIPE_FORMAT_R32G32B32A32_UNORM
+};
+
+static GLuint uint_types_scale[4] = {
+ PIPE_FORMAT_R32_USCALED,
+ PIPE_FORMAT_R32G32_USCALED,
+ PIPE_FORMAT_R32G32B32_USCALED,
+ PIPE_FORMAT_R32G32B32A32_USCALED
+};
+
+static GLuint int_types_norm[4] = {
+ PIPE_FORMAT_R32_SNORM,
+ PIPE_FORMAT_R32G32_SNORM,
+ PIPE_FORMAT_R32G32B32_SNORM,
+ PIPE_FORMAT_R32G32B32A32_SNORM
+};
+
+static GLuint int_types_scale[4] = {
+ PIPE_FORMAT_R32_SSCALED,
+ PIPE_FORMAT_R32G32_SSCALED,
+ PIPE_FORMAT_R32G32B32_SSCALED,
+ PIPE_FORMAT_R32G32B32A32_SSCALED
+};
+
+static GLuint ushort_types_norm[4] = {
+ PIPE_FORMAT_R16_UNORM,
+ PIPE_FORMAT_R16G16_UNORM,
+ PIPE_FORMAT_R16G16B16_UNORM,
+ PIPE_FORMAT_R16G16B16A16_UNORM
+};
+
+static GLuint ushort_types_scale[4] = {
+ PIPE_FORMAT_R16_USCALED,
+ PIPE_FORMAT_R16G16_USCALED,
+ PIPE_FORMAT_R16G16B16_USCALED,
+ PIPE_FORMAT_R16G16B16A16_USCALED
+};
+
+static GLuint short_types_norm[4] = {
+ PIPE_FORMAT_R16_SNORM,
+ PIPE_FORMAT_R16G16_SNORM,
+ PIPE_FORMAT_R16G16B16_SNORM,
+ PIPE_FORMAT_R16G16B16A16_SNORM
+};
+
+static GLuint short_types_scale[4] = {
+ PIPE_FORMAT_R16_SSCALED,
+ PIPE_FORMAT_R16G16_SSCALED,
+ PIPE_FORMAT_R16G16B16_SSCALED,
+ PIPE_FORMAT_R16G16B16A16_SSCALED
+};
+
+static GLuint ubyte_types_norm[4] = {
+ PIPE_FORMAT_R8_UNORM,
+ PIPE_FORMAT_R8G8_UNORM,
+ PIPE_FORMAT_R8G8B8_UNORM,
+ PIPE_FORMAT_R8G8B8A8_UNORM
+};
+
+static GLuint ubyte_types_scale[4] = {
+ PIPE_FORMAT_R8_USCALED,
+ PIPE_FORMAT_R8G8_USCALED,
+ PIPE_FORMAT_R8G8B8_USCALED,
+ PIPE_FORMAT_R8G8B8A8_USCALED
+};
+
+static GLuint byte_types_norm[4] = {
+ PIPE_FORMAT_R8_SNORM,
+ PIPE_FORMAT_R8G8_SNORM,
+ PIPE_FORMAT_R8G8B8_SNORM,
+ PIPE_FORMAT_R8G8B8A8_SNORM
+};
+
+static GLuint byte_types_scale[4] = {
+ PIPE_FORMAT_R8_SSCALED,
+ PIPE_FORMAT_R8G8_SSCALED,
+ PIPE_FORMAT_R8G8B8_SSCALED,
+ PIPE_FORMAT_R8G8B8A8_SSCALED
+};
+
+static GLuint fixed_types[4] = {
+ PIPE_FORMAT_R32_FIXED,
+ PIPE_FORMAT_R32G32_FIXED,
+ PIPE_FORMAT_R32G32B32_FIXED,
+ PIPE_FORMAT_R32G32B32A32_FIXED
+};
+
+
+
+/**
+ * Return a PIPE_FORMAT_x for the given GL datatype and size.
+ */
+GLuint
+st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
+ GLboolean normalized)
+{
+ assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
+ type == GL_FIXED || type == GL_HALF_FLOAT);
+ assert(size >= 1);
+ assert(size <= 4);
+ assert(format == GL_RGBA || format == GL_BGRA);
+
+ if (format == GL_BGRA) {
+ /* this is an odd-ball case */
+ assert(type == GL_UNSIGNED_BYTE);
+ assert(normalized);
+ return PIPE_FORMAT_B8G8R8A8_UNORM;
+ }
+
+ if (normalized) {
+ switch (type) {
+ case GL_DOUBLE: return double_types[size-1];
+ case GL_FLOAT: return float_types[size-1];
+ case GL_HALF_FLOAT: return half_float_types[size-1];
+ case GL_INT: return int_types_norm[size-1];
+ case GL_SHORT: return short_types_norm[size-1];
+ case GL_BYTE: return byte_types_norm[size-1];
+ case GL_UNSIGNED_INT: return uint_types_norm[size-1];
+ case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1];
+ case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
+ case GL_FIXED: return fixed_types[size-1];
+ default: assert(0); return 0;
+ }
+ }
+ else {
+ switch (type) {
+ case GL_DOUBLE: return double_types[size-1];
+ case GL_FLOAT: return float_types[size-1];
+ case GL_HALF_FLOAT: return half_float_types[size-1];
+ case GL_INT: return int_types_scale[size-1];
+ case GL_SHORT: return short_types_scale[size-1];
+ case GL_BYTE: return byte_types_scale[size-1];
+ case GL_UNSIGNED_INT: return uint_types_scale[size-1];
+ case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1];
+ case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
+ case GL_FIXED: return fixed_types[size-1];
+ default: assert(0); return 0;
+ }
+ }
+ return 0; /* silence compiler warning */
+}
+
+
+
+
+
+/**
+ * Examine the active arrays to determine if we have interleaved
+ * vertex arrays all living in one VBO, or all living in user space.
+ * \param userSpace returns whether the arrays are in user space.
+ */
+static GLboolean
+is_interleaved_arrays(const struct st_vertex_program *vp,
+ const struct st_vp_variant *vpv,
+ const struct gl_client_array **arrays)
+{
+ GLuint attr;
+ const struct gl_buffer_object *firstBufObj = NULL;
+ GLint firstStride = -1;
+ const GLubyte *client_addr = NULL;
+
+ for (attr = 0; attr < vpv->num_inputs; attr++) {
+ const GLuint mesaAttr = vp->index_to_input[attr];
+ const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
+ const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
+
+ if (firstStride < 0) {
+ firstStride = stride;
+ }
+ else if (firstStride != stride) {
+ return GL_FALSE;
+ }
+
+ if (!bufObj || !bufObj->Name) {
+ /* Try to detect if the client-space arrays are
+ * "close" to each other.
+ */
+ if (!client_addr) {
+ client_addr = arrays[mesaAttr]->Ptr;
+ }
+ else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) {
+ /* arrays start too far apart */
+ return GL_FALSE;
+ }
+ }
+ else if (!firstBufObj) {
+ firstBufObj = bufObj;
+ }
+ else if (bufObj != firstBufObj) {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
+
+
+/**
+ * Set up for drawing interleaved arrays that all live in one VBO
+ * or all live in user space.
+ * \param vbuffer returns vertex buffer info
+ * \param velements returns vertex element info
+ */
+static void
+setup_interleaved_attribs(struct gl_context *ctx,
+ const struct st_vertex_program *vp,
+ const struct st_vp_variant *vpv,
+ const struct gl_client_array **arrays,
+ struct pipe_vertex_buffer *vbuffer,
+ struct pipe_vertex_element velements[],
+ unsigned max_index)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ GLuint attr;
+ const GLubyte *low_addr = NULL;
+
+ /* Find the lowest address. */
+ for (attr = 0; attr < vpv->num_inputs; attr++) {
+ const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+
+ low_addr = !low_addr ? start : MIN2(low_addr, start);
+ }
+
+ for (attr = 0; attr < vpv->num_inputs; attr++) {
+ const GLuint mesaAttr = vp->index_to_input[attr];
+ struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
+ struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ GLsizei stride = arrays[mesaAttr]->StrideB;
+
+ if (attr == 0) {
+ if (bufobj && bufobj->Name) {
+ vbuffer->buffer = NULL;
+ pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
+ vbuffer->buffer_offset = pointer_to_offset(low_addr);
+ } else {
+ vbuffer->buffer =
+ pipe_user_buffer_create(pipe->screen, (void*)low_addr,
+ stride * (max_index + 1),
+ PIPE_BIND_VERTEX_BUFFER);
+ vbuffer->buffer_offset = 0;
+
+ /* Track user vertex buffers. */
+ pipe_resource_reference(&st->user_vb[0], vbuffer->buffer);
+ st->user_vb_stride[0] = stride;
+ st->num_user_vbs = 1;
+ }
+ vbuffer->stride = stride; /* in bytes */
+ }
+
+ velements[attr].src_offset =
+ (unsigned) (arrays[mesaAttr]->Ptr - low_addr);
+ velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+ velements[attr].vertex_buffer_index = 0;
+ velements[attr].src_format =
+ st_pipe_vertex_format(arrays[mesaAttr]->Type,
+ arrays[mesaAttr]->Size,
+ arrays[mesaAttr]->Format,
+ arrays[mesaAttr]->Normalized);
+ assert(velements[attr].src_format);
+ }
+}
+
+
+/**
+ * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
+ * vertex attribute.
+ * \param vbuffer returns vertex buffer info
+ * \param velements returns vertex element info
+ */
+static void
+setup_non_interleaved_attribs(struct gl_context *ctx,
+ const struct st_vertex_program *vp,
+ const struct st_vp_variant *vpv,
+ const struct gl_client_array **arrays,
+ struct pipe_vertex_buffer vbuffer[],
+ struct pipe_vertex_element velements[],
+ unsigned max_index)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ GLuint attr;
+
+ for (attr = 0; attr < vpv->num_inputs; attr++) {
+ const GLuint mesaAttr = vp->index_to_input[attr];
+ struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
+ GLsizei stride = arrays[mesaAttr]->StrideB;
+
+ if (bufobj && bufobj->Name) {
+ /* Attribute data is in a VBO.
+ * Recall that for VBOs, the gl_client_array->Ptr field is
+ * really an offset from the start of the VBO, not a pointer.
+ */
+ struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ assert(stobj->buffer);
+
+ vbuffer[attr].buffer = NULL;
+ pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
+ vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
+ }
+ else {
+ /* wrap user data */
+ if (arrays[mesaAttr]->Ptr) {
+ vbuffer[attr].buffer =
+ pipe_user_buffer_create(pipe->screen,
+ (void *) arrays[mesaAttr]->Ptr,
+ stride * (max_index + 1),
+ PIPE_BIND_VERTEX_BUFFER);
+ }
+ else {
+ /* no array, use ctx->Current.Attrib[] value */
+ uint bytes = sizeof(ctx->Current.Attrib[0]);
+ vbuffer[attr].buffer =
+ pipe_user_buffer_create(pipe->screen,
+ (void *) ctx->Current.Attrib[mesaAttr],
+ bytes,
+ PIPE_BIND_VERTEX_BUFFER);
+ stride = 0;
+ }
+
+ vbuffer[attr].buffer_offset = 0;
+
+ /* Track user vertex buffers. */
+ pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer);
+ st->user_vb_stride[attr] = stride;
+ st->num_user_vbs = MAX2(st->num_user_vbs, attr+1);
+ }
+
+ /* common-case setup */
+ vbuffer[attr].stride = stride; /* in bytes */
+
+ velements[attr].src_offset = 0;
+ velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+ velements[attr].vertex_buffer_index = attr;
+ velements[attr].src_format
+ = st_pipe_vertex_format(arrays[mesaAttr]->Type,
+ arrays[mesaAttr]->Size,
+ arrays[mesaAttr]->Format,
+ arrays[mesaAttr]->Normalized);
+ assert(velements[attr].src_format);
+ }
+}
+
+
+static void
+setup_index_buffer(struct gl_context *ctx,
+ const struct _mesa_index_buffer *ib,
+ struct pipe_index_buffer *ibuffer)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+
+ memset(ibuffer, 0, sizeof(*ibuffer));
+ if (ib) {
+ struct gl_buffer_object *bufobj = ib->obj;
+
+ switch (ib->type) {
+ case GL_UNSIGNED_INT:
+ ibuffer->index_size = 4;
+ break;
+ case GL_UNSIGNED_SHORT:
+ ibuffer->index_size = 2;
+ break;
+ case GL_UNSIGNED_BYTE:
+ ibuffer->index_size = 1;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ /* get/create the index buffer object */
+ if (bufobj && bufobj->Name) {
+ /* elements/indexes are in a real VBO */
+ struct st_buffer_object *stobj = st_buffer_object(bufobj);
+ pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
+ ibuffer->offset = pointer_to_offset(ib->ptr);
+ }
+ else {
+ /* element/indicies are in user space memory */
+ ibuffer->buffer =
+ pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
+ ib->count * ibuffer->index_size,
+ PIPE_BIND_INDEX_BUFFER);
+ }
+ }
+}
+
+/**
+ * Prior to drawing, check that any uniforms referenced by the
+ * current shader have been set. If a uniform has not been set,
+ * issue a warning.
+ */
+static void
+check_uniforms(struct gl_context *ctx)
+{
+ struct gl_shader_program *shProg[3] = {
+ ctx->Shader.CurrentVertexProgram,
+ ctx->Shader.CurrentGeometryProgram,
+ ctx->Shader.CurrentFragmentProgram,
+ };
+ unsigned j;
+
+ for (j = 0; j < 3; j++) {
+ unsigned i;
+
+ if (shProg[j] == NULL || !shProg[j]->LinkStatus)
+ continue;
+
+ for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) {
+ const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i];
+ if (!u->Initialized) {
+ _mesa_warning(ctx,
+ "Using shader with uninitialized uniform: %s",
+ u->Name);
+ }
+ }
+ }
+}
+
+
+/**
+ * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
+ * the corresponding Gallium type.
+ */
+static unsigned
+translate_prim(const struct gl_context *ctx, unsigned prim)
+{
+ /* GL prims should match Gallium prims, spot-check a few */
+ assert(GL_POINTS == PIPE_PRIM_POINTS);
+ assert(GL_QUADS == PIPE_PRIM_QUADS);
+ assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
+
+ /* Avoid quadstrips if it's easy to do so:
+ * Note: it's imporant to do the correct trimming if we change the prim type!
+ * We do that wherever this function is called.
+ */
+ if (prim == GL_QUAD_STRIP &&
+ ctx->Light.ShadeModel != GL_FLAT &&
+ ctx->Polygon.FrontMode == GL_FILL &&
+ ctx->Polygon.BackMode == GL_FILL)
+ prim = GL_TRIANGLE_STRIP;
+
+ return prim;
+}
+
+
+static void
+st_validate_varrays(struct gl_context *ctx,
+ const struct gl_client_array **arrays,
+ unsigned max_index)
+{
+ struct st_context *st = st_context(ctx);
+ const struct st_vertex_program *vp;
+ const struct st_vp_variant *vpv;
+ struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
+ struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
+ unsigned num_vbuffers, num_velements;
+ GLuint attr;
+ unsigned i;
+
+ /* must get these after state validation! */
+ vp = st->vp;
+ vpv = st->vp_variant;
+
+ memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
+
+ /* Unreference any user vertex buffers. */
+ for (i = 0; i < st->num_user_vbs; i++) {
+ pipe_resource_reference(&st->user_vb[i], NULL);
+ }
+ st->num_user_vbs = 0;
+
+ /*
+ * Setup the vbuffer[] and velements[] arrays.
+ */
+ if (is_interleaved_arrays(vp, vpv, arrays)) {
+ setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
+ max_index);
+ num_vbuffers = 1;
+ num_velements = vpv->num_inputs;
+ if (num_velements == 0)
+ num_vbuffers = 0;
+ }
+ else {
+ setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
+ vbuffer, velements, max_index);
+ num_vbuffers = vpv->num_inputs;
+ num_velements = vpv->num_inputs;
+ }
+
+ cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
+ cso_set_vertex_elements(st->cso_context, num_velements, velements);
+
+ /* unreference buffers (frees wrapped user-space buffer objects)
+ * This is OK, because the pipe driver should reference buffers by itself
+ * in set_vertex_buffers. */
+ for (attr = 0; attr < num_vbuffers; attr++) {
+ pipe_resource_reference(&vbuffer[attr].buffer, NULL);
+ assert(!vbuffer[attr].buffer);
+ }
+}
+
+
+/**
+ * This function gets plugged into the VBO module and is called when
+ * we have something to render.
+ * Basically, translate the information into the format expected by gallium.
+ */
+void
+st_draw_vbo(struct gl_context *ctx,
+ const struct gl_client_array **arrays,
+ const struct _mesa_prim *prims,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_index_buffer ibuffer;
+ struct pipe_draw_info info;
+ unsigned i;
+ GLboolean new_array =
+ st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;
+
+ /* Mesa core state should have been validated already */
+ assert(ctx->NewState == 0x0);
+
+ if (ib) {
+ /* Gallium probably doesn't want this in some cases. */
+ if (!index_bounds_valid)
+ if (!vbo_all_varyings_in_vbos(arrays))
+ vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
+ } else {
+ /* Get min/max index for non-indexed drawing. */
+ min_index = ~0;
+ max_index = 0;
+
+ for (i = 0; i < nr_prims; i++) {
+ min_index = MIN2(min_index, prims[i].start);
+ max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+ }
+ }
+
+ /* Validate state. */
+ if (st->dirty.st) {
+ GLboolean vertDataEdgeFlags;
+
+ /* sanity check for pointer arithmetic below */
+ assert(sizeof(arrays[0]->Ptr[0]) == 1);
+
+ vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
+ arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
+ if (vertDataEdgeFlags != st->vertdata_edgeflags) {
+ st->vertdata_edgeflags = vertDataEdgeFlags;
+ st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
+ }
+
+ st_validate_state(st);
+
+ if (new_array) {
+ st_validate_varrays(ctx, arrays, max_index);
+ }
+
+#if 0
+ if (MESA_VERBOSE & VERBOSE_GLSL) {
+ check_uniforms(ctx);
+ }
+#else
+ (void) check_uniforms;
+#endif
+ }
+
+ /* Notify the driver that the content of user buffers may have been
+ * changed. */
+ if (!new_array && st->num_user_vbs) {
+ for (i = 0; i < st->num_user_vbs; i++) {
+ if (st->user_vb[i]) {
+ unsigned stride = st->user_vb_stride[i];
+
+ if (stride) {
+ pipe->redefine_user_buffer(pipe, st->user_vb[i],
+ min_index * stride,
+ (max_index + 1 - min_index) * stride);
+ } else {
+ /* stride == 0 */
+ pipe->redefine_user_buffer(pipe, st->user_vb[i],
+ 0, st->user_vb[i]->width0);
+ }
+ }
+ }
+ }
+
+ setup_index_buffer(ctx, ib, &ibuffer);
+ pipe->set_index_buffer(pipe, &ibuffer);
+
+ util_draw_init_info(&info);
+ if (ib) {
+ info.indexed = TRUE;
+ if (min_index != ~0 && max_index != ~0) {
+ info.min_index = min_index;
+ info.max_index = max_index;
+ }
+ }
+
+ info.primitive_restart = st->ctx->Array.PrimitiveRestart;
+ info.restart_index = st->ctx->Array.RestartIndex;
+
+ /* do actual drawing */
+ for (i = 0; i < nr_prims; i++) {
+ info.mode = translate_prim( ctx, prims[i].mode );
+ info.start = prims[i].start;
+ info.count = prims[i].count;
+ info.instance_count = prims[i].num_instances;
+ info.index_bias = prims[i].basevertex;
+ if (!ib) {
+ info.min_index = info.start;
+ info.max_index = info.start + info.count - 1;
+ }
+
+ if (u_trim_pipe_prim(info.mode, &info.count))
+ pipe->draw_vbo(pipe, &info);
+ }
+
+ pipe_resource_reference(&ibuffer.buffer, NULL);
+}
+
+
+void st_init_draw( struct st_context *st )
+{
+ struct gl_context *ctx = st->ctx;
+
+ vbo_set_draw_func(ctx, st_draw_vbo);
+
+#if FEATURE_feedback || FEATURE_rastpos
+ st->draw = draw_create(st->pipe); /* for selection/feedback */
+
+ /* Disable draw options that might convert points/lines to tris, etc.
+ * as that would foul-up feedback/selection mode.
+ */
+ draw_wide_line_threshold(st->draw, 1000.0f);
+ draw_wide_point_threshold(st->draw, 1000.0f);
+ draw_enable_line_stipple(st->draw, FALSE);
+ draw_enable_point_sprites(st->draw, FALSE);
+#endif
+}
+
+
+void st_destroy_draw( struct st_context *st )
+{
+#if FEATURE_feedback || FEATURE_rastpos
+ draw_destroy(st->draw);
+#endif
+}
+
+
diff --git a/mesalib/src/mesa/state_tracker/st_draw_feedback.c b/mesalib/src/mesa/state_tracker/st_draw_feedback.c
index 545b32d75..1e1220bfe 100644
--- a/mesalib/src/mesa/state_tracker/st_draw_feedback.c
+++ b/mesalib/src/mesa/state_tracker/st_draw_feedback.c
@@ -179,7 +179,6 @@ st_feedback_draw_vbo(struct gl_context *ctx,
/* common-case setup */
vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */
- vbuffers[attr].max_index = max_index;
velements[attr].instance_divisor = 0;
velements[attr].vertex_buffer_index = attr;
velements[attr].src_format =
diff --git a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
index 0be66a2c2..18eb3be68 100644
--- a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
@@ -1,424 +1,422 @@
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "main/imports.h"
-#include "main/mipmap.h"
-#include "main/teximage.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_gen_mipmap.h"
-
-#include "st_debug.h"
-#include "st_context.h"
-#include "st_texture.h"
-#include "st_gen_mipmap.h"
-#include "st_cb_texture.h"
-
-
-/**
- * one-time init for generate mipmap
- * XXX Note: there may be other times we need no-op/simple state like this.
- * In that case, some code refactoring would be good.
- */
-void
-st_init_generate_mipmap(struct st_context *st)
-{
- st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context);
-}
-
-
-void
-st_destroy_generate_mipmap(struct st_context *st)
-{
- util_destroy_gen_mipmap(st->gen_mipmap);
- st->gen_mipmap = NULL;
-}
-
-
-/**
- * Generate mipmap levels using hardware rendering.
- * \return TRUE if successful, FALSE if not possible
- */
-static boolean
-st_render_mipmap(struct st_context *st,
- GLenum target,
- struct st_texture_object *stObj,
- uint baseLevel, uint lastLevel)
-{
- struct pipe_context *pipe = st->pipe;
- struct pipe_screen *screen = pipe->screen;
- struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe);
- const uint face = _mesa_tex_target_to_face(target);
-
- assert(psv->texture == stObj->pt);
-#if 0
- assert(target != GL_TEXTURE_3D); /* implemented but untested */
-#endif
-
- /* check if we can render in the texture's format */
- /* XXX should probably kill this and always use util_gen_mipmap
- since this implements a sw fallback as well */
- if (!screen->is_format_supported(screen, psv->format, psv->texture->target,
- 0, PIPE_BIND_RENDER_TARGET, 0)) {
- return FALSE;
- }
-
- util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel,
- PIPE_TEX_FILTER_LINEAR);
-
- return TRUE;
-}
-
-
-/**
- * Helper function to decompress an image. The result is a 32-bpp RGBA
- * image with stride==width.
- */
-static void
-decompress_image(enum pipe_format format,
- const uint8_t *src, uint8_t *dst,
- unsigned width, unsigned height)
-{
- const struct util_format_description *desc = util_format_description(format);
- const uint bw = util_format_get_blockwidth(format);
- const uint bh = util_format_get_blockheight(format);
- const uint dst_stride = 4 * MAX2(width, bw);
- const uint src_stride = util_format_get_stride(format, width);
-
- desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
-
- if (width < bw || height < bh) {
- /* We're decompressing an image smaller than the compression
- * block size. We don't want garbage pixel values in the region
- * outside (width x height) so replicate pixels from the (width
- * x height) region to fill out the (bw x bh) block size.
- */
- uint x, y;
- for (y = 0; y < bh; y++) {
- for (x = 0; x < bw; x++) {
- if (x >= width || y >= height) {
- uint p = (y * bw + x) * 4;
- dst[p + 0] = dst[0];
- dst[p + 1] = dst[1];
- dst[p + 2] = dst[2];
- dst[p + 3] = dst[3];
- }
- }
- }
- }
-}
-
-
-/**
- * Helper function to compress an image. The source is a 32-bpp RGBA image
- * with stride==width.
- */
-static void
-compress_image(enum pipe_format format,
- const uint8_t *src, uint8_t *dst,
- unsigned width, unsigned height)
-{
- const struct util_format_description *desc = util_format_description(format);
- const uint dst_stride = util_format_get_stride(format, width);
- const uint src_stride = 4 * width;
-
- desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
-}
-
-
-/**
- * Software fallback for generate mipmap levels.
- */
-static void
-fallback_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct pipe_context *pipe = st_context(ctx)->pipe;
- struct pipe_resource *pt = st_get_texobj_resource(texObj);
- const uint baseLevel = texObj->BaseLevel;
- const uint lastLevel = pt->last_level;
- const uint face = _mesa_tex_target_to_face(target);
- uint dstLevel;
- GLenum datatype;
- GLuint comps;
- GLboolean compressed;
-
- if (ST_DEBUG & DEBUG_FALLBACK)
- debug_printf("%s: fallback processing\n", __FUNCTION__);
-
- assert(target != GL_TEXTURE_3D); /* not done yet */
-
- compressed =
- _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat);
-
- if (compressed) {
- datatype = GL_UNSIGNED_BYTE;
- comps = 4;
- }
- else {
- _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat,
- &datatype, &comps);
- assert(comps > 0 && "bad texture format in fallback_generate_mipmap()");
- }
-
- for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
- const uint srcLevel = dstLevel - 1;
- const uint srcWidth = u_minify(pt->width0, srcLevel);
- const uint srcHeight = u_minify(pt->height0, srcLevel);
- const uint srcDepth = u_minify(pt->depth0, srcLevel);
- const uint dstWidth = u_minify(pt->width0, dstLevel);
- const uint dstHeight = u_minify(pt->height0, dstLevel);
- const uint dstDepth = u_minify(pt->depth0, dstLevel);
- struct pipe_transfer *srcTrans, *dstTrans;
- const ubyte *srcData;
- ubyte *dstData;
- int srcStride, dstStride;
-
- srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel,
- face,
- PIPE_TRANSFER_READ, 0, 0,
- srcWidth, srcHeight);
-
- dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel,
- face,
- PIPE_TRANSFER_WRITE, 0, 0,
- dstWidth, dstHeight);
-
- srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans);
- dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans);
-
- srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format);
- dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format);
-
- /* this cannot work correctly for 3d since it does
- not respect layerStride. */
- if (compressed) {
- const enum pipe_format format = pt->format;
- const uint bw = util_format_get_blockwidth(format);
- const uint bh = util_format_get_blockheight(format);
- const uint srcWidth2 = align(srcWidth, bw);
- const uint srcHeight2 = align(srcHeight, bh);
- const uint dstWidth2 = align(dstWidth, bw);
- const uint dstHeight2 = align(dstHeight, bh);
- uint8_t *srcTemp, *dstTemp;
-
- assert(comps == 4);
-
- srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000);
- dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000);
-
- /* decompress the src image: srcData -> srcTemp */
- decompress_image(format, srcData, srcTemp, srcWidth, srcHeight);
-
- _mesa_generate_mipmap_level(target, datatype, comps,
- 0 /*border*/,
- srcWidth2, srcHeight2, srcDepth,
- srcTemp,
- srcWidth2, /* stride in texels */
- dstWidth2, dstHeight2, dstDepth,
- dstTemp,
- dstWidth2); /* stride in texels */
-
- /* compress the new image: dstTemp -> dstData */
- compress_image(format, dstTemp, dstData, dstWidth, dstHeight);
-
- free(srcTemp);
- free(dstTemp);
- }
- else {
- _mesa_generate_mipmap_level(target, datatype, comps,
- 0 /*border*/,
- srcWidth, srcHeight, srcDepth,
- srcData,
- srcStride, /* stride in texels */
- dstWidth, dstHeight, dstDepth,
- dstData,
- dstStride); /* stride in texels */
- }
-
- pipe_transfer_unmap(pipe, srcTrans);
- pipe_transfer_unmap(pipe, dstTrans);
-
- pipe->transfer_destroy(pipe, srcTrans);
- pipe->transfer_destroy(pipe, dstTrans);
- }
-}
-
-
-/**
- * Compute the expected number of mipmap levels in the texture given
- * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/
- * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap
- * levels should be generated.
- */
-static GLuint
-compute_num_levels(struct gl_context *ctx,
- struct gl_texture_object *texObj,
- GLenum target)
-{
- if (target == GL_TEXTURE_RECTANGLE_ARB) {
- return 1;
- }
- else {
- const struct gl_texture_image *baseImage =
- _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel);
- GLuint size, numLevels;
-
- size = MAX2(baseImage->Width2, baseImage->Height2);
- size = MAX2(size, baseImage->Depth2);
-
- numLevels = texObj->BaseLevel;
-
- while (size > 0) {
- numLevels++;
- size >>= 1;
- }
-
- numLevels = MIN2(numLevels, texObj->MaxLevel + 1);
-
- assert(numLevels >= 1);
-
- return numLevels;
- }
-}
-
-
-/**
- * Called via ctx->Driver.GenerateMipmap().
- */
-void
-st_generate_mipmap(struct gl_context *ctx, GLenum target,
- struct gl_texture_object *texObj)
-{
- struct st_context *st = st_context(ctx);
- struct st_texture_object *stObj = st_texture_object(texObj);
- struct pipe_resource *pt = st_get_texobj_resource(texObj);
- const uint baseLevel = texObj->BaseLevel;
- uint lastLevel;
- uint dstLevel;
-
- if (!pt)
- return;
-
- /* not sure if this ultimately actually should work,
- but we're not supporting multisampled textures yet. */
- assert(pt->nr_samples < 2);
-
- /* find expected last mipmap level to generate*/
- lastLevel = compute_num_levels(ctx, texObj, target) - 1;
-
- if (lastLevel == 0)
- return;
-
- if (pt->last_level < lastLevel) {
- /* The current gallium texture doesn't have space for all the
- * mipmap levels we need to generate. So allocate a new texture.
- */
- struct pipe_resource *oldTex = stObj->pt;
-
- /* create new texture with space for more levels */
- stObj->pt = st_texture_create(st,
- oldTex->target,
- oldTex->format,
- lastLevel,
- oldTex->width0,
- oldTex->height0,
- oldTex->depth0,
- oldTex->array_size,
- oldTex->bind);
-
- /* The texture isn't in a "complete" state yet so set the expected
- * lastLevel here, since it won't get done in st_finalize_texture().
- */
- stObj->lastLevel = lastLevel;
-
- /* This will copy the old texture's base image into the new texture
- * which we just allocated.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
-
- /* release the old tex (will likely be freed too) */
- pipe_resource_reference(&oldTex, NULL);
- pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-
- pt = stObj->pt;
- }
- else {
- /* Make sure that the base texture image data is present in the
- * texture buffer.
- */
- st_finalize_texture(ctx, st->pipe, texObj);
- }
-
- assert(pt->last_level >= lastLevel);
-
- /* Try to generate the mipmap by rendering/texturing. If that fails,
- * use the software fallback.
- */
- if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) {
- /* since the util code actually also has a fallback, should
- probably make it never fail and kill this */
- fallback_generate_mipmap(ctx, target, texObj);
- }
-
- /* Fill in the Mesa gl_texture_image fields */
- for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
- const uint srcLevel = dstLevel - 1;
- const struct gl_texture_image *srcImage
- = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
- struct gl_texture_image *dstImage;
- struct st_texture_image *stImage;
- uint dstWidth = u_minify(pt->width0, dstLevel);
- uint dstHeight = u_minify(pt->height0, dstLevel);
- uint dstDepth = u_minify(pt->depth0, dstLevel);
- uint border = srcImage->Border;
-
- dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel);
- if (!dstImage) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
- return;
- }
-
- /* Free old image data */
- if (dstImage->Data)
- ctx->Driver.FreeTexImageData(ctx, dstImage);
-
- /* initialize new image */
- _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight,
- dstDepth, border, srcImage->InternalFormat,
- srcImage->TexFormat);
-
- stImage = st_texture_image(dstImage);
- stImage->level = dstLevel;
-
- pipe_resource_reference(&stImage->pt, pt);
- }
-}
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/mipmap.h"
+#include "main/teximage.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_gen_mipmap.h"
+
+#include "st_debug.h"
+#include "st_context.h"
+#include "st_texture.h"
+#include "st_gen_mipmap.h"
+#include "st_cb_texture.h"
+
+
+/**
+ * one-time init for generate mipmap
+ * XXX Note: there may be other times we need no-op/simple state like this.
+ * In that case, some code refactoring would be good.
+ */
+void
+st_init_generate_mipmap(struct st_context *st)
+{
+ st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context);
+}
+
+
+void
+st_destroy_generate_mipmap(struct st_context *st)
+{
+ util_destroy_gen_mipmap(st->gen_mipmap);
+ st->gen_mipmap = NULL;
+}
+
+
+/**
+ * Generate mipmap levels using hardware rendering.
+ * \return TRUE if successful, FALSE if not possible
+ */
+static boolean
+st_render_mipmap(struct st_context *st,
+ GLenum target,
+ struct st_texture_object *stObj,
+ uint baseLevel, uint lastLevel)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_sampler_view *psv = st_get_texture_sampler_view(stObj, pipe);
+ const uint face = _mesa_tex_target_to_face(target);
+
+ assert(psv->texture == stObj->pt);
+#if 0
+ assert(target != GL_TEXTURE_3D); /* implemented but untested */
+#endif
+
+ /* check if we can render in the texture's format */
+ /* XXX should probably kill this and always use util_gen_mipmap
+ since this implements a sw fallback as well */
+ if (!screen->is_format_supported(screen, psv->format, psv->texture->target,
+ 0, PIPE_BIND_RENDER_TARGET, 0)) {
+ return FALSE;
+ }
+
+ util_gen_mipmap(st->gen_mipmap, psv, face, baseLevel, lastLevel,
+ PIPE_TEX_FILTER_LINEAR);
+
+ return TRUE;
+}
+
+
+/**
+ * Helper function to decompress an image. The result is a 32-bpp RGBA
+ * image with stride==width.
+ */
+static void
+decompress_image(enum pipe_format format,
+ const uint8_t *src, uint8_t *dst,
+ unsigned width, unsigned height, unsigned src_stride)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ const uint bw = util_format_get_blockwidth(format);
+ const uint bh = util_format_get_blockheight(format);
+ const uint dst_stride = 4 * MAX2(width, bw);
+
+ desc->unpack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
+
+ if (width < bw || height < bh) {
+ /* We're decompressing an image smaller than the compression
+ * block size. We don't want garbage pixel values in the region
+ * outside (width x height) so replicate pixels from the (width
+ * x height) region to fill out the (bw x bh) block size.
+ */
+ uint x, y;
+ for (y = 0; y < bh; y++) {
+ for (x = 0; x < bw; x++) {
+ if (x >= width || y >= height) {
+ uint p = (y * bw + x) * 4;
+ dst[p + 0] = dst[0];
+ dst[p + 1] = dst[1];
+ dst[p + 2] = dst[2];
+ dst[p + 3] = dst[3];
+ }
+ }
+ }
+ }
+}
+
+
+/**
+ * Helper function to compress an image. The source is a 32-bpp RGBA image
+ * with stride==width.
+ */
+static void
+compress_image(enum pipe_format format,
+ const uint8_t *src, uint8_t *dst,
+ unsigned width, unsigned height, unsigned dst_stride)
+{
+ const struct util_format_description *desc = util_format_description(format);
+ const uint src_stride = 4 * width;
+
+ desc->pack_rgba_8unorm(dst, dst_stride, src, src_stride, width, height);
+}
+
+
+/**
+ * Software fallback for generate mipmap levels.
+ */
+static void
+fallback_generate_mipmap(struct gl_context *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ struct pipe_context *pipe = st_context(ctx)->pipe;
+ struct pipe_resource *pt = st_get_texobj_resource(texObj);
+ const uint baseLevel = texObj->BaseLevel;
+ const uint lastLevel = pt->last_level;
+ const uint face = _mesa_tex_target_to_face(target);
+ uint dstLevel;
+ GLenum datatype;
+ GLuint comps;
+ GLboolean compressed;
+
+ if (ST_DEBUG & DEBUG_FALLBACK)
+ debug_printf("%s: fallback processing\n", __FUNCTION__);
+
+ assert(target != GL_TEXTURE_3D); /* not done yet */
+
+ compressed =
+ _mesa_is_format_compressed(texObj->Image[face][baseLevel]->TexFormat);
+
+ if (compressed) {
+ datatype = GL_UNSIGNED_BYTE;
+ comps = 4;
+ }
+ else {
+ _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat,
+ &datatype, &comps);
+ assert(comps > 0 && "bad texture format in fallback_generate_mipmap()");
+ }
+
+ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+ const uint srcLevel = dstLevel - 1;
+ const uint srcWidth = u_minify(pt->width0, srcLevel);
+ const uint srcHeight = u_minify(pt->height0, srcLevel);
+ const uint srcDepth = u_minify(pt->depth0, srcLevel);
+ const uint dstWidth = u_minify(pt->width0, dstLevel);
+ const uint dstHeight = u_minify(pt->height0, dstLevel);
+ const uint dstDepth = u_minify(pt->depth0, dstLevel);
+ struct pipe_transfer *srcTrans, *dstTrans;
+ const ubyte *srcData;
+ ubyte *dstData;
+ int srcStride, dstStride;
+
+ srcTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, srcLevel,
+ face,
+ PIPE_TRANSFER_READ, 0, 0,
+ srcWidth, srcHeight);
+
+ dstTrans = pipe_get_transfer(st_context(ctx)->pipe, pt, dstLevel,
+ face,
+ PIPE_TRANSFER_WRITE, 0, 0,
+ dstWidth, dstHeight);
+
+ srcData = (ubyte *) pipe_transfer_map(pipe, srcTrans);
+ dstData = (ubyte *) pipe_transfer_map(pipe, dstTrans);
+
+ srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->resource->format);
+ dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->resource->format);
+
+ /* this cannot work correctly for 3d since it does
+ not respect layerStride. */
+ if (compressed) {
+ const enum pipe_format format = pt->format;
+ const uint bw = util_format_get_blockwidth(format);
+ const uint bh = util_format_get_blockheight(format);
+ const uint srcWidth2 = align(srcWidth, bw);
+ const uint srcHeight2 = align(srcHeight, bh);
+ const uint dstWidth2 = align(dstWidth, bw);
+ const uint dstHeight2 = align(dstHeight, bh);
+ uint8_t *srcTemp, *dstTemp;
+
+ assert(comps == 4);
+
+ srcTemp = malloc(srcWidth2 * srcHeight2 * comps + 000);
+ dstTemp = malloc(dstWidth2 * dstHeight2 * comps + 000);
+
+ /* decompress the src image: srcData -> srcTemp */
+ decompress_image(format, srcData, srcTemp, srcWidth, srcHeight, srcTrans->stride);
+
+ _mesa_generate_mipmap_level(target, datatype, comps,
+ 0 /*border*/,
+ srcWidth2, srcHeight2, srcDepth,
+ srcTemp,
+ srcWidth2, /* stride in texels */
+ dstWidth2, dstHeight2, dstDepth,
+ dstTemp,
+ dstWidth2); /* stride in texels */
+
+ /* compress the new image: dstTemp -> dstData */
+ compress_image(format, dstTemp, dstData, dstWidth, dstHeight, dstTrans->stride);
+
+ free(srcTemp);
+ free(dstTemp);
+ }
+ else {
+ _mesa_generate_mipmap_level(target, datatype, comps,
+ 0 /*border*/,
+ srcWidth, srcHeight, srcDepth,
+ srcData,
+ srcStride, /* stride in texels */
+ dstWidth, dstHeight, dstDepth,
+ dstData,
+ dstStride); /* stride in texels */
+ }
+
+ pipe_transfer_unmap(pipe, srcTrans);
+ pipe_transfer_unmap(pipe, dstTrans);
+
+ pipe->transfer_destroy(pipe, srcTrans);
+ pipe->transfer_destroy(pipe, dstTrans);
+ }
+}
+
+
+/**
+ * Compute the expected number of mipmap levels in the texture given
+ * the width/height/depth of the base image and the GL_TEXTURE_BASE_LEVEL/
+ * GL_TEXTURE_MAX_LEVEL settings. This will tell us how many mipmap
+ * levels should be generated.
+ */
+static GLuint
+compute_num_levels(struct gl_context *ctx,
+ struct gl_texture_object *texObj,
+ GLenum target)
+{
+ if (target == GL_TEXTURE_RECTANGLE_ARB) {
+ return 1;
+ }
+ else {
+ const struct gl_texture_image *baseImage =
+ _mesa_get_tex_image(ctx, texObj, target, texObj->BaseLevel);
+ GLuint size, numLevels;
+
+ size = MAX2(baseImage->Width2, baseImage->Height2);
+ size = MAX2(size, baseImage->Depth2);
+
+ numLevels = texObj->BaseLevel;
+
+ while (size > 0) {
+ numLevels++;
+ size >>= 1;
+ }
+
+ numLevels = MIN2(numLevels, texObj->MaxLevel + 1);
+
+ assert(numLevels >= 1);
+
+ return numLevels;
+ }
+}
+
+
+/**
+ * Called via ctx->Driver.GenerateMipmap().
+ */
+void
+st_generate_mipmap(struct gl_context *ctx, GLenum target,
+ struct gl_texture_object *texObj)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_object *stObj = st_texture_object(texObj);
+ struct pipe_resource *pt = st_get_texobj_resource(texObj);
+ const uint baseLevel = texObj->BaseLevel;
+ uint lastLevel;
+ uint dstLevel;
+
+ if (!pt)
+ return;
+
+ /* not sure if this ultimately actually should work,
+ but we're not supporting multisampled textures yet. */
+ assert(pt->nr_samples < 2);
+
+ /* find expected last mipmap level to generate*/
+ lastLevel = compute_num_levels(ctx, texObj, target) - 1;
+
+ if (lastLevel == 0)
+ return;
+
+ if (pt->last_level < lastLevel) {
+ /* The current gallium texture doesn't have space for all the
+ * mipmap levels we need to generate. So allocate a new texture.
+ */
+ struct pipe_resource *oldTex = stObj->pt;
+
+ /* create new texture with space for more levels */
+ stObj->pt = st_texture_create(st,
+ oldTex->target,
+ oldTex->format,
+ lastLevel,
+ oldTex->width0,
+ oldTex->height0,
+ oldTex->depth0,
+ oldTex->array_size,
+ oldTex->bind);
+
+ /* The texture isn't in a "complete" state yet so set the expected
+ * lastLevel here, since it won't get done in st_finalize_texture().
+ */
+ stObj->lastLevel = lastLevel;
+
+ /* This will copy the old texture's base image into the new texture
+ * which we just allocated.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+
+ /* release the old tex (will likely be freed too) */
+ pipe_resource_reference(&oldTex, NULL);
+ pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+
+ pt = stObj->pt;
+ }
+ else {
+ /* Make sure that the base texture image data is present in the
+ * texture buffer.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+ }
+
+ assert(pt->last_level >= lastLevel);
+
+ /* Try to generate the mipmap by rendering/texturing. If that fails,
+ * use the software fallback.
+ */
+ if (!st_render_mipmap(st, target, stObj, baseLevel, lastLevel)) {
+ /* since the util code actually also has a fallback, should
+ probably make it never fail and kill this */
+ fallback_generate_mipmap(ctx, target, texObj);
+ }
+
+ /* Fill in the Mesa gl_texture_image fields */
+ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+ const uint srcLevel = dstLevel - 1;
+ const struct gl_texture_image *srcImage
+ = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
+ struct gl_texture_image *dstImage;
+ struct st_texture_image *stImage;
+ uint dstWidth = u_minify(pt->width0, dstLevel);
+ uint dstHeight = u_minify(pt->height0, dstLevel);
+ uint dstDepth = u_minify(pt->depth0, dstLevel);
+ uint border = srcImage->Border;
+
+ dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel);
+ if (!dstImage) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
+ return;
+ }
+
+ /* Free old image data */
+ if (dstImage->Data)
+ ctx->Driver.FreeTexImageData(ctx, dstImage);
+
+ /* initialize new image */
+ _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight,
+ dstDepth, border, srcImage->InternalFormat,
+ srcImage->TexFormat);
+
+ stImage = st_texture_image(dstImage);
+ stImage->level = dstLevel;
+
+ pipe_resource_reference(&stImage->pt, pt);
+ }
+}
diff --git a/mesalib/src/mesa/tnl/t_draw.c b/mesalib/src/mesa/tnl/t_draw.c
index 741f0ed3f..b1967e654 100644
--- a/mesalib/src/mesa/tnl/t_draw.c
+++ b/mesalib/src/mesa/tnl/t_draw.c
@@ -1,494 +1,534 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.1
- *
- * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/condrender.h"
-#include "main/context.h"
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
-#include "t_context.h"
-#include "tnl.h"
-
-
-
-static GLubyte *get_space(struct gl_context *ctx, GLuint bytes)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLubyte *space = malloc(bytes);
-
- tnl->block[tnl->nr_blocks++] = space;
- return space;
-}
-
-
-static void free_space(struct gl_context *ctx)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- GLuint i;
- for (i = 0; i < tnl->nr_blocks; i++)
- free(tnl->block[i]);
- tnl->nr_blocks = 0;
-}
-
-
-/* Convert the incoming array to GLfloats. Understands the
- * array->Normalized flag and selects the correct conversion method.
- */
-#define CONVERT( TYPE, MACRO ) do { \
- GLuint i, j; \
- if (input->Normalized) { \
- for (i = 0; i < count; i++) { \
- const TYPE *in = (TYPE *)ptr; \
- for (j = 0; j < sz; j++) { \
- *fptr++ = MACRO(*in); \
- in++; \
- } \
- ptr += input->StrideB; \
- } \
- } else { \
- for (i = 0; i < count; i++) { \
- const TYPE *in = (TYPE *)ptr; \
- for (j = 0; j < sz; j++) { \
- *fptr++ = (GLfloat)(*in); \
- in++; \
- } \
- ptr += input->StrideB; \
- } \
- } \
-} while (0)
-
-
-/**
- * Convert array of BGRA/GLubyte[4] values to RGBA/float[4]
- * \param ptr input/ubyte array
- * \param fptr output/float array
- */
-static void
-convert_bgra_to_float(const struct gl_client_array *input,
- const GLubyte *ptr, GLfloat *fptr,
- GLuint count )
-{
- GLuint i;
- assert(input->Normalized);
- assert(input->Size == 4);
- for (i = 0; i < count; i++) {
- const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */
- *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */
- *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */
- *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */
- *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */
- ptr += input->StrideB;
- }
-}
-
-static void
-convert_half_to_float(const struct gl_client_array *input,
- const GLubyte *ptr, GLfloat *fptr,
- GLuint count, GLuint sz)
-{
- GLuint i, j;
-
- for (i = 0; i < count; i++) {
- GLhalfARB *in = (GLhalfARB *)ptr;
-
- for (j = 0; j < sz; j++) {
- *fptr++ = _mesa_half_to_float(in[j]);
- }
- ptr += input->StrideB;
- }
-}
-
-/* Adjust pointer to point at first requested element, convert to
- * floating point, populate VB->AttribPtr[].
- */
-static void _tnl_import_array( struct gl_context *ctx,
- GLuint attrib,
- GLuint count,
- const struct gl_client_array *input,
- const GLubyte *ptr )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint stride = input->StrideB;
-
- if (input->Type != GL_FLOAT) {
- const GLuint sz = input->Size;
- GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat));
- GLfloat *fptr = (GLfloat *)buf;
-
- switch (input->Type) {
- case GL_BYTE:
- CONVERT(GLbyte, BYTE_TO_FLOAT);
- break;
- case GL_UNSIGNED_BYTE:
- if (input->Format == GL_BGRA) {
- /* See GL_EXT_vertex_array_bgra */
- convert_bgra_to_float(input, ptr, fptr, count);
- }
- else {
- CONVERT(GLubyte, UBYTE_TO_FLOAT);
- }
- break;
- case GL_SHORT:
- CONVERT(GLshort, SHORT_TO_FLOAT);
- break;
- case GL_UNSIGNED_SHORT:
- CONVERT(GLushort, USHORT_TO_FLOAT);
- break;
- case GL_INT:
- CONVERT(GLint, INT_TO_FLOAT);
- break;
- case GL_UNSIGNED_INT:
- CONVERT(GLuint, UINT_TO_FLOAT);
- break;
- case GL_DOUBLE:
- CONVERT(GLdouble, (GLfloat));
- break;
- case GL_HALF_FLOAT:
- convert_half_to_float(input, ptr, fptr, count, sz);
- break;
- default:
- assert(0);
- break;
- }
-
- ptr = buf;
- stride = sz * sizeof(GLfloat);
- }
-
- VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib];
- VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr;
- VB->AttribPtr[attrib]->start = (GLfloat *)ptr;
- VB->AttribPtr[attrib]->count = count;
- VB->AttribPtr[attrib]->stride = stride;
- VB->AttribPtr[attrib]->size = input->Size;
-
- /* This should die, but so should the whole GLvector4f concept:
- */
- VB->AttribPtr[attrib]->flags = (((1<<input->Size)-1) |
- VEC_NOT_WRITEABLE |
- (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE));
-
- VB->AttribPtr[attrib]->storage = NULL;
-}
-
-#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2)
-
-
-static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx,
- const GLvector4f *input,
- GLuint count)
-{
- const GLubyte *ptr = (const GLubyte *)input->data;
- const GLuint stride = input->stride;
- GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS);
- GLboolean *bptr = space;
- GLuint i;
-
- for (i = 0; i < count; i++) {
- *bptr++ = ((GLfloat *)ptr)[0] == 1.0;
- ptr += stride;
- }
-
- return space;
-}
-
-
-static void bind_inputs( struct gl_context *ctx,
- const struct gl_client_array *inputs[],
- GLint count,
- struct gl_buffer_object **bo,
- GLuint *nr_bo )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint i;
-
- /* Map all the VBOs
- */
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- const void *ptr;
-
- if (inputs[i]->BufferObj->Name) {
- if (!inputs[i]->BufferObj->Pointer) {
- bo[*nr_bo] = inputs[i]->BufferObj;
- (*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- inputs[i]->BufferObj);
-
- assert(inputs[i]->BufferObj->Pointer);
- }
-
- ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer,
- inputs[i]->Ptr);
- }
- else
- ptr = inputs[i]->Ptr;
-
- /* Just make sure the array is floating point, otherwise convert to
- * temporary storage.
- *
- * XXX: remove the GLvector4f type at some stage and just use
- * client arrays.
- */
- _tnl_import_array(ctx, i, count, inputs[i], ptr);
- }
-
- /* We process only the vertices between min & max index:
- */
- VB->Count = count;
-
- /* These should perhaps be part of _TNL_ATTRIB_* */
- VB->BackfaceColorPtr = NULL;
- VB->BackfaceIndexPtr = NULL;
- VB->BackfaceSecondaryColorPtr = NULL;
-
- /* Clipping and drawing code still requires this to be a packed
- * array of ubytes which can be written into. TODO: Fix and
- * remove.
- */
- if (ctx->Polygon.FrontMode != GL_FILL ||
- ctx->Polygon.BackMode != GL_FILL)
- {
- VB->EdgeFlag = _tnl_import_edgeflag( ctx,
- VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG],
- VB->Count );
- }
- else {
- /* the data previously pointed to by EdgeFlag may have been freed */
- VB->EdgeFlag = NULL;
- }
-}
-
-
-/* Translate indices to GLuints and store in VB->Elts.
- */
-static void bind_indices( struct gl_context *ctx,
- const struct _mesa_index_buffer *ib,
- struct gl_buffer_object **bo,
- GLuint *nr_bo)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
- GLuint i;
- void *ptr;
-
- if (!ib) {
- VB->Elts = NULL;
- return;
- }
-
- if (ib->obj->Name && !ib->obj->Pointer) {
- bo[*nr_bo] = ib->obj;
- (*nr_bo)++;
- ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER,
- GL_READ_ONLY_ARB,
- ib->obj);
-
- assert(ib->obj->Pointer);
- }
-
- ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
-
- if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
- VB->Elts = (GLuint *) ptr;
- }
- else {
- GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint));
- VB->Elts = elts;
-
- if (ib->type == GL_UNSIGNED_INT) {
- const GLuint *in = (GLuint *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- else if (ib->type == GL_UNSIGNED_SHORT) {
- const GLushort *in = (GLushort *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- else {
- const GLubyte *in = (GLubyte *)ptr;
- for (i = 0; i < ib->count; i++)
- *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
- }
- }
-}
-
-static void bind_prims( struct gl_context *ctx,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- struct vertex_buffer *VB = &tnl->vb;
-
- VB->Primitive = prim;
- VB->PrimitiveCount = nr_prims;
-}
-
-static void unmap_vbos( struct gl_context *ctx,
- struct gl_buffer_object **bo,
- GLuint nr_bo )
-{
- GLuint i;
- for (i = 0; i < nr_bo; i++) {
- ctx->Driver.UnmapBuffer(ctx,
- 0, /* target -- I don't see why this would be needed */
- bo[i]);
- }
-}
-
-
-void _tnl_vbo_draw_prims(struct gl_context *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLboolean index_bounds_valid,
- GLuint min_index,
- GLuint max_index)
-{
- if (!index_bounds_valid)
- vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
-
- _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
-}
-
-/* This is the main entrypoint into the slimmed-down software tnl
- * module. In a regular swtnl driver, this can be plugged straight
- * into the vbo->Driver.DrawPrims() callback.
- */
-void _tnl_draw_prims( struct gl_context *ctx,
- const struct gl_client_array *arrays[],
- const struct _mesa_prim *prim,
- GLuint nr_prims,
- const struct _mesa_index_buffer *ib,
- GLuint min_index,
- GLuint max_index)
-{
- TNLcontext *tnl = TNL_CONTEXT(ctx);
- const GLuint TEST_SPLIT = 0;
- const GLint max = TEST_SPLIT ? 8 : tnl->vb.Size - MAX_CLIPPED_VERTICES;
- GLint max_basevertex = prim->basevertex;
- GLuint i;
-
- /* Mesa core state should have been validated already */
- assert(ctx->NewState == 0x0);
-
- if (!_mesa_check_conditional_render(ctx))
- return; /* don't draw */
-
- for (i = 1; i < nr_prims; i++)
- max_basevertex = MAX2(max_basevertex, prim[i].basevertex);
-
- if (0)
- {
- printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
- for (i = 0; i < nr_prims; i++)
- printf("prim %d: %s start %d count %d\n", i,
- _mesa_lookup_enum_by_nr(prim[i].mode),
- prim[i].start,
- prim[i].count);
- }
-
- if (min_index) {
- /* We always translate away calls with min_index != 0.
- */
- vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib,
- min_index, max_index,
- _tnl_vbo_draw_prims );
- return;
- }
- else if ((GLint)max_index + max_basevertex > max) {
- /* The software TNL pipeline has a fixed amount of storage for
- * vertices and it is necessary to split incoming drawing commands
- * if they exceed that limit.
- */
- struct split_limits limits;
- limits.max_verts = max;
- limits.max_vb_size = ~0;
- limits.max_indices = ~0;
-
- /* This will split the buffers one way or another and
- * recursively call back into this function.
- */
- vbo_split_prims( ctx, arrays, prim, nr_prims, ib,
- 0, max_index + prim->basevertex,
- _tnl_vbo_draw_prims,
- &limits );
- }
- else {
- /* May need to map a vertex buffer object for every attribute plus
- * one for the index buffer.
- */
- struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1];
- GLuint nr_bo = 0;
- GLuint inst;
-
- for (i = 0; i < nr_prims;) {
- GLuint this_nr_prims;
-
- /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
- * will rebase the elements to the basevertex, and we'll only
- * emit strings of prims with the same basevertex in one draw call.
- */
- for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
- this_nr_prims++) {
- if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
- break;
- }
-
- assert(prim[i].num_instances > 0);
-
- /* Binding inputs may imply mapping some vertex buffer objects.
- * They will need to be unmapped below.
- */
- for (inst = 0; inst < prim[i].num_instances; inst++) {
-
- bind_prims(ctx, &prim[i], this_nr_prims);
- bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1,
- bo, &nr_bo);
- bind_indices(ctx, ib, bo, &nr_bo);
-
- tnl->CurInstance = inst;
- TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx);
-
- unmap_vbos(ctx, bo, nr_bo);
- free_space(ctx);
- }
-
- i += this_nr_prims;
- }
- }
-}
-
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.1
+ *
+ * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/condrender.h"
+#include "main/context.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "t_context.h"
+#include "tnl.h"
+
+
+
+static GLubyte *get_space(struct gl_context *ctx, GLuint bytes)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ GLubyte *space = malloc(bytes);
+
+ tnl->block[tnl->nr_blocks++] = space;
+ return space;
+}
+
+
+static void free_space(struct gl_context *ctx)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ GLuint i;
+ for (i = 0; i < tnl->nr_blocks; i++)
+ free(tnl->block[i]);
+ tnl->nr_blocks = 0;
+}
+
+
+/* Convert the incoming array to GLfloats. Understands the
+ * array->Normalized flag and selects the correct conversion method.
+ */
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)ptr; \
+ for (j = 0; j < sz; j++) { \
+ *fptr++ = MACRO(*in); \
+ in++; \
+ } \
+ ptr += input->StrideB; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)ptr; \
+ for (j = 0; j < sz; j++) { \
+ *fptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ ptr += input->StrideB; \
+ } \
+ } \
+} while (0)
+
+
+/**
+ * Convert array of BGRA/GLubyte[4] values to RGBA/float[4]
+ * \param ptr input/ubyte array
+ * \param fptr output/float array
+ */
+static void
+convert_bgra_to_float(const struct gl_client_array *input,
+ const GLubyte *ptr, GLfloat *fptr,
+ GLuint count )
+{
+ GLuint i;
+ assert(input->Normalized);
+ assert(input->Size == 4);
+ for (i = 0; i < count; i++) {
+ const GLubyte *in = (GLubyte *) ptr; /* in is in BGRA order */
+ *fptr++ = UBYTE_TO_FLOAT(in[2]); /* red */
+ *fptr++ = UBYTE_TO_FLOAT(in[1]); /* green */
+ *fptr++ = UBYTE_TO_FLOAT(in[0]); /* blue */
+ *fptr++ = UBYTE_TO_FLOAT(in[3]); /* alpha */
+ ptr += input->StrideB;
+ }
+}
+
+static void
+convert_half_to_float(const struct gl_client_array *input,
+ const GLubyte *ptr, GLfloat *fptr,
+ GLuint count, GLuint sz)
+{
+ GLuint i, j;
+
+ for (i = 0; i < count; i++) {
+ GLhalfARB *in = (GLhalfARB *)ptr;
+
+ for (j = 0; j < sz; j++) {
+ *fptr++ = _mesa_half_to_float(in[j]);
+ }
+ ptr += input->StrideB;
+ }
+}
+
+/**
+ * \brief Convert fixed-point to floating-point.
+ *
+ * In OpenGL, a fixed-point number is a "signed 2's complement 16.16 scaled
+ * integer" (Table 2.2 of the OpenGL ES 2.0 spec).
+ *
+ * If the buffer has the \c normalized flag set, the formula
+ * \code normalize(x) := (2*x + 1) / (2^16 - 1) \endcode
+ * is used to map the fixed-point numbers into the range [-1, 1].
+ */
+static void
+convert_fixed_to_float(const struct gl_client_array *input,
+ const GLubyte *ptr, GLfloat *fptr,
+ GLuint count)
+{
+ GLuint i, j;
+ const GLint size = input->Size;
+
+ if (input->Normalized) {
+ for (i = 0; i < count; ++i) {
+ const GLfixed *in = (GLfixed *) ptr;
+ for (j = 0; j < size; ++j) {
+ *fptr++ = (GLfloat) (2 * in[j] + 1) / (GLfloat) ((1 << 16) - 1);
+ }
+ ptr += input->StrideB;
+ }
+ } else {
+ for (i = 0; i < count; ++i) {
+ const GLfixed *in = (GLfixed *) ptr;
+ for (j = 0; j < size; ++j) {
+ *fptr++ = in[j] / (GLfloat) (1 << 16);
+ }
+ ptr += input->StrideB;
+ }
+ }
+}
+
+/* Adjust pointer to point at first requested element, convert to
+ * floating point, populate VB->AttribPtr[].
+ */
+static void _tnl_import_array( struct gl_context *ctx,
+ GLuint attrib,
+ GLuint count,
+ const struct gl_client_array *input,
+ const GLubyte *ptr )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint stride = input->StrideB;
+
+ if (input->Type != GL_FLOAT) {
+ const GLuint sz = input->Size;
+ GLubyte *buf = get_space(ctx, count * sz * sizeof(GLfloat));
+ GLfloat *fptr = (GLfloat *)buf;
+
+ switch (input->Type) {
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ if (input->Format == GL_BGRA) {
+ /* See GL_EXT_vertex_array_bgra */
+ convert_bgra_to_float(input, ptr, fptr, count);
+ }
+ else {
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ }
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_HALF_FLOAT:
+ convert_half_to_float(input, ptr, fptr, count, sz);
+ break;
+ case GL_FIXED:
+ convert_fixed_to_float(input, ptr, fptr, count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ ptr = buf;
+ stride = sz * sizeof(GLfloat);
+ }
+
+ VB->AttribPtr[attrib] = &tnl->tmp_inputs[attrib];
+ VB->AttribPtr[attrib]->data = (GLfloat (*)[4])ptr;
+ VB->AttribPtr[attrib]->start = (GLfloat *)ptr;
+ VB->AttribPtr[attrib]->count = count;
+ VB->AttribPtr[attrib]->stride = stride;
+ VB->AttribPtr[attrib]->size = input->Size;
+
+ /* This should die, but so should the whole GLvector4f concept:
+ */
+ VB->AttribPtr[attrib]->flags = (((1<<input->Size)-1) |
+ VEC_NOT_WRITEABLE |
+ (stride == 4*sizeof(GLfloat) ? 0 : VEC_BAD_STRIDE));
+
+ VB->AttribPtr[attrib]->storage = NULL;
+}
+
+#define CLIPVERTS ((6 + MAX_CLIP_PLANES) * 2)
+
+
+static GLboolean *_tnl_import_edgeflag( struct gl_context *ctx,
+ const GLvector4f *input,
+ GLuint count)
+{
+ const GLubyte *ptr = (const GLubyte *)input->data;
+ const GLuint stride = input->stride;
+ GLboolean *space = (GLboolean *)get_space(ctx, count + CLIPVERTS);
+ GLboolean *bptr = space;
+ GLuint i;
+
+ for (i = 0; i < count; i++) {
+ *bptr++ = ((GLfloat *)ptr)[0] == 1.0;
+ ptr += stride;
+ }
+
+ return space;
+}
+
+
+static void bind_inputs( struct gl_context *ctx,
+ const struct gl_client_array *inputs[],
+ GLint count,
+ struct gl_buffer_object **bo,
+ GLuint *nr_bo )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint i;
+
+ /* Map all the VBOs
+ */
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ const void *ptr;
+
+ if (inputs[i]->BufferObj->Name) {
+ if (!inputs[i]->BufferObj->Pointer) {
+ bo[*nr_bo] = inputs[i]->BufferObj;
+ (*nr_bo)++;
+ ctx->Driver.MapBuffer(ctx,
+ GL_ARRAY_BUFFER,
+ GL_READ_ONLY_ARB,
+ inputs[i]->BufferObj);
+
+ assert(inputs[i]->BufferObj->Pointer);
+ }
+
+ ptr = ADD_POINTERS(inputs[i]->BufferObj->Pointer,
+ inputs[i]->Ptr);
+ }
+ else
+ ptr = inputs[i]->Ptr;
+
+ /* Just make sure the array is floating point, otherwise convert to
+ * temporary storage.
+ *
+ * XXX: remove the GLvector4f type at some stage and just use
+ * client arrays.
+ */
+ _tnl_import_array(ctx, i, count, inputs[i], ptr);
+ }
+
+ /* We process only the vertices between min & max index:
+ */
+ VB->Count = count;
+
+ /* These should perhaps be part of _TNL_ATTRIB_* */
+ VB->BackfaceColorPtr = NULL;
+ VB->BackfaceIndexPtr = NULL;
+ VB->BackfaceSecondaryColorPtr = NULL;
+
+ /* Clipping and drawing code still requires this to be a packed
+ * array of ubytes which can be written into. TODO: Fix and
+ * remove.
+ */
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL)
+ {
+ VB->EdgeFlag = _tnl_import_edgeflag( ctx,
+ VB->AttribPtr[_TNL_ATTRIB_EDGEFLAG],
+ VB->Count );
+ }
+ else {
+ /* the data previously pointed to by EdgeFlag may have been freed */
+ VB->EdgeFlag = NULL;
+ }
+}
+
+
+/* Translate indices to GLuints and store in VB->Elts.
+ */
+static void bind_indices( struct gl_context *ctx,
+ const struct _mesa_index_buffer *ib,
+ struct gl_buffer_object **bo,
+ GLuint *nr_bo)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ GLuint i;
+ void *ptr;
+
+ if (!ib) {
+ VB->Elts = NULL;
+ return;
+ }
+
+ if (ib->obj->Name && !ib->obj->Pointer) {
+ bo[*nr_bo] = ib->obj;
+ (*nr_bo)++;
+ ctx->Driver.MapBuffer(ctx,
+ GL_ELEMENT_ARRAY_BUFFER,
+ GL_READ_ONLY_ARB,
+ ib->obj);
+
+ assert(ib->obj->Pointer);
+ }
+
+ ptr = ADD_POINTERS(ib->obj->Pointer, ib->ptr);
+
+ if (ib->type == GL_UNSIGNED_INT && VB->Primitive[0].basevertex == 0) {
+ VB->Elts = (GLuint *) ptr;
+ }
+ else {
+ GLuint *elts = (GLuint *)get_space(ctx, ib->count * sizeof(GLuint));
+ VB->Elts = elts;
+
+ if (ib->type == GL_UNSIGNED_INT) {
+ const GLuint *in = (GLuint *)ptr;
+ for (i = 0; i < ib->count; i++)
+ *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
+ }
+ else if (ib->type == GL_UNSIGNED_SHORT) {
+ const GLushort *in = (GLushort *)ptr;
+ for (i = 0; i < ib->count; i++)
+ *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
+ }
+ else {
+ const GLubyte *in = (GLubyte *)ptr;
+ for (i = 0; i < ib->count; i++)
+ *elts++ = (GLuint)(*in++) + VB->Primitive[0].basevertex;
+ }
+ }
+}
+
+static void bind_prims( struct gl_context *ctx,
+ const struct _mesa_prim *prim,
+ GLuint nr_prims )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+
+ VB->Primitive = prim;
+ VB->PrimitiveCount = nr_prims;
+}
+
+static void unmap_vbos( struct gl_context *ctx,
+ struct gl_buffer_object **bo,
+ GLuint nr_bo )
+{
+ GLuint i;
+ for (i = 0; i < nr_bo; i++) {
+ ctx->Driver.UnmapBuffer(ctx,
+ 0, /* target -- I don't see why this would be needed */
+ bo[i]);
+ }
+}
+
+
+void _tnl_vbo_draw_prims(struct gl_context *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+/* This is the main entrypoint into the slimmed-down software tnl
+ * module. In a regular swtnl driver, this can be plugged straight
+ * into the vbo->Driver.DrawPrims() callback.
+ */
+void _tnl_draw_prims( struct gl_context *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ const GLuint TEST_SPLIT = 0;
+ const GLint max = TEST_SPLIT ? 8 : tnl->vb.Size - MAX_CLIPPED_VERTICES;
+ GLint max_basevertex = prim->basevertex;
+ GLuint i;
+
+ /* Mesa core state should have been validated already */
+ assert(ctx->NewState == 0x0);
+
+ if (!_mesa_check_conditional_render(ctx))
+ return; /* don't draw */
+
+ for (i = 1; i < nr_prims; i++)
+ max_basevertex = MAX2(max_basevertex, prim[i].basevertex);
+
+ if (0)
+ {
+ printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+ for (i = 0; i < nr_prims; i++)
+ printf("prim %d: %s start %d count %d\n", i,
+ _mesa_lookup_enum_by_nr(prim[i].mode),
+ prim[i].start,
+ prim[i].count);
+ }
+
+ if (min_index) {
+ /* We always translate away calls with min_index != 0.
+ */
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib,
+ min_index, max_index,
+ _tnl_vbo_draw_prims );
+ return;
+ }
+ else if ((GLint)max_index + max_basevertex > max) {
+ /* The software TNL pipeline has a fixed amount of storage for
+ * vertices and it is necessary to split incoming drawing commands
+ * if they exceed that limit.
+ */
+ struct split_limits limits;
+ limits.max_verts = max;
+ limits.max_vb_size = ~0;
+ limits.max_indices = ~0;
+
+ /* This will split the buffers one way or another and
+ * recursively call back into this function.
+ */
+ vbo_split_prims( ctx, arrays, prim, nr_prims, ib,
+ 0, max_index + prim->basevertex,
+ _tnl_vbo_draw_prims,
+ &limits );
+ }
+ else {
+ /* May need to map a vertex buffer object for every attribute plus
+ * one for the index buffer.
+ */
+ struct gl_buffer_object *bo[VERT_ATTRIB_MAX + 1];
+ GLuint nr_bo = 0;
+ GLuint inst;
+
+ for (i = 0; i < nr_prims;) {
+ GLuint this_nr_prims;
+
+ /* Our SW TNL pipeline doesn't handle basevertex yet, so bind_indices
+ * will rebase the elements to the basevertex, and we'll only
+ * emit strings of prims with the same basevertex in one draw call.
+ */
+ for (this_nr_prims = 1; i + this_nr_prims < nr_prims;
+ this_nr_prims++) {
+ if (prim[i].basevertex != prim[i + this_nr_prims].basevertex)
+ break;
+ }
+
+ assert(prim[i].num_instances > 0);
+
+ /* Binding inputs may imply mapping some vertex buffer objects.
+ * They will need to be unmapped below.
+ */
+ for (inst = 0; inst < prim[i].num_instances; inst++) {
+
+ bind_prims(ctx, &prim[i], this_nr_prims);
+ bind_inputs(ctx, arrays, max_index + prim[i].basevertex + 1,
+ bo, &nr_bo);
+ bind_indices(ctx, ib, bo, &nr_bo);
+
+ tnl->CurInstance = inst;
+ TNL_CONTEXT(ctx)->Driver.RunPipeline(ctx);
+
+ unmap_vbos(ctx, bo, nr_bo);
+ free_space(ctx);
+ }
+
+ i += this_nr_prims;
+ }
+ }
+}
+
diff --git a/mesalib/src/mesa/vbo/vbo_exec_array.c b/mesalib/src/mesa/vbo/vbo_exec_array.c
index 13b54d59c..6749541b7 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_array.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_array.c
@@ -1,1277 +1,1282 @@
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/api_validate.h"
-#include "main/varray.h"
-#include "main/bufferobj.h"
-#include "main/enums.h"
-#include "main/macros.h"
-
-#include "vbo_context.h"
-
-
-/**
- * Compute min and max elements by scanning the index buffer for
- * glDraw[Range]Elements() calls.
- * If primitive restart is enabled, we need to ignore restart
- * indexes when computing min/max.
- */
-void
-vbo_get_minmax_index(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index, GLuint *max_index)
-{
- const GLboolean restart = ctx->Array.PrimitiveRestart;
- const GLuint restartIndex = ctx->Array.RestartIndex;
- const GLuint count = prim->count;
- const void *indices;
- GLuint i;
-
- if (_mesa_is_bufferobj(ib->obj)) {
- const GLvoid *map =
- ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, ib->obj);
- indices = ADD_POINTERS(map, ib->ptr);
- } else {
- indices = ib->ptr;
- }
-
- switch (ib->type) {
- case GL_UNSIGNED_INT: {
- const GLuint *ui_indices = (const GLuint *)indices;
- GLuint max_ui = 0;
- GLuint min_ui = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] != restartIndex) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- *min_index = min_ui;
- *max_index = max_ui;
- break;
- }
- case GL_UNSIGNED_SHORT: {
- const GLushort *us_indices = (const GLushort *)indices;
- GLuint max_us = 0;
- GLuint min_us = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (us_indices[i] != restartIndex) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- *min_index = min_us;
- *max_index = max_us;
- break;
- }
- case GL_UNSIGNED_BYTE: {
- const GLubyte *ub_indices = (const GLubyte *)indices;
- GLuint max_ub = 0;
- GLuint min_ub = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] != restartIndex) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- *min_index = min_ub;
- *max_index = max_ub;
- break;
- }
- default:
- assert(0);
- break;
- }
-
- if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
- }
-}
-
-
-/**
- * Check that element 'j' of the array has reasonable data.
- * Map VBO if needed.
- * For debugging purposes; not normally used.
- */
-static void
-check_array_data(struct gl_context *ctx, struct gl_client_array *array,
- GLuint attrib, GLuint j)
-{
- if (array->Enabled) {
- const void *data = array->Ptr;
- if (_mesa_is_bufferobj(array->BufferObj)) {
- if (!array->BufferObj->Pointer) {
- /* need to map now */
- array->BufferObj->Pointer =
- ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, array->BufferObj);
- }
- data = ADD_POINTERS(data, array->BufferObj->Pointer);
- }
- switch (array->Type) {
- case GL_FLOAT:
- {
- GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j);
- GLint k;
- for (k = 0; k < array->Size; k++) {
- if (IS_INF_OR_NAN(f[k]) ||
- f[k] >= 1.0e20 || f[k] <= -1.0e10) {
- printf("Bad array data:\n");
- printf(" Element[%u].%u = %f\n", j, k, f[k]);
- printf(" Array %u at %p\n", attrib, (void* ) array);
- printf(" Type 0x%x, Size %d, Stride %d\n",
- array->Type, array->Size, array->Stride);
- printf(" Address/offset %p in Buffer Object %u\n",
- array->Ptr, array->BufferObj->Name);
- f[k] = 1.0; /* XXX replace the bad value! */
- }
- /*assert(!IS_INF_OR_NAN(f[k]));*/
- }
- }
- break;
- default:
- ;
- }
- }
-}
-
-
-/**
- * Unmap the buffer object referenced by given array, if mapped.
- */
-static void
-unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
-{
- if (array->Enabled &&
- _mesa_is_bufferobj(array->BufferObj) &&
- _mesa_bufferobj_mapped(array->BufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
- }
-}
-
-
-/**
- * Examine the array's data for NaNs, etc.
- * For debug purposes; not normally used.
- */
-static void
-check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
- const void *elements, GLint basevertex)
-{
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- const void *elemMap;
- GLint i, k;
-
- if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- elemMap = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
- elements = ADD_POINTERS(elements, elemMap);
- }
-
- for (i = 0; i < count; i++) {
- GLuint j;
-
- /* j = element[i] */
- switch (elemType) {
- case GL_UNSIGNED_BYTE:
- j = ((const GLubyte *) elements)[i];
- break;
- case GL_UNSIGNED_SHORT:
- j = ((const GLushort *) elements)[i];
- break;
- case GL_UNSIGNED_INT:
- j = ((const GLuint *) elements)[i];
- break;
- default:
- assert(0);
- }
-
- /* check element j of each enabled array */
- check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j);
- check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j);
- check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j);
- check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j);
- for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
- check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j);
- }
- for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
- check_array_data(ctx, &arrayObj->VertexAttrib[k],
- VERT_ATTRIB_GENERIC0 + k, j);
- }
- }
-
- if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
- }
-
- unmap_array_buffer(ctx, &arrayObj->Vertex);
- unmap_array_buffer(ctx, &arrayObj->Normal);
- unmap_array_buffer(ctx, &arrayObj->Color);
- for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
- unmap_array_buffer(ctx, &arrayObj->TexCoord[k]);
- }
- for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
- unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]);
- }
-}
-
-
-/**
- * Check array data, looking for NaNs, etc.
- */
-static void
-check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count)
-{
- /* TO DO */
-}
-
-
-/**
- * Print info/data for glDrawArrays(), for debugging.
- */
-static void
-print_draw_arrays(struct gl_context *ctx,
- GLenum mode, GLint start, GLsizei count)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- int i;
-
- printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n",
- mode, start, count);
-
- for (i = 0; i < 32; i++) {
- GLuint bufName = exec->array.inputs[i]->BufferObj->Name;
- GLint stride = exec->array.inputs[i]->Stride;
- printf("attr %2d: size %d stride %d enabled %d "
- "ptr %p Bufobj %u\n",
- i,
- exec->array.inputs[i]->Size,
- stride,
- /*exec->array.inputs[i]->Enabled,*/
- exec->array.legacy_array[i]->Enabled,
- exec->array.inputs[i]->Ptr,
- bufName);
-
- if (bufName) {
- struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName);
- GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY_ARB, buf);
- int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
- float *f = (float *) (p + offset);
- int *k = (int *) f;
- int i;
- int n = (count * stride) / 4;
- if (n > 32)
- n = 32;
- printf(" Data at offset %d:\n", offset);
- for (i = 0; i < n; i++) {
- printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]);
- }
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf);
- }
- }
-}
-
-
-/**
- * Bind the VBO executor to the current vertex array object prior
- * to drawing.
- *
- * Just translate the arrayobj into a sane layout.
- */
-static void
-bind_array_obj(struct gl_context *ctx)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
- GLuint i;
-
- /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array
- * rather than as individual named arrays. Then this function can
- * go away.
- */
- exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex;
- exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight;
- exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal;
- exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color;
- exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor;
- exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord;
- exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index;
- if (arrayObj->PointSize.Enabled) {
- /* this aliases COLOR_INDEX */
- exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize;
- }
- exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag;
-
- for (i = 0; i < Elements(arrayObj->TexCoord); i++)
- exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i];
-
- for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
- assert(i < Elements(exec->array.generic_array));
- exec->array.generic_array[i] = &arrayObj->VertexAttrib[i];
- }
-
- exec->array.array_obj = arrayObj->Name;
-}
-
-
-/**
- * Set the vbo->exec->inputs[] pointers to point to the enabled
- * vertex arrays. This depends on the current vertex program/shader
- * being executed because of whether or not generic vertex arrays
- * alias the conventional vertex arrays.
- * For arrays that aren't enabled, we set the input[attrib] pointer
- * to point at a zero-stride current value "array".
- */
-static void
-recalculate_input_bindings(struct gl_context *ctx)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- const struct gl_client_array **inputs = &exec->array.inputs[0];
- GLbitfield const_inputs = 0x0;
- GLuint i;
-
- exec->array.program_mode = get_program_mode(ctx);
- exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled;
-
- switch (exec->array.program_mode) {
- case VP_NONE:
- /* When no vertex program is active (or the vertex program is generated
- * from fixed-function state). We put the material values into the
- * generic slots. This is the only situation where material values
- * are available as per-vertex attributes.
- */
- for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- for (i = 0; i < MAT_ATTRIB_MAX; i++) {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
-
- /* Could use just about anything, just to fill in the empty
- * slots:
- */
- for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
- break;
-
- case VP_NV:
- /* NV_vertex_program - attribute arrays alias and override
- * conventional, legacy arrays. No materials, and the generic
- * slots are vacant.
- */
- for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.generic_array[i]->Enabled)
- inputs[i] = exec->array.generic_array[i];
- else if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- /* Could use just about anything, just to fill in the empty
- * slots:
- */
- for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
- inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
- const_inputs |= 1 << i;
- }
- break;
-
- case VP_ARB:
- /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0]
- * attribute array aliases and overrides the legacy position array.
- *
- * Otherwise, legacy attributes available in the legacy slots,
- * generic attributes in the generic slots and materials are not
- * available as per-vertex attributes.
- */
- if (exec->array.generic_array[0]->Enabled)
- inputs[0] = exec->array.generic_array[0];
- else if (exec->array.legacy_array[0]->Enabled)
- inputs[0] = exec->array.legacy_array[0];
- else {
- inputs[0] = &vbo->legacy_currval[0];
- const_inputs |= 1 << 0;
- }
-
- for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
- if (exec->array.legacy_array[i]->Enabled)
- inputs[i] = exec->array.legacy_array[i];
- else {
- inputs[i] = &vbo->legacy_currval[i];
- const_inputs |= 1 << i;
- }
- }
-
- for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
- if (exec->array.generic_array[i]->Enabled)
- inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
- else {
- inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
- const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
- }
-
- }
- break;
- }
-
- _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
-}
-
-
-/**
- * Examine the enabled vertex arrays to set the exec->array.inputs[] values.
- * These will point to the arrays to actually use for drawing. Some will
- * be user-provided arrays, other will be zero-stride const-valued arrays.
- * Note that this might set the _NEW_ARRAY dirty flag so state validation
- * must be done after this call.
- */
-static void
-bind_arrays(struct gl_context *ctx)
-{
- bind_array_obj(ctx);
- recalculate_input_bindings(ctx);
-}
-
-
-/**
- * Helper function called by the other DrawArrays() functions below.
- * This is where we handle primitive restart for drawing non-indexed
- * arrays. If primitive restart is enabled, it typically means
- * splitting one DrawArrays() into two.
- */
-static void
-vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
- GLsizei count, GLuint numInstances)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_prim prim[2];
-
- bind_arrays(ctx);
-
- /* Again... because we may have changed the bitmask of per-vertex varying
- * attributes. If we regenerate the fixed-function vertex program now
- * we may be able to prune down the number of vertex attributes which we
- * need in the shader.
- */
- if (ctx->NewState)
- _mesa_update_state(ctx);
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0; /* filled in below */
- prim[0].count = 0; /* filled in below */
- prim[0].indexed = 0;
- prim[0].basevertex = 0;
- prim[0].num_instances = numInstances;
-
- /* Implement the primitive restart index */
- if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) {
- GLuint primCount = 0;
-
- if (ctx->Array.RestartIndex == start) {
- /* special case: RestartIndex at beginning */
- if (count > 1) {
- prim[0].start = start + 1;
- prim[0].count = count - 1;
- primCount = 1;
- }
- }
- else if (ctx->Array.RestartIndex == start + count - 1) {
- /* special case: RestartIndex at end */
- if (count > 1) {
- prim[0].start = start;
- prim[0].count = count - 1;
- primCount = 1;
- }
- }
- else {
- /* general case: RestartIndex in middle, split into two prims */
- prim[0].start = start;
- prim[0].count = ctx->Array.RestartIndex - start;
-
- prim[1] = prim[0];
- prim[1].start = ctx->Array.RestartIndex + 1;
- prim[1].count = count - prim[1].start;
-
- primCount = 2;
- }
-
- if (primCount > 0) {
- /* draw one or two prims */
- vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL,
- GL_TRUE, start, start + count - 1);
- }
- }
- else {
- /* no prim restart */
- prim[0].start = start;
- prim[0].count = count;
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL,
- GL_TRUE, start, start + count - 1);
- }
-}
-
-
-
-/**
- * Called from glDrawArrays when in immediate mode (not display list mode).
- */
-static void GLAPIENTRY
-vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count);
-
- if (!_mesa_validate_DrawArrays( ctx, mode, start, count ))
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDrawArrays")) {
- return;
- }
-
- if (0)
- check_draw_arrays_data(ctx, start, count);
-
- vbo_draw_arrays(ctx, mode, start, count, 1);
-
- if (0)
- print_draw_arrays(ctx, mode, start, count);
-}
-
-
-/**
- * Called from glDrawArraysInstanced when in immediate mode (not
- * display list mode).
- */
-static void GLAPIENTRY
-vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
- GLsizei numInstances)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
-
- if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) {
- return;
- }
-
- if (0)
- check_draw_arrays_data(ctx, start, count);
-
- vbo_draw_arrays(ctx, mode, start, count, numInstances);
-
- if (0)
- print_draw_arrays(ctx, mode, start, count);
-}
-
-
-/**
- * Map GL_ELEMENT_ARRAY_BUFFER and print contents.
- * For debugging.
- */
-static void
-dump_element_buffer(struct gl_context *ctx, GLenum type)
-{
- const GLvoid *map = ctx->Driver.MapBuffer(ctx,
- GL_ELEMENT_ARRAY_BUFFER_ARB,
- GL_READ_ONLY,
- ctx->Array.ElementArrayBufferObj);
- switch (type) {
- case GL_UNSIGNED_BYTE:
- {
- const GLubyte *us = (const GLubyte *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) {
- printf("%02x ", us[i]);
- if (i % 32 == 31)
- printf("\n");
- }
- printf("\n");
- }
- break;
- case GL_UNSIGNED_SHORT:
- {
- const GLushort *us = (const GLushort *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) {
- printf("%04x ", us[i]);
- if (i % 16 == 15)
- printf("\n");
- }
- printf("\n");
- }
- break;
- case GL_UNSIGNED_INT:
- {
- const GLuint *us = (const GLuint *) map;
- GLint i;
- for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) {
- printf("%08x ", us[i]);
- if (i % 8 == 7)
- printf("\n");
- }
- printf("\n");
- }
- break;
- default:
- ;
- }
-
- ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
- ctx->Array.ElementArrayBufferObj);
-}
-
-
-/**
- * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements.
- * Do the rendering for a glDrawElements or glDrawRangeElements call after
- * we've validated buffer bounds, etc.
- */
-static void
-vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
- GLboolean index_bounds_valid,
- GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices,
- GLint basevertex, GLint numInstances)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_index_buffer ib;
- struct _mesa_prim prim[1];
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) {
- return;
- }
-
- bind_arrays( ctx );
-
- /* check for dirty state again */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- ib.count = count;
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = indices;
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0;
- prim[0].count = count;
- prim[0].indexed = 1;
- prim[0].basevertex = basevertex;
- prim[0].num_instances = numInstances;
-
- /* Need to give special consideration to rendering a range of
- * indices starting somewhere above zero. Typically the
- * application is issuing multiple DrawRangeElements() to draw
- * successive primitives layed out linearly in the vertex arrays.
- * Unless the vertex arrays are all in a VBO (or locked as with
- * CVA), the OpenGL semantics imply that we need to re-read or
- * re-upload the vertex data on each draw call.
- *
- * In the case of hardware tnl, we want to avoid starting the
- * upload at zero, as it will mean every draw call uploads an
- * increasing amount of not-used vertex data. Worse - in the
- * software tnl module, all those vertices might be transformed and
- * lit but never rendered.
- *
- * If we just upload or transform the vertices in start..end,
- * however, the indices will be incorrect.
- *
- * At this level, we don't know exactly what the requirements of
- * the backend are going to be, though it will likely boil down to
- * either:
- *
- * 1) Do nothing, everything is in a VBO and is processed once
- * only.
- *
- * 2) Adjust the indices and vertex arrays so that start becomes
- * zero.
- *
- * Rather than doing anything here, I'll provide a helper function
- * for the latter case elsewhere.
- */
-
- vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib,
- index_bounds_valid, start, end );
-}
-
-
-/**
- * Called by glDrawRangeElementsBaseVertex() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
- GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices,
- GLint basevertex)
-{
- static GLuint warnCount = 0;
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx,
- "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
-
- if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count,
- type, indices, basevertex ))
- return;
-
- /* NOTE: It's important that 'end' is a reasonable value.
- * in _tnl_draw_prims(), we use end to determine how many vertices
- * to transform. If it's too large, we can unnecessarily split prims
- * or we can read/write out of memory in several different places!
- */
-
- /* Catch/fix some potential user errors */
- if (type == GL_UNSIGNED_BYTE) {
- start = MIN2(start, 0xff);
- end = MIN2(end, 0xff);
- }
- else if (type == GL_UNSIGNED_SHORT) {
- start = MIN2(start, 0xffff);
- end = MIN2(end, 0xffff);
- }
-
- if (end >= ctx->Array.ArrayObj->_MaxElement) {
- /* the max element is out of bounds of one or more enabled arrays */
- warnCount++;
-
- if (warnCount < 10) {
- _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, "
- "type 0x%x, indices=%p)\n"
- "\tend is out of bounds (max=%u) "
- "Element Buffer %u (size %d)\n"
- "\tThis should probably be fixed in the application.",
- start, end, count, type, indices,
- ctx->Array.ArrayObj->_MaxElement - 1,
- ctx->Array.ElementArrayBufferObj->Name,
- (int) ctx->Array.ElementArrayBufferObj->Size);
- }
-
- if (0)
- dump_element_buffer(ctx, type);
-
- if (0)
- _mesa_print_arrays(ctx);
-
-#ifdef DEBUG
- /* 'end' was out of bounds, but now let's check the actual array
- * indexes to see if any of them are out of bounds.
- */
- {
- GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
- ctx->Array.ElementArrayBufferObj);
- if (max >= ctx->Array.ArrayObj->_MaxElement) {
- if (warnCount < 10) {
- _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, "
- "count %d, type 0x%x, indices=%p)\n"
- "\tindex=%u is out of bounds (max=%u) "
- "Element Buffer %u (size %d)\n"
- "\tSkipping the glDrawRangeElements() call",
- start, end, count, type, indices, max,
- ctx->Array.ArrayObj->_MaxElement - 1,
- ctx->Array.ElementArrayBufferObj->Name,
- (int) ctx->Array.ElementArrayBufferObj->Size);
- }
- }
- /* XXX we could also find the min index and compare to 'start'
- * to see if start is correct. But it's more likely to get the
- * upper bound wrong.
- */
- }
-#endif
-
- /* Set 'end' to the max possible legal value */
- assert(ctx->Array.ArrayObj->_MaxElement >= 1);
- end = ctx->Array.ArrayObj->_MaxElement - 1;
- }
- else if (0) {
- printf("glDraw[Range]Elements{,BaseVertex}"
- "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, "
- "base %d\n",
- start, end, type, count,
- ctx->Array.ElementArrayBufferObj->Name,
- basevertex);
- }
-
-#if 0
- check_draw_elements_data(ctx, count, type, indices);
-#else
- (void) check_draw_elements_data;
-#endif
-
- vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end,
- count, type, indices, basevertex, 1);
-}
-
-
-/**
- * Called by glDrawRangeElements() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
- GLsizei count, GLenum type, const GLvoid *indices)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx,
- "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), start, end, count,
- _mesa_lookup_enum_by_nr(type), indices);
-
- vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
- indices, 0);
-}
-
-
-/**
- * Called by glDrawElements() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices);
-
- if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 ))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, 0, 1);
-}
-
-
-/**
- * Called by glDrawElementsBaseVertex() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, basevertex);
-
- if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices,
- basevertex ))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, basevertex, 1);
-}
-
-
-/**
- * Called by glDrawElementsInstanced() in immediate mode.
- */
-static void GLAPIENTRY
-vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLsizei numInstances)
-{
- GET_CURRENT_CONTEXT(ctx);
-
- if (MESA_VERBOSE & VERBOSE_DRAW)
- _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
- _mesa_lookup_enum_by_nr(mode), count,
- _mesa_lookup_enum_by_nr(type), indices, numInstances);
-
- if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
- numInstances))
- return;
-
- vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
- count, type, indices, 0, numInstances);
-}
-
-
-/**
- * Inner support for both _mesa_MultiDrawElements() and
- * _mesa_MultiDrawRangeElements().
- * This does the actual rendering after we've checked array indexes, etc.
- */
-static void
-vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount,
- const GLint *basevertex)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct _mesa_index_buffer ib;
- struct _mesa_prim *prim;
- unsigned int index_type_size = 0;
- uintptr_t min_index_ptr, max_index_ptr;
- GLboolean fallback = GL_FALSE;
- int i;
-
- if (primcount == 0)
- return;
-
- FLUSH_CURRENT( ctx, 0 );
-
- if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) {
- return;
- }
-
- prim = calloc(1, primcount * sizeof(*prim));
- if (prim == NULL) {
- _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements");
- return;
- }
-
- /* Decide if we can do this all as one set of primitives sharing the
- * same index buffer, or if we have to reset the index pointer per
- * primitive.
- */
- bind_arrays( ctx );
-
- /* check for dirty state again */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- switch (type) {
- case GL_UNSIGNED_INT:
- index_type_size = 4;
- break;
- case GL_UNSIGNED_SHORT:
- index_type_size = 2;
- break;
- case GL_UNSIGNED_BYTE:
- index_type_size = 1;
- break;
- default:
- assert(0);
- }
-
- min_index_ptr = (uintptr_t)indices[0];
- max_index_ptr = 0;
- for (i = 0; i < primcount; i++) {
- min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]);
- max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] +
- index_type_size * count[i]);
- }
-
- /* Check if we can handle this thing as a bunch of index offsets from the
- * same index pointer. If we can't, then we have to fall back to doing
- * a draw_prims per primitive.
- * Check that the difference between each prim's indexes is a multiple of
- * the index/element size.
- */
- if (index_type_size != 1) {
- for (i = 0; i < primcount; i++) {
- if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) {
- fallback = GL_TRUE;
- break;
- }
- }
- }
-
- /* If the index buffer isn't in a VBO, then treating the application's
- * subranges of the index buffer as one large index buffer may lead to
- * us reading unmapped memory.
- */
- if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj))
- fallback = GL_TRUE;
-
- if (!fallback) {
- ib.count = (max_index_ptr - min_index_ptr) / index_type_size;
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = (void *)min_index_ptr;
-
- for (i = 0; i < primcount; i++) {
- prim[i].begin = (i == 0);
- prim[i].end = (i == primcount - 1);
- prim[i].weak = 0;
- prim[i].pad = 0;
- prim[i].mode = mode;
- prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size;
- prim[i].count = count[i];
- prim[i].indexed = 1;
- prim[i].num_instances = 1;
- if (basevertex != NULL)
- prim[i].basevertex = basevertex[i];
- else
- prim[i].basevertex = 0;
- }
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib,
- GL_FALSE, ~0, ~0);
- } else {
- /* render one prim at a time */
- for (i = 0; i < primcount; i++) {
- ib.count = count[i];
- ib.type = type;
- ib.obj = ctx->Array.ElementArrayBufferObj;
- ib.ptr = indices[i];
-
- prim[0].begin = 1;
- prim[0].end = 1;
- prim[0].weak = 0;
- prim[0].pad = 0;
- prim[0].mode = mode;
- prim[0].start = 0;
- prim[0].count = count[i];
- prim[0].indexed = 1;
- prim[0].num_instances = 1;
- if (basevertex != NULL)
- prim[0].basevertex = basevertex[i];
- else
- prim[0].basevertex = 0;
-
- vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib,
- GL_FALSE, ~0, ~0);
- }
- }
-
- free(prim);
-}
-
-
-static void GLAPIENTRY
-vbo_exec_MultiDrawElements(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices,
- GLsizei primcount)
-{
- GET_CURRENT_CONTEXT(ctx);
- GLint i;
-
- ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
-
- for (i = 0; i < primcount; i++) {
- if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
- 0))
- return;
- }
-
- vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
- NULL);
-}
-
-
-static void GLAPIENTRY
-vbo_exec_MultiDrawElementsBaseVertex(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices,
- GLsizei primcount,
- const GLsizei *basevertex)
-{
- GET_CURRENT_CONTEXT(ctx);
- GLint i;
-
- ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
-
- for (i = 0; i < primcount; i++) {
- if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
- basevertex[i]))
- return;
- }
-
- vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
- basevertex);
-}
-
-
-/**
- * Plug in the immediate-mode vertex array drawing commands into the
- * givven vbo_exec_context object.
- */
-void
-vbo_exec_array_init( struct vbo_exec_context *exec )
-{
- exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays;
- exec->vtxfmt.DrawElements = vbo_exec_DrawElements;
- exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements;
- exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements;
- exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex;
- exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex;
- exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex;
- exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced;
- exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced;
-}
-
-
-void
-vbo_exec_array_destroy( struct vbo_exec_context *exec )
-{
- /* nothing to do */
-}
-
-
-
-/**
- * The following functions are only used for OpenGL ES 1/2 support.
- * And some aren't even supported (yet) in ES 1/2.
- */
-
-
-void GLAPIENTRY
-_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count)
-{
- vbo_exec_DrawArrays(mode, first, count);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices)
-{
- vbo_exec_DrawElements(mode, count, type, indices);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count,
- GLenum type, const GLvoid *indices)
-{
- vbo_exec_DrawRangeElements(mode, start, end, count, type, indices);
-}
-
-
-void GLAPIENTRY
-_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end,
- GLsizei count, GLenum type,
- const GLvoid *indices, GLint basevertex)
-{
- vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
- indices, basevertex);
-}
-
-
-void GLAPIENTRY
-_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount)
-{
- vbo_exec_MultiDrawElements(mode, count, type, indices, primcount);
-}
-
-
-void GLAPIENTRY
-_mesa_MultiDrawElementsBaseVertex(GLenum mode,
- const GLsizei *count, GLenum type,
- const GLvoid **indices, GLsizei primcount,
- const GLint *basevertex)
-{
- vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices,
- primcount, basevertex);
-}
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/varray.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/macros.h"
+
+#include "vbo_context.h"
+
+
+/**
+ * Compute min and max elements by scanning the index buffer for
+ * glDraw[Range]Elements() calls.
+ * If primitive restart is enabled, we need to ignore restart
+ * indexes when computing min/max.
+ */
+void
+vbo_get_minmax_index(struct gl_context *ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint *min_index, GLuint *max_index)
+{
+ const GLboolean restart = ctx->Array.PrimitiveRestart;
+ const GLuint restartIndex = ctx->Array.RestartIndex;
+ const GLuint count = prim->count;
+ const void *indices;
+ GLuint i;
+
+ if (_mesa_is_bufferobj(ib->obj)) {
+ const GLvoid *map =
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY, ib->obj);
+ indices = ADD_POINTERS(map, ib->ptr);
+ } else {
+ indices = ib->ptr;
+ }
+
+ switch (ib->type) {
+ case GL_UNSIGNED_INT: {
+ const GLuint *ui_indices = (const GLuint *)indices;
+ GLuint max_ui = 0;
+ GLuint min_ui = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (ui_indices[i] != restartIndex) {
+ if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+ if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+ }
+ }
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+ if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+ }
+ }
+ *min_index = min_ui;
+ *max_index = max_ui;
+ break;
+ }
+ case GL_UNSIGNED_SHORT: {
+ const GLushort *us_indices = (const GLushort *)indices;
+ GLuint max_us = 0;
+ GLuint min_us = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (us_indices[i] != restartIndex) {
+ if (us_indices[i] > max_us) max_us = us_indices[i];
+ if (us_indices[i] < min_us) min_us = us_indices[i];
+ }
+ }
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ if (us_indices[i] > max_us) max_us = us_indices[i];
+ if (us_indices[i] < min_us) min_us = us_indices[i];
+ }
+ }
+ *min_index = min_us;
+ *max_index = max_us;
+ break;
+ }
+ case GL_UNSIGNED_BYTE: {
+ const GLubyte *ub_indices = (const GLubyte *)indices;
+ GLuint max_ub = 0;
+ GLuint min_ub = ~0U;
+ if (restart) {
+ for (i = 0; i < count; i++) {
+ if (ub_indices[i] != restartIndex) {
+ if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+ if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+ }
+ }
+ }
+ else {
+ for (i = 0; i < count; i++) {
+ if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+ if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+ }
+ }
+ *min_index = min_ub;
+ *max_index = max_ub;
+ break;
+ }
+ default:
+ assert(0);
+ break;
+ }
+
+ if (_mesa_is_bufferobj(ib->obj)) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, ib->obj);
+ }
+}
+
+
+/**
+ * Check that element 'j' of the array has reasonable data.
+ * Map VBO if needed.
+ * For debugging purposes; not normally used.
+ */
+static void
+check_array_data(struct gl_context *ctx, struct gl_client_array *array,
+ GLuint attrib, GLuint j)
+{
+ if (array->Enabled) {
+ const void *data = array->Ptr;
+ if (_mesa_is_bufferobj(array->BufferObj)) {
+ if (!array->BufferObj->Pointer) {
+ /* need to map now */
+ array->BufferObj->Pointer =
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY, array->BufferObj);
+ }
+ data = ADD_POINTERS(data, array->BufferObj->Pointer);
+ }
+ switch (array->Type) {
+ case GL_FLOAT:
+ {
+ GLfloat *f = (GLfloat *) ((GLubyte *) data + array->StrideB * j);
+ GLint k;
+ for (k = 0; k < array->Size; k++) {
+ if (IS_INF_OR_NAN(f[k]) ||
+ f[k] >= 1.0e20 || f[k] <= -1.0e10) {
+ printf("Bad array data:\n");
+ printf(" Element[%u].%u = %f\n", j, k, f[k]);
+ printf(" Array %u at %p\n", attrib, (void* ) array);
+ printf(" Type 0x%x, Size %d, Stride %d\n",
+ array->Type, array->Size, array->Stride);
+ printf(" Address/offset %p in Buffer Object %u\n",
+ array->Ptr, array->BufferObj->Name);
+ f[k] = 1.0; /* XXX replace the bad value! */
+ }
+ /*assert(!IS_INF_OR_NAN(f[k]));*/
+ }
+ }
+ break;
+ default:
+ ;
+ }
+ }
+}
+
+
+/**
+ * Unmap the buffer object referenced by given array, if mapped.
+ */
+static void
+unmap_array_buffer(struct gl_context *ctx, struct gl_client_array *array)
+{
+ if (array->Enabled &&
+ _mesa_is_bufferobj(array->BufferObj) &&
+ _mesa_bufferobj_mapped(array->BufferObj)) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, array->BufferObj);
+ }
+}
+
+
+/**
+ * Examine the array's data for NaNs, etc.
+ * For debug purposes; not normally used.
+ */
+static void
+check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
+ const void *elements, GLint basevertex)
+{
+ struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
+ const void *elemMap;
+ GLint i, k;
+
+ if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
+ elemMap = ctx->Driver.MapBuffer(ctx,
+ GL_ELEMENT_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY,
+ ctx->Array.ElementArrayBufferObj);
+ elements = ADD_POINTERS(elements, elemMap);
+ }
+
+ for (i = 0; i < count; i++) {
+ GLuint j;
+
+ /* j = element[i] */
+ switch (elemType) {
+ case GL_UNSIGNED_BYTE:
+ j = ((const GLubyte *) elements)[i];
+ break;
+ case GL_UNSIGNED_SHORT:
+ j = ((const GLushort *) elements)[i];
+ break;
+ case GL_UNSIGNED_INT:
+ j = ((const GLuint *) elements)[i];
+ break;
+ default:
+ assert(0);
+ }
+
+ /* check element j of each enabled array */
+ check_array_data(ctx, &arrayObj->Vertex, VERT_ATTRIB_POS, j);
+ check_array_data(ctx, &arrayObj->Normal, VERT_ATTRIB_NORMAL, j);
+ check_array_data(ctx, &arrayObj->Color, VERT_ATTRIB_COLOR0, j);
+ check_array_data(ctx, &arrayObj->SecondaryColor, VERT_ATTRIB_COLOR1, j);
+ for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
+ check_array_data(ctx, &arrayObj->TexCoord[k], VERT_ATTRIB_TEX0 + k, j);
+ }
+ for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
+ check_array_data(ctx, &arrayObj->VertexAttrib[k],
+ VERT_ATTRIB_GENERIC0 + k, j);
+ }
+ }
+
+ if (_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj)) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
+ ctx->Array.ElementArrayBufferObj);
+ }
+
+ unmap_array_buffer(ctx, &arrayObj->Vertex);
+ unmap_array_buffer(ctx, &arrayObj->Normal);
+ unmap_array_buffer(ctx, &arrayObj->Color);
+ for (k = 0; k < Elements(arrayObj->TexCoord); k++) {
+ unmap_array_buffer(ctx, &arrayObj->TexCoord[k]);
+ }
+ for (k = 0; k < Elements(arrayObj->VertexAttrib); k++) {
+ unmap_array_buffer(ctx, &arrayObj->VertexAttrib[k]);
+ }
+}
+
+
+/**
+ * Check array data, looking for NaNs, etc.
+ */
+static void
+check_draw_arrays_data(struct gl_context *ctx, GLint start, GLsizei count)
+{
+ /* TO DO */
+}
+
+
+/**
+ * Print info/data for glDrawArrays(), for debugging.
+ */
+static void
+print_draw_arrays(struct gl_context *ctx,
+ GLenum mode, GLint start, GLsizei count)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ int i;
+
+ printf("vbo_exec_DrawArrays(mode 0x%x, start %d, count %d):\n",
+ mode, start, count);
+
+ for (i = 0; i < 32; i++) {
+ GLuint bufName = exec->array.inputs[i]->BufferObj->Name;
+ GLint stride = exec->array.inputs[i]->Stride;
+ printf("attr %2d: size %d stride %d enabled %d "
+ "ptr %p Bufobj %u\n",
+ i,
+ exec->array.inputs[i]->Size,
+ stride,
+ /*exec->array.inputs[i]->Enabled,*/
+ exec->array.legacy_array[i]->Enabled,
+ exec->array.inputs[i]->Ptr,
+ bufName);
+
+ if (bufName) {
+ struct gl_buffer_object *buf = _mesa_lookup_bufferobj(ctx, bufName);
+ GLubyte *p = ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY_ARB, buf);
+ int offset = (int) (GLintptr) exec->array.inputs[i]->Ptr;
+ float *f = (float *) (p + offset);
+ int *k = (int *) f;
+ int i;
+ int n = (count * stride) / 4;
+ if (n > 32)
+ n = 32;
+ printf(" Data at offset %d:\n", offset);
+ for (i = 0; i < n; i++) {
+ printf(" float[%d] = 0x%08x %f\n", i, k[i], f[i]);
+ }
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB, buf);
+ }
+ }
+}
+
+
+/**
+ * Bind the VBO executor to the current vertex array object prior
+ * to drawing.
+ *
+ * Just translate the arrayobj into a sane layout.
+ */
+static void
+bind_array_obj(struct gl_context *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ struct gl_array_object *arrayObj = ctx->Array.ArrayObj;
+ GLuint i;
+
+ /* TODO: Fix the ArrayObj struct to keep legacy arrays in an array
+ * rather than as individual named arrays. Then this function can
+ * go away.
+ */
+ exec->array.legacy_array[VERT_ATTRIB_POS] = &arrayObj->Vertex;
+ exec->array.legacy_array[VERT_ATTRIB_WEIGHT] = &arrayObj->Weight;
+ exec->array.legacy_array[VERT_ATTRIB_NORMAL] = &arrayObj->Normal;
+ exec->array.legacy_array[VERT_ATTRIB_COLOR0] = &arrayObj->Color;
+ exec->array.legacy_array[VERT_ATTRIB_COLOR1] = &arrayObj->SecondaryColor;
+ exec->array.legacy_array[VERT_ATTRIB_FOG] = &arrayObj->FogCoord;
+ exec->array.legacy_array[VERT_ATTRIB_COLOR_INDEX] = &arrayObj->Index;
+ if (arrayObj->PointSize.Enabled) {
+ /* this aliases COLOR_INDEX */
+ exec->array.legacy_array[VERT_ATTRIB_POINT_SIZE] = &arrayObj->PointSize;
+ }
+ exec->array.legacy_array[VERT_ATTRIB_EDGEFLAG] = &arrayObj->EdgeFlag;
+
+ for (i = 0; i < Elements(arrayObj->TexCoord); i++)
+ exec->array.legacy_array[VERT_ATTRIB_TEX0 + i] = &arrayObj->TexCoord[i];
+
+ for (i = 0; i < Elements(arrayObj->VertexAttrib); i++) {
+ assert(i < Elements(exec->array.generic_array));
+ exec->array.generic_array[i] = &arrayObj->VertexAttrib[i];
+ }
+
+ exec->array.array_obj = arrayObj->Name;
+}
+
+
+/**
+ * Set the vbo->exec->inputs[] pointers to point to the enabled
+ * vertex arrays. This depends on the current vertex program/shader
+ * being executed because of whether or not generic vertex arrays
+ * alias the conventional vertex arrays.
+ * For arrays that aren't enabled, we set the input[attrib] pointer
+ * to point at a zero-stride current value "array".
+ */
+static void
+recalculate_input_bindings(struct gl_context *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ const struct gl_client_array **inputs = &exec->array.inputs[0];
+ GLbitfield const_inputs = 0x0;
+ GLuint i;
+
+ exec->array.program_mode = get_program_mode(ctx);
+ exec->array.enabled_flags = ctx->Array.ArrayObj->_Enabled;
+
+ switch (exec->array.program_mode) {
+ case VP_NONE:
+ /* When no vertex program is active (or the vertex program is generated
+ * from fixed-function state). We put the material values into the
+ * generic slots. This is the only situation where material values
+ * are available as per-vertex attributes.
+ */
+ for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
+ if (exec->array.legacy_array[i]->Enabled)
+ inputs[i] = exec->array.legacy_array[i];
+ else {
+ inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
+ }
+
+ for (i = 0; i < MAT_ATTRIB_MAX; i++) {
+ inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+ }
+
+ /* Could use just about anything, just to fill in the empty
+ * slots:
+ */
+ for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) {
+ inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+ }
+ break;
+
+ case VP_NV:
+ /* NV_vertex_program - attribute arrays alias and override
+ * conventional, legacy arrays. No materials, and the generic
+ * slots are vacant.
+ */
+ for (i = 0; i <= VERT_ATTRIB_TEX7; i++) {
+ if (exec->array.generic_array[i]->Enabled)
+ inputs[i] = exec->array.generic_array[i];
+ else if (exec->array.legacy_array[i]->Enabled)
+ inputs[i] = exec->array.legacy_array[i];
+ else {
+ inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
+ }
+
+ /* Could use just about anything, just to fill in the empty
+ * slots:
+ */
+ for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
+ inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0];
+ const_inputs |= 1 << i;
+ }
+ break;
+
+ case VP_ARB:
+ /* GL_ARB_vertex_program or GLSL vertex shader - Only the generic[0]
+ * attribute array aliases and overrides the legacy position array.
+ *
+ * Otherwise, legacy attributes available in the legacy slots,
+ * generic attributes in the generic slots and materials are not
+ * available as per-vertex attributes.
+ */
+ if (exec->array.generic_array[0]->Enabled)
+ inputs[0] = exec->array.generic_array[0];
+ else if (exec->array.legacy_array[0]->Enabled)
+ inputs[0] = exec->array.legacy_array[0];
+ else {
+ inputs[0] = &vbo->legacy_currval[0];
+ const_inputs |= 1 << 0;
+ }
+
+ for (i = 1; i <= VERT_ATTRIB_TEX7; i++) {
+ if (exec->array.legacy_array[i]->Enabled)
+ inputs[i] = exec->array.legacy_array[i];
+ else {
+ inputs[i] = &vbo->legacy_currval[i];
+ const_inputs |= 1 << i;
+ }
+ }
+
+ for (i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
+ if (exec->array.generic_array[i]->Enabled)
+ inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i];
+ else {
+ inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i];
+ const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i);
+ }
+
+ }
+ break;
+ }
+
+ _mesa_set_varying_vp_inputs( ctx, ~const_inputs );
+}
+
+
+/**
+ * Examine the enabled vertex arrays to set the exec->array.inputs[] values.
+ * These will point to the arrays to actually use for drawing. Some will
+ * be user-provided arrays, other will be zero-stride const-valued arrays.
+ * Note that this might set the _NEW_ARRAY dirty flag so state validation
+ * must be done after this call.
+ */
+static void
+bind_arrays(struct gl_context *ctx)
+{
+ if (!ctx->Array.RebindArrays) {
+ return;
+ }
+
+ bind_array_obj(ctx);
+ recalculate_input_bindings(ctx);
+ ctx->Array.RebindArrays = GL_FALSE;
+}
+
+
+/**
+ * Helper function called by the other DrawArrays() functions below.
+ * This is where we handle primitive restart for drawing non-indexed
+ * arrays. If primitive restart is enabled, it typically means
+ * splitting one DrawArrays() into two.
+ */
+static void
+vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
+ GLsizei count, GLuint numInstances)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ struct _mesa_prim prim[2];
+
+ bind_arrays(ctx);
+
+ /* Again... because we may have changed the bitmask of per-vertex varying
+ * attributes. If we regenerate the fixed-function vertex program now
+ * we may be able to prune down the number of vertex attributes which we
+ * need in the shader.
+ */
+ if (ctx->NewState)
+ _mesa_update_state(ctx);
+
+ prim[0].begin = 1;
+ prim[0].end = 1;
+ prim[0].weak = 0;
+ prim[0].pad = 0;
+ prim[0].mode = mode;
+ prim[0].start = 0; /* filled in below */
+ prim[0].count = 0; /* filled in below */
+ prim[0].indexed = 0;
+ prim[0].basevertex = 0;
+ prim[0].num_instances = numInstances;
+
+ /* Implement the primitive restart index */
+ if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) {
+ GLuint primCount = 0;
+
+ if (ctx->Array.RestartIndex == start) {
+ /* special case: RestartIndex at beginning */
+ if (count > 1) {
+ prim[0].start = start + 1;
+ prim[0].count = count - 1;
+ primCount = 1;
+ }
+ }
+ else if (ctx->Array.RestartIndex == start + count - 1) {
+ /* special case: RestartIndex at end */
+ if (count > 1) {
+ prim[0].start = start;
+ prim[0].count = count - 1;
+ primCount = 1;
+ }
+ }
+ else {
+ /* general case: RestartIndex in middle, split into two prims */
+ prim[0].start = start;
+ prim[0].count = ctx->Array.RestartIndex - start;
+
+ prim[1] = prim[0];
+ prim[1].start = ctx->Array.RestartIndex + 1;
+ prim[1].count = count - prim[1].start;
+
+ primCount = 2;
+ }
+
+ if (primCount > 0) {
+ /* draw one or two prims */
+ vbo->draw_prims(ctx, exec->array.inputs, prim, primCount, NULL,
+ GL_TRUE, start, start + count - 1);
+ }
+ }
+ else {
+ /* no prim restart */
+ prim[0].start = start;
+ prim[0].count = count;
+
+ vbo->draw_prims(ctx, exec->array.inputs, prim, 1, NULL,
+ GL_TRUE, start, start + count - 1);
+ }
+}
+
+
+
+/**
+ * Called from glDrawArrays when in immediate mode (not display list mode).
+ */
+static void GLAPIENTRY
+vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx, "glDrawArrays(%s, %d, %d)\n",
+ _mesa_lookup_enum_by_nr(mode), start, count);
+
+ if (!_mesa_validate_DrawArrays( ctx, mode, start, count ))
+ return;
+
+ FLUSH_CURRENT( ctx, 0 );
+
+ if (!_mesa_valid_to_render(ctx, "glDrawArrays")) {
+ return;
+ }
+
+ if (0)
+ check_draw_arrays_data(ctx, start, count);
+
+ vbo_draw_arrays(ctx, mode, start, count, 1);
+
+ if (0)
+ print_draw_arrays(ctx, mode, start, count);
+}
+
+
+/**
+ * Called from glDrawArraysInstanced when in immediate mode (not
+ * display list mode).
+ */
+static void GLAPIENTRY
+vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
+ GLsizei numInstances)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx, "glDrawArraysInstanced(%s, %d, %d, %d)\n",
+ _mesa_lookup_enum_by_nr(mode), start, count, numInstances);
+
+ if (!_mesa_validate_DrawArraysInstanced(ctx, mode, start, count, numInstances))
+ return;
+
+ FLUSH_CURRENT( ctx, 0 );
+
+ if (!_mesa_valid_to_render(ctx, "glDrawArraysInstanced")) {
+ return;
+ }
+
+ if (0)
+ check_draw_arrays_data(ctx, start, count);
+
+ vbo_draw_arrays(ctx, mode, start, count, numInstances);
+
+ if (0)
+ print_draw_arrays(ctx, mode, start, count);
+}
+
+
+/**
+ * Map GL_ELEMENT_ARRAY_BUFFER and print contents.
+ * For debugging.
+ */
+static void
+dump_element_buffer(struct gl_context *ctx, GLenum type)
+{
+ const GLvoid *map = ctx->Driver.MapBuffer(ctx,
+ GL_ELEMENT_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY,
+ ctx->Array.ElementArrayBufferObj);
+ switch (type) {
+ case GL_UNSIGNED_BYTE:
+ {
+ const GLubyte *us = (const GLubyte *) map;
+ GLint i;
+ for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size; i++) {
+ printf("%02x ", us[i]);
+ if (i % 32 == 31)
+ printf("\n");
+ }
+ printf("\n");
+ }
+ break;
+ case GL_UNSIGNED_SHORT:
+ {
+ const GLushort *us = (const GLushort *) map;
+ GLint i;
+ for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 2; i++) {
+ printf("%04x ", us[i]);
+ if (i % 16 == 15)
+ printf("\n");
+ }
+ printf("\n");
+ }
+ break;
+ case GL_UNSIGNED_INT:
+ {
+ const GLuint *us = (const GLuint *) map;
+ GLint i;
+ for (i = 0; i < ctx->Array.ElementArrayBufferObj->Size / 4; i++) {
+ printf("%08x ", us[i]);
+ if (i % 8 == 7)
+ printf("\n");
+ }
+ printf("\n");
+ }
+ break;
+ default:
+ ;
+ }
+
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB,
+ ctx->Array.ElementArrayBufferObj);
+}
+
+
+/**
+ * Inner support for both _mesa_DrawElements and _mesa_DrawRangeElements.
+ * Do the rendering for a glDrawElements or glDrawRangeElements call after
+ * we've validated buffer bounds, etc.
+ */
+static void
+vbo_validated_drawrangeelements(struct gl_context *ctx, GLenum mode,
+ GLboolean index_bounds_valid,
+ GLuint start, GLuint end,
+ GLsizei count, GLenum type,
+ const GLvoid *indices,
+ GLint basevertex, GLint numInstances)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ struct _mesa_index_buffer ib;
+ struct _mesa_prim prim[1];
+
+ FLUSH_CURRENT( ctx, 0 );
+
+ if (!_mesa_valid_to_render(ctx, "glDraw[Range]Elements")) {
+ return;
+ }
+
+ bind_arrays( ctx );
+
+ /* check for dirty state again */
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ ib.count = count;
+ ib.type = type;
+ ib.obj = ctx->Array.ElementArrayBufferObj;
+ ib.ptr = indices;
+
+ prim[0].begin = 1;
+ prim[0].end = 1;
+ prim[0].weak = 0;
+ prim[0].pad = 0;
+ prim[0].mode = mode;
+ prim[0].start = 0;
+ prim[0].count = count;
+ prim[0].indexed = 1;
+ prim[0].basevertex = basevertex;
+ prim[0].num_instances = numInstances;
+
+ /* Need to give special consideration to rendering a range of
+ * indices starting somewhere above zero. Typically the
+ * application is issuing multiple DrawRangeElements() to draw
+ * successive primitives layed out linearly in the vertex arrays.
+ * Unless the vertex arrays are all in a VBO (or locked as with
+ * CVA), the OpenGL semantics imply that we need to re-read or
+ * re-upload the vertex data on each draw call.
+ *
+ * In the case of hardware tnl, we want to avoid starting the
+ * upload at zero, as it will mean every draw call uploads an
+ * increasing amount of not-used vertex data. Worse - in the
+ * software tnl module, all those vertices might be transformed and
+ * lit but never rendered.
+ *
+ * If we just upload or transform the vertices in start..end,
+ * however, the indices will be incorrect.
+ *
+ * At this level, we don't know exactly what the requirements of
+ * the backend are going to be, though it will likely boil down to
+ * either:
+ *
+ * 1) Do nothing, everything is in a VBO and is processed once
+ * only.
+ *
+ * 2) Adjust the indices and vertex arrays so that start becomes
+ * zero.
+ *
+ * Rather than doing anything here, I'll provide a helper function
+ * for the latter case elsewhere.
+ */
+
+ vbo->draw_prims( ctx, exec->array.inputs, prim, 1, &ib,
+ index_bounds_valid, start, end );
+}
+
+
+/**
+ * Called by glDrawRangeElementsBaseVertex() in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
+ GLuint start, GLuint end,
+ GLsizei count, GLenum type,
+ const GLvoid *indices,
+ GLint basevertex)
+{
+ static GLuint warnCount = 0;
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx,
+ "glDrawRangeElementsBaseVertex(%s, %u, %u, %d, %s, %p, %d)\n",
+ _mesa_lookup_enum_by_nr(mode), start, end, count,
+ _mesa_lookup_enum_by_nr(type), indices, basevertex);
+
+ if (!_mesa_validate_DrawRangeElements( ctx, mode, start, end, count,
+ type, indices, basevertex ))
+ return;
+
+ /* NOTE: It's important that 'end' is a reasonable value.
+ * in _tnl_draw_prims(), we use end to determine how many vertices
+ * to transform. If it's too large, we can unnecessarily split prims
+ * or we can read/write out of memory in several different places!
+ */
+
+ /* Catch/fix some potential user errors */
+ if (type == GL_UNSIGNED_BYTE) {
+ start = MIN2(start, 0xff);
+ end = MIN2(end, 0xff);
+ }
+ else if (type == GL_UNSIGNED_SHORT) {
+ start = MIN2(start, 0xffff);
+ end = MIN2(end, 0xffff);
+ }
+
+ if (end >= ctx->Array.ArrayObj->_MaxElement) {
+ /* the max element is out of bounds of one or more enabled arrays */
+ warnCount++;
+
+ if (warnCount < 10) {
+ _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, count %d, "
+ "type 0x%x, indices=%p)\n"
+ "\tend is out of bounds (max=%u) "
+ "Element Buffer %u (size %d)\n"
+ "\tThis should probably be fixed in the application.",
+ start, end, count, type, indices,
+ ctx->Array.ArrayObj->_MaxElement - 1,
+ ctx->Array.ElementArrayBufferObj->Name,
+ (int) ctx->Array.ElementArrayBufferObj->Size);
+ }
+
+ if (0)
+ dump_element_buffer(ctx, type);
+
+ if (0)
+ _mesa_print_arrays(ctx);
+
+#ifdef DEBUG
+ /* 'end' was out of bounds, but now let's check the actual array
+ * indexes to see if any of them are out of bounds.
+ */
+ {
+ GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
+ ctx->Array.ElementArrayBufferObj);
+ if (max >= ctx->Array.ArrayObj->_MaxElement) {
+ if (warnCount < 10) {
+ _mesa_warning(ctx, "glDraw[Range]Elements(start %u, end %u, "
+ "count %d, type 0x%x, indices=%p)\n"
+ "\tindex=%u is out of bounds (max=%u) "
+ "Element Buffer %u (size %d)\n"
+ "\tSkipping the glDrawRangeElements() call",
+ start, end, count, type, indices, max,
+ ctx->Array.ArrayObj->_MaxElement - 1,
+ ctx->Array.ElementArrayBufferObj->Name,
+ (int) ctx->Array.ElementArrayBufferObj->Size);
+ }
+ }
+ /* XXX we could also find the min index and compare to 'start'
+ * to see if start is correct. But it's more likely to get the
+ * upper bound wrong.
+ */
+ }
+#endif
+
+ /* Set 'end' to the max possible legal value */
+ assert(ctx->Array.ArrayObj->_MaxElement >= 1);
+ end = ctx->Array.ArrayObj->_MaxElement - 1;
+ }
+ else if (0) {
+ printf("glDraw[Range]Elements{,BaseVertex}"
+ "(start %u, end %u, type 0x%x, count %d) ElemBuf %u, "
+ "base %d\n",
+ start, end, type, count,
+ ctx->Array.ElementArrayBufferObj->Name,
+ basevertex);
+ }
+
+#if 0
+ check_draw_elements_data(ctx, count, type, indices);
+#else
+ (void) check_draw_elements_data;
+#endif
+
+ vbo_validated_drawrangeelements(ctx, mode, GL_TRUE, start, end,
+ count, type, indices, basevertex, 1);
+}
+
+
+/**
+ * Called by glDrawRangeElements() in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_DrawRangeElements(GLenum mode, GLuint start, GLuint end,
+ GLsizei count, GLenum type, const GLvoid *indices)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx,
+ "glDrawRangeElements(%s, %u, %u, %d, %s, %p)\n",
+ _mesa_lookup_enum_by_nr(mode), start, end, count,
+ _mesa_lookup_enum_by_nr(type), indices);
+
+ vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
+ indices, 0);
+}
+
+
+/**
+ * Called by glDrawElements() in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_DrawElements(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx, "glDrawElements(%s, %u, %s, %p)\n",
+ _mesa_lookup_enum_by_nr(mode), count,
+ _mesa_lookup_enum_by_nr(type), indices);
+
+ if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices, 0 ))
+ return;
+
+ vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
+ count, type, indices, 0, 1);
+}
+
+
+/**
+ * Called by glDrawElementsBaseVertex() in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices, GLint basevertex)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx, "glDrawElementsBaseVertex(%s, %d, %s, %p, %d)\n",
+ _mesa_lookup_enum_by_nr(mode), count,
+ _mesa_lookup_enum_by_nr(type), indices, basevertex);
+
+ if (!_mesa_validate_DrawElements( ctx, mode, count, type, indices,
+ basevertex ))
+ return;
+
+ vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
+ count, type, indices, basevertex, 1);
+}
+
+
+/**
+ * Called by glDrawElementsInstanced() in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_DrawElementsInstanced(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices, GLsizei numInstances)
+{
+ GET_CURRENT_CONTEXT(ctx);
+
+ if (MESA_VERBOSE & VERBOSE_DRAW)
+ _mesa_debug(ctx, "glDrawElementsInstanced(%s, %d, %s, %p, %d)\n",
+ _mesa_lookup_enum_by_nr(mode), count,
+ _mesa_lookup_enum_by_nr(type), indices, numInstances);
+
+ if (!_mesa_validate_DrawElementsInstanced(ctx, mode, count, type, indices,
+ numInstances))
+ return;
+
+ vbo_validated_drawrangeelements(ctx, mode, GL_FALSE, ~0, ~0,
+ count, type, indices, 0, numInstances);
+}
+
+
+/**
+ * Inner support for both _mesa_MultiDrawElements() and
+ * _mesa_MultiDrawRangeElements().
+ * This does the actual rendering after we've checked array indexes, etc.
+ */
+static void
+vbo_validated_multidrawelements(struct gl_context *ctx, GLenum mode,
+ const GLsizei *count, GLenum type,
+ const GLvoid **indices, GLsizei primcount,
+ const GLint *basevertex)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ struct _mesa_index_buffer ib;
+ struct _mesa_prim *prim;
+ unsigned int index_type_size = 0;
+ uintptr_t min_index_ptr, max_index_ptr;
+ GLboolean fallback = GL_FALSE;
+ int i;
+
+ if (primcount == 0)
+ return;
+
+ FLUSH_CURRENT( ctx, 0 );
+
+ if (!_mesa_valid_to_render(ctx, "glMultiDrawElements")) {
+ return;
+ }
+
+ prim = calloc(1, primcount * sizeof(*prim));
+ if (prim == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glMultiDrawElements");
+ return;
+ }
+
+ /* Decide if we can do this all as one set of primitives sharing the
+ * same index buffer, or if we have to reset the index pointer per
+ * primitive.
+ */
+ bind_arrays( ctx );
+
+ /* check for dirty state again */
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ switch (type) {
+ case GL_UNSIGNED_INT:
+ index_type_size = 4;
+ break;
+ case GL_UNSIGNED_SHORT:
+ index_type_size = 2;
+ break;
+ case GL_UNSIGNED_BYTE:
+ index_type_size = 1;
+ break;
+ default:
+ assert(0);
+ }
+
+ min_index_ptr = (uintptr_t)indices[0];
+ max_index_ptr = 0;
+ for (i = 0; i < primcount; i++) {
+ min_index_ptr = MIN2(min_index_ptr, (uintptr_t)indices[i]);
+ max_index_ptr = MAX2(max_index_ptr, (uintptr_t)indices[i] +
+ index_type_size * count[i]);
+ }
+
+ /* Check if we can handle this thing as a bunch of index offsets from the
+ * same index pointer. If we can't, then we have to fall back to doing
+ * a draw_prims per primitive.
+ * Check that the difference between each prim's indexes is a multiple of
+ * the index/element size.
+ */
+ if (index_type_size != 1) {
+ for (i = 0; i < primcount; i++) {
+ if ((((uintptr_t)indices[i] - min_index_ptr) % index_type_size) != 0) {
+ fallback = GL_TRUE;
+ break;
+ }
+ }
+ }
+
+ /* If the index buffer isn't in a VBO, then treating the application's
+ * subranges of the index buffer as one large index buffer may lead to
+ * us reading unmapped memory.
+ */
+ if (!_mesa_is_bufferobj(ctx->Array.ElementArrayBufferObj))
+ fallback = GL_TRUE;
+
+ if (!fallback) {
+ ib.count = (max_index_ptr - min_index_ptr) / index_type_size;
+ ib.type = type;
+ ib.obj = ctx->Array.ElementArrayBufferObj;
+ ib.ptr = (void *)min_index_ptr;
+
+ for (i = 0; i < primcount; i++) {
+ prim[i].begin = (i == 0);
+ prim[i].end = (i == primcount - 1);
+ prim[i].weak = 0;
+ prim[i].pad = 0;
+ prim[i].mode = mode;
+ prim[i].start = ((uintptr_t)indices[i] - min_index_ptr) / index_type_size;
+ prim[i].count = count[i];
+ prim[i].indexed = 1;
+ prim[i].num_instances = 1;
+ if (basevertex != NULL)
+ prim[i].basevertex = basevertex[i];
+ else
+ prim[i].basevertex = 0;
+ }
+
+ vbo->draw_prims(ctx, exec->array.inputs, prim, primcount, &ib,
+ GL_FALSE, ~0, ~0);
+ } else {
+ /* render one prim at a time */
+ for (i = 0; i < primcount; i++) {
+ ib.count = count[i];
+ ib.type = type;
+ ib.obj = ctx->Array.ElementArrayBufferObj;
+ ib.ptr = indices[i];
+
+ prim[0].begin = 1;
+ prim[0].end = 1;
+ prim[0].weak = 0;
+ prim[0].pad = 0;
+ prim[0].mode = mode;
+ prim[0].start = 0;
+ prim[0].count = count[i];
+ prim[0].indexed = 1;
+ prim[0].num_instances = 1;
+ if (basevertex != NULL)
+ prim[0].basevertex = basevertex[i];
+ else
+ prim[0].basevertex = 0;
+
+ vbo->draw_prims(ctx, exec->array.inputs, prim, 1, &ib,
+ GL_FALSE, ~0, ~0);
+ }
+ }
+
+ free(prim);
+}
+
+
+static void GLAPIENTRY
+vbo_exec_MultiDrawElements(GLenum mode,
+ const GLsizei *count, GLenum type,
+ const GLvoid **indices,
+ GLsizei primcount)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ GLint i;
+
+ ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
+
+ for (i = 0; i < primcount; i++) {
+ if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
+ 0))
+ return;
+ }
+
+ vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
+ NULL);
+}
+
+
+static void GLAPIENTRY
+vbo_exec_MultiDrawElementsBaseVertex(GLenum mode,
+ const GLsizei *count, GLenum type,
+ const GLvoid **indices,
+ GLsizei primcount,
+ const GLsizei *basevertex)
+{
+ GET_CURRENT_CONTEXT(ctx);
+ GLint i;
+
+ ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx);
+
+ for (i = 0; i < primcount; i++) {
+ if (!_mesa_validate_DrawElements(ctx, mode, count[i], type, indices[i],
+ basevertex[i]))
+ return;
+ }
+
+ vbo_validated_multidrawelements(ctx, mode, count, type, indices, primcount,
+ basevertex);
+}
+
+
+/**
+ * Plug in the immediate-mode vertex array drawing commands into the
+ * givven vbo_exec_context object.
+ */
+void
+vbo_exec_array_init( struct vbo_exec_context *exec )
+{
+ exec->vtxfmt.DrawArrays = vbo_exec_DrawArrays;
+ exec->vtxfmt.DrawElements = vbo_exec_DrawElements;
+ exec->vtxfmt.DrawRangeElements = vbo_exec_DrawRangeElements;
+ exec->vtxfmt.MultiDrawElementsEXT = vbo_exec_MultiDrawElements;
+ exec->vtxfmt.DrawElementsBaseVertex = vbo_exec_DrawElementsBaseVertex;
+ exec->vtxfmt.DrawRangeElementsBaseVertex = vbo_exec_DrawRangeElementsBaseVertex;
+ exec->vtxfmt.MultiDrawElementsBaseVertex = vbo_exec_MultiDrawElementsBaseVertex;
+ exec->vtxfmt.DrawArraysInstanced = vbo_exec_DrawArraysInstanced;
+ exec->vtxfmt.DrawElementsInstanced = vbo_exec_DrawElementsInstanced;
+}
+
+
+void
+vbo_exec_array_destroy( struct vbo_exec_context *exec )
+{
+ /* nothing to do */
+}
+
+
+
+/**
+ * The following functions are only used for OpenGL ES 1/2 support.
+ * And some aren't even supported (yet) in ES 1/2.
+ */
+
+
+void GLAPIENTRY
+_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count)
+{
+ vbo_exec_DrawArrays(mode, first, count);
+}
+
+
+void GLAPIENTRY
+_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices)
+{
+ vbo_exec_DrawElements(mode, count, type, indices);
+}
+
+
+void GLAPIENTRY
+_mesa_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type,
+ const GLvoid *indices, GLint basevertex)
+{
+ vbo_exec_DrawElementsBaseVertex(mode, count, type, indices, basevertex);
+}
+
+
+void GLAPIENTRY
+_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count,
+ GLenum type, const GLvoid *indices)
+{
+ vbo_exec_DrawRangeElements(mode, start, end, count, type, indices);
+}
+
+
+void GLAPIENTRY
+_mesa_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end,
+ GLsizei count, GLenum type,
+ const GLvoid *indices, GLint basevertex)
+{
+ vbo_exec_DrawRangeElementsBaseVertex(mode, start, end, count, type,
+ indices, basevertex);
+}
+
+
+void GLAPIENTRY
+_mesa_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type,
+ const GLvoid **indices, GLsizei primcount)
+{
+ vbo_exec_MultiDrawElements(mode, count, type, indices, primcount);
+}
+
+
+void GLAPIENTRY
+_mesa_MultiDrawElementsBaseVertex(GLenum mode,
+ const GLsizei *count, GLenum type,
+ const GLvoid **indices, GLsizei primcount,
+ const GLint *basevertex)
+{
+ vbo_exec_MultiDrawElementsBaseVertex(mode, count, type, indices,
+ primcount, basevertex);
+}
diff --git a/mesalib/src/mesa/vbo/vbo_exec_draw.c b/mesalib/src/mesa/vbo/vbo_exec_draw.c
index 048f3d170..f8be83ea8 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_draw.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_draw.c
@@ -1,420 +1,421 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.2
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/compiler.h"
-#include "main/enums.h"
-#include "main/mfeatures.h"
-#include "main/state.h"
-
-#include "vbo_context.h"
-
-
-#if FEATURE_beginend
-
-
-static void
-vbo_exec_debug_verts( struct vbo_exec_context *exec )
-{
- GLuint count = exec->vtx.vert_count;
- GLuint i;
-
- printf("%s: %u vertices %d primitives, %d vertsize\n",
- __FUNCTION__,
- count,
- exec->vtx.prim_count,
- exec->vtx.vertex_size);
-
- for (i = 0 ; i < exec->vtx.prim_count ; i++) {
- struct _mesa_prim *prim = &exec->vtx.prim[i];
- printf(" prim %d: %s%s %d..%d %s %s\n",
- i,
- _mesa_lookup_prim_by_nr(prim->mode),
- prim->weak ? " (weak)" : "",
- prim->start,
- prim->start + prim->count,
- prim->begin ? "BEGIN" : "(wrap)",
- prim->end ? "END" : "(wrap)");
- }
-}
-
-
-/*
- * NOTE: Need to have calculated primitives by this point -- do it on the fly.
- * NOTE: Old 'parity' issue is gone.
- */
-static GLuint
-vbo_copy_vertices( struct vbo_exec_context *exec )
-{
- GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count;
- GLuint ovf, i;
- GLuint sz = exec->vtx.vertex_size;
- GLfloat *dst = exec->vtx.copied.buffer;
- const GLfloat *src = (exec->vtx.buffer_map +
- exec->vtx.prim[exec->vtx.prim_count-1].start *
- exec->vtx.vertex_size);
-
-
- switch (exec->ctx->Driver.CurrentExecPrimitive) {
- case GL_POINTS:
- return 0;
- case GL_LINES:
- ovf = nr&1;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_TRIANGLES:
- ovf = nr%3;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_QUADS:
- ovf = nr&3;
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case GL_LINE_STRIP:
- if (nr == 0) {
- return 0;
- }
- else {
- memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) );
- return 1;
- }
- case GL_LINE_LOOP:
- case GL_TRIANGLE_FAN:
- case GL_POLYGON:
- if (nr == 0) {
- return 0;
- }
- else if (nr == 1) {
- memcpy( dst, src+0, sz * sizeof(GLfloat) );
- return 1;
- }
- else {
- memcpy( dst, src+0, sz * sizeof(GLfloat) );
- memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) );
- return 2;
- }
- case GL_TRIANGLE_STRIP:
- /* no parity issue, but need to make sure the tri is not drawn twice */
- if (nr & 1) {
- exec->vtx.prim[exec->vtx.prim_count-1].count--;
- }
- /* fallthrough */
- case GL_QUAD_STRIP:
- switch (nr) {
- case 0:
- ovf = 0;
- break;
- case 1:
- ovf = 1;
- break;
- default:
- ovf = 2 + (nr & 1);
- break;
- }
- for (i = 0 ; i < ovf ; i++)
- memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
- return i;
- case PRIM_OUTSIDE_BEGIN_END:
- return 0;
- default:
- assert(0);
- return 0;
- }
-}
-
-
-
-/* TODO: populate these as the vertex is defined:
- */
-static void
-vbo_exec_bind_arrays( struct gl_context *ctx )
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_exec_context *exec = &vbo->exec;
- struct gl_client_array *arrays = exec->vtx.arrays;
- const GLuint count = exec->vtx.vert_count;
- const GLuint *map;
- GLuint attr;
- GLbitfield varying_inputs = 0x0;
-
- /* Install the default (ie Current) attributes first, then overlay
- * all active ones.
- */
- switch (get_program_mode(exec->ctx)) {
- case VP_NONE:
- for (attr = 0; attr < 16; attr++) {
- exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
- }
- for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
- ASSERT(attr + 16 < Elements(exec->vtx.inputs));
- exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr];
- }
- map = vbo->map_vp_none;
- break;
- case VP_NV:
- case VP_ARB:
- /* The aliasing of attributes for NV vertex programs has already
- * occurred. NV vertex programs cannot access material values,
- * nor attributes greater than VERT_ATTRIB_TEX7.
- */
- for (attr = 0; attr < 16; attr++) {
- exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
- ASSERT(attr + 16 < Elements(exec->vtx.inputs));
- exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr];
- }
- map = vbo->map_vp_arb;
-
- /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
- * In that case we effectively need to route the data from
- * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
- */
- if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
- (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
- exec->vtx.inputs[16] = exec->vtx.inputs[0];
- exec->vtx.attrsz[16] = exec->vtx.attrsz[0];
- exec->vtx.attrptr[16] = exec->vtx.attrptr[0];
- exec->vtx.attrsz[0] = 0;
- }
- break;
- default:
- assert(0);
- }
-
- /* Make all active attributes (including edgeflag) available as
- * arrays of floats.
- */
- for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) {
- const GLuint src = map[attr];
-
- if (exec->vtx.attrsz[src]) {
- GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
- (GLbyte *)exec->vtx.vertex;
-
- /* override the default array set above */
- ASSERT(attr < Elements(exec->vtx.inputs));
- ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
- exec->vtx.inputs[attr] = &arrays[attr];
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- /* a real buffer obj: Ptr is an offset, not a pointer*/
- assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */
- assert(offset >= 0);
- arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset;
- }
- else {
- /* Ptr into ordinary app memory */
- arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset;
- }
- arrays[attr].Size = exec->vtx.attrsz[src];
- arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat);
- arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat);
- arrays[attr].Type = GL_FLOAT;
- arrays[attr].Format = GL_RGBA;
- arrays[attr].Enabled = 1;
- _mesa_reference_buffer_object(ctx,
- &arrays[attr].BufferObj,
- exec->vtx.bufferobj);
- arrays[attr]._MaxElement = count; /* ??? */
-
- varying_inputs |= 1 << attr;
- }
- }
-
- _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-}
-
-
-static void
-vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
-{
- GLenum target = GL_ARRAY_BUFFER_ARB;
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- struct gl_context *ctx = exec->ctx;
-
- if (ctx->Driver.FlushMappedBufferRange) {
- GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset;
- GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
-
- if (length)
- ctx->Driver.FlushMappedBufferRange(ctx, target,
- offset, length,
- exec->vtx.bufferobj);
- }
-
- exec->vtx.buffer_used += (exec->vtx.buffer_ptr -
- exec->vtx.buffer_map) * sizeof(float);
-
- assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
- assert(exec->vtx.buffer_ptr != NULL);
-
- ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
- exec->vtx.buffer_map = NULL;
- exec->vtx.buffer_ptr = NULL;
- exec->vtx.max_vert = 0;
- }
-}
-
-
-void
-vbo_exec_vtx_map( struct vbo_exec_context *exec )
-{
- struct gl_context *ctx = exec->ctx;
- const GLenum target = GL_ARRAY_BUFFER_ARB;
- const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
- const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */
- GL_MAP_INVALIDATE_RANGE_BIT |
- GL_MAP_UNSYNCHRONIZED_BIT |
- GL_MAP_FLUSH_EXPLICIT_BIT |
- MESA_MAP_NOWAIT_BIT;
- const GLenum usage = GL_STREAM_DRAW_ARB;
-
- if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
- return;
-
- if (exec->vtx.buffer_map != NULL) {
- assert(0);
- exec->vtx.buffer_map = NULL;
- exec->vtx.buffer_ptr = NULL;
- }
-
- if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
- ctx->Driver.MapBufferRange) {
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBufferRange(ctx,
- target,
- exec->vtx.buffer_used,
- (VBO_VERT_BUFFER_SIZE -
- exec->vtx.buffer_used),
- accessRange,
- exec->vtx.bufferobj);
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- }
-
- if (!exec->vtx.buffer_map) {
- exec->vtx.buffer_used = 0;
-
- ctx->Driver.BufferData(ctx, target,
- VBO_VERT_BUFFER_SIZE,
- NULL, usage, exec->vtx.bufferobj);
-
-
- if (ctx->Driver.MapBufferRange)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
- 0, VBO_VERT_BUFFER_SIZE,
- accessRange,
- exec->vtx.bufferobj);
- if (!exec->vtx.buffer_map)
- exec->vtx.buffer_map =
- (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
- assert(exec->vtx.buffer_map);
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- }
-
- if (0)
- printf("map %d..\n", exec->vtx.buffer_used);
-}
-
-
-
-/**
- * Execute the buffer and save copied verts.
- */
-void
-vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
-{
- if (0)
- vbo_exec_debug_verts( exec );
-
- if (exec->vtx.prim_count &&
- exec->vtx.vert_count) {
-
- exec->vtx.copied.nr = vbo_copy_vertices( exec );
-
- if (exec->vtx.copied.nr != exec->vtx.vert_count) {
- struct gl_context *ctx = exec->ctx;
-
- /* Before the update_state() as this may raise _NEW_ARRAY
- * from _mesa_set_varying_vp_inputs().
- */
- vbo_exec_bind_arrays( ctx );
-
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
- vbo_exec_vtx_unmap( exec );
- }
-
- if (0)
- printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count,
- exec->vtx.vert_count);
-
- vbo_context(ctx)->draw_prims( ctx,
- exec->vtx.inputs,
- exec->vtx.prim,
- exec->vtx.prim_count,
- NULL,
- GL_TRUE,
- 0,
- exec->vtx.vert_count - 1);
-
- /* If using a real VBO, get new storage -- unless asked not to.
- */
- if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) {
- vbo_exec_vtx_map( exec );
- }
- }
- }
-
- /* May have to unmap explicitly if we didn't draw:
- */
- if (unmap &&
- _mesa_is_bufferobj(exec->vtx.bufferobj) &&
- exec->vtx.buffer_map) {
- vbo_exec_vtx_unmap( exec );
- }
-
-
- if (unmap || exec->vtx.vertex_size == 0)
- exec->vtx.max_vert = 0;
- else
- exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
- (exec->vtx.vertex_size * sizeof(GLfloat)));
-
- exec->vtx.buffer_ptr = exec->vtx.buffer_map;
- exec->vtx.prim_count = 0;
- exec->vtx.vert_count = 0;
-}
-
-
-#endif /* FEATURE_beginend */
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.2
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/compiler.h"
+#include "main/enums.h"
+#include "main/mfeatures.h"
+#include "main/state.h"
+
+#include "vbo_context.h"
+
+
+#if FEATURE_beginend
+
+
+static void
+vbo_exec_debug_verts( struct vbo_exec_context *exec )
+{
+ GLuint count = exec->vtx.vert_count;
+ GLuint i;
+
+ printf("%s: %u vertices %d primitives, %d vertsize\n",
+ __FUNCTION__,
+ count,
+ exec->vtx.prim_count,
+ exec->vtx.vertex_size);
+
+ for (i = 0 ; i < exec->vtx.prim_count ; i++) {
+ struct _mesa_prim *prim = &exec->vtx.prim[i];
+ printf(" prim %d: %s%s %d..%d %s %s\n",
+ i,
+ _mesa_lookup_prim_by_nr(prim->mode),
+ prim->weak ? " (weak)" : "",
+ prim->start,
+ prim->start + prim->count,
+ prim->begin ? "BEGIN" : "(wrap)",
+ prim->end ? "END" : "(wrap)");
+ }
+}
+
+
+/*
+ * NOTE: Need to have calculated primitives by this point -- do it on the fly.
+ * NOTE: Old 'parity' issue is gone.
+ */
+static GLuint
+vbo_copy_vertices( struct vbo_exec_context *exec )
+{
+ GLuint nr = exec->vtx.prim[exec->vtx.prim_count-1].count;
+ GLuint ovf, i;
+ GLuint sz = exec->vtx.vertex_size;
+ GLfloat *dst = exec->vtx.copied.buffer;
+ const GLfloat *src = (exec->vtx.buffer_map +
+ exec->vtx.prim[exec->vtx.prim_count-1].start *
+ exec->vtx.vertex_size);
+
+
+ switch (exec->ctx->Driver.CurrentExecPrimitive) {
+ case GL_POINTS:
+ return 0;
+ case GL_LINES:
+ ovf = nr&1;
+ for (i = 0 ; i < ovf ; i++)
+ memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
+ return i;
+ case GL_TRIANGLES:
+ ovf = nr%3;
+ for (i = 0 ; i < ovf ; i++)
+ memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
+ return i;
+ case GL_QUADS:
+ ovf = nr&3;
+ for (i = 0 ; i < ovf ; i++)
+ memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
+ return i;
+ case GL_LINE_STRIP:
+ if (nr == 0) {
+ return 0;
+ }
+ else {
+ memcpy( dst, src+(nr-1)*sz, sz * sizeof(GLfloat) );
+ return 1;
+ }
+ case GL_LINE_LOOP:
+ case GL_TRIANGLE_FAN:
+ case GL_POLYGON:
+ if (nr == 0) {
+ return 0;
+ }
+ else if (nr == 1) {
+ memcpy( dst, src+0, sz * sizeof(GLfloat) );
+ return 1;
+ }
+ else {
+ memcpy( dst, src+0, sz * sizeof(GLfloat) );
+ memcpy( dst+sz, src+(nr-1)*sz, sz * sizeof(GLfloat) );
+ return 2;
+ }
+ case GL_TRIANGLE_STRIP:
+ /* no parity issue, but need to make sure the tri is not drawn twice */
+ if (nr & 1) {
+ exec->vtx.prim[exec->vtx.prim_count-1].count--;
+ }
+ /* fallthrough */
+ case GL_QUAD_STRIP:
+ switch (nr) {
+ case 0:
+ ovf = 0;
+ break;
+ case 1:
+ ovf = 1;
+ break;
+ default:
+ ovf = 2 + (nr & 1);
+ break;
+ }
+ for (i = 0 ; i < ovf ; i++)
+ memcpy( dst+i*sz, src+(nr-ovf+i)*sz, sz * sizeof(GLfloat) );
+ return i;
+ case PRIM_OUTSIDE_BEGIN_END:
+ return 0;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+
+
+/* TODO: populate these as the vertex is defined:
+ */
+static void
+vbo_exec_bind_arrays( struct gl_context *ctx )
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_exec_context *exec = &vbo->exec;
+ struct gl_client_array *arrays = exec->vtx.arrays;
+ const GLuint count = exec->vtx.vert_count;
+ const GLuint *map;
+ GLuint attr;
+ GLbitfield varying_inputs = 0x0;
+
+ /* Install the default (ie Current) attributes first, then overlay
+ * all active ones.
+ */
+ switch (get_program_mode(exec->ctx)) {
+ case VP_NONE:
+ for (attr = 0; attr < 16; attr++) {
+ exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
+ }
+ for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
+ ASSERT(attr + 16 < Elements(exec->vtx.inputs));
+ exec->vtx.inputs[attr + 16] = &vbo->mat_currval[attr];
+ }
+ map = vbo->map_vp_none;
+ break;
+ case VP_NV:
+ case VP_ARB:
+ /* The aliasing of attributes for NV vertex programs has already
+ * occurred. NV vertex programs cannot access material values,
+ * nor attributes greater than VERT_ATTRIB_TEX7.
+ */
+ for (attr = 0; attr < 16; attr++) {
+ exec->vtx.inputs[attr] = &vbo->legacy_currval[attr];
+ ASSERT(attr + 16 < Elements(exec->vtx.inputs));
+ exec->vtx.inputs[attr + 16] = &vbo->generic_currval[attr];
+ }
+ map = vbo->map_vp_arb;
+
+ /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
+ * In that case we effectively need to route the data from
+ * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
+ */
+ if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
+ (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
+ exec->vtx.inputs[16] = exec->vtx.inputs[0];
+ exec->vtx.attrsz[16] = exec->vtx.attrsz[0];
+ exec->vtx.attrptr[16] = exec->vtx.attrptr[0];
+ exec->vtx.attrsz[0] = 0;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ /* Make all active attributes (including edgeflag) available as
+ * arrays of floats.
+ */
+ for (attr = 0; attr < VERT_ATTRIB_MAX ; attr++) {
+ const GLuint src = map[attr];
+
+ if (exec->vtx.attrsz[src]) {
+ GLsizeiptr offset = (GLbyte *)exec->vtx.attrptr[src] -
+ (GLbyte *)exec->vtx.vertex;
+
+ /* override the default array set above */
+ ASSERT(attr < Elements(exec->vtx.inputs));
+ ASSERT(attr < Elements(exec->vtx.arrays)); /* arrays[] */
+ exec->vtx.inputs[attr] = &arrays[attr];
+
+ if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
+ /* a real buffer obj: Ptr is an offset, not a pointer*/
+ assert(exec->vtx.bufferobj->Pointer); /* buf should be mapped */
+ assert(offset >= 0);
+ arrays[attr].Ptr = (GLubyte *)exec->vtx.bufferobj->Offset + offset;
+ }
+ else {
+ /* Ptr into ordinary app memory */
+ arrays[attr].Ptr = (GLubyte *)exec->vtx.buffer_map + offset;
+ }
+ arrays[attr].Size = exec->vtx.attrsz[src];
+ arrays[attr].StrideB = exec->vtx.vertex_size * sizeof(GLfloat);
+ arrays[attr].Stride = exec->vtx.vertex_size * sizeof(GLfloat);
+ arrays[attr].Type = GL_FLOAT;
+ arrays[attr].Format = GL_RGBA;
+ arrays[attr].Enabled = 1;
+ _mesa_reference_buffer_object(ctx,
+ &arrays[attr].BufferObj,
+ exec->vtx.bufferobj);
+ arrays[attr]._MaxElement = count; /* ??? */
+
+ varying_inputs |= 1 << attr;
+ ctx->NewState |= _NEW_ARRAY;
+ }
+ }
+
+ _mesa_set_varying_vp_inputs( ctx, varying_inputs );
+}
+
+
+static void
+vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
+{
+ GLenum target = GL_ARRAY_BUFFER_ARB;
+
+ if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
+ struct gl_context *ctx = exec->ctx;
+
+ if (ctx->Driver.FlushMappedBufferRange) {
+ GLintptr offset = exec->vtx.buffer_used - exec->vtx.bufferobj->Offset;
+ GLsizeiptr length = (exec->vtx.buffer_ptr - exec->vtx.buffer_map) * sizeof(float);
+
+ if (length)
+ ctx->Driver.FlushMappedBufferRange(ctx, target,
+ offset, length,
+ exec->vtx.bufferobj);
+ }
+
+ exec->vtx.buffer_used += (exec->vtx.buffer_ptr -
+ exec->vtx.buffer_map) * sizeof(float);
+
+ assert(exec->vtx.buffer_used <= VBO_VERT_BUFFER_SIZE);
+ assert(exec->vtx.buffer_ptr != NULL);
+
+ ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj);
+ exec->vtx.buffer_map = NULL;
+ exec->vtx.buffer_ptr = NULL;
+ exec->vtx.max_vert = 0;
+ }
+}
+
+
+void
+vbo_exec_vtx_map( struct vbo_exec_context *exec )
+{
+ struct gl_context *ctx = exec->ctx;
+ const GLenum target = GL_ARRAY_BUFFER_ARB;
+ const GLenum access = GL_READ_WRITE_ARB; /* for MapBuffer */
+ const GLenum accessRange = GL_MAP_WRITE_BIT | /* for MapBufferRange */
+ GL_MAP_INVALIDATE_RANGE_BIT |
+ GL_MAP_UNSYNCHRONIZED_BIT |
+ GL_MAP_FLUSH_EXPLICIT_BIT |
+ MESA_MAP_NOWAIT_BIT;
+ const GLenum usage = GL_STREAM_DRAW_ARB;
+
+ if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
+ return;
+
+ if (exec->vtx.buffer_map != NULL) {
+ assert(0);
+ exec->vtx.buffer_map = NULL;
+ exec->vtx.buffer_ptr = NULL;
+ }
+
+ if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
+ ctx->Driver.MapBufferRange) {
+ exec->vtx.buffer_map =
+ (GLfloat *)ctx->Driver.MapBufferRange(ctx,
+ target,
+ exec->vtx.buffer_used,
+ (VBO_VERT_BUFFER_SIZE -
+ exec->vtx.buffer_used),
+ accessRange,
+ exec->vtx.bufferobj);
+ exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+ }
+
+ if (!exec->vtx.buffer_map) {
+ exec->vtx.buffer_used = 0;
+
+ ctx->Driver.BufferData(ctx, target,
+ VBO_VERT_BUFFER_SIZE,
+ NULL, usage, exec->vtx.bufferobj);
+
+
+ if (ctx->Driver.MapBufferRange)
+ exec->vtx.buffer_map =
+ (GLfloat *)ctx->Driver.MapBufferRange(ctx, target,
+ 0, VBO_VERT_BUFFER_SIZE,
+ accessRange,
+ exec->vtx.bufferobj);
+ if (!exec->vtx.buffer_map)
+ exec->vtx.buffer_map =
+ (GLfloat *)ctx->Driver.MapBuffer(ctx, target, access, exec->vtx.bufferobj);
+ assert(exec->vtx.buffer_map);
+ exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+ }
+
+ if (0)
+ printf("map %d..\n", exec->vtx.buffer_used);
+}
+
+
+
+/**
+ * Execute the buffer and save copied verts.
+ */
+void
+vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
+{
+ if (0)
+ vbo_exec_debug_verts( exec );
+
+ if (exec->vtx.prim_count &&
+ exec->vtx.vert_count) {
+
+ exec->vtx.copied.nr = vbo_copy_vertices( exec );
+
+ if (exec->vtx.copied.nr != exec->vtx.vert_count) {
+ struct gl_context *ctx = exec->ctx;
+
+ /* Before the update_state() as this may raise _NEW_ARRAY
+ * from _mesa_set_varying_vp_inputs().
+ */
+ vbo_exec_bind_arrays( ctx );
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ if (_mesa_is_bufferobj(exec->vtx.bufferobj)) {
+ vbo_exec_vtx_unmap( exec );
+ }
+
+ if (0)
+ printf("%s %d %d\n", __FUNCTION__, exec->vtx.prim_count,
+ exec->vtx.vert_count);
+
+ vbo_context(ctx)->draw_prims( ctx,
+ exec->vtx.inputs,
+ exec->vtx.prim,
+ exec->vtx.prim_count,
+ NULL,
+ GL_TRUE,
+ 0,
+ exec->vtx.vert_count - 1);
+
+ /* If using a real VBO, get new storage -- unless asked not to.
+ */
+ if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) {
+ vbo_exec_vtx_map( exec );
+ }
+ }
+ }
+
+ /* May have to unmap explicitly if we didn't draw:
+ */
+ if (unmap &&
+ _mesa_is_bufferobj(exec->vtx.bufferobj) &&
+ exec->vtx.buffer_map) {
+ vbo_exec_vtx_unmap( exec );
+ }
+
+
+ if (unmap || exec->vtx.vertex_size == 0)
+ exec->vtx.max_vert = 0;
+ else
+ exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) /
+ (exec->vtx.vertex_size * sizeof(GLfloat)));
+
+ exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+ exec->vtx.prim_count = 0;
+ exec->vtx.vert_count = 0;
+}
+
+
+#endif /* FEATURE_beginend */
diff --git a/mesalib/src/mesa/vbo/vbo_save_draw.c b/mesalib/src/mesa/vbo/vbo_save_draw.c
index 6d8dbdb86..634a6d3f8 100644
--- a/mesalib/src/mesa/vbo/vbo_save_draw.c
+++ b/mesalib/src/mesa/vbo/vbo_save_draw.c
@@ -1,304 +1,305 @@
-/*
- * Mesa 3-D graphics library
- * Version: 7.2
- *
- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* Author:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/imports.h"
-#include "main/mfeatures.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/light.h"
-#include "main/state.h"
-
-#include "vbo_context.h"
-
-
-#if FEATURE_dlist
-
-
-/**
- * After playback, copy everything but the position from the
- * last vertex to the saved state
- */
-static void
-_playback_copy_to_current(struct gl_context *ctx,
- const struct vbo_save_vertex_list *node)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- GLfloat vertex[VBO_ATTRIB_MAX * 4];
- GLfloat *data;
- GLuint i, offset;
-
- if (node->current_size == 0)
- return;
-
- if (node->current_data) {
- data = node->current_data;
- }
- else {
- data = vertex;
-
- if (node->count)
- offset = (node->buffer_offset +
- (node->count-1) * node->vertex_size * sizeof(GLfloat));
- else
- offset = node->buffer_offset;
-
- ctx->Driver.GetBufferSubData( ctx, 0, offset,
- node->vertex_size * sizeof(GLfloat),
- data, node->vertex_store->bufferobj );
-
- data += node->attrsz[0]; /* skip vertex position */
- }
-
- for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
- if (node->attrsz[i]) {
- GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
- GLfloat tmp[4];
-
- COPY_CLEAN_4V(tmp,
- node->attrsz[i],
- data);
-
- if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) {
- memcpy(current, tmp, 4 * sizeof(GLfloat));
-
- vbo->currval[i].Size = node->attrsz[i];
-
- if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
- i <= VBO_ATTRIB_LAST_MATERIAL)
- ctx->NewState |= _NEW_LIGHT;
-
- ctx->NewState |= _NEW_CURRENT_ATTRIB;
- }
-
- data += node->attrsz[i];
- }
- }
-
- /* Colormaterial -- this kindof sucks.
- */
- if (ctx->Light.ColorMaterialEnabled) {
- _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]);
- }
-
- /* CurrentExecPrimitive
- */
- if (node->prim_count) {
- const struct _mesa_prim *prim = &node->prim[node->prim_count - 1];
- if (prim->end)
- ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
- else
- ctx->Driver.CurrentExecPrimitive = prim->mode;
- }
-}
-
-
-
-/**
- * Treat the vertex storage as a VBO, define vertex arrays pointing
- * into it:
- */
-static void vbo_bind_vertex_list(struct gl_context *ctx,
- const struct vbo_save_vertex_list *node)
-{
- struct vbo_context *vbo = vbo_context(ctx);
- struct vbo_save_context *save = &vbo->save;
- struct gl_client_array *arrays = save->arrays;
- GLuint buffer_offset = node->buffer_offset;
- const GLuint *map;
- GLuint attr;
- GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */
- GLbitfield varying_inputs = 0x0;
-
- memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
-
- /* Install the default (ie Current) attributes first, then overlay
- * all active ones.
- */
- switch (get_program_mode(ctx)) {
- case VP_NONE:
- for (attr = 0; attr < 16; attr++) {
- save->inputs[attr] = &vbo->legacy_currval[attr];
- }
- for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
- save->inputs[attr + 16] = &vbo->mat_currval[attr];
- }
- map = vbo->map_vp_none;
- break;
- case VP_NV:
- case VP_ARB:
- /* The aliasing of attributes for NV vertex programs has already
- * occurred. NV vertex programs cannot access material values,
- * nor attributes greater than VERT_ATTRIB_TEX7.
- */
- for (attr = 0; attr < 16; attr++) {
- save->inputs[attr] = &vbo->legacy_currval[attr];
- save->inputs[attr + 16] = &vbo->generic_currval[attr];
- }
- map = vbo->map_vp_arb;
-
- /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
- * In that case we effectively need to route the data from
- * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
- */
- if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
- (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
- save->inputs[16] = save->inputs[0];
- node_attrsz[16] = node_attrsz[0];
- node_attrsz[0] = 0;
- }
- break;
- default:
- assert(0);
- }
-
- for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
- const GLuint src = map[attr];
-
- if (node_attrsz[src]) {
- /* override the default array set above */
- save->inputs[attr] = &arrays[attr];
-
- arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset;
- arrays[attr].Size = node->attrsz[src];
- arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat);
- arrays[attr].Stride = node->vertex_size * sizeof(GLfloat);
- arrays[attr].Type = GL_FLOAT;
- arrays[attr].Format = GL_RGBA;
- arrays[attr].Enabled = 1;
- _mesa_reference_buffer_object(ctx,
- &arrays[attr].BufferObj,
- node->vertex_store->bufferobj);
- arrays[attr]._MaxElement = node->count; /* ??? */
-
- assert(arrays[attr].BufferObj->Name);
-
- buffer_offset += node->attrsz[src] * sizeof(GLfloat);
- varying_inputs |= 1<<attr;
- }
- }
-
- _mesa_set_varying_vp_inputs( ctx, varying_inputs );
-}
-
-
-static void
-vbo_save_loopback_vertex_list(struct gl_context *ctx,
- const struct vbo_save_vertex_list *list)
-{
- const char *buffer = ctx->Driver.MapBuffer(ctx,
- GL_ARRAY_BUFFER_ARB,
- GL_READ_ONLY, /* ? */
- list->vertex_store->bufferobj);
-
- vbo_loopback_vertex_list(ctx,
- (const GLfloat *)(buffer + list->buffer_offset),
- list->attrsz,
- list->prim,
- list->prim_count,
- list->wrap_count,
- list->vertex_size);
-
- ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
- list->vertex_store->bufferobj);
-}
-
-
-/**
- * Execute the buffer and save copied verts.
- * This is called from the display list code when executing
- * a drawing command.
- */
-void
-vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
-{
- const struct vbo_save_vertex_list *node =
- (const struct vbo_save_vertex_list *) data;
- struct vbo_save_context *save = &vbo_context(ctx)->save;
-
- FLUSH_CURRENT(ctx, 0);
-
- if (node->prim_count > 0 && node->count > 0) {
-
- if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END &&
- node->prim[0].begin) {
-
- /* Degenerate case: list is called inside begin/end pair and
- * includes operations such as glBegin or glDrawArrays.
- */
- if (0)
- printf("displaylist recursive begin");
-
- vbo_save_loopback_vertex_list( ctx, node );
- return;
- }
- else if (save->replay_flags) {
- /* Various degnerate cases: translate into immediate mode
- * calls rather than trying to execute in place.
- */
- vbo_save_loopback_vertex_list( ctx, node );
- return;
- }
-
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- /* XXX also need to check if shader enabled, but invalid */
- if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) ||
- (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glBegin (invalid vertex/fragment program)");
- return;
- }
-
- vbo_bind_vertex_list( ctx, node );
-
- /* Again...
- */
- if (ctx->NewState)
- _mesa_update_state( ctx );
-
- vbo_context(ctx)->draw_prims(ctx,
- save->inputs,
- node->prim,
- node->prim_count,
- NULL,
- GL_TRUE,
- 0, /* Node is a VBO, so this is ok */
- node->count - 1);
- }
-
- /* Copy to current?
- */
- _playback_copy_to_current( ctx, node );
-}
-
-
-#endif /* FEATURE_dlist */
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.2
+ *
+ * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Author:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/imports.h"
+#include "main/mfeatures.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/light.h"
+#include "main/state.h"
+
+#include "vbo_context.h"
+
+
+#if FEATURE_dlist
+
+
+/**
+ * After playback, copy everything but the position from the
+ * last vertex to the saved state
+ */
+static void
+_playback_copy_to_current(struct gl_context *ctx,
+ const struct vbo_save_vertex_list *node)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ GLfloat vertex[VBO_ATTRIB_MAX * 4];
+ GLfloat *data;
+ GLuint i, offset;
+
+ if (node->current_size == 0)
+ return;
+
+ if (node->current_data) {
+ data = node->current_data;
+ }
+ else {
+ data = vertex;
+
+ if (node->count)
+ offset = (node->buffer_offset +
+ (node->count-1) * node->vertex_size * sizeof(GLfloat));
+ else
+ offset = node->buffer_offset;
+
+ ctx->Driver.GetBufferSubData( ctx, 0, offset,
+ node->vertex_size * sizeof(GLfloat),
+ data, node->vertex_store->bufferobj );
+
+ data += node->attrsz[0]; /* skip vertex position */
+ }
+
+ for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
+ if (node->attrsz[i]) {
+ GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+ GLfloat tmp[4];
+
+ COPY_CLEAN_4V(tmp,
+ node->attrsz[i],
+ data);
+
+ if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) {
+ memcpy(current, tmp, 4 * sizeof(GLfloat));
+
+ vbo->currval[i].Size = node->attrsz[i];
+
+ if (i >= VBO_ATTRIB_FIRST_MATERIAL &&
+ i <= VBO_ATTRIB_LAST_MATERIAL)
+ ctx->NewState |= _NEW_LIGHT;
+
+ ctx->NewState |= _NEW_CURRENT_ATTRIB;
+ }
+
+ data += node->attrsz[i];
+ }
+ }
+
+ /* Colormaterial -- this kindof sucks.
+ */
+ if (ctx->Light.ColorMaterialEnabled) {
+ _mesa_update_color_material(ctx, ctx->Current.Attrib[VBO_ATTRIB_COLOR0]);
+ }
+
+ /* CurrentExecPrimitive
+ */
+ if (node->prim_count) {
+ const struct _mesa_prim *prim = &node->prim[node->prim_count - 1];
+ if (prim->end)
+ ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
+ else
+ ctx->Driver.CurrentExecPrimitive = prim->mode;
+ }
+}
+
+
+
+/**
+ * Treat the vertex storage as a VBO, define vertex arrays pointing
+ * into it:
+ */
+static void vbo_bind_vertex_list(struct gl_context *ctx,
+ const struct vbo_save_vertex_list *node)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+ struct vbo_save_context *save = &vbo->save;
+ struct gl_client_array *arrays = save->arrays;
+ GLuint buffer_offset = node->buffer_offset;
+ const GLuint *map;
+ GLuint attr;
+ GLubyte node_attrsz[VBO_ATTRIB_MAX]; /* copy of node->attrsz[] */
+ GLbitfield varying_inputs = 0x0;
+
+ memcpy(node_attrsz, node->attrsz, sizeof(node->attrsz));
+
+ /* Install the default (ie Current) attributes first, then overlay
+ * all active ones.
+ */
+ switch (get_program_mode(ctx)) {
+ case VP_NONE:
+ for (attr = 0; attr < 16; attr++) {
+ save->inputs[attr] = &vbo->legacy_currval[attr];
+ }
+ for (attr = 0; attr < MAT_ATTRIB_MAX; attr++) {
+ save->inputs[attr + 16] = &vbo->mat_currval[attr];
+ }
+ map = vbo->map_vp_none;
+ break;
+ case VP_NV:
+ case VP_ARB:
+ /* The aliasing of attributes for NV vertex programs has already
+ * occurred. NV vertex programs cannot access material values,
+ * nor attributes greater than VERT_ATTRIB_TEX7.
+ */
+ for (attr = 0; attr < 16; attr++) {
+ save->inputs[attr] = &vbo->legacy_currval[attr];
+ save->inputs[attr + 16] = &vbo->generic_currval[attr];
+ }
+ map = vbo->map_vp_arb;
+
+ /* check if VERT_ATTRIB_POS is not read but VERT_BIT_GENERIC0 is read.
+ * In that case we effectively need to route the data from
+ * glVertexAttrib(0, val) calls to feed into the GENERIC0 input.
+ */
+ if ((ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_POS) == 0 &&
+ (ctx->VertexProgram._Current->Base.InputsRead & VERT_BIT_GENERIC0)) {
+ save->inputs[16] = save->inputs[0];
+ node_attrsz[16] = node_attrsz[0];
+ node_attrsz[0] = 0;
+ }
+ break;
+ default:
+ assert(0);
+ }
+
+ for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+ const GLuint src = map[attr];
+
+ if (node_attrsz[src]) {
+ /* override the default array set above */
+ save->inputs[attr] = &arrays[attr];
+
+ arrays[attr].Ptr = (const GLubyte *) NULL + buffer_offset;
+ arrays[attr].Size = node->attrsz[src];
+ arrays[attr].StrideB = node->vertex_size * sizeof(GLfloat);
+ arrays[attr].Stride = node->vertex_size * sizeof(GLfloat);
+ arrays[attr].Type = GL_FLOAT;
+ arrays[attr].Format = GL_RGBA;
+ arrays[attr].Enabled = 1;
+ _mesa_reference_buffer_object(ctx,
+ &arrays[attr].BufferObj,
+ node->vertex_store->bufferobj);
+ arrays[attr]._MaxElement = node->count; /* ??? */
+
+ assert(arrays[attr].BufferObj->Name);
+
+ buffer_offset += node->attrsz[src] * sizeof(GLfloat);
+ varying_inputs |= 1<<attr;
+ ctx->NewState |= _NEW_ARRAY;
+ }
+ }
+
+ _mesa_set_varying_vp_inputs( ctx, varying_inputs );
+}
+
+
+static void
+vbo_save_loopback_vertex_list(struct gl_context *ctx,
+ const struct vbo_save_vertex_list *list)
+{
+ const char *buffer = ctx->Driver.MapBuffer(ctx,
+ GL_ARRAY_BUFFER_ARB,
+ GL_READ_ONLY, /* ? */
+ list->vertex_store->bufferobj);
+
+ vbo_loopback_vertex_list(ctx,
+ (const GLfloat *)(buffer + list->buffer_offset),
+ list->attrsz,
+ list->prim,
+ list->prim_count,
+ list->wrap_count,
+ list->vertex_size);
+
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER_ARB,
+ list->vertex_store->bufferobj);
+}
+
+
+/**
+ * Execute the buffer and save copied verts.
+ * This is called from the display list code when executing
+ * a drawing command.
+ */
+void
+vbo_save_playback_vertex_list(struct gl_context *ctx, void *data)
+{
+ const struct vbo_save_vertex_list *node =
+ (const struct vbo_save_vertex_list *) data;
+ struct vbo_save_context *save = &vbo_context(ctx)->save;
+
+ FLUSH_CURRENT(ctx, 0);
+
+ if (node->prim_count > 0 && node->count > 0) {
+
+ if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END &&
+ node->prim[0].begin) {
+
+ /* Degenerate case: list is called inside begin/end pair and
+ * includes operations such as glBegin or glDrawArrays.
+ */
+ if (0)
+ printf("displaylist recursive begin");
+
+ vbo_save_loopback_vertex_list( ctx, node );
+ return;
+ }
+ else if (save->replay_flags) {
+ /* Various degnerate cases: translate into immediate mode
+ * calls rather than trying to execute in place.
+ */
+ vbo_save_loopback_vertex_list( ctx, node );
+ return;
+ }
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ /* XXX also need to check if shader enabled, but invalid */
+ if ((ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) ||
+ (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "glBegin (invalid vertex/fragment program)");
+ return;
+ }
+
+ vbo_bind_vertex_list( ctx, node );
+
+ /* Again...
+ */
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ vbo_context(ctx)->draw_prims(ctx,
+ save->inputs,
+ node->prim,
+ node->prim_count,
+ NULL,
+ GL_TRUE,
+ 0, /* Node is a VBO, so this is ok */
+ node->count - 1);
+ }
+
+ /* Copy to current?
+ */
+ _playback_copy_to_current( ctx, node );
+}
+
+
+#endif /* FEATURE_dlist */
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 63b08c1fb..062c58a89 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS = pixman test
+SUBDIRS = pixman test demos
pkgconfigdir=$(libdir)/pkgconfig
pkgconfig_DATA=pixman-1.pc
diff --git a/pixman/configure.ac b/pixman/configure.ac
index ab2ecde1b..5242799bb 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -794,6 +794,7 @@ AC_OUTPUT([pixman-1.pc
Makefile
pixman/Makefile
pixman/pixman-version.h
+ demos/Makefile
test/Makefile])
m4_if(m4_eval(pixman_minor % 2), [1], [
diff --git a/pixman/demos/Makefile.am b/pixman/demos/Makefile.am
new file mode 100644
index 000000000..2dcdfd350
--- /dev/null
+++ b/pixman/demos/Makefile.am
@@ -0,0 +1,34 @@
+if HAVE_GTK
+
+AM_CFLAGS = @OPENMP_CFLAGS@
+AM_LDFLAGS = @OPENMP_CFLAGS@
+
+LDADD = $(GTK_LIBS) $(top_builddir)/pixman/libpixman-1.la -lm
+INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(GTK_CFLAGS)
+
+GTK_UTILS = gtk-utils.c gtk-utils.h
+
+DEMOS = \
+ clip-test \
+ clip-in \
+ composite-test \
+ gradient-test \
+ radial-test \
+ alpha-test \
+ screen-test \
+ convolution-test \
+ trap-test
+
+gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
+alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
+composite_test_SOURCES = composite-test.c $(GTK_UTILS)
+clip_test_SOURCES = clip-test.c $(GTK_UTILS)
+clip_in_SOURCES = clip-in.c $(GTK_UTILS)
+trap_test_SOURCES = trap-test.c $(GTK_UTILS)
+screen_test_SOURCES = screen-test.c $(GTK_UTILS)
+convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
+radial_test_SOURCES = radial-test.c ../test/utils.c ../test/utils.h $(GTK_UTILS)
+
+noinst_PROGRAMS = $(DEMOS)
+
+endif
diff --git a/pixman/test/alpha-test.c b/pixman/demos/alpha-test.c
index 92c208142..92c208142 100644
--- a/pixman/test/alpha-test.c
+++ b/pixman/demos/alpha-test.c
diff --git a/pixman/test/clip-in.c b/pixman/demos/clip-in.c
index 51579811f..51579811f 100644
--- a/pixman/test/clip-in.c
+++ b/pixman/demos/clip-in.c
diff --git a/pixman/test/clip-test.c b/pixman/demos/clip-test.c
index aa0df4482..aa0df4482 100644
--- a/pixman/test/clip-test.c
+++ b/pixman/demos/clip-test.c
diff --git a/pixman/test/composite-test.c b/pixman/demos/composite-test.c
index 79d5d5eac..79d5d5eac 100644
--- a/pixman/test/composite-test.c
+++ b/pixman/demos/composite-test.c
diff --git a/pixman/test/convolution-test.c b/pixman/demos/convolution-test.c
index da284af7b..da284af7b 100644
--- a/pixman/test/convolution-test.c
+++ b/pixman/demos/convolution-test.c
diff --git a/pixman/test/gradient-test.c b/pixman/demos/gradient-test.c
index fc84844b0..fc84844b0 100644
--- a/pixman/test/gradient-test.c
+++ b/pixman/demos/gradient-test.c
diff --git a/pixman/test/gtk-utils.c b/pixman/demos/gtk-utils.c
index f45cdc912..f45cdc912 100644
--- a/pixman/test/gtk-utils.c
+++ b/pixman/demos/gtk-utils.c
diff --git a/pixman/test/gtk-utils.h b/pixman/demos/gtk-utils.h
index 2cb13bcf0..2cb13bcf0 100644
--- a/pixman/test/gtk-utils.h
+++ b/pixman/demos/gtk-utils.h
diff --git a/pixman/test/radial-test.c b/pixman/demos/radial-test.c
index 5d716c339..35e90d786 100644
--- a/pixman/test/radial-test.c
+++ b/pixman/demos/radial-test.c
@@ -1,4 +1,4 @@
-#include "utils.h"
+#include "../test/utils.h"
#include "gtk-utils.h"
#define NUM_GRADIENTS 7
diff --git a/pixman/test/screen-test.c b/pixman/demos/screen-test.c
index e69dba3de..e69dba3de 100644
--- a/pixman/test/screen-test.c
+++ b/pixman/demos/screen-test.c
diff --git a/pixman/test/trap-test.c b/pixman/demos/trap-test.c
index 19295e7a5..19295e7a5 100644
--- a/pixman/test/trap-test.c
+++ b/pixman/demos/trap-test.c
diff --git a/pixman/pixman/pixman-arm-common.h b/pixman/pixman/pixman-arm-common.h
index 372e9f9a8..9b1322b6c 100644
--- a/pixman/pixman/pixman-arm-common.h
+++ b/pixman/pixman/pixman-arm-common.h
@@ -282,19 +282,20 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
src_type, dst_type) \
void \
pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x);\
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
\
static force_inline void \
scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
- src_type * ps, \
+ const src_type * ps, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
{ \
pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
vx, unit_x);\
@@ -316,4 +317,48 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ const uint8_t * mask); \
+ \
+static force_inline void \
+scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \
+ dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
+ vx, unit_x, \
+ mask); \
+} \
+ \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
#endif
diff --git a/pixman/pixman/pixman-arm-neon-asm.S b/pixman/pixman/pixman-arm-neon-asm.S
index 51533d42f..47daf457c 100644
--- a/pixman/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman/pixman-arm-neon-asm.S
@@ -1,2365 +1,2393 @@
-/*
- * Copyright © 2009 Nokia Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
- */
-
-/*
- * This file contains implementations of NEON optimized pixel processing
- * functions. There is no full and detailed tutorial, but some functions
- * (those which are exposing some new or interesting features) are
- * extensively commented and can be used as examples.
- *
- * You may want to have a look at the comments for following functions:
- * - pixman_composite_over_8888_0565_asm_neon
- * - pixman_composite_over_n_8_0565_asm_neon
- */
-
-/* Prevent the stack from becoming executable for no reason... */
-#if defined(__linux__) && defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
-
- .text
- .fpu neon
- .arch armv7a
- .object_arch armv4
- .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
- .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
- .arm
- .altmacro
-
-#include "pixman-arm-neon-asm.h"
-
-/* Global configuration options and preferences */
-
-/*
- * The code can optionally make use of unaligned memory accesses to improve
- * performance of handling leading/trailing pixels for each scanline.
- * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
- * example in linux if unaligned memory accesses are not configured to
- * generate.exceptions.
- */
-.set RESPECT_STRICT_ALIGNMENT, 1
-
-/*
- * Set default prefetch type. There is a choice between the following options:
- *
- * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
- * as NOP to workaround some HW bugs or for whatever other reason)
- *
- * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
- * advanced prefetch intruduces heavy overhead)
- *
- * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
- * which can run ARM and NEON instructions simultaneously so that extra ARM
- * instructions do not add (many) extra cycles, but improve prefetch efficiency)
- *
- * Note: some types of function can't support advanced prefetch and fallback
- * to simple one (those which handle 24bpp pixels)
- */
-.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
-
-/* Prefetch distance in pixels for simple prefetch */
-.set PREFETCH_DISTANCE_SIMPLE, 64
-
-/*
- * Implementation of pixman_composite_over_8888_0565_asm_neon
- *
- * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
- * performs OVER compositing operation. Function fast_composite_over_8888_0565
- * from pixman-fast-path.c does the same in C and can be used as a reference.
- *
- * First we need to have some NEON assembly code which can do the actual
- * operation on the pixels and provide it to the template macro.
- *
- * Template macro quite conveniently takes care of emitting all the necessary
- * code for memory reading and writing (including quite tricky cases of
- * handling unaligned leading/trailing pixels), so we only need to deal with
- * the data in NEON registers.
- *
- * NEON registers allocation in general is recommented to be the following:
- * d0, d1, d2, d3 - contain loaded source pixel data
- * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed)
- * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used)
- * d28, d29, d30, d31 - place for storing the result (destination pixels)
- *
- * As can be seen above, four 64-bit NEON registers are used for keeping
- * intermediate pixel data and up to 8 pixels can be processed in one step
- * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
- *
- * This particular function uses the following registers allocation:
- * d0, d1, d2, d3 - contain loaded source pixel data
- * d4, d5 - contain loaded destination pixels (they are needed)
- * d28, d29 - place for storing the result (destination pixels)
- */
-
-/*
- * Step one. We need to have some code to do some arithmetics on pixel data.
- * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
- * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
- * perform all the needed calculations and write the result to {d28, d29}.
- * The rationale for having two macros and not just one will be explained
- * later. In practice, any single monolitic function which does the work can
- * be split into two parts in any arbitrary way without affecting correctness.
- *
- * There is one special trick here too. Common template macro can optionally
- * make our life a bit easier by doing R, G, B, A color components
- * deinterleaving for 32bpp pixel formats (and this feature is used in
- * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
- * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we
- * actually use d0 register for blue channel (a vector of eight 8-bit
- * values), d1 register for green, d2 for red and d3 for alpha. This
- * simple conversion can be also done with a few NEON instructions:
- *
- * Packed to planar conversion:
- * vuzp.8 d0, d1
- * vuzp.8 d2, d3
- * vuzp.8 d1, d3
- * vuzp.8 d0, d2
- *
- * Planar to packed conversion:
- * vzip.8 d0, d2
- * vzip.8 d1, d3
- * vzip.8 d2, d3
- * vzip.8 d0, d1
- *
- * But pixel can be loaded directly in planar format using VLD4.8 NEON
- * instruction. It is 1 cycle slower than VLD1.32, so this is not always
- * desirable, that's why deinterleaving is optional.
- *
- * But anyway, here is the code:
- */
-.macro pixman_composite_over_8888_0565_process_pixblock_head
- /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
- and put data into d6 - red, d7 - green, d30 - blue */
- vshrn.u16 d6, q2, #8
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vsri.u8 d6, d6, #5
- vmvn.8 d3, d3 /* invert source alpha */
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- /* now do alpha blending, storing results in 8-bit planar format
- into d16 - red, d19 - green, d18 - blue */
- vmull.u8 q10, d3, d6
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- vrshr.u16 q13, q10, #8
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- vraddhn.u16 d22, q12, q15
-.endm
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail
- /* ... continue alpha blending */
- vqadd.u8 d16, d2, d20
- vqadd.u8 q9, q0, q11
- /* convert the result to r5g6b5 and store it into {d28, d29} */
- vshll.u8 q14, d16, #8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-/*
- * OK, now we got almost everything that we need. Using the above two
- * macros, the work can be done right. But now we want to optimize
- * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
- * a lot from good code scheduling and software pipelining.
- *
- * Let's construct some code, which will run in the core main loop.
- * Some pseudo-code of the main loop will look like this:
- * head
- * while (...) {
- * tail
- * head
- * }
- * tail
- *
- * It may look a bit weird, but this setup allows to hide instruction
- * latencies better and also utilize dual-issue capability more
- * efficiently (make pairs of load-store and ALU instructions).
- *
- * So what we need now is a '*_tail_head' macro, which will be used
- * in the core main loop. A trivial straightforward implementation
- * of this macro would look like this:
- *
- * pixman_composite_over_8888_0565_process_pixblock_tail
- * vst1.16 {d28, d29}, [DST_W, :128]!
- * vld1.16 {d4, d5}, [DST_R, :128]!
- * vld4.32 {d0, d1, d2, d3}, [SRC]!
- * pixman_composite_over_8888_0565_process_pixblock_head
- * cache_preload 8, 8
- *
- * Now it also got some VLD/VST instructions. We simply can't move from
- * processing one block of pixels to the other one with just arithmetics.
- * The previously processed data needs to be written to memory and new
- * data needs to be fetched. Fortunately, this main loop does not deal
- * with partial leading/trailing pixels and can load/store a full block
- * of pixels in a bulk. Additionally, destination buffer is already
- * 16 bytes aligned here (which is good for performance).
- *
- * New things here are DST_R, DST_W, SRC and MASK identifiers. These
- * are the aliases for ARM registers which are used as pointers for
- * accessing data. We maintain separate pointers for reading and writing
- * destination buffer (DST_R and DST_W).
- *
- * Another new thing is 'cache_preload' macro. It is used for prefetching
- * data into CPU L2 cache and improve performance when dealing with large
- * images which are far larger than cache size. It uses one argument
- * (actually two, but they need to be the same here) - number of pixels
- * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
- * details about this macro. Moreover, if good performance is needed
- * the code from this macro needs to be copied into '*_tail_head' macro
- * and mixed with the rest of code for optimal instructions scheduling.
- * We are actually doing it below.
- *
- * Now after all the explanations, here is the optimized code.
- * Different instruction streams (originaling from '*_head', '*_tail'
- * and 'cache_preload' macro) use different indentation levels for
- * better readability. Actually taking the code from one of these
- * indentation levels and ignoring a few VLD/VST instructions would
- * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
- * macro!
- */
-
-#if 1
-
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
- vqadd.u8 d16, d2, d20
- vld1.16 {d4, d5}, [DST_R, :128]!
- vqadd.u8 q9, q0, q11
- vshrn.u16 d6, q2, #8
- fetch_src_pixblock
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vshll.u8 q14, d16, #8
- PF add PF_X, PF_X, #8
- vshll.u8 q8, d19, #8
- PF tst PF_CTL, #0xF
- vsri.u8 d6, d6, #5
- PF addne PF_X, PF_X, #8
- vmvn.8 d3, d3
- PF subne PF_CTL, PF_CTL, #1
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- vmull.u8 q10, d3, d6
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vsri.u16 q14, q8, #5
- PF cmp PF_X, ORIG_W
- vshll.u8 q9, d18, #8
- vrshr.u16 q13, q10, #8
- PF subge PF_X, PF_X, ORIG_W
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- PF subges PF_CTL, PF_CTL, #0x10
- vsri.u16 q14, q9, #11
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vraddhn.u16 d22, q12, q15
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-#else
-
-/* If we did not care much about the performance, we would just use this... */
-.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
- pixman_composite_over_8888_0565_process_pixblock_tail
- vst1.16 {d28, d29}, [DST_W, :128]!
- vld1.16 {d4, d5}, [DST_R, :128]!
- fetch_src_pixblock
- pixman_composite_over_8888_0565_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-#endif
-
-/*
- * And now the final part. We are using 'generate_composite_function' macro
- * to put all the stuff together. We are specifying the name of the function
- * which we want to get, number of bits per pixel for the source, mask and
- * destination (0 if unused, like mask in this case). Next come some bit
- * flags:
- * FLAG_DST_READWRITE - tells that the destination buffer is both read
- * and written, for write-only buffer we would use
- * FLAG_DST_WRITEONLY flag instead
- * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
- * and separate color channels for 32bpp format.
- * The next things are:
- * - the number of pixels processed per iteration (8 in this case, because
- * that's the maximum what can fit into four 64-bit NEON registers).
- * - prefetch distance, measured in pixel blocks. In this case it is 5 times
- * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
- * prefetch distance can be selected by running some benchmarks.
- *
- * After that we specify some macros, these are 'default_init',
- * 'default_cleanup' here which are empty (but it is possible to have custom
- * init/cleanup macros to be able to save/restore some extra NEON registers
- * like d8-d15 or do anything else) followed by
- * 'pixman_composite_over_8888_0565_process_pixblock_head',
- * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
- * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
- * which we got implemented above.
- *
- * The last part is the NEON registers allocation scheme.
- */
-generate_composite_function \
- pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_0565_process_pixblock_head, \
- pixman_composite_over_8888_0565_process_pixblock_tail, \
- pixman_composite_over_8888_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_0565_process_pixblock_head
- /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
- and put data into d6 - red, d7 - green, d30 - blue */
- vshrn.u16 d6, q2, #8
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vsri.u8 d6, d6, #5
- vsri.u8 d7, d7, #6
- vshrn.u16 d30, q2, #2
- /* now do alpha blending, storing results in 8-bit planar format
- into d16 - red, d19 - green, d18 - blue */
- vmull.u8 q10, d3, d6
- vmull.u8 q11, d3, d7
- vmull.u8 q12, d3, d30
- vrshr.u16 q13, q10, #8
- vrshr.u16 q3, q11, #8
- vrshr.u16 q15, q12, #8
- vraddhn.u16 d20, q10, q13
- vraddhn.u16 d23, q11, q3
- vraddhn.u16 d22, q12, q15
-.endm
-
-.macro pixman_composite_over_n_0565_process_pixblock_tail
- /* ... continue alpha blending */
- vqadd.u8 d16, d2, d20
- vqadd.u8 q9, q0, q11
- /* convert the result to r5g6b5 and store it into {d28, d29} */
- vshll.u8 q14, d16, #8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_0565_process_pixblock_tail_head
- pixman_composite_over_n_0565_process_pixblock_tail
- vld1.16 {d4, d5}, [DST_R, :128]!
- vst1.16 {d28, d29}, [DST_W, :128]!
- pixman_composite_over_n_0565_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_over_n_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
- vmvn.8 d3, d3 /* invert source alpha */
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_0565_init, \
- default_cleanup, \
- pixman_composite_over_n_0565_process_pixblock_head, \
- pixman_composite_over_n_0565_process_pixblock_tail, \
- pixman_composite_over_n_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_0565_process_pixblock_head
- vshll.u8 q8, d1, #8
- vshll.u8 q14, d2, #8
- vshll.u8 q9, d0, #8
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
- vsri.u16 q14, q8, #5
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- fetch_src_pixblock
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vsri.u16 q14, q9, #11
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vshll.u8 q8, d1, #8
- vst1.16 {d28, d29}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vshll.u8 q14, d2, #8
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vshll.u8 q9, d0, #8
-.endm
-
-generate_composite_function \
- pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_0565_process_pixblock_head, \
- pixman_composite_src_8888_0565_process_pixblock_tail, \
- pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_8888_process_pixblock_head
- vshrn.u16 d30, q0, #8
- vshrn.u16 d29, q0, #3
- vsli.u16 q0, q0, #5
- vmov.u8 d31, #255
- vsri.u8 d30, d30, #5
- vsri.u8 d29, d29, #6
- vshrn.u16 d28, q0, #2
-.endm
-
-.macro pixman_composite_src_0565_8888_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
- pixman_composite_src_0565_8888_process_pixblock_tail
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- fetch_src_pixblock
- pixman_composite_src_0565_8888_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_8888_process_pixblock_head, \
- pixman_composite_src_0565_8888_process_pixblock_tail, \
- pixman_composite_src_0565_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_process_pixblock_head
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_add_8_8_process_pixblock_tail_head
- fetch_src_pixblock
- PF add PF_X, PF_X, #32
- PF tst PF_CTL, #0xF
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- PF addne PF_X, PF_X, #32
- PF subne PF_CTL, PF_CTL, #1
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vqadd.u8 q14, q0, q2
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vqadd.u8 q15, q1, q3
-.endm
-
-generate_composite_function \
- pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
- fetch_src_pixblock
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vld1.32 {d4, d5, d6, d7}, [DST_R, :128]!
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vst1.32 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- vqadd.u8 q14, q0, q2
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vqadd.u8 q15, q1, q3
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_process_pixblock_tail, \
- pixman_composite_add_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head
- vmvn.8 d24, d3 /* get inverted alpha */
- /* do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- fetch_src_pixblock
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_head, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8888_process_pixblock_head
- pixman_composite_out_reverse_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail
- pixman_composite_out_reverse_8888_8888_process_pixblock_tail
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- fetch_src_pixblock
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8888_process_pixblock_tail_head
- pixman_composite_over_8888_8888_process_pixblock_tail
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- pixman_composite_over_8888_8888_process_pixblock_head
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_over_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8888_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
- vrshr.u16 q14, q8, #8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- PF cmp PF_X, ORIG_W
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- vld4.8 {d0, d1, d2, d3}, [DST_R, :128]!
- vmvn.8 d22, d3
- PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF subge PF_X, PF_X, ORIG_W
- vmull.u8 q8, d22, d4
- PF subges PF_CTL, PF_CTL, #0x10
- vmull.u8 q9, d22, d5
- vmull.u8 q10, d22, d6
- PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
- vmull.u8 q11, d22, d7
-.endm
-
-.macro pixman_composite_over_reverse_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d7[0]}, [DUMMY]
- vdup.8 d4, d7[0]
- vdup.8 d5, d7[1]
- vdup.8 d6, d7[2]
- vdup.8 d7, d7[3]
-.endm
-
-generate_composite_function \
- pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_reverse_n_8888_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 4, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_head
- vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */
- vmull.u8 q1, d24, d9
- vmull.u8 q6, d24, d10
- vmull.u8 q7, d24, d11
- vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */
- vshrn.u16 d7, q2, #3
- vsli.u16 q2, q2, #5
- vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */
- vrshr.u16 q9, q1, #8
- vrshr.u16 q10, q6, #8
- vrshr.u16 q11, q7, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q6, q10
- vraddhn.u16 d3, q7, q11
- vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */
- vsri.u8 d7, d7, #6
- vmvn.8 d3, d3
- vshrn.u16 d30, q2, #2
- vmull.u8 q8, d3, d6 /* now do alpha blending */
- vmull.u8 q9, d3, d7
- vmull.u8 q10, d3, d30
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
- /* 3 cycle bubble (after vmull.u8) */
- vrshr.u16 q13, q8, #8
- vrshr.u16 q11, q9, #8
- vrshr.u16 q15, q10, #8
- vraddhn.u16 d16, q8, q13
- vraddhn.u16 d27, q9, q11
- vraddhn.u16 d26, q10, q15
- vqadd.u8 d16, d2, d16
- /* 1 cycle bubble */
- vqadd.u8 q9, q0, q13
- vshll.u8 q14, d16, #8 /* convert to 16bpp */
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- /* 1 cycle bubble */
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
- vld1.16 {d4, d5}, [DST_R, :128]!
- vshrn.u16 d6, q2, #8
- fetch_mask_pixblock
- vshrn.u16 d7, q2, #3
- fetch_src_pixblock
- vmull.u8 q6, d24, d10
- vrshr.u16 q13, q8, #8
- vrshr.u16 q11, q9, #8
- vrshr.u16 q15, q10, #8
- vraddhn.u16 d16, q8, q13
- vraddhn.u16 d27, q9, q11
- vraddhn.u16 d26, q10, q15
- vqadd.u8 d16, d2, d16
- vmull.u8 q1, d24, d9
- vqadd.u8 q9, q0, q13
- vshll.u8 q14, d16, #8
- vmull.u8 q0, d24, d8
- vshll.u8 q8, d19, #8
- vshll.u8 q9, d18, #8
- vsri.u16 q14, q8, #5
- vmull.u8 q7, d24, d11
- vsri.u16 q14, q9, #11
-
- cache_preload 8, 8
-
- vsli.u16 q2, q2, #5
- vrshr.u16 q8, q0, #8
- vrshr.u16 q9, q1, #8
- vrshr.u16 q10, q6, #8
- vrshr.u16 q11, q7, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q6, q10
- vraddhn.u16 d3, q7, q11
- vsri.u8 d6, d6, #5
- vsri.u8 d7, d7, #6
- vmvn.8 d3, d3
- vshrn.u16 d30, q2, #2
- vst1.16 {d28, d29}, [DST_W, :128]!
- vmull.u8 q8, d3, d6
- vmull.u8 q9, d3, d7
- vmull.u8 q10, d3, d30
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-/*
- * This function needs a special initialization of solid mask.
- * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
- * offset, split into color components and replicated in d8-d11
- * registers. Additionally, this function needs all the NEON registers,
- * so it has to save d8-d15 registers which are callee saved according
- * to ABI. These registers are restored from 'cleanup' macro. All the
- * other NEON registers are caller saved, so can be clobbered freely
- * without introducing any problems.
- */
-.macro pixman_composite_over_n_8_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_0565_init, \
- pixman_composite_over_n_8_0565_cleanup, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_0565_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vpush {d8-d15}
- vld1.32 {d24[0]}, [DUMMY]
- vdup.8 d24, d24[3]
-.endm
-
-.macro pixman_composite_over_8888_n_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_8888_n_0565_init, \
- pixman_composite_over_8888_n_0565_cleanup, \
- pixman_composite_over_8888_8_0565_process_pixblock_head, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail, \
- pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 24 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0565_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
- vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- cache_preload 16, 16
-.endm
-
-generate_composite_function \
- pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 16, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_0565_process_pixblock_head, \
- pixman_composite_src_0565_0565_process_pixblock_tail, \
- pixman_composite_src_0565_0565_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8_process_pixblock_tail_head
- vst1.8 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #8
- vsli.u64 d0, d0, #16
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
- FLAG_DST_WRITEONLY, \
- 32, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_8_init, \
- pixman_composite_src_n_8_cleanup, \
- pixman_composite_src_n_8_process_pixblock_head, \
- pixman_composite_src_n_8_process_pixblock_tail, \
- pixman_composite_src_n_8_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_0565_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_0565_process_pixblock_tail_head
- vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_0565_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #16
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_0565_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 16, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_0565_init, \
- pixman_composite_src_n_0565_cleanup, \
- pixman_composite_src_n_0565_process_pixblock_head, \
- pixman_composite_src_n_0565_process_pixblock_tail, \
- pixman_composite_src_n_0565_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_n_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_src_n_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d0[0]}, [DUMMY]
- vsli.u64 d0, d0, #32
- vorr d1, d0, d0
- vorr q1, q0, q0
-.endm
-
-.macro pixman_composite_src_n_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 0, /* prefetch distance */ \
- pixman_composite_src_n_8888_init, \
- pixman_composite_src_n_8888_cleanup, \
- pixman_composite_src_n_8888_process_pixblock_head, \
- pixman_composite_src_n_8888_process_pixblock_tail, \
- pixman_composite_src_n_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_8888_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_8888_process_pixblock_head, \
- pixman_composite_src_8888_8888_process_pixblock_tail, \
- pixman_composite_src_8888_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_x888_8888_process_pixblock_head
- vorr q0, q0, q2
- vorr q1, q1, q2
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
- vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
- fetch_src_pixblock
- vorr q0, q0, q2
- vorr q1, q1, q2
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_x888_8888_init
- vmov.u8 q2, #0xFF
- vshl.u32 q2, q2, #24
-.endm
-
-generate_composite_function \
- pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- pixman_composite_src_x888_8888_init, \
- default_cleanup, \
- pixman_composite_src_x888_8888_process_pixblock_head, \
- pixman_composite_src_x888_8888_process_pixblock_tail, \
- pixman_composite_src_x888_8888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_head
- /* expecting deinterleaved source data in {d8, d9, d10, d11} */
- /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
- /* and destination data in {d4, d5, d6, d7} */
- /* mask is in d24 (d25, d26, d27 are unused) */
-
- /* in */
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d24, d9
- vmull.u8 q6, d24, d10
- vmull.u8 q7, d24, d11
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vmvn.8 d24, d3 /* get inverted alpha */
- /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
- /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
- pixman_composite_over_n_8_8888_process_pixblock_tail
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- cache_preload 8, 8
- pixman_composite_over_n_8_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8_8888_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_8888_init, \
- pixman_composite_over_n_8_8888_cleanup, \
- pixman_composite_over_n_8_8888_process_pixblock_head, \
- pixman_composite_over_n_8_8888_process_pixblock_tail, \
- pixman_composite_over_n_8_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8_8_process_pixblock_head
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d25, d8
- vmull.u8 q6, d26, d8
- vmull.u8 q7, d27, d8
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vmvn.8 q12, q0
- vmvn.8 q13, q1
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d25, d5
- vmull.u8 q10, d26, d6
- vmull.u8 q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8_8_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_n_8_8_process_pixblock_tail
- fetch_mask_pixblock
- cache_preload 32, 32
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- pixman_composite_over_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_n_8_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d8[0]}, [DUMMY]
- vdup.8 d8, d8[3]
-.endm
-
-.macro pixman_composite_over_n_8_8_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8_8_init, \
- pixman_composite_over_n_8_8_cleanup, \
- pixman_composite_over_n_8_8_process_pixblock_head, \
- pixman_composite_over_n_8_8_process_pixblock_tail, \
- pixman_composite_over_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
- /*
- * 'combine_mask_ca' replacement
- *
- * input: solid src (n) in {d8, d9, d10, d11}
- * dest in {d4, d5, d6, d7 }
- * mask in {d24, d25, d26, d27}
- * output: updated src in {d0, d1, d2, d3 }
- * updated mask in {d24, d25, d26, d3 }
- */
- vmull.u8 q0, d24, d8
- vmull.u8 q1, d25, d9
- vmull.u8 q6, d26, d10
- vmull.u8 q7, d27, d11
- vmull.u8 q9, d11, d25
- vmull.u8 q12, d11, d24
- vmull.u8 q13, d11, d26
- vrshr.u16 q8, q0, #8
- vrshr.u16 q10, q1, #8
- vrshr.u16 q11, q6, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q10
- vraddhn.u16 d2, q6, q11
- vrshr.u16 q11, q12, #8
- vrshr.u16 q8, q9, #8
- vrshr.u16 q6, q13, #8
- vrshr.u16 q10, q7, #8
- vraddhn.u16 d24, q12, q11
- vraddhn.u16 d25, q9, q8
- vraddhn.u16 d26, q13, q6
- vraddhn.u16 d3, q7, q10
- /*
- * 'combine_over_ca' replacement
- *
- * output: updated dest in {d28, d29, d30, d31}
- */
- vmvn.8 d24, d24
- vmvn.8 d25, d25
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d25, d5
- vmvn.8 d26, d26
- vmvn.8 d27, d3
- vmull.u8 q10, d26, d6
- vmull.u8 q11, d27, d7
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
- /* ... continue 'combine_over_ca' replacement */
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q6, q10, #8
- vrshr.u16 q7, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q6, q10
- vraddhn.u16 d31, q7, q11
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrshr.u16 q6, q10, #8
- vrshr.u16 q7, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q6, q10
- vraddhn.u16 d31, q7, q11
- fetch_mask_pixblock
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
- cache_preload 8, 8
- pixman_composite_over_n_8888_8888_ca_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d8, d11[0]
- vdup.8 d9, d11[1]
- vdup.8 d10, d11[2]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_over_n_8888_8888_ca_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_n_8888_8888_ca_init, \
- pixman_composite_over_n_8888_8888_ca_cleanup, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
- pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_in_n_8_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* and destination data in {d4, d5, d6, d7} */
- vmull.u8 q8, d4, d3
- vmull.u8 q9, d5, d3
- vmull.u8 q10, d6, d3
- vmull.u8 q11, d7, d3
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q8, q14
- vraddhn.u16 d29, q9, q15
- vraddhn.u16 d30, q10, q12
- vraddhn.u16 d31, q11, q13
-.endm
-
-.macro pixman_composite_in_n_8_process_pixblock_tail_head
- pixman_composite_in_n_8_process_pixblock_tail
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- cache_preload 32, 32
- pixman_composite_in_n_8_process_pixblock_head
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_in_n_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d3, d3[3]
-.endm
-
-.macro pixman_composite_in_n_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_in_n_8_init, \
- pixman_composite_in_n_8_cleanup, \
- pixman_composite_in_n_8_process_pixblock_head, \
- pixman_composite_in_n_8_process_pixblock_tail, \
- pixman_composite_in_n_8_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-.macro pixman_composite_add_n_8_8_process_pixblock_head
- /* expecting source data in {d8, d9, d10, d11} */
- /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
- /* and destination data in {d4, d5, d6, d7} */
- /* mask is in d24, d25, d26, d27 */
- vmull.u8 q0, d24, d11
- vmull.u8 q1, d25, d11
- vmull.u8 q6, d26, d11
- vmull.u8 q7, d27, d11
- vrshr.u16 q10, q0, #8
- vrshr.u16 q11, q1, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q13, q7, #8
- vraddhn.u16 d0, q0, q10
- vraddhn.u16 d1, q1, q11
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d3, q7, q13
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_n_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
- pixman_composite_add_n_8_8_process_pixblock_tail
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- cache_preload 32, 32
- pixman_composite_add_n_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_n_8_8_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vpush {d8-d15}
- vld1.32 {d11[0]}, [DUMMY]
- vdup.8 d11, d11[3]
-.endm
-
-.macro pixman_composite_add_n_8_8_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_n_8_8_init, \
- pixman_composite_add_n_8_8_cleanup, \
- pixman_composite_add_n_8_8_process_pixblock_head, \
- pixman_composite_add_n_8_8_process_pixblock_tail, \
- pixman_composite_add_n_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8_8_8_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* mask in {d24, d25, d26, d27} */
- vmull.u8 q8, d24, d0
- vmull.u8 q9, d25, d1
- vmull.u8 q10, d26, d2
- vmull.u8 q11, d27, d3
- vrshr.u16 q0, q8, #8
- vrshr.u16 q1, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d0, q0, q8
- vraddhn.u16 d1, q1, q9
- vraddhn.u16 d2, q12, q10
- vraddhn.u16 d3, q13, q11
- vqadd.u8 q14, q0, q2
- vqadd.u8 q15, q1, q3
-.endm
-
-.macro pixman_composite_add_8_8_8_process_pixblock_tail
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
- pixman_composite_add_8_8_8_process_pixblock_tail
- vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
- fetch_mask_pixblock
- fetch_src_pixblock
- cache_preload 32, 32
- pixman_composite_add_8_8_8_process_pixblock_head
-.endm
-
-.macro pixman_composite_add_8_8_8_init
-.endm
-
-.macro pixman_composite_add_8_8_8_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
- FLAG_DST_READWRITE, \
- 32, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_8_8_8_init, \
- pixman_composite_add_8_8_8_cleanup, \
- pixman_composite_add_8_8_8_process_pixblock_head, \
- pixman_composite_add_8_8_8_process_pixblock_tail, \
- pixman_composite_add_8_8_8_process_pixblock_tail_head
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* mask in {d24, d25, d26, d27} */
- vmull.u8 q8, d27, d0
- vmull.u8 q9, d27, d1
- vmull.u8 q10, d27, d2
- vmull.u8 q11, d27, d3
- /* 1 cycle bubble */
- vrsra.u16 q8, q8, #8
- vrsra.u16 q9, q9, #8
- vrsra.u16 q10, q10, #8
- vrsra.u16 q11, q11, #8
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
- /* 2 cycle bubble */
- vrshrn.u16 d28, q8, #8
- vrshrn.u16 d29, q9, #8
- vrshrn.u16 d30, q10, #8
- vrshrn.u16 d31, q11, #8
- vqadd.u8 q14, q2, q14
- /* 1 cycle bubble */
- vqadd.u8 q15, q3, q15
-.endm
-
-.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
- fetch_src_pixblock
- vrshrn.u16 d28, q8, #8
- fetch_mask_pixblock
- vrshrn.u16 d29, q9, #8
- vmull.u8 q8, d27, d0
- vrshrn.u16 d30, q10, #8
- vmull.u8 q9, d27, d1
- vrshrn.u16 d31, q11, #8
- vmull.u8 q10, d27, d2
- vqadd.u8 q14, q2, q14
- vmull.u8 q11, d27, d3
- vqadd.u8 q15, q3, q15
- vrsra.u16 q8, q8, #8
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- vrsra.u16 q9, q9, #8
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- vrsra.u16 q10, q10, #8
-
- cache_preload 8, 8
-
- vrsra.u16 q11, q11, #8
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-generate_composite_function \
- pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_n_8_8888_init
- add DUMMY, sp, #ARGS_STACK_OFFSET
- vld1.32 {d3[0]}, [DUMMY]
- vdup.8 d0, d3[0]
- vdup.8 d1, d3[1]
- vdup.8 d2, d3[2]
- vdup.8 d3, d3[3]
-.endm
-
-.macro pixman_composite_add_n_8_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_n_8_8888_init, \
- pixman_composite_add_n_8_8888_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_8888_n_8888_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vld1.32 {d27[0]}, [DUMMY]
- vdup.8 d27, d27[3]
-.endm
-
-.macro pixman_composite_add_8888_n_8888_cleanup
-.endm
-
-generate_composite_function \
- pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_add_8888_n_8888_init, \
- pixman_composite_add_8888_n_8888_cleanup, \
- pixman_composite_add_8888_8888_8888_process_pixblock_head, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
- pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 27 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
- /* expecting source data in {d0, d1, d2, d3} */
- /* destination data in {d4, d5, d6, d7} */
- /* solid mask is in d15 */
-
- /* 'in' */
- vmull.u8 q8, d15, d3
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vrshr.u16 q13, q8, #8
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d3, q8, q13
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
- vmvn.8 d24, d3 /* get inverted alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
- vmull.u8 q11, d24, d7
-.endm
-
-.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vrshr.u16 q13, q11, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q10
- vraddhn.u16 d31, q13, q11
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
- pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_head
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
-.endm
-
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
- pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
- vqadd.u8 q14, q0, q14
- vqadd.u8 q15, q1, q15
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-.macro pixman_composite_over_8888_n_8888_init
- add DUMMY, sp, #48
- vpush {d8-d15}
- vld1.32 {d15[0]}, [DUMMY]
- vdup.8 d15, d15[3]
-.endm
-
-.macro pixman_composite_over_8888_n_8888_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_8888_n_8888_init, \
- pixman_composite_over_8888_n_8888_cleanup, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail_head
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-generate_composite_function_single_scanline \
- pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 12 /* mask_basereg */
-
-/******************************************************************************/
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
- vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
- pixman_composite_over_8888_n_8888_process_pixblock_tail
- fetch_src_pixblock
- cache_preload 8, 8
- fetch_mask_pixblock
- pixman_composite_over_8888_n_8888_process_pixblock_head
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_8888_n_8888_process_pixblock_head, \
- pixman_composite_over_8888_n_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0888_process_pixblock_head
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
- vst3.8 {d0, d1, d2}, [DST_W]!
- fetch_src_pixblock
- cache_preload 8, 8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0888_0888_process_pixblock_head, \
- pixman_composite_src_0888_0888_process_pixblock_tail, \
- pixman_composite_src_0888_0888_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
- vswp d0, d2
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
- vst4.8 {d0, d1, d2, d3}, [DST_W]!
- fetch_src_pixblock
- vswp d0, d2
- cache_preload 8, 8
-.endm
-
-.macro pixman_composite_src_0888_8888_rev_init
- veor d3, d3, d3
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- pixman_composite_src_0888_8888_rev_init, \
- default_cleanup, \
- pixman_composite_src_0888_8888_rev_process_pixblock_head, \
- pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
- pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
- 0, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
- vshll.u8 q8, d1, #8
- vshll.u8 q9, d2, #8
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
- vshll.u8 q14, d0, #8
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
-.endm
-
-.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
- vshll.u8 q14, d0, #8
- fetch_src_pixblock
- vsri.u16 q14, q8, #5
- vsri.u16 q14, q9, #11
- vshll.u8 q8, d1, #8
- vst1.16 {d28, d29}, [DST_W, :128]!
- vshll.u8 q9, d2, #8
-.endm
-
-generate_composite_function \
- pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
- FLAG_DST_WRITEONLY, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0888_0565_rev_process_pixblock_head, \
- pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
- pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- vraddhn.u16 d30, q11, q8
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d28, q13, q10
-.endm
-
-.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- fetch_src_pixblock
- vraddhn.u16 d30, q11, q8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d28, q13, q10
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
- pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_pixbuf_8888_process_pixblock_head, \
- pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
- pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- vraddhn.u16 d28, q11, q8
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d30, q13, q10
-.endm
-
-.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head
- vrshr.u16 q11, q8, #8
- vswp d3, d31
- vrshr.u16 q12, q9, #8
- vrshr.u16 q13, q10, #8
- fetch_src_pixblock
- vraddhn.u16 d28, q11, q8
- PF add PF_X, PF_X, #8
- PF tst PF_CTL, #0xF
- PF addne PF_X, PF_X, #8
- PF subne PF_CTL, PF_CTL, #1
- vraddhn.u16 d29, q12, q9
- vraddhn.u16 d30, q13, q10
- vmull.u8 q8, d3, d0
- vmull.u8 q9, d3, d1
- vmull.u8 q10, d3, d2
- vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
- PF cmp PF_X, ORIG_W
- PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
- PF subge PF_X, PF_X, ORIG_W
- PF subges PF_CTL, PF_CTL, #0x10
- PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
-.endm
-
-generate_composite_function \
- pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- 10, /* prefetch distance */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
- pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 0, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q4, d2, d1, d0
- convert_0565_to_x888 q5, d6, d5, d4
- /* source pixel data is in {d0, d1, d2, XX} */
- /* destination pixel data is in {d4, d5, d6, XX} */
- vmvn.8 d7, d15
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vmull.u8 q8, d7, d4
- vmull.u8 q9, d7, d5
- vmull.u8 q13, d7, d6
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
-.endm
-
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q13, #8
- vraddhn.u16 d28, q14, q8
- vraddhn.u16 d29, q15, q9
- vraddhn.u16 d30, q12, q13
- vqadd.u8 q0, q0, q14
- vqadd.u8 q1, q1, q15
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
- fetch_mask_pixblock
- pixman_composite_over_0565_8_0565_process_pixblock_tail
- fetch_src_pixblock
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_over_0565_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_over_0565_8_0565_process_pixblock_head, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_over_0565_n_0565_init
- add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
- vpush {d8-d15}
- vld1.32 {d15[0]}, [DUMMY]
- vdup.8 d15, d15[3]
-.endm
-
-.macro pixman_composite_over_0565_n_0565_cleanup
- vpop {d8-d15}
-.endm
-
-generate_composite_function \
- pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- pixman_composite_over_0565_n_0565_init, \
- pixman_composite_over_0565_n_0565_cleanup, \
- pixman_composite_over_0565_8_0565_process_pixblock_head, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail, \
- pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q4, d2, d1, d0
- convert_0565_to_x888 q5, d6, d5, d4
- /* source pixel data is in {d0, d1, d2, XX} */
- /* destination pixel data is in {d4, d5, d6, XX} */
- vmull.u8 q6, d15, d2
- vmull.u8 q5, d15, d1
- vmull.u8 q4, d15, d0
- vrshr.u16 q12, q6, #8
- vrshr.u16 q11, q5, #8
- vrshr.u16 q10, q4, #8
- vraddhn.u16 d2, q6, q12
- vraddhn.u16 d1, q5, q11
- vraddhn.u16 d0, q4, q10
-.endm
-
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
- vqadd.u8 q0, q0, q2
- vqadd.u8 q1, q1, q3
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
- fetch_mask_pixblock
- pixman_composite_add_0565_8_0565_process_pixblock_tail
- fetch_src_pixblock
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_add_0565_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_add_0565_8_0565_process_pixblock_head, \
- pixman_composite_add_0565_8_0565_process_pixblock_tail, \
- pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 8, /* src_basereg */ \
- 15 /* mask_basereg */
-
-/******************************************************************************/
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
- /* mask is in d15 */
- convert_0565_to_x888 q5, d6, d5, d4
- /* destination pixel data is in {d4, d5, d6, xx} */
- vmvn.8 d24, d15 /* get inverted alpha */
- /* now do alpha blending */
- vmull.u8 q8, d24, d4
- vmull.u8 q9, d24, d5
- vmull.u8 q10, d24, d6
-.endm
-
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
- vrshr.u16 q14, q8, #8
- vrshr.u16 q15, q9, #8
- vrshr.u16 q12, q10, #8
- vraddhn.u16 d0, q14, q8
- vraddhn.u16 d1, q15, q9
- vraddhn.u16 d2, q12, q10
- /* 32bpp result is in {d0, d1, d2, XX} */
- convert_8888_to_0565 d2, d1, d0, q14, q15, q3
-.endm
-
-/* TODO: expand macros and do better instructions scheduling */
-.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
- fetch_src_pixblock
- pixman_composite_out_reverse_8_0565_process_pixblock_tail
- vld1.16 {d10, d11}, [DST_R, :128]!
- cache_preload 8, 8
- pixman_composite_out_reverse_8_0565_process_pixblock_head
- vst1.16 {d28, d29}, [DST_W, :128]!
-.endm
-
-generate_composite_function \
- pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
- FLAG_DST_READWRITE, \
- 8, /* number of pixels, processed in a single block */ \
- 5, /* prefetch distance */ \
- default_init_need_all_regs, \
- default_cleanup_need_all_regs, \
- pixman_composite_out_reverse_8_0565_process_pixblock_head, \
- pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
- pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 10, /* dst_r_basereg */ \
- 15, /* src_basereg */ \
- 0 /* mask_basereg */
-
-/******************************************************************************/
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_8888_process_pixblock_head, \
- pixman_composite_over_8888_8888_process_pixblock_tail, \
- pixman_composite_over_8888_8888_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \
- FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_over_8888_0565_process_pixblock_head, \
- pixman_composite_over_8888_0565_process_pixblock_tail, \
- pixman_composite_over_8888_0565_process_pixblock_tail_head, \
- 28, /* dst_w_basereg */ \
- 4, /* dst_r_basereg */ \
- 0, /* src_basereg */ \
- 24 /* mask_basereg */
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_8888_0565_process_pixblock_head, \
- pixman_composite_src_8888_0565_process_pixblock_tail, \
- pixman_composite_src_8888_0565_process_pixblock_tail_head
-
-generate_composite_function_nearest_scanline \
- pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \
- FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
- 8, /* number of pixels, processed in a single block */ \
- default_init, \
- default_cleanup, \
- pixman_composite_src_0565_8888_process_pixblock_head, \
- pixman_composite_src_0565_8888_process_pixblock_tail, \
- pixman_composite_src_0565_8888_process_pixblock_tail_head
+/*
+ * Copyright © 2009 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+/*
+ * This file contains implementations of NEON optimized pixel processing
+ * functions. There is no full and detailed tutorial, but some functions
+ * (those which are exposing some new or interesting features) are
+ * extensively commented and can be used as examples.
+ *
+ * You may want to have a look at the comments for following functions:
+ * - pixman_composite_over_8888_0565_asm_neon
+ * - pixman_composite_over_n_8_0565_asm_neon
+ */
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .fpu neon
+ .arch armv7a
+ .object_arch armv4
+ .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
+ .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
+ .arm
+ .altmacro
+
+#include "pixman-arm-neon-asm.h"
+
+/* Global configuration options and preferences */
+
+/*
+ * The code can optionally make use of unaligned memory accesses to improve
+ * performance of handling leading/trailing pixels for each scanline.
+ * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
+ * example in linux if unaligned memory accesses are not configured to
+ * generate.exceptions.
+ */
+.set RESPECT_STRICT_ALIGNMENT, 1
+
+/*
+ * Set default prefetch type. There is a choice between the following options:
+ *
+ * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
+ * as NOP to workaround some HW bugs or for whatever other reason)
+ *
+ * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
+ * advanced prefetch intruduces heavy overhead)
+ *
+ * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
+ * which can run ARM and NEON instructions simultaneously so that extra ARM
+ * instructions do not add (many) extra cycles, but improve prefetch efficiency)
+ *
+ * Note: some types of function can't support advanced prefetch and fallback
+ * to simple one (those which handle 24bpp pixels)
+ */
+.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
+
+/* Prefetch distance in pixels for simple prefetch */
+.set PREFETCH_DISTANCE_SIMPLE, 64
+
+/*
+ * Implementation of pixman_composite_over_8888_0565_asm_neon
+ *
+ * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
+ * performs OVER compositing operation. Function fast_composite_over_8888_0565
+ * from pixman-fast-path.c does the same in C and can be used as a reference.
+ *
+ * First we need to have some NEON assembly code which can do the actual
+ * operation on the pixels and provide it to the template macro.
+ *
+ * Template macro quite conveniently takes care of emitting all the necessary
+ * code for memory reading and writing (including quite tricky cases of
+ * handling unaligned leading/trailing pixels), so we only need to deal with
+ * the data in NEON registers.
+ *
+ * NEON registers allocation in general is recommented to be the following:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed)
+ * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used)
+ * d28, d29, d30, d31 - place for storing the result (destination pixels)
+ *
+ * As can be seen above, four 64-bit NEON registers are used for keeping
+ * intermediate pixel data and up to 8 pixels can be processed in one step
+ * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
+ *
+ * This particular function uses the following registers allocation:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5 - contain loaded destination pixels (they are needed)
+ * d28, d29 - place for storing the result (destination pixels)
+ */
+
+/*
+ * Step one. We need to have some code to do some arithmetics on pixel data.
+ * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
+ * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
+ * perform all the needed calculations and write the result to {d28, d29}.
+ * The rationale for having two macros and not just one will be explained
+ * later. In practice, any single monolitic function which does the work can
+ * be split into two parts in any arbitrary way without affecting correctness.
+ *
+ * There is one special trick here too. Common template macro can optionally
+ * make our life a bit easier by doing R, G, B, A color components
+ * deinterleaving for 32bpp pixel formats (and this feature is used in
+ * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
+ * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we
+ * actually use d0 register for blue channel (a vector of eight 8-bit
+ * values), d1 register for green, d2 for red and d3 for alpha. This
+ * simple conversion can be also done with a few NEON instructions:
+ *
+ * Packed to planar conversion:
+ * vuzp.8 d0, d1
+ * vuzp.8 d2, d3
+ * vuzp.8 d1, d3
+ * vuzp.8 d0, d2
+ *
+ * Planar to packed conversion:
+ * vzip.8 d0, d2
+ * vzip.8 d1, d3
+ * vzip.8 d2, d3
+ * vzip.8 d0, d1
+ *
+ * But pixel can be loaded directly in planar format using VLD4.8 NEON
+ * instruction. It is 1 cycle slower than VLD1.32, so this is not always
+ * desirable, that's why deinterleaving is optional.
+ *
+ * But anyway, here is the code:
+ */
+.macro pixman_composite_over_8888_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3 /* invert source alpha */
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/*
+ * OK, now we got almost everything that we need. Using the above two
+ * macros, the work can be done right. But now we want to optimize
+ * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
+ * a lot from good code scheduling and software pipelining.
+ *
+ * Let's construct some code, which will run in the core main loop.
+ * Some pseudo-code of the main loop will look like this:
+ * head
+ * while (...) {
+ * tail
+ * head
+ * }
+ * tail
+ *
+ * It may look a bit weird, but this setup allows to hide instruction
+ * latencies better and also utilize dual-issue capability more
+ * efficiently (make pairs of load-store and ALU instructions).
+ *
+ * So what we need now is a '*_tail_head' macro, which will be used
+ * in the core main loop. A trivial straightforward implementation
+ * of this macro would look like this:
+ *
+ * pixman_composite_over_8888_0565_process_pixblock_tail
+ * vst1.16 {d28, d29}, [DST_W, :128]!
+ * vld1.16 {d4, d5}, [DST_R, :128]!
+ * vld4.32 {d0, d1, d2, d3}, [SRC]!
+ * pixman_composite_over_8888_0565_process_pixblock_head
+ * cache_preload 8, 8
+ *
+ * Now it also got some VLD/VST instructions. We simply can't move from
+ * processing one block of pixels to the other one with just arithmetics.
+ * The previously processed data needs to be written to memory and new
+ * data needs to be fetched. Fortunately, this main loop does not deal
+ * with partial leading/trailing pixels and can load/store a full block
+ * of pixels in a bulk. Additionally, destination buffer is already
+ * 16 bytes aligned here (which is good for performance).
+ *
+ * New things here are DST_R, DST_W, SRC and MASK identifiers. These
+ * are the aliases for ARM registers which are used as pointers for
+ * accessing data. We maintain separate pointers for reading and writing
+ * destination buffer (DST_R and DST_W).
+ *
+ * Another new thing is 'cache_preload' macro. It is used for prefetching
+ * data into CPU L2 cache and improve performance when dealing with large
+ * images which are far larger than cache size. It uses one argument
+ * (actually two, but they need to be the same here) - number of pixels
+ * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
+ * details about this macro. Moreover, if good performance is needed
+ * the code from this macro needs to be copied into '*_tail_head' macro
+ * and mixed with the rest of code for optimal instructions scheduling.
+ * We are actually doing it below.
+ *
+ * Now after all the explanations, here is the optimized code.
+ * Different instruction streams (originaling from '*_head', '*_tail'
+ * and 'cache_preload' macro) use different indentation levels for
+ * better readability. Actually taking the code from one of these
+ * indentation levels and ignoring a few VLD/VST instructions would
+ * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
+ * macro!
+ */
+
+#if 1
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ vqadd.u8 d16, d2, d20
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vqadd.u8 q9, q0, q11
+ vshrn.u16 d6, q2, #8
+ fetch_src_pixblock
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vshll.u8 q14, d16, #8
+ PF add PF_X, PF_X, #8
+ vshll.u8 q8, d19, #8
+ PF tst PF_CTL, #0xF
+ vsri.u8 d6, d6, #5
+ PF addne PF_X, PF_X, #8
+ vmvn.8 d3, d3
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ vmull.u8 q10, d3, d6
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vsri.u16 q14, q8, #5
+ PF cmp PF_X, ORIG_W
+ vshll.u8 q9, d18, #8
+ vrshr.u16 q13, q10, #8
+ PF subge PF_X, PF_X, ORIG_W
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ PF subges PF_CTL, PF_CTL, #0x10
+ vsri.u16 q14, q9, #11
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vraddhn.u16 d22, q12, q15
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+#else
+
+/* If we did not care much about the performance, we would just use this... */
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ pixman_composite_over_8888_0565_process_pixblock_tail
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ fetch_src_pixblock
+ pixman_composite_over_8888_0565_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+#endif
+
+/*
+ * And now the final part. We are using 'generate_composite_function' macro
+ * to put all the stuff together. We are specifying the name of the function
+ * which we want to get, number of bits per pixel for the source, mask and
+ * destination (0 if unused, like mask in this case). Next come some bit
+ * flags:
+ * FLAG_DST_READWRITE - tells that the destination buffer is both read
+ * and written, for write-only buffer we would use
+ * FLAG_DST_WRITEONLY flag instead
+ * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
+ * and separate color channels for 32bpp format.
+ * The next things are:
+ * - the number of pixels processed per iteration (8 in this case, because
+ * that's the maximum what can fit into four 64-bit NEON registers).
+ * - prefetch distance, measured in pixel blocks. In this case it is 5 times
+ * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
+ * prefetch distance can be selected by running some benchmarks.
+ *
+ * After that we specify some macros, these are 'default_init',
+ * 'default_cleanup' here which are empty (but it is possible to have custom
+ * init/cleanup macros to be able to save/restore some extra NEON registers
+ * like d8-d15 or do anything else) followed by
+ * 'pixman_composite_over_8888_0565_process_pixblock_head',
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
+ * which we got implemented above.
+ *
+ * The last part is the NEON registers allocation scheme.
+ */
+generate_composite_function \
+ pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_0565_process_pixblock_head, \
+ pixman_composite_over_8888_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_0565_process_pixblock_tail_head
+ pixman_composite_over_n_0565_process_pixblock_tail
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ pixman_composite_over_n_0565_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_over_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+ vmvn.8 d3, d3 /* invert source alpha */
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_0565_init, \
+ default_cleanup, \
+ pixman_composite_over_n_0565_process_pixblock_head, \
+ pixman_composite_over_n_0565_process_pixblock_tail, \
+ pixman_composite_over_n_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_0565_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q14, d2, #8
+ vshll.u8 q9, d0, #8
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
+ vsri.u16 q14, q8, #5
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ fetch_src_pixblock
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u16 q14, q9, #11
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vshll.u8 q14, d2, #8
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vshll.u8 q9, d0, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_8888_process_pixblock_head
+ vshrn.u16 d30, q0, #8
+ vshrn.u16 d29, q0, #3
+ vsli.u16 q0, q0, #5
+ vmov.u8 d31, #255
+ vsri.u8 d30, d30, #5
+ vsri.u8 d29, d29, #6
+ vshrn.u16 d28, q0, #2
+.endm
+
+.macro pixman_composite_src_0565_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
+ pixman_composite_src_0565_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ fetch_src_pixblock
+ pixman_composite_src_0565_8888_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8_8_process_pixblock_head
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8_8_process_pixblock_tail
+.endm
+
+.macro pixman_composite_add_8_8_process_pixblock_tail_head
+ fetch_src_pixblock
+ PF add PF_X, PF_X, #32
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #32
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8_8_asm_neon, 8, 0, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
+ pixman_composite_add_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
+ fetch_src_pixblock
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld1.32 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.32 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_out_reverse_8888_8888_process_pixblock_head
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+.endm
+
+.macro pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ fetch_src_pixblock
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_out_reverse_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_out_reverse_8888_8888_process_pixblock_head, \
+ pixman_composite_out_reverse_8888_8888_process_pixblock_tail, \
+ pixman_composite_out_reverse_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_8888_process_pixblock_head
+ pixman_composite_out_reverse_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail
+ pixman_composite_out_reverse_8888_8888_process_pixblock_tail
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ fetch_src_pixblock
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8888_process_pixblock_tail_head
+ pixman_composite_over_8888_8888_process_pixblock_tail
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ pixman_composite_over_8888_8888_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_over_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [DST_R, :128]!
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+.macro pixman_composite_over_reverse_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d7[0]}, [DUMMY]
+ vdup.8 d4, d7[0]
+ vdup.8 d5, d7[1]
+ vdup.8 d6, d7[2]
+ vdup.8 d7, d7[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_reverse_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 4, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_8_0565_process_pixblock_head
+ vmull.u8 q0, d24, d8 /* IN for SRC pixels (part1) */
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vshrn.u16 d6, q2, #8 /* convert DST_R data to 32-bpp (part1) */
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vrshr.u16 q8, q0, #8 /* IN for SRC pixels (part2) */
+ vrshr.u16 q9, q1, #8
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q11, q7, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q6, q10
+ vraddhn.u16 d3, q7, q11
+ vsri.u8 d6, d6, #5 /* convert DST_R data to 32-bpp (part2) */
+ vsri.u8 d7, d7, #6
+ vmvn.8 d3, d3
+ vshrn.u16 d30, q2, #2
+ vmull.u8 q8, d3, d6 /* now do alpha blending */
+ vmull.u8 q9, d3, d7
+ vmull.u8 q10, d3, d30
+.endm
+
+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail
+ /* 3 cycle bubble (after vmull.u8) */
+ vrshr.u16 q13, q8, #8
+ vrshr.u16 q11, q9, #8
+ vrshr.u16 q15, q10, #8
+ vraddhn.u16 d16, q8, q13
+ vraddhn.u16 d27, q9, q11
+ vraddhn.u16 d26, q10, q15
+ vqadd.u8 d16, d2, d16
+ /* 1 cycle bubble */
+ vqadd.u8 q9, q0, q13
+ vshll.u8 q14, d16, #8 /* convert to 16bpp */
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ /* 1 cycle bubble */
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vshrn.u16 d6, q2, #8
+ fetch_mask_pixblock
+ vshrn.u16 d7, q2, #3
+ fetch_src_pixblock
+ vmull.u8 q6, d24, d10
+ vrshr.u16 q13, q8, #8
+ vrshr.u16 q11, q9, #8
+ vrshr.u16 q15, q10, #8
+ vraddhn.u16 d16, q8, q13
+ vraddhn.u16 d27, q9, q11
+ vraddhn.u16 d26, q10, q15
+ vqadd.u8 d16, d2, d16
+ vmull.u8 q1, d24, d9
+ vqadd.u8 q9, q0, q13
+ vshll.u8 q14, d16, #8
+ vmull.u8 q0, d24, d8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vmull.u8 q7, d24, d11
+ vsri.u16 q14, q9, #11
+
+ cache_preload 8, 8
+
+ vsli.u16 q2, q2, #5
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q9, q1, #8
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q11, q7, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q6, q10
+ vraddhn.u16 d3, q7, q11
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vmvn.8 d3, d3
+ vshrn.u16 d30, q2, #2
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vmull.u8 q8, d3, d6
+ vmull.u8 q9, d3, d7
+ vmull.u8 q10, d3, d30
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+/*
+ * This function needs a special initialization of solid mask.
+ * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
+ * offset, split into color components and replicated in d8-d11
+ * registers. Additionally, this function needs all the NEON registers,
+ * so it has to save d8-d15 registers which are callee saved according
+ * to ABI. These registers are restored from 'cleanup' macro. All the
+ * other NEON registers are caller saved, so can be clobbered freely
+ * without introducing any problems.
+ */
+.macro pixman_composite_over_n_8_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_0565_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_0565_init, \
+ pixman_composite_over_n_8_0565_cleanup, \
+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_n_0565_init
+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
+ vpush {d8-d15}
+ vld1.32 {d24[0]}, [DUMMY]
+ vdup.8 d24, d24[3]
+.endm
+
+.macro pixman_composite_over_8888_n_0565_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_n_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_n_0565_init, \
+ pixman_composite_over_8888_n_0565_cleanup, \
+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+ fetch_src_pixblock
+ cache_preload 16, 16
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_0565_process_pixblock_head, \
+ pixman_composite_src_0565_0565_process_pixblock_tail, \
+ pixman_composite_src_0565_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail_head
+ vst1.8 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #8
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
+ FLAG_DST_WRITEONLY, \
+ 32, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8_init, \
+ pixman_composite_src_n_8_cleanup, \
+ pixman_composite_src_n_8_process_pixblock_head, \
+ pixman_composite_src_n_8_process_pixblock_tail, \
+ pixman_composite_src_n_8_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_0565_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_0565_init, \
+ pixman_composite_src_n_0565_cleanup, \
+ pixman_composite_src_n_0565_process_pixblock_head, \
+ pixman_composite_src_n_0565_process_pixblock_tail, \
+ pixman_composite_src_n_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #32
+ vorr d1, d0, d0
+ vorr q1, q0, q0
+.endm
+
+.macro pixman_composite_src_n_8888_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8888_init, \
+ pixman_composite_src_n_8888_cleanup, \
+ pixman_composite_src_n_8888_process_pixblock_head, \
+ pixman_composite_src_n_8888_process_pixblock_tail, \
+ pixman_composite_src_n_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ fetch_src_pixblock
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_8888_process_pixblock_head, \
+ pixman_composite_src_8888_8888_process_pixblock_tail, \
+ pixman_composite_src_8888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_x888_8888_process_pixblock_head
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ fetch_src_pixblock
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_x888_8888_init
+ vmov.u8 q2, #0xFF
+ vshl.u32 q2, q2, #24
+.endm
+
+generate_composite_function \
+ pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_x888_8888_init, \
+ default_cleanup, \
+ pixman_composite_src_x888_8888_process_pixblock_head, \
+ pixman_composite_src_x888_8888_process_pixblock_tail, \
+ pixman_composite_src_x888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_head
+ /* expecting deinterleaved source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24 (d25, d26, d27 are unused) */
+
+ /* in */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
+ /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
+ pixman_composite_over_n_8_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ fetch_mask_pixblock
+ cache_preload 8, 8
+ pixman_composite_over_n_8_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_8888_init, \
+ pixman_composite_over_n_8_8888_cleanup, \
+ pixman_composite_over_n_8_8888_process_pixblock_head, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_8_process_pixblock_head
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d8
+ vmull.u8 q6, d26, d8
+ vmull.u8 q7, d27, d8
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vmvn.8 q12, q0
+ vmvn.8 q13, q1
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d25, d5
+ vmull.u8 q10, d26, d6
+ vmull.u8 q11, d27, d7
+.endm
+
+.macro pixman_composite_over_n_8_8_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_8_process_pixblock_tail_head
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_n_8_8_process_pixblock_tail
+ fetch_mask_pixblock
+ cache_preload 32, 32
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ pixman_composite_over_n_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d8[0]}, [DUMMY]
+ vdup.8 d8, d8[3]
+.endm
+
+.macro pixman_composite_over_n_8_8_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_8_asm_neon, 0, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_8_init, \
+ pixman_composite_over_n_8_8_cleanup, \
+ pixman_composite_over_n_8_8_process_pixblock_head, \
+ pixman_composite_over_n_8_8_process_pixblock_tail, \
+ pixman_composite_over_n_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11}
+ * dest in {d4, d5, d6, d7 }
+ * mask in {d24, d25, d26, d27}
+ * output: updated src in {d0, d1, d2, d3 }
+ * updated mask in {d24, d25, d26, d3 }
+ */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d9
+ vmull.u8 q6, d26, d10
+ vmull.u8 q7, d27, d11
+ vmull.u8 q9, d11, d25
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vrshr.u16 q10, q7, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ vraddhn.u16 d26, q13, q6
+ vraddhn.u16 d3, q7, q10
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in {d28, d29, d30, d31}
+ */
+ vmvn.8 d24, d24
+ vmvn.8 d25, d25
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d25, d5
+ vmvn.8 d26, d26
+ vmvn.8 d27, d3
+ vmull.u8 q10, d26, d6
+ vmull.u8 q11, d27, d7
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
+ /* ... continue 'combine_over_ca' replacement */
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ fetch_mask_pixblock
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ cache_preload 8, 8
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_8888_ca_init, \
+ pixman_composite_over_n_8888_8888_ca_cleanup, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_in_n_8_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* and destination data in {d4, d5, d6, d7} */
+ vmull.u8 q8, d4, d3
+ vmull.u8 q9, d5, d3
+ vmull.u8 q10, d6, d3
+ vmull.u8 q11, d7, d3
+.endm
+
+.macro pixman_composite_in_n_8_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q8, q14
+ vraddhn.u16 d29, q9, q15
+ vraddhn.u16 d30, q10, q12
+ vraddhn.u16 d31, q11, q13
+.endm
+
+.macro pixman_composite_in_n_8_process_pixblock_tail_head
+ pixman_composite_in_n_8_process_pixblock_tail
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ cache_preload 32, 32
+ pixman_composite_in_n_8_process_pixblock_head
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_in_n_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d3, d3[3]
+.endm
+
+.macro pixman_composite_in_n_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_in_n_8_asm_neon, 0, 0, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_in_n_8_init, \
+ pixman_composite_in_n_8_cleanup, \
+ pixman_composite_in_n_8_process_pixblock_head, \
+ pixman_composite_in_n_8_process_pixblock_tail, \
+ pixman_composite_in_n_8_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+.macro pixman_composite_add_n_8_8_process_pixblock_head
+ /* expecting source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24, d25, d26, d27 */
+ vmull.u8 q0, d24, d11
+ vmull.u8 q1, d25, d11
+ vmull.u8 q6, d26, d11
+ vmull.u8 q7, d27, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_n_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
+ pixman_composite_add_n_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ fetch_mask_pixblock
+ cache_preload 32, 32
+ pixman_composite_add_n_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_n_8_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_add_n_8_8_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_n_8_8_init, \
+ pixman_composite_add_n_8_8_cleanup, \
+ pixman_composite_add_n_8_8_process_pixblock_head, \
+ pixman_composite_add_n_8_8_process_pixblock_tail, \
+ pixman_composite_add_n_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8_8_8_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d24, d0
+ vmull.u8 q9, d25, d1
+ vmull.u8 q10, d26, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
+ pixman_composite_add_8_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ fetch_mask_pixblock
+ fetch_src_pixblock
+ cache_preload 32, 32
+ pixman_composite_add_8_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_8_8_8_init
+.endm
+
+.macro pixman_composite_add_8_8_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_8_8_8_init, \
+ pixman_composite_add_8_8_8_cleanup, \
+ pixman_composite_add_8_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_8_process_pixblock_tail, \
+ pixman_composite_add_8_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d27, d0
+ vmull.u8 q9, d27, d1
+ vmull.u8 q10, d27, d2
+ vmull.u8 q11, d27, d3
+ /* 1 cycle bubble */
+ vrsra.u16 q8, q8, #8
+ vrsra.u16 q9, q9, #8
+ vrsra.u16 q10, q10, #8
+ vrsra.u16 q11, q11, #8
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+ /* 2 cycle bubble */
+ vrshrn.u16 d28, q8, #8
+ vrshrn.u16 d29, q9, #8
+ vrshrn.u16 d30, q10, #8
+ vrshrn.u16 d31, q11, #8
+ vqadd.u8 q14, q2, q14
+ /* 1 cycle bubble */
+ vqadd.u8 q15, q3, q15
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+ fetch_src_pixblock
+ vrshrn.u16 d28, q8, #8
+ fetch_mask_pixblock
+ vrshrn.u16 d29, q9, #8
+ vmull.u8 q8, d27, d0
+ vrshrn.u16 d30, q10, #8
+ vmull.u8 q9, d27, d1
+ vrshrn.u16 d31, q11, #8
+ vmull.u8 q10, d27, d2
+ vqadd.u8 q14, q2, q14
+ vmull.u8 q11, d27, d3
+ vqadd.u8 q15, q3, q15
+ vrsra.u16 q8, q8, #8
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrsra.u16 q9, q9, #8
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vrsra.u16 q10, q10, #8
+
+ cache_preload 8, 8
+
+ vrsra.u16 q11, q11, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+generate_composite_function \
+ pixman_composite_add_8888_8_8888_asm_neon, 32, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 27 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_n_8_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+.endm
+
+.macro pixman_composite_add_n_8_8888_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_add_n_8_8888_asm_neon, 0, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_n_8_8888_init, \
+ pixman_composite_add_n_8_8888_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 27 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_n_8888_init
+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
+ vld1.32 {d27[0]}, [DUMMY]
+ vdup.8 d27, d27[3]
+.endm
+
+.macro pixman_composite_add_8888_n_8888_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_n_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_8888_n_8888_init, \
+ pixman_composite_add_8888_n_8888_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 27 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* solid mask is in d15 */
+
+ /* 'in' */
+ vmull.u8 q8, d15, d3
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vrshr.u16 q13, q8, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d3, q8, q13
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
+ fetch_src_pixblock
+ cache_preload 8, 8
+ fetch_mask_pixblock
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_head
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
+ pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ fetch_src_pixblock
+ cache_preload 8, 8
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_n_8888_init
+ add DUMMY, sp, #48
+ vpush {d8-d15}
+ vld1.32 {d15[0]}, [DUMMY]
+ vdup.8 d15, d15[3]
+.endm
+
+.macro pixman_composite_over_8888_n_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_n_8888_init, \
+ pixman_composite_over_8888_n_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ fetch_src_pixblock
+ cache_preload 8, 8
+ fetch_mask_pixblock
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ fetch_src_pixblock
+ cache_preload 8, 8
+ fetch_mask_pixblock
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
+ vst3.8 {d0, d1, d2}, [DST_W]!
+ fetch_src_pixblock
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0888_process_pixblock_head, \
+ pixman_composite_src_0888_0888_process_pixblock_tail, \
+ pixman_composite_src_0888_0888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
+ vswp d0, d2
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
+ vst4.8 {d0, d1, d2, d3}, [DST_W]!
+ fetch_src_pixblock
+ vswp d0, d2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_init
+ veor d3, d3, d3
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_0888_8888_rev_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_head, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q9, d2, #8
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
+ vshll.u8 q14, d0, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
+ vshll.u8 q14, d0, #8
+ fetch_src_pixblock
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vshll.u8 q9, d2, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vraddhn.u16 d30, q11, q8
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ fetch_src_pixblock
+ vraddhn.u16 d30, q11, q8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function \
+ pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_head, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_head
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+.endm
+
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vraddhn.u16 d28, q11, q8
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d30, q13, q10
+.endm
+
+.macro pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ fetch_src_pixblock
+ vraddhn.u16 d28, q11, q8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d30, q13, q10
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function \
+ pixman_composite_src_rpixbuf_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_rpixbuf_8888_process_pixblock_head, \
+ pixman_composite_src_rpixbuf_8888_process_pixblock_tail, \
+ pixman_composite_src_rpixbuf_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q4, d2, d1, d0
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* source pixel data is in {d0, d1, d2, XX} */
+ /* destination pixel data is in {d4, d5, d6, XX} */
+ vmvn.8 d7, d15
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vmull.u8 q8, d7, d4
+ vmull.u8 q9, d7, d5
+ vmull.u8 q13, d7, d6
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+.endm
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q13, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q13
+ vqadd.u8 q0, q0, q14
+ vqadd.u8 q1, q1, q15
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
+ fetch_mask_pixblock
+ pixman_composite_over_0565_8_0565_process_pixblock_tail
+ fetch_src_pixblock
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_over_0565_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_0565_n_0565_init
+ add DUMMY, sp, #(ARGS_STACK_OFFSET + 8)
+ vpush {d8-d15}
+ vld1.32 {d15[0]}, [DUMMY]
+ vdup.8 d15, d15[3]
+.endm
+
+.macro pixman_composite_over_0565_n_0565_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_0565_n_0565_asm_neon, 16, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_0565_n_0565_init, \
+ pixman_composite_over_0565_n_0565_cleanup, \
+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_add_0565_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q4, d2, d1, d0
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* source pixel data is in {d0, d1, d2, XX} */
+ /* destination pixel data is in {d4, d5, d6, XX} */
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+.endm
+
+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail
+ vqadd.u8 q0, q0, q2
+ vqadd.u8 q1, q1, q3
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head
+ fetch_mask_pixblock
+ pixman_composite_add_0565_8_0565_process_pixblock_tail
+ fetch_src_pixblock
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_add_0565_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_add_0565_8_0565_process_pixblock_head, \
+ pixman_composite_add_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* destination pixel data is in {d4, d5, d6, xx} */
+ vmvn.8 d24, d15 /* get inverted alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+.endm
+
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vraddhn.u16 d0, q14, q8
+ vraddhn.u16 d1, q15, q9
+ vraddhn.u16 d2, q12, q10
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head
+ fetch_src_pixblock
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_out_reverse_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_head, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail, \
+ pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 15, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_0565_OVER_asm_neon, 32, 0, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_0565_process_pixblock_head, \
+ pixman_composite_over_8888_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_0565_SRC_asm_neon, 32, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_0565_8888_SRC_asm_neon, 16, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
diff --git a/pixman/pixman/pixman-arm-neon.c b/pixman/pixman/pixman-arm-neon.c
index 7d6c83775..3e0c0d1c2 100644
--- a/pixman/pixman/pixman-arm-neon.c
+++ b/pixman/pixman/pixman-arm-neon.c
@@ -122,6 +122,11 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ OVER, uint16_t, uint16_t)
+
void
pixman_composite_src_n_8_asm_neon (int32_t w,
int32_t h,
@@ -332,6 +337,12 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/pixman/pixman-fast-path.c b/pixman/pixman/pixman-fast-path.c
index 4cb8321aa..92f030871 100644
--- a/pixman/pixman/pixman-fast-path.c
+++ b/pixman/pixman/pixman-fast-path.c
@@ -1,2227 +1,2228 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include "pixman-private.h"
-#include "pixman-combine32.h"
-#include "pixman-fast-path.h"
-
-static force_inline uint32_t
-fetch_24 (uint8_t *a)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- return (*a << 16) | (*(uint16_t *)(a + 1));
-#else
- return *a | (*(uint16_t *)(a + 1) << 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- return (*(uint16_t *)a << 8) | *(a + 2);
-#else
- return *(uint16_t *)a | (*(a + 2) << 16);
-#endif
- }
-}
-
-static force_inline void
-store_24 (uint8_t *a,
- uint32_t v)
-{
- if (((unsigned long)a) & 1)
- {
-#ifdef WORDS_BIGENDIAN
- *a = (uint8_t) (v >> 16);
- *(uint16_t *)(a + 1) = (uint16_t) (v);
-#else
- *a = (uint8_t) (v);
- *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif
- }
- else
- {
-#ifdef WORDS_BIGENDIAN
- *(uint16_t *)a = (uint16_t)(v >> 8);
- *(a + 2) = (uint8_t)v;
-#else
- *(uint16_t *)a = (uint16_t)v;
- *(a + 2) = (uint8_t)(v >> 16);
-#endif
- }
-}
-
-static force_inline uint32_t
-over (uint32_t src,
- uint32_t dest)
-{
- uint32_t a = ~src >> 24;
-
- UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
-
- return dest;
-}
-
-static uint32_t
-in (uint32_t x,
- uint8_t y)
-{
- uint16_t a = y;
-
- UN8x4_MUL_UN8 (x, a);
-
- return x;
-}
-
-/*
- * Naming convention:
- *
- * op_src_mask_dest
- */
-static void
-fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *src, *src_line;
- uint32_t *dst, *dst_line;
- uint8_t *mask, *mask_line;
- int src_stride, mask_stride, dst_stride;
- uint8_t m;
- uint32_t s, d;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- src = src_line;
- src_line += src_stride;
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
-
- w = width;
- while (w--)
- {
- m = *mask++;
- if (m)
- {
- s = *src | 0xff000000;
-
- if (m == 0xff)
- {
- *dst = s;
- }
- else
- {
- d = in (s, m);
- *dst = over (d, *dst);
- }
- }
- src++;
- dst++;
- }
- }
-}
-
-static void
-fast_composite_in_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
- uint16_t t;
-
- src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
-
- srca = src >> 24;
-
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- m = MUL_UN8 (m, srca, t);
-
- if (m == 0)
- *dst = 0;
- else if (m != 0xff)
- *dst = MUL_UN8 (m, *dst, t);
-
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_in_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dest_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
-
- if (s == 0)
- *dst = 0;
- else if (s != 0xff)
- *dst = MUL_UN8 (s, *dst, t);
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (m)
- {
- d = in (src, m);
- *dst = over (d, *dst);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
-
- if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
-
- *dst = s;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint32_t *dst_line, *dst, d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (ma)
- {
- d = *dst;
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = d;
- }
-
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = fetch_24 (dst);
- d = over (src, d);
- }
- store_24 (dst, d);
- }
- else if (m)
- {
- d = over (in (src, m), fetch_24 (dst));
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-
-static void
-fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint8_t *mask_line, *mask, m;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- {
- d = src;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- else if (m)
- {
- d = *dst;
- d = over (in (src, m), CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca, s;
- uint16_t src16;
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *mask_line, *mask, ma;
- int dst_stride, mask_stride;
- int32_t w;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- srca = src >> 24;
- if (src == 0)
- return;
-
- src16 = CONVERT_8888_TO_0565 (src);
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- ma = *mask++;
- if (ma == 0xffffffff)
- {
- if (srca == 0xff)
- {
- *dst = src16;
- }
- else
- {
- d = *dst;
- d = over (src, CONVERT_0565_TO_0888 (d));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- }
- else if (ma)
- {
- d = *dst;
- d = CONVERT_0565_TO_0888 (d);
-
- s = src;
-
- UN8x4_MUL_UN8x4 (s, ma);
- UN8x4_MUL_UN8 (ma, srca);
- ma = ~ma;
- UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
-
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_over_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- uint8_t a;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a == 0xff)
- *dst = s;
- else if (s)
- *dst = over (s, *dst);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- *dst++ = (*src++) | 0xff000000;
- }
-}
-
-#if 0
-static void
-fast_composite_over_8888_0888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (a)
- {
- if (a == 0xff)
- d = s;
- else
- d = over (s, fetch_24 (dst));
-
- store_24 (dst, d);
- }
- dst += 3;
- }
- }
-}
-#endif
-
-static void
-fast_composite_over_8888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t d;
- uint32_t *src_line, *src, s;
- uint8_t a;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- a = s >> 24;
- if (s)
- {
- if (a == 0xff)
- {
- d = s;
- }
- else
- {
- d = *dst;
- d = over (s, CONVERT_0565_TO_0888 (d));
- }
- *dst = CONVERT_8888_TO_0565 (d);
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_src_x888_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint16_t *dst_line, *dst;
- uint32_t *src_line, *src, s;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- *dst = CONVERT_8888_TO_0565 (s);
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint8_t s, d;
- uint16_t t;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xff)
- {
- d = *dst;
- t = d + s;
- s = t | (0 - (t >> 8));
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_8888_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
- uint32_t s, d;
-
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- s = *src++;
- if (s)
- {
- if (s != 0xffffffff)
- {
- d = *dst;
- if (d)
- UN8x4_ADD_UN8x4 (s, d);
- }
- *dst = s;
- }
- dst++;
- }
- }
-}
-
-static void
-fast_composite_add_n_8_8 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint8_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
- int32_t w;
- uint32_t src;
- uint8_t sa;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- sa = (src >> 24);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- uint16_t tmp;
- uint16_t a;
- uint32_t m, d;
- uint32_t r;
-
- a = *mask++;
- d = *dst;
-
- m = MUL_UN8 (sa, a, tmp);
- r = ADD_UN8 (m, d, tmp);
-
- *dst++ = r;
- }
- }
-}
-
-#ifdef WORDS_BIGENDIAN
-#define CREATE_BITMASK(n) (0x80000000 >> (n))
-#define UPDATE_BITMASK(n) ((n) >> 1)
-#else
-#define CREATE_BITMASK(n) (1 << (n))
-#define UPDATE_BITMASK(n) ((n) << 1)
-#endif
-
-#define TEST_BIT(p, n) \
- (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
-#define SET_BIT(p, n) \
- do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
-
-static void
-fast_composite_add_1000_1000 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
- int32_t w;
-
- PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
- src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
- dst_stride, dst_line, 1);
-
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- src = src_line;
- src_line += src_stride;
- w = width;
-
- while (w--)
- {
- /*
- * TODO: improve performance by processing uint32_t data instead
- * of individual bits
- */
- if (TEST_BIT (src, src_x + w))
- SET_BIT (dst, dest_x + w);
- }
- }
-}
-
-static void
-fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint32_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = over (src, *dst);
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-static void
-fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src, srca;
- uint16_t *dst, *dst_line;
- uint32_t *mask, *mask_line;
- int mask_stride, dst_stride;
- uint32_t bitcache, bitmask;
- int32_t w;
- uint32_t d;
- uint16_t src565;
-
- if (width <= 0)
- return;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
- srca = src >> 24;
- if (src == 0)
- return;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
- dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
- mask_stride, mask_line, 1);
- mask_line += mask_x >> 5;
-
- if (srca == 0xff)
- {
- src565 = CONVERT_8888_TO_0565 (src);
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- *dst = src565;
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
- else
- {
- while (height--)
- {
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (mask_x & 31);
-
- while (w--)
- {
- if (bitmask == 0)
- {
- bitcache = *mask++;
- bitmask = CREATE_BITMASK (0);
- }
- if (bitcache & bitmask)
- {
- d = over (src, CONVERT_0565_TO_0888 (*dst));
- *dst = CONVERT_8888_TO_0565 (d);
- }
- bitmask = UPDATE_BITMASK (bitmask);
- dst++;
- }
- }
- }
-}
-
-/*
- * Simple bitblt
- */
-
-static void
-fast_composite_solid_fill (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t src;
-
- src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
-
- if (dst_image->bits.format == PIXMAN_a1)
- {
- src = src >> 31;
- }
- else if (dst_image->bits.format == PIXMAN_a8)
- {
- src = src >> 24;
- }
- else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
- dst_image->bits.format == PIXMAN_b5g6r5)
- {
- src = CONVERT_8888_TO_0565 (src);
- }
-
- pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
- PIXMAN_FORMAT_BPP (dst_image->bits.format),
- dest_x, dest_y,
- width, height,
- src);
-}
-
-static void
-fast_composite_src_memcpy (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
- uint32_t n_bytes = width * bpp;
- int dst_stride, src_stride;
- uint8_t *dst;
- uint8_t *src;
-
- src_stride = src_image->bits.rowstride * 4;
- dst_stride = dst_image->bits.rowstride * 4;
-
- src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
- dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
-
- while (height--)
- {
- memcpy (dst, src, n_bytes);
-
- dst += dst_stride;
- src += src_stride;
- }
-}
-
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
-FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
-FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
-FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
-FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
-FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
-FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
-FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
-FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
-
-/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
-static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
- uint16_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
-{
- uint16_t tmp1, tmp2, tmp3, tmp4;
- while ((w -= 4) >= 0)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp3 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp4 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- *dst++ = tmp3;
- *dst++ = tmp4;
- }
- if (w & 2)
- {
- tmp1 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- tmp2 = src[pixman_fixed_to_int (vx)];
- vx += unit_x;
- *dst++ = tmp1;
- *dst++ = tmp2;
- }
- if (w & 1)
- *dst++ = src[pixman_fixed_to_int (vx)];
-}
-
-FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, COVER)
-FAST_NEAREST_MAINLOOP (565_565_none_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, NONE)
-FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
- scaled_nearest_scanline_565_565_SRC,
- uint16_t, uint16_t, PAD)
-
-static force_inline uint32_t
-fetch_nearest (pixman_repeat_t src_repeat,
- pixman_format_code_t format,
- uint32_t *src, int x, int src_width)
-{
- if (repeat (src_repeat, &x, src_width))
- {
- if (format == PIXMAN_x8r8g8b8)
- return *(src + x) | 0xff000000;
- else
- return *(src + x);
- }
- else
- {
- return 0;
- }
-}
-
-static force_inline void
-combine_over (uint32_t s, uint32_t *dst)
-{
- if (s)
- {
- uint8_t ia = 0xff - (s >> 24);
-
- if (ia)
- UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
- else
- *dst = s;
- }
-}
-
-static force_inline void
-combine_src (uint32_t s, uint32_t *dst)
-{
- *dst = s;
-}
-
-static void
-fast_composite_scaled_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
-{
- uint32_t *dst_line;
- uint32_t *src_line;
- int dst_stride, src_stride;
- int src_width, src_height;
- pixman_repeat_t src_repeat;
- pixman_fixed_t unit_x, unit_y;
- pixman_format_code_t src_format;
- pixman_vector_t v;
- pixman_fixed_t vy;
-
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space
- */
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
-
- /* reference point is the center of the pixel */
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
- v.vector[2] = pixman_fixed_1;
-
- if (!pixman_transform_point_3d (src_image->common.transform, &v))
- return;
-
- unit_x = src_image->common.transform->matrix[0][0];
- unit_y = src_image->common.transform->matrix[1][1];
-
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
- v.vector[0] -= pixman_fixed_e;
- v.vector[1] -= pixman_fixed_e;
-
- src_height = src_image->bits.height;
- src_width = src_image->bits.width;
- src_repeat = src_image->common.repeat;
- src_format = src_image->bits.format;
-
- vy = v.vector[1];
- while (height--)
- {
- pixman_fixed_t vx = v.vector[0];
- int y = pixman_fixed_to_int (vy);
- uint32_t *dst = dst_line;
-
- dst_line += dst_stride;
-
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point to source space */
- vy += unit_y;
-
- if (!repeat (src_repeat, &y, src_height))
- {
- if (op == PIXMAN_OP_SRC)
- memset (dst, 0, sizeof (*dst) * width);
- }
- else
- {
- int w = width;
-
- uint32_t *src = src_line + y * src_stride;
-
- while (w >= 2)
- {
- uint32_t s1, s2;
- int x1, x2;
-
- x1 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- x2 = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- w -= 2;
-
- s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
- s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
-
- if (op == PIXMAN_OP_OVER)
- {
- combine_over (s1, dst++);
- combine_over (s2, dst++);
- }
- else
- {
- combine_src (s1, dst++);
- combine_src (s2, dst++);
- }
- }
-
- while (w--)
- {
- uint32_t s;
- int x;
-
- x = pixman_fixed_to_int (vx);
- vx += unit_x;
-
- s = fetch_nearest (src_repeat, src_format, src, x, src_width);
-
- if (op == PIXMAN_OP_OVER)
- combine_over (s, dst++);
- else
- combine_src (s, dst++);
- }
- }
- }
-}
-
-#define CACHE_LINE_SIZE 64
-
-#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
- \
-static void \
-blt_rotated_90_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + (h - y - 1); \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_270_trivial_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int w, \
- int h) \
-{ \
- int x, y; \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = src + src_stride * (w - 1) + y; \
- pix_type *d = dst + dst_stride * y; \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s -= src_stride; \
- } \
- } \
-} \
- \
-static void \
-blt_rotated_90_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src, \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- src += leading_pixels * src_stride; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * x, \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_90_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src + W * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-blt_rotated_270_##suffix (pix_type *dst, \
- int dst_stride, \
- const pix_type *src, \
- int src_stride, \
- int W, \
- int H) \
-{ \
- int x; \
- int leading_pixels = 0, trailing_pixels = 0; \
- const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
- \
- /* \
- * split processing into handling destination as TILE_SIZExH cache line \
- * aligned vertical stripes (optimistically assuming that destination \
- * stride is a multiple of cache line, if not - it will be just a bit \
- * slower) \
- */ \
- \
- if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > W) \
- leading_pixels = W; \
- \
- /* unaligned leading part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst, \
- dst_stride, \
- src + src_stride * (W - leading_pixels), \
- src_stride, \
- leading_pixels, \
- H); \
- \
- dst += leading_pixels; \
- W -= leading_pixels; \
- } \
- \
- if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + W) & \
- (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > W) \
- trailing_pixels = W; \
- W -= trailing_pixels; \
- src += trailing_pixels * src_stride; \
- } \
- \
- for (x = 0; x < W; x += TILE_SIZE) \
- { \
- /* aligned middle part TILE_SIZExH */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + x, \
- dst_stride, \
- src + src_stride * (W - x - TILE_SIZE), \
- src_stride, \
- TILE_SIZE, \
- H); \
- } \
- \
- if (trailing_pixels) \
- { \
- /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
- blt_rotated_270_trivial_##suffix ( \
- dst + W, \
- dst_stride, \
- src - trailing_pixels * src_stride, \
- src_stride, \
- trailing_pixels, \
- H); \
- } \
-} \
- \
-static void \
-fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = -src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
- src_y_t = src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-} \
- \
-static void \
-fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- pix_type *dst_line; \
- pix_type *src_line; \
- int dst_stride, src_stride; \
- int src_x_t, src_y_t; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
- dst_stride, dst_line, 1); \
- src_x_t = src_y + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[0][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e); \
- src_y_t = -src_x + pixman_fixed_to_int ( \
- src_image->common.transform->matrix[1][2] + \
- pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
- src_stride, src_line, 1); \
- blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
- width, height); \
-}
-
-FAST_SIMPLE_ROTATE (8, uint8_t)
-FAST_SIMPLE_ROTATE (565, uint16_t)
-FAST_SIMPLE_ROTATE (8888, uint32_t)
-
-static const pixman_fast_path_t c_fast_paths[] =
-{
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
- PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
- PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
- PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
- PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
- PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
- PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
- SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
-
- SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
-
- SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
-
-#define NEAREST_FAST_PATH(op,s,d) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest, \
- }
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
-
- NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
- NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
- NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
-
-#define SIMPLE_ROTATE_FLAGS(angle) \
- (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_SAMPLES_COVER_CLIP | \
- FAST_PATH_STANDARD_FLAGS)
-
-#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_90_##suffix, \
- }, \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_rotate_270_##suffix, \
- }
-
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
- SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
- SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
-
- { PIXMAN_OP_NONE },
-};
-
-#ifdef WORDS_BIGENDIAN
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
-#else
-#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
-#endif
-
-static force_inline void
-pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
-{
- if (offs)
- {
- int leading_pixels = 32 - offs;
- if (leading_pixels >= width)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, offs);
- else
- *dst &= ~A1_FILL_MASK (width, offs);
- return;
- }
- else
- {
- if (v)
- *dst++ |= A1_FILL_MASK (leading_pixels, offs);
- else
- *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
- width -= leading_pixels;
- }
- }
- while (width >= 32)
- {
- if (v)
- *dst++ = 0xFFFFFFFF;
- else
- *dst++ = 0;
- width -= 32;
- }
- if (width > 0)
- {
- if (v)
- *dst |= A1_FILL_MASK (width, 0);
- else
- *dst &= ~A1_FILL_MASK (width, 0);
- }
-}
-
-static void
-pixman_fill1 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- uint32_t *dst = bits + y * stride + (x >> 5);
- int offs = x & 31;
-
- if (xor & 1)
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 1);
- dst += stride;
- }
- }
- else
- {
- while (height--)
- {
- pixman_fill1_line (dst, offs, width, 0);
- dst += stride;
- }
- }
-}
-
-static void
-pixman_fill8 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int byte_stride = stride * (int) sizeof (uint32_t);
- uint8_t *dst = (uint8_t *) bits;
- uint8_t v = xor & 0xff;
- int i;
-
- dst = dst + y * byte_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += byte_stride;
- }
-}
-
-static void
-pixman_fill16 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int short_stride =
- (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
- uint16_t *dst = (uint16_t *)bits;
- uint16_t v = xor & 0xffff;
- int i;
-
- dst = dst + y * short_stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += short_stride;
- }
-}
-
-static void
-pixman_fill32 (uint32_t *bits,
- int stride,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- int i;
-
- bits = bits + y * stride + x;
-
- while (height--)
- {
- for (i = 0; i < width; ++i)
- bits[i] = xor;
-
- bits += stride;
- }
-}
-
-static pixman_bool_t
-fast_path_fill (pixman_implementation_t *imp,
- uint32_t * bits,
- int stride,
- int bpp,
- int x,
- int y,
- int width,
- int height,
- uint32_t xor)
-{
- switch (bpp)
- {
- case 1:
- pixman_fill1 (bits, stride, x, y, width, height, xor);
- break;
-
- case 8:
- pixman_fill8 (bits, stride, x, y, width, height, xor);
- break;
-
- case 16:
- pixman_fill16 (bits, stride, x, y, width, height, xor);
- break;
-
- case 32:
- pixman_fill32 (bits, stride, x, y, width, height, xor);
- break;
-
- default:
- return _pixman_implementation_fill (
- imp->delegate, bits, stride, bpp, x, y, width, height, xor);
- break;
- }
-
- return TRUE;
-}
-
-pixman_implementation_t *
-_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
-{
- pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
-
- imp->fill = fast_path_fill;
-
- return imp;
-}
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <string.h>
+#include <stdlib.h>
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
+
+static force_inline uint32_t
+fetch_24 (uint8_t *a)
+{
+ if (((unsigned long)a) & 1)
+ {
+#ifdef WORDS_BIGENDIAN
+ return (*a << 16) | (*(uint16_t *)(a + 1));
+#else
+ return *a | (*(uint16_t *)(a + 1) << 8);
+#endif
+ }
+ else
+ {
+#ifdef WORDS_BIGENDIAN
+ return (*(uint16_t *)a << 8) | *(a + 2);
+#else
+ return *(uint16_t *)a | (*(a + 2) << 16);
+#endif
+ }
+}
+
+static force_inline void
+store_24 (uint8_t *a,
+ uint32_t v)
+{
+ if (((unsigned long)a) & 1)
+ {
+#ifdef WORDS_BIGENDIAN
+ *a = (uint8_t) (v >> 16);
+ *(uint16_t *)(a + 1) = (uint16_t) (v);
+#else
+ *a = (uint8_t) (v);
+ *(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
+#endif
+ }
+ else
+ {
+#ifdef WORDS_BIGENDIAN
+ *(uint16_t *)a = (uint16_t)(v >> 8);
+ *(a + 2) = (uint8_t)v;
+#else
+ *(uint16_t *)a = (uint16_t)v;
+ *(a + 2) = (uint8_t)(v >> 16);
+#endif
+ }
+}
+
+static force_inline uint32_t
+over (uint32_t src,
+ uint32_t dest)
+{
+ uint32_t a = ~src >> 24;
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
+
+ return dest;
+}
+
+static uint32_t
+in (uint32_t x,
+ uint8_t y)
+{
+ uint16_t a = y;
+
+ UN8x4_MUL_UN8 (x, a);
+
+ return x;
+}
+
+/*
+ * Naming convention:
+ *
+ * op_src_mask_dest
+ */
+static void
+fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *src, *src_line;
+ uint32_t *dst, *dst_line;
+ uint8_t *mask, *mask_line;
+ int src_stride, mask_stride, dst_stride;
+ uint8_t m;
+ uint32_t s, d;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ src = src_line;
+ src_line += src_stride;
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+
+ w = width;
+ while (w--)
+ {
+ m = *mask++;
+ if (m)
+ {
+ s = *src | 0xff000000;
+
+ if (m == 0xff)
+ {
+ *dst = s;
+ }
+ else
+ {
+ d = in (s, m);
+ *dst = over (d, *dst);
+ }
+ }
+ src++;
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_in_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dest_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint16_t t;
+
+ src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+ srca = src >> 24;
+
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ if (srca == 0xff)
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+
+ if (m == 0)
+ *dst = 0;
+ else if (m != 0xff)
+ *dst = MUL_UN8 (m, *dst, t);
+
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ m = MUL_UN8 (m, srca, t);
+
+ if (m == 0)
+ *dst = 0;
+ else if (m != 0xff)
+ *dst = MUL_UN8 (m, *dst, t);
+
+ dst++;
+ }
+ }
+ }
+}
+
+static void
+fast_composite_in_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dest_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint8_t s;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+
+ if (s == 0)
+ *dst = 0;
+ else if (s != 0xff)
+ *dst = MUL_UN8 (s, *dst, t);
+
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst_line, *dst, d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ *dst = src;
+ else
+ *dst = over (src, *dst);
+ }
+ else if (m)
+ {
+ d = in (src, m);
+ *dst = over (d, *dst);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, s;
+ uint32_t *dst_line, *dst, d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+
+ if (ma)
+ {
+ d = *dst;
+ s = src;
+
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
+
+ *dst = s;
+ }
+
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca, s;
+ uint32_t *dst_line, *dst, d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+ if (ma == 0xffffffff)
+ {
+ if (srca == 0xff)
+ *dst = src;
+ else
+ *dst = over (src, *dst);
+ }
+ else if (ma)
+ {
+ d = *dst;
+ s = src;
+
+ UN8x4_MUL_UN8x4 (s, ma);
+ UN8x4_MUL_UN8 (ma, srca);
+ ma = ~ma;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+
+ *dst = d;
+ }
+
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint8_t *dst_line, *dst;
+ uint32_t d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ d = src;
+ }
+ else
+ {
+ d = fetch_24 (dst);
+ d = over (src, d);
+ }
+ store_24 (dst, d);
+ }
+ else if (m)
+ {
+ d = over (in (src, m), fetch_24 (dst));
+ store_24 (dst, d);
+ }
+ dst += 3;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint8_t *mask_line, *mask, m;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ d = src;
+ }
+ else
+ {
+ d = *dst;
+ d = over (src, CONVERT_0565_TO_0888 (d));
+ }
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ else if (m)
+ {
+ d = *dst;
+ d = over (in (src, m), CONVERT_0565_TO_0888 (d));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca, s;
+ uint16_t src16;
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *mask_line, *mask, ma;
+ int dst_stride, mask_stride;
+ int32_t w;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ src16 = CONVERT_8888_TO_0565 (src);
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ ma = *mask++;
+ if (ma == 0xffffffff)
+ {
+ if (srca == 0xff)
+ {
+ *dst = src16;
+ }
+ else
+ {
+ d = *dst;
+ d = over (src, CONVERT_0565_TO_0888 (d));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ }
+ else if (ma)
+ {
+ d = *dst;
+ d = CONVERT_0565_TO_0888 (d);
+
+ s = src;
+
+ UN8x4_MUL_UN8x4 (s, ma);
+ UN8x4_MUL_UN8 (ma, srca);
+ ma = ~ma;
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
+
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_over_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ uint8_t a;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (a == 0xff)
+ *dst = s;
+ else if (s)
+ *dst = over (s, *dst);
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_src_x888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ *dst++ = (*src++) | 0xff000000;
+ }
+}
+
+#if 0
+static void
+fast_composite_over_8888_0888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *src_line, *src, s;
+ uint8_t a;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (a)
+ {
+ if (a == 0xff)
+ d = s;
+ else
+ d = over (s, fetch_24 (dst));
+
+ store_24 (dst, d);
+ }
+ dst += 3;
+ }
+ }
+}
+#endif
+
+static void
+fast_composite_over_8888_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst;
+ uint32_t d;
+ uint32_t *src_line, *src, s;
+ uint8_t a;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ a = s >> 24;
+ if (s)
+ {
+ if (a == 0xff)
+ {
+ d = s;
+ }
+ else
+ {
+ d = *dst;
+ d = over (s, CONVERT_0565_TO_0888 (d));
+ }
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_src_x888_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint16_t *dst_line, *dst;
+ uint32_t *src_line, *src, s;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ *dst = CONVERT_8888_TO_0565 (s);
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint8_t s, d;
+ uint16_t t;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ if (s)
+ {
+ if (s != 0xff)
+ {
+ d = *dst;
+ t = d + s;
+ s = t | (0 - (t >> 8));
+ }
+ *dst = s;
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_8888_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+ uint32_t s, d;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ s = *src++;
+ if (s)
+ {
+ if (s != 0xffffffff)
+ {
+ d = *dst;
+ if (d)
+ UN8x4_ADD_UN8x4 (s, d);
+ }
+ *dst = s;
+ }
+ dst++;
+ }
+ }
+}
+
+static void
+fast_composite_add_n_8_8 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint8_t *dst_line, *dst;
+ uint8_t *mask_line, *mask;
+ int dst_stride, mask_stride;
+ int32_t w;
+ uint32_t src;
+ uint8_t sa;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ sa = (src >> 24);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ while (w--)
+ {
+ uint16_t tmp;
+ uint16_t a;
+ uint32_t m, d;
+ uint32_t r;
+
+ a = *mask++;
+ d = *dst;
+
+ m = MUL_UN8 (sa, a, tmp);
+ r = ADD_UN8 (m, d, tmp);
+
+ *dst++ = r;
+ }
+ }
+}
+
+#ifdef WORDS_BIGENDIAN
+#define CREATE_BITMASK(n) (0x80000000 >> (n))
+#define UPDATE_BITMASK(n) ((n) >> 1)
+#else
+#define CREATE_BITMASK(n) (1 << (n))
+#define UPDATE_BITMASK(n) ((n) << 1)
+#endif
+
+#define TEST_BIT(p, n) \
+ (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
+#define SET_BIT(p, n) \
+ do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
+
+static void
+fast_composite_add_1000_1000 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line, *dst;
+ uint32_t *src_line, *src;
+ int dst_stride, src_stride;
+ int32_t w;
+
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
+ src_stride, src_line, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ src = src_line;
+ src_line += src_stride;
+ w = width;
+
+ while (w--)
+ {
+ /*
+ * TODO: improve performance by processing uint32_t data instead
+ * of individual bits
+ */
+ if (TEST_BIT (src, src_x + w))
+ SET_BIT (dst, dest_x + w);
+ }
+ }
+}
+
+static void
+fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint32_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = over (src, *dst);
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
+static void
+fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src, srca;
+ uint16_t *dst, *dst_line;
+ uint32_t *mask, *mask_line;
+ int mask_stride, dst_stride;
+ uint32_t bitcache, bitmask;
+ int32_t w;
+ uint32_t d;
+ uint16_t src565;
+
+ if (width <= 0)
+ return;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+ srca = src >> 24;
+ if (src == 0)
+ return;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
+ dst_stride, dst_line, 1);
+ PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
+ mask_stride, mask_line, 1);
+ mask_line += mask_x >> 5;
+
+ if (srca == 0xff)
+ {
+ src565 = CONVERT_8888_TO_0565 (src);
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ *dst = src565;
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride;
+ mask = mask_line;
+ mask_line += mask_stride;
+ w = width;
+
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (mask_x & 31);
+
+ while (w--)
+ {
+ if (bitmask == 0)
+ {
+ bitcache = *mask++;
+ bitmask = CREATE_BITMASK (0);
+ }
+ if (bitcache & bitmask)
+ {
+ d = over (src, CONVERT_0565_TO_0888 (*dst));
+ *dst = CONVERT_8888_TO_0565 (d);
+ }
+ bitmask = UPDATE_BITMASK (bitmask);
+ dst++;
+ }
+ }
+ }
+}
+
+/*
+ * Simple bitblt
+ */
+
+static void
+fast_composite_solid_fill (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t src;
+
+ src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+
+ if (dst_image->bits.format == PIXMAN_a1)
+ {
+ src = src >> 31;
+ }
+ else if (dst_image->bits.format == PIXMAN_a8)
+ {
+ src = src >> 24;
+ }
+ else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
+ dst_image->bits.format == PIXMAN_b5g6r5)
+ {
+ src = CONVERT_8888_TO_0565 (src);
+ }
+
+ pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+ PIXMAN_FORMAT_BPP (dst_image->bits.format),
+ dest_x, dest_y,
+ width, height,
+ src);
+}
+
+static void
+fast_composite_src_memcpy (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
+ uint32_t n_bytes = width * bpp;
+ int dst_stride, src_stride;
+ uint8_t *dst;
+ uint8_t *src;
+
+ src_stride = src_image->bits.rowstride * 4;
+ dst_stride = dst_image->bits.rowstride * 4;
+
+ src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
+ dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
+
+ while (height--)
+ {
+ memcpy (dst, src, n_bytes);
+
+ dst += dst_stride;
+ src += src_stride;
+ }
+}
+
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
+FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
+FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
+FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
+FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
+FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
+FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
+FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
+FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
+FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
+FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
+FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
+FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
+FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
+
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+ const uint16_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
+{
+ uint16_t tmp1, tmp2, tmp3, tmp4;
+ while ((w -= 4) >= 0)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ *dst++ = tmp3;
+ *dst++ = tmp4;
+ }
+ if (w & 2)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ }
+ if (w & 1)
+ *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, COVER)
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, NONE)
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, PAD)
+
+static force_inline uint32_t
+fetch_nearest (pixman_repeat_t src_repeat,
+ pixman_format_code_t format,
+ uint32_t *src, int x, int src_width)
+{
+ if (repeat (src_repeat, &x, src_width))
+ {
+ if (format == PIXMAN_x8r8g8b8)
+ return *(src + x) | 0xff000000;
+ else
+ return *(src + x);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+static force_inline void
+combine_over (uint32_t s, uint32_t *dst)
+{
+ if (s)
+ {
+ uint8_t ia = 0xff - (s >> 24);
+
+ if (ia)
+ UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
+ else
+ *dst = s;
+ }
+}
+
+static force_inline void
+combine_src (uint32_t s, uint32_t *dst)
+{
+ *dst = s;
+}
+
+static void
+fast_composite_scaled_nearest (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
+{
+ uint32_t *dst_line;
+ uint32_t *src_line;
+ int dst_stride, src_stride;
+ int src_width, src_height;
+ pixman_repeat_t src_repeat;
+ pixman_fixed_t unit_x, unit_y;
+ pixman_format_code_t src_format;
+ pixman_vector_t v;
+ pixman_fixed_t vy;
+
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
+ * transformed from destination space to source space
+ */
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
+
+ /* reference point is the center of the pixel */
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
+ v.vector[2] = pixman_fixed_1;
+
+ if (!pixman_transform_point_3d (src_image->common.transform, &v))
+ return;
+
+ unit_x = src_image->common.transform->matrix[0][0];
+ unit_y = src_image->common.transform->matrix[1][1];
+
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+ v.vector[0] -= pixman_fixed_e;
+ v.vector[1] -= pixman_fixed_e;
+
+ src_height = src_image->bits.height;
+ src_width = src_image->bits.width;
+ src_repeat = src_image->common.repeat;
+ src_format = src_image->bits.format;
+
+ vy = v.vector[1];
+ while (height--)
+ {
+ pixman_fixed_t vx = v.vector[0];
+ int y = pixman_fixed_to_int (vy);
+ uint32_t *dst = dst_line;
+
+ dst_line += dst_stride;
+
+ /* adjust the y location by a unit vector in the y direction
+ * this is equivalent to transforming y+1 of the destination point to source space */
+ vy += unit_y;
+
+ if (!repeat (src_repeat, &y, src_height))
+ {
+ if (op == PIXMAN_OP_SRC)
+ memset (dst, 0, sizeof (*dst) * width);
+ }
+ else
+ {
+ int w = width;
+
+ uint32_t *src = src_line + y * src_stride;
+
+ while (w >= 2)
+ {
+ uint32_t s1, s2;
+ int x1, x2;
+
+ x1 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ x2 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ w -= 2;
+
+ s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
+ s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ {
+ combine_over (s1, dst++);
+ combine_over (s2, dst++);
+ }
+ else
+ {
+ combine_src (s1, dst++);
+ combine_src (s2, dst++);
+ }
+ }
+
+ while (w--)
+ {
+ uint32_t s;
+ int x;
+
+ x = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ s = fetch_nearest (src_repeat, src_format, src, x, src_width);
+
+ if (op == PIXMAN_OP_OVER)
+ combine_over (s, dst++);
+ else
+ combine_src (s, dst++);
+ }
+ }
+ }
+}
+
+#define CACHE_LINE_SIZE 64
+
+#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
+ \
+static void \
+blt_rotated_90_trivial_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int w, \
+ int h) \
+{ \
+ int x, y; \
+ for (y = 0; y < h; y++) \
+ { \
+ const pix_type *s = src + (h - y - 1); \
+ pix_type *d = dst + dst_stride * y; \
+ for (x = 0; x < w; x++) \
+ { \
+ *d++ = *s; \
+ s += src_stride; \
+ } \
+ } \
+} \
+ \
+static void \
+blt_rotated_270_trivial_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int w, \
+ int h) \
+{ \
+ int x, y; \
+ for (y = 0; y < h; y++) \
+ { \
+ const pix_type *s = src + src_stride * (w - 1) + y; \
+ pix_type *d = dst + dst_stride * y; \
+ for (x = 0; x < w; x++) \
+ { \
+ *d++ = *s; \
+ s -= src_stride; \
+ } \
+ } \
+} \
+ \
+static void \
+blt_rotated_90_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int W, \
+ int H) \
+{ \
+ int x; \
+ int leading_pixels = 0, trailing_pixels = 0; \
+ const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
+ \
+ /* \
+ * split processing into handling destination as TILE_SIZExH cache line \
+ * aligned vertical stripes (optimistically assuming that destination \
+ * stride is a multiple of cache line, if not - it will be just a bit \
+ * slower) \
+ */ \
+ \
+ if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
+ { \
+ leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > W) \
+ leading_pixels = W; \
+ \
+ /* unaligned leading part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst, \
+ dst_stride, \
+ src, \
+ src_stride, \
+ leading_pixels, \
+ H); \
+ \
+ dst += leading_pixels; \
+ src += leading_pixels * src_stride; \
+ W -= leading_pixels; \
+ } \
+ \
+ if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
+ { \
+ trailing_pixels = (((uintptr_t)(dst + W) & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > W) \
+ trailing_pixels = W; \
+ W -= trailing_pixels; \
+ } \
+ \
+ for (x = 0; x < W; x += TILE_SIZE) \
+ { \
+ /* aligned middle part TILE_SIZExH */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst + x, \
+ dst_stride, \
+ src + src_stride * x, \
+ src_stride, \
+ TILE_SIZE, \
+ H); \
+ } \
+ \
+ if (trailing_pixels) \
+ { \
+ /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_90_trivial_##suffix ( \
+ dst + W, \
+ dst_stride, \
+ src + W * src_stride, \
+ src_stride, \
+ trailing_pixels, \
+ H); \
+ } \
+} \
+ \
+static void \
+blt_rotated_270_##suffix (pix_type *dst, \
+ int dst_stride, \
+ const pix_type *src, \
+ int src_stride, \
+ int W, \
+ int H) \
+{ \
+ int x; \
+ int leading_pixels = 0, trailing_pixels = 0; \
+ const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type); \
+ \
+ /* \
+ * split processing into handling destination as TILE_SIZExH cache line \
+ * aligned vertical stripes (optimistically assuming that destination \
+ * stride is a multiple of cache line, if not - it will be just a bit \
+ * slower) \
+ */ \
+ \
+ if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) \
+ { \
+ leading_pixels = TILE_SIZE - (((uintptr_t)dst & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > W) \
+ leading_pixels = W; \
+ \
+ /* unaligned leading part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst, \
+ dst_stride, \
+ src + src_stride * (W - leading_pixels), \
+ src_stride, \
+ leading_pixels, \
+ H); \
+ \
+ dst += leading_pixels; \
+ W -= leading_pixels; \
+ } \
+ \
+ if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) \
+ { \
+ trailing_pixels = (((uintptr_t)(dst + W) & \
+ (CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > W) \
+ trailing_pixels = W; \
+ W -= trailing_pixels; \
+ src += trailing_pixels * src_stride; \
+ } \
+ \
+ for (x = 0; x < W; x += TILE_SIZE) \
+ { \
+ /* aligned middle part TILE_SIZExH */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst + x, \
+ dst_stride, \
+ src + src_stride * (W - x - TILE_SIZE), \
+ src_stride, \
+ TILE_SIZE, \
+ H); \
+ } \
+ \
+ if (trailing_pixels) \
+ { \
+ /* unaligned trailing part NxH (where N < TILE_SIZE) */ \
+ blt_rotated_270_trivial_##suffix ( \
+ dst + W, \
+ dst_stride, \
+ src - trailing_pixels * src_stride, \
+ src_stride, \
+ trailing_pixels, \
+ H); \
+ } \
+} \
+ \
+static void \
+fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ pix_type *dst_line; \
+ pix_type *src_line; \
+ int dst_stride, src_stride; \
+ int src_x_t, src_y_t; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
+ dst_stride, dst_line, 1); \
+ src_x_t = -src_y + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[0][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
+ src_y_t = src_x + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[1][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
+ src_stride, src_line, 1); \
+ blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \
+ width, height); \
+} \
+ \
+static void \
+fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ pix_type *dst_line; \
+ pix_type *src_line; \
+ int dst_stride, src_stride; \
+ int src_x_t, src_y_t; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \
+ dst_stride, dst_line, 1); \
+ src_x_t = src_y + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[0][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e); \
+ src_y_t = -src_x + pixman_fixed_to_int ( \
+ src_image->common.transform->matrix[1][2] + \
+ pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \
+ src_stride, src_line, 1); \
+ blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
+ width, height); \
+}
+
+FAST_SIMPLE_ROTATE (8, uint8_t)
+FAST_SIMPLE_ROTATE (565, uint16_t)
+FAST_SIMPLE_ROTATE (8888, uint32_t)
+
+static const pixman_fast_path_t c_fast_paths[] =
+{
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
+ PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
+ PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
+ PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
+ PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
+ PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
+
+#define NEAREST_FAST_PATH(op,s,d) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest, \
+ }
+
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
+
+ NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
+ NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
+ NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
+
+#define SIMPLE_ROTATE_FLAGS(angle) \
+ (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_SAMPLES_COVER_CLIP | \
+ FAST_PATH_STANDARD_FLAGS)
+
+#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_rotate_90_##suffix, \
+ }, \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_rotate_270_##suffix, \
+ }
+
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
+ SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
+ SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
+
+ { PIXMAN_OP_NONE },
+};
+
+#ifdef WORDS_BIGENDIAN
+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n)))
+#else
+#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs))
+#endif
+
+static force_inline void
+pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
+{
+ if (offs)
+ {
+ int leading_pixels = 32 - offs;
+ if (leading_pixels >= width)
+ {
+ if (v)
+ *dst |= A1_FILL_MASK (width, offs);
+ else
+ *dst &= ~A1_FILL_MASK (width, offs);
+ return;
+ }
+ else
+ {
+ if (v)
+ *dst++ |= A1_FILL_MASK (leading_pixels, offs);
+ else
+ *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
+ width -= leading_pixels;
+ }
+ }
+ while (width >= 32)
+ {
+ if (v)
+ *dst++ = 0xFFFFFFFF;
+ else
+ *dst++ = 0;
+ width -= 32;
+ }
+ if (width > 0)
+ {
+ if (v)
+ *dst |= A1_FILL_MASK (width, 0);
+ else
+ *dst &= ~A1_FILL_MASK (width, 0);
+ }
+}
+
+static void
+pixman_fill1 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ uint32_t *dst = bits + y * stride + (x >> 5);
+ int offs = x & 31;
+
+ if (xor & 1)
+ {
+ while (height--)
+ {
+ pixman_fill1_line (dst, offs, width, 1);
+ dst += stride;
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ pixman_fill1_line (dst, offs, width, 0);
+ dst += stride;
+ }
+ }
+}
+
+static void
+pixman_fill8 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int byte_stride = stride * (int) sizeof (uint32_t);
+ uint8_t *dst = (uint8_t *) bits;
+ uint8_t v = xor & 0xff;
+ int i;
+
+ dst = dst + y * byte_stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ dst[i] = v;
+
+ dst += byte_stride;
+ }
+}
+
+static void
+pixman_fill16 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int short_stride =
+ (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
+ uint16_t *dst = (uint16_t *)bits;
+ uint16_t v = xor & 0xffff;
+ int i;
+
+ dst = dst + y * short_stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ dst[i] = v;
+
+ dst += short_stride;
+ }
+}
+
+static void
+pixman_fill32 (uint32_t *bits,
+ int stride,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ int i;
+
+ bits = bits + y * stride + x;
+
+ while (height--)
+ {
+ for (i = 0; i < width; ++i)
+ bits[i] = xor;
+
+ bits += stride;
+ }
+}
+
+static pixman_bool_t
+fast_path_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ switch (bpp)
+ {
+ case 1:
+ pixman_fill1 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 8:
+ pixman_fill8 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 16:
+ pixman_fill16 (bits, stride, x, y, width, height, xor);
+ break;
+
+ case 32:
+ pixman_fill32 (bits, stride, x, y, width, height, xor);
+ break;
+
+ default:
+ return _pixman_implementation_fill (
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+ break;
+ }
+
+ return TRUE;
+}
+
+pixman_implementation_t *
+_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
+{
+ pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
+
+ imp->fill = fast_path_fill;
+
+ return imp;
+}
diff --git a/pixman/pixman/pixman-fast-path.h b/pixman/pixman/pixman-fast-path.h
index bb7032d86..d08122293 100644
--- a/pixman/pixman/pixman-fast-path.h
+++ b/pixman/pixman/pixman-fast-path.h
@@ -1,449 +1,590 @@
-/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission. SuSE makes no representations about the
- * suitability of this software for any purpose. It is provided "as is"
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: Keith Packard, SuSE, Inc.
- */
-
-#ifndef PIXMAN_FAST_PATH_H__
-#define PIXMAN_FAST_PATH_H__
-
-#include "pixman-private.h"
-
-#define PIXMAN_REPEAT_COVER -1
-
-static force_inline pixman_bool_t
-repeat (pixman_repeat_t repeat, int *c, int size)
-{
- if (repeat == PIXMAN_REPEAT_NONE)
- {
- if (*c < 0 || *c >= size)
- return FALSE;
- }
- else if (repeat == PIXMAN_REPEAT_NORMAL)
- {
- while (*c >= size)
- *c -= size;
- while (*c < 0)
- *c += size;
- }
- else if (repeat == PIXMAN_REPEAT_PAD)
- {
- *c = CLIP (*c, 0, size - 1);
- }
- else /* REFLECT */
- {
- *c = MOD (*c, size * 2);
- if (*c >= size)
- *c = size * 2 - *c - 1;
- }
- return TRUE;
-}
-
-/*
- * For each scanline fetched from source image with PAD repeat:
- * - calculate how many pixels need to be padded on the left side
- * - calculate how many pixels need to be padded on the right side
- * - update width to only count pixels which are fetched from the image
- * All this information is returned via 'width', 'left_pad', 'right_pad'
- * arguments. The code is assuming that 'unit_x' is positive.
- *
- * Note: 64-bit math is used in order to avoid potential overflows, which
- * is probably excessive in many cases. This particular function
- * may need its own correctness test and performance tuning.
- */
-static force_inline void
-pad_repeat_get_scanline_bounds (int32_t source_image_width,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- int32_t * width,
- int32_t * left_pad,
- int32_t * right_pad)
-{
- int64_t max_vx = (int64_t) source_image_width << 16;
- int64_t tmp;
- if (vx < 0)
- {
- tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
- if (tmp > *width)
- {
- *left_pad = *width;
- *width = 0;
- }
- else
- {
- *left_pad = (int32_t) tmp;
- *width -= (int32_t) tmp;
- }
- }
- else
- {
- *left_pad = 0;
- }
- tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
- if (tmp < 0)
- {
- *right_pad = *width;
- *width = 0;
- }
- else if (tmp >= *width)
- {
- *right_pad = 0;
- }
- else
- {
- *right_pad = *width - (int32_t) tmp;
- *width = (int32_t) tmp;
- }
-}
-
-/* A macroified version of specialized nearest scalers for some
- * common 8888 and 565 formats. It supports SRC and OVER ops.
- *
- * There are two repeat versions, one that handles repeat normal,
- * and one without repeat handling that only works if the src region
- * used is completely covered by the pre-repeated source samples.
- *
- * The loops are unrolled to process two pixels per iteration for better
- * performance on most CPU architectures (superscalar processors
- * can issue several operations simultaneously, other processors can hide
- * instructions latencies by pipelining operations). Unrolling more
- * does not make much sense because the compiler will start running out
- * of spare registers soon.
- */
-
-#define GET_8888_ALPHA(s) ((s) >> 24)
- /* This is not actually used since we don't have an OVER with
- 565 source, but it is needed to build. */
-#define GET_0565_ALPHA(s) 0xff
-
-#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
-static force_inline void \
-scanline_func_name (dst_type_t *dst, \
- src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
-{ \
- uint32_t d; \
- src_type_t s1, s2; \
- uint8_t a1, a2; \
- int x1, x2; \
- \
- if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
- abort(); \
- \
- while ((w -= 2) >= 0) \
- { \
- x1 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s1 = src[x1]; \
- \
- x2 = vx >> 16; \
- vx += unit_x; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* This works because we know that unit_x is positive */ \
- while (vx >= max_vx) \
- vx -= max_vx; \
- } \
- s2 = src[x2]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- \
- if (a2 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- else if (s2) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
- a2 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
- } \
- } \
- \
- if (w & 1) \
- { \
- x1 = vx >> 16; \
- s1 = src[x1]; \
- \
- if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
- { \
- a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
- \
- if (a1 == 0xff) \
- { \
- *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- else if (s1) \
- { \
- d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
- s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
- a1 ^= 0xff; \
- UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
- *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
- } \
- dst++; \
- } \
- else /* PIXMAN_OP_SRC */ \
- { \
- *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
- } \
- } \
-}
-
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
-static void \
-fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dst_x, \
- int32_t dst_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type_t *dst_line; \
- src_type_t *src_first_line; \
- int y; \
- pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
- pixman_fixed_t max_vy; \
- pixman_vector_t v; \
- pixman_fixed_t vx, vy; \
- pixman_fixed_t unit_x, unit_y; \
- int32_t left_pad, right_pad; \
- \
- src_type_t *src; \
- dst_type_t *dst; \
- int src_stride, dst_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
- /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
- * transformed from destination space to source space */ \
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
- \
- /* reference point is the center of the pixel */ \
- v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
- v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
- v.vector[2] = pixman_fixed_1; \
- \
- if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
- return; \
- \
- unit_x = src_image->common.transform->matrix[0][0]; \
- unit_y = src_image->common.transform->matrix[1][1]; \
- \
- /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
- v.vector[0] -= pixman_fixed_e; \
- v.vector[1] -= pixman_fixed_e; \
- \
- vx = v.vector[0]; \
- vy = v.vector[1]; \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- { \
- /* Clamp repeating positions inside the actual samples */ \
- max_vx = src_image->bits.width << 16; \
- max_vy = src_image->bits.height << 16; \
- \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- } \
- \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
- PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
- &width, &left_pad, &right_pad); \
- vx += left_pad * unit_x; \
- } \
- \
- while (--height >= 0) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- \
- y = vy >> 16; \
- vy += unit_y; \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
- repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
- if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
- { \
- repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (dst, src, left_pad, 0, 0, 0); \
- } \
- if (width > 0) \
- { \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0); \
- } \
- } \
- else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
- { \
- static src_type_t zero[1] = { 0 }; \
- if (y < 0 || y >= src_image->bits.height) \
- { \
- scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0); \
- continue; \
- } \
- src = src_first_line + src_stride * y; \
- if (left_pad > 0) \
- { \
- scanline_func (dst, zero, left_pad, 0, 0, 0); \
- } \
- if (width > 0) \
- { \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
- } \
- if (right_pad > 0) \
- { \
- scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0); \
- } \
- } \
- else \
- { \
- src = src_first_line + src_stride * y; \
- scanline_func (dst, src, width, vx, unit_x, max_vx); \
- } \
- } \
-}
-
-/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
-
-#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
- src_type_t, dst_type_t, OP, repeat_mode) \
- FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
- OP, repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP, \
- scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
- src_type_t, dst_type_t, repeat_mode)
-
-
-#define SCALED_NEAREST_FLAGS \
- (FAST_PATH_SCALE_TRANSFORM | \
- FAST_PATH_NO_ALPHA_MAP | \
- FAST_PATH_NEAREST_FILTER | \
- FAST_PATH_NO_ACCESSORS | \
- FAST_PATH_NARROW_FORMAT)
-
-#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NORMAL_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_PAD_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- (SCALED_NEAREST_FLAGS | \
- FAST_PATH_NONE_REPEAT | \
- FAST_PATH_X_UNIT_POSITIVE), \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
- }
-
-#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
- { PIXMAN_OP_ ## op, \
- PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
- PIXMAN_null, 0, \
- PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
- fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
- }
-
-/* Prefer the use of 'cover' variant, because it is faster */
-#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
-
-#endif
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. SuSE makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author: Keith Packard, SuSE, Inc.
+ */
+
+#ifndef PIXMAN_FAST_PATH_H__
+#define PIXMAN_FAST_PATH_H__
+
+#include "pixman-private.h"
+
+#define PIXMAN_REPEAT_COVER -1
+
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
+{
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
+
+/*
+ * For each scanline fetched from source image with PAD repeat:
+ * - calculate how many pixels need to be padded on the left side
+ * - calculate how many pixels need to be padded on the right side
+ * - update width to only count pixels which are fetched from the image
+ * All this information is returned via 'width', 'left_pad', 'right_pad'
+ * arguments. The code is assuming that 'unit_x' is positive.
+ *
+ * Note: 64-bit math is used in order to avoid potential overflows, which
+ * is probably excessive in many cases. This particular function
+ * may need its own correctness test and performance tuning.
+ */
+static force_inline void
+pad_repeat_get_scanline_bounds (int32_t source_image_width,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ int32_t * width,
+ int32_t * left_pad,
+ int32_t * right_pad)
+{
+ int64_t max_vx = (int64_t) source_image_width << 16;
+ int64_t tmp;
+ if (vx < 0)
+ {
+ tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
+ if (tmp > *width)
+ {
+ *left_pad = *width;
+ *width = 0;
+ }
+ else
+ {
+ *left_pad = (int32_t) tmp;
+ *width -= (int32_t) tmp;
+ }
+ }
+ else
+ {
+ *left_pad = 0;
+ }
+ tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
+ if (tmp < 0)
+ {
+ *right_pad = *width;
+ *width = 0;
+ }
+ else if (tmp >= *width)
+ {
+ *right_pad = 0;
+ }
+ else
+ {
+ *right_pad = *width - (int32_t) tmp;
+ *width = (int32_t) tmp;
+ }
+}
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions, one that handles repeat normal,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instructions latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+static force_inline void \
+scanline_func_name (dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
+{ \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int x1, x2; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
+ return; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+}
+
+#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+static void \
+fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ mask_type_t *mask_line; \
+ src_type_t *src_first_line; \
+ int y; \
+ pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
+ pixman_fixed_t max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ int32_t left_pad, right_pad; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ mask_type_t solid_mask; \
+ const mask_type_t *mask = &solid_mask; \
+ int src_stride, mask_stride, dst_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ if (have_mask) \
+ { \
+ if (mask_is_solid) \
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
+ else \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+ mask_stride, mask_line, 1); \
+ } \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+ PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
+ &width, &left_pad, &right_pad); \
+ vx += left_pad * unit_x; \
+ } \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ if (have_mask && !mask_is_solid) \
+ { \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ } \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+ { \
+ repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, src + src_image->bits.width - 1, \
+ right_pad, 0, 0, 0, FALSE); \
+ } \
+ } \
+ else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+ { \
+ static const src_type_t zero[1] = { 0 }; \
+ if (y < 0 || y >= src_image->bits.height) \
+ { \
+ scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
+ continue; \
+ } \
+ src = src_first_line + src_stride * y; \
+ if (left_pad > 0) \
+ { \
+ scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
+ } \
+ if (width > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+ } \
+ if (right_pad > 0) \
+ { \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
+ } \
+ } \
+ else \
+ { \
+ src = src_first_line + src_stride * y; \
+ scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
+ } \
+ } \
+}
+
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
+#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+ static force_inline void \
+ scanline_func##scale_func_name##_wrapper ( \
+ const uint8_t *mask, \
+ dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
+ { \
+ scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
+ } \
+ FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
+ src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ repeat_mode) \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
+ dst_type_t, repeat_mode)
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, repeat_mode) \
+ FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
+ OP, repeat_mode) \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
+ scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+ src_type_t, dst_type_t, repeat_mode)
+
+
+#define SCALED_NEAREST_FLAGS \
+ (FAST_PATH_SCALE_TRANSFORM | \
+ FAST_PATH_NO_ALPHA_MAP | \
+ FAST_PATH_NEAREST_FILTER | \
+ FAST_PATH_NO_ACCESSORS | \
+ FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+/* Prefer the use of 'cover' variant, because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#endif
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 91adc0560..2e135e2fe 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -5800,7 +5800,8 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
{
uint32_t s, d;
const uint32_t* pm = NULL;
@@ -5809,6 +5810,9 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
+ if (fully_transparent_src)
+ return;
+
/* Align dst on a 16-byte boundary */
while (w && ((unsigned long)pd & 15))
{
@@ -5894,6 +5898,119 @@ FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, PAD)
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ __m128i xmm_mask;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ if (zero_src || (*mask >> 24) == 0)
+ return;
+
+ xmm_mask = create_mask_16_128 (*mask >> 24);
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m64 ms = unpack_32_1x64 (s);
+ __m64 alpha = expand_alpha_1x64 (ms);
+ __m64 dest = _mm_movepi64_pi64 (xmm_mask);
+ __m64 alpha_dst = unpack_32_1x64 (d);
+
+ *dst = pack_1x64_32 (
+ in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+ if (!is_zero (xmm_src))
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m64 ms = unpack_32_1x64 (s);
+ __m64 alpha = expand_alpha_1x64 (ms);
+ __m64 mask = _mm_movepi64_pi64 (xmm_mask);
+ __m64 dest = unpack_32_1x64 (d);
+
+ *dst = pack_1x64_32 (
+ in_over_1x64 (&ms, &alpha, &mask, &dest));
+ }
+
+ dst++;
+ w--;
+ }
+
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5990,6 +6107,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+
{ PIXMAN_OP_NONE },
};
diff --git a/pixman/test/Makefile.am b/pixman/test/Makefile.am
index 8d8471d1c..3ce466eec 100644
--- a/pixman/test/Makefile.am
+++ b/pixman/test/Makefile.am
@@ -1,7 +1,6 @@
AM_CFLAGS = @OPENMP_CFLAGS@
-AM_LDFLAGS = @OPENMP_CFLAGS@
-
-TEST_LDADD = $(top_builddir)/pixman/libpixman-1.la -lm
+AM_LDFLAGS = @OPENMP_CFLAGS@ @TESTPROGS_EXTRA_LDFLAGS@
+LDADD = $(top_builddir)/pixman/libpixman-1.la -lm
INCLUDES = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman
TESTPROGRAMS = \
@@ -22,121 +21,24 @@ TESTPROGRAMS = \
affine-test \
composite
-a1_trap_test_LDADD = $(TEST_LDADD)
-a1_trap_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-fetch_test_LDADD = $(TEST_LDADD)
-fetch_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-trap_crasher_LDADD = $(TEST_LDADD)
-trap_crasher_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-oob_test_LDADD = $(TEST_LDADD)
-oob_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-scaling_crash_test_LDADD = $(TEST_LDADD)
-scaling_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-region_translate_test_LDADD = $(TEST_LDADD)
-region_translate_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
-
-pdf_op_test_LDADD = $(TEST_LDADD)
-pdf_op_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
pdf_op_test_SOURCES = pdf-op-test.c utils.c utils.h
-
-region_test_LDADD = $(TEST_LDADD)
-region_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
region_test_SOURCES = region-test.c utils.c utils.h
-
-blitters_test_LDADD = $(TEST_LDADD)
-blitters_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
blitters_test_SOURCES = blitters-test.c utils.c utils.h
-
-scaling_test_LDADD = $(TEST_LDADD)
-scaling_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
scaling_test_SOURCES = scaling-test.c utils.c utils.h
-
-affine_test_LDADD = $(TEST_LDADD)
-affine_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
affine_test_SOURCES = affine-test.c utils.c utils.h
-
-alphamap_LDADD = $(TEST_LDADD)
-alphamap_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
alphamap_SOURCES = alphamap.c utils.c utils.h
-
-alpha_loop_LDADD = $(TEST_LDADD)
-alpha_loop_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
-
-composite_LDADD = $(TEST_LDADD)
-composite_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
composite_SOURCES = composite.c utils.c utils.h
-
-gradient_crash_test_LDADD = $(TEST_LDADD)
-gradient_crash_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
-
-stress_test_LDADD = $(TEST_LDADD)
-stress_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
stress_test_SOURCES = stress-test.c utils.c utils.h
-# GTK using test programs
-
-if HAVE_GTK
-
-GTK_LDADD = $(TEST_LDADD) $(GTK_LIBS)
-GTK_UTILS = gtk-utils.c gtk-utils.h
-
-TESTPROGRAMS_GTK = \
- clip-test \
- clip-in \
- composite-test \
- gradient-test \
- radial-test \
- alpha-test \
- screen-test \
- convolution-test \
- trap-test
-
-INCLUDES += $(GTK_CFLAGS)
-
-gradient_test_LDADD = $(GTK_LDADD)
-gradient_test_SOURCES = gradient-test.c $(GTK_UTILS)
-
-radial_test_LDADD = $(GTK_LDADD)
-radial_test_SOURCES = radial-test.c utils.c utils.h $(GTK_UTILS)
-
-alpha_test_LDADD = $(GTK_LDADD)
-alpha_test_SOURCES = alpha-test.c $(GTK_UTILS)
-
-composite_test_LDADD = $(GTK_LDADD)
-composite_test_SOURCES = composite-test.c $(GTK_UTILS)
-
-clip_test_LDADD = $(GTK_LDADD)
-clip_test_SOURCES = clip-test.c $(GTK_UTILS)
-
-clip_in_LDADD = $(GTK_LDADD)
-clip_in_SOURCES = clip-in.c $(GTK_UTILS)
-
-trap_test_LDADD = $(GTK_LDADD)
-trap_test_SOURCES = trap-test.c $(GTK_UTILS)
-
-screen_test_LDADD = $(GTK_LDADD)
-screen_test_SOURCES = screen-test.c $(GTK_UTILS)
-
-convolution_test_LDADD = $(GTK_LDADD)
-convolution_test_SOURCES = convolution-test.c $(GTK_UTILS)
-
-endif
-
# Benchmarks
BENCHMARKS = \
lowlevel-blt-bench
lowlevel_blt_bench_SOURCES = lowlevel-blt-bench.c utils.c utils.h
-lowlevel_blt_bench_LDADD = $(TEST_LDADD)
-noinst_PROGRAMS = $(TESTPROGRAMS) $(TESTPROGRAMS_GTK) $(BENCHMARKS)
+noinst_PROGRAMS = $(TESTPROGRAMS) $(BENCHMARKS)
TESTS = $(TESTPROGRAMS)
diff --git a/pixman/test/scaling-test.c b/pixman/test/scaling-test.c
index 7b78017a3..dbb9d39b0 100644
--- a/pixman/test/scaling-test.c
+++ b/pixman/test/scaling-test.c
@@ -1,250 +1,368 @@
-/*
- * Test program, which can detect some problems with nearest neighbour
- * and bilinear scaling in pixman. Testing is done by running lots
- * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8
- * and r5g6b5 color formats.
- *
- * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in
- * the case of test failure.
- */
-#include <assert.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "utils.h"
-
-#define MAX_SRC_WIDTH 16
-#define MAX_SRC_HEIGHT 16
-#define MAX_DST_WIDTH 16
-#define MAX_DST_HEIGHT 16
-#define MAX_STRIDE 4
-
-/*
- * Composite operation with pseudorandom images
- */
-uint32_t
-test_composite (int testnum,
- int verbose)
-{
- int i;
- pixman_image_t * src_img;
- pixman_image_t * dst_img;
- pixman_transform_t transform;
- pixman_region16_t clip;
- int src_width, src_height;
- int dst_width, dst_height;
- int src_stride, dst_stride;
- int src_x, src_y;
- int dst_x, dst_y;
- int src_bpp;
- int dst_bpp;
- int w, h;
- pixman_fixed_t scale_x = 65536, scale_y = 65536;
- pixman_fixed_t translate_x = 0, translate_y = 0;
- pixman_op_t op;
- pixman_repeat_t repeat = PIXMAN_REPEAT_NONE;
- pixman_format_code_t src_fmt, dst_fmt;
- uint32_t * srcbuf;
- uint32_t * dstbuf;
- uint32_t crc32;
- FLOAT_REGS_CORRUPTION_DETECTOR_START ();
-
- lcg_srand (testnum);
-
- src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
- dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
- op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
-
- src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
- src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
- dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
- dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
- src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
- dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
-
- if (src_stride & 3)
- src_stride += 2;
-
- if (dst_stride & 3)
- dst_stride += 2;
-
- src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
- src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
- dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
- dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
- w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
- h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
-
- srcbuf = (uint32_t *)malloc (src_stride * src_height);
- dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
-
- for (i = 0; i < src_stride * src_height; i++)
- *((uint8_t *)srcbuf + i) = lcg_rand_n (256);
-
- for (i = 0; i < dst_stride * dst_height; i++)
- *((uint8_t *)dstbuf + i) = lcg_rand_n (256);
-
- src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
- PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
-
- dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
- PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
-
- src_img = pixman_image_create_bits (
- src_fmt, src_width, src_height, srcbuf, src_stride);
-
- dst_img = pixman_image_create_bits (
- dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
-
- image_endian_swap (src_img, src_bpp * 8);
- image_endian_swap (dst_img, dst_bpp * 8);
-
- if (lcg_rand_n (8) > 0)
- {
- scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
- scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
- translate_x = lcg_rand_N (65536);
- translate_y = lcg_rand_N (65536);
- pixman_transform_init_scale (&transform, scale_x, scale_y);
- pixman_transform_translate (&transform, NULL, translate_x, translate_y);
- pixman_image_set_transform (src_img, &transform);
- }
-
- switch (lcg_rand_n (4))
- {
- case 0:
- repeat = PIXMAN_REPEAT_NONE;
- break;
-
- case 1:
- repeat = PIXMAN_REPEAT_NORMAL;
- break;
-
- case 2:
- repeat = PIXMAN_REPEAT_PAD;
- break;
-
- case 3:
- repeat = PIXMAN_REPEAT_REFLECT;
- break;
-
- default:
- break;
- }
- pixman_image_set_repeat (src_img, repeat);
-
- if (lcg_rand_n (2))
- pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
- else
- pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
-
- if (verbose)
- {
- printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
- printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
- op, scale_x, scale_y, repeat);
- printf ("translate_x=%d, translate_y=%d\n",
- translate_x, translate_y);
- printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
- src_width, src_height, dst_width, dst_height);
- printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
- src_x, src_y, dst_x, dst_y);
- printf ("w=%d, h=%d\n", w, h);
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
-
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (src_width);
- clip_boxes[i].y1 = lcg_rand_n (src_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("source clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
-
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (src_img, &clip);
- pixman_image_set_source_clipping (src_img, 1);
- pixman_region_fini (&clip);
- }
-
- if (lcg_rand_n (8) == 0)
- {
- pixman_box16_t clip_boxes[2];
- int n = lcg_rand_n (2) + 1;
- for (i = 0; i < n; i++)
- {
- clip_boxes[i].x1 = lcg_rand_n (dst_width);
- clip_boxes[i].y1 = lcg_rand_n (dst_height);
- clip_boxes[i].x2 =
- clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
- clip_boxes[i].y2 =
- clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
-
- if (verbose)
- {
- printf ("destination clip box: [%d,%d-%d,%d]\n",
- clip_boxes[i].x1, clip_boxes[i].y1,
- clip_boxes[i].x2, clip_boxes[i].y2);
- }
- }
- pixman_region_init_rects (&clip, clip_boxes, n);
- pixman_image_set_clip_region (dst_img, &clip);
- pixman_region_fini (&clip);
- }
-
- pixman_image_composite (op, src_img, NULL, dst_img,
- src_x, src_y, 0, 0, dst_x, dst_y, w, h);
-
- if (dst_fmt == PIXMAN_x8r8g8b8)
- {
- /* ignore unused part */
- for (i = 0; i < dst_stride * dst_height / 4; i++)
- dstbuf[i] &= 0xFFFFFF;
- }
-
- image_endian_swap (dst_img, dst_bpp * 8);
-
- if (verbose)
- {
- int j;
-
- for (i = 0; i < dst_height; i++)
- {
- for (j = 0; j < dst_stride; j++)
- printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-
- printf ("\n");
- }
- }
-
- pixman_image_unref (src_img);
- pixman_image_unref (dst_img);
-
- crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
- free (srcbuf);
- free (dstbuf);
-
- FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
- return crc32;
-}
-
-int
-main (int argc, const char *argv[])
-{
- pixman_disable_out_of_bounds_workaround ();
-
- return fuzzer_test_main("scaling", 8000000, 0x7F1AB59F,
- test_composite, argc, argv);
-}
+/*
+ * Test program, which can detect some problems with nearest neighbour
+ * and bilinear scaling in pixman. Testing is done by running lots
+ * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8
+ * and r5g6b5 color formats.
+ *
+ * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in
+ * the case of test failure.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define MAX_SRC_WIDTH 48
+#define MAX_SRC_HEIGHT 8
+#define MAX_DST_WIDTH 48
+#define MAX_DST_HEIGHT 8
+#define MAX_STRIDE 4
+
+/*
+ * Composite operation with pseudorandom images
+ */
+uint32_t
+test_composite (int testnum,
+ int verbose)
+{
+ int i;
+ pixman_image_t * src_img;
+ pixman_image_t * mask_img;
+ pixman_image_t * dst_img;
+ pixman_transform_t transform;
+ pixman_region16_t clip;
+ int src_width, src_height;
+ int mask_width, mask_height;
+ int dst_width, dst_height;
+ int src_stride, mask_stride, dst_stride;
+ int src_x, src_y;
+ int mask_x, mask_y;
+ int dst_x, dst_y;
+ int src_bpp;
+ int mask_bpp = 1;
+ int dst_bpp;
+ int w, h;
+ pixman_fixed_t scale_x = 65536, scale_y = 65536;
+ pixman_fixed_t translate_x = 0, translate_y = 0;
+ pixman_fixed_t mask_scale_x = 65536, mask_scale_y = 65536;
+ pixman_fixed_t mask_translate_x = 0, mask_translate_y = 0;
+ pixman_op_t op;
+ pixman_repeat_t repeat = PIXMAN_REPEAT_NONE;
+ pixman_repeat_t mask_repeat = PIXMAN_REPEAT_NONE;
+ pixman_format_code_t src_fmt, dst_fmt;
+ uint32_t * srcbuf;
+ uint32_t * dstbuf;
+ uint32_t * maskbuf;
+ uint32_t crc32;
+ FLOAT_REGS_CORRUPTION_DETECTOR_START ();
+
+ lcg_srand (testnum);
+
+ src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
+ dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
+ switch (lcg_rand_n (3))
+ {
+ case 0:
+ op = PIXMAN_OP_SRC;
+ break;
+ case 1:
+ op = PIXMAN_OP_OVER;
+ break;
+ default:
+ op = PIXMAN_OP_ADD;
+ break;
+ }
+
+ src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+ src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+
+ if (lcg_rand_n (2))
+ {
+ mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+ mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+ }
+ else
+ {
+ mask_width = mask_height = 1;
+ }
+
+ dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
+ dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
+ src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+ mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp;
+ dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+
+ if (src_stride & 3)
+ src_stride += 2;
+
+ if (mask_stride & 1)
+ mask_stride += 1;
+ if (mask_stride & 2)
+ mask_stride += 2;
+
+ if (dst_stride & 3)
+ dst_stride += 2;
+
+ src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
+ src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+ mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2);
+ mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2);
+ dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
+ dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
+ w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
+ h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
+
+ srcbuf = (uint32_t *)malloc (src_stride * src_height);
+ maskbuf = (uint32_t *)malloc (mask_stride * mask_height);
+ dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
+
+ for (i = 0; i < src_stride * src_height; i++)
+ *((uint8_t *)srcbuf + i) = lcg_rand_n (256);
+
+ for (i = 0; i < mask_stride * mask_height; i++)
+ *((uint8_t *)maskbuf + i) = lcg_rand_n (256);
+
+ for (i = 0; i < dst_stride * dst_height; i++)
+ *((uint8_t *)dstbuf + i) = lcg_rand_n (256);
+
+ src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
+
+ dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+ PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
+
+ src_img = pixman_image_create_bits (
+ src_fmt, src_width, src_height, srcbuf, src_stride);
+
+ mask_img = pixman_image_create_bits (
+ PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride);
+
+ dst_img = pixman_image_create_bits (
+ dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+
+ image_endian_swap (src_img, src_bpp * 8);
+ image_endian_swap (dst_img, dst_bpp * 8);
+
+ if (lcg_rand_n (4) > 0)
+ {
+ scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
+ scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
+ translate_x = lcg_rand_N (65536);
+ translate_y = lcg_rand_N (65536);
+ pixman_transform_init_scale (&transform, scale_x, scale_y);
+ pixman_transform_translate (&transform, NULL, translate_x, translate_y);
+ pixman_image_set_transform (src_img, &transform);
+ }
+
+ if (lcg_rand_n (2) > 0)
+ {
+ mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
+ mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
+ mask_translate_x = lcg_rand_N (65536);
+ mask_translate_y = lcg_rand_N (65536);
+ pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y);
+ pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y);
+ pixman_image_set_transform (mask_img, &transform);
+ }
+
+ switch (lcg_rand_n (4))
+ {
+ case 0:
+ mask_repeat = PIXMAN_REPEAT_NONE;
+ break;
+
+ case 1:
+ mask_repeat = PIXMAN_REPEAT_NORMAL;
+ break;
+
+ case 2:
+ mask_repeat = PIXMAN_REPEAT_PAD;
+ break;
+
+ case 3:
+ mask_repeat = PIXMAN_REPEAT_REFLECT;
+ break;
+
+ default:
+ break;
+ }
+ pixman_image_set_repeat (mask_img, mask_repeat);
+
+ switch (lcg_rand_n (4))
+ {
+ case 0:
+ repeat = PIXMAN_REPEAT_NONE;
+ break;
+
+ case 1:
+ repeat = PIXMAN_REPEAT_NORMAL;
+ break;
+
+ case 2:
+ repeat = PIXMAN_REPEAT_PAD;
+ break;
+
+ case 3:
+ repeat = PIXMAN_REPEAT_REFLECT;
+ break;
+
+ default:
+ break;
+ }
+ pixman_image_set_repeat (src_img, repeat);
+
+ if (lcg_rand_n (2))
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+ else
+ pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+ if (lcg_rand_n (2))
+ pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+ else
+ pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+ if (verbose)
+ {
+ printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
+ printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
+ op, scale_x, scale_y, repeat);
+ printf ("translate_x=%d, translate_y=%d\n",
+ translate_x, translate_y);
+ printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
+ src_width, src_height, dst_width, dst_height);
+ printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
+ src_x, src_y, dst_x, dst_y);
+ printf ("w=%d, h=%d\n", w, h);
+ }
+
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (src_width);
+ clip_boxes[i].y1 = lcg_rand_n (src_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("source clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (src_img, &clip);
+ pixman_image_set_source_clipping (src_img, 1);
+ pixman_region_fini (&clip);
+ }
+
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (mask_width);
+ clip_boxes[i].y1 = lcg_rand_n (mask_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("mask clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (mask_img, &clip);
+ pixman_image_set_source_clipping (mask_img, 1);
+ pixman_region_fini (&clip);
+ }
+
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (dst_width);
+ clip_boxes[i].y1 = lcg_rand_n (dst_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("destination clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (dst_img, &clip);
+ pixman_region_fini (&clip);
+ }
+
+ if (lcg_rand_n (2) == 0)
+ pixman_image_composite (op, src_img, NULL, dst_img,
+ src_x, src_y, 0, 0, dst_x, dst_y, w, h);
+ else
+ pixman_image_composite (op, src_img, mask_img, dst_img,
+ src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
+
+ if (dst_fmt == PIXMAN_x8r8g8b8)
+ {
+ /* ignore unused part */
+ for (i = 0; i < dst_stride * dst_height / 4; i++)
+ dstbuf[i] &= 0xFFFFFF;
+ }
+
+ image_endian_swap (dst_img, dst_bpp * 8);
+
+ if (verbose)
+ {
+ int j;
+
+ for (i = 0; i < dst_height; i++)
+ {
+ for (j = 0; j < dst_stride; j++)
+ printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
+
+ printf ("\n");
+ }
+ }
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (mask_img);
+ pixman_image_unref (dst_img);
+
+ crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
+ free (srcbuf);
+ free (maskbuf);
+ free (dstbuf);
+
+ FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
+ return crc32;
+}
+
+int
+main (int argc, const char *argv[])
+{
+ pixman_disable_out_of_bounds_workaround ();
+
+ return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2,
+ test_composite, argc, argv);
+}