From 01df5d59e56a1b060568f8cad2e89f7eea22fc70 Mon Sep 17 00:00:00 2001 From: marha Date: Mon, 29 Aug 2011 08:51:20 +0200 Subject: xwininfo libX11 libXmu libxcb mesa xserver xkeyboard-config git update 29 aug 2011 --- mesalib/src/mesa/swrast/s_aatritemp.h | 674 ++++----- mesalib/src/mesa/swrast/s_context.c | 105 +- mesalib/src/mesa/swrast/s_stencil.c | 2491 ++++++++++++++++---------------- mesalib/src/mesa/swrast/s_texcombine.c | 1506 +++++++++---------- 4 files changed, 2364 insertions(+), 2412 deletions(-) (limited to 'mesalib/src/mesa/swrast') diff --git a/mesalib/src/mesa/swrast/s_aatritemp.h b/mesalib/src/mesa/swrast/s_aatritemp.h index 4136df3a7..77b3ae6ec 100644 --- a/mesalib/src/mesa/swrast/s_aatritemp.h +++ b/mesalib/src/mesa/swrast/s_aatritemp.h @@ -1,331 +1,343 @@ -/* - * Mesa 3-D graphics library - * Version: 7.0.3 - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/* - * Antialiased Triangle Rasterizer Template - * - * This file is #include'd to generate custom AA triangle rasterizers. - * NOTE: this code hasn't been optimized yet. That'll come after it - * works correctly. - * - * The following macros may be defined to indicate what auxillary information - * must be copmuted across the triangle: - * DO_Z - if defined, compute Z values - * DO_ATTRIBS - if defined, compute texcoords, varying, etc. - */ - -/*void triangle( struct gl_context *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint pv )*/ -{ - const SWcontext *swrast = SWRAST_CONTEXT(ctx); - const GLfloat *p0 = v0->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *p1 = v1->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *p2 = v2->attrib[FRAG_ATTRIB_WPOS]; - const SWvertex *vMin, *vMid, *vMax; - GLint iyMin, iyMax; - GLfloat yMin, yMax; - GLboolean ltor; - GLfloat majDx, majDy; /* major (i.e. long) edge dx and dy */ - - SWspan span; - -#ifdef DO_Z - GLfloat zPlane[4]; -#endif - GLfloat rPlane[4], gPlane[4], bPlane[4], aPlane[4]; -#if defined(DO_ATTRIBS) - GLfloat attrPlane[FRAG_ATTRIB_MAX][4][4]; - GLfloat wPlane[4]; /* win[3] */ -#endif - GLfloat bf = SWRAST_CONTEXT(ctx)->_BackfaceCullSign; - - (void) swrast; - - INIT_SPAN(span, GL_POLYGON); - span.arrayMask = SPAN_COVERAGE; - - /* determine bottom to top order of vertices */ - { - GLfloat y0 = v0->attrib[FRAG_ATTRIB_WPOS][1]; - GLfloat y1 = v1->attrib[FRAG_ATTRIB_WPOS][1]; - GLfloat y2 = v2->attrib[FRAG_ATTRIB_WPOS][1]; - if (y0 <= y1) { - if (y1 <= y2) { - vMin = v0; vMid = v1; vMax = v2; /* y0<=y1<=y2 */ - } - else if (y2 <= y0) { - vMin = v2; vMid = v0; vMax = v1; /* y2<=y0<=y1 */ - } - else { - vMin = v0; vMid = v2; vMax = v1; bf = -bf; /* y0<=y2<=y1 */ - } - } - else { - if (y0 <= y2) { - vMin = v1; vMid = v0; vMax = v2; bf = -bf; /* y1<=y0<=y2 */ - } - else if (y2 <= y1) { - vMin = v2; vMid = v1; vMax = v0; bf = -bf; /* y2<=y1<=y0 */ - } - else { - vMin = v1; vMid = v2; vMax = v0; /* y1<=y2<=y0 */ - } - } - } - - majDx = vMax->attrib[FRAG_ATTRIB_WPOS][0] - vMin->attrib[FRAG_ATTRIB_WPOS][0]; - majDy = vMax->attrib[FRAG_ATTRIB_WPOS][1] - vMin->attrib[FRAG_ATTRIB_WPOS][1]; - - /* front/back-face determination and cullling */ - { - const GLfloat botDx = vMid->attrib[FRAG_ATTRIB_WPOS][0] - vMin->attrib[FRAG_ATTRIB_WPOS][0]; - const GLfloat botDy = vMid->attrib[FRAG_ATTRIB_WPOS][1] - vMin->attrib[FRAG_ATTRIB_WPOS][1]; - const GLfloat area = majDx * botDy - botDx * majDy; - /* Do backface culling */ - if (area * bf < 0 || area == 0 || IS_INF_OR_NAN(area)) - return; - ltor = (GLboolean) (area < 0.0F); - - span.facing = area * swrast->_BackfaceSign > 0.0F; - } - - /* Plane equation setup: - * We evaluate plane equations at window (x,y) coordinates in order - * to compute color, Z, fog, texcoords, etc. This isn't terribly - * efficient but it's easy and reliable. - */ -#ifdef DO_Z - compute_plane(p0, p1, p2, p0[2], p1[2], p2[2], zPlane); - span.arrayMask |= SPAN_Z; -#endif - if (ctx->Light.ShadeModel == GL_SMOOTH) { - compute_plane(p0, p1, p2, v0->color[RCOMP], v1->color[RCOMP], v2->color[RCOMP], rPlane); - compute_plane(p0, p1, p2, v0->color[GCOMP], v1->color[GCOMP], v2->color[GCOMP], gPlane); - compute_plane(p0, p1, p2, v0->color[BCOMP], v1->color[BCOMP], v2->color[BCOMP], bPlane); - compute_plane(p0, p1, p2, v0->color[ACOMP], v1->color[ACOMP], v2->color[ACOMP], aPlane); - } - else { - constant_plane(v2->color[RCOMP], rPlane); - constant_plane(v2->color[GCOMP], gPlane); - constant_plane(v2->color[BCOMP], bPlane); - constant_plane(v2->color[ACOMP], aPlane); - } - span.arrayMask |= SPAN_RGBA; -#if defined(DO_ATTRIBS) - { - const GLfloat invW0 = v0->attrib[FRAG_ATTRIB_WPOS][3]; - const GLfloat invW1 = v1->attrib[FRAG_ATTRIB_WPOS][3]; - const GLfloat invW2 = v2->attrib[FRAG_ATTRIB_WPOS][3]; - compute_plane(p0, p1, p2, invW0, invW1, invW2, wPlane); - span.attrStepX[FRAG_ATTRIB_WPOS][3] = plane_dx(wPlane); - span.attrStepY[FRAG_ATTRIB_WPOS][3] = plane_dy(wPlane); - ATTRIB_LOOP_BEGIN - GLuint c; - if (swrast->_InterpMode[attr] == GL_FLAT) { - for (c = 0; c < 4; c++) { - constant_plane(v2->attrib[attr][c] * invW2, attrPlane[attr][c]); - } - } - else { - for (c = 0; c < 4; c++) { - const GLfloat a0 = v0->attrib[attr][c] * invW0; - const GLfloat a1 = v1->attrib[attr][c] * invW1; - const GLfloat a2 = v2->attrib[attr][c] * invW2; - compute_plane(p0, p1, p2, a0, a1, a2, attrPlane[attr][c]); - } - } - for (c = 0; c < 4; c++) { - span.attrStepX[attr][c] = plane_dx(attrPlane[attr][c]); - span.attrStepY[attr][c] = plane_dy(attrPlane[attr][c]); - } - ATTRIB_LOOP_END - } -#endif - - /* Begin bottom-to-top scan over the triangle. - * The long edge will either be on the left or right side of the - * triangle. We always scan from the long edge toward the shorter - * edges, stopping when we find that coverage = 0. If the long edge - * is on the left we scan left-to-right. Else, we scan right-to-left. - */ - yMin = vMin->attrib[FRAG_ATTRIB_WPOS][1]; - yMax = vMax->attrib[FRAG_ATTRIB_WPOS][1]; - iyMin = (GLint) yMin; - iyMax = (GLint) yMax + 1; - - if (ltor) { - /* scan left to right */ - const GLfloat *pMin = vMin->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *pMid = vMid->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat dxdy = majDx / majDy; - const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; - GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { - GLint ix, startX = (GLint) (x - xAdj); - GLuint count; - GLfloat coverage = 0.0F; - - /* skip over fragments with zero coverage */ - while (startX < MAX_WIDTH) { - coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); - if (coverage > 0.0F) - break; - startX++; - } - - /* enter interior of triangle */ - ix = startX; - -#if defined(DO_ATTRIBS) - /* compute attributes at left-most fragment */ - span.attrStart[FRAG_ATTRIB_WPOS][3] = solve_plane(ix + 0.5F, iy + 0.5F, wPlane); - ATTRIB_LOOP_BEGIN - GLuint c; - for (c = 0; c < 4; c++) { - span.attrStart[attr][c] = solve_plane(ix + 0.5F, iy + 0.5F, attrPlane[attr][c]); - } - ATTRIB_LOOP_END -#endif - - count = 0; - while (coverage > 0.0F) { - /* (cx,cy) = center of fragment */ - const GLfloat cx = ix + 0.5F, cy = iy + 0.5F; - SWspanarrays *array = span.array; - array->coverage[count] = coverage; -#ifdef DO_Z - array->z[count] = (GLuint) solve_plane(cx, cy, zPlane); -#endif - array->rgba[count][RCOMP] = solve_plane_chan(cx, cy, rPlane); - array->rgba[count][GCOMP] = solve_plane_chan(cx, cy, gPlane); - array->rgba[count][BCOMP] = solve_plane_chan(cx, cy, bPlane); - array->rgba[count][ACOMP] = solve_plane_chan(cx, cy, aPlane); - ix++; - count++; - coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); - } - - if (ix <= startX) - continue; - - span.x = startX; - span.y = iy; - span.end = (GLuint) ix - (GLuint) startX; - _swrast_write_rgba_span(ctx, &span); - } - } - else { - /* scan right to left */ - const GLfloat *pMin = vMin->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *pMid = vMid->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; - const GLfloat dxdy = majDx / majDy; - const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F; - GLfloat x = pMin[0] - (yMin - iyMin) * dxdy; - GLint iy; - for (iy = iyMin; iy < iyMax; iy++, x += dxdy) { - GLint ix, left, startX = (GLint) (x + xAdj); - GLuint count, n; - GLfloat coverage = 0.0F; - - /* make sure we're not past the window edge */ - if (startX >= ctx->DrawBuffer->_Xmax) { - startX = ctx->DrawBuffer->_Xmax - 1; - } - - /* skip fragments with zero coverage */ - while (startX > 0) { - coverage = compute_coveragef(pMin, pMax, pMid, startX, iy); - if (coverage > 0.0F) - break; - startX--; - } - - /* enter interior of triangle */ - ix = startX; - count = 0; - while (coverage > 0.0F) { - /* (cx,cy) = center of fragment */ - const GLfloat cx = ix + 0.5F, cy = iy + 0.5F; - SWspanarrays *array = span.array; - ASSERT(ix >= 0); - array->coverage[ix] = coverage; -#ifdef DO_Z - array->z[ix] = (GLuint) solve_plane(cx, cy, zPlane); -#endif - array->rgba[ix][RCOMP] = solve_plane_chan(cx, cy, rPlane); - array->rgba[ix][GCOMP] = solve_plane_chan(cx, cy, gPlane); - array->rgba[ix][BCOMP] = solve_plane_chan(cx, cy, bPlane); - array->rgba[ix][ACOMP] = solve_plane_chan(cx, cy, aPlane); - ix--; - count++; - coverage = compute_coveragef(pMin, pMax, pMid, ix, iy); - } - -#if defined(DO_ATTRIBS) - /* compute attributes at left-most fragment */ - span.attrStart[FRAG_ATTRIB_WPOS][3] = solve_plane(ix + 1.5F, iy + 0.5F, wPlane); - ATTRIB_LOOP_BEGIN - GLuint c; - for (c = 0; c < 4; c++) { - span.attrStart[attr][c] = solve_plane(ix + 1.5F, iy + 0.5F, attrPlane[attr][c]); - } - ATTRIB_LOOP_END -#endif - - if (startX <= ix) - continue; - - n = (GLuint) startX - (GLuint) ix; - - left = ix + 1; - - /* shift all values to the left */ - /* XXX this is temporary */ - { - SWspanarrays *array = span.array; - GLint j; - for (j = 0; j < (GLint) n; j++) { - array->coverage[j] = array->coverage[j + left]; - COPY_CHAN4(array->rgba[j], array->rgba[j + left]); -#ifdef DO_Z - array->z[j] = array->z[j + left]; -#endif - } - } - - span.x = left; - span.y = iy; - span.end = n; - _swrast_write_rgba_span(ctx, &span); - } - } -} - - -#undef DO_Z -#undef DO_ATTRIBS -#undef DO_OCCLUSION_TEST +/* + * Mesa 3-D graphics library + * Version: 7.0.3 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * Antialiased Triangle Rasterizer Template + * + * This file is #include'd to generate custom AA triangle rasterizers. + * NOTE: this code hasn't been optimized yet. That'll come after it + * works correctly. + * + * The following macros may be defined to indicate what auxillary information + * must be copmuted across the triangle: + * DO_Z - if defined, compute Z values + * DO_ATTRIBS - if defined, compute texcoords, varying, etc. + */ + +/*void triangle( struct gl_context *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint pv )*/ +{ + const SWcontext *swrast = SWRAST_CONTEXT(ctx); + const GLfloat *p0 = v0->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *p1 = v1->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *p2 = v2->attrib[FRAG_ATTRIB_WPOS]; + const SWvertex *vMin, *vMid, *vMax; + GLint iyMin, iyMax; + GLfloat yMin, yMax; + GLboolean ltor; + GLfloat majDx, majDy; /* major (i.e. long) edge dx and dy */ + + SWspan span; + +#ifdef DO_Z + GLfloat zPlane[4]; +#endif + GLfloat rPlane[4], gPlane[4], bPlane[4], aPlane[4]; +#if defined(DO_ATTRIBS) + GLfloat attrPlane[FRAG_ATTRIB_MAX][4][4]; + GLfloat wPlane[4]; /* win[3] */ +#endif + GLfloat bf = SWRAST_CONTEXT(ctx)->_BackfaceCullSign; + + (void) swrast; + + INIT_SPAN(span, GL_POLYGON); + span.arrayMask = SPAN_COVERAGE; + + /* determine bottom to top order of vertices */ + { + GLfloat y0 = v0->attrib[FRAG_ATTRIB_WPOS][1]; + GLfloat y1 = v1->attrib[FRAG_ATTRIB_WPOS][1]; + GLfloat y2 = v2->attrib[FRAG_ATTRIB_WPOS][1]; + if (y0 <= y1) { + if (y1 <= y2) { + vMin = v0; vMid = v1; vMax = v2; /* y0<=y1<=y2 */ + } + else if (y2 <= y0) { + vMin = v2; vMid = v0; vMax = v1; /* y2<=y0<=y1 */ + } + else { + vMin = v0; vMid = v2; vMax = v1; bf = -bf; /* y0<=y2<=y1 */ + } + } + else { + if (y0 <= y2) { + vMin = v1; vMid = v0; vMax = v2; bf = -bf; /* y1<=y0<=y2 */ + } + else if (y2 <= y1) { + vMin = v2; vMid = v1; vMax = v0; bf = -bf; /* y2<=y1<=y0 */ + } + else { + vMin = v1; vMid = v2; vMax = v0; /* y1<=y2<=y0 */ + } + } + } + + majDx = vMax->attrib[FRAG_ATTRIB_WPOS][0] - vMin->attrib[FRAG_ATTRIB_WPOS][0]; + majDy = vMax->attrib[FRAG_ATTRIB_WPOS][1] - vMin->attrib[FRAG_ATTRIB_WPOS][1]; + + /* front/back-face determination and cullling */ + { + const GLfloat botDx = vMid->attrib[FRAG_ATTRIB_WPOS][0] - vMin->attrib[FRAG_ATTRIB_WPOS][0]; + const GLfloat botDy = vMid->attrib[FRAG_ATTRIB_WPOS][1] - vMin->attrib[FRAG_ATTRIB_WPOS][1]; + const GLfloat area = majDx * botDy - botDx * majDy; + /* Do backface culling */ + if (area * bf < 0 || area == 0 || IS_INF_OR_NAN(area)) + return; + ltor = (GLboolean) (area < 0.0F); + + span.facing = area * swrast->_BackfaceSign > 0.0F; + } + + /* Plane equation setup: + * We evaluate plane equations at window (x,y) coordinates in order + * to compute color, Z, fog, texcoords, etc. This isn't terribly + * efficient but it's easy and reliable. + */ +#ifdef DO_Z + compute_plane(p0, p1, p2, p0[2], p1[2], p2[2], zPlane); + span.arrayMask |= SPAN_Z; +#endif + if (ctx->Light.ShadeModel == GL_SMOOTH) { + compute_plane(p0, p1, p2, v0->color[RCOMP], v1->color[RCOMP], v2->color[RCOMP], rPlane); + compute_plane(p0, p1, p2, v0->color[GCOMP], v1->color[GCOMP], v2->color[GCOMP], gPlane); + compute_plane(p0, p1, p2, v0->color[BCOMP], v1->color[BCOMP], v2->color[BCOMP], bPlane); + compute_plane(p0, p1, p2, v0->color[ACOMP], v1->color[ACOMP], v2->color[ACOMP], aPlane); + } + else { + constant_plane(v2->color[RCOMP], rPlane); + constant_plane(v2->color[GCOMP], gPlane); + constant_plane(v2->color[BCOMP], bPlane); + constant_plane(v2->color[ACOMP], aPlane); + } + span.arrayMask |= SPAN_RGBA; +#if defined(DO_ATTRIBS) + { + const GLfloat invW0 = v0->attrib[FRAG_ATTRIB_WPOS][3]; + const GLfloat invW1 = v1->attrib[FRAG_ATTRIB_WPOS][3]; + const GLfloat invW2 = v2->attrib[FRAG_ATTRIB_WPOS][3]; + compute_plane(p0, p1, p2, invW0, invW1, invW2, wPlane); + span.attrStepX[FRAG_ATTRIB_WPOS][3] = plane_dx(wPlane); + span.attrStepY[FRAG_ATTRIB_WPOS][3] = plane_dy(wPlane); + ATTRIB_LOOP_BEGIN + GLuint c; + if (swrast->_InterpMode[attr] == GL_FLAT) { + for (c = 0; c < 4; c++) { + constant_plane(v2->attrib[attr][c] * invW2, attrPlane[attr][c]); + } + } + else { + for (c = 0; c < 4; c++) { + const GLfloat a0 = v0->attrib[attr][c] * invW0; + const GLfloat a1 = v1->attrib[attr][c] * invW1; + const GLfloat a2 = v2->attrib[attr][c] * invW2; + compute_plane(p0, p1, p2, a0, a1, a2, attrPlane[attr][c]); + } + } + for (c = 0; c < 4; c++) { + span.attrStepX[attr][c] = plane_dx(attrPlane[attr][c]); + span.attrStepY[attr][c] = plane_dy(attrPlane[attr][c]); + } + ATTRIB_LOOP_END + } +#endif + + /* Begin bottom-to-top scan over the triangle. + * The long edge will either be on the left or right side of the + * triangle. We always scan from the long edge toward the shorter + * edges, stopping when we find that coverage = 0. If the long edge + * is on the left we scan left-to-right. Else, we scan right-to-left. + */ + yMin = vMin->attrib[FRAG_ATTRIB_WPOS][1]; + yMax = vMax->attrib[FRAG_ATTRIB_WPOS][1]; + iyMin = (GLint) yMin; + iyMax = (GLint) yMax + 1; + + if (ltor) { + /* scan left to right */ + const GLfloat *pMin = vMin->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *pMid = vMid->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat dxdy = majDx / majDy; + const GLfloat xAdj = dxdy < 0.0F ? -dxdy : 0.0F; + GLint iy; +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; + GLint ix, startX = (GLint) (x - xAdj); + GLuint count; + GLfloat coverage = 0.0F; + +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif + /* skip over fragments with zero coverage */ + while (startX < MAX_WIDTH) { + coverage = compute_coveragef(pMin, pMid, pMax, startX, iy); + if (coverage > 0.0F) + break; + startX++; + } + + /* enter interior of triangle */ + ix = startX; + +#if defined(DO_ATTRIBS) + /* compute attributes at left-most fragment */ + span.attrStart[FRAG_ATTRIB_WPOS][3] = solve_plane(ix + 0.5F, iy + 0.5F, wPlane); + ATTRIB_LOOP_BEGIN + GLuint c; + for (c = 0; c < 4; c++) { + span.attrStart[attr][c] = solve_plane(ix + 0.5F, iy + 0.5F, attrPlane[attr][c]); + } + ATTRIB_LOOP_END +#endif + + count = 0; + while (coverage > 0.0F) { + /* (cx,cy) = center of fragment */ + const GLfloat cx = ix + 0.5F, cy = iy + 0.5F; + SWspanarrays *array = span.array; + array->coverage[count] = coverage; +#ifdef DO_Z + array->z[count] = (GLuint) solve_plane(cx, cy, zPlane); +#endif + array->rgba[count][RCOMP] = solve_plane_chan(cx, cy, rPlane); + array->rgba[count][GCOMP] = solve_plane_chan(cx, cy, gPlane); + array->rgba[count][BCOMP] = solve_plane_chan(cx, cy, bPlane); + array->rgba[count][ACOMP] = solve_plane_chan(cx, cy, aPlane); + ix++; + count++; + coverage = compute_coveragef(pMin, pMid, pMax, ix, iy); + } + + if (ix > startX) { + span.x = startX; + span.y = iy; + span.end = (GLuint) ix - (GLuint) startX; + _swrast_write_rgba_span(ctx, &span); + } + } + } + else { + /* scan right to left */ + const GLfloat *pMin = vMin->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *pMid = vMid->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat *pMax = vMax->attrib[FRAG_ATTRIB_WPOS]; + const GLfloat dxdy = majDx / majDy; + const GLfloat xAdj = dxdy > 0 ? dxdy : 0.0F; + GLint iy; +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) private(iy) firstprivate(span) +#endif + for (iy = iyMin; iy < iyMax; iy++) { + GLfloat x = pMin[0] - (yMin - iy) * dxdy; + GLint ix, left, startX = (GLint) (x + xAdj); + GLuint count, n; + GLfloat coverage = 0.0F; + +#ifdef _OPENMP + /* each thread needs to use a different (global) SpanArrays variable */ + span.array = SWRAST_CONTEXT(ctx)->SpanArrays + omp_get_thread_num(); +#endif + /* make sure we're not past the window edge */ + if (startX >= ctx->DrawBuffer->_Xmax) { + startX = ctx->DrawBuffer->_Xmax - 1; + } + + /* skip fragments with zero coverage */ + while (startX > 0) { + coverage = compute_coveragef(pMin, pMax, pMid, startX, iy); + if (coverage > 0.0F) + break; + startX--; + } + + /* enter interior of triangle */ + ix = startX; + count = 0; + while (coverage > 0.0F) { + /* (cx,cy) = center of fragment */ + const GLfloat cx = ix + 0.5F, cy = iy + 0.5F; + SWspanarrays *array = span.array; + ASSERT(ix >= 0); + array->coverage[ix] = coverage; +#ifdef DO_Z + array->z[ix] = (GLuint) solve_plane(cx, cy, zPlane); +#endif + array->rgba[ix][RCOMP] = solve_plane_chan(cx, cy, rPlane); + array->rgba[ix][GCOMP] = solve_plane_chan(cx, cy, gPlane); + array->rgba[ix][BCOMP] = solve_plane_chan(cx, cy, bPlane); + array->rgba[ix][ACOMP] = solve_plane_chan(cx, cy, aPlane); + ix--; + count++; + coverage = compute_coveragef(pMin, pMax, pMid, ix, iy); + } + +#if defined(DO_ATTRIBS) + /* compute attributes at left-most fragment */ + span.attrStart[FRAG_ATTRIB_WPOS][3] = solve_plane(ix + 1.5F, iy + 0.5F, wPlane); + ATTRIB_LOOP_BEGIN + GLuint c; + for (c = 0; c < 4; c++) { + span.attrStart[attr][c] = solve_plane(ix + 1.5F, iy + 0.5F, attrPlane[attr][c]); + } + ATTRIB_LOOP_END +#endif + + if (startX > ix) { + n = (GLuint) startX - (GLuint) ix; + + left = ix + 1; + + /* shift all values to the left */ + /* XXX this is temporary */ + { + SWspanarrays *array = span.array; + GLint j; + for (j = 0; j < (GLint) n; j++) { + array->coverage[j] = array->coverage[j + left]; + COPY_CHAN4(array->rgba[j], array->rgba[j + left]); +#ifdef DO_Z + array->z[j] = array->z[j + left]; +#endif + } + } + + span.x = left; + span.y = iy; + span.end = n; + _swrast_write_rgba_span(ctx, &span); + } + } + } +} + + +#undef DO_Z +#undef DO_ATTRIBS +#undef DO_OCCLUSION_TEST diff --git a/mesalib/src/mesa/swrast/s_context.c b/mesalib/src/mesa/swrast/s_context.c index def1531d7..792b528ee 100644 --- a/mesalib/src/mesa/swrast/s_context.c +++ b/mesalib/src/mesa/swrast/s_context.c @@ -417,84 +417,6 @@ _swrast_validate_blend_func(struct gl_context *ctx, GLuint n, const GLubyte mask swrast->BlendFunc( ctx, n, mask, src, dst, chanType ); } - -/** - * Make sure we have texture image data for all the textures we may need - * for subsequent rendering. - */ -static void -_swrast_validate_texture_images(struct gl_context *ctx) -{ - SWcontext *swrast = SWRAST_CONTEXT(ctx); - GLuint u; - - if (!swrast->ValidateTextureImage || !ctx->Texture._EnabledUnits) { - /* no textures enabled, or no way to validate images! */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && !texImg->Data) { - swrast->ValidateTextureImage(ctx, texObj, face, lvl); - ASSERT(texObj->Image[face][lvl]->Data); - } - } - } - } - } - } -} - - -/** - * Free the texture image data attached to all currently enabled - * textures. Meant to be called by device drivers when transitioning - * from software to hardware rendering. - */ -void -_swrast_eject_texture_images(struct gl_context *ctx) -{ - GLuint u; - - if (!ctx->Texture._EnabledUnits) { - /* no textures enabled */ - return; - } - - for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { - if (ctx->Texture.Unit[u]._ReallyEnabled) { - struct gl_texture_object *texObj = ctx->Texture.Unit[u]._Current; - ASSERT(texObj); - if (texObj) { - GLuint numFaces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - GLuint face; - for (face = 0; face < numFaces; face++) { - GLint lvl; - for (lvl = texObj->BaseLevel; lvl <= texObj->_MaxLevel; lvl++) { - struct gl_texture_image *texImg = texObj->Image[face][lvl]; - if (texImg && texImg->Data) { - _mesa_free_texmemory(texImg->Data); - texImg->Data = NULL; - } - } - } - } - } - } -} - - - static void _swrast_sleep( struct gl_context *ctx, GLbitfield new_state ) { @@ -640,7 +562,6 @@ _swrast_validate_derived( struct gl_context *ctx ) if (swrast->NewState & (_NEW_TEXTURE | _NEW_PROGRAM)) { _swrast_update_texture_samplers( ctx ); - _swrast_validate_texture_images(ctx); } if (swrast->NewState & (_NEW_COLOR | _NEW_PROGRAM)) @@ -772,6 +693,11 @@ _swrast_CreateContext( struct gl_context *ctx ) { GLuint i; SWcontext *swrast = (SWcontext *)CALLOC(sizeof(SWcontext)); +#ifdef _OPENMP + const GLint maxThreads = omp_get_max_threads(); +#else + const GLint maxThreads = 1; +#endif if (SWRAST_DEBUG) { _mesa_debug(ctx, "_swrast_CreateContext\n"); @@ -806,19 +732,25 @@ _swrast_CreateContext( struct gl_context *ctx ) for (i = 0; i < MAX_TEXTURE_IMAGE_UNITS; i++) swrast->TextureSample[i] = NULL; - swrast->SpanArrays = MALLOC_STRUCT(sw_span_arrays); + /* SpanArrays is global and shared by all SWspan instances. However, when + * using multiple threads, it is necessary to have one SpanArrays instance + * per thread. + */ + swrast->SpanArrays = (SWspanarrays *) MALLOC(maxThreads * sizeof(SWspanarrays)); if (!swrast->SpanArrays) { FREE(swrast); return GL_FALSE; } - swrast->SpanArrays->ChanType = CHAN_TYPE; + for(i = 0; i < maxThreads; i++) { + swrast->SpanArrays[i].ChanType = CHAN_TYPE; #if CHAN_TYPE == GL_UNSIGNED_BYTE - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba8; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba8; #elif CHAN_TYPE == GL_UNSIGNED_SHORT - swrast->SpanArrays->rgba = swrast->SpanArrays->rgba16; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].rgba16; #else - swrast->SpanArrays->rgba = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0]; + swrast->SpanArrays[i].rgba = swrast->SpanArrays[i].attribs[FRAG_ATTRIB_COL0]; #endif + } /* init point span buffer */ swrast->PointSpan.primitive = GL_POINT; @@ -826,7 +758,10 @@ _swrast_CreateContext( struct gl_context *ctx ) swrast->PointSpan.facing = 0; swrast->PointSpan.array = swrast->SpanArrays; - swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * + /* TexelBuffer is also global and normally shared by all SWspan instances; + * when running with multiple threads, create one per thread. + */ + swrast->TexelBuffer = (GLfloat *) MALLOC(ctx->Const.MaxTextureImageUnits * maxThreads * MAX_WIDTH * 4 * sizeof(GLfloat)); if (!swrast->TexelBuffer) { FREE(swrast->SpanArrays); diff --git a/mesalib/src/mesa/swrast/s_stencil.c b/mesalib/src/mesa/swrast/s_stencil.c index 999fe3c3f..fa5093a34 100644 --- a/mesalib/src/mesa/swrast/s_stencil.c +++ b/mesalib/src/mesa/swrast/s_stencil.c @@ -1,1245 +1,1246 @@ -/* - * Mesa 3-D graphics library - * Version: 7.1 - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/imports.h" - -#include "s_context.h" -#include "s_depth.h" -#include "s_stencil.h" -#include "s_span.h" - - - -/* Stencil Logic: - -IF stencil test fails THEN - Apply fail-op to stencil value - Don't write the pixel (RGBA,Z) -ELSE - IF doing depth test && depth test fails THEN - Apply zfail-op to stencil value - Write RGBA and Z to appropriate buffers - ELSE - Apply zpass-op to stencil value -ENDIF - -*/ - - -/** - * Apply the given stencil operator to the array of stencil values. - * Don't touch stencil[i] if mask[i] is zero. - * Input: n - size of stencil array - * oper - the stencil buffer operator - * face - 0 or 1 for front or back face operation - * stencil - array of stencil values - * mask - array [n] of flag: 1=apply operator, 0=don't apply operator - * Output: stencil - modified values - */ -static void -apply_stencil_op( const struct gl_context *ctx, GLenum oper, GLuint face, - GLuint n, GLstencil stencil[], const GLubyte mask[] ) -{ - const GLstencil ref = ctx->Stencil.Ref[face]; - const GLstencil wrtmask = ctx->Stencil.WriteMask[face]; - const GLstencil invmask = (GLstencil) (~wrtmask); - const GLstencil stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; - GLuint i; - - switch (oper) { - case GL_KEEP: - /* do nothing */ - break; - case GL_ZERO: - if (invmask==0) { - for (i=0;i0) { - stencil[i] = (GLstencil) (s-1); - } - } - } - } - else { - for (i=0;i0) { - stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s-1))); - } - } - } - } - break; - case GL_INCR_WRAP_EXT: - if (invmask==0) { - for (i=0;iStencil.ValueMask[face]; - const GLstencil r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); - GLstencil s; - - ASSERT(n <= MAX_WIDTH); - - /* - * Perform stencil test. The results of this operation are stored - * in the fail[] array: - * IF fail[i] is non-zero THEN - * the stencil fail operator is to be applied - * ELSE - * the stencil fail operator is not to be applied - * ENDIF - */ - switch (ctx->Stencil.Function[face]) { - case GL_NEVER: - /* never pass; always fail */ - for (i=0;i s) { - /* passed */ - fail[i] = 0; - } - else { - fail[i] = 1; - mask[i] = 0; - } - } - else { - fail[i] = 0; - } - } - break; - case GL_GEQUAL: - for (i=0;i= s) { - /* passed */ - fail[i] = 0; - } - else { - fail[i] = 1; - mask[i] = 0; - } - } - else { - fail[i] = 0; - } - } - break; - case GL_EQUAL: - for (i=0;iStencil.FailFunc[face] != GL_KEEP) { - apply_stencil_op( ctx, ctx->Stencil.FailFunc[face], face, n, stencil, fail ); - } - - return !allfail; -} - - -/** - * Compute the zpass/zfail masks by comparing the pre- and post-depth test - * masks. - */ -static INLINE void -compute_pass_fail_masks(GLuint n, const GLubyte origMask[], - const GLubyte newMask[], - GLubyte passMask[], GLubyte failMask[]) -{ - GLuint i; - for (i = 0; i < n; i++) { - ASSERT(newMask[i] == 0 || newMask[i] == 1); - passMask[i] = origMask[i] & newMask[i]; - failMask[i] = origMask[i] & (newMask[i] ^ 1); - } -} - - -/** - * Apply stencil and depth testing to the span of pixels. - * Both software and hardware stencil buffers are acceptable. - * Input: n - number of pixels in the span - * x, y - location of leftmost pixel in span - * z - array [n] of z values - * mask - array [n] of flags (1=test this pixel, 0=skip the pixel) - * Output: mask - array [n] of flags (1=stencil and depth test passed) - * Return: GL_FALSE - all fragments failed the testing - * GL_TRUE - one or more fragments passed the testing - * - */ -static GLboolean -stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) -{ - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_renderbuffer *rb = fb->_StencilBuffer; - GLstencil stencilRow[MAX_WIDTH]; - GLstencil *stencil; - const GLuint n = span->end; - const GLint x = span->x; - const GLint y = span->y; - GLubyte *mask = span->array->mask; - - ASSERT((span->arrayMask & SPAN_XY) == 0); - ASSERT(ctx->Stencil.Enabled); - ASSERT(n <= MAX_WIDTH); -#ifdef DEBUG - if (ctx->Depth.Test) { - ASSERT(span->arrayMask & SPAN_Z); - } -#endif - - stencil = (GLstencil *) rb->GetPointer(ctx, rb, x, y); - if (!stencil) { - rb->GetRow(ctx, rb, n, x, y, stencilRow); - stencil = stencilRow; - } - - /* - * Apply the stencil test to the fragments. - * failMask[i] is 1 if the stencil test failed. - */ - if (do_stencil_test( ctx, face, n, stencil, mask ) == GL_FALSE) { - /* all fragments failed the stencil test, we're done. */ - span->writeAll = GL_FALSE; - if (!rb->GetPointer(ctx, rb, 0, 0)) { - /* put updated stencil values into buffer */ - rb->PutRow(ctx, rb, n, x, y, stencil, NULL); - } - return GL_FALSE; - } - - /* - * Some fragments passed the stencil test, apply depth test to them - * and apply Zpass and Zfail stencil ops. - */ - if (ctx->Depth.Test == GL_FALSE) { - /* - * No depth buffer, just apply zpass stencil function to active pixels. - */ - apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, n, stencil, mask ); - } - else { - /* - * Perform depth buffering, then apply zpass or zfail stencil function. - */ - GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; - - /* save the current mask bits */ - memcpy(origMask, mask, n * sizeof(GLubyte)); - - /* apply the depth test */ - _swrast_depth_test_span(ctx, span); - - compute_pass_fail_masks(n, origMask, mask, passMask, failMask); - - /* apply the pass and fail operations */ - if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { - apply_stencil_op( ctx, ctx->Stencil.ZFailFunc[face], face, - n, stencil, failMask ); - } - if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { - apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, - n, stencil, passMask ); - } - } - - /* - * Write updated stencil values back into hardware stencil buffer. - */ - if (!rb->GetPointer(ctx, rb, 0, 0)) { - rb->PutRow(ctx, rb, n, x, y, stencil, NULL); - } - - span->writeAll = GL_FALSE; - - return GL_TRUE; /* one or more fragments passed both tests */ -} - - - -/* - * Return the address of a stencil buffer value given the window coords: - */ -#define STENCIL_ADDRESS(X, Y) (stencilStart + (Y) * stride + (X)) - - - -/** - * Apply the given stencil operator for each pixel in the array whose - * mask flag is set. - * \note This is for software stencil buffers only. - * Input: n - number of pixels in the span - * x, y - array of [n] pixels - * operator - the stencil buffer operator - * mask - array [n] of flag: 1=apply operator, 0=don't apply operator - */ -static void -apply_stencil_op_to_pixels( struct gl_context *ctx, - GLuint n, const GLint x[], const GLint y[], - GLenum oper, GLuint face, const GLubyte mask[] ) -{ - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_renderbuffer *rb = fb->_StencilBuffer; - const GLstencil stencilMax = (1 << fb->Visual.stencilBits) - 1; - const GLstencil ref = ctx->Stencil.Ref[face]; - const GLstencil wrtmask = ctx->Stencil.WriteMask[face]; - const GLstencil invmask = (GLstencil) (~wrtmask); - GLuint i; - GLstencil *stencilStart = (GLubyte *) rb->Data; - const GLuint stride = rb->Width; - - ASSERT(rb->GetPointer(ctx, rb, 0, 0)); - ASSERT(sizeof(GLstencil) == 1); - - switch (oper) { - case GL_KEEP: - /* do nothing */ - break; - case GL_ZERO: - if (invmask==0) { - for (i=0;i0) { - *sptr = (GLstencil) (*sptr - 1); - } - } - } - } - else { - for (i=0;i0) { - *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr-1))); - } - } - } - } - break; - case GL_INCR_WRAP_EXT: - if (invmask==0) { - for (i=0;iDrawBuffer; - struct gl_renderbuffer *rb = fb->_StencilBuffer; - GLubyte fail[MAX_WIDTH]; - GLstencil r, s; - GLuint i; - GLboolean allfail = GL_FALSE; - const GLuint valueMask = ctx->Stencil.ValueMask[face]; - const GLstencil *stencilStart = (GLstencil *) rb->Data; - const GLuint stride = rb->Width; - - ASSERT(rb->GetPointer(ctx, rb, 0, 0)); - ASSERT(sizeof(GLstencil) == 1); - - /* - * Perform stencil test. The results of this operation are stored - * in the fail[] array: - * IF fail[i] is non-zero THEN - * the stencil fail operator is to be applied - * ELSE - * the stencil fail operator is not to be applied - * ENDIF - */ - - switch (ctx->Stencil.Function[face]) { - case GL_NEVER: - /* always fail */ - for (i=0;iStencil.Ref[face] & valueMask); - for (i=0;iStencil.Ref[face] & valueMask); - for (i=0;iStencil.Ref[face] & valueMask); - for (i=0;i s) { - /* passed */ - fail[i] = 0; - } - else { - fail[i] = 1; - mask[i] = 0; - } - } - else { - fail[i] = 0; - } - } - break; - case GL_GEQUAL: - r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); - for (i=0;i= s) { - /* passed */ - fail[i] = 0; - } - else { - fail[i] = 1; - mask[i] = 0; - } - } - else { - fail[i] = 0; - } - } - break; - case GL_EQUAL: - r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); - for (i=0;iStencil.Ref[face] & valueMask); - for (i=0;iStencil.FailFunc[face] != GL_KEEP) { - apply_stencil_op_to_pixels( ctx, n, x, y, ctx->Stencil.FailFunc[face], - face, fail ); - } - - return !allfail; -} - - - - -/** - * Apply stencil and depth testing to an array of pixels. - * This is used both for software and hardware stencil buffers. - * - * The comments in this function are a bit sparse but the code is - * almost identical to stencil_and_ztest_span(), which is well - * commented. - * - * Input: n - number of pixels in the array - * x, y - array of [n] pixel positions - * z - array [n] of z values - * mask - array [n] of flags (1=test this pixel, 0=skip the pixel) - * Output: mask - array [n] of flags (1=stencil and depth test passed) - * Return: GL_FALSE - all fragments failed the testing - * GL_TRUE - one or more fragments passed the testing - */ -static GLboolean -stencil_and_ztest_pixels( struct gl_context *ctx, SWspan *span, GLuint face ) -{ - GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_renderbuffer *rb = fb->_StencilBuffer; - const GLuint n = span->end; - const GLint *x = span->array->x; - const GLint *y = span->array->y; - GLubyte *mask = span->array->mask; - - ASSERT(span->arrayMask & SPAN_XY); - ASSERT(ctx->Stencil.Enabled); - ASSERT(n <= MAX_WIDTH); - - if (!rb->GetPointer(ctx, rb, 0, 0)) { - /* No direct access */ - GLstencil stencil[MAX_WIDTH]; - - ASSERT(rb->DataType == GL_UNSIGNED_BYTE); - _swrast_get_values(ctx, rb, n, x, y, stencil, sizeof(GLubyte)); - - memcpy(origMask, mask, n * sizeof(GLubyte)); - - (void) do_stencil_test(ctx, face, n, stencil, mask); - - if (ctx->Depth.Test == GL_FALSE) { - apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face, - n, stencil, mask); - } - else { - GLubyte tmpMask[MAX_WIDTH]; - memcpy(tmpMask, mask, n * sizeof(GLubyte)); - - _swrast_depth_test_span(ctx, span); - - compute_pass_fail_masks(n, tmpMask, mask, passMask, failMask); - - if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { - apply_stencil_op(ctx, ctx->Stencil.ZFailFunc[face], face, - n, stencil, failMask); - } - if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { - apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face, - n, stencil, passMask); - } - } - - /* Write updated stencil values into hardware stencil buffer */ - rb->PutValues(ctx, rb, n, x, y, stencil, origMask); - - return GL_TRUE; - } - else { - /* Direct access to stencil buffer */ - - if (stencil_test_pixels(ctx, face, n, x, y, mask) == GL_FALSE) { - /* all fragments failed the stencil test, we're done. */ - return GL_FALSE; - } - - if (ctx->Depth.Test==GL_FALSE) { - apply_stencil_op_to_pixels(ctx, n, x, y, - ctx->Stencil.ZPassFunc[face], face, mask); - } - else { - memcpy(origMask, mask, n * sizeof(GLubyte)); - - _swrast_depth_test_span(ctx, span); - - compute_pass_fail_masks(n, origMask, mask, passMask, failMask); - - if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { - apply_stencil_op_to_pixels(ctx, n, x, y, - ctx->Stencil.ZFailFunc[face], - face, failMask); - } - if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { - apply_stencil_op_to_pixels(ctx, n, x, y, - ctx->Stencil.ZPassFunc[face], - face, passMask); - } - } - - return GL_TRUE; /* one or more fragments passed both tests */ - } -} - - -/** - * /return GL_TRUE = one or more fragments passed, - * GL_FALSE = all fragments failed. - */ -GLboolean -_swrast_stencil_and_ztest_span(struct gl_context *ctx, SWspan *span) -{ - const GLuint face = (span->facing == 0) ? 0 : ctx->Stencil._BackFace; - - if (span->arrayMask & SPAN_XY) - return stencil_and_ztest_pixels(ctx, span, face); - else - return stencil_and_ztest_span(ctx, span, face); -} - - -#if 0 -GLuint -clip_span(GLuint bufferWidth, GLuint bufferHeight, - GLint x, GLint y, GLuint *count) -{ - GLuint n = *count; - GLuint skipPixels = 0; - - if (y < 0 || y >= bufferHeight || x + n <= 0 || x >= bufferWidth) { - /* totally out of bounds */ - n = 0; - } - else { - /* left clip */ - if (x < 0) { - skipPixels = -x; - x = 0; - n -= skipPixels; - } - /* right clip */ - if (x + n > bufferWidth) { - GLint dx = x + n - bufferWidth; - n -= dx; - } - } - - *count = n; - - return skipPixels; -} -#endif - - -/** - * Return a span of stencil values from the stencil buffer. - * Used for glRead/CopyPixels - * Input: n - how many pixels - * x,y - location of first pixel - * Output: stencil - the array of stencil values - */ -void -_swrast_read_stencil_span(struct gl_context *ctx, struct gl_renderbuffer *rb, - GLint n, GLint x, GLint y, GLstencil stencil[]) -{ - if (y < 0 || y >= (GLint) rb->Height || - x + n <= 0 || x >= (GLint) rb->Width) { - /* span is completely outside framebuffer */ - return; /* undefined values OK */ - } - - if (x < 0) { - GLint dx = -x; - x = 0; - n -= dx; - stencil += dx; - } - if (x + n > (GLint) rb->Width) { - GLint dx = x + n - rb->Width; - n -= dx; - } - if (n <= 0) { - return; - } - - rb->GetRow(ctx, rb, n, x, y, stencil); -} - - - -/** - * Write a span of stencil values to the stencil buffer. This function - * applies the stencil write mask when needed. - * Used for glDraw/CopyPixels - * Input: n - how many pixels - * x, y - location of first pixel - * stencil - the array of stencil values - */ -void -_swrast_write_stencil_span(struct gl_context *ctx, GLint n, GLint x, GLint y, - const GLstencil stencil[] ) -{ - struct gl_framebuffer *fb = ctx->DrawBuffer; - struct gl_renderbuffer *rb = fb->_StencilBuffer; - const GLuint stencilMax = (1 << fb->Visual.stencilBits) - 1; - const GLuint stencilMask = ctx->Stencil.WriteMask[0]; - - if (y < 0 || y >= (GLint) rb->Height || - x + n <= 0 || x >= (GLint) rb->Width) { - /* span is completely outside framebuffer */ - return; /* undefined values OK */ - } - if (x < 0) { - GLint dx = -x; - x = 0; - n -= dx; - stencil += dx; - } - if (x + n > (GLint) rb->Width) { - GLint dx = x + n - rb->Width; - n -= dx; - } - if (n <= 0) { - return; - } - - if ((stencilMask & stencilMax) != stencilMax) { - /* need to apply writemask */ - GLstencil destVals[MAX_WIDTH], newVals[MAX_WIDTH]; - GLint i; - rb->GetRow(ctx, rb, n, x, y, destVals); - for (i = 0; i < n; i++) { - newVals[i] - = (stencil[i] & stencilMask) | (destVals[i] & ~stencilMask); - } - rb->PutRow(ctx, rb, n, x, y, newVals, NULL); - } - else { - rb->PutRow(ctx, rb, n, x, y, stencil, NULL); - } -} - - - -/** - * Clear the stencil buffer. - */ -void -_swrast_clear_stencil_buffer( struct gl_context *ctx, struct gl_renderbuffer *rb ) -{ - const GLubyte stencilBits = ctx->DrawBuffer->Visual.stencilBits; - const GLuint mask = ctx->Stencil.WriteMask[0]; - const GLuint invMask = ~mask; - const GLuint clearVal = (ctx->Stencil.Clear & mask); - const GLuint stencilMax = (1 << stencilBits) - 1; - GLint x, y, width, height; - - if (!rb || mask == 0) - return; - - ASSERT(rb->DataType == GL_UNSIGNED_BYTE || - rb->DataType == GL_UNSIGNED_SHORT); - - ASSERT(rb->_BaseFormat == GL_STENCIL_INDEX); - - /* compute region to clear */ - x = ctx->DrawBuffer->_Xmin; - y = ctx->DrawBuffer->_Ymin; - width = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; - height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; - - if (rb->GetPointer(ctx, rb, 0, 0)) { - /* Direct buffer access */ - if ((mask & stencilMax) != stencilMax) { - /* need to mask the clear */ - if (rb->DataType == GL_UNSIGNED_BYTE) { - GLint i, j; - for (i = 0; i < height; i++) { - GLubyte *stencil = (GLubyte*) rb->GetPointer(ctx, rb, x, y + i); - for (j = 0; j < width; j++) { - stencil[j] = (stencil[j] & invMask) | clearVal; - } - } - } - else { - GLint i, j; - for (i = 0; i < height; i++) { - GLushort *stencil = (GLushort*) rb->GetPointer(ctx, rb, x, y + i); - for (j = 0; j < width; j++) { - stencil[j] = (stencil[j] & invMask) | clearVal; - } - } - } - } - else { - /* no bit masking */ - if (width == (GLint) rb->Width && rb->DataType == GL_UNSIGNED_BYTE) { - /* optimized case */ - /* Note: bottom-to-top raster assumed! */ - GLubyte *stencil = (GLubyte *) rb->GetPointer(ctx, rb, x, y); - GLuint len = width * height * sizeof(GLubyte); - memset(stencil, clearVal, len); - } - else { - /* general case */ - GLint i; - for (i = 0; i < height; i++) { - GLvoid *stencil = rb->GetPointer(ctx, rb, x, y + i); - if (rb->DataType == GL_UNSIGNED_BYTE) { - memset(stencil, clearVal, width); - } - else { - _mesa_memset16((short unsigned int*) stencil, clearVal, width); - } - } - } - } - } - else { - /* no direct access */ - if ((mask & stencilMax) != stencilMax) { - /* need to mask the clear */ - if (rb->DataType == GL_UNSIGNED_BYTE) { - GLint i, j; - for (i = 0; i < height; i++) { - GLubyte stencil[MAX_WIDTH]; - rb->GetRow(ctx, rb, width, x, y + i, stencil); - for (j = 0; j < width; j++) { - stencil[j] = (stencil[j] & invMask) | clearVal; - } - rb->PutRow(ctx, rb, width, x, y + i, stencil, NULL); - } - } - else { - GLint i, j; - for (i = 0; i < height; i++) { - GLushort stencil[MAX_WIDTH]; - rb->GetRow(ctx, rb, width, x, y + i, stencil); - for (j = 0; j < width; j++) { - stencil[j] = (stencil[j] & invMask) | clearVal; - } - rb->PutRow(ctx, rb, width, x, y + i, stencil, NULL); - } - } - } - else { - /* no bit masking */ - const GLubyte clear8 = (GLubyte) clearVal; - const GLushort clear16 = (GLushort) clearVal; - const void *clear; - GLint i; - if (rb->DataType == GL_UNSIGNED_BYTE) { - clear = &clear8; - } - else { - clear = &clear16; - } - for (i = 0; i < height; i++) { - rb->PutMonoRow(ctx, rb, width, x, y + i, clear, NULL); - } - } - } -} +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/imports.h" + +#include "s_context.h" +#include "s_depth.h" +#include "s_stencil.h" +#include "s_span.h" + + + +/* Stencil Logic: + +IF stencil test fails THEN + Apply fail-op to stencil value + Don't write the pixel (RGBA,Z) +ELSE + IF doing depth test && depth test fails THEN + Apply zfail-op to stencil value + Write RGBA and Z to appropriate buffers + ELSE + Apply zpass-op to stencil value +ENDIF + +*/ + + +/** + * Apply the given stencil operator to the array of stencil values. + * Don't touch stencil[i] if mask[i] is zero. + * Input: n - size of stencil array + * oper - the stencil buffer operator + * face - 0 or 1 for front or back face operation + * stencil - array of stencil values + * mask - array [n] of flag: 1=apply operator, 0=don't apply operator + * Output: stencil - modified values + */ +static void +apply_stencil_op( const struct gl_context *ctx, GLenum oper, GLuint face, + GLuint n, GLstencil stencil[], const GLubyte mask[] ) +{ + const GLstencil ref = ctx->Stencil.Ref[face]; + const GLstencil wrtmask = ctx->Stencil.WriteMask[face]; + const GLstencil invmask = (GLstencil) (~wrtmask); + const GLstencil stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1; + GLuint i; + + switch (oper) { + case GL_KEEP: + /* do nothing */ + break; + case GL_ZERO: + if (invmask==0) { + for (i=0;i0) { + stencil[i] = (GLstencil) (s-1); + } + } + } + } + else { + for (i=0;i0) { + stencil[i] = (GLstencil) ((invmask & s) | (wrtmask & (s-1))); + } + } + } + } + break; + case GL_INCR_WRAP_EXT: + if (invmask==0) { + for (i=0;iStencil.ValueMask[face]; + const GLstencil r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); + GLstencil s; + + ASSERT(n <= MAX_WIDTH); + + /* + * Perform stencil test. The results of this operation are stored + * in the fail[] array: + * IF fail[i] is non-zero THEN + * the stencil fail operator is to be applied + * ELSE + * the stencil fail operator is not to be applied + * ENDIF + */ + switch (ctx->Stencil.Function[face]) { + case GL_NEVER: + /* never pass; always fail */ + for (i=0;i s) { + /* passed */ + fail[i] = 0; + } + else { + fail[i] = 1; + mask[i] = 0; + } + } + else { + fail[i] = 0; + } + } + break; + case GL_GEQUAL: + for (i=0;i= s) { + /* passed */ + fail[i] = 0; + } + else { + fail[i] = 1; + mask[i] = 0; + } + } + else { + fail[i] = 0; + } + } + break; + case GL_EQUAL: + for (i=0;iStencil.FailFunc[face] != GL_KEEP) { + apply_stencil_op( ctx, ctx->Stencil.FailFunc[face], face, n, stencil, fail ); + } + + return !allfail; +} + + +/** + * Compute the zpass/zfail masks by comparing the pre- and post-depth test + * masks. + */ +static INLINE void +compute_pass_fail_masks(GLuint n, const GLubyte origMask[], + const GLubyte newMask[], + GLubyte passMask[], GLubyte failMask[]) +{ + GLuint i; + for (i = 0; i < n; i++) { + ASSERT(newMask[i] == 0 || newMask[i] == 1); + passMask[i] = origMask[i] & newMask[i]; + failMask[i] = origMask[i] & (newMask[i] ^ 1); + } +} + + +/** + * Apply stencil and depth testing to the span of pixels. + * Both software and hardware stencil buffers are acceptable. + * Input: n - number of pixels in the span + * x, y - location of leftmost pixel in span + * z - array [n] of z values + * mask - array [n] of flags (1=test this pixel, 0=skip the pixel) + * Output: mask - array [n] of flags (1=stencil and depth test passed) + * Return: GL_FALSE - all fragments failed the testing + * GL_TRUE - one or more fragments passed the testing + * + */ +static GLboolean +stencil_and_ztest_span(struct gl_context *ctx, SWspan *span, GLuint face) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_renderbuffer *rb = fb->_StencilBuffer; + GLstencil stencilRow[MAX_WIDTH]; + GLstencil *stencil; + const GLuint n = span->end; + const GLint x = span->x; + const GLint y = span->y; + GLubyte *mask = span->array->mask; + + ASSERT((span->arrayMask & SPAN_XY) == 0); + ASSERT(ctx->Stencil.Enabled); + ASSERT(n <= MAX_WIDTH); +#ifdef DEBUG + if (ctx->Depth.Test) { + ASSERT(span->arrayMask & SPAN_Z); + } +#endif + + stencil = (GLstencil *) rb->GetPointer(ctx, rb, x, y); + if (!stencil) { + rb->GetRow(ctx, rb, n, x, y, stencilRow); + stencil = stencilRow; + } + + /* + * Apply the stencil test to the fragments. + * failMask[i] is 1 if the stencil test failed. + */ + if (do_stencil_test( ctx, face, n, stencil, mask ) == GL_FALSE) { + /* all fragments failed the stencil test, we're done. */ + span->writeAll = GL_FALSE; + if (!rb->GetPointer(ctx, rb, 0, 0)) { + /* put updated stencil values into buffer */ + rb->PutRow(ctx, rb, n, x, y, stencil, NULL); + } + return GL_FALSE; + } + + /* + * Some fragments passed the stencil test, apply depth test to them + * and apply Zpass and Zfail stencil ops. + */ + if (ctx->Depth.Test == GL_FALSE || + ctx->DrawBuffer->_DepthBuffer == NULL) { + /* + * No depth buffer, just apply zpass stencil function to active pixels. + */ + apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, n, stencil, mask ); + } + else { + /* + * Perform depth buffering, then apply zpass or zfail stencil function. + */ + GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; + + /* save the current mask bits */ + memcpy(origMask, mask, n * sizeof(GLubyte)); + + /* apply the depth test */ + _swrast_depth_test_span(ctx, span); + + compute_pass_fail_masks(n, origMask, mask, passMask, failMask); + + /* apply the pass and fail operations */ + if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { + apply_stencil_op( ctx, ctx->Stencil.ZFailFunc[face], face, + n, stencil, failMask ); + } + if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { + apply_stencil_op( ctx, ctx->Stencil.ZPassFunc[face], face, + n, stencil, passMask ); + } + } + + /* + * Write updated stencil values back into hardware stencil buffer. + */ + if (!rb->GetPointer(ctx, rb, 0, 0)) { + rb->PutRow(ctx, rb, n, x, y, stencil, NULL); + } + + span->writeAll = GL_FALSE; + + return GL_TRUE; /* one or more fragments passed both tests */ +} + + + +/* + * Return the address of a stencil buffer value given the window coords: + */ +#define STENCIL_ADDRESS(X, Y) (stencilStart + (Y) * stride + (X)) + + + +/** + * Apply the given stencil operator for each pixel in the array whose + * mask flag is set. + * \note This is for software stencil buffers only. + * Input: n - number of pixels in the span + * x, y - array of [n] pixels + * operator - the stencil buffer operator + * mask - array [n] of flag: 1=apply operator, 0=don't apply operator + */ +static void +apply_stencil_op_to_pixels( struct gl_context *ctx, + GLuint n, const GLint x[], const GLint y[], + GLenum oper, GLuint face, const GLubyte mask[] ) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_renderbuffer *rb = fb->_StencilBuffer; + const GLstencil stencilMax = (1 << fb->Visual.stencilBits) - 1; + const GLstencil ref = ctx->Stencil.Ref[face]; + const GLstencil wrtmask = ctx->Stencil.WriteMask[face]; + const GLstencil invmask = (GLstencil) (~wrtmask); + GLuint i; + GLstencil *stencilStart = (GLubyte *) rb->Data; + const GLuint stride = rb->Width; + + ASSERT(rb->GetPointer(ctx, rb, 0, 0)); + ASSERT(sizeof(GLstencil) == 1); + + switch (oper) { + case GL_KEEP: + /* do nothing */ + break; + case GL_ZERO: + if (invmask==0) { + for (i=0;i0) { + *sptr = (GLstencil) (*sptr - 1); + } + } + } + } + else { + for (i=0;i0) { + *sptr = (GLstencil) ((invmask & *sptr) | (wrtmask & (*sptr-1))); + } + } + } + } + break; + case GL_INCR_WRAP_EXT: + if (invmask==0) { + for (i=0;iDrawBuffer; + struct gl_renderbuffer *rb = fb->_StencilBuffer; + GLubyte fail[MAX_WIDTH]; + GLstencil r, s; + GLuint i; + GLboolean allfail = GL_FALSE; + const GLuint valueMask = ctx->Stencil.ValueMask[face]; + const GLstencil *stencilStart = (GLstencil *) rb->Data; + const GLuint stride = rb->Width; + + ASSERT(rb->GetPointer(ctx, rb, 0, 0)); + ASSERT(sizeof(GLstencil) == 1); + + /* + * Perform stencil test. The results of this operation are stored + * in the fail[] array: + * IF fail[i] is non-zero THEN + * the stencil fail operator is to be applied + * ELSE + * the stencil fail operator is not to be applied + * ENDIF + */ + + switch (ctx->Stencil.Function[face]) { + case GL_NEVER: + /* always fail */ + for (i=0;iStencil.Ref[face] & valueMask); + for (i=0;iStencil.Ref[face] & valueMask); + for (i=0;iStencil.Ref[face] & valueMask); + for (i=0;i s) { + /* passed */ + fail[i] = 0; + } + else { + fail[i] = 1; + mask[i] = 0; + } + } + else { + fail[i] = 0; + } + } + break; + case GL_GEQUAL: + r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); + for (i=0;i= s) { + /* passed */ + fail[i] = 0; + } + else { + fail[i] = 1; + mask[i] = 0; + } + } + else { + fail[i] = 0; + } + } + break; + case GL_EQUAL: + r = (GLstencil) (ctx->Stencil.Ref[face] & valueMask); + for (i=0;iStencil.Ref[face] & valueMask); + for (i=0;iStencil.FailFunc[face] != GL_KEEP) { + apply_stencil_op_to_pixels( ctx, n, x, y, ctx->Stencil.FailFunc[face], + face, fail ); + } + + return !allfail; +} + + + + +/** + * Apply stencil and depth testing to an array of pixels. + * This is used both for software and hardware stencil buffers. + * + * The comments in this function are a bit sparse but the code is + * almost identical to stencil_and_ztest_span(), which is well + * commented. + * + * Input: n - number of pixels in the array + * x, y - array of [n] pixel positions + * z - array [n] of z values + * mask - array [n] of flags (1=test this pixel, 0=skip the pixel) + * Output: mask - array [n] of flags (1=stencil and depth test passed) + * Return: GL_FALSE - all fragments failed the testing + * GL_TRUE - one or more fragments passed the testing + */ +static GLboolean +stencil_and_ztest_pixels( struct gl_context *ctx, SWspan *span, GLuint face ) +{ + GLubyte passMask[MAX_WIDTH], failMask[MAX_WIDTH], origMask[MAX_WIDTH]; + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_renderbuffer *rb = fb->_StencilBuffer; + const GLuint n = span->end; + const GLint *x = span->array->x; + const GLint *y = span->array->y; + GLubyte *mask = span->array->mask; + + ASSERT(span->arrayMask & SPAN_XY); + ASSERT(ctx->Stencil.Enabled); + ASSERT(n <= MAX_WIDTH); + + if (!rb->GetPointer(ctx, rb, 0, 0)) { + /* No direct access */ + GLstencil stencil[MAX_WIDTH]; + + ASSERT(rb->DataType == GL_UNSIGNED_BYTE); + _swrast_get_values(ctx, rb, n, x, y, stencil, sizeof(GLubyte)); + + memcpy(origMask, mask, n * sizeof(GLubyte)); + + (void) do_stencil_test(ctx, face, n, stencil, mask); + + if (ctx->Depth.Test == GL_FALSE) { + apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face, + n, stencil, mask); + } + else { + GLubyte tmpMask[MAX_WIDTH]; + memcpy(tmpMask, mask, n * sizeof(GLubyte)); + + _swrast_depth_test_span(ctx, span); + + compute_pass_fail_masks(n, tmpMask, mask, passMask, failMask); + + if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { + apply_stencil_op(ctx, ctx->Stencil.ZFailFunc[face], face, + n, stencil, failMask); + } + if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { + apply_stencil_op(ctx, ctx->Stencil.ZPassFunc[face], face, + n, stencil, passMask); + } + } + + /* Write updated stencil values into hardware stencil buffer */ + rb->PutValues(ctx, rb, n, x, y, stencil, origMask); + + return GL_TRUE; + } + else { + /* Direct access to stencil buffer */ + + if (stencil_test_pixels(ctx, face, n, x, y, mask) == GL_FALSE) { + /* all fragments failed the stencil test, we're done. */ + return GL_FALSE; + } + + if (ctx->Depth.Test==GL_FALSE) { + apply_stencil_op_to_pixels(ctx, n, x, y, + ctx->Stencil.ZPassFunc[face], face, mask); + } + else { + memcpy(origMask, mask, n * sizeof(GLubyte)); + + _swrast_depth_test_span(ctx, span); + + compute_pass_fail_masks(n, origMask, mask, passMask, failMask); + + if (ctx->Stencil.ZFailFunc[face] != GL_KEEP) { + apply_stencil_op_to_pixels(ctx, n, x, y, + ctx->Stencil.ZFailFunc[face], + face, failMask); + } + if (ctx->Stencil.ZPassFunc[face] != GL_KEEP) { + apply_stencil_op_to_pixels(ctx, n, x, y, + ctx->Stencil.ZPassFunc[face], + face, passMask); + } + } + + return GL_TRUE; /* one or more fragments passed both tests */ + } +} + + +/** + * /return GL_TRUE = one or more fragments passed, + * GL_FALSE = all fragments failed. + */ +GLboolean +_swrast_stencil_and_ztest_span(struct gl_context *ctx, SWspan *span) +{ + const GLuint face = (span->facing == 0) ? 0 : ctx->Stencil._BackFace; + + if (span->arrayMask & SPAN_XY) + return stencil_and_ztest_pixels(ctx, span, face); + else + return stencil_and_ztest_span(ctx, span, face); +} + + +#if 0 +GLuint +clip_span(GLuint bufferWidth, GLuint bufferHeight, + GLint x, GLint y, GLuint *count) +{ + GLuint n = *count; + GLuint skipPixels = 0; + + if (y < 0 || y >= bufferHeight || x + n <= 0 || x >= bufferWidth) { + /* totally out of bounds */ + n = 0; + } + else { + /* left clip */ + if (x < 0) { + skipPixels = -x; + x = 0; + n -= skipPixels; + } + /* right clip */ + if (x + n > bufferWidth) { + GLint dx = x + n - bufferWidth; + n -= dx; + } + } + + *count = n; + + return skipPixels; +} +#endif + + +/** + * Return a span of stencil values from the stencil buffer. + * Used for glRead/CopyPixels + * Input: n - how many pixels + * x,y - location of first pixel + * Output: stencil - the array of stencil values + */ +void +_swrast_read_stencil_span(struct gl_context *ctx, struct gl_renderbuffer *rb, + GLint n, GLint x, GLint y, GLstencil stencil[]) +{ + if (y < 0 || y >= (GLint) rb->Height || + x + n <= 0 || x >= (GLint) rb->Width) { + /* span is completely outside framebuffer */ + return; /* undefined values OK */ + } + + if (x < 0) { + GLint dx = -x; + x = 0; + n -= dx; + stencil += dx; + } + if (x + n > (GLint) rb->Width) { + GLint dx = x + n - rb->Width; + n -= dx; + } + if (n <= 0) { + return; + } + + rb->GetRow(ctx, rb, n, x, y, stencil); +} + + + +/** + * Write a span of stencil values to the stencil buffer. This function + * applies the stencil write mask when needed. + * Used for glDraw/CopyPixels + * Input: n - how many pixels + * x, y - location of first pixel + * stencil - the array of stencil values + */ +void +_swrast_write_stencil_span(struct gl_context *ctx, GLint n, GLint x, GLint y, + const GLstencil stencil[] ) +{ + struct gl_framebuffer *fb = ctx->DrawBuffer; + struct gl_renderbuffer *rb = fb->_StencilBuffer; + const GLuint stencilMax = (1 << fb->Visual.stencilBits) - 1; + const GLuint stencilMask = ctx->Stencil.WriteMask[0]; + + if (y < 0 || y >= (GLint) rb->Height || + x + n <= 0 || x >= (GLint) rb->Width) { + /* span is completely outside framebuffer */ + return; /* undefined values OK */ + } + if (x < 0) { + GLint dx = -x; + x = 0; + n -= dx; + stencil += dx; + } + if (x + n > (GLint) rb->Width) { + GLint dx = x + n - rb->Width; + n -= dx; + } + if (n <= 0) { + return; + } + + if ((stencilMask & stencilMax) != stencilMax) { + /* need to apply writemask */ + GLstencil destVals[MAX_WIDTH], newVals[MAX_WIDTH]; + GLint i; + rb->GetRow(ctx, rb, n, x, y, destVals); + for (i = 0; i < n; i++) { + newVals[i] + = (stencil[i] & stencilMask) | (destVals[i] & ~stencilMask); + } + rb->PutRow(ctx, rb, n, x, y, newVals, NULL); + } + else { + rb->PutRow(ctx, rb, n, x, y, stencil, NULL); + } +} + + + +/** + * Clear the stencil buffer. + */ +void +_swrast_clear_stencil_buffer( struct gl_context *ctx, struct gl_renderbuffer *rb ) +{ + const GLubyte stencilBits = ctx->DrawBuffer->Visual.stencilBits; + const GLuint mask = ctx->Stencil.WriteMask[0]; + const GLuint invMask = ~mask; + const GLuint clearVal = (ctx->Stencil.Clear & mask); + const GLuint stencilMax = (1 << stencilBits) - 1; + GLint x, y, width, height; + + if (!rb || mask == 0) + return; + + ASSERT(rb->DataType == GL_UNSIGNED_BYTE || + rb->DataType == GL_UNSIGNED_SHORT); + + ASSERT(rb->_BaseFormat == GL_STENCIL_INDEX); + + /* compute region to clear */ + x = ctx->DrawBuffer->_Xmin; + y = ctx->DrawBuffer->_Ymin; + width = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin; + height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin; + + if (rb->GetPointer(ctx, rb, 0, 0)) { + /* Direct buffer access */ + if ((mask & stencilMax) != stencilMax) { + /* need to mask the clear */ + if (rb->DataType == GL_UNSIGNED_BYTE) { + GLint i, j; + for (i = 0; i < height; i++) { + GLubyte *stencil = (GLubyte*) rb->GetPointer(ctx, rb, x, y + i); + for (j = 0; j < width; j++) { + stencil[j] = (stencil[j] & invMask) | clearVal; + } + } + } + else { + GLint i, j; + for (i = 0; i < height; i++) { + GLushort *stencil = (GLushort*) rb->GetPointer(ctx, rb, x, y + i); + for (j = 0; j < width; j++) { + stencil[j] = (stencil[j] & invMask) | clearVal; + } + } + } + } + else { + /* no bit masking */ + if (width == (GLint) rb->Width && rb->DataType == GL_UNSIGNED_BYTE) { + /* optimized case */ + /* Note: bottom-to-top raster assumed! */ + GLubyte *stencil = (GLubyte *) rb->GetPointer(ctx, rb, x, y); + GLuint len = width * height * sizeof(GLubyte); + memset(stencil, clearVal, len); + } + else { + /* general case */ + GLint i; + for (i = 0; i < height; i++) { + GLvoid *stencil = rb->GetPointer(ctx, rb, x, y + i); + if (rb->DataType == GL_UNSIGNED_BYTE) { + memset(stencil, clearVal, width); + } + else { + _mesa_memset16((short unsigned int*) stencil, clearVal, width); + } + } + } + } + } + else { + /* no direct access */ + if ((mask & stencilMax) != stencilMax) { + /* need to mask the clear */ + if (rb->DataType == GL_UNSIGNED_BYTE) { + GLint i, j; + for (i = 0; i < height; i++) { + GLubyte stencil[MAX_WIDTH]; + rb->GetRow(ctx, rb, width, x, y + i, stencil); + for (j = 0; j < width; j++) { + stencil[j] = (stencil[j] & invMask) | clearVal; + } + rb->PutRow(ctx, rb, width, x, y + i, stencil, NULL); + } + } + else { + GLint i, j; + for (i = 0; i < height; i++) { + GLushort stencil[MAX_WIDTH]; + rb->GetRow(ctx, rb, width, x, y + i, stencil); + for (j = 0; j < width; j++) { + stencil[j] = (stencil[j] & invMask) | clearVal; + } + rb->PutRow(ctx, rb, width, x, y + i, stencil, NULL); + } + } + } + else { + /* no bit masking */ + const GLubyte clear8 = (GLubyte) clearVal; + const GLushort clear16 = (GLushort) clearVal; + const void *clear; + GLint i; + if (rb->DataType == GL_UNSIGNED_BYTE) { + clear = &clear8; + } + else { + clear = &clear16; + } + for (i = 0; i < height; i++) { + rb->PutMonoRow(ctx, rb, width, x, y + i, clear, NULL); + } + } + } +} diff --git a/mesalib/src/mesa/swrast/s_texcombine.c b/mesalib/src/mesa/swrast/s_texcombine.c index 53ef2f890..80b9dff3c 100644 --- a/mesalib/src/mesa/swrast/s_texcombine.c +++ b/mesalib/src/mesa/swrast/s_texcombine.c @@ -1,751 +1,755 @@ -/* - * Mesa 3-D graphics library - * Version: 7.5 - * - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "main/glheader.h" -#include "main/context.h" -#include "main/colormac.h" -#include "main/imports.h" -#include "main/pixeltransfer.h" -#include "program/prog_instruction.h" - -#include "s_context.h" -#include "s_texcombine.h" - - -/** - * Pointer to array of float[4] - * This type makes the code below more concise and avoids a lot of casting. - */ -typedef float (*float4_array)[4]; - - -/** - * Return array of texels for given unit. - */ -static INLINE float4_array -get_texel_array(SWcontext *swrast, GLuint unit) -{ - return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); -} - - - -/** - * Do texture application for: - * GL_EXT_texture_env_combine - * GL_ARB_texture_env_combine - * GL_EXT_texture_env_dot3 - * GL_ARB_texture_env_dot3 - * GL_ATI_texture_env_combine3 - * GL_NV_texture_env_combine4 - * conventional GL texture env modes - * - * \param ctx rendering context - * \param unit the texture combiner unit - * \param n number of fragments to process (span width) - * \param primary_rgba incoming fragment color array - * \param texelBuffer pointer to texel colors for all texture units - * - * \param rgba incoming/result fragment colors - */ -static void -texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, - const float4_array primary_rgba, - const GLfloat *texelBuffer, - GLchan (*rgbaChan)[4] ) -{ - SWcontext *swrast = SWRAST_CONTEXT(ctx); - const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]); - const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine; - float4_array argRGB[MAX_COMBINER_TERMS]; - float4_array argA[MAX_COMBINER_TERMS]; - const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB); - const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA); - const GLuint numArgsRGB = combine->_NumArgsRGB; - const GLuint numArgsA = combine->_NumArgsA; - float4_array ccolor[4], rgba; - GLuint i, term; - - /* alloc temp pixel buffers */ - rgba = (float4_array) malloc(4 * n * sizeof(GLfloat)); - if (!rgba) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); - return; - } - - for (i = 0; i < numArgsRGB || i < numArgsA; i++) { - ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat)); - if (!ccolor[i]) { - while (i) { - free(ccolor[i]); - i--; - } - _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); - return; - } - } - - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]); - rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]); - rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]); - rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]); - } - - /* - printf("modeRGB 0x%x modeA 0x%x srcRGB1 0x%x srcA1 0x%x srcRGB2 0x%x srcA2 0x%x\n", - combine->ModeRGB, - combine->ModeA, - combine->SourceRGB[0], - combine->SourceA[0], - combine->SourceRGB[1], - combine->SourceA[1]); - */ - - /* - * Do operand setup for up to 4 operands. Loop over the terms. - */ - for (term = 0; term < numArgsRGB; term++) { - const GLenum srcRGB = combine->SourceRGB[term]; - const GLenum operandRGB = combine->OperandRGB[term]; - - switch (srcRGB) { - case GL_TEXTURE: - argRGB[term] = get_texel_array(swrast, unit); - break; - case GL_PRIMARY_COLOR: - argRGB[term] = primary_rgba; - break; - case GL_PREVIOUS: - argRGB[term] = rgba; - break; - case GL_CONSTANT: - { - float4_array c = ccolor[term]; - GLfloat red = textureUnit->EnvColor[0]; - GLfloat green = textureUnit->EnvColor[1]; - GLfloat blue = textureUnit->EnvColor[2]; - GLfloat alpha = textureUnit->EnvColor[3]; - for (i = 0; i < n; i++) { - ASSIGN_4V(c[i], red, green, blue, alpha); - } - argRGB[term] = ccolor[term]; - } - break; - /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources. - */ - case GL_ZERO: - { - float4_array c = ccolor[term]; - for (i = 0; i < n; i++) { - ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F); - } - argRGB[term] = ccolor[term]; - } - break; - case GL_ONE: - { - float4_array c = ccolor[term]; - for (i = 0; i < n; i++) { - ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F); - } - argRGB[term] = ccolor[term]; - } - break; - default: - /* ARB_texture_env_crossbar source */ - { - const GLuint srcUnit = srcRGB - GL_TEXTURE0; - ASSERT(srcUnit < ctx->Const.MaxTextureUnits); - if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) - goto end; - argRGB[term] = get_texel_array(swrast, srcUnit); - } - } - - if (operandRGB != GL_SRC_COLOR) { - float4_array src = argRGB[term]; - float4_array dst = ccolor[term]; - - /* point to new arg[term] storage */ - argRGB[term] = ccolor[term]; - - switch (operandRGB) { - case GL_ONE_MINUS_SRC_COLOR: - for (i = 0; i < n; i++) { - dst[i][RCOMP] = 1.0F - src[i][RCOMP]; - dst[i][GCOMP] = 1.0F - src[i][GCOMP]; - dst[i][BCOMP] = 1.0F - src[i][BCOMP]; - } - break; - case GL_SRC_ALPHA: - for (i = 0; i < n; i++) { - dst[i][RCOMP] = - dst[i][GCOMP] = - dst[i][BCOMP] = src[i][ACOMP]; - } - break; - case GL_ONE_MINUS_SRC_ALPHA: - for (i = 0; i < n; i++) { - dst[i][RCOMP] = - dst[i][GCOMP] = - dst[i][BCOMP] = 1.0F - src[i][ACOMP]; - } - break; - default: - _mesa_problem(ctx, "Bad operandRGB"); - } - } - } - - /* - * Set up the argA[term] pointers - */ - for (term = 0; term < numArgsA; term++) { - const GLenum srcA = combine->SourceA[term]; - const GLenum operandA = combine->OperandA[term]; - - switch (srcA) { - case GL_TEXTURE: - argA[term] = get_texel_array(swrast, unit); - break; - case GL_PRIMARY_COLOR: - argA[term] = primary_rgba; - break; - case GL_PREVIOUS: - argA[term] = rgba; - break; - case GL_CONSTANT: - { - float4_array c = ccolor[term]; - GLfloat alpha = textureUnit->EnvColor[3]; - for (i = 0; i < n; i++) - c[i][ACOMP] = alpha; - argA[term] = ccolor[term]; - } - break; - /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources. - */ - case GL_ZERO: - { - float4_array c = ccolor[term]; - for (i = 0; i < n; i++) - c[i][ACOMP] = 0.0F; - argA[term] = ccolor[term]; - } - break; - case GL_ONE: - { - float4_array c = ccolor[term]; - for (i = 0; i < n; i++) - c[i][ACOMP] = 1.0F; - argA[term] = ccolor[term]; - } - break; - default: - /* ARB_texture_env_crossbar source */ - { - const GLuint srcUnit = srcA - GL_TEXTURE0; - ASSERT(srcUnit < ctx->Const.MaxTextureUnits); - if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) - goto end; - argA[term] = get_texel_array(swrast, srcUnit); - } - } - - if (operandA == GL_ONE_MINUS_SRC_ALPHA) { - float4_array src = argA[term]; - float4_array dst = ccolor[term]; - argA[term] = ccolor[term]; - for (i = 0; i < n; i++) { - dst[i][ACOMP] = 1.0F - src[i][ACOMP]; - } - } - } - - /* RGB channel combine */ - { - float4_array arg0 = argRGB[0]; - float4_array arg1 = argRGB[1]; - float4_array arg2 = argRGB[2]; - float4_array arg3 = argRGB[3]; - - switch (combine->ModeRGB) { - case GL_REPLACE: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB; - rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB; - rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB; - } - break; - case GL_MODULATE: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB; - rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB; - rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB; - } - break; - case GL_ADD: - if (textureUnit->EnvMode == GL_COMBINE4_NV) { - /* (a * b) + (c * d) */ - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] + - arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] + - arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] + - arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB; - } - } - else { - /* 2-term addition */ - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB; - } - } - break; - case GL_ADD_SIGNED: - if (textureUnit->EnvMode == GL_COMBINE4_NV) { - /* (a * b) + (c * d) - 0.5 */ - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] + - arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] + - arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] + - arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB; - } - } - else { - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB; - } - } - break; - case GL_INTERPOLATE: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] + - arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] + - arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] + - arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB; - } - break; - case GL_SUBTRACT: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB; - rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB; - rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB; - } - break; - case GL_DOT3_RGB_EXT: - case GL_DOT3_RGBA_EXT: - /* Do not scale the result by 1 2 or 4 */ - for (i = 0; i < n; i++) { - GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) + - (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) + - (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F)) - * 4.0F; - dot = CLAMP(dot, 0.0F, 1.0F); - rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot; - } - break; - case GL_DOT3_RGB: - case GL_DOT3_RGBA: - /* DO scale the result by 1 2 or 4 */ - for (i = 0; i < n; i++) { - GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) + - (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) + - (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F)) - * 4.0F * scaleRGB; - dot = CLAMP(dot, 0.0F, 1.0F); - rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot; - } - break; - case GL_MODULATE_ADD_ATI: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + - arg1[i][RCOMP]) * scaleRGB; - rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + - arg1[i][GCOMP]) * scaleRGB; - rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + - arg1[i][BCOMP]) * scaleRGB; - } - break; - case GL_MODULATE_SIGNED_ADD_ATI: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + - arg1[i][RCOMP] - 0.5F) * scaleRGB; - rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + - arg1[i][GCOMP] - 0.5F) * scaleRGB; - rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + - arg1[i][BCOMP] - 0.5F) * scaleRGB; - } - break; - case GL_MODULATE_SUBTRACT_ATI: - for (i = 0; i < n; i++) { - rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) - - arg1[i][RCOMP]) * scaleRGB; - rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) - - arg1[i][GCOMP]) * scaleRGB; - rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) - - arg1[i][BCOMP]) * scaleRGB; - } - break; - case GL_BUMP_ENVMAP_ATI: - /* this produces a fixed rgba color, and the coord calc is done elsewhere */ - for (i = 0; i < n; i++) { - /* rgba result is 0,0,0,1 */ - rgba[i][RCOMP] = 0.0; - rgba[i][GCOMP] = 0.0; - rgba[i][BCOMP] = 0.0; - rgba[i][ACOMP] = 1.0; - } - goto end; /* no alpha processing */ - default: - _mesa_problem(ctx, "invalid combine mode"); - } - } - - /* Alpha channel combine */ - { - float4_array arg0 = argA[0]; - float4_array arg1 = argA[1]; - float4_array arg2 = argA[2]; - float4_array arg3 = argA[3]; - - switch (combine->ModeA) { - case GL_REPLACE: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA; - } - break; - case GL_MODULATE: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA; - } - break; - case GL_ADD: - if (textureUnit->EnvMode == GL_COMBINE4_NV) { - /* (a * b) + (c * d) */ - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] + - arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA; - } - } - else { - /* two-term add */ - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA; - } - } - break; - case GL_ADD_SIGNED: - if (textureUnit->EnvMode == GL_COMBINE4_NV) { - /* (a * b) + (c * d) - 0.5 */ - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] + - arg2[i][ACOMP] * arg3[i][ACOMP] - - 0.5F) * scaleA; - } - } - else { - /* a + b - 0.5 */ - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA; - } - } - break; - case GL_INTERPOLATE: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] + - arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP])) - * scaleA; - } - break; - case GL_SUBTRACT: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA; - } - break; - case GL_MODULATE_ADD_ATI: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) - + arg1[i][ACOMP]) * scaleA; - } - break; - case GL_MODULATE_SIGNED_ADD_ATI: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + - arg1[i][ACOMP] - 0.5F) * scaleA; - } - break; - case GL_MODULATE_SUBTRACT_ATI: - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) - - arg1[i][ACOMP]) * scaleA; - } - break; - default: - _mesa_problem(ctx, "invalid combine mode"); - } - } - - /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining. - * This is kind of a kludge. It would have been better if the spec - * were written such that the GL_COMBINE_ALPHA value could be set to - * GL_DOT3. - */ - if (combine->ModeRGB == GL_DOT3_RGBA_EXT || - combine->ModeRGB == GL_DOT3_RGBA) { - for (i = 0; i < n; i++) { - rgba[i][ACOMP] = rgba[i][RCOMP]; - } - } - - for (i = 0; i < n; i++) { - UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]); - UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]); - UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]); - UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]); - } - -end: - for (i = 0; i < numArgsRGB || i < numArgsA; i++) { - free(ccolor[i]); - } - free(rgba); -} - - -/** - * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels. - * See GL_EXT_texture_swizzle. - */ -static void -swizzle_texels(GLuint swizzle, GLuint count, float4_array texels) -{ - const GLuint swzR = GET_SWZ(swizzle, 0); - const GLuint swzG = GET_SWZ(swizzle, 1); - const GLuint swzB = GET_SWZ(swizzle, 2); - const GLuint swzA = GET_SWZ(swizzle, 3); - GLfloat vector[6]; - GLuint i; - - vector[SWIZZLE_ZERO] = 0; - vector[SWIZZLE_ONE] = 1.0F; - - for (i = 0; i < count; i++) { - vector[SWIZZLE_X] = texels[i][0]; - vector[SWIZZLE_Y] = texels[i][1]; - vector[SWIZZLE_Z] = texels[i][2]; - vector[SWIZZLE_W] = texels[i][3]; - texels[i][RCOMP] = vector[swzR]; - texels[i][GCOMP] = vector[swzG]; - texels[i][BCOMP] = vector[swzB]; - texels[i][ACOMP] = vector[swzA]; - } -} - - -/** - * Apply texture mapping to a span of fragments. - */ -void -_swrast_texture_span( struct gl_context *ctx, SWspan *span ) -{ - SWcontext *swrast = SWRAST_CONTEXT(ctx); - float4_array primary_rgba; - GLuint unit; - - primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat)); - - if (!primary_rgba) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span"); - return; - } - - ASSERT(span->end <= MAX_WIDTH); - - /* - * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR) - */ - if (swrast->_TextureCombinePrimary) { - GLuint i; - for (i = 0; i < span->end; i++) { - primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]); - primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]); - primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]); - primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]); - } - } - - /* First must sample all bump maps */ - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - - if (texUnit->_ReallyEnabled && - texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) { - const GLfloat (*texcoords)[4] = (const GLfloat (*)[4]) - span->array->attribs[FRAG_ATTRIB_TEX0 + unit]; - float4_array targetcoords = - span->array->attribs[FRAG_ATTRIB_TEX0 + - ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0]; - - const struct gl_texture_object *curObj = texUnit->_Current; - GLfloat *lambda = span->array->lambda[unit]; - float4_array texels = get_texel_array(swrast, unit); - GLuint i; - GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0]; - GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1]; - GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2]; - GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3]; - - /* adjust texture lod (lambda) */ - if (span->arrayMask & SPAN_LAMBDA) { - if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) { - /* apply LOD bias, but don't clamp yet */ - const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias, - -ctx->Const.MaxTextureLodBias, - ctx->Const.MaxTextureLodBias); - GLuint i; - for (i = 0; i < span->end; i++) { - lambda[i] += bias; - } - } - - if (curObj->Sampler.MinLod != -1000.0 || - curObj->Sampler.MaxLod != 1000.0) { - /* apply LOD clamping to lambda */ - const GLfloat min = curObj->Sampler.MinLod; - const GLfloat max = curObj->Sampler.MaxLod; - GLuint i; - for (i = 0; i < span->end; i++) { - GLfloat l = lambda[i]; - lambda[i] = CLAMP(l, min, max); - } - } - } - - /* Sample the texture (span->end = number of fragments) */ - swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end, - texcoords, lambda, texels ); - - /* manipulate the span values of the bump target - not sure this can work correctly even ignoring - the problem that channel is unsigned */ - for (i = 0; i < span->end; i++) { - targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] * - rotMatrix01) / targetcoords[i][3]; - targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] * - rotMatrix11) / targetcoords[i][3]; - } - } - } - - /* - * Must do all texture sampling before combining in order to - * accomodate GL_ARB_texture_env_crossbar. - */ - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - if (texUnit->_ReallyEnabled && - texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) { - const GLfloat (*texcoords)[4] = (const GLfloat (*)[4]) - span->array->attribs[FRAG_ATTRIB_TEX0 + unit]; - const struct gl_texture_object *curObj = texUnit->_Current; - GLfloat *lambda = span->array->lambda[unit]; - float4_array texels = get_texel_array(swrast, unit); - - /* adjust texture lod (lambda) */ - if (span->arrayMask & SPAN_LAMBDA) { - if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) { - /* apply LOD bias, but don't clamp yet */ - const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias, - -ctx->Const.MaxTextureLodBias, - ctx->Const.MaxTextureLodBias); - GLuint i; - for (i = 0; i < span->end; i++) { - lambda[i] += bias; - } - } - - if (curObj->Sampler.MinLod != -1000.0 || - curObj->Sampler.MaxLod != 1000.0) { - /* apply LOD clamping to lambda */ - const GLfloat min = curObj->Sampler.MinLod; - const GLfloat max = curObj->Sampler.MaxLod; - GLuint i; - for (i = 0; i < span->end; i++) { - GLfloat l = lambda[i]; - lambda[i] = CLAMP(l, min, max); - } - } - } - else if (curObj->Sampler.MaxAnisotropy > 1.0 && - curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) { - /* sample_lambda_2d_aniso is beeing used as texture_sample_func, - * it requires the current SWspan *span as an additional parameter. - * In order to keep the same function signature, the unused lambda - * parameter will be modified to actually contain the SWspan pointer. - * This is a Hack. To make it right, the texture_sample_func - * signature and all implementing functions need to be modified. - */ - /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */ - lambda = (GLfloat *)span; - } - - /* Sample the texture (span->end = number of fragments) */ - swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end, - texcoords, lambda, texels ); - - /* GL_EXT_texture_swizzle */ - if (curObj->_Swizzle != SWIZZLE_NOOP) { - swizzle_texels(curObj->_Swizzle, span->end, texels); - } - } - } - - /* - * OK, now apply the texture (aka texture combine/blend). - * We modify the span->color.rgba values. - */ - for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - texture_combine( ctx, unit, span->end, - primary_rgba, - swrast->TexelBuffer, - span->array->rgba ); - } - } - - free(primary_rgba); -} +/* + * Mesa 3-D graphics library + * Version: 7.5 + * + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/colormac.h" +#include "main/imports.h" +#include "main/pixeltransfer.h" +#include "program/prog_instruction.h" + +#include "s_context.h" +#include "s_texcombine.h" + + +/** + * Pointer to array of float[4] + * This type makes the code below more concise and avoids a lot of casting. + */ +typedef float (*float4_array)[4]; + + +/** + * Return array of texels for given unit. + */ +static INLINE float4_array +get_texel_array(SWcontext *swrast, GLuint unit) +{ +#ifdef _OPENMP + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4 * omp_get_num_threads() + (MAX_WIDTH * 4 * omp_get_thread_num())); +#else + return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4); +#endif +} + + + +/** + * Do texture application for: + * GL_EXT_texture_env_combine + * GL_ARB_texture_env_combine + * GL_EXT_texture_env_dot3 + * GL_ARB_texture_env_dot3 + * GL_ATI_texture_env_combine3 + * GL_NV_texture_env_combine4 + * conventional GL texture env modes + * + * \param ctx rendering context + * \param unit the texture combiner unit + * \param n number of fragments to process (span width) + * \param primary_rgba incoming fragment color array + * \param texelBuffer pointer to texel colors for all texture units + * + * \param rgba incoming/result fragment colors + */ +static void +texture_combine( struct gl_context *ctx, GLuint unit, GLuint n, + const float4_array primary_rgba, + const GLfloat *texelBuffer, + GLchan (*rgbaChan)[4] ) +{ + SWcontext *swrast = SWRAST_CONTEXT(ctx); + const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]); + const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine; + float4_array argRGB[MAX_COMBINER_TERMS]; + float4_array argA[MAX_COMBINER_TERMS]; + const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB); + const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA); + const GLuint numArgsRGB = combine->_NumArgsRGB; + const GLuint numArgsA = combine->_NumArgsA; + float4_array ccolor[4], rgba; + GLuint i, term; + + /* alloc temp pixel buffers */ + rgba = (float4_array) malloc(4 * n * sizeof(GLfloat)); + if (!rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); + return; + } + + for (i = 0; i < numArgsRGB || i < numArgsA; i++) { + ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat)); + if (!ccolor[i]) { + while (i) { + free(ccolor[i]); + i--; + } + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine"); + return; + } + } + + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]); + rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]); + rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]); + rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]); + } + + /* + printf("modeRGB 0x%x modeA 0x%x srcRGB1 0x%x srcA1 0x%x srcRGB2 0x%x srcA2 0x%x\n", + combine->ModeRGB, + combine->ModeA, + combine->SourceRGB[0], + combine->SourceA[0], + combine->SourceRGB[1], + combine->SourceA[1]); + */ + + /* + * Do operand setup for up to 4 operands. Loop over the terms. + */ + for (term = 0; term < numArgsRGB; term++) { + const GLenum srcRGB = combine->SourceRGB[term]; + const GLenum operandRGB = combine->OperandRGB[term]; + + switch (srcRGB) { + case GL_TEXTURE: + argRGB[term] = get_texel_array(swrast, unit); + break; + case GL_PRIMARY_COLOR: + argRGB[term] = primary_rgba; + break; + case GL_PREVIOUS: + argRGB[term] = rgba; + break; + case GL_CONSTANT: + { + float4_array c = ccolor[term]; + GLfloat red = textureUnit->EnvColor[0]; + GLfloat green = textureUnit->EnvColor[1]; + GLfloat blue = textureUnit->EnvColor[2]; + GLfloat alpha = textureUnit->EnvColor[3]; + for (i = 0; i < n; i++) { + ASSIGN_4V(c[i], red, green, blue, alpha); + } + argRGB[term] = ccolor[term]; + } + break; + /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources. + */ + case GL_ZERO: + { + float4_array c = ccolor[term]; + for (i = 0; i < n; i++) { + ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F); + } + argRGB[term] = ccolor[term]; + } + break; + case GL_ONE: + { + float4_array c = ccolor[term]; + for (i = 0; i < n; i++) { + ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F); + } + argRGB[term] = ccolor[term]; + } + break; + default: + /* ARB_texture_env_crossbar source */ + { + const GLuint srcUnit = srcRGB - GL_TEXTURE0; + ASSERT(srcUnit < ctx->Const.MaxTextureUnits); + if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) + goto end; + argRGB[term] = get_texel_array(swrast, srcUnit); + } + } + + if (operandRGB != GL_SRC_COLOR) { + float4_array src = argRGB[term]; + float4_array dst = ccolor[term]; + + /* point to new arg[term] storage */ + argRGB[term] = ccolor[term]; + + switch (operandRGB) { + case GL_ONE_MINUS_SRC_COLOR: + for (i = 0; i < n; i++) { + dst[i][RCOMP] = 1.0F - src[i][RCOMP]; + dst[i][GCOMP] = 1.0F - src[i][GCOMP]; + dst[i][BCOMP] = 1.0F - src[i][BCOMP]; + } + break; + case GL_SRC_ALPHA: + for (i = 0; i < n; i++) { + dst[i][RCOMP] = + dst[i][GCOMP] = + dst[i][BCOMP] = src[i][ACOMP]; + } + break; + case GL_ONE_MINUS_SRC_ALPHA: + for (i = 0; i < n; i++) { + dst[i][RCOMP] = + dst[i][GCOMP] = + dst[i][BCOMP] = 1.0F - src[i][ACOMP]; + } + break; + default: + _mesa_problem(ctx, "Bad operandRGB"); + } + } + } + + /* + * Set up the argA[term] pointers + */ + for (term = 0; term < numArgsA; term++) { + const GLenum srcA = combine->SourceA[term]; + const GLenum operandA = combine->OperandA[term]; + + switch (srcA) { + case GL_TEXTURE: + argA[term] = get_texel_array(swrast, unit); + break; + case GL_PRIMARY_COLOR: + argA[term] = primary_rgba; + break; + case GL_PREVIOUS: + argA[term] = rgba; + break; + case GL_CONSTANT: + { + float4_array c = ccolor[term]; + GLfloat alpha = textureUnit->EnvColor[3]; + for (i = 0; i < n; i++) + c[i][ACOMP] = alpha; + argA[term] = ccolor[term]; + } + break; + /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources. + */ + case GL_ZERO: + { + float4_array c = ccolor[term]; + for (i = 0; i < n; i++) + c[i][ACOMP] = 0.0F; + argA[term] = ccolor[term]; + } + break; + case GL_ONE: + { + float4_array c = ccolor[term]; + for (i = 0; i < n; i++) + c[i][ACOMP] = 1.0F; + argA[term] = ccolor[term]; + } + break; + default: + /* ARB_texture_env_crossbar source */ + { + const GLuint srcUnit = srcA - GL_TEXTURE0; + ASSERT(srcUnit < ctx->Const.MaxTextureUnits); + if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled) + goto end; + argA[term] = get_texel_array(swrast, srcUnit); + } + } + + if (operandA == GL_ONE_MINUS_SRC_ALPHA) { + float4_array src = argA[term]; + float4_array dst = ccolor[term]; + argA[term] = ccolor[term]; + for (i = 0; i < n; i++) { + dst[i][ACOMP] = 1.0F - src[i][ACOMP]; + } + } + } + + /* RGB channel combine */ + { + float4_array arg0 = argRGB[0]; + float4_array arg1 = argRGB[1]; + float4_array arg2 = argRGB[2]; + float4_array arg3 = argRGB[3]; + + switch (combine->ModeRGB) { + case GL_REPLACE: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB; + rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB; + rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB; + } + break; + case GL_MODULATE: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB; + rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB; + rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB; + } + break; + case GL_ADD: + if (textureUnit->EnvMode == GL_COMBINE4_NV) { + /* (a * b) + (c * d) */ + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] + + arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] + + arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] + + arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB; + } + } + else { + /* 2-term addition */ + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB; + } + } + break; + case GL_ADD_SIGNED: + if (textureUnit->EnvMode == GL_COMBINE4_NV) { + /* (a * b) + (c * d) - 0.5 */ + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] + + arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] + + arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] + + arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB; + } + } + else { + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB; + } + } + break; + case GL_INTERPOLATE: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] + + arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] + + arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] + + arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB; + } + break; + case GL_SUBTRACT: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB; + rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB; + rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB; + } + break; + case GL_DOT3_RGB_EXT: + case GL_DOT3_RGBA_EXT: + /* Do not scale the result by 1 2 or 4 */ + for (i = 0; i < n; i++) { + GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) + + (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) + + (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F)) + * 4.0F; + dot = CLAMP(dot, 0.0F, 1.0F); + rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot; + } + break; + case GL_DOT3_RGB: + case GL_DOT3_RGBA: + /* DO scale the result by 1 2 or 4 */ + for (i = 0; i < n; i++) { + GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) + + (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) + + (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F)) + * 4.0F * scaleRGB; + dot = CLAMP(dot, 0.0F, 1.0F); + rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot; + } + break; + case GL_MODULATE_ADD_ATI: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + + arg1[i][RCOMP]) * scaleRGB; + rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + + arg1[i][GCOMP]) * scaleRGB; + rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + + arg1[i][BCOMP]) * scaleRGB; + } + break; + case GL_MODULATE_SIGNED_ADD_ATI: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) + + arg1[i][RCOMP] - 0.5F) * scaleRGB; + rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) + + arg1[i][GCOMP] - 0.5F) * scaleRGB; + rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) + + arg1[i][BCOMP] - 0.5F) * scaleRGB; + } + break; + case GL_MODULATE_SUBTRACT_ATI: + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) - + arg1[i][RCOMP]) * scaleRGB; + rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) - + arg1[i][GCOMP]) * scaleRGB; + rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) - + arg1[i][BCOMP]) * scaleRGB; + } + break; + case GL_BUMP_ENVMAP_ATI: + /* this produces a fixed rgba color, and the coord calc is done elsewhere */ + for (i = 0; i < n; i++) { + /* rgba result is 0,0,0,1 */ + rgba[i][RCOMP] = 0.0; + rgba[i][GCOMP] = 0.0; + rgba[i][BCOMP] = 0.0; + rgba[i][ACOMP] = 1.0; + } + goto end; /* no alpha processing */ + default: + _mesa_problem(ctx, "invalid combine mode"); + } + } + + /* Alpha channel combine */ + { + float4_array arg0 = argA[0]; + float4_array arg1 = argA[1]; + float4_array arg2 = argA[2]; + float4_array arg3 = argA[3]; + + switch (combine->ModeA) { + case GL_REPLACE: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA; + } + break; + case GL_MODULATE: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA; + } + break; + case GL_ADD: + if (textureUnit->EnvMode == GL_COMBINE4_NV) { + /* (a * b) + (c * d) */ + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] + + arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA; + } + } + else { + /* two-term add */ + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA; + } + } + break; + case GL_ADD_SIGNED: + if (textureUnit->EnvMode == GL_COMBINE4_NV) { + /* (a * b) + (c * d) - 0.5 */ + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] + + arg2[i][ACOMP] * arg3[i][ACOMP] - + 0.5F) * scaleA; + } + } + else { + /* a + b - 0.5 */ + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA; + } + } + break; + case GL_INTERPOLATE: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] + + arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP])) + * scaleA; + } + break; + case GL_SUBTRACT: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA; + } + break; + case GL_MODULATE_ADD_ATI: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + + arg1[i][ACOMP]) * scaleA; + } + break; + case GL_MODULATE_SIGNED_ADD_ATI: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + + arg1[i][ACOMP] - 0.5F) * scaleA; + } + break; + case GL_MODULATE_SUBTRACT_ATI: + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) + - arg1[i][ACOMP]) * scaleA; + } + break; + default: + _mesa_problem(ctx, "invalid combine mode"); + } + } + + /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining. + * This is kind of a kludge. It would have been better if the spec + * were written such that the GL_COMBINE_ALPHA value could be set to + * GL_DOT3. + */ + if (combine->ModeRGB == GL_DOT3_RGBA_EXT || + combine->ModeRGB == GL_DOT3_RGBA) { + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = rgba[i][RCOMP]; + } + } + + for (i = 0; i < n; i++) { + UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]); + UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]); + UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]); + UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]); + } + +end: + for (i = 0; i < numArgsRGB || i < numArgsA; i++) { + free(ccolor[i]); + } + free(rgba); +} + + +/** + * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels. + * See GL_EXT_texture_swizzle. + */ +static void +swizzle_texels(GLuint swizzle, GLuint count, float4_array texels) +{ + const GLuint swzR = GET_SWZ(swizzle, 0); + const GLuint swzG = GET_SWZ(swizzle, 1); + const GLuint swzB = GET_SWZ(swizzle, 2); + const GLuint swzA = GET_SWZ(swizzle, 3); + GLfloat vector[6]; + GLuint i; + + vector[SWIZZLE_ZERO] = 0; + vector[SWIZZLE_ONE] = 1.0F; + + for (i = 0; i < count; i++) { + vector[SWIZZLE_X] = texels[i][0]; + vector[SWIZZLE_Y] = texels[i][1]; + vector[SWIZZLE_Z] = texels[i][2]; + vector[SWIZZLE_W] = texels[i][3]; + texels[i][RCOMP] = vector[swzR]; + texels[i][GCOMP] = vector[swzG]; + texels[i][BCOMP] = vector[swzB]; + texels[i][ACOMP] = vector[swzA]; + } +} + + +/** + * Apply texture mapping to a span of fragments. + */ +void +_swrast_texture_span( struct gl_context *ctx, SWspan *span ) +{ + SWcontext *swrast = SWRAST_CONTEXT(ctx); + float4_array primary_rgba; + GLuint unit; + + primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat)); + + if (!primary_rgba) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span"); + return; + } + + ASSERT(span->end <= MAX_WIDTH); + + /* + * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR) + */ + if (swrast->_TextureCombinePrimary) { + GLuint i; + for (i = 0; i < span->end; i++) { + primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]); + primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]); + primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]); + primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]); + } + } + + /* First must sample all bump maps */ + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + if (texUnit->_ReallyEnabled && + texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) { + const GLfloat (*texcoords)[4] = (const GLfloat (*)[4]) + span->array->attribs[FRAG_ATTRIB_TEX0 + unit]; + float4_array targetcoords = + span->array->attribs[FRAG_ATTRIB_TEX0 + + ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0]; + + const struct gl_texture_object *curObj = texUnit->_Current; + GLfloat *lambda = span->array->lambda[unit]; + float4_array texels = get_texel_array(swrast, unit); + GLuint i; + GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0]; + GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1]; + GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2]; + GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3]; + + /* adjust texture lod (lambda) */ + if (span->arrayMask & SPAN_LAMBDA) { + if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) { + /* apply LOD bias, but don't clamp yet */ + const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias, + -ctx->Const.MaxTextureLodBias, + ctx->Const.MaxTextureLodBias); + GLuint i; + for (i = 0; i < span->end; i++) { + lambda[i] += bias; + } + } + + if (curObj->Sampler.MinLod != -1000.0 || + curObj->Sampler.MaxLod != 1000.0) { + /* apply LOD clamping to lambda */ + const GLfloat min = curObj->Sampler.MinLod; + const GLfloat max = curObj->Sampler.MaxLod; + GLuint i; + for (i = 0; i < span->end; i++) { + GLfloat l = lambda[i]; + lambda[i] = CLAMP(l, min, max); + } + } + } + + /* Sample the texture (span->end = number of fragments) */ + swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end, + texcoords, lambda, texels ); + + /* manipulate the span values of the bump target + not sure this can work correctly even ignoring + the problem that channel is unsigned */ + for (i = 0; i < span->end; i++) { + targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] * + rotMatrix01) / targetcoords[i][3]; + targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] * + rotMatrix11) / targetcoords[i][3]; + } + } + } + + /* + * Must do all texture sampling before combining in order to + * accomodate GL_ARB_texture_env_crossbar. + */ + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + if (texUnit->_ReallyEnabled && + texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) { + const GLfloat (*texcoords)[4] = (const GLfloat (*)[4]) + span->array->attribs[FRAG_ATTRIB_TEX0 + unit]; + const struct gl_texture_object *curObj = texUnit->_Current; + GLfloat *lambda = span->array->lambda[unit]; + float4_array texels = get_texel_array(swrast, unit); + + /* adjust texture lod (lambda) */ + if (span->arrayMask & SPAN_LAMBDA) { + if (texUnit->LodBias + curObj->Sampler.LodBias != 0.0F) { + /* apply LOD bias, but don't clamp yet */ + const GLfloat bias = CLAMP(texUnit->LodBias + curObj->Sampler.LodBias, + -ctx->Const.MaxTextureLodBias, + ctx->Const.MaxTextureLodBias); + GLuint i; + for (i = 0; i < span->end; i++) { + lambda[i] += bias; + } + } + + if (curObj->Sampler.MinLod != -1000.0 || + curObj->Sampler.MaxLod != 1000.0) { + /* apply LOD clamping to lambda */ + const GLfloat min = curObj->Sampler.MinLod; + const GLfloat max = curObj->Sampler.MaxLod; + GLuint i; + for (i = 0; i < span->end; i++) { + GLfloat l = lambda[i]; + lambda[i] = CLAMP(l, min, max); + } + } + } + else if (curObj->Sampler.MaxAnisotropy > 1.0 && + curObj->Sampler.MinFilter == GL_LINEAR_MIPMAP_LINEAR) { + /* sample_lambda_2d_aniso is beeing used as texture_sample_func, + * it requires the current SWspan *span as an additional parameter. + * In order to keep the same function signature, the unused lambda + * parameter will be modified to actually contain the SWspan pointer. + * This is a Hack. To make it right, the texture_sample_func + * signature and all implementing functions need to be modified. + */ + /* "hide" SWspan struct; cast to (GLfloat *) to suppress warning */ + lambda = (GLfloat *)span; + } + + /* Sample the texture (span->end = number of fragments) */ + swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end, + texcoords, lambda, texels ); + + /* GL_EXT_texture_swizzle */ + if (curObj->_Swizzle != SWIZZLE_NOOP) { + swizzle_texels(curObj->_Swizzle, span->end, texels); + } + } + } + + /* + * OK, now apply the texture (aka texture combine/blend). + * We modify the span->color.rgba values. + */ + for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + texture_combine( ctx, unit, span->end, + primary_rgba, + swrast->TexelBuffer, + span->array->rgba ); + } + } + + free(primary_rgba); +} -- cgit v1.2.3