From a18769801e521ccb5a409c377ac0a6a1141f5776 Mon Sep 17 00:00:00 2001
From: marha <marha@users.sourceforge.net>
Date: Tue, 1 Mar 2011 07:17:23 +0000
Subject: pixman mesa git update 1 Mar 2011

---
 mesalib/src/mesa/swrast/s_bitmap.c     |  445 +++++-----
 mesalib/src/mesa/swrast/s_drawpix.c    | 1505 ++++++++++++++++----------------
 mesalib/src/mesa/swrast/s_readpix.c    | 1016 ++++++++++-----------
 mesalib/src/mesa/swrast/s_texcombine.c | 1479 ++++++++++++++++---------------
 4 files changed, 2221 insertions(+), 2224 deletions(-)

(limited to 'mesalib/src/mesa/swrast')

diff --git a/mesalib/src/mesa/swrast/s_bitmap.c b/mesalib/src/mesa/swrast/s_bitmap.c
index 9b52e9053..18f1c1866 100644
--- a/mesalib/src/mesa/swrast/s_bitmap.c
+++ b/mesalib/src/mesa/swrast/s_bitmap.c
@@ -1,222 +1,223 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.1
- *
- * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file swrast/s_bitmap.c
- * \brief glBitmap rendering.
- * \author Brian Paul
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/condrender.h"
-#include "main/image.h"
-#include "main/macros.h"
-
-#include "s_context.h"
-#include "s_span.h"
-
-
-
-/**
- * Render a bitmap.
- * Called via ctx->Driver.Bitmap()
- * All parameter error checking will have been done before this is called.
- */
-void
-_swrast_Bitmap( struct gl_context *ctx, GLint px, GLint py,
-		GLsizei width, GLsizei height,
-		const struct gl_pixelstore_attrib *unpack,
-		const GLubyte *bitmap )
-{
-   GLint row, col;
-   GLuint count = 0;
-   SWspan span;
-
-   ASSERT(ctx->RenderMode == GL_RENDER);
-
-   if (!_mesa_check_conditional_render(ctx))
-      return; /* don't draw */
-
-   bitmap = (const GLubyte *) _mesa_map_pbo_source(ctx, unpack, bitmap);
-   if (!bitmap)
-      return;
-
-   swrast_render_start(ctx);
-
-   if (SWRAST_CONTEXT(ctx)->NewState)
-      _swrast_validate_derived( ctx );
-
-   INIT_SPAN(span, GL_BITMAP);
-   span.end = width;
-   span.arrayMask = SPAN_XY;
-   _swrast_span_default_attribs(ctx, &span);
-
-   for (row = 0; row < height; row++) {
-      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
-                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);
-
-      if (unpack->LsbFirst) {
-         /* Lsb first */
-         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);
-         for (col = 0; col < width; col++) {
-            if (*src & mask) {
-               span.array->x[count] = px + col;
-               span.array->y[count] = py + row;
-               count++;
-            }
-            if (mask == 128U) {
-               src++;
-               mask = 1U;
-            }
-            else {
-               mask = mask << 1;
-            }
-         }
-
-         /* get ready for next row */
-         if (mask != 1)
-            src++;
-      }
-      else {
-         /* Msb first */
-         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
-         for (col = 0; col < width; col++) {
-            if (*src & mask) {
-               span.array->x[count] = px + col;
-               span.array->y[count] = py + row;
-               count++;
-            }
-            if (mask == 1U) {
-               src++;
-               mask = 128U;
-            }
-            else {
-               mask = mask >> 1;
-            }
-         }
-
-         /* get ready for next row */
-         if (mask != 128)
-            src++;
-      }
-
-      if (count + width >= MAX_WIDTH || row + 1 == height) {
-         /* flush the span */
-         span.end = count;
-         _swrast_write_rgba_span(ctx, &span);
-         span.end = 0;
-         count = 0;
-      }
-   }
-
-   swrast_render_finish(ctx);
-
-   _mesa_unmap_pbo_source(ctx, unpack);
-}
-
-
-#if 0
-/*
- * XXX this is another way to implement Bitmap.  Use horizontal runs of
- * fragments, initializing the mask array to indicate which fragments to
- * draw or skip.
- */
-void
-_swrast_Bitmap( struct gl_context *ctx, GLint px, GLint py,
-		GLsizei width, GLsizei height,
-		const struct gl_pixelstore_attrib *unpack,
-		const GLubyte *bitmap )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLint row, col;
-   SWspan span;
-
-   ASSERT(ctx->RenderMode == GL_RENDER);
-   ASSERT(bitmap);
-
-   swrast_render_start(ctx);
-
-   if (SWRAST_CONTEXT(ctx)->NewState)
-      _swrast_validate_derived( ctx );
-
-   INIT_SPAN(span, GL_BITMAP);
-   span.end = width;
-   span.arrayMask = SPAN_MASK;
-   _swrast_span_default_attribs(ctx, &span);
-
-   /*span.arrayMask |= SPAN_MASK;*/  /* we'll init span.mask[] */
-   span.x = px;
-   span.y = py;
-   /*span.end = width;*/
-
-   for (row=0; row<height; row++, span.y++) {
-      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
-                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);
-
-      if (unpack->LsbFirst) {
-         /* Lsb first */
-         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);
-         for (col=0; col<width; col++) {
-            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;
-            if (mask == 128U) {
-               src++;
-               mask = 1U;
-            }
-            else {
-               mask = mask << 1;
-            }
-         }
-
-         _swrast_write_rgba_span(ctx, &span);
-
-         /* get ready for next row */
-         if (mask != 1)
-            src++;
-      }
-      else {
-         /* Msb first */
-         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
-         for (col=0; col<width; col++) {
-            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;
-            if (mask == 1U) {
-               src++;
-               mask = 128U;
-            }
-            else {
-               mask = mask >> 1;
-            }
-         }
-
-         _swrast_write_rgba_span(ctx, &span);
-
-         /* get ready for next row */
-         if (mask != 128)
-            src++;
-      }
-   }
-
-   swrast_render_finish(ctx);
-}
-#endif
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file swrast/s_bitmap.c
+ * \brief glBitmap rendering.
+ * \author Brian Paul
+ */
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/condrender.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/pbo.h"
+
+#include "s_context.h"
+#include "s_span.h"
+
+
+
+/**
+ * Render a bitmap: every set bit queues one fragment at (px + col, py + row).
+ * Called via ctx->Driver.Bitmap()
+ * All parameter error checking will have been done before this is called.
+ */
+void
+_swrast_Bitmap( struct gl_context *ctx, GLint px, GLint py,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   GLint row, col;
+   GLuint count = 0;   /* fragments queued in span.array since the last flush */
+   SWspan span;
+
+   ASSERT(ctx->RenderMode == GL_RENDER);
+
+   if (!_mesa_check_conditional_render(ctx))
+      return; /* don't draw */
+
+   bitmap = (const GLubyte *) _mesa_map_pbo_source(ctx, unpack, bitmap);   /* replaces the caller's pointer */
+   if (!bitmap)
+      return;   /* returns before swrast_render_start(), so there is nothing to finish here */
+
+   swrast_render_start(ctx);
+
+   if (SWRAST_CONTEXT(ctx)->NewState)
+      _swrast_validate_derived( ctx );
+
+   INIT_SPAN(span, GL_BITMAP);
+   span.end = width;   /* set to 'count' again before every _swrast_write_rgba_span() call below */
+   span.arrayMask = SPAN_XY;   /* positions are supplied through span.array->x[] / y[] */
+   _swrast_span_default_attribs(ctx, &span);
+
+   for (row = 0; row < height; row++) {
+      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
+                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);   /* looked up afresh for each row */
+
+      if (unpack->LsbFirst) {
+         /* Lsb first: walk each byte from bit 0 up to bit 7 */
+         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);   /* first bit tested within *src */
+         for (col = 0; col < width; col++) {
+            if (*src & mask) {
+               span.array->x[count] = px + col;
+               span.array->y[count] = py + row;
+               count++;
+            }
+            if (mask == 128U) {   /* top bit consumed: move on to the next byte */
+               src++;
+               mask = 1U;
+            }
+            else {
+               mask = mask << 1;
+            }
+         }
+
+         /* get ready for next row (src is re-derived at the top of the loop, so nothing reads this) */
+         if (mask != 1)
+            src++;
+      }
+      else {
+         /* Msb first: walk each byte from bit 7 down to bit 0 */
+         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);   /* first bit tested within *src */
+         for (col = 0; col < width; col++) {
+            if (*src & mask) {
+               span.array->x[count] = px + col;
+               span.array->y[count] = py + row;
+               count++;
+            }
+            if (mask == 1U) {   /* bottom bit consumed: move on to the next byte */
+               src++;
+               mask = 128U;
+            }
+            else {
+               mask = mask >> 1;
+            }
+         }
+
+         /* get ready for next row (src is re-derived at the top of the loop, so nothing reads this) */
+         if (mask != 128)
+            src++;
+      }
+
+      if (count + width >= MAX_WIDTH || row + 1 == height) {   /* NOTE(review): checked only after the row was stored; confirm width cannot exceed the span array capacity */
+         /* flush the span once count plus one more row of bits reaches MAX_WIDTH, or after the last row */
+         span.end = count;
+         _swrast_write_rgba_span(ctx, &span);
+         span.end = 0;
+         count = 0;
+      }
+   }
+
+   swrast_render_finish(ctx);
+
+   _mesa_unmap_pbo_source(ctx, unpack);   /* counterpart of the _mesa_map_pbo_source() call above */
+}
+
+
+#if 0
+/*
+ * XXX this is another way to implement Bitmap.  Use horizontal runs of
+ * fragments, initializing the mask array to indicate which fragments to
+ * draw or skip.  One span of 'width' fragments is written per bitmap row.
+ */
+void
+_swrast_Bitmap( struct gl_context *ctx, GLint px, GLint py,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);   /* not referenced again in this function */
+   GLint row, col;
+   SWspan span;
+
+   ASSERT(ctx->RenderMode == GL_RENDER);
+   ASSERT(bitmap);   /* this variant uses 'bitmap' directly; it never calls _mesa_map_pbo_source() */
+
+   swrast_render_start(ctx);
+
+   if (SWRAST_CONTEXT(ctx)->NewState)
+      _swrast_validate_derived( ctx );
+
+   INIT_SPAN(span, GL_BITMAP);
+   span.end = width;   /* NOTE(review): set once, outside the row loop; confirm _swrast_write_rgba_span() leaves it intact */
+   span.arrayMask = SPAN_MASK;
+   _swrast_span_default_attribs(ctx, &span);
+
+   /*span.arrayMask |= SPAN_MASK;*/  /* we'll init span.mask[] */
+   span.x = px;   /* also set only once; span.y is advanced by the loop header below */
+   span.y = py;
+   /*span.end = width;*/
+
+   for (row=0; row<height; row++, span.y++) {
+      const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack,
+                 bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0);   /* looked up afresh for each row */
+
+      if (unpack->LsbFirst) {
+         /* Lsb first: walk each byte from bit 0 up to bit 7 */
+         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);   /* first bit tested within *src */
+         for (col=0; col<width; col++) {
+            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;   /* one mask entry per bitmap column */
+            if (mask == 128U) {   /* top bit consumed: move on to the next byte */
+               src++;
+               mask = 1U;
+            }
+            else {
+               mask = mask << 1;
+            }
+         }
+
+         _swrast_write_rgba_span(ctx, &span);
+
+         /* get ready for next row (src is re-derived at the top of the loop, so nothing reads this) */
+         if (mask != 1)
+            src++;
+      }
+      else {
+         /* Msb first: walk each byte from bit 7 down to bit 0 */
+         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);   /* first bit tested within *src */
+         for (col=0; col<width; col++) {
+            span.array->mask[col] = (*src & mask) ? GL_TRUE : GL_FALSE;   /* one mask entry per bitmap column */
+            if (mask == 1U) {   /* bottom bit consumed: move on to the next byte */
+               src++;
+               mask = 128U;
+            }
+            else {
+               mask = mask >> 1;
+            }
+         }
+
+         _swrast_write_rgba_span(ctx, &span);
+
+         /* get ready for next row (src is re-derived at the top of the loop, so nothing reads this) */
+         if (mask != 128)
+            src++;
+      }
+   }
+
+   swrast_render_finish(ctx);
+}
+#endif
diff --git a/mesalib/src/mesa/swrast/s_drawpix.c b/mesalib/src/mesa/swrast/s_drawpix.c
index cca75784a..11c63457f 100644
--- a/mesalib/src/mesa/swrast/s_drawpix.c
+++ b/mesalib/src/mesa/swrast/s_drawpix.c
@@ -1,752 +1,753 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.1
- *
- * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/condrender.h"
-#include "main/context.h"
-#include "main/image.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "main/pack.h"
-#include "main/pixeltransfer.h"
-#include "main/state.h"
-
-#include "s_context.h"
-#include "s_span.h"
-#include "s_stencil.h"
-#include "s_zoom.h"
-
-
-
-/**
- * Try to do a fast and simple RGB(a) glDrawPixels.
- * Return:  GL_TRUE if success, GL_FALSE if slow path must be used instead
- */
-static GLboolean
-fast_draw_rgba_pixels(struct gl_context *ctx, GLint x, GLint y,
-                      GLsizei width, GLsizei height,
-                      GLenum format, GLenum type,
-                      const struct gl_pixelstore_attrib *userUnpack,
-                      const GLvoid *pixels)
-{
-   const GLint imgX = x, imgY = y;
-   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
-   GLenum rbType;
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   SWspan span;
-   GLboolean simpleZoom;
-   GLint yStep;  /* +1 or -1 */
-   struct gl_pixelstore_attrib unpack;
-   GLint destX, destY, drawWidth, drawHeight; /* post clipping */
-
-   if (!rb)
-      return GL_TRUE; /* no-op */
-
-   rbType = rb->DataType;
-
-   if ((swrast->_RasterMask & ~CLIP_BIT) ||
-       ctx->Texture._EnabledCoordUnits ||
-       userUnpack->SwapBytes ||
-       ctx->_ImageTransferState) {
-      /* can't handle any of those conditions */
-      return GL_FALSE;
-   }
-
-   INIT_SPAN(span, GL_BITMAP);
-   span.arrayMask = SPAN_RGBA;
-   span.arrayAttribs = FRAG_BIT_COL0;
-   _swrast_span_default_attribs(ctx, &span);
-
-   /* copy input params since clipping may change them */
-   unpack = *userUnpack;
-   destX = x;
-   destY = y;
-   drawWidth = width;
-   drawHeight = height;
-
-   /* check for simple zooming and clipping */
-   if (ctx->Pixel.ZoomX == 1.0F &&
-       (ctx->Pixel.ZoomY == 1.0F || ctx->Pixel.ZoomY == -1.0F)) {
-      if (!_mesa_clip_drawpixels(ctx, &destX, &destY,
-                                 &drawWidth, &drawHeight, &unpack)) {
-         /* image was completely clipped: no-op, all done */
-         return GL_TRUE;
-      }
-      simpleZoom = GL_TRUE;
-      yStep = (GLint) ctx->Pixel.ZoomY;
-      ASSERT(yStep == 1 || yStep == -1);
-   }
-   else {
-      /* non-simple zooming */
-      simpleZoom = GL_FALSE;
-      yStep = 1;
-      if (unpack.RowLength == 0)
-         unpack.RowLength = width;
-   }
-
-   /*
-    * Ready to draw!
-    */
-
-   if (format == GL_RGBA && type == rbType) {
-      const GLubyte *src
-         = (const GLubyte *) _mesa_image_address2d(&unpack, pixels, width,
-                                                   height, format, type, 0, 0);
-      const GLint srcStride = _mesa_image_row_stride(&unpack, width,
-                                                     format, type);
-      if (simpleZoom) {
-         GLint row;
-         for (row = 0; row < drawHeight; row++) {
-            rb->PutRow(ctx, rb, drawWidth, destX, destY, src, NULL);
-            src += srcStride;
-            destY += yStep;
-         }
-      }
-      else {
-         /* with zooming */
-         GLint row;
-         for (row = 0; row < drawHeight; row++) {
-            span.x = destX;
-            span.y = destY + row;
-            span.end = drawWidth;
-            span.array->ChanType = rbType;
-            _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span, src);
-            src += srcStride;
-         }
-         span.array->ChanType = CHAN_TYPE;
-      }
-      return GL_TRUE;
-   }
-
-   if (format == GL_RGB && type == rbType) {
-      const GLubyte *src
-         = (const GLubyte *) _mesa_image_address2d(&unpack, pixels, width,
-                                                   height, format, type, 0, 0);
-      const GLint srcStride = _mesa_image_row_stride(&unpack, width,
-                                                     format, type);
-      if (simpleZoom) {
-         GLint row;
-         for (row = 0; row < drawHeight; row++) {
-            rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, src, NULL);
-            src += srcStride;
-            destY += yStep;
-         }
-      }
-      else {
-         /* with zooming */
-         GLint row;
-         for (row = 0; row < drawHeight; row++) {
-            span.x = destX;
-            span.y = destY;
-            span.end = drawWidth;
-            span.array->ChanType = rbType;
-            _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span, src);
-            src += srcStride;
-            destY++;
-         }
-         span.array->ChanType = CHAN_TYPE;
-      }
-      return GL_TRUE;
-   }
-
-   /* Remaining cases haven't been tested with alignment != 1 */
-   if (userUnpack->Alignment != 1)
-      return GL_FALSE;
-
-   if (format == GL_LUMINANCE && type == CHAN_TYPE && rbType == CHAN_TYPE) {
-      const GLchan *src = (const GLchan *) pixels
-         + (unpack.SkipRows * unpack.RowLength + unpack.SkipPixels);
-      if (simpleZoom) {
-         /* no zooming */
-         GLint row;
-         ASSERT(drawWidth <= MAX_WIDTH);
-         for (row = 0; row < drawHeight; row++) {
-            GLchan rgb[MAX_WIDTH][3];
-            GLint i;
-            for (i = 0;i<drawWidth;i++) {
-               rgb[i][0] = src[i];
-               rgb[i][1] = src[i];
-               rgb[i][2] = src[i];
-            }
-            rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, rgb, NULL);
-            src += unpack.RowLength;
-            destY += yStep;
-         }
-      }
-      else {
-         /* with zooming */
-         GLint row;
-         ASSERT(drawWidth <= MAX_WIDTH);
-         for (row = 0; row < drawHeight; row++) {
-            GLchan rgb[MAX_WIDTH][3];
-            GLint i;
-            for (i = 0;i<drawWidth;i++) {
-               rgb[i][0] = src[i];
-               rgb[i][1] = src[i];
-               rgb[i][2] = src[i];
-            }
-            span.x = destX;
-            span.y = destY;
-            span.end = drawWidth;
-            _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span, rgb);
-            src += unpack.RowLength;
-            destY++;
-         }
-      }
-      return GL_TRUE;
-   }
-
-   if (format == GL_LUMINANCE_ALPHA && type == CHAN_TYPE && rbType == CHAN_TYPE) {
-      const GLchan *src = (const GLchan *) pixels
-         + (unpack.SkipRows * unpack.RowLength + unpack.SkipPixels)*2;
-      if (simpleZoom) {
-         GLint row;
-         ASSERT(drawWidth <= MAX_WIDTH);
-         for (row = 0; row < drawHeight; row++) {
-            GLint i;
-            const GLchan *ptr = src;
-            for (i = 0;i<drawWidth;i++) {
-               span.array->rgba[i][0] = *ptr;
-               span.array->rgba[i][1] = *ptr;
-               span.array->rgba[i][2] = *ptr++;
-               span.array->rgba[i][3] = *ptr++;
-            }
-            rb->PutRow(ctx, rb, drawWidth, destX, destY,
-                       span.array->rgba, NULL);
-            src += unpack.RowLength*2;
-            destY += yStep;
-         }
-      }
-      else {
-         /* with zooming */
-         GLint row;
-         ASSERT(drawWidth <= MAX_WIDTH);
-         for (row = 0; row < drawHeight; row++) {
-            const GLchan *ptr = src;
-            GLint i;
-            for (i = 0;i<drawWidth;i++) {
-               span.array->rgba[i][0] = *ptr;
-               span.array->rgba[i][1] = *ptr;
-               span.array->rgba[i][2] = *ptr++;
-               span.array->rgba[i][3] = *ptr++;
-            }
-            span.x = destX;
-            span.y = destY;
-            span.end = drawWidth;
-            _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
-                                           span.array->rgba);
-            src += unpack.RowLength*2;
-            destY++;
-         }
-      }
-      return GL_TRUE;
-   }
-
-   if (format == GL_COLOR_INDEX && type == GL_UNSIGNED_BYTE) {
-      const GLubyte *src = (const GLubyte *) pixels
-         + unpack.SkipRows * unpack.RowLength + unpack.SkipPixels;
-      if (rbType == GL_UNSIGNED_BYTE) {
-         /* convert ubyte/CI data to ubyte/RGBA */
-         if (simpleZoom) {
-            GLint row;
-            for (row = 0; row < drawHeight; row++) {
-               ASSERT(drawWidth <= MAX_WIDTH);
-               _mesa_map_ci8_to_rgba8(ctx, drawWidth, src,
-                                      span.array->rgba8);
-               rb->PutRow(ctx, rb, drawWidth, destX, destY,
-                          span.array->rgba8, NULL);
-               src += unpack.RowLength;
-               destY += yStep;
-            }
-         }
-         else {
-            /* ubyte/CI to ubyte/RGBA with zooming */
-            GLint row;
-            for (row = 0; row < drawHeight; row++) {
-               ASSERT(drawWidth <= MAX_WIDTH);
-               _mesa_map_ci8_to_rgba8(ctx, drawWidth, src,
-                                      span.array->rgba8);
-               span.x = destX;
-               span.y = destY;
-               span.end = drawWidth;
-               _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
-                                              span.array->rgba8);
-               src += unpack.RowLength;
-               destY++;
-            }
-         }
-         return GL_TRUE;
-      }
-   }
-
-   /* can't handle this pixel format and/or data type */
-   return GL_FALSE;
-}
-
-
-
-/*
- * Draw stencil image.
- */
-static void
-draw_stencil_pixels( struct gl_context *ctx, GLint x, GLint y,
-                     GLsizei width, GLsizei height,
-                     GLenum type,
-                     const struct gl_pixelstore_attrib *unpack,
-                     const GLvoid *pixels )
-{
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
-   GLint skipPixels;
-
-   /* if width > MAX_WIDTH, have to process image in chunks */
-   skipPixels = 0;
-   while (skipPixels < width) {
-      const GLint spanX = x + skipPixels;
-      const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
-      GLint row;
-      for (row = 0; row < height; row++) {
-         const GLint spanY = y + row;
-         GLstencil values[MAX_WIDTH];
-         GLenum destType = (sizeof(GLstencil) == sizeof(GLubyte))
-                         ? GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
-         const GLvoid *source = _mesa_image_address2d(unpack, pixels,
-                                                      width, height,
-                                                      GL_COLOR_INDEX, type,
-                                                      row, skipPixels);
-         _mesa_unpack_stencil_span(ctx, spanWidth, destType, values,
-                                   type, source, unpack,
-                                   ctx->_ImageTransferState);
-         if (zoom) {
-            _swrast_write_zoomed_stencil_span(ctx, x, y, spanWidth,
-                                              spanX, spanY, values);
-         }
-         else {
-            _swrast_write_stencil_span(ctx, spanWidth, spanX, spanY, values);
-         }
-      }
-      skipPixels += spanWidth;
-   }
-}
-
-
-/*
- * Draw depth image.
- */
-static void
-draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y,
-                   GLsizei width, GLsizei height,
-                   GLenum type,
-                   const struct gl_pixelstore_attrib *unpack,
-                   const GLvoid *pixels )
-{
-   const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
-   SWspan span;
-
-   INIT_SPAN(span, GL_BITMAP);
-   span.arrayMask = SPAN_Z;
-   _swrast_span_default_attribs(ctx, &span);
-
-   if (type == GL_UNSIGNED_SHORT
-       && ctx->DrawBuffer->Visual.depthBits == 16
-       && !scaleOrBias
-       && !zoom
-       && width <= MAX_WIDTH
-       && !unpack->SwapBytes) {
-      /* Special case: directly write 16-bit depth values */
-      GLint row;
-      for (row = 0; row < height; row++) {
-         const GLushort *zSrc = (const GLushort *)
-            _mesa_image_address2d(unpack, pixels, width, height,
-                                  GL_DEPTH_COMPONENT, type, row, 0);
-         GLint i;
-         for (i = 0; i < width; i++)
-            span.array->z[i] = zSrc[i];
-         span.x = x;
-         span.y = y + row;
-         span.end = width;
-         _swrast_write_rgba_span(ctx, &span);
-      }
-   }
-   else if (type == GL_UNSIGNED_INT
-            && !scaleOrBias
-            && !zoom
-            && width <= MAX_WIDTH
-            && !unpack->SwapBytes) {
-      /* Special case: shift 32-bit values down to Visual.depthBits */
-      const GLint shift = 32 - ctx->DrawBuffer->Visual.depthBits;
-      GLint row;
-      for (row = 0; row < height; row++) {
-         const GLuint *zSrc = (const GLuint *)
-            _mesa_image_address2d(unpack, pixels, width, height,
-                                  GL_DEPTH_COMPONENT, type, row, 0);
-         if (shift == 0) {
-            memcpy(span.array->z, zSrc, width * sizeof(GLuint));
-         }
-         else {
-            GLint col;
-            for (col = 0; col < width; col++)
-               span.array->z[col] = zSrc[col] >> shift;
-         }
-         span.x = x;
-         span.y = y + row;
-         span.end = width;
-         _swrast_write_rgba_span(ctx, &span);
-      }
-   }
-   else {
-      /* General case */
-      const GLuint depthMax = ctx->DrawBuffer->_DepthMax;
-      GLint skipPixels = 0;
-
-      /* in case width > MAX_WIDTH do the copy in chunks */
-      while (skipPixels < width) {
-         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
-         GLint row;
-         ASSERT(span.end <= MAX_WIDTH);
-         for (row = 0; row < height; row++) {
-            const GLvoid *zSrc = _mesa_image_address2d(unpack,
-                                                      pixels, width, height,
-                                                      GL_DEPTH_COMPONENT, type,
-                                                      row, skipPixels);
-
-            /* Set these for each row since the _swrast_write_* function may
-             * change them while clipping.
-             */
-            span.x = x + skipPixels;
-            span.y = y + row;
-            span.end = spanWidth;
-
-            _mesa_unpack_depth_span(ctx, spanWidth,
-                                    GL_UNSIGNED_INT, span.array->z, depthMax,
-                                    type, zSrc, unpack);
-            if (zoom) {
-               _swrast_write_zoomed_depth_span(ctx, x, y, &span);
-            }
-            else {
-               _swrast_write_rgba_span(ctx, &span);
-            }
-         }
-         skipPixels += spanWidth;
-      }
-   }
-}
-
-
-
-/**
- * Draw RGBA image.
- */
-static void
-draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
-                  GLsizei width, GLsizei height,
-                  GLenum format, GLenum type,
-                  const struct gl_pixelstore_attrib *unpack,
-                  const GLvoid *pixels )
-{
-   const GLint imgX = x, imgY = y;
-   const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
-   GLfloat *convImage = NULL;
-   GLbitfield transferOps = ctx->_ImageTransferState;
-   SWspan span;
-
-   /* Try an optimized glDrawPixels first */
-   if (fast_draw_rgba_pixels(ctx, x, y, width, height, format, type,
-                             unpack, pixels)) {
-      return;
-   }
-
-   INIT_SPAN(span, GL_BITMAP);
-   _swrast_span_default_attribs(ctx, &span);
-   span.arrayMask = SPAN_RGBA;
-   span.arrayAttribs = FRAG_BIT_COL0; /* we're fill in COL0 attrib values */
-
-   if (ctx->DrawBuffer->_NumColorDrawBuffers > 0 &&
-       ctx->DrawBuffer->_ColorDrawBuffers[0]->DataType != GL_FLOAT &&
-       ctx->Color.ClampFragmentColor != GL_FALSE) {
-      /* need to clamp colors before applying fragment ops */
-      transferOps |= IMAGE_CLAMP_BIT;
-   }
-
-   /*
-    * General solution
-    */
-   {
-      const GLbitfield interpMask = span.interpMask;
-      const GLbitfield arrayMask = span.arrayMask;
-      const GLint srcStride
-         = _mesa_image_row_stride(unpack, width, format, type);
-      GLint skipPixels = 0;
-      /* use span array for temp color storage */
-      GLfloat *rgba = (GLfloat *) span.array->attribs[FRAG_ATTRIB_COL0];
-
-      /* if the span is wider than MAX_WIDTH we have to do it in chunks */
-      while (skipPixels < width) {
-         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
-         const GLubyte *source
-            = (const GLubyte *) _mesa_image_address2d(unpack, pixels,
-                                                      width, height, format,
-                                                      type, 0, skipPixels);
-         GLint row;
-
-         for (row = 0; row < height; row++) {
-            /* get image row as float/RGBA */
-            _mesa_unpack_color_span_float(ctx, spanWidth, GL_RGBA, rgba,
-                                     format, type, source, unpack,
-                                     transferOps);
-	    /* Set these for each row since the _swrast_write_* functions
-	     * may change them while clipping/rendering.
-	     */
-	    span.array->ChanType = GL_FLOAT;
-	    span.x = x + skipPixels;
-	    span.y = y + row;
-	    span.end = spanWidth;
-	    span.arrayMask = arrayMask;
-	    span.interpMask = interpMask;
-	    if (zoom) {
-	       _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span, rgba);
-	    }
-	    else {
-	       _swrast_write_rgba_span(ctx, &span);
-	    }
-
-            source += srcStride;
-         } /* for row */
-
-         skipPixels += spanWidth;
-      } /* while skipPixels < width */
-
-      /* XXX this is ugly/temporary, to undo above change */
-      span.array->ChanType = CHAN_TYPE;
-   }
-
-   if (convImage) {
-      free(convImage);
-   }
-}
-
-
-/**
- * This is a bit different from drawing GL_DEPTH_COMPONENT pixels.
- * The only per-pixel operations that apply are depth scale/bias,
- * stencil offset/shift, GL_DEPTH_WRITEMASK and GL_STENCIL_WRITEMASK,
- * and pixel zoom.
- * Also, only the depth buffer and stencil buffers are touched, not the
- * color buffer(s).
- */
-static void
-draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
-                          GLsizei width, GLsizei height, GLenum type,
-                          const struct gl_pixelstore_attrib *unpack,
-                          const GLvoid *pixels)
-{
-   const GLint imgX = x, imgY = y;
-   const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
-   const GLuint depthMax = ctx->DrawBuffer->_DepthMax;
-   const GLuint stencilMask = ctx->Stencil.WriteMask[0];
-   const GLuint stencilType = (STENCIL_BITS == 8) ? 
-      GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
-   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
-   struct gl_renderbuffer *depthRb, *stencilRb;
-   struct gl_pixelstore_attrib clippedUnpack = *unpack;
-
-   if (!zoom) {
-      if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
-                                 &clippedUnpack)) {
-         /* totally clipped */
-         return;
-      }
-   }
-   
-   depthRb = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
-   stencilRb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
-   ASSERT(depthRb);
-   ASSERT(stencilRb);
-
-   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       depthRb == stencilRb &&
-       !scaleOrBias &&
-       !zoom &&
-       ctx->Depth.Mask &&
-       (stencilMask & 0xff) == 0xff) {
-      /* This is the ideal case.
-       * Drawing GL_DEPTH_STENCIL pixels into a combined depth/stencil buffer.
-       * Plus, no pixel transfer ops, zooming, or masking needed.
-       */
-      GLint i;
-      for (i = 0; i < height; i++) {
-         const GLuint *src = (const GLuint *) 
-            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
-                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
-         depthRb->PutRow(ctx, depthRb, width, x, y + i, src, NULL);
-      }
-   }
-   else {
-      /* sub-optimal cases:
-       * Separate depth/stencil buffers, or pixel transfer ops required.
-       */
-      /* XXX need to handle very wide images (skippixels) */
-      GLint i;
-
-      depthRb = ctx->DrawBuffer->_DepthBuffer;
-      stencilRb = ctx->DrawBuffer->_StencilBuffer;
-
-      for (i = 0; i < height; i++) {
-         const GLuint *depthStencilSrc = (const GLuint *)
-            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
-                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
-
-         if (ctx->Depth.Mask) {
-            if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 24) {
-               /* fast path 24-bit zbuffer */
-               GLuint zValues[MAX_WIDTH];
-               GLint j;
-               ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
-               for (j = 0; j < width; j++) {
-                  zValues[j] = depthStencilSrc[j] >> 8;
-               }
-               if (zoom)
-                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
-                                              x, y + i, zValues);
-               else
-                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
-            }
-            else if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 16) {
-               /* fast path 16-bit zbuffer */
-               GLushort zValues[MAX_WIDTH];
-               GLint j;
-               ASSERT(depthRb->DataType == GL_UNSIGNED_SHORT);
-               for (j = 0; j < width; j++) {
-                  zValues[j] = depthStencilSrc[j] >> 16;
-               }
-               if (zoom)
-                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
-                                              x, y + i, zValues);
-               else
-                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
-            }
-            else {
-               /* general case */
-               GLuint zValues[MAX_WIDTH];  /* 16 or 32-bit Z value storage */
-               _mesa_unpack_depth_span(ctx, width,
-                                       depthRb->DataType, zValues, depthMax,
-                                       type, depthStencilSrc, &clippedUnpack);
-               if (zoom) {
-                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width, x,
-                                              y + i, zValues);
-               }
-               else {
-                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
-               }
-            }
-         }
-
-         if (stencilMask != 0x0) {
-            GLstencil stencilValues[MAX_WIDTH];
-            /* get stencil values, with shift/offset/mapping */
-            _mesa_unpack_stencil_span(ctx, width, stencilType, stencilValues,
-                                      type, depthStencilSrc, &clippedUnpack,
-                                      ctx->_ImageTransferState);
-            if (zoom)
-               _swrast_write_zoomed_stencil_span(ctx, imgX, imgY, width,
-                                                  x, y + i, stencilValues);
-            else
-               _swrast_write_stencil_span(ctx, width, x, y + i, stencilValues);
-         }
-      }
-   }
-}
-
-
-/**
- * Execute software-based glDrawPixels.
- * By time we get here, all error checking will have been done.
- */
-void
-_swrast_DrawPixels( struct gl_context *ctx,
-		    GLint x, GLint y,
-		    GLsizei width, GLsizei height,
-		    GLenum format, GLenum type,
-		    const struct gl_pixelstore_attrib *unpack,
-		    const GLvoid *pixels )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLboolean save_vp_override = ctx->VertexProgram._Overriden;
-
-   if (!_mesa_check_conditional_render(ctx))
-      return; /* don't draw */
-
-   /* We are creating fragments directly, without going through vertex
-    * programs.
-    *
-    * This override flag tells the fragment processing code that its input
-    * comes from a non-standard source, and it may therefore not rely on
-    * optimizations that assume e.g. constant color if there is no color
-    * vertex array.
-    */
-   _mesa_set_vp_override(ctx, GL_TRUE);
-
-   swrast_render_start(ctx);
-
-   if (ctx->NewState)
-      _mesa_update_state(ctx);
-
-   if (swrast->NewState)
-      _swrast_validate_derived( ctx );
-
-   pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
-   if (!pixels) {
-      swrast_render_finish(ctx);
-      _mesa_set_vp_override(ctx, save_vp_override);
-      return;
-   }
-
-   /*
-    * By time we get here, all error checking should have been done.
-    */
-   switch (format) {
-   case GL_STENCIL_INDEX:
-      draw_stencil_pixels( ctx, x, y, width, height, type, unpack, pixels );
-      break;
-   case GL_DEPTH_COMPONENT:
-      draw_depth_pixels( ctx, x, y, width, height, type, unpack, pixels );
-      break;
-   case GL_DEPTH_STENCIL_EXT:
-      draw_depth_stencil_pixels(ctx, x, y, width, height, type, unpack, pixels);
-      break;
-   default:
-      /* all other formats should be color formats */
-      draw_rgba_pixels(ctx, x, y, width, height, format, type, unpack, pixels);
-   }
-
-   swrast_render_finish(ctx);
-   _mesa_set_vp_override(ctx, save_vp_override);
-
-   _mesa_unmap_pbo_source(ctx, unpack);
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/condrender.h"
+#include "main/context.h"
+#include "main/image.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/pack.h"
+#include "main/pbo.h"
+#include "main/pixeltransfer.h"
+#include "main/state.h"
+
+#include "s_context.h"
+#include "s_span.h"
+#include "s_stencil.h"
+#include "s_zoom.h"
+
+
+
+/**
+ * Try to do a fast and simple RGB(a) glDrawPixels.
+ * Return:  GL_TRUE if drawn or a no-op, GL_FALSE if slow path must be used instead
+ */
+static GLboolean
+fast_draw_rgba_pixels(struct gl_context *ctx, GLint x, GLint y,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *userUnpack,
+                      const GLvoid *pixels)
+{
+   const GLint imgX = x, imgY = y;
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   GLenum rbType;
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   SWspan span;
+   GLboolean simpleZoom;
+   GLint yStep;  /* +1 or -1 */
+   struct gl_pixelstore_attrib unpack;
+   GLint destX, destY, drawWidth, drawHeight; /* post clipping */
+
+   if (!rb)
+      return GL_TRUE; /* no-op */
+
+   rbType = rb->DataType;
+
+   if ((swrast->_RasterMask & ~CLIP_BIT) ||
+       ctx->Texture._EnabledCoordUnits ||
+       userUnpack->SwapBytes ||
+       ctx->_ImageTransferState) {
+      /* can't handle any of those conditions */
+      return GL_FALSE;
+   }
+
+   INIT_SPAN(span, GL_BITMAP);
+   span.arrayMask = SPAN_RGBA;
+   span.arrayAttribs = FRAG_BIT_COL0;
+   _swrast_span_default_attribs(ctx, &span);
+
+   /* copy input params since clipping may change them */
+   unpack = *userUnpack;
+   destX = x;
+   destY = y;
+   drawWidth = width;
+   drawHeight = height;
+
+   /* check for simple zooming and clipping */
+   if (ctx->Pixel.ZoomX == 1.0F &&
+       (ctx->Pixel.ZoomY == 1.0F || ctx->Pixel.ZoomY == -1.0F)) {
+      if (!_mesa_clip_drawpixels(ctx, &destX, &destY,
+                                 &drawWidth, &drawHeight, &unpack)) {
+         /* image was completely clipped: no-op, all done */
+         return GL_TRUE;
+      }
+      simpleZoom = GL_TRUE;
+      yStep = (GLint) ctx->Pixel.ZoomY;
+      ASSERT(yStep == 1 || yStep == -1);
+   }
+   else {
+      /* non-simple zooming */
+      simpleZoom = GL_FALSE;
+      yStep = 1;
+      if (unpack.RowLength == 0)
+         unpack.RowLength = width;
+   }
+
+   /*
+    * Ready to draw!
+    */
+
+   if (format == GL_RGBA && type == rbType) {
+      const GLubyte *src
+         = (const GLubyte *) _mesa_image_address2d(&unpack, pixels, width,
+                                                   height, format, type, 0, 0);
+      const GLint srcStride = _mesa_image_row_stride(&unpack, width,
+                                                     format, type);
+      if (simpleZoom) {
+         GLint row;
+         for (row = 0; row < drawHeight; row++) {
+            rb->PutRow(ctx, rb, drawWidth, destX, destY, src, NULL);
+            src += srcStride;
+            destY += yStep;
+         }
+      }
+      else {
+         /* with zooming */
+         GLint row;
+         for (row = 0; row < drawHeight; row++) {
+            span.x = destX;
+            span.y = destY + row;
+            span.end = drawWidth;
+            span.array->ChanType = rbType;
+            _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span, src);
+            src += srcStride;
+         }
+         span.array->ChanType = CHAN_TYPE;
+      }
+      return GL_TRUE;
+   }
+
+   if (format == GL_RGB && type == rbType) {
+      const GLubyte *src
+         = (const GLubyte *) _mesa_image_address2d(&unpack, pixels, width,
+                                                   height, format, type, 0, 0);
+      const GLint srcStride = _mesa_image_row_stride(&unpack, width,
+                                                     format, type);
+      if (simpleZoom) {
+         GLint row;
+         for (row = 0; row < drawHeight; row++) {
+            rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, src, NULL);
+            src += srcStride;
+            destY += yStep;
+         }
+      }
+      else {
+         /* with zooming */
+         GLint row;
+         for (row = 0; row < drawHeight; row++) {
+            span.x = destX;
+            span.y = destY;
+            span.end = drawWidth;
+            span.array->ChanType = rbType;
+            _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span, src);
+            src += srcStride;
+            destY++;
+         }
+         span.array->ChanType = CHAN_TYPE;
+      }
+      return GL_TRUE;
+   }
+
+   /* Remaining cases haven't been tested with alignment != 1 */
+   if (userUnpack->Alignment != 1)
+      return GL_FALSE;
+
+   if (format == GL_LUMINANCE && type == CHAN_TYPE && rbType == CHAN_TYPE) {
+      const GLchan *src = (const GLchan *) pixels
+         + (unpack.SkipRows * unpack.RowLength + unpack.SkipPixels);
+      if (simpleZoom) {
+         /* simple zoom: no scaling, rows advance by yStep */
+         GLint row;
+         ASSERT(drawWidth <= MAX_WIDTH);
+         for (row = 0; row < drawHeight; row++) {
+            GLchan rgb[MAX_WIDTH][3];
+            GLint i;
+            for (i = 0;i<drawWidth;i++) {
+               rgb[i][0] = src[i];
+               rgb[i][1] = src[i];
+               rgb[i][2] = src[i];
+            }
+            rb->PutRowRGB(ctx, rb, drawWidth, destX, destY, rgb, NULL);
+            src += unpack.RowLength;
+            destY += yStep;
+         }
+      }
+      else {
+         /* with zooming */
+         GLint row;
+         ASSERT(drawWidth <= MAX_WIDTH);
+         for (row = 0; row < drawHeight; row++) {
+            GLchan rgb[MAX_WIDTH][3];
+            GLint i;
+            for (i = 0;i<drawWidth;i++) {
+               rgb[i][0] = src[i];
+               rgb[i][1] = src[i];
+               rgb[i][2] = src[i];
+            }
+            span.x = destX;
+            span.y = destY;
+            span.end = drawWidth;
+            _swrast_write_zoomed_rgb_span(ctx, imgX, imgY, &span, rgb);
+            src += unpack.RowLength;
+            destY++;
+         }
+      }
+      return GL_TRUE;
+   }
+
+   if (format == GL_LUMINANCE_ALPHA && type == CHAN_TYPE && rbType == CHAN_TYPE) {
+      const GLchan *src = (const GLchan *) pixels
+         + (unpack.SkipRows * unpack.RowLength + unpack.SkipPixels)*2;
+      if (simpleZoom) {
+         GLint row;
+         ASSERT(drawWidth <= MAX_WIDTH);
+         for (row = 0; row < drawHeight; row++) {
+            GLint i;
+            const GLchan *ptr = src;
+            for (i = 0;i<drawWidth;i++) {
+               span.array->rgba[i][0] = *ptr;
+               span.array->rgba[i][1] = *ptr;
+               span.array->rgba[i][2] = *ptr++;
+               span.array->rgba[i][3] = *ptr++;
+            }
+            rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                       span.array->rgba, NULL);
+            src += unpack.RowLength*2;
+            destY += yStep;
+         }
+      }
+      else {
+         /* with zooming */
+         GLint row;
+         ASSERT(drawWidth <= MAX_WIDTH);
+         for (row = 0; row < drawHeight; row++) {
+            const GLchan *ptr = src;
+            GLint i;
+            for (i = 0;i<drawWidth;i++) {
+               span.array->rgba[i][0] = *ptr;
+               span.array->rgba[i][1] = *ptr;
+               span.array->rgba[i][2] = *ptr++;
+               span.array->rgba[i][3] = *ptr++;
+            }
+            span.x = destX;
+            span.y = destY;
+            span.end = drawWidth;
+            _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                                           span.array->rgba);
+            src += unpack.RowLength*2;
+            destY++;
+         }
+      }
+      return GL_TRUE;
+   }
+
+   if (format == GL_COLOR_INDEX && type == GL_UNSIGNED_BYTE) {
+      const GLubyte *src = (const GLubyte *) pixels
+         + unpack.SkipRows * unpack.RowLength + unpack.SkipPixels;
+      if (rbType == GL_UNSIGNED_BYTE) {
+         /* convert ubyte/CI data to ubyte/RGBA */
+         if (simpleZoom) {
+            GLint row;
+            for (row = 0; row < drawHeight; row++) {
+               ASSERT(drawWidth <= MAX_WIDTH);
+               _mesa_map_ci8_to_rgba8(ctx, drawWidth, src,
+                                      span.array->rgba8);
+               rb->PutRow(ctx, rb, drawWidth, destX, destY,
+                          span.array->rgba8, NULL);
+               src += unpack.RowLength;
+               destY += yStep;
+            }
+         }
+         else {
+            /* ubyte/CI to ubyte/RGBA with zooming */
+            GLint row;
+            for (row = 0; row < drawHeight; row++) {
+               ASSERT(drawWidth <= MAX_WIDTH);
+               _mesa_map_ci8_to_rgba8(ctx, drawWidth, src,
+                                      span.array->rgba8);
+               span.x = destX;
+               span.y = destY;
+               span.end = drawWidth;
+               _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span,
+                                              span.array->rgba8);
+               src += unpack.RowLength;
+               destY++;
+            }
+         }
+         return GL_TRUE;
+      }
+   }
+
+   /* can't handle this pixel format and/or data type */
+   return GL_FALSE;
+}
+
+
+
+/*
+ * Draw stencil image, in column chunks of at most MAX_WIDTH pixels.
+ */
+static void
+draw_stencil_pixels( struct gl_context *ctx, GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum type,
+                     const struct gl_pixelstore_attrib *unpack,
+                     const GLvoid *pixels )
+{
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   GLint skipPixels;
+
+   /* if width > MAX_WIDTH, have to process image in chunks */
+   skipPixels = 0;
+   while (skipPixels < width) {
+      const GLint spanX = x + skipPixels;
+      const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+      GLint row;
+      for (row = 0; row < height; row++) {
+         const GLint spanY = y + row;
+         GLstencil values[MAX_WIDTH];
+         GLenum destType = (sizeof(GLstencil) == sizeof(GLubyte))
+                         ? GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
+         const GLvoid *source = _mesa_image_address2d(unpack, pixels,
+                                                      width, height,
+                                                      GL_COLOR_INDEX, type,
+                                                      row, skipPixels);
+         _mesa_unpack_stencil_span(ctx, spanWidth, destType, values,
+                                   type, source, unpack,
+                                   ctx->_ImageTransferState);
+         if (zoom) {
+            _swrast_write_zoomed_stencil_span(ctx, x, y, spanWidth,
+                                              spanX, spanY, values);
+         }
+         else {
+            _swrast_write_stencil_span(ctx, spanWidth, spanX, spanY, values);
+         }
+      }
+      skipPixels += spanWidth;
+   }
+}
+
+
+/*
+ * Draw depth image; rows wider than MAX_WIDTH go through the chunked general case.
+ */
+static void
+draw_depth_pixels( struct gl_context *ctx, GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type,
+                   const struct gl_pixelstore_attrib *unpack,
+                   const GLvoid *pixels )
+{
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   SWspan span;
+
+   INIT_SPAN(span, GL_BITMAP);
+   span.arrayMask = SPAN_Z;
+   _swrast_span_default_attribs(ctx, &span);
+
+   if (type == GL_UNSIGNED_SHORT
+       && ctx->DrawBuffer->Visual.depthBits == 16
+       && !scaleOrBias
+       && !zoom
+       && width <= MAX_WIDTH
+       && !unpack->SwapBytes) {
+      /* Special case: directly write 16-bit depth values */
+      GLint row;
+      for (row = 0; row < height; row++) {
+         const GLushort *zSrc = (const GLushort *)
+            _mesa_image_address2d(unpack, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, row, 0);
+         GLint i;
+         for (i = 0; i < width; i++)
+            span.array->z[i] = zSrc[i];
+         span.x = x;
+         span.y = y + row;
+         span.end = width;
+         _swrast_write_rgba_span(ctx, &span);
+      }
+   }
+   else if (type == GL_UNSIGNED_INT
+            && !scaleOrBias
+            && !zoom
+            && width <= MAX_WIDTH
+            && !unpack->SwapBytes) {
+      /* Special case: shift 32-bit values down to Visual.depthBits */
+      const GLint shift = 32 - ctx->DrawBuffer->Visual.depthBits;
+      GLint row;
+      for (row = 0; row < height; row++) {
+         const GLuint *zSrc = (const GLuint *)
+            _mesa_image_address2d(unpack, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, row, 0);
+         if (shift == 0) {
+            memcpy(span.array->z, zSrc, width * sizeof(GLuint));
+         }
+         else {
+            GLint col;
+            for (col = 0; col < width; col++)
+               span.array->z[col] = zSrc[col] >> shift;
+         }
+         span.x = x;
+         span.y = y + row;
+         span.end = width;
+         _swrast_write_rgba_span(ctx, &span);
+      }
+   }
+   else {
+      /* General case */
+      const GLuint depthMax = ctx->DrawBuffer->_DepthMax;
+      GLint skipPixels = 0;
+
+      /* in case width > MAX_WIDTH do the copy in chunks */
+      while (skipPixels < width) {
+         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+         GLint row;
+         ASSERT(span.end <= MAX_WIDTH);
+         for (row = 0; row < height; row++) {
+            const GLvoid *zSrc = _mesa_image_address2d(unpack,
+                                                      pixels, width, height,
+                                                      GL_DEPTH_COMPONENT, type,
+                                                      row, skipPixels);
+
+            /* Set these for each row since the _swrast_write_* function may
+             * change them while clipping.
+             */
+            span.x = x + skipPixels;
+            span.y = y + row;
+            span.end = spanWidth;
+
+            _mesa_unpack_depth_span(ctx, spanWidth,
+                                    GL_UNSIGNED_INT, span.array->z, depthMax,
+                                    type, zSrc, unpack);
+            if (zoom) {
+               _swrast_write_zoomed_depth_span(ctx, x, y, &span);
+            }
+            else {
+               _swrast_write_rgba_span(ctx, &span);
+            }
+         }
+         skipPixels += spanWidth;
+      }
+   }
+}
+
+
+
+/**
+ * Draw RGBA image: try the fast path, else unpack each row to float RGBA.
+ */
+static void
+draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
+                  GLsizei width, GLsizei height,
+                  GLenum format, GLenum type,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLvoid *pixels )
+{
+   const GLint imgX = x, imgY = y;
+   const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
+   GLfloat *convImage = NULL;
+   GLbitfield transferOps = ctx->_ImageTransferState;
+   SWspan span;
+
+   /* Try an optimized glDrawPixels first */
+   if (fast_draw_rgba_pixels(ctx, x, y, width, height, format, type,
+                             unpack, pixels)) {
+      return;
+   }
+
+   INIT_SPAN(span, GL_BITMAP);
+   _swrast_span_default_attribs(ctx, &span);
+   span.arrayMask = SPAN_RGBA;
+   span.arrayAttribs = FRAG_BIT_COL0; /* we're filling in COL0 attrib values */
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers > 0 &&
+       ctx->DrawBuffer->_ColorDrawBuffers[0]->DataType != GL_FLOAT &&
+       ctx->Color.ClampFragmentColor != GL_FALSE) {
+      /* need to clamp colors before applying fragment ops */
+      transferOps |= IMAGE_CLAMP_BIT;
+   }
+
+   /*
+    * General solution
+    */
+   {
+      const GLbitfield interpMask = span.interpMask;
+      const GLbitfield arrayMask = span.arrayMask;
+      const GLint srcStride
+         = _mesa_image_row_stride(unpack, width, format, type);
+      GLint skipPixels = 0;
+      /* use span array for temp color storage */
+      GLfloat *rgba = (GLfloat *) span.array->attribs[FRAG_ATTRIB_COL0];
+
+      /* if the span is wider than MAX_WIDTH we have to do it in chunks */
+      while (skipPixels < width) {
+         const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+         const GLubyte *source
+            = (const GLubyte *) _mesa_image_address2d(unpack, pixels,
+                                                      width, height, format,
+                                                      type, 0, skipPixels);
+         GLint row;
+
+         for (row = 0; row < height; row++) {
+            /* get image row as float/RGBA */
+            _mesa_unpack_color_span_float(ctx, spanWidth, GL_RGBA, rgba,
+                                     format, type, source, unpack,
+                                     transferOps);
+	    /* Set these for each row since the _swrast_write_* functions
+	     * may change them while clipping/rendering.
+	     */
+	    span.array->ChanType = GL_FLOAT;
+	    span.x = x + skipPixels;
+	    span.y = y + row;
+	    span.end = spanWidth;
+	    span.arrayMask = arrayMask;
+	    span.interpMask = interpMask;
+	    if (zoom) {
+	       _swrast_write_zoomed_rgba_span(ctx, imgX, imgY, &span, rgba);
+	    }
+	    else {
+	       _swrast_write_rgba_span(ctx, &span);
+	    }
+
+            source += srcStride;
+         } /* for row */
+
+         skipPixels += spanWidth;
+      } /* while skipPixels < width */
+
+      /* XXX ugly/temporary: restore ChanType, which was set to GL_FLOAT above */
+      span.array->ChanType = CHAN_TYPE;
+   }
+
+   if (convImage) {
+      free(convImage);
+   }
+}
+
+
+/**
+ * This is a bit different from drawing GL_DEPTH_COMPONENT pixels.
+ * The only per-pixel operations that apply are depth scale/bias,
+ * stencil offset/shift, GL_DEPTH_WRITEMASK and GL_STENCIL_WRITEMASK,
+ * and pixel zoom.
+ * Also, only the depth buffer and stencil buffers are touched, not the
+ * color buffer(s).
+ */
+static void
+draw_depth_stencil_pixels(struct gl_context *ctx, GLint x, GLint y,
+                          GLsizei width, GLsizei height, GLenum type,
+                          const struct gl_pixelstore_attrib *unpack,
+                          const GLvoid *pixels)
+{
+   const GLint imgX = x, imgY = y;
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLuint depthMax = ctx->DrawBuffer->_DepthMax;
+   const GLuint stencilMask = ctx->Stencil.WriteMask[0];
+   const GLuint stencilType = (STENCIL_BITS == 8) ? 
+      GL_UNSIGNED_BYTE : GL_UNSIGNED_SHORT;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   struct gl_renderbuffer *depthRb, *stencilRb;
+   struct gl_pixelstore_attrib clippedUnpack = *unpack;
+
+   if (!zoom) {
+      if (!_mesa_clip_drawpixels(ctx, &x, &y, &width, &height,
+                                 &clippedUnpack)) {
+         /* totally clipped */
+         return;
+      }
+   }
+   
+   depthRb = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+   stencilRb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+   ASSERT(depthRb);
+   ASSERT(stencilRb);
+
+   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       depthRb == stencilRb &&
+       !scaleOrBias &&
+       !zoom &&
+       ctx->Depth.Mask &&
+       (stencilMask & 0xff) == 0xff) {
+      /* This is the ideal case.
+       * Drawing GL_DEPTH_STENCIL pixels into a combined depth/stencil buffer.
+       * Plus, no pixel transfer ops, zooming, or masking needed.
+       */
+      GLint i;
+      for (i = 0; i < height; i++) {
+         const GLuint *src = (const GLuint *) 
+            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
+                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
+         depthRb->PutRow(ctx, depthRb, width, x, y + i, src, NULL);
+      }
+   }
+   else {
+      /* sub-optimal cases:
+       * Separate depth/stencil buffers, or pixel transfer ops required.
+       */
+      /* XXX need to handle very wide images (skippixels) */
+      GLint i;
+
+      depthRb = ctx->DrawBuffer->_DepthBuffer;
+      stencilRb = ctx->DrawBuffer->_StencilBuffer;
+
+      for (i = 0; i < height; i++) {
+         const GLuint *depthStencilSrc = (const GLuint *)
+            _mesa_image_address2d(&clippedUnpack, pixels, width, height,
+                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
+
+         if (ctx->Depth.Mask) {
+            if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 24) {
+               /* fast path 24-bit zbuffer */
+               GLuint zValues[MAX_WIDTH];
+               GLint j;
+               ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
+               for (j = 0; j < width; j++) {
+                  zValues[j] = depthStencilSrc[j] >> 8;
+               }
+               if (zoom)
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
+                                              x, y + i, zValues);
+               else
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+            }
+            else if (!scaleOrBias && ctx->DrawBuffer->Visual.depthBits == 16) {
+               /* fast path 16-bit zbuffer */
+               GLushort zValues[MAX_WIDTH];
+               GLint j;
+               ASSERT(depthRb->DataType == GL_UNSIGNED_SHORT);
+               for (j = 0; j < width; j++) {
+                  zValues[j] = depthStencilSrc[j] >> 16;
+               }
+               if (zoom)
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width,
+                                              x, y + i, zValues);
+               else
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+            }
+            else {
+               /* general case */
+               GLuint zValues[MAX_WIDTH];  /* 16 or 32-bit Z value storage */
+               _mesa_unpack_depth_span(ctx, width,
+                                       depthRb->DataType, zValues, depthMax,
+                                       type, depthStencilSrc, &clippedUnpack);
+               if (zoom) {
+                  _swrast_write_zoomed_z_span(ctx, imgX, imgY, width, x,
+                                              y + i, zValues);
+               }
+               else {
+                  depthRb->PutRow(ctx, depthRb, width, x, y + i, zValues,NULL);
+               }
+            }
+         }
+
+         if (stencilMask != 0x0) {
+            GLstencil stencilValues[MAX_WIDTH];
+            /* get stencil values, with shift/offset/mapping */
+            _mesa_unpack_stencil_span(ctx, width, stencilType, stencilValues,
+                                      type, depthStencilSrc, &clippedUnpack,
+                                      ctx->_ImageTransferState);
+            if (zoom)
+               _swrast_write_zoomed_stencil_span(ctx, imgX, imgY, width,
+                                                  x, y + i, stencilValues);
+            else
+               _swrast_write_stencil_span(ctx, width, x, y + i, stencilValues);
+         }
+      }
+   }
+}
+
+
+/**
+ * Execute software-based glDrawPixels, dispatching on the pixel \p format.
+ * By the time we get here, all error checking will have been done.
+ */
+void
+_swrast_DrawPixels( struct gl_context *ctx,
+		    GLint x, GLint y,
+		    GLsizei width, GLsizei height,
+		    GLenum format, GLenum type,
+		    const struct gl_pixelstore_attrib *unpack,
+		    const GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLboolean save_vp_override = ctx->VertexProgram._Overriden;
+
+   if (!_mesa_check_conditional_render(ctx))
+      return; /* don't draw */
+
+   /* We are creating fragments directly, without going through vertex
+    * programs.
+    *
+    * This override flag tells the fragment processing code that its input
+    * comes from a non-standard source, and it may therefore not rely on
+    * optimizations that assume e.g. constant color if there is no color
+    * vertex array.
+    */
+   _mesa_set_vp_override(ctx, GL_TRUE);
+
+   swrast_render_start(ctx);
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   pixels = _mesa_map_pbo_source(ctx, unpack, pixels);
+   if (!pixels) {  /* nothing to draw: undo the setup above and return */
+      swrast_render_finish(ctx);
+      _mesa_set_vp_override(ctx, save_vp_override);
+      return;
+   }
+
+   /*
+    * By the time we get here, all error checking should have been done.
+    */
+   switch (format) {
+   case GL_STENCIL_INDEX:
+      draw_stencil_pixels( ctx, x, y, width, height, type, unpack, pixels );
+      break;
+   case GL_DEPTH_COMPONENT:
+      draw_depth_pixels( ctx, x, y, width, height, type, unpack, pixels );
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      draw_depth_stencil_pixels(ctx, x, y, width, height, type, unpack, pixels);
+      break;
+   default:
+      /* all other formats should be color formats */
+      draw_rgba_pixels(ctx, x, y, width, height, format, type, unpack, pixels);
+   }
+
+   swrast_render_finish(ctx);
+   _mesa_set_vp_override(ctx, save_vp_override);
+
+   _mesa_unmap_pbo_source(ctx, unpack);
+}
diff --git a/mesalib/src/mesa/swrast/s_readpix.c b/mesalib/src/mesa/swrast/s_readpix.c
index 61399010b..5c8d7e9c5 100644
--- a/mesalib/src/mesa/swrast/s_readpix.c
+++ b/mesalib/src/mesa/swrast/s_readpix.c
@@ -1,508 +1,508 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.0.3
- *
- * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/colormac.h"
-#include "main/feedback.h"
-#include "main/formats.h"
-#include "main/image.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "main/pack.h"
-#include "main/state.h"
-
-#include "s_context.h"
-#include "s_depth.h"
-#include "s_span.h"
-#include "s_stencil.h"
-
-
-/**
- * Read pixels for format=GL_DEPTH_COMPONENT.
- */
-static void
-read_depth_pixels( struct gl_context *ctx,
-                   GLint x, GLint y,
-                   GLsizei width, GLsizei height,
-                   GLenum type, GLvoid *pixels,
-                   const struct gl_pixelstore_attrib *packing )
-{
-   struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_DepthBuffer;
-   const GLboolean biasOrScale
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
-
-   if (!rb)
-      return;
-
-   /* clipping should have been done already */
-   ASSERT(x >= 0);
-   ASSERT(y >= 0);
-   ASSERT(x + width <= (GLint) rb->Width);
-   ASSERT(y + height <= (GLint) rb->Height);
-   /* width should never be > MAX_WIDTH since we did clipping earlier */
-   ASSERT(width <= MAX_WIDTH);
-
-   if (type == GL_UNSIGNED_SHORT && fb->Visual.depthBits == 16
-       && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 16-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_Z16);
-      ASSERT(rb->DataType == GL_UNSIGNED_SHORT);
-      for (j = 0; j < height; j++, y++) {
-         void *dest =_mesa_image_address2d(packing, pixels, width, height,
-                                           GL_DEPTH_COMPONENT, type, j, 0);
-         rb->GetRow(ctx, rb, width, x, y, dest);
-      }
-   }
-   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 24
-            && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 24-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_X8_Z24 ||
-             rb->Format == MESA_FORMAT_S8_Z24 ||
-             rb->Format == MESA_FORMAT_Z24_X8 ||
-             rb->Format == MESA_FORMAT_Z24_S8);
-      ASSERT(rb->DataType == GL_UNSIGNED_INT ||
-             rb->DataType == GL_UNSIGNED_INT_24_8);
-      for (j = 0; j < height; j++, y++) {
-         GLuint *dest = (GLuint *)
-            _mesa_image_address2d(packing, pixels, width, height,
-                                  GL_DEPTH_COMPONENT, type, j, 0);
-         GLint k;
-         rb->GetRow(ctx, rb, width, x, y, dest);
-         /* convert range from 24-bit to 32-bit */
-         if (rb->Format == MESA_FORMAT_X8_Z24 ||
-             rb->Format == MESA_FORMAT_S8_Z24) {
-            for (k = 0; k < width; k++) {
-               /* Note: put MSByte of 24-bit value into LSByte */
-               dest[k] = (dest[k] << 8) | ((dest[k] >> 16) & 0xff);
-            }
-         }
-         else {
-            for (k = 0; k < width; k++) {
-               /* Note: fill in LSByte by replication */
-               dest[k] = dest[k] | ((dest[k] >> 8) & 0xff);
-            }
-         }
-      }
-   }
-   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 32
-            && !biasOrScale && !packing->SwapBytes) {
-      /* Special case: directly read 32-bit unsigned depth values. */
-      GLint j;
-      ASSERT(rb->Format == MESA_FORMAT_Z32);
-      ASSERT(rb->DataType == GL_UNSIGNED_INT);
-      for (j = 0; j < height; j++, y++) {
-         void *dest = _mesa_image_address2d(packing, pixels, width, height,
-                                            GL_DEPTH_COMPONENT, type, j, 0);
-         rb->GetRow(ctx, rb, width, x, y, dest);
-      }
-   }
-   else {
-      /* General case (slower) */
-      GLint j;
-      for (j = 0; j < height; j++, y++) {
-         GLfloat depthValues[MAX_WIDTH];
-         GLvoid *dest = _mesa_image_address2d(packing, pixels, width, height,
-                                              GL_DEPTH_COMPONENT, type, j, 0);
-         _swrast_read_depth_span_float(ctx, rb, width, x, y, depthValues);
-         _mesa_pack_depth_span(ctx, width, dest, type, depthValues, packing);
-      }
-   }
-}
-
-
-/**
- * Read pixels for format=GL_STENCIL_INDEX.
- */
-static void
-read_stencil_pixels( struct gl_context *ctx,
-                     GLint x, GLint y,
-                     GLsizei width, GLsizei height,
-                     GLenum type, GLvoid *pixels,
-                     const struct gl_pixelstore_attrib *packing )
-{
-   struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_StencilBuffer;
-   GLint j;
-
-   if (!rb)
-      return;
-
-   /* width should never be > MAX_WIDTH since we did clipping earlier */
-   ASSERT(width <= MAX_WIDTH);
-
-   /* process image row by row */
-   for (j=0;j<height;j++,y++) {
-      GLvoid *dest;
-      GLstencil stencil[MAX_WIDTH];
-
-      _swrast_read_stencil_span(ctx, rb, width, x, y, stencil);
-
-      dest = _mesa_image_address2d(packing, pixels, width, height,
-                                   GL_STENCIL_INDEX, type, j, 0);
-
-      _mesa_pack_stencil_span(ctx, width, type, dest, stencil, packing);
-   }
-}
-
-
-
-/**
- * Optimized glReadPixels for particular pixel formats when pixel
- * scaling, biasing, mapping, etc. are disabled.
- * \return GL_TRUE if success, GL_FALSE if unable to do the readpixels
- */
-static GLboolean
-fast_read_rgba_pixels( struct gl_context *ctx,
-                       GLint x, GLint y,
-                       GLsizei width, GLsizei height,
-                       GLenum format, GLenum type,
-                       GLvoid *pixels,
-                       const struct gl_pixelstore_attrib *packing,
-                       GLbitfield transferOps)
-{
-   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
-
-   if (!rb)
-      return GL_FALSE;
-
-   ASSERT(rb->_BaseFormat == GL_RGBA || rb->_BaseFormat == GL_RGB ||
-	  rb->_BaseFormat == GL_ALPHA);
-
-   /* clipping should have already been done */
-   ASSERT(x + width <= (GLint) rb->Width);
-   ASSERT(y + height <= (GLint) rb->Height);
-
-   /* check for things we can't handle here */
-   if (transferOps ||
-       packing->SwapBytes ||
-       packing->LsbFirst) {
-      return GL_FALSE;
-   }
-
-   if (format == GL_RGBA && rb->DataType == type) {
-      const GLint dstStride = _mesa_image_row_stride(packing, width,
-                                                     format, type);
-      GLubyte *dest
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-      GLint row;
-      ASSERT(rb->GetRow);
-      for (row = 0; row < height; row++) {
-         rb->GetRow(ctx, rb, width, x, y + row, dest);
-         dest += dstStride;
-      }
-      return GL_TRUE;
-   }
-
-   if (format == GL_RGB &&
-       rb->DataType == GL_UNSIGNED_BYTE &&
-       type == GL_UNSIGNED_BYTE) {
-      const GLint dstStride = _mesa_image_row_stride(packing, width,
-                                                     format, type);
-      GLubyte *dest
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-      GLint row;
-      ASSERT(rb->GetRow);
-      for (row = 0; row < height; row++) {
-         GLubyte tempRow[MAX_WIDTH][4];
-         GLint col;
-         rb->GetRow(ctx, rb, width, x, y + row, tempRow);
-         /* convert RGBA to RGB */
-         for (col = 0; col < width; col++) {
-            dest[col * 3 + 0] = tempRow[col][0];
-            dest[col * 3 + 1] = tempRow[col][1];
-            dest[col * 3 + 2] = tempRow[col][2];
-         }
-         dest += dstStride;
-      }
-      return GL_TRUE;
-   }
-
-   /* not handled */
-   return GL_FALSE;
-}
-
-
-/**
- * When we're using a low-precision color buffer (like 16-bit 5/6/5)
- * we have to adjust our color values a bit to pass conformance.
- * The problem is when a 5 or 6-bit color value is converted to an 8-bit
- * value and then a floating point value, the floating point values don't
- * increment uniformly as the 5 or 6-bit value is incremented.
- *
- * This function adjusts floating point values to compensate.
- */
-static void
-adjust_colors(const struct gl_framebuffer *fb, GLuint n, GLfloat rgba[][4])
-{
-   const GLuint rShift = 8 - fb->Visual.redBits;
-   const GLuint gShift = 8 - fb->Visual.greenBits;
-   const GLuint bShift = 8 - fb->Visual.blueBits;
-   GLfloat rScale = 1.0F / (GLfloat) ((1 << fb->Visual.redBits  ) - 1);
-   GLfloat gScale = 1.0F / (GLfloat) ((1 << fb->Visual.greenBits) - 1);
-   GLfloat bScale = 1.0F / (GLfloat) ((1 << fb->Visual.blueBits ) - 1);
-   GLuint i;
-
-   if (fb->Visual.redBits == 0)
-      rScale = 0;
-   if (fb->Visual.greenBits == 0)
-      gScale = 0;
-   if (fb->Visual.blueBits == 0)
-      bScale = 0;
-
-   for (i = 0; i < n; i++) {
-      GLint r, g, b;
-      /* convert float back to ubyte */
-      CLAMPED_FLOAT_TO_UBYTE(r, rgba[i][RCOMP]);
-      CLAMPED_FLOAT_TO_UBYTE(g, rgba[i][GCOMP]);
-      CLAMPED_FLOAT_TO_UBYTE(b, rgba[i][BCOMP]);
-      /* using only the N most significant bits of the ubyte value, convert to
-       * float in [0,1].
-       */
-      rgba[i][RCOMP] = (GLfloat) (r >> rShift) * rScale;
-      rgba[i][GCOMP] = (GLfloat) (g >> gShift) * gScale;
-      rgba[i][BCOMP] = (GLfloat) (b >> bShift) * bScale;
-   }
-}
-
-
-
-/*
- * Read R, G, B, A, RGB, L, or LA pixels.
- */
-static void
-read_rgba_pixels( struct gl_context *ctx,
-                  GLint x, GLint y,
-                  GLsizei width, GLsizei height,
-                  GLenum format, GLenum type, GLvoid *pixels,
-                  const struct gl_pixelstore_attrib *packing )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   GLbitfield transferOps = ctx->_ImageTransferState;
-   struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
-
-   if (!rb)
-      return;
-
-   if (type == GL_FLOAT && ((ctx->Color.ClampReadColor == GL_TRUE) ||
-                            (ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB &&
-                             rb->DataType != GL_FLOAT)))
-      transferOps |= IMAGE_CLAMP_BIT;
-
-   /* Try optimized path first */
-   if (fast_read_rgba_pixels(ctx, x, y, width, height,
-                             format, type, pixels, packing, transferOps)) {
-      return; /* done! */
-   }
-
-   /* width should never be > MAX_WIDTH since we did clipping earlier */
-   ASSERT(width <= MAX_WIDTH);
-
-   do {
-      const GLint dstStride
-         = _mesa_image_row_stride(packing, width, format, type);
-      GLfloat (*rgba)[4] = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
-      GLint row;
-      GLubyte *dst
-         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-                                             format, type, 0, 0);
-
-      for (row = 0; row < height; row++, y++) {
-
-         /* Get float rgba pixels */
-         _swrast_read_rgba_span(ctx, rb, width, x, y, GL_FLOAT, rgba);
-
-         /* apply fudge factor for shallow color buffers */
-         if (fb->Visual.redBits < 8 ||
-             fb->Visual.greenBits < 8 ||
-             fb->Visual.blueBits < 8) {
-            adjust_colors(fb, width, rgba);
-         }
-
-         /* pack the row of RGBA pixels into user's buffer */
-         _mesa_pack_rgba_span_float(ctx, width, rgba, format, type, dst,
-                                    packing, transferOps);
-
-         dst += dstStride;
-      }
-   } while (0);
-}
-
-
-/**
- * Read combined depth/stencil values.
- * We'll have already done error checking to be sure the expected
- * depth and stencil buffers really exist.
- */
-static void
-read_depth_stencil_pixels(struct gl_context *ctx,
-                          GLint x, GLint y,
-                          GLsizei width, GLsizei height,
-                          GLenum type, GLvoid *pixels,
-                          const struct gl_pixelstore_attrib *packing )
-{
-   const GLboolean scaleOrBias
-      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
-   const GLboolean stencilTransfer = ctx->Pixel.IndexShift
-      || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
-   struct gl_renderbuffer *depthRb, *stencilRb;
-
-   depthRb = ctx->ReadBuffer->_DepthBuffer;
-   stencilRb = ctx->ReadBuffer->_StencilBuffer;
-
-   if (!depthRb || !stencilRb)
-      return;
-
-   depthRb = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
-   stencilRb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
-
-   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
-       depthRb == stencilRb &&
-       !scaleOrBias &&
-       !stencilTransfer) {
-      /* This is the ideal case.
-       * Reading GL_DEPTH_STENCIL pixels from combined depth/stencil buffer.
-       * Plus, no pixel transfer ops to worry about!
-       */
-      GLint i;
-      GLint dstStride = _mesa_image_row_stride(packing, width,
-                                               GL_DEPTH_STENCIL_EXT, type);
-      GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, pixels,
-                                                       width, height,
-                                                       GL_DEPTH_STENCIL_EXT,
-                                                       type, 0, 0);
-      for (i = 0; i < height; i++) {
-         depthRb->GetRow(ctx, depthRb, width, x, y + i, dst);
-         dst += dstStride;
-      }
-   }
-   else {
-      /* Reading GL_DEPTH_STENCIL pixels from separate depth/stencil buffers,
-       * or we need pixel transfer.
-       */
-      GLint i;
-      depthRb = ctx->ReadBuffer->_DepthBuffer;
-      stencilRb = ctx->ReadBuffer->_StencilBuffer;
-
-      for (i = 0; i < height; i++) {
-         GLstencil stencilVals[MAX_WIDTH];
-
-         GLuint *depthStencilDst = (GLuint *)
-            _mesa_image_address2d(packing, pixels, width, height,
-                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
-
-         _swrast_read_stencil_span(ctx, stencilRb, width,
-                                   x, y + i, stencilVals);
-
-         if (!scaleOrBias && !stencilTransfer
-             && ctx->ReadBuffer->Visual.depthBits == 24) {
-            /* ideal case */
-            GLuint zVals[MAX_WIDTH]; /* 24-bit values! */
-            GLint j;
-            ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
-            /* note, we've already been clipped */
-            depthRb->GetRow(ctx, depthRb, width, x, y + i, zVals);
-            for (j = 0; j < width; j++) {
-               depthStencilDst[j] = (zVals[j] << 8) | (stencilVals[j] & 0xff);
-            }
-         }
-         else {
-            /* general case */
-            GLfloat depthVals[MAX_WIDTH];
-            _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i,
-                                          depthVals);
-            _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst,
-                                          depthVals, stencilVals, packing);
-         }
-      }
-   }
-}
-
-
-
-/**
- * Software fallback routine for ctx->Driver.ReadPixels().
- * By time we get here, all error checking will have been done.
- */
-void
-_swrast_ReadPixels( struct gl_context *ctx,
-		    GLint x, GLint y, GLsizei width, GLsizei height,
-		    GLenum format, GLenum type,
-		    const struct gl_pixelstore_attrib *packing,
-		    GLvoid *pixels )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   struct gl_pixelstore_attrib clippedPacking = *packing;
-
-   if (ctx->NewState)
-      _mesa_update_state(ctx);
-
-   /* Need to do swrast_render_start() before clipping or anything else
-    * since this is where a driver may grab the hw lock and get an updated
-    * window size.
-    */
-   swrast_render_start(ctx);
-
-   if (swrast->NewState)
-      _swrast_validate_derived( ctx );
-
-   /* Do all needed clipping here, so that we can forget about it later */
-   if (_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
-
-      pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels);
-
-      if (pixels) {
-         switch (format) {
-         case GL_STENCIL_INDEX:
-            read_stencil_pixels(ctx, x, y, width, height, type, pixels,
-                                &clippedPacking);
-            break;
-         case GL_DEPTH_COMPONENT:
-            read_depth_pixels(ctx, x, y, width, height, type, pixels,
-                              &clippedPacking);
-            break;
-         case GL_DEPTH_STENCIL_EXT:
-            read_depth_stencil_pixels(ctx, x, y, width, height, type, pixels,
-                                      &clippedPacking);
-            break;
-         default:
-            /* all other formats should be color formats */
-            read_rgba_pixels(ctx, x, y, width, height, format, type, pixels,
-                             &clippedPacking);
-         }
-
-         _mesa_unmap_pbo_dest(ctx, &clippedPacking);
-      }
-   }
-
-   swrast_render_finish(ctx);
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.0.3
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/glheader.h"
+#include "main/colormac.h"
+#include "main/feedback.h"
+#include "main/formats.h"
+#include "main/image.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/pack.h"
+#include "main/pbo.h"
+#include "main/state.h"
+
+#include "s_context.h"
+#include "s_depth.h"
+#include "s_span.h"
+#include "s_stencil.h"
+
+
+/**
+ * Read depth pixels (format=GL_DEPTH_COMPONENT) row by row into \p pixels.
+ */
+static void
+read_depth_pixels( struct gl_context *ctx,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height,
+                   GLenum type, GLvoid *pixels,
+                   const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_DepthBuffer;
+   const GLboolean biasOrScale
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+
+   if (!rb)
+      return;
+
+   /* clipping should have been done already */
+   ASSERT(x >= 0);
+   ASSERT(y >= 0);
+   ASSERT(x + width <= (GLint) rb->Width);
+   ASSERT(y + height <= (GLint) rb->Height);
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   if (type == GL_UNSIGNED_SHORT && fb->Visual.depthBits == 16
+       && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: GetRow() 16-bit depth values straight into 'pixels'. */
+      GLint j;
+      ASSERT(rb->Format == MESA_FORMAT_Z16);
+      ASSERT(rb->DataType == GL_UNSIGNED_SHORT);
+      for (j = 0; j < height; j++, y++) {
+         void *dest =_mesa_image_address2d(packing, pixels, width, height,
+                                           GL_DEPTH_COMPONENT, type, j, 0);
+         rb->GetRow(ctx, rb, width, x, y, dest);
+      }
+   }
+   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 24
+            && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: read 24-bit depth in place, then widen to 32 bits. */
+      GLint j;
+      ASSERT(rb->Format == MESA_FORMAT_X8_Z24 ||
+             rb->Format == MESA_FORMAT_S8_Z24 ||
+             rb->Format == MESA_FORMAT_Z24_X8 ||
+             rb->Format == MESA_FORMAT_Z24_S8);
+      ASSERT(rb->DataType == GL_UNSIGNED_INT ||
+             rb->DataType == GL_UNSIGNED_INT_24_8);
+      for (j = 0; j < height; j++, y++) {
+         GLuint *dest = (GLuint *)
+            _mesa_image_address2d(packing, pixels, width, height,
+                                  GL_DEPTH_COMPONENT, type, j, 0);
+         GLint k;
+         rb->GetRow(ctx, rb, width, x, y, dest);
+         /* convert range from 24-bit to 32-bit */
+         if (rb->Format == MESA_FORMAT_X8_Z24 ||
+             rb->Format == MESA_FORMAT_S8_Z24) {
+            for (k = 0; k < width; k++) {
+               /* Note: put MSByte of 24-bit value into LSByte */
+               dest[k] = (dest[k] << 8) | ((dest[k] >> 16) & 0xff);
+            }
+         }
+         else {
+            for (k = 0; k < width; k++) {
+               /* Note: fill in LSByte by OR-ing in bits 8..15 (replication) */
+               dest[k] = dest[k] | ((dest[k] >> 8) & 0xff);
+            }
+         }
+      }
+   }
+   else if (type == GL_UNSIGNED_INT && fb->Visual.depthBits == 32
+            && !biasOrScale && !packing->SwapBytes) {
+      /* Special case: GetRow() 32-bit depth values straight into 'pixels'. */
+      GLint j;
+      ASSERT(rb->Format == MESA_FORMAT_Z32);
+      ASSERT(rb->DataType == GL_UNSIGNED_INT);
+      for (j = 0; j < height; j++, y++) {
+         void *dest = _mesa_image_address2d(packing, pixels, width, height,
+                                            GL_DEPTH_COMPONENT, type, j, 0);
+         rb->GetRow(ctx, rb, width, x, y, dest);
+      }
+   }
+   else {
+      /* General case (slower): read floats, then pack them as 'type' */
+      GLint j;
+      for (j = 0; j < height; j++, y++) {
+         GLfloat depthValues[MAX_WIDTH];
+         GLvoid *dest = _mesa_image_address2d(packing, pixels, width, height,
+                                              GL_DEPTH_COMPONENT, type, j, 0);
+         _swrast_read_depth_span_float(ctx, rb, width, x, y, depthValues);
+         _mesa_pack_depth_span(ctx, width, dest, type, depthValues, packing);
+      }
+   }
+}
+
+
+/**
+ * Read stencil pixels (format=GL_STENCIL_INDEX) row by row into \p pixels.
+ */
+static void
+read_stencil_pixels( struct gl_context *ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum type, GLvoid *pixels,
+                     const struct gl_pixelstore_attrib *packing )
+{
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_StencilBuffer;
+   GLint j;
+
+   if (!rb)
+      return;
+
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   /* per row: read a stencil span, then pack it as 'type' into dest */
+   for (j=0;j<height;j++,y++) {
+      GLvoid *dest;
+      GLstencil stencil[MAX_WIDTH];
+
+      _swrast_read_stencil_span(ctx, rb, width, x, y, stencil);
+
+      dest = _mesa_image_address2d(packing, pixels, width, height,
+                                   GL_STENCIL_INDEX, type, j, 0);
+
+      _mesa_pack_stencil_span(ctx, width, type, dest, stencil, packing);
+   }
+}
+
+
+
+/**
+ * Optimized glReadPixels: handles GL_RGBA when the renderbuffer's DataType
+ * equals \p type, and GL_RGB/ubyte from a ubyte buffer; no transfer ops.
+ * \return GL_TRUE if the pixels were read, GL_FALSE if not handled here
+ */
+static GLboolean
+fast_read_rgba_pixels( struct gl_context *ctx,
+                       GLint x, GLint y,
+                       GLsizei width, GLsizei height,
+                       GLenum format, GLenum type,
+                       GLvoid *pixels,
+                       const struct gl_pixelstore_attrib *packing,
+                       GLbitfield transferOps)
+{
+   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+
+   if (!rb)
+      return GL_FALSE;
+
+   ASSERT(rb->_BaseFormat == GL_RGBA || rb->_BaseFormat == GL_RGB ||
+	  rb->_BaseFormat == GL_ALPHA);
+
+   /* clipping should have already been done */
+   ASSERT(x + width <= (GLint) rb->Width);
+   ASSERT(y + height <= (GLint) rb->Height);
+
+   /* transfer ops, byte swapping and LSB-first packing aren't handled here */
+   if (transferOps ||
+       packing->SwapBytes ||
+       packing->LsbFirst) {
+      return GL_FALSE;
+   }
+
+   if (format == GL_RGBA && rb->DataType == type) {
+      const GLint dstStride = _mesa_image_row_stride(packing, width,
+                                                     format, type);
+      GLubyte *dest
+         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                             format, type, 0, 0);
+      GLint row;
+      ASSERT(rb->GetRow);
+      for (row = 0; row < height; row++) {
+         rb->GetRow(ctx, rb, width, x, y + row, dest);
+         dest += dstStride;
+      }
+      return GL_TRUE;
+   }
+
+   if (format == GL_RGB &&
+       rb->DataType == GL_UNSIGNED_BYTE &&
+       type == GL_UNSIGNED_BYTE) {
+      const GLint dstStride = _mesa_image_row_stride(packing, width,
+                                                     format, type);
+      GLubyte *dest
+         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                             format, type, 0, 0);
+      GLint row;
+      ASSERT(rb->GetRow);
+      for (row = 0; row < height; row++) {
+         GLubyte tempRow[MAX_WIDTH][4];
+         GLint col;
+         rb->GetRow(ctx, rb, width, x, y + row, tempRow);
+         /* convert RGBA to RGB: drop each pixel's 4th component */
+         for (col = 0; col < width; col++) {
+            dest[col * 3 + 0] = tempRow[col][0];
+            dest[col * 3 + 1] = tempRow[col][1];
+            dest[col * 3 + 2] = tempRow[col][2];
+         }
+         dest += dstStride;
+      }
+      return GL_TRUE;
+   }
+
+   /* not handled */
+   return GL_FALSE;
+}
+
+
+/**
+ * Requantize float colors for a low-precision color buffer (like 16-bit
+ * 5/6/5) so that our color values pass conformance.
+ * The problem is when a 5 or 6-bit color value is converted to an 8-bit
+ * value and then a floating point value, the floating point values don't
+ * increment uniformly as the 5 or 6-bit value is incremented.
+ * R, G and B of the n entries of rgba are rewritten in place using
+ * fb->Visual.{red,green,blue}Bits; the alpha component is not touched.
+ */
+static void
+adjust_colors(const struct gl_framebuffer *fb, GLuint n, GLfloat rgba[][4])
+{
+   const GLuint rShift = 8 - fb->Visual.redBits;   /* low bits to discard; */
+   const GLuint gShift = 8 - fb->Visual.greenBits; /* NOTE(review): assumes */
+   const GLuint bShift = 8 - fb->Visual.blueBits;  /* <= 8 bits per channel */
+   GLfloat rScale = 1.0F / (GLfloat) ((1 << fb->Visual.redBits  ) - 1);
+   GLfloat gScale = 1.0F / (GLfloat) ((1 << fb->Visual.greenBits) - 1);
+   GLfloat bScale = 1.0F / (GLfloat) ((1 << fb->Visual.blueBits ) - 1);
+   GLuint i;
+   /* a 0-bit channel makes the divisor above zero; use a scale of 0 */
+   if (fb->Visual.redBits == 0)
+      rScale = 0;
+   if (fb->Visual.greenBits == 0)
+      gScale = 0;
+   if (fb->Visual.blueBits == 0)
+      bScale = 0;
+
+   for (i = 0; i < n; i++) {
+      GLint r, g, b;
+      /* convert float back to ubyte */
+      CLAMPED_FLOAT_TO_UBYTE(r, rgba[i][RCOMP]);
+      CLAMPED_FLOAT_TO_UBYTE(g, rgba[i][GCOMP]);
+      CLAMPED_FLOAT_TO_UBYTE(b, rgba[i][BCOMP]);
+      /* using only the N most significant bits of the ubyte value, convert to
+       * float in [0,1]: (value >> shift) is scaled by 1 / (2^N - 1).
+       */
+      rgba[i][RCOMP] = (GLfloat) (r >> rShift) * rScale;
+      rgba[i][GCOMP] = (GLfloat) (g >> gShift) * gScale;
+      rgba[i][BCOMP] = (GLfloat) (b >> bShift) * bScale;
+   }
+}
+
+
+
+/*
+ * Read R, G, B, A, RGB, L, or LA pixels from the color read buffer.
+ */
+static void
+read_rgba_pixels( struct gl_context *ctx,
+                  GLint x, GLint y,
+                  GLsizei width, GLsizei height,
+                  GLenum format, GLenum type, GLvoid *pixels,
+                  const struct gl_pixelstore_attrib *packing )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   GLbitfield transferOps = ctx->_ImageTransferState;
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
+
+   if (!rb)
+      return;   /* no color read buffer: nothing to read */
+   /* request clamping of GL_FLOAT results per ctx->Color.ClampReadColor */
+   if (type == GL_FLOAT && ((ctx->Color.ClampReadColor == GL_TRUE) ||
+                            (ctx->Color.ClampReadColor == GL_FIXED_ONLY_ARB &&
+                             rb->DataType != GL_FLOAT)))
+      transferOps |= IMAGE_CLAMP_BIT;
+
+   /* Try optimized path first; it returns GL_FALSE when it can't help */
+   if (fast_read_rgba_pixels(ctx, x, y, width, height,
+                             format, type, pixels, packing, transferOps)) {
+      return; /* done! */
+   }
+
+   /* width should never be > MAX_WIDTH since we did clipping earlier */
+   ASSERT(width <= MAX_WIDTH);
+
+   do {   /* runs exactly once; the block only scopes the locals below */
+      const GLint dstStride
+         = _mesa_image_row_stride(packing, width, format, type);
+      GLfloat (*rgba)[4] = swrast->SpanArrays->attribs[FRAG_ATTRIB_COL0];
+      GLint row;
+      GLubyte *dst
+         = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
+                                             format, type, 0, 0);
+
+      for (row = 0; row < height; row++, y++) {
+
+         /* Get float rgba pixels for row y into the swrast span array */
+         _swrast_read_rgba_span(ctx, rb, width, x, y, GL_FLOAT, rgba);
+
+         /* apply fudge factor for shallow color buffers */
+         if (fb->Visual.redBits < 8 ||
+             fb->Visual.greenBits < 8 ||
+             fb->Visual.blueBits < 8) {
+            adjust_colors(fb, width, rgba);
+         }
+
+         /* pack the row of RGBA pixels into user's buffer */
+         _mesa_pack_rgba_span_float(ctx, width, rgba, format, type, dst,
+                                    packing, transferOps);
+
+         dst += dstStride;   /* advance to the next destination row */
+      }
+   } while (0);
+}
+
+
+/**
+ * Read combined depth/stencil values (GL_DEPTH_STENCIL_EXT) row by row
+ * into 'pixels'.  We'll have already done error checking to be sure the
+ * expected depth and stencil buffers exist; if either is missing, return.
+ */
+static void
+read_depth_stencil_pixels(struct gl_context *ctx,
+                          GLint x, GLint y,
+                          GLsizei width, GLsizei height,
+                          GLenum type, GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing )
+{
+   const GLboolean scaleOrBias
+      = ctx->Pixel.DepthScale != 1.0 || ctx->Pixel.DepthBias != 0.0;
+   const GLboolean stencilTransfer = ctx->Pixel.IndexShift
+      || ctx->Pixel.IndexOffset || ctx->Pixel.MapStencilFlag;
+   struct gl_renderbuffer *depthRb, *stencilRb;
+
+   depthRb = ctx->ReadBuffer->_DepthBuffer;
+   stencilRb = ctx->ReadBuffer->_StencilBuffer;
+
+   if (!depthRb || !stencilRb)
+      return;
+   /* look at the attachments themselves to detect a shared depth/stencil rb */
+   depthRb = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+   stencilRb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
+
+   if (depthRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       stencilRb->_BaseFormat == GL_DEPTH_STENCIL_EXT &&
+       depthRb == stencilRb &&
+       !scaleOrBias &&
+       !stencilTransfer) {
+      /* This is the ideal case.
+       * Reading GL_DEPTH_STENCIL pixels from combined depth/stencil buffer.
+       * Plus, no pixel transfer ops to worry about!
+       */
+      GLint i;
+      GLint dstStride = _mesa_image_row_stride(packing, width,
+                                               GL_DEPTH_STENCIL_EXT, type);
+      GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, pixels,
+                                                       width, height,
+                                                       GL_DEPTH_STENCIL_EXT,
+                                                       type, 0, 0);
+      for (i = 0; i < height; i++) {
+         depthRb->GetRow(ctx, depthRb, width, x, y + i, dst);
+         dst += dstStride;
+      }
+   }
+   else {
+      /* Reading GL_DEPTH_STENCIL pixels from separate depth/stencil buffers,
+       * or we need pixel transfer.
+       */
+      GLint i;
+      depthRb = ctx->ReadBuffer->_DepthBuffer;   /* undo the Attachment[] */
+      stencilRb = ctx->ReadBuffer->_StencilBuffer; /* reassignment above */
+
+      for (i = 0; i < height; i++) {
+         GLstencil stencilVals[MAX_WIDTH];
+         /* destination address of row i in the user's image */
+         GLuint *depthStencilDst = (GLuint *)
+            _mesa_image_address2d(packing, pixels, width, height,
+                                  GL_DEPTH_STENCIL_EXT, type, i, 0);
+
+         _swrast_read_stencil_span(ctx, stencilRb, width,
+                                   x, y + i, stencilVals);
+
+         if (!scaleOrBias && !stencilTransfer
+             && ctx->ReadBuffer->Visual.depthBits == 24) {
+            /* ideal case: Z goes in the upper 24 bits, stencil in the low 8 */
+            GLuint zVals[MAX_WIDTH]; /* 24-bit values! */
+            GLint j;
+            ASSERT(depthRb->DataType == GL_UNSIGNED_INT);
+            /* note, we've already been clipped */
+            depthRb->GetRow(ctx, depthRb, width, x, y + i, zVals);
+            for (j = 0; j < width; j++) {
+               depthStencilDst[j] = (zVals[j] << 8) | (stencilVals[j] & 0xff);
+            }
+         }
+         else {
+            /* general case: float depth + _mesa_pack_depth_stencil_span() */
+            GLfloat depthVals[MAX_WIDTH];
+            _swrast_read_depth_span_float(ctx, depthRb, width, x, y + i,
+                                          depthVals);
+            _mesa_pack_depth_stencil_span(ctx, width, depthStencilDst,
+                                          depthVals, stencilVals, packing);
+         }
+      }
+   }
+}
+
+
+
+/**
+ * Software fallback routine for ctx->Driver.ReadPixels().
+ * By the time we get here, all error checking will have been done.
+ */
+void
+_swrast_ReadPixels( struct gl_context *ctx,
+		    GLint x, GLint y, GLsizei width, GLsizei height,
+		    GLenum format, GLenum type,
+		    const struct gl_pixelstore_attrib *packing,
+		    GLvoid *pixels )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   struct gl_pixelstore_attrib clippedPacking = *packing; /* private copy */
+
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Need to do swrast_render_start() before clipping or anything else
+    * since this is where a driver may grab the hw lock and get an updated
+    * window size.
+    */
+   swrast_render_start(ctx);
+
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+
+   /* Do all needed clipping here, so that we can forget about it later */
+   if (_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) {
+      /* presumably maps a bound PBO (TODO confirm); NULL skips the read */
+      pixels = _mesa_map_pbo_dest(ctx, &clippedPacking, pixels);
+
+      if (pixels) {
+         switch (format) {   /* dispatch on the kind of data requested */
+         case GL_STENCIL_INDEX:
+            read_stencil_pixels(ctx, x, y, width, height, type, pixels,
+                                &clippedPacking);
+            break;
+         case GL_DEPTH_COMPONENT:
+            read_depth_pixels(ctx, x, y, width, height, type, pixels,
+                              &clippedPacking);
+            break;
+         case GL_DEPTH_STENCIL_EXT:
+            read_depth_stencil_pixels(ctx, x, y, width, height, type, pixels,
+                                      &clippedPacking);
+            break;
+         default:
+            /* all other formats should be color formats */
+            read_rgba_pixels(ctx, x, y, width, height, format, type, pixels,
+                             &clippedPacking);
+         }
+
+         _mesa_unmap_pbo_dest(ctx, &clippedPacking);
+      }
+   }
+   /* always pairs with swrast_render_start() above, even if clipped out */
+   swrast_render_finish(ctx);
+}
diff --git a/mesalib/src/mesa/swrast/s_texcombine.c b/mesalib/src/mesa/swrast/s_texcombine.c
index 672cd77bf..0c8cc9ff3 100644
--- a/mesalib/src/mesa/swrast/s_texcombine.c
+++ b/mesalib/src/mesa/swrast/s_texcombine.c
@@ -1,742 +1,737 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.5
- *
- * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
- * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/colormac.h"
-#include "main/imports.h"
-#include "main/pixeltransfer.h"
-#include "program/prog_instruction.h"
-
-#include "s_context.h"
-#include "s_texcombine.h"
-
-
-/**
- * Pointer to array of float[4]
- * This type makes the code below more concise and avoids a lot of casting.
- */
-typedef float (*float4_array)[4];
-
-
-/**
- * Return array of texels for given unit.
- */
-static INLINE float4_array
-get_texel_array(SWcontext *swrast, GLuint unit)
-{
-   return (float4_array) (swrast->TexelBuffer + unit * MAX_WIDTH * 4);
-}
-
-
-
-/**
- * Do texture application for:
- *  GL_EXT_texture_env_combine
- *  GL_ARB_texture_env_combine
- *  GL_EXT_texture_env_dot3
- *  GL_ARB_texture_env_dot3
- *  GL_ATI_texture_env_combine3
- *  GL_NV_texture_env_combine4
- *  conventional GL texture env modes
- *
- * \param ctx          rendering context
- * \param unit         the texture combiner unit
- * \param n            number of fragments to process (span width)
- * \param primary_rgba incoming fragment color array
- * \param texelBuffer  pointer to texel colors for all texture units
- * 
- * \param rgba         incoming/result fragment colors
- */
-static void
-texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
-                 const float4_array primary_rgba,
-                 const GLfloat *texelBuffer,
-                 GLchan (*rgbaChan)[4] )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
-   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
-   float4_array argRGB[MAX_COMBINER_TERMS];
-   float4_array argA[MAX_COMBINER_TERMS];
-   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
-   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
-   const GLuint numArgsRGB = combine->_NumArgsRGB;
-   const GLuint numArgsA = combine->_NumArgsA;
-   float4_array ccolor[4], rgba;
-   GLuint i, term;
-
-   /* alloc temp pixel buffers */
-   rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
-   if (!rgba) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
-      return;
-   }
-
-   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
-      ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
-      if (!ccolor[i]) {
-         while (i) {
-            free(ccolor[i]);
-            i--;
-         }
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
-         return;
-      }
-   }
-
-   for (i = 0; i < n; i++) {
-      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
-      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
-      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
-      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
-   }
-
-   /*
-   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
-          combine->ModeRGB,
-          combine->ModeA,
-          combine->SourceRGB[0],
-          combine->SourceA[0],
-          combine->SourceRGB[1],
-          combine->SourceA[1]);
-   */
-
-   /*
-    * Do operand setup for up to 4 operands.  Loop over the terms.
-    */
-   for (term = 0; term < numArgsRGB; term++) {
-      const GLenum srcRGB = combine->SourceRGB[term];
-      const GLenum operandRGB = combine->OperandRGB[term];
-
-      switch (srcRGB) {
-         case GL_TEXTURE:
-            argRGB[term] = get_texel_array(swrast, unit);
-            break;
-         case GL_PRIMARY_COLOR:
-            argRGB[term] = primary_rgba;
-            break;
-         case GL_PREVIOUS:
-            argRGB[term] = rgba;
-            break;
-         case GL_CONSTANT:
-            {
-               float4_array c = ccolor[term];
-               GLfloat red   = textureUnit->EnvColor[0];
-               GLfloat green = textureUnit->EnvColor[1];
-               GLfloat blue  = textureUnit->EnvColor[2];
-               GLfloat alpha = textureUnit->EnvColor[3];
-               for (i = 0; i < n; i++) {
-                  ASSIGN_4V(c[i], red, green, blue, alpha);
-               }
-               argRGB[term] = ccolor[term];
-            }
-            break;
-	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
-	  */
-	 case GL_ZERO:
-            {
-               float4_array c = ccolor[term];
-               for (i = 0; i < n; i++) {
-                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
-               }
-               argRGB[term] = ccolor[term];
-            }
-            break;
-	 case GL_ONE:
-            {
-               float4_array c = ccolor[term];
-               for (i = 0; i < n; i++) {
-                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
-               }
-               argRGB[term] = ccolor[term];
-            }
-            break;
-         default:
-            /* ARB_texture_env_crossbar source */
-            {
-               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
-               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
-               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
-                  goto end;
-               argRGB[term] = get_texel_array(swrast, srcUnit);
-            }
-      }
-
-      if (operandRGB != GL_SRC_COLOR) {
-         float4_array src = argRGB[term];
-         float4_array dst = ccolor[term];
-
-         /* point to new arg[term] storage */
-         argRGB[term] = ccolor[term];
-
-         switch (operandRGB) {
-         case GL_ONE_MINUS_SRC_COLOR:
-            for (i = 0; i < n; i++) {
-               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
-               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
-               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
-            }
-            break;
-         case GL_SRC_ALPHA:
-            for (i = 0; i < n; i++) {
-               dst[i][RCOMP] =
-               dst[i][GCOMP] =
-               dst[i][BCOMP] = src[i][ACOMP];
-            }
-            break;
-         case GL_ONE_MINUS_SRC_ALPHA:
-            for (i = 0; i < n; i++) {
-               dst[i][RCOMP] =
-               dst[i][GCOMP] =
-               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
-            }
-            break;
-         default:
-            _mesa_problem(ctx, "Bad operandRGB");
-         }
-      }
-   }
-
-   /*
-    * Set up the argA[term] pointers
-    */
-   for (term = 0; term < numArgsA; term++) {
-      const GLenum srcA = combine->SourceA[term];
-      const GLenum operandA = combine->OperandA[term];
-
-      switch (srcA) {
-         case GL_TEXTURE:
-            argA[term] = get_texel_array(swrast, unit);
-            break;
-         case GL_PRIMARY_COLOR:
-            argA[term] = primary_rgba;
-            break;
-         case GL_PREVIOUS:
-            argA[term] = rgba;
-            break;
-         case GL_CONSTANT:
-            {
-               float4_array c = ccolor[term];
-               GLfloat alpha = textureUnit->EnvColor[3];
-               for (i = 0; i < n; i++)
-                  c[i][ACOMP] = alpha;
-               argA[term] = ccolor[term];
-            }
-            break;
-	 /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
-	  */
-	 case GL_ZERO:
-            {
-               float4_array c = ccolor[term];
-               for (i = 0; i < n; i++)
-                  c[i][ACOMP] = 0.0F;
-               argA[term] = ccolor[term];
-            }
-            break;
-	 case GL_ONE:
-            {
-               float4_array c = ccolor[term];
-               for (i = 0; i < n; i++)
-                  c[i][ACOMP] = 1.0F;
-               argA[term] = ccolor[term];
-            }
-            break;
-         default:
-            /* ARB_texture_env_crossbar source */
-            {
-               const GLuint srcUnit = srcA - GL_TEXTURE0;
-               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
-               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
-                  goto end;
-               argA[term] = get_texel_array(swrast, srcUnit);
-            }
-      }
-
-      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
-         float4_array src = argA[term];
-         float4_array dst = ccolor[term];
-         argA[term] = ccolor[term];
-         for (i = 0; i < n; i++) {
-            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
-         }
-      }
-   }
-
-   /* RGB channel combine */
-   {
-      float4_array arg0 = argRGB[0];
-      float4_array arg1 = argRGB[1];
-      float4_array arg2 = argRGB[2];
-      float4_array arg3 = argRGB[3];
-
-      switch (combine->ModeRGB) {
-      case GL_REPLACE:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
-            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
-            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
-         }
-         break;
-      case GL_MODULATE:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
-            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
-            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
-         }
-         break;
-      case GL_ADD:
-         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
-            /* (a * b) + (c * d) */
-            for (i = 0; i < n; i++) {
-               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
-                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
-               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
-                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
-               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
-                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
-            }
-         }
-         else {
-            /* 2-term addition */
-            for (i = 0; i < n; i++) {
-               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
-               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
-               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
-            }
-         }
-         break;
-      case GL_ADD_SIGNED:
-         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
-            /* (a * b) + (c * d) - 0.5 */
-            for (i = 0; i < n; i++) {
-               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
-                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
-               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
-                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
-               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
-                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
-            }
-         }
-         else {
-            for (i = 0; i < n; i++) {
-               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
-               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
-               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
-            }
-         }
-         break;
-      case GL_INTERPOLATE:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
-                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
-            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
-                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
-            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
-                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
-         }
-         break;
-      case GL_SUBTRACT:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
-            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
-            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
-         }
-         break;
-      case GL_DOT3_RGB_EXT:
-      case GL_DOT3_RGBA_EXT:
-         /* Do not scale the result by 1 2 or 4 */
-         for (i = 0; i < n; i++) {
-            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
-                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
-                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
-               * 4.0F;
-            dot = CLAMP(dot, 0.0F, 1.0F);
-            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
-         }
-         break;
-      case GL_DOT3_RGB:
-      case GL_DOT3_RGBA:
-         /* DO scale the result by 1 2 or 4 */
-         for (i = 0; i < n; i++) {
-            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
-                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
-                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
-               * 4.0F * scaleRGB;
-            dot = CLAMP(dot, 0.0F, 1.0F);
-            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
-         }
-         break;
-      case GL_MODULATE_ADD_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
-                              arg1[i][RCOMP]) * scaleRGB;
-            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
-                              arg1[i][GCOMP]) * scaleRGB;
-            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
-                              arg1[i][BCOMP]) * scaleRGB;
-	 }
-         break;
-      case GL_MODULATE_SIGNED_ADD_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
-                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
-            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
-                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
-            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
-                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
-	 }
-         break;
-      case GL_MODULATE_SUBTRACT_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
-                              arg1[i][RCOMP]) * scaleRGB;
-            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
-                              arg1[i][GCOMP]) * scaleRGB;
-            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
-                              arg1[i][BCOMP]) * scaleRGB;
-	 }
-         break;
-      case GL_BUMP_ENVMAP_ATI:
-         /* this produces a fixed rgba color, and the coord calc is done elsewhere */
-         for (i = 0; i < n; i++) {
-            /* rgba result is 0,0,0,1 */
-            rgba[i][RCOMP] = 0.0;
-            rgba[i][GCOMP] = 0.0;
-            rgba[i][BCOMP] = 0.0;
-            rgba[i][ACOMP] = 1.0;
-	 }
-         goto end; /* no alpha processing */
-      default:
-         _mesa_problem(ctx, "invalid combine mode");
-      }
-   }
-
-   /* Alpha channel combine */
-   {
-      float4_array arg0 = argA[0];
-      float4_array arg1 = argA[1];
-      float4_array arg2 = argA[2];
-      float4_array arg3 = argA[3];
-
-      switch (combine->ModeA) {
-      case GL_REPLACE:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
-         }
-         break;
-      case GL_MODULATE:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
-         }
-         break;
-      case GL_ADD:
-         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
-            /* (a * b) + (c * d) */
-            for (i = 0; i < n; i++) {
-               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
-                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
-            }
-         }
-         else {
-            /* two-term add */
-            for (i = 0; i < n; i++) {
-               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
-            }
-         }
-         break;
-      case GL_ADD_SIGNED:
-         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
-            /* (a * b) + (c * d) - 0.5 */
-            for (i = 0; i < n; i++) {
-               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
-                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
-                                 0.5F) * scaleA;
-            }
-         }
-         else {
-            /* a + b - 0.5 */
-            for (i = 0; i < n; i++) {
-               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
-            }
-         }
-         break;
-      case GL_INTERPOLATE:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
-                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
-               * scaleA;
-         }
-         break;
-      case GL_SUBTRACT:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
-         }
-         break;
-      case GL_MODULATE_ADD_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
-                              + arg1[i][ACOMP]) * scaleA;
-         }
-         break;
-      case GL_MODULATE_SIGNED_ADD_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
-                              arg1[i][ACOMP] - 0.5F) * scaleA;
-         }
-         break;
-      case GL_MODULATE_SUBTRACT_ATI:
-         for (i = 0; i < n; i++) {
-            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
-                              - arg1[i][ACOMP]) * scaleA;
-         }
-         break;
-      default:
-         _mesa_problem(ctx, "invalid combine mode");
-      }
-   }
-
-   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
-    * This is kind of a kludge.  It would have been better if the spec
-    * were written such that the GL_COMBINE_ALPHA value could be set to
-    * GL_DOT3.
-    */
-   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
-       combine->ModeRGB == GL_DOT3_RGBA) {
-      for (i = 0; i < n; i++) {
-	 rgba[i][ACOMP] = rgba[i][RCOMP];
-      }
-   }
-
-   for (i = 0; i < n; i++) {
-      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
-      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
-      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
-      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
-   }
-
-end:
-   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
-      free(ccolor[i]);
-   }
-   free(rgba);
-}
-
-
-/**
- * Apply X/Y/Z/W/0/1 swizzle to an array of colors/texels.
- * See GL_EXT_texture_swizzle.
- */
-static void
-swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
-{
-   const GLuint swzR = GET_SWZ(swizzle, 0);
-   const GLuint swzG = GET_SWZ(swizzle, 1);
-   const GLuint swzB = GET_SWZ(swizzle, 2);
-   const GLuint swzA = GET_SWZ(swizzle, 3);
-   GLfloat vector[6];
-   GLuint i;
-
-   vector[SWIZZLE_ZERO] = 0;
-   vector[SWIZZLE_ONE] = 1.0F;
-
-   for (i = 0; i < count; i++) {
-      vector[SWIZZLE_X] = texels[i][0];
-      vector[SWIZZLE_Y] = texels[i][1];
-      vector[SWIZZLE_Z] = texels[i][2];
-      vector[SWIZZLE_W] = texels[i][3];
-      texels[i][RCOMP] = vector[swzR];
-      texels[i][GCOMP] = vector[swzG];
-      texels[i][BCOMP] = vector[swzB];
-      texels[i][ACOMP] = vector[swzA];
-   }
-}
-
-
-/**
- * Apply texture mapping to a span of fragments.
- */
-void
-_swrast_texture_span( struct gl_context *ctx, SWspan *span )
-{
-   SWcontext *swrast = SWRAST_CONTEXT(ctx);
-   float4_array primary_rgba;
-   GLuint unit;
-
-   primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
-
-   if (!primary_rgba) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
-      return;
-   }
-
-   ASSERT(span->end <= MAX_WIDTH);
-
-   /*
-    * Save copy of the incoming fragment colors (the GL_PRIMARY_COLOR)
-    */
-   if (swrast->_TextureCombinePrimary) {
-      GLuint i;
-      for (i = 0; i < span->end; i++) {
-         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
-         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
-         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
-         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
-      }
-   }
-
-   /* First must sample all bump maps */
-   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
-      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-
-      if (texUnit->_ReallyEnabled &&
-         texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
-         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
-            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
-         float4_array targetcoords =
-            span->array->attribs[FRAG_ATTRIB_TEX0 +
-               ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
-
-         const struct gl_texture_object *curObj = texUnit->_Current;
-         GLfloat *lambda = span->array->lambda[unit];
-         float4_array texels = get_texel_array(swrast, unit);
-         GLuint i;
-         GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
-         GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
-         GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
-         GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
-
-         /* adjust texture lod (lambda) */
-         if (span->arrayMask & SPAN_LAMBDA) {
-            if (texUnit->LodBias + curObj->LodBias != 0.0F) {
-               /* apply LOD bias, but don't clamp yet */
-               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->LodBias,
-                                          -ctx->Const.MaxTextureLodBias,
-                                          ctx->Const.MaxTextureLodBias);
-               GLuint i;
-               for (i = 0; i < span->end; i++) {
-                  lambda[i] += bias;
-               }
-            }
-
-            if (curObj->MinLod != -1000.0 || curObj->MaxLod != 1000.0) {
-               /* apply LOD clamping to lambda */
-               const GLfloat min = curObj->MinLod;
-               const GLfloat max = curObj->MaxLod;
-               GLuint i;
-               for (i = 0; i < span->end; i++) {
-                  GLfloat l = lambda[i];
-                  lambda[i] = CLAMP(l, min, max);
-               }
-            }
-         }
-
-         /* Sample the texture (span->end = number of fragments) */
-         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
-                                      texcoords, lambda, texels );
-
-         /* manipulate the span values of the bump target
-            not sure this can work correctly even ignoring
-            the problem that channel is unsigned */
-         for (i = 0; i < span->end; i++) {
-            targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
-                                  rotMatrix01) / targetcoords[i][3];
-            targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
-                                  rotMatrix11) / targetcoords[i][3];
-         }
-      }
-   }
-
-   /*
-    * Must do all texture sampling before combining in order to
-    * accomodate GL_ARB_texture_env_crossbar.
-    */
-   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
-      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-      if (texUnit->_ReallyEnabled &&
-          texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
-         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
-            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
-         const struct gl_texture_object *curObj = texUnit->_Current;
-         GLfloat *lambda = span->array->lambda[unit];
-         float4_array texels = get_texel_array(swrast, unit);
-
-         /* adjust texture lod (lambda) */
-         if (span->arrayMask & SPAN_LAMBDA) {
-            if (texUnit->LodBias + curObj->LodBias != 0.0F) {
-               /* apply LOD bias, but don't clamp yet */
-               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->LodBias,
-                                          -ctx->Const.MaxTextureLodBias,
-                                          ctx->Const.MaxTextureLodBias);
-               GLuint i;
-               for (i = 0; i < span->end; i++) {
-                  lambda[i] += bias;
-               }
-            }
-
-            if (curObj->MinLod != -1000.0 || curObj->MaxLod != 1000.0) {
-               /* apply LOD clamping to lambda */
-               const GLfloat min = curObj->MinLod;
-               const GLfloat max = curObj->MaxLod;
-               GLuint i;
-               for (i = 0; i < span->end; i++) {
-                  GLfloat l = lambda[i];
-                  lambda[i] = CLAMP(l, min, max);
-               }
-            }
-         }
-
-         /* Sample the texture (span->end = number of fragments) */
-         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
-                                      texcoords, lambda, texels );
-
-         /* GL_SGI_texture_color_table */
-         if (texUnit->ColorTableEnabled) {
-            _mesa_lookup_rgba_float(&texUnit->ColorTable, span->end, texels);
-         }
-
-         /* GL_EXT_texture_swizzle */
-         if (curObj->_Swizzle != SWIZZLE_NOOP) {
-            swizzle_texels(curObj->_Swizzle, span->end, texels);
-         }
-      }
-   }
-
-   /*
-    * OK, now apply the texture (aka texture combine/blend).
-    * We modify the span->color.rgba values.
-    */
-   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
-      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
-         texture_combine( ctx, unit, span->end,
-                          primary_rgba,
-                          swrast->TexelBuffer,
-                          span->array->rgba );
-      }
-   }
-
-   free(primary_rgba);
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.5
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/pixeltransfer.h"
+#include "program/prog_instruction.h"
+
+#include "s_context.h"
+#include "s_texcombine.h"
+
+
+/**
+ * Pointer to an array of float[4] tuples, indexed as arr[index][component].
+ * This type makes the code below more concise and avoids a lot of casting.
+ */
+typedef float (*float4_array)[4];
+
+
+/**
+ * Return the given unit's slice (MAX_WIDTH * 4 floats) of swrast->TexelBuffer.
+ */
+static INLINE float4_array
+get_texel_array(SWcontext *swrast, GLuint unit)
+{
+   return (float4_array) &swrast->TexelBuffer[unit * MAX_WIDTH * 4];
+}
+
+
+
+/**
+ * Do texture application for:
+ *  GL_EXT_texture_env_combine, GL_ARB_texture_env_combine
+ *  GL_EXT_texture_env_dot3, GL_ARB_texture_env_dot3
+ *  GL_ATI_texture_env_combine3, GL_NV_texture_env_combine4
+ *  conventional GL texture env modes
+ *
+ * Temporary float buffers are malloc'ed per call and released on every
+ * exit path; on allocation failure GL_OUT_OF_MEMORY is recorded and
+ * rgbaChan is left untouched.
+ *
+ * \param ctx          rendering context
+ * \param unit         the texture combiner unit
+ * \param n            number of fragments to process (span width)
+ * \param primary_rgba incoming fragment color array
+ * \param texelBuffer  texel colors for all units (not read directly here)
+ * \param rgbaChan     incoming/result fragment colors
+ */
+static void
+texture_combine( struct gl_context *ctx, GLuint unit, GLuint n,
+                 const float4_array primary_rgba,
+                 const GLfloat *texelBuffer,
+                 GLchan (*rgbaChan)[4] )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const struct gl_texture_unit *textureUnit = &(ctx->Texture.Unit[unit]);
+   const struct gl_tex_env_combine_state *combine = textureUnit->_CurrentCombine;
+   float4_array argRGB[MAX_COMBINER_TERMS] = { NULL };
+   float4_array argA[MAX_COMBINER_TERMS] = { NULL };
+   const GLfloat scaleRGB = (GLfloat) (1 << combine->ScaleShiftRGB);
+   const GLfloat scaleA = (GLfloat) (1 << combine->ScaleShiftA);
+   const GLuint numArgsRGB = combine->_NumArgsRGB;
+   const GLuint numArgsA = combine->_NumArgsA;
+   float4_array ccolor[MAX_COMBINER_TERMS], rgba;
+   GLuint i, term;
+
+   /* alloc temp pixel buffers */
+   rgba = (float4_array) malloc(4 * n * sizeof(GLfloat));
+   if (!rgba) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
+      return;
+   }
+
+   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
+      ccolor[i] = (float4_array) malloc(4 * n * sizeof(GLfloat));
+      if (!ccolor[i]) {
+         while (i) {
+            free(ccolor[--i]);   /* unwind slots i-1..0; slot i is NULL */
+         }
+         free(rgba);
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_combine");
+         return;
+      }
+   }
+
+   for (i = 0; i < n; i++) {
+      rgba[i][RCOMP] = CHAN_TO_FLOAT(rgbaChan[i][RCOMP]);
+      rgba[i][GCOMP] = CHAN_TO_FLOAT(rgbaChan[i][GCOMP]);
+      rgba[i][BCOMP] = CHAN_TO_FLOAT(rgbaChan[i][BCOMP]);
+      rgba[i][ACOMP] = CHAN_TO_FLOAT(rgbaChan[i][ACOMP]);
+   }
+
+   /*
+   printf("modeRGB 0x%x  modeA 0x%x  srcRGB1 0x%x  srcA1 0x%x  srcRGB2 0x%x  srcA2 0x%x\n",
+          combine->ModeRGB,
+          combine->ModeA,
+          combine->SourceRGB[0],
+          combine->SourceA[0],
+          combine->SourceRGB[1],
+          combine->SourceA[1]);
+   */
+
+   /*
+    * Do operand setup for up to 4 operands.  Loop over the terms.
+    */
+   for (term = 0; term < numArgsRGB; term++) {
+      const GLenum srcRGB = combine->SourceRGB[term];
+      const GLenum operandRGB = combine->OperandRGB[term];
+
+      switch (srcRGB) {
+         case GL_TEXTURE:
+            argRGB[term] = get_texel_array(swrast, unit);
+            break;
+         case GL_PRIMARY_COLOR:
+            argRGB[term] = primary_rgba;
+            break;
+         case GL_PREVIOUS:
+            argRGB[term] = rgba;
+            break;
+         case GL_CONSTANT:
+            {
+               float4_array c = ccolor[term];
+               GLfloat red   = textureUnit->EnvColor[0];
+               GLfloat green = textureUnit->EnvColor[1];
+               GLfloat blue  = textureUnit->EnvColor[2];
+               GLfloat alpha = textureUnit->EnvColor[3];
+               for (i = 0; i < n; i++) {
+                  ASSIGN_4V(c[i], red, green, blue, alpha);
+               }
+               argRGB[term] = ccolor[term];
+            }
+            break;
+         /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
+          */
+         case GL_ZERO:
+            {
+               float4_array c = ccolor[term];
+               for (i = 0; i < n; i++) {
+                  ASSIGN_4V(c[i], 0.0F, 0.0F, 0.0F, 0.0F);
+               }
+               argRGB[term] = ccolor[term];
+            }
+            break;
+         case GL_ONE:
+            {
+               float4_array c = ccolor[term];
+               for (i = 0; i < n; i++) {
+                  ASSIGN_4V(c[i], 1.0F, 1.0F, 1.0F, 1.0F);
+               }
+               argRGB[term] = ccolor[term];
+            }
+            break;
+         default:
+            /* ARB_texture_env_crossbar source */
+            {
+               const GLuint srcUnit = srcRGB - GL_TEXTURE0;
+               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
+               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
+                  goto end;
+               argRGB[term] = get_texel_array(swrast, srcUnit);
+            }
+      }
+
+      if (operandRGB != GL_SRC_COLOR) {
+         float4_array src = argRGB[term];
+         float4_array dst = ccolor[term];
+
+         /* point to new arg[term] storage */
+         argRGB[term] = ccolor[term];
+
+         switch (operandRGB) {
+         case GL_ONE_MINUS_SRC_COLOR:
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] = 1.0F - src[i][RCOMP];
+               dst[i][GCOMP] = 1.0F - src[i][GCOMP];
+               dst[i][BCOMP] = 1.0F - src[i][BCOMP];
+            }
+            break;
+         case GL_SRC_ALPHA:
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] =
+               dst[i][GCOMP] =
+               dst[i][BCOMP] = src[i][ACOMP];
+            }
+            break;
+         case GL_ONE_MINUS_SRC_ALPHA:
+            for (i = 0; i < n; i++) {
+               dst[i][RCOMP] =
+               dst[i][GCOMP] =
+               dst[i][BCOMP] = 1.0F - src[i][ACOMP];
+            }
+            break;
+         default:
+            _mesa_problem(ctx, "Bad operandRGB");
+         }
+      }
+   }
+
+   /*
+    * Set up the argA[term] pointers
+    */
+   for (term = 0; term < numArgsA; term++) {
+      const GLenum srcA = combine->SourceA[term];
+      const GLenum operandA = combine->OperandA[term];
+
+      switch (srcA) {
+         case GL_TEXTURE:
+            argA[term] = get_texel_array(swrast, unit);
+            break;
+         case GL_PRIMARY_COLOR:
+            argA[term] = primary_rgba;
+            break;
+         case GL_PREVIOUS:
+            argA[term] = rgba;
+            break;
+         case GL_CONSTANT:
+            {
+               float4_array c = ccolor[term];
+               GLfloat alpha = textureUnit->EnvColor[3];
+               for (i = 0; i < n; i++)
+                  c[i][ACOMP] = alpha;
+               argA[term] = ccolor[term];
+            }
+            break;
+         /* GL_ATI_texture_env_combine3 allows GL_ZERO & GL_ONE as sources.
+          */
+         case GL_ZERO:
+            {
+               float4_array c = ccolor[term];
+               for (i = 0; i < n; i++)
+                  c[i][ACOMP] = 0.0F;
+               argA[term] = ccolor[term];
+            }
+            break;
+         case GL_ONE:
+            {
+               float4_array c = ccolor[term];
+               for (i = 0; i < n; i++)
+                  c[i][ACOMP] = 1.0F;
+               argA[term] = ccolor[term];
+            }
+            break;
+         default:
+            /* ARB_texture_env_crossbar source */
+            {
+               const GLuint srcUnit = srcA - GL_TEXTURE0;
+               ASSERT(srcUnit < ctx->Const.MaxTextureUnits);
+               if (!ctx->Texture.Unit[srcUnit]._ReallyEnabled)
+                  goto end;
+               argA[term] = get_texel_array(swrast, srcUnit);
+            }
+      }
+
+      if (operandA == GL_ONE_MINUS_SRC_ALPHA) {
+         float4_array src = argA[term];
+         float4_array dst = ccolor[term];
+         argA[term] = ccolor[term];
+         for (i = 0; i < n; i++) {
+            dst[i][ACOMP] = 1.0F - src[i][ACOMP];
+         }
+      }
+   }
+
+   /* RGB channel combine */
+   {
+      float4_array arg0 = argRGB[0];
+      float4_array arg1 = argRGB[1];
+      float4_array arg2 = argRGB[2];
+      float4_array arg3 = argRGB[3];
+
+      switch (combine->ModeRGB) {
+      case GL_REPLACE:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = arg0[i][RCOMP] * scaleRGB;
+            rgba[i][GCOMP] = arg0[i][GCOMP] * scaleRGB;
+            rgba[i][BCOMP] = arg0[i][BCOMP] * scaleRGB;
+         }
+         break;
+      case GL_MODULATE:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = arg0[i][RCOMP] * arg1[i][RCOMP] * scaleRGB;
+            rgba[i][GCOMP] = arg0[i][GCOMP] * arg1[i][GCOMP] * scaleRGB;
+            rgba[i][BCOMP] = arg0[i][BCOMP] * arg1[i][BCOMP] * scaleRGB;
+         }
+         break;
+      case GL_ADD:
+         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
+            /* (a * b) + (c * d) */
+            for (i = 0; i < n; i++) {
+               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
+                                 arg2[i][RCOMP] * arg3[i][RCOMP]) * scaleRGB;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
+                                 arg2[i][GCOMP] * arg3[i][GCOMP]) * scaleRGB;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
+                                 arg2[i][BCOMP] * arg3[i][BCOMP]) * scaleRGB;
+            }
+         }
+         else {
+            /* 2-term addition */
+            for (i = 0; i < n; i++) {
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP]) * scaleRGB;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP]) * scaleRGB;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP]) * scaleRGB;
+            }
+         }
+         break;
+      case GL_ADD_SIGNED:
+         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
+            /* (a * b) + (c * d) - 0.5 */
+            for (i = 0; i < n; i++) {
+               rgba[i][RCOMP] = (arg0[i][RCOMP] * arg1[i][RCOMP] +
+                                 arg2[i][RCOMP] * arg3[i][RCOMP] - 0.5F) * scaleRGB;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] * arg1[i][GCOMP] +
+                                 arg2[i][GCOMP] * arg3[i][GCOMP] - 0.5F) * scaleRGB;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] * arg1[i][BCOMP] +
+                                 arg2[i][BCOMP] * arg3[i][BCOMP] - 0.5F) * scaleRGB;
+            }
+         }
+         else {
+            for (i = 0; i < n; i++) {
+               rgba[i][RCOMP] = (arg0[i][RCOMP] + arg1[i][RCOMP] - 0.5F) * scaleRGB;
+               rgba[i][GCOMP] = (arg0[i][GCOMP] + arg1[i][GCOMP] - 0.5F) * scaleRGB;
+               rgba[i][BCOMP] = (arg0[i][BCOMP] + arg1[i][BCOMP] - 0.5F) * scaleRGB;
+            }
+         }
+         break;
+      case GL_INTERPOLATE:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = (arg0[i][RCOMP] * arg2[i][RCOMP] +
+                          arg1[i][RCOMP] * (1.0F - arg2[i][RCOMP])) * scaleRGB;
+            rgba[i][GCOMP] = (arg0[i][GCOMP] * arg2[i][GCOMP] +
+                          arg1[i][GCOMP] * (1.0F - arg2[i][GCOMP])) * scaleRGB;
+            rgba[i][BCOMP] = (arg0[i][BCOMP] * arg2[i][BCOMP] +
+                          arg1[i][BCOMP] * (1.0F - arg2[i][BCOMP])) * scaleRGB;
+         }
+         break;
+      case GL_SUBTRACT:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = (arg0[i][RCOMP] - arg1[i][RCOMP]) * scaleRGB;
+            rgba[i][GCOMP] = (arg0[i][GCOMP] - arg1[i][GCOMP]) * scaleRGB;
+            rgba[i][BCOMP] = (arg0[i][BCOMP] - arg1[i][BCOMP]) * scaleRGB;
+         }
+         break;
+      case GL_DOT3_RGB_EXT:
+      case GL_DOT3_RGBA_EXT:
+         /* Do not scale the result by 1 2 or 4 */
+         for (i = 0; i < n; i++) {
+            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
+                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
+                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
+               * 4.0F;
+            dot = CLAMP(dot, 0.0F, 1.0F);
+            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
+         }
+         break;
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+         /* DO scale the result by 1 2 or 4 */
+         for (i = 0; i < n; i++) {
+            GLfloat dot = ((arg0[i][RCOMP] - 0.5F) * (arg1[i][RCOMP] - 0.5F) +
+                           (arg0[i][GCOMP] - 0.5F) * (arg1[i][GCOMP] - 0.5F) +
+                           (arg0[i][BCOMP] - 0.5F) * (arg1[i][BCOMP] - 0.5F))
+               * 4.0F * scaleRGB;
+            dot = CLAMP(dot, 0.0F, 1.0F);
+            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = dot;
+         }
+         break;
+      case GL_MODULATE_ADD_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
+                              arg1[i][RCOMP]) * scaleRGB;
+            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
+                              arg1[i][GCOMP]) * scaleRGB;
+            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
+                              arg1[i][BCOMP]) * scaleRGB;
+         }
+         break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) +
+                              arg1[i][RCOMP] - 0.5F) * scaleRGB;
+            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) +
+                              arg1[i][GCOMP] - 0.5F) * scaleRGB;
+            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) +
+                              arg1[i][BCOMP] - 0.5F) * scaleRGB;
+         }
+         break;
+      case GL_MODULATE_SUBTRACT_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][RCOMP] = ((arg0[i][RCOMP] * arg2[i][RCOMP]) -
+                              arg1[i][RCOMP]) * scaleRGB;
+            rgba[i][GCOMP] = ((arg0[i][GCOMP] * arg2[i][GCOMP]) -
+                              arg1[i][GCOMP]) * scaleRGB;
+            rgba[i][BCOMP] = ((arg0[i][BCOMP] * arg2[i][BCOMP]) -
+                              arg1[i][BCOMP]) * scaleRGB;
+         }
+         break;
+      case GL_BUMP_ENVMAP_ATI:
+         /* this produces a fixed rgba color, and the coord calc is done elsewhere */
+         for (i = 0; i < n; i++) {
+            /* rgba result is 0,0,0,1 */
+            rgba[i][RCOMP] = 0.0;
+            rgba[i][GCOMP] = 0.0;
+            rgba[i][BCOMP] = 0.0;
+            rgba[i][ACOMP] = 1.0;
+         }
+         goto end; /* no alpha processing */
+      default:
+         _mesa_problem(ctx, "invalid combine mode");
+      }
+   }
+
+   /* Alpha channel combine */
+   {
+      float4_array arg0 = argA[0];
+      float4_array arg1 = argA[1];
+      float4_array arg2 = argA[2];
+      float4_array arg3 = argA[3];
+
+      switch (combine->ModeA) {
+      case GL_REPLACE:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = arg0[i][ACOMP] * scaleA;
+         }
+         break;
+      case GL_MODULATE:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = arg0[i][ACOMP] * arg1[i][ACOMP] * scaleA;
+         }
+         break;
+      case GL_ADD:
+         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
+            /* (a * b) + (c * d) */
+            for (i = 0; i < n; i++) {
+               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
+                                 arg2[i][ACOMP] * arg3[i][ACOMP]) * scaleA;
+            }
+         }
+         else {
+            /* two-term add */
+            for (i = 0; i < n; i++) {
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP]) * scaleA;
+            }
+         }
+         break;
+      case GL_ADD_SIGNED:
+         if (textureUnit->EnvMode == GL_COMBINE4_NV) {
+            /* (a * b) + (c * d) - 0.5 */
+            for (i = 0; i < n; i++) {
+               rgba[i][ACOMP] = (arg0[i][ACOMP] * arg1[i][ACOMP] +
+                                 arg2[i][ACOMP] * arg3[i][ACOMP] -
+                                 0.5F) * scaleA;
+            }
+         }
+         else {
+            /* a + b - 0.5 */
+            for (i = 0; i < n; i++) {
+               rgba[i][ACOMP] = (arg0[i][ACOMP] + arg1[i][ACOMP] - 0.5F) * scaleA;
+            }
+         }
+         break;
+      case GL_INTERPOLATE:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = (arg0[i][ACOMP] * arg2[i][ACOMP] +
+                              arg1[i][ACOMP] * (1.0F - arg2[i][ACOMP]))
+               * scaleA;
+         }
+         break;
+      case GL_SUBTRACT:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = (arg0[i][ACOMP] - arg1[i][ACOMP]) * scaleA;
+         }
+         break;
+      case GL_MODULATE_ADD_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
+                              + arg1[i][ACOMP]) * scaleA;
+         }
+         break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP]) +
+                              arg1[i][ACOMP] - 0.5F) * scaleA;
+         }
+         break;
+      case GL_MODULATE_SUBTRACT_ATI:
+         for (i = 0; i < n; i++) {
+            rgba[i][ACOMP] = ((arg0[i][ACOMP] * arg2[i][ACOMP])
+                              - arg1[i][ACOMP]) * scaleA;
+         }
+         break;
+      default:
+         _mesa_problem(ctx, "invalid combine mode");
+      }
+   }
+
+   /* Fix the alpha component for GL_DOT3_RGBA_EXT/ARB combining.
+    * This is kind of a kludge.  It would have been better if the spec
+    * were written such that the GL_COMBINE_ALPHA value could be set to
+    * GL_DOT3.
+    */
+   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
+       combine->ModeRGB == GL_DOT3_RGBA) {
+      for (i = 0; i < n; i++) {
+         rgba[i][ACOMP] = rgba[i][RCOMP];
+      }
+   }
+
+   for (i = 0; i < n; i++) {
+      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][RCOMP], rgba[i][RCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][GCOMP], rgba[i][GCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][BCOMP], rgba[i][BCOMP]);
+      UNCLAMPED_FLOAT_TO_CHAN(rgbaChan[i][ACOMP], rgba[i][ACOMP]);
+   }
+
+end:
+   for (i = 0; i < numArgsRGB || i < numArgsA; i++) {
+      free(ccolor[i]);
+   }
+   free(rgba);
+}
+
+
+/**
+ * Rearrange the four components of each texel according to a packed
+ * X/Y/Z/W/0/1 swizzle, in place.  See GL_EXT_texture_swizzle.
+ */
+static void
+swizzle_texels(GLuint swizzle, GLuint count, float4_array texels)
+{
+   const GLuint selR = GET_SWZ(swizzle, 0), selG = GET_SWZ(swizzle, 1);
+   const GLuint selB = GET_SWZ(swizzle, 2), selA = GET_SWZ(swizzle, 3);
+   GLfloat lut[6];
+   GLuint texel;
+
+   /* the two constant slots never change, so fill them only once */
+   lut[SWIZZLE_ZERO] = 0;
+   lut[SWIZZLE_ONE] = 1.0F;
+
+   for (texel = 0; texel < count; texel++) {
+      GLfloat *rgba = texels[texel];
+      lut[SWIZZLE_X] = rgba[0];
+      lut[SWIZZLE_Y] = rgba[1];
+      lut[SWIZZLE_Z] = rgba[2];
+      lut[SWIZZLE_W] = rgba[3];
+      rgba[RCOMP] = lut[selR];
+      rgba[GCOMP] = lut[selG];
+      rgba[BCOMP] = lut[selB];
+      rgba[ACOMP] = lut[selA];
+   }
+}
+
+
+/**
+ * Sample all enabled texture units for a span, then combine into its colors.
+ */
+void
+_swrast_texture_span( struct gl_context *ctx, SWspan *span )
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   float4_array primary_rgba;
+   GLuint unit;
+
+   primary_rgba = (float4_array) malloc(span->end * 4 * sizeof(GLfloat));
+
+   if (!primary_rgba) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture_span");
+      return;
+   }
+
+   ASSERT(span->end <= MAX_WIDTH);
+
+   /*
+    * Save a float copy of the incoming fragment colors (GL_PRIMARY_COLOR)
+    */
+   if (swrast->_TextureCombinePrimary) {
+      GLuint i;
+      for (i = 0; i < span->end; i++) {
+         primary_rgba[i][RCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
+         primary_rgba[i][GCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
+         primary_rgba[i][BCOMP] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
+         primary_rgba[i][ACOMP] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
+      }
+   }
+
+   /* First must sample all bump maps */
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+      if (texUnit->_ReallyEnabled &&
+         texUnit->_CurrentCombine->ModeRGB == GL_BUMP_ENVMAP_ATI) {
+         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
+            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
+         float4_array targetcoords =
+            span->array->attribs[FRAG_ATTRIB_TEX0 +
+               ctx->Texture.Unit[unit].BumpTarget - GL_TEXTURE0];
+
+         const struct gl_texture_object *curObj = texUnit->_Current;
+         GLfloat *lambda = span->array->lambda[unit];
+         float4_array texels = get_texel_array(swrast, unit);
+         GLuint i;
+         GLfloat rotMatrix00 = ctx->Texture.Unit[unit].RotMatrix[0];
+         GLfloat rotMatrix01 = ctx->Texture.Unit[unit].RotMatrix[1];
+         GLfloat rotMatrix10 = ctx->Texture.Unit[unit].RotMatrix[2];
+         GLfloat rotMatrix11 = ctx->Texture.Unit[unit].RotMatrix[3];
+
+         /* adjust texture lod (lambda) */
+         if (span->arrayMask & SPAN_LAMBDA) {
+            if (texUnit->LodBias + curObj->LodBias != 0.0F) {
+               /* apply LOD bias, but don't clamp yet */
+               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->LodBias,
+                                          -ctx->Const.MaxTextureLodBias,
+                                          ctx->Const.MaxTextureLodBias);
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  lambda[i] += bias;
+               }
+            }
+
+            if (curObj->MinLod != -1000.0 || curObj->MaxLod != 1000.0) {
+               /* apply LOD clamping to lambda */
+               const GLfloat min = curObj->MinLod;
+               const GLfloat max = curObj->MaxLod;
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  GLfloat l = lambda[i];
+                  lambda[i] = CLAMP(l, min, max);
+               }
+            }
+         }
+
+         /* Sample the texture (span->end = number of fragments) */
+         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
+                                      texcoords, lambda, texels );
+
+         /* Offset coords 0/1 of the bump target by the rotated texel (x,y),
+            divided by coord 3.  Original note: not sure this can work
+            correctly even ignoring the problem that channel is unsigned */
+         for (i = 0; i < span->end; i++) {
+            targetcoords[i][0] += (texels[i][0] * rotMatrix00 + texels[i][1] *
+                                  rotMatrix01) / targetcoords[i][3];
+            targetcoords[i][1] += (texels[i][0] * rotMatrix10 + texels[i][1] *
+                                  rotMatrix11) / targetcoords[i][3];
+         }
+      }
+   }
+
+   /*
+    * Must do all texture sampling before combining in order to
+    * accommodate GL_ARB_texture_env_crossbar.
+    */
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+      if (texUnit->_ReallyEnabled &&
+          texUnit->_CurrentCombine->ModeRGB != GL_BUMP_ENVMAP_ATI) {
+         const GLfloat (*texcoords)[4] = (const GLfloat (*)[4])
+            span->array->attribs[FRAG_ATTRIB_TEX0 + unit];
+         const struct gl_texture_object *curObj = texUnit->_Current;
+         GLfloat *lambda = span->array->lambda[unit];
+         float4_array texels = get_texel_array(swrast, unit);
+
+         /* adjust texture lod (lambda) */
+         if (span->arrayMask & SPAN_LAMBDA) {
+            if (texUnit->LodBias + curObj->LodBias != 0.0F) {
+               /* apply LOD bias, but don't clamp yet */
+               const GLfloat bias = CLAMP(texUnit->LodBias + curObj->LodBias,
+                                          -ctx->Const.MaxTextureLodBias,
+                                          ctx->Const.MaxTextureLodBias);
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  lambda[i] += bias;
+               }
+            }
+
+            if (curObj->MinLod != -1000.0 || curObj->MaxLod != 1000.0) {
+               /* apply LOD clamping to lambda */
+               const GLfloat min = curObj->MinLod;
+               const GLfloat max = curObj->MaxLod;
+               GLuint i;
+               for (i = 0; i < span->end; i++) {
+                  GLfloat l = lambda[i];
+                  lambda[i] = CLAMP(l, min, max);
+               }
+            }
+         }
+
+         /* Sample the texture (span->end = number of fragments) */
+         swrast->TextureSample[unit]( ctx, texUnit->_Current, span->end,
+                                      texcoords, lambda, texels );
+
+         /* GL_EXT_texture_swizzle */
+         if (curObj->_Swizzle != SWIZZLE_NOOP) {
+            swizzle_texels(curObj->_Swizzle, span->end, texels);
+         }
+      }
+   }
+
+   /*
+    * OK, now apply the texture (aka texture combine/blend).
+    * We modify the span->array->rgba values.
+    */
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+         texture_combine( ctx, unit, span->end,
+                          primary_rgba,
+                          swrast->TexelBuffer,
+                          span->array->rgba );
+      }
+   }
+
+   free(primary_rgba);
+}
-- 
cgit v1.2.3