From 9273afeeb4499a0493f120b7525e17b6ae51113e Mon Sep 17 00:00:00 2001
From: marha <marha@users.sourceforge.net>
Date: Tue, 22 Feb 2011 13:55:31 +0000
Subject: xserver libX11 pixman mesa git update 22 Feb 2011

---
 libX11/modules/im/ximcp/imLcFlt.c                |  240 +-
 libX11/modules/im/ximcp/imLcIc.c                 |    2 +-
 libX11/modules/im/ximcp/imLcLkup.c               |   30 +-
 mesalib/src/glsl/ast_expr.cpp                    |    5 +-
 mesalib/src/glsl/ast_type.cpp                    |    3 -
 mesalib/src/glsl/glsl_parser_extras.h            |    2 +-
 mesalib/src/glsl/glsl_types.cpp                  |    2 +-
 mesalib/src/glsl/glsl_types.h                    |    4 +-
 mesalib/src/glsl/hir_field_selection.cpp         |    2 -
 mesalib/src/glsl/ir.h                            |    4 +-
 mesalib/src/glsl/ir_import_prototypes.cpp        |    1 -
 mesalib/src/glsl/ir_print_visitor.cpp            |    3 -
 mesalib/src/glsl/ir_validate.cpp                 |    3 -
 mesalib/src/glsl/link_functions.cpp              |    4 -
 mesalib/src/glsl/linker.cpp                      |    4 -
 mesalib/src/glsl/loop_controls.cpp               |    2 +-
 mesalib/src/glsl/lower_mat_op_to_vec.cpp         |    3 -
 mesalib/src/glsl/main.cpp                        |    2 -
 mesalib/src/glsl/opt_constant_propagation.cpp    |    2 -
 mesalib/src/glsl/opt_constant_variable.cpp       |    3 -
 mesalib/src/glsl/opt_dead_code.cpp               |    2 -
 mesalib/src/glsl/opt_dead_code_local.cpp         |    2 -
 mesalib/src/glsl/opt_dead_functions.cpp          |    2 -
 mesalib/src/glsl/opt_structure_splitting.cpp     |    3 -
 mesalib/src/glsl/opt_swizzle_swizzle.cpp         |    2 -
 mesalib/src/glsl/opt_tree_grafting.cpp           |    2 -
 mesalib/src/glsl/s_expression.cpp                |    3 -
 mesalib/src/mesa/main/imports.c                  | 2061 ++++++------
 mesalib/src/mesa/main/imports.h                  | 1217 +++----
 mesalib/src/mesa/program/sampler.cpp             |    1 -
 mesalib/src/mesa/state_tracker/st_atom_sampler.c |  498 ++-
 mesalib/src/mesa/state_tracker/st_cb_clear.c     |   22 +-
 mesalib/src/mesa/state_tracker/st_cb_texture.c   | 3796 +++++++++++-----------
 mesalib/src/mesa/state_tracker/st_draw.c         | 1510 ++++-----
 mesalib/src/mesa/state_tracker/st_format.c       |   59 +-
 mesalib/src/mesa/state_tracker/st_format.h       |  166 +-
 mesalib/src/mesa/state_tracker/st_gen_mipmap.c   |   14 +-
 mesalib/src/mesa/vbo/vbo_exec.h                  |  397 ++-
 mesalib/src/mesa/vbo/vbo_exec_api.c              | 2187 +++++++------
 mesalib/src/mesa/vbo/vbo_exec_draw.c             |   25 +-
 pixman/configure.ac                              |    2 +-
 pixman/pixman/pixman-sse2.c                      | 1601 ++++-----
 xorg-server/dix/eventconvert.c                   | 1480 ++++-----
 xorg-server/test/input.c                         |  139 +
 44 files changed, 7645 insertions(+), 7867 deletions(-)

diff --git a/libX11/modules/im/ximcp/imLcFlt.c b/libX11/modules/im/ximcp/imLcFlt.c
index 014ad6add..06aa9980a 100644
--- a/libX11/modules/im/ximcp/imLcFlt.c
+++ b/libX11/modules/im/ximcp/imLcFlt.c
@@ -1,115 +1,125 @@
-/******************************************************************
-
-              Copyright 1992 by Fuji Xerox Co., Ltd.
-              Copyright 1992, 1994 by FUJITSU LIMITED
-
-Permission to use, copy, modify, distribute, and sell this software
-and its documentation for any purpose is hereby granted without fee,
-provided that the above copyright notice appear in all copies and
-that both that copyright notice and this permission notice appear
-in supporting documentation, and that the name of Fuji Xerox,
-FUJITSU LIMITED not be used in advertising or publicity pertaining
-to distribution of the software without specific, written prior
-permission. Fuji Xerox, FUJITSU LIMITED make no representations
-about the suitability of this software for any purpose.
-It is provided "as is" without express or implied warranty.
-
-FUJI XEROX, FUJITSU LIMITED DISCLAIM ALL WARRANTIES WITH
-REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL FUJI XEROX,
-FUJITSU LIMITED BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
-DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
-OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
-TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
-PERFORMANCE OF THIS SOFTWARE.
-
-  Author   : Kazunori Nishihara	Fuji Xerox
-  Modifier : Takashi Fujiwara   FUJITSU LIMITED
-                                fujiwara@a80.tech.yk.fujitsu.co.jp
-
-******************************************************************/
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-#include "Xlibint.h"
-#include <X11/keysym.h>
-#include "Xlcint.h"
-#include "Ximint.h"
-
-Bool
-_XimLocalFilter(Display *d, Window w, XEvent *ev, XPointer client_data)
-{
-    Xic		 ic = (Xic)client_data;
-    KeySym	 keysym;
-    static char	 buf[256];
-    DefTree	*b = ic->private.local.base.tree;
-    DTIndex	 t;
-
-    if(ev->xkey.keycode == 0)
-	return (False);
-
-    XLookupString((XKeyEvent *)ev, buf, sizeof(buf), &keysym, NULL);
-
-    if(IsModifierKey(keysym))
-	return (False);
-
-    if(keysym >= XK_braille_dot_1 && keysym <= XK_braille_dot_8) {
-	if(ev->type == KeyPress) {
-	    ic->private.local.brl_pressed |=
-		1<<(keysym-XK_braille_dot_1);
-	} else {
-	    if(!ic->private.local.brl_committing
-		    || ev->xkey.time - ic->private.local.brl_release_start > 300) {
-	    	ic->private.local.brl_committing = ic->private.local.brl_pressed;
-		ic->private.local.brl_release_start = ev->xkey.time;
-	    }
-	    ic->private.local.brl_pressed &= ~(1<<(keysym-XK_braille_dot_1));
-	    if(!ic->private.local.brl_pressed) {
-		if(ic->private.local.brl_committing) {
-		    ic->private.local.brl_committed =
-			ic->private.local.brl_committing;
-		    ic->private.local.composed = 0;
-		    ev->type = KeyPress;
-		    ev->xkey.keycode = 0;
-		    _XPutBackEvent(d, ev);
-		}
-	    }
-	}
-	return(True);
-    }
-
-    if(   (ev->type != KeyPress)
-       || (((Xim)ic->core.im)->private.local.top == 0 ) )
-	return(False);
-
-    for(t = ic->private.local.context; t; t = b[t].next) {
-	if(((ev->xkey.state & b[t].modifier_mask) == b[t].modifier) &&
-	   (keysym == b[t].keysym))
-	    break;
-    }
-
-    if(t) { /* Matched */
-	if(b[t].succession) { /* Intermediate */
-	    ic->private.local.context = b[t].succession;
-	    return(True);
-	} else { /* Terminate (reached to leaf) */
-	    ic->private.local.composed = t;
-	    ic->private.local.brl_committed = 0;
-	    /* return back to client KeyPressEvent keycode == 0 */
-	    ev->xkey.keycode = 0;
-	    XPutBackEvent(d, ev);
-	    /* initialize internal state for next key sequence */
-	    ic->private.local.context = ((Xim)ic->core.im)->private.local.top;
-	    return(True);
-	}
-    } else { /* Unmatched */
-	if(ic->private.local.context == ((Xim)ic->core.im)->private.local.top) {
-	    return(False);
-	}
-	/* Error (Sequence Unmatch occured) */
-	/* initialize internal state for next key sequence */
-	ic->private.local.context = ((Xim)ic->core.im)->private.local.top;
-	return(True);
-    }
-}
+/******************************************************************
+
+              Copyright 1992 by Fuji Xerox Co., Ltd.
+              Copyright 1992, 1994 by FUJITSU LIMITED
+
+Permission to use, copy, modify, distribute, and sell this software
+and its documentation for any purpose is hereby granted without fee,
+provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear
+in supporting documentation, and that the name of Fuji Xerox,
+FUJITSU LIMITED not be used in advertising or publicity pertaining
+to distribution of the software without specific, written prior
+permission. Fuji Xerox, FUJITSU LIMITED make no representations
+about the suitability of this software for any purpose.
+It is provided "as is" without express or implied warranty.
+
+FUJI XEROX, FUJITSU LIMITED DISCLAIM ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL FUJI XEROX,
+FUJITSU LIMITED BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
+OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+  Author   : Kazunori Nishihara	Fuji Xerox
+  Modifier : Takashi Fujiwara   FUJITSU LIMITED
+                                fujiwara@a80.tech.yk.fujitsu.co.jp
+
+******************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include "Xlibint.h"
+#include <X11/keysym.h>
+#include "Xlcint.h"
+#include "Ximint.h"
+
+Bool
+_XimLocalFilter(Display *d, Window w, XEvent *ev, XPointer client_data)
+{
+    Xic		 ic = (Xic)client_data;
+    KeySym	 keysym;
+    static char	 buf[256];
+    DefTree	*b = ic->private.local.base.tree;
+    DTIndex	 t;
+    Bool	 braille = False;
+
+    if(ev->xkey.keycode == 0)
+	return (False);
+
+    XLookupString((XKeyEvent *)ev, buf, sizeof(buf), &keysym, NULL);
+
+    if(IsModifierKey(keysym))
+	return (False);
+
+    if(keysym >= XK_braille_dot_1 && keysym <= XK_braille_dot_8) {
+	if(ev->type == KeyPress) {
+	    ic->private.local.brl_pressed |=
+		1<<(keysym-XK_braille_dot_1);
+	    return(True);
+	} else {
+	    if(!ic->private.local.brl_committing
+		    || ev->xkey.time - ic->private.local.brl_release_start > 300) {
+	    	ic->private.local.brl_committing = ic->private.local.brl_pressed;
+		ic->private.local.brl_release_start = ev->xkey.time;
+	    }
+	    ic->private.local.brl_pressed &= ~(1<<(keysym-XK_braille_dot_1));
+	    if(!ic->private.local.brl_pressed && ic->private.local.brl_committing) {
+		/* Committed a braille pattern, let it go through compose tree */
+		keysym = XK_braille_blank | ic->private.local.brl_committing;
+		ev->type = KeyPress;
+		braille = True;
+	    } else {
+	        return(True);
+	    }
+	}
+    }
+
+    if(   (ev->type != KeyPress)
+       || (((Xim)ic->core.im)->private.local.top == 0 ) )
+	goto emit_braille;
+
+    for(t = ic->private.local.context; t; t = b[t].next) {
+	if(((ev->xkey.state & b[t].modifier_mask) == b[t].modifier) &&
+	   (keysym == b[t].keysym))
+	    break;
+    }
+
+    if(t) { /* Matched */
+	if(b[t].succession) { /* Intermediate */
+	    ic->private.local.context = b[t].succession;
+	    return(True);
+	} else { /* Terminate (reached to leaf) */
+	    ic->private.local.composed = t;
+	    ic->private.local.brl_committed = 0;
+	    /* return back to client KeyPressEvent keycode == 0 */
+	    ev->xkey.keycode = 0;
+	    XPutBackEvent(d, ev);
+	    /* initialize internal state for next key sequence */
+	    ic->private.local.context = ((Xim)ic->core.im)->private.local.top;
+	    return(True);
+	}
+    } else { /* Unmatched */
+	if(ic->private.local.context == ((Xim)ic->core.im)->private.local.top) {
+	    goto emit_braille;
+	}
+	/* Error (Sequence Unmatch occurred) */
+	/* initialize internal state for next key sequence */
+	ic->private.local.context = ((Xim)ic->core.im)->private.local.top;
+	return(True);
+    }
+
+emit_braille:
+    if(braille) {
+	/* Braille pattern is not in compose tree, emit alone */
+	ic->private.local.brl_committed = ic->private.local.brl_committing;
+	ic->private.local.composed = 0;
+	ev->xkey.keycode = 0;
+	_XPutBackEvent(d, ev);
+	return(True);
+    }
+    return(False);
+}
diff --git a/libX11/modules/im/ximcp/imLcIc.c b/libX11/modules/im/ximcp/imLcIc.c
index 53d53efec..c0728084f 100644
--- a/libX11/modules/im/ximcp/imLcIc.c
+++ b/libX11/modules/im/ximcp/imLcIc.c
@@ -180,7 +180,7 @@ _XimLocalCreateIC(
 			values, XIM_CREATEIC, True)) {
 	goto Set_Error;
     }
-    ic_values.filter_events = KeyPressMask;
+    ic_values.filter_events = KeyPressMask | KeyReleaseMask;
     _XimSetCurrentICValues(ic, &ic_values);
     if(_XimSetICDefaults(ic, (XPointer)&ic_values,
 				XIM_SETICDEFAULTS, res, num) == False) {
diff --git a/libX11/modules/im/ximcp/imLcLkup.c b/libX11/modules/im/ximcp/imLcLkup.c
index 4891176cc..8e4111a6a 100644
--- a/libX11/modules/im/ximcp/imLcLkup.c
+++ b/libX11/modules/im/ximcp/imLcLkup.c
@@ -63,20 +63,25 @@ _XimLocalMbLookupString(XIC xic, XKeyEvent *ev, char *buffer, int bytes,
 	    unsigned char pattern = ic->private.local.brl_committed;
 	    char mb[XLC_PUBLIC(ic->core.im->core.lcd, mb_cur_max)];
 	    ret = _Xlcwctomb(ic->core.im->core.lcd, mb, BRL_UC_ROW | pattern);
-	    if(ret < 0) {
-		if(status) *status = XLookupNone;
-		return(0);
-	    }
 	    if(ret > bytes) {
 		if(status) *status = XBufferOverflow;
 		return(ret);
 	    }
-	    if(keysym) {
-		*keysym = XK_braille_blank | pattern;
-		if(status) *status = XLookupBoth;
-	    } else
-		if(status) *status = XLookupChars;
-	    memcpy(buffer, mb, ret);
+	    if(keysym) *keysym = XK_braille_blank | pattern;
+	    if(ret > 0) {
+		if (keysym) {
+		    if(status) *status = XLookupBoth;
+		} else {
+		    if(status) *status = XLookupChars;
+		}
+		memcpy(buffer, mb, ret);
+	    } else {
+		if(keysym) {
+		    if(status) *status = XLookupKeySym;
+		} else {
+		    if(status) *status = XLookupNone;
+		}
+	    }
 	} else { /* Composed Event */
 	    ret = strlen(&mb[b[ic->private.local.composed].mb]);
 	    if(ret > bytes) {
@@ -217,6 +222,11 @@ _XimLocalUtf8LookupString(XIC xic, XKeyEvent *ev, char *buffer, int bytes,
 	    buffer[0] = 0xe0 | ((BRL_UC_ROW >> 12) & 0x0f);
 	    buffer[1] = 0x80 | ((BRL_UC_ROW >> 8) & 0x30) | (pattern >> 6);
 	    buffer[2] = 0x80 | (pattern & 0x3f);
+	    if(keysym) {
+		*keysym = XK_braille_blank | pattern;
+		if(status) *status = XLookupBoth;
+	    } else
+		if(status) *status = XLookupChars;
 	} else { /* Composed Event */
 	    ret = strlen(&utf8[b[ic->private.local.composed].utf8]);
 	    if(ret > bytes) {
diff --git a/mesalib/src/glsl/ast_expr.cpp b/mesalib/src/glsl/ast_expr.cpp
index 974beb9f6..e624d11cf 100644
--- a/mesalib/src/glsl/ast_expr.cpp
+++ b/mesalib/src/glsl/ast_expr.cpp
@@ -20,12 +20,9 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#include <cstdio>
-#include <cassert>
+#include <assert.h>
 #include "ast.h"
 
-using std::printf;
-
 const char *
 ast_expression::operator_string(enum ast_operators op)
 {
diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp
index 5ddfeec87..c680ae5f6 100644
--- a/mesalib/src/glsl/ast_type.cpp
+++ b/mesalib/src/glsl/ast_type.cpp
@@ -21,14 +21,11 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <cstdio>
 #include "ast.h"
 extern "C" {
 #include "program/symbol_table.h"
 }
 
-using std::printf;
-
 void
 ast_type_specifier::print(void) const
 {
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 8d4fca787..10cb673c6 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -31,7 +31,7 @@
 #ifdef __cplusplus
 
 
-#include <cstdlib>
+#include <stdlib.h>
 #include "glsl_symbol_table.h"
 
 enum _mesa_glsl_parser_targets {
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index f4d9242b2..76b4f3e4c 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -21,7 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <cstdio>
+#include <stdio.h>
 #include <stdlib.h>
 #include "main/core.h" /* for Elements */
 #include "glsl_symbol_table.h"
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index ab6858faa..61bf5e0cf 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -26,8 +26,8 @@
 #ifndef GLSL_TYPES_H
 #define GLSL_TYPES_H
 
-#include <cstring>
-#include <cassert>
+#include <string.h>
+#include <assert.h>
 
 extern "C" {
 #include "GL/gl.h"
diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp
index 995f284fa..3c33127b5 100644
--- a/mesalib/src/glsl/hir_field_selection.cpp
+++ b/mesalib/src/glsl/hir_field_selection.cpp
@@ -27,8 +27,6 @@
 #include "ast.h"
 #include "glsl_types.h"
 
-using std::strcmp;
-
 ir_rvalue *
 _mesa_ast_field_selection_to_hir(const ast_expression *expr,
 				 exec_list *instructions,
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index 74a8b06b1..f2f902c0a 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -26,8 +26,8 @@
 #ifndef IR_H
 #define IR_H
 
-#include <cstdio>
-#include <cstdlib>
+#include <stdio.h>
+#include <stdlib.h>
 
 #include "ralloc.h"
 #include "glsl_types.h"
diff --git a/mesalib/src/glsl/ir_import_prototypes.cpp b/mesalib/src/glsl/ir_import_prototypes.cpp
index be5e0c1d3..3585bf6b2 100644
--- a/mesalib/src/glsl/ir_import_prototypes.cpp
+++ b/mesalib/src/glsl/ir_import_prototypes.cpp
@@ -27,7 +27,6 @@
  *
  * \author Ian Romanick
  */
-#include <cstdio>
 #include "ir.h"
 #include "glsl_symbol_table.h"
 
diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp
index be76945a2..82ccc722f 100644
--- a/mesalib/src/glsl/ir_print_visitor.cpp
+++ b/mesalib/src/glsl/ir_print_visitor.cpp
@@ -25,9 +25,6 @@
 #include "glsl_types.h"
 #include "glsl_parser_extras.h"
 
-using std::printf;
-using std::strncmp;
-
 static void print_type(const glsl_type *t);
 
 void
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index b0dd6c21f..44d7549ea 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -39,9 +39,6 @@
 #include "program/hash_table.h"
 #include "glsl_types.h"
 
-using std::abort;
-using std::printf;
-
 class ir_validate : public ir_hierarchical_visitor {
 public:
    ir_validate()
diff --git a/mesalib/src/glsl/link_functions.cpp b/mesalib/src/glsl/link_functions.cpp
index 861fa39b5..5851c14d6 100644
--- a/mesalib/src/glsl/link_functions.cpp
+++ b/mesalib/src/glsl/link_functions.cpp
@@ -21,10 +21,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <cstdlib>
-#include <cstdio>
-#include <cstdarg>
-
 #include "main/core.h"
 #include "glsl_symbol_table.h"
 #include "glsl_parser_extras.h"
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 6c003bb02..7db5c5e8d 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -63,10 +63,6 @@
  *
  * \author Ian Romanick <ian.d.romanick@intel.com>
  */
-#include <cstdlib>
-#include <cstdio>
-#include <cstdarg>
-#include <climits>
 
 #include "main/core.h"
 #include "glsl_symbol_table.h"
diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp
index 9eaa50f22..9acbadc50 100644
--- a/mesalib/src/glsl/loop_controls.cpp
+++ b/mesalib/src/glsl/loop_controls.cpp
@@ -21,7 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <climits>
+#include <limits.h>
 #include "main/compiler.h"
 #include "glsl_types.h"
 #include "loop_analysis.h"
diff --git a/mesalib/src/glsl/lower_mat_op_to_vec.cpp b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
index bdc53a1f8..8cbbfa713 100644
--- a/mesalib/src/glsl/lower_mat_op_to_vec.cpp
+++ b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
@@ -35,9 +35,6 @@
 #include "ir_expression_flattening.h"
 #include "glsl_types.h"
 
-using std::abort;
-using std::printf;
-
 class ir_mat_op_to_vec_visitor : public ir_hierarchical_visitor {
 public:
    ir_mat_op_to_vec_visitor()
diff --git a/mesalib/src/glsl/main.cpp b/mesalib/src/glsl/main.cpp
index 6f0552931..096da93dc 100644
--- a/mesalib/src/glsl/main.cpp
+++ b/mesalib/src/glsl/main.cpp
@@ -20,8 +20,6 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#include <cstdlib>
-#include <cstdio>
 #include <getopt.h>
 
 #include "ast.h"
diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp
index 814f6e61f..4425f4211 100644
--- a/mesalib/src/glsl/opt_constant_propagation.cpp
+++ b/mesalib/src/glsl/opt_constant_propagation.cpp
@@ -41,8 +41,6 @@
 #include "ir_optimization.h"
 #include "glsl_types.h"
 
-using std::memset;
-
 class acp_entry : public exec_node
 {
 public:
diff --git a/mesalib/src/glsl/opt_constant_variable.cpp b/mesalib/src/glsl/opt_constant_variable.cpp
index 9dca0ff8e..3fa7c3bad 100644
--- a/mesalib/src/glsl/opt_constant_variable.cpp
+++ b/mesalib/src/glsl/opt_constant_variable.cpp
@@ -37,9 +37,6 @@
 #include "ir_optimization.h"
 #include "glsl_types.h"
 
-using std::calloc;
-using std::free;
-
 struct assignment_entry {
    exec_node link;
    int assignment_count;
diff --git a/mesalib/src/glsl/opt_dead_code.cpp b/mesalib/src/glsl/opt_dead_code.cpp
index f47b9613e..cb500d2d1 100644
--- a/mesalib/src/glsl/opt_dead_code.cpp
+++ b/mesalib/src/glsl/opt_dead_code.cpp
@@ -32,8 +32,6 @@
 #include "ir_variable_refcount.h"
 #include "glsl_types.h"
 
-using std::printf;
-
 static bool debug = false;
 
 /**
diff --git a/mesalib/src/glsl/opt_dead_code_local.cpp b/mesalib/src/glsl/opt_dead_code_local.cpp
index f67b46337..39962bd60 100644
--- a/mesalib/src/glsl/opt_dead_code_local.cpp
+++ b/mesalib/src/glsl/opt_dead_code_local.cpp
@@ -38,8 +38,6 @@
 #include "ir_optimization.h"
 #include "glsl_types.h"
 
-using std::printf;
-
 static bool debug = false;
 
 class assignment_entry : public exec_node
diff --git a/mesalib/src/glsl/opt_dead_functions.cpp b/mesalib/src/glsl/opt_dead_functions.cpp
index d72eb6115..ceb79080a 100644
--- a/mesalib/src/glsl/opt_dead_functions.cpp
+++ b/mesalib/src/glsl/opt_dead_functions.cpp
@@ -32,8 +32,6 @@
  #include "ir_expression_flattening.h"
  #include "glsl_types.h"
 
- using std::strcmp;
-
  class signature_entry : public exec_node
  {
  public:
diff --git a/mesalib/src/glsl/opt_structure_splitting.cpp b/mesalib/src/glsl/opt_structure_splitting.cpp
index 8686da06a..014407c0b 100644
--- a/mesalib/src/glsl/opt_structure_splitting.cpp
+++ b/mesalib/src/glsl/opt_structure_splitting.cpp
@@ -38,9 +38,6 @@
 #include "ir_rvalue_visitor.h"
 #include "glsl_types.h"
 
-using std::printf;
-using std::strcmp;
-
 static bool debug = false;
 
 // XXX using variable_entry2 here to avoid collision (MSVC multiply-defined
diff --git a/mesalib/src/glsl/opt_swizzle_swizzle.cpp b/mesalib/src/glsl/opt_swizzle_swizzle.cpp
index 8d0e1051d..bc442fa86 100644
--- a/mesalib/src/glsl/opt_swizzle_swizzle.cpp
+++ b/mesalib/src/glsl/opt_swizzle_swizzle.cpp
@@ -32,8 +32,6 @@
 #include "ir_optimization.h"
 #include "glsl_types.h"
 
-using std::memset;
-
 class ir_swizzle_swizzle_visitor : public ir_hierarchical_visitor {
 public:
    ir_swizzle_swizzle_visitor()
diff --git a/mesalib/src/glsl/opt_tree_grafting.cpp b/mesalib/src/glsl/opt_tree_grafting.cpp
index a85ba8234..1ef940f9c 100644
--- a/mesalib/src/glsl/opt_tree_grafting.cpp
+++ b/mesalib/src/glsl/opt_tree_grafting.cpp
@@ -54,8 +54,6 @@
 #include "ir_optimization.h"
 #include "glsl_types.h"
 
-using std::printf;
-
 static bool debug = false;
 
 class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp
index 77ac08ac7..a922a50d3 100644
--- a/mesalib/src/glsl/s_expression.cpp
+++ b/mesalib/src/glsl/s_expression.cpp
@@ -22,9 +22,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
 #include <assert.h>
 #include "s_expression.h"
 
diff --git a/mesalib/src/mesa/main/imports.c b/mesalib/src/mesa/main/imports.c
index f98098230..bf89815f2 100644
--- a/mesalib/src/mesa/main/imports.c
+++ b/mesalib/src/mesa/main/imports.c
@@ -1,1033 +1,1028 @@
-/**
- * \file imports.c
- * Standard C library function wrappers.
- * 
- * Imports are services which the device driver or window system or
- * operating system provides to the core renderer.  The core renderer (Mesa)
- * will call these functions in order to do memory allocation, simple I/O,
- * etc.
- *
- * Some drivers will want to override/replace this file with something
- * specialized, but that'll be rare.
- *
- * Eventually, I want to move roll the glheader.h file into this.
- *
- * \todo Functions still needed:
- * - scanf
- * - qsort
- * - rand and RAND_MAX
- */
-
-/*
- * Mesa 3-D graphics library
- * Version:  7.1
- *
- * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-
-#include "imports.h"
-#include "context.h"
-#include "mtypes.h"
-#include "version.h"
-
-#ifdef _GNU_SOURCE
-#include <locale.h>
-#ifdef __APPLE__
-#include <xlocale.h>
-#endif
-#endif
-
-
-#define MAXSTRING 4000  /* for vsnprintf() */
-
-#ifdef WIN32
-#define vsnprintf _vsnprintf
-#elif defined(__IBMC__) || defined(__IBMCPP__) || ( defined(__VMS) && __CRTL_VER < 70312000 )
-extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg);
-#ifdef __VMS
-#include "vsnprintf.c"
-#endif
-#endif
-
-/**********************************************************************/
-/** \name Memory */
-/*@{*/
-
-/**
- * Allocate aligned memory.
- *
- * \param bytes number of bytes to allocate.
- * \param alignment alignment (must be greater than zero).
- * 
- * Allocates extra memory to accommodate rounding up the address for
- * alignment and to record the real malloc address.
- *
- * \sa _mesa_align_free().
- */
-void *
-_mesa_align_malloc(size_t bytes, unsigned long alignment)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-   void *mem;
-   int err = posix_memalign(& mem, alignment, bytes);
-   if (err)
-      return NULL;
-   return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
-   return _aligned_malloc(bytes, alignment);
-#else
-   uintptr_t ptr, buf;
-
-   ASSERT( alignment > 0 );
-
-   ptr = (uintptr_t) malloc(bytes + alignment + sizeof(void *));
-   if (!ptr)
-      return NULL;
-
-   buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
-   *(uintptr_t *)(buf - sizeof(void *)) = ptr;
-
-#ifdef DEBUG
-   /* mark the non-aligned area */
-   while ( ptr < buf - sizeof(void *) ) {
-      *(unsigned long *)ptr = 0xcdcdcdcd;
-      ptr += sizeof(unsigned long);
-   }
-#endif
-
-   return (void *) buf;
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-/**
- * Same as _mesa_align_malloc(), but using calloc(1, ) instead of
- * malloc()
- */
-void *
-_mesa_align_calloc(size_t bytes, unsigned long alignment)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-   void *mem;
-   
-   mem = _mesa_align_malloc(bytes, alignment);
-   if (mem != NULL) {
-      (void) memset(mem, 0, bytes);
-   }
-
-   return mem;
-#elif defined(_WIN32) && defined(_MSC_VER)
-   void *mem;
-
-   mem = _aligned_malloc(bytes, alignment);
-   if (mem != NULL) {
-      (void) memset(mem, 0, bytes);
-   }
-
-   return mem;
-#else
-   uintptr_t ptr, buf;
-
-   ASSERT( alignment > 0 );
-
-   ptr = (uintptr_t) calloc(1, bytes + alignment + sizeof(void *));
-   if (!ptr)
-      return NULL;
-
-   buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
-   *(uintptr_t *)(buf - sizeof(void *)) = ptr;
-
-#ifdef DEBUG
-   /* mark the non-aligned area */
-   while ( ptr < buf - sizeof(void *) ) {
-      *(unsigned long *)ptr = 0xcdcdcdcd;
-      ptr += sizeof(unsigned long);
-   }
-#endif
-
-   return (void *)buf;
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-/**
- * Free memory which was allocated with either _mesa_align_malloc()
- * or _mesa_align_calloc().
- * \param ptr pointer to the memory to be freed.
- * The actual address to free is stored in the word immediately before the
- * address the client sees.
- */
-void
-_mesa_align_free(void *ptr)
-{
-#if defined(HAVE_POSIX_MEMALIGN)
-   free(ptr);
-#elif defined(_WIN32) && defined(_MSC_VER)
-   _aligned_free(ptr);
-#else
-   void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
-   void *realAddr = *cubbyHole;
-   free(realAddr);
-#endif /* defined(HAVE_POSIX_MEMALIGN) */
-}
-
-/**
- * Reallocate memory, with alignment.
- */
-void *
-_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
-                    unsigned long alignment)
-{
-#if defined(_WIN32) && defined(_MSC_VER)
-   (void) oldSize;
-   return _aligned_realloc(oldBuffer, newSize, alignment);
-#else
-   const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
-   void *newBuf = _mesa_align_malloc(newSize, alignment);
-   if (newBuf && oldBuffer && copySize > 0) {
-      memcpy(newBuf, oldBuffer, copySize);
-   }
-   if (oldBuffer)
-      _mesa_align_free(oldBuffer);
-   return newBuf;
-#endif
-}
-
-
-
-/** Reallocate memory */
-void *
-_mesa_realloc(void *oldBuffer, size_t oldSize, size_t newSize)
-{
-   const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
-   void *newBuffer = malloc(newSize);
-   if (newBuffer && oldBuffer && copySize > 0)
-      memcpy(newBuffer, oldBuffer, copySize);
-   if (oldBuffer)
-      free(oldBuffer);
-   return newBuffer;
-}
-
-/**
- * Fill memory with a constant 16bit word.
- * \param dst destination pointer.
- * \param val value.
- * \param n number of words.
- */
-void
-_mesa_memset16( unsigned short *dst, unsigned short val, size_t n )
-{
-   while (n-- > 0)
-      *dst++ = val;
-}
-
-/*@}*/
-
-
-/**********************************************************************/
-/** \name Math */
-/*@{*/
-
-/** Wrapper around sqrt() */
-double
-_mesa_sqrtd(double x)
-{
-   return sqrt(x);
-}
-
-
-/*
- * A High Speed, Low Precision Square Root
- * by Paul Lalonde and Robert Dawson
- * from "Graphics Gems", Academic Press, 1990
- *
- * SPARC implementation of a fast square root by table
- * lookup.
- * SPARC floating point format is as follows:
- *
- * BIT 31 	30 	23 	22 	0
- *     sign	exponent	mantissa
- */
-static short sqrttab[0x100];    /* declare table of square roots */
-
-void
-_mesa_init_sqrt_table(void)
-{
-#if defined(USE_IEEE) && !defined(DEBUG)
-   unsigned short i;
-   fi_type fi;     /* to access the bits of a float in  C quickly  */
-                   /* we use a union defined in glheader.h         */
-
-   for(i=0; i<= 0x7f; i++) {
-      fi.i = 0;
-
-      /*
-       * Build a float with the bit pattern i as mantissa
-       * and an exponent of 0, stored as 127
-       */
-
-      fi.i = (i << 16) | (127 << 23);
-      fi.f = _mesa_sqrtd(fi.f);
-
-      /*
-       * Take the square root then strip the first 7 bits of
-       * the mantissa into the table
-       */
-
-      sqrttab[i] = (fi.i & 0x7fffff) >> 16;
-
-      /*
-       * Repeat the process, this time with an exponent of
-       * 1, stored as 128
-       */
-
-      fi.i = 0;
-      fi.i = (i << 16) | (128 << 23);
-      fi.f = sqrt(fi.f);
-      sqrttab[i+0x80] = (fi.i & 0x7fffff) >> 16;
-   }
-#else
-   (void) sqrttab;  /* silence compiler warnings */
-#endif /*HAVE_FAST_MATH*/
-}
-
-
-/**
- * Single precision square root.
- */
-float
-_mesa_sqrtf( float x )
-{
-#if defined(USE_IEEE) && !defined(DEBUG)
-   fi_type num;
-                                /* to access the bits of a float in C
-                                 * we use a union from glheader.h     */
-
-   short e;                     /* the exponent */
-   if (x == 0.0F) return 0.0F;  /* check for square root of 0 */
-   num.f = x;
-   e = (num.i >> 23) - 127;     /* get the exponent - on a SPARC the */
-                                /* exponent is stored with 127 added */
-   num.i &= 0x7fffff;           /* leave only the mantissa */
-   if (e & 0x01) num.i |= 0x800000;
-                                /* the exponent is odd so we have to */
-                                /* look it up in the second half of  */
-                                /* the lookup table, so we set the   */
-                                /* high bit                                */
-   e >>= 1;                     /* divide the exponent by two */
-                                /* note that in C the shift */
-                                /* operators are sign preserving */
-                                /* for signed operands */
-   /* Do the table lookup, based on the quaternary mantissa,
-    * then reconstruct the result back into a float
-    */
-   num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23);
-
-   return num.f;
-#else
-   return (float) _mesa_sqrtd((double) x);
-#endif
-}
-
-
-/**
- inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
- written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
- and Vesa Karvonen.
-*/
-float
-_mesa_inv_sqrtf(float n)
-{
-#if defined(USE_IEEE) && !defined(DEBUG)
-        float r0, x0, y0;
-        float r1, x1, y1;
-        float r2, x2, y2;
-#if 0 /* not used, see below -BP */
-        float r3, x3, y3;
-#endif
-        fi_type u;
-        unsigned int magic;
-
-        /*
-         Exponent part of the magic number -
-
-         We want to:
-         1. subtract the bias from the exponent,
-         2. negate it
-         3. divide by two (rounding towards -inf)
-         4. add the bias back
-
-         Which is the same as subtracting the exponent from 381 and dividing
-         by 2.
-
-         floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
-        */
-
-        magic = 381 << 23;
-
-        /*
-         Significand part of magic number -
-
-         With the current magic number, "(magic - u.i) >> 1" will give you:
-
-         for 1 <= u.f <= 2: 1.25 - u.f / 4
-         for 2 <= u.f <= 4: 1.00 - u.f / 8
-
-         This isn't a bad approximation of 1/sqrt.  The maximum difference from
-         1/sqrt will be around .06.  After three Newton-Raphson iterations, the
-         maximum difference is less than 4.5e-8.  (Which is actually close
-         enough to make the following bias academic...)
-
-         To get a better approximation you can add a bias to the magic
-         number.  For example, if you subtract 1/2 of the maximum difference in
-         the first approximation (.03), you will get the following function:
-
-         for 1 <= u.f <= 2:    1.22 - u.f / 4
-         for 2 <= u.f <= 3.76: 0.97 - u.f / 8
-         for 3.76 <= u.f <= 4: 0.72 - u.f / 16
-         (The 3.76 to 4 range is where the result is < .5.)
-
-         This is the closest possible initial approximation, but with a maximum
-         error of 8e-11 after three NR iterations, it is still not perfect.  If
-         you subtract 0.0332281 instead of .03, the maximum error will be
-         2.5e-11 after three NR iterations, which should be about as close as
-         is possible.
-
-         for 1 <= u.f <= 2:    1.2167719 - u.f / 4
-         for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
-         for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
-
-        */
-
-        magic -= (int)(0.0332281 * (1 << 25));
-
-        u.f = n;
-        u.i = (magic - u.i) >> 1;
-
-        /*
-         Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
-         allows more parallelism.  From what I understand, the parallelism
-         comes at the cost of less precision, because it lets error
-         accumulate across iterations.
-        */
-        x0 = 1.0f;
-        y0 = 0.5f * n;
-        r0 = u.f;
-
-        x1 = x0 * r0;
-        y1 = y0 * r0 * r0;
-        r1 = 1.5f - y1;
-
-        x2 = x1 * r1;
-        y2 = y1 * r1 * r1;
-        r2 = 1.5f - y2;
-
-#if 1
-        return x2 * r2;  /* we can stop here, and be conformant -BP */
-#else
-        x3 = x2 * r2;
-        y3 = y2 * r2 * r2;
-        r3 = 1.5f - y3;
-
-        return x3 * r3;
-#endif
-#else
-        return (float) (1.0 / sqrt(n));
-#endif
-}
-
-/**
- * Find the first bit set in a word.
- */
-int
-_mesa_ffs(int32_t i)
-{
-#if (defined(_WIN32) ) || defined(__IBMC__) || defined(__IBMCPP__)
-   register int bit = 0;
-   if (i != 0) {
-      if ((i & 0xffff) == 0) {
-         bit += 16;
-         i >>= 16;
-      }
-      if ((i & 0xff) == 0) {
-         bit += 8;
-         i >>= 8;
-      }
-      if ((i & 0xf) == 0) {
-         bit += 4;
-         i >>= 4;
-      }
-      while ((i & 1) == 0) {
-         bit++;
-         i >>= 1;
-      }
-      bit++;
-   }
-   return bit;
-#else
-   return ffs(i);
-#endif
-}
-
-
-/**
- * Find position of first bit set in given value.
- * XXX Warning: this function can only be used on 64-bit systems!
- * \return  position of least-significant bit set, starting at 1, return zero
- *          if no bits set.
- */
-int
-_mesa_ffsll(int64_t val)
-{
-#ifdef ffsll
-   return ffsll(val);
-#else
-   int bit;
-
-   assert(sizeof(val) == 8);
-
-   bit = _mesa_ffs((int32_t)val);
-   if (bit != 0)
-      return bit;
-
-   bit = _mesa_ffs((int32_t)(val >> 32));
-   if (bit != 0)
-      return 32 + bit;
-
-   return 0;
-#endif
-}
-
-
-/**
- * Return number of bits set in given GLuint.
- */
-unsigned int
-_mesa_bitcount(unsigned int n)
-{
-#if defined(__GNUC__) && \
-	((_GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-   return __builtin_popcount(n);
-#else
-   unsigned int bits;
-   for (bits = 0; n > 0; n = n >> 1) {
-      bits += (n & 1);
-   }
-   return bits;
-#endif
-}
-
-
-/**
- * Convert a 4-byte float to a 2-byte half float.
- * Based on code from:
- * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
- */
-GLhalfARB
-_mesa_float_to_half(float val)
-{
-   const fi_type fi = {val};
-   const int flt_m = fi.i & 0x7fffff;
-   const int flt_e = (fi.i >> 23) & 0xff;
-   const int flt_s = (fi.i >> 31) & 0x1;
-   int s, e, m = 0;
-   GLhalfARB result;
-   
-   /* sign bit */
-   s = flt_s;
-
-   /* handle special cases */
-   if ((flt_e == 0) && (flt_m == 0)) {
-      /* zero */
-      /* m = 0; - already set */
-      e = 0;
-   }
-   else if ((flt_e == 0) && (flt_m != 0)) {
-      /* denorm -- denorm float maps to 0 half */
-      /* m = 0; - already set */
-      e = 0;
-   }
-   else if ((flt_e == 0xff) && (flt_m == 0)) {
-      /* infinity */
-      /* m = 0; - already set */
-      e = 31;
-   }
-   else if ((flt_e == 0xff) && (flt_m != 0)) {
-      /* NaN */
-      m = 1;
-      e = 31;
-   }
-   else {
-      /* regular number */
-      const int new_exp = flt_e - 127;
-      if (new_exp < -24) {
-         /* this maps to 0 */
-         /* m = 0; - already set */
-         e = 0;
-      }
-      else if (new_exp < -14) {
-         /* this maps to a denorm */
-         unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 2^-exp_val*/
-         e = 0;
-         switch (exp_val) {
-            case 0:
-               _mesa_warning(NULL,
-                   "float_to_half: logical error in denorm creation!\n");
-               /* m = 0; - already set */
-               break;
-            case 1: m = 512 + (flt_m >> 14); break;
-            case 2: m = 256 + (flt_m >> 15); break;
-            case 3: m = 128 + (flt_m >> 16); break;
-            case 4: m = 64 + (flt_m >> 17); break;
-            case 5: m = 32 + (flt_m >> 18); break;
-            case 6: m = 16 + (flt_m >> 19); break;
-            case 7: m = 8 + (flt_m >> 20); break;
-            case 8: m = 4 + (flt_m >> 21); break;
-            case 9: m = 2 + (flt_m >> 22); break;
-            case 10: m = 1; break;
-         }
-      }
-      else if (new_exp > 15) {
-         /* map this value to infinity */
-         /* m = 0; - already set */
-         e = 31;
-      }
-      else {
-         /* regular */
-         e = new_exp + 15;
-         m = flt_m >> 13;
-      }
-   }
-
-   result = (s << 15) | (e << 10) | m;
-   return result;
-}
-
-
-/**
- * Convert a 2-byte half float to a 4-byte float.
- * Based on code from:
- * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
- */
-float
-_mesa_half_to_float(GLhalfARB val)
-{
-   /* XXX could also use a 64K-entry lookup table */
-   const int m = val & 0x3ff;
-   const int e = (val >> 10) & 0x1f;
-   const int s = (val >> 15) & 0x1;
-   int flt_m, flt_e, flt_s;
-   fi_type fi;
-   float result;
-
-   /* sign bit */
-   flt_s = s;
-
-   /* handle special cases */
-   if ((e == 0) && (m == 0)) {
-      /* zero */
-      flt_m = 0;
-      flt_e = 0;
-   }
-   else if ((e == 0) && (m != 0)) {
-      /* denorm -- denorm half will fit in non-denorm single */
-      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
-      float mantissa = ((float) (m)) / 1024.0f;
-      float sign = s ? -1.0f : 1.0f;
-      return sign * mantissa * half_denorm;
-   }
-   else if ((e == 31) && (m == 0)) {
-      /* infinity */
-      flt_e = 0xff;
-      flt_m = 0;
-   }
-   else if ((e == 31) && (m != 0)) {
-      /* NaN */
-      flt_e = 0xff;
-      flt_m = 1;
-   }
-   else {
-      /* regular */
-      flt_e = e + 112;
-      flt_m = m << 13;
-   }
-
-   fi.i = (flt_s << 31) | (flt_e << 23) | flt_m;
-   result = fi.f;
-   return result;
-}
-
-/*@}*/
-
-
-/**********************************************************************/
-/** \name Sort & Search */
-/*@{*/
-
-/**
- * Wrapper for bsearch().
- */
-void *
-_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, 
-               int (*compar)(const void *, const void *) )
-{
-#if defined(_WIN32_WCE)
-   void *mid;
-   int cmp;
-   while (nmemb) {
-      nmemb >>= 1;
-      mid = (char *)base + nmemb * size;
-      cmp = (*compar)(key, mid);
-      if (cmp == 0)
-	 return mid;
-      if (cmp > 0) {
-	 base = (char *)mid + size;
-	 --nmemb;
-      }
-   }
-   return NULL;
-#else
-   return bsearch(key, base, nmemb, size, compar);
-#endif
-}
-
-/*@}*/
-
-
-/**********************************************************************/
-/** \name Environment vars */
-/*@{*/
-
-/**
- * Wrapper for getenv().
- */
-char *
-_mesa_getenv( const char *var )
-{
-#if defined(_XBOX) || defined(_WIN32_WCE)
-   return NULL;
-#else
-   return getenv(var);
-#endif
-}
-
-/*@}*/
-
-
-/**********************************************************************/
-/** \name String */
-/*@{*/
-
-/**
- * Implemented using malloc() and strcpy.
- * Note that NULL is handled accordingly.
- */
-char *
-_mesa_strdup( const char *s )
-{
-   if (s) {
-      size_t l = strlen(s);
-      char *s2 = (char *) malloc(l + 1);
-      if (s2)
-         strcpy(s2, s);
-      return s2;
-   }
-   else {
-      return NULL;
-   }
-}
-
-/** Wrapper around strtof() */
-float
-_mesa_strtof( const char *s, char **end )
-{
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
-   static locale_t loc = NULL;
-   if (!loc) {
-      loc = newlocale(LC_CTYPE_MASK, "C", NULL);
-   }
-   return strtof_l(s, end, loc);
-#elif defined(_ISOC99_SOURCE) || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600)
-   return strtof(s, end);
-#else
-   return (float)strtod(s, end);
-#endif
-}
-
-/** Compute simple checksum/hash for a string */
-unsigned int
-_mesa_str_checksum(const char *str)
-{
-   /* This could probably be much better */
-   unsigned int sum, i;
-   const char *c;
-   sum = i = 1;
-   for (c = str; *c; c++, i++)
-      sum += *c * (i % 100);
-   return sum + i;
-}
-
-
-/*@}*/
-
-
-/** Wrapper around vsnprintf() */
-int
-_mesa_snprintf( char *str, size_t size, const char *fmt, ... )
-{
-   int r;
-   va_list args;
-   va_start( args, fmt );  
-   r = vsnprintf( str, size, fmt, args );
-   va_end( args );
-   return r;
-}
-
-
-/**********************************************************************/
-/** \name Diagnostics */
-/*@{*/
-
-static void
-output_if_debug(const char *prefixString, const char *outputString,
-                GLboolean newline)
-{
-   static int debug = -1;
-
-   /* Check the MESA_DEBUG environment variable if it hasn't
-    * been checked yet.  We only have to check it once...
-    */
-   if (debug == -1) {
-      char *env = _mesa_getenv("MESA_DEBUG");
-
-      /* In a debug build, we print warning messages *unless*
-       * MESA_DEBUG is 0.  In a non-debug build, we don't
-       * print warning messages *unless* MESA_DEBUG is
-       * set *to any value*.
-       */
-#ifdef DEBUG
-      debug = (env != NULL && atoi(env) == 0) ? 0 : 1;
-#else
-      debug = (env != NULL) ? 1 : 0;
-#endif
-   }
-
-   /* Now only print the string if we're required to do so. */
-   if (debug) {
-      fprintf(stderr, "%s: %s", prefixString, outputString);
-      if (newline)
-         fprintf(stderr, "\n");
-
-#if defined(_WIN32) && !defined(_WIN32_WCE)
-      /* stderr from windows applications without console is not usually 
-       * visible, so communicate with the debugger instead */ 
-      {
-         char buf[4096];
-         _mesa_snprintf(buf, sizeof(buf), "%s: %s%s", prefixString, outputString, newline ? "\n" : "");
-         OutputDebugStringA(buf);
-      }
-#endif
-   }
-}
-
-
-/**
- * Return string version of GL error code.
- */
-static const char *
-error_string( GLenum error )
-{
-   switch (error) {
-   case GL_NO_ERROR:
-      return "GL_NO_ERROR";
-   case GL_INVALID_VALUE:
-      return "GL_INVALID_VALUE";
-   case GL_INVALID_ENUM:
-      return "GL_INVALID_ENUM";
-   case GL_INVALID_OPERATION:
-      return "GL_INVALID_OPERATION";
-   case GL_STACK_OVERFLOW:
-      return "GL_STACK_OVERFLOW";
-   case GL_STACK_UNDERFLOW:
-      return "GL_STACK_UNDERFLOW";
-   case GL_OUT_OF_MEMORY:
-      return "GL_OUT_OF_MEMORY";
-   case GL_TABLE_TOO_LARGE:
-      return "GL_TABLE_TOO_LARGE";
-   case GL_INVALID_FRAMEBUFFER_OPERATION_EXT:
-      return "GL_INVALID_FRAMEBUFFER_OPERATION";
-   default:
-      return "unknown";
-   }
-}
-
-
-/**
- * When a new type of error is recorded, print a message describing
- * previous errors which were accumulated.
- */
-static void
-flush_delayed_errors( struct gl_context *ctx )
-{
-   char s[MAXSTRING];
-
-   if (ctx->ErrorDebugCount) {
-      _mesa_snprintf(s, MAXSTRING, "%d similar %s errors", 
-                     ctx->ErrorDebugCount,
-                     error_string(ctx->ErrorValue));
-
-      output_if_debug("Mesa", s, GL_TRUE);
-
-      ctx->ErrorDebugCount = 0;
-   }
-}
-
-
-/**
- * Report a warning (a recoverable error condition) to stderr if
- * either DEBUG is defined or the MESA_DEBUG env var is set.
- *
- * \param ctx GL context.
- * \param fmtString printf()-like format string.
- */
-void
-_mesa_warning( struct gl_context *ctx, const char *fmtString, ... )
-{
-   char str[MAXSTRING];
-   va_list args;
-   va_start( args, fmtString );  
-   (void) vsnprintf( str, MAXSTRING, fmtString, args );
-   va_end( args );
-   
-   if (ctx)
-      flush_delayed_errors( ctx );
-
-   output_if_debug("Mesa warning", str, GL_TRUE);
-}
-
-
-/**
- * Report an internal implementation problem.
- * Prints the message to stderr via fprintf().
- *
- * \param ctx GL context.
- * \param fmtString problem description string.
- */
-void
-_mesa_problem( const struct gl_context *ctx, const char *fmtString, ... )
-{
-   va_list args;
-   char str[MAXSTRING];
-   (void) ctx;
-
-   va_start( args, fmtString );  
-   vsnprintf( str, MAXSTRING, fmtString, args );
-   va_end( args );
-
-   fprintf(stderr, "Mesa %s implementation error: %s\n", MESA_VERSION_STRING, str);
-   fprintf(stderr, "Please report at bugs.freedesktop.org\n");
-}
-
-
-/**
- * Record an OpenGL state error.  These usually occur when the user
- * passes invalid parameters to a GL function.
- *
- * If debugging is enabled (either at compile-time via the DEBUG macro, or
- * run-time via the MESA_DEBUG environment variable), report the error with
- * _mesa_debug().
- * 
- * \param ctx the GL context.
- * \param error the error value.
- * \param fmtString printf() style format string, followed by optional args
- */
-void
-_mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
-{
-   static GLint debug = -1;
-
-   /* Check debug environment variable only once:
-    */
-   if (debug == -1) {
-      const char *debugEnv = _mesa_getenv("MESA_DEBUG");
-
-#ifdef DEBUG
-      if (debugEnv && strstr(debugEnv, "silent"))
-         debug = GL_FALSE;
-      else
-         debug = GL_TRUE;
-#else
-      if (debugEnv)
-         debug = GL_TRUE;
-      else
-         debug = GL_FALSE;
-#endif
-   }
-
-   if (debug) {      
-      if (ctx->ErrorValue == error &&
-          ctx->ErrorDebugFmtString == fmtString) {
-         ctx->ErrorDebugCount++;
-      }
-      else {
-         char s[MAXSTRING], s2[MAXSTRING];
-         va_list args;
-
-         flush_delayed_errors( ctx );
-         
-         va_start(args, fmtString);
-         vsnprintf(s, MAXSTRING, fmtString, args);
-         va_end(args);
-
-         _mesa_snprintf(s2, MAXSTRING, "%s in %s", error_string(error), s);
-         output_if_debug("Mesa: User error", s2, GL_TRUE);
-         
-         ctx->ErrorDebugFmtString = fmtString;
-         ctx->ErrorDebugCount = 0;
-      }
-   }
-
-   _mesa_record_error(ctx, error);
-}
-
-
-/**
- * Report debug information.  Print error message to stderr via fprintf().
- * No-op if DEBUG mode not enabled.
- * 
- * \param ctx GL context.
- * \param fmtString printf()-style format string, followed by optional args.
- */
-void
-_mesa_debug( const struct gl_context *ctx, const char *fmtString, ... )
-{
-#ifdef DEBUG
-   char s[MAXSTRING];
-   va_list args;
-   va_start(args, fmtString);
-   vsnprintf(s, MAXSTRING, fmtString, args);
-   va_end(args);
-   output_if_debug("Mesa", s, GL_FALSE);
-#endif /* DEBUG */
-   (void) ctx;
-   (void) fmtString;
-}
-
-/*@}*/
+/**
+ * \file imports.c
+ * Standard C library function wrappers.
+ * 
+ * Imports are services which the device driver or window system or
+ * operating system provides to the core renderer.  The core renderer (Mesa)
+ * will call these functions in order to do memory allocation, simple I/O,
+ * etc.
+ *
+ * Some drivers will want to override/replace this file with something
+ * specialized, but that'll be rare.
+ *
+ * Eventually, I want to roll the glheader.h file into this.
+ *
+ * \todo Functions still needed:
+ * - scanf
+ * - qsort
+ * - rand and RAND_MAX
+ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "imports.h"
+#include "context.h"
+#include "mtypes.h"
+#include "version.h"
+
+#ifdef _GNU_SOURCE
+#include <locale.h>
+#ifdef __APPLE__
+#include <xlocale.h>
+#endif
+#endif
+
+
+#define MAXSTRING 4000  /* for vsnprintf() */
+
+#ifdef WIN32
+#define vsnprintf _vsnprintf
+#elif defined(__IBMC__) || defined(__IBMCPP__) || ( defined(__VMS) && __CRTL_VER < 70312000 )
+extern int vsnprintf(char *str, size_t count, const char *fmt, va_list arg);
+#ifdef __VMS
+#include "vsnprintf.c"
+#endif
+#endif
+
+/**********************************************************************/
+/** \name Memory */
+/*@{*/
+
+/**
+ * Allocate aligned memory.
+ *
+ * \param bytes number of bytes to allocate.
+ * \param alignment alignment in bytes; must be a non-zero power of two,
+ *        because the fallback path rounds with the mask ~(alignment - 1).
+ * \return pointer to the aligned block, or NULL if allocation fails.
+ *
+ * The malloc() fallback over-allocates and records the real malloc address
+ * in the word just below the returned block.  \sa _mesa_align_free().
+ */
+void *
+_mesa_align_malloc(size_t bytes, unsigned long alignment)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+   void *mem;
+   int err = posix_memalign(& mem, alignment, bytes);
+   if (err)
+      return NULL;
+   return mem;
+#elif defined(_WIN32) && defined(_MSC_VER)
+   return _aligned_malloc(bytes, alignment);
+#else
+   uintptr_t ptr, buf;
+
+   ASSERT( alignment > 0 );
+
+   ptr = (uintptr_t) malloc(bytes + alignment + sizeof(void *));
+   if (!ptr)
+      return NULL;
+   /* leave room for the saved address, then round down to an aligned address */
+   buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
+   *(uintptr_t *)(buf - sizeof(void *)) = ptr;   /* read back by _mesa_align_free() */
+
+#ifdef DEBUG
+   /* mark the non-aligned area */
+   while ( ptr < buf - sizeof(void *) ) {
+      *(unsigned long *)ptr = 0xcdcdcdcd;
+      ptr += sizeof(unsigned long);
+   }
+#endif
+
+   return (void *) buf;
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+/**
+ * Zero-filled variant of _mesa_align_malloc(): the platform paths memset()
+ * the new block to zero and the fallback path allocates with calloc().
+ */
+void *
+_mesa_align_calloc(size_t bytes, unsigned long alignment)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+   void *mem;
+   
+   mem = _mesa_align_malloc(bytes, alignment);
+   if (mem != NULL) {
+      (void) memset(mem, 0, bytes);
+   }
+
+   return mem;
+#elif defined(_WIN32) && defined(_MSC_VER)
+   void *mem;
+
+   mem = _aligned_malloc(bytes, alignment);
+   if (mem != NULL) {
+      (void) memset(mem, 0, bytes);
+   }
+
+   return mem;
+#else
+   uintptr_t ptr, buf;
+
+   ASSERT( alignment > 0 );
+
+   ptr = (uintptr_t) calloc(1, bytes + alignment + sizeof(void *));
+   if (!ptr)
+      return NULL;
+   /* same layout as _mesa_align_malloc(): saved address just below the block */
+   buf = (ptr + alignment + sizeof(void *)) & ~(uintptr_t)(alignment - 1);
+   *(uintptr_t *)(buf - sizeof(void *)) = ptr;
+
+#ifdef DEBUG
+   /* mark the non-aligned area */
+   while ( ptr < buf - sizeof(void *) ) {
+      *(unsigned long *)ptr = 0xcdcdcdcd;
+      ptr += sizeof(unsigned long);
+   }
+#endif
+
+   return (void *)buf;
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+/**
+ * Free memory which was allocated with either _mesa_align_malloc()
+ * or _mesa_align_calloc().
+ * \param ptr pointer to the memory to be freed.
+ * In the malloc() fallback path the real address to free is read from the
+ * word immediately before \p ptr, so \p ptr must not be NULL there.
+ */
+void
+_mesa_align_free(void *ptr)
+{
+#if defined(HAVE_POSIX_MEMALIGN)
+   free(ptr);
+#elif defined(_WIN32) && defined(_MSC_VER)
+   _aligned_free(ptr);
+#else
+   void **cubbyHole = (void **) ((char *) ptr - sizeof(void *));
+   void *realAddr = *cubbyHole;   /* address originally returned by malloc()/calloc() */
+   free(realAddr);
+#endif /* defined(HAVE_POSIX_MEMALIGN) */
+}
+
+/**
+ * Reallocate aligned memory, keeping the first min(oldSize, newSize) bytes.
+ */
+void *
+_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
+                    unsigned long alignment)
+{
+#if defined(_WIN32) && defined(_MSC_VER)
+   (void) oldSize;   /* not needed: _aligned_realloc() is handed the old block directly */
+   return _aligned_realloc(oldBuffer, newSize, alignment);
+#else
+   const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
+   void *newBuf = _mesa_align_malloc(newSize, alignment);
+   if (newBuf && oldBuffer && copySize > 0) {
+      memcpy(newBuf, oldBuffer, copySize);
+   }
+   if (oldBuffer)   /* NOTE(review): the old block is freed even if the allocation above failed */
+      _mesa_align_free(oldBuffer);
+   return newBuf;
+#endif
+}
+
+
+
+/** Reallocate memory: malloc() a new block, copy min(oldSize, newSize) bytes, free the old one. */
+void *
+_mesa_realloc(void *oldBuffer, size_t oldSize, size_t newSize)
+{
+   const size_t copySize = (oldSize < newSize) ? oldSize : newSize;
+   void *newBuffer = malloc(newSize);
+   if (newBuffer && oldBuffer && copySize > 0)
+      memcpy(newBuffer, oldBuffer, copySize);
+   if (oldBuffer)   /* NOTE(review): freed even when malloc() failed and NULL is returned */
+      free(oldBuffer);
+   return newBuffer;
+}
+
+/**
+ * Fill memory with a constant 16bit word.
+ * \param dst destination pointer; must have room for \p n unsigned shorts.
+ * \param val value stored into every element.
+ * \param n number of words (elements, not bytes) to write.
+ */
+void
+_mesa_memset16( unsigned short *dst, unsigned short val, size_t n )
+{
+   while (n-- > 0)
+      *dst++ = val;
+}
+
+/*@}*/
+
+
+/**********************************************************************/
+/** \name Math */
+/*@{*/
+
+/** Wrapper around sqrt(): returns the double precision square root of \p x. */
+double
+_mesa_sqrtd(double x)
+{
+   return sqrt(x);
+}
+
+
+/*
+ * A High Speed, Low Precision Square Root
+ * by Paul Lalonde and Robert Dawson
+ * from "Graphics Gems", Academic Press, 1990
+ *
+ * SPARC implementation of a fast square root by table
+ * lookup.
+ * SPARC floating point format is as follows:
+ *
+ * BIT 31 	30 	23 	22 	0
+ *     sign	exponent	mantissa
+ */
+static short sqrttab[0x100];    /* 0x00-0x7f: exponent 0 entries, 0x80-0xff: exponent 1 entries */
+/** Fill sqrttab[] for _mesa_sqrtf(); a no-op unless USE_IEEE is defined and DEBUG is not. */
+void
+_mesa_init_sqrt_table(void)
+{
+#if defined(USE_IEEE) && !defined(DEBUG)
+   unsigned short i;
+   fi_type fi;     /* to access the bits of a float in  C quickly  */
+                   /* we use a union defined in glheader.h         */
+
+   for(i=0; i<= 0x7f; i++) {
+      fi.i = 0;
+
+      /*
+       * Build a float with the bit pattern i as mantissa
+       * and an exponent of 0, stored as 127
+       */
+
+      fi.i = (i << 16) | (127 << 23);
+      fi.f = _mesa_sqrtd(fi.f);
+
+      /*
+       * Take the square root then strip the first 7 bits of
+       * the mantissa into the table
+       */
+
+      sqrttab[i] = (fi.i & 0x7fffff) >> 16;
+
+      /*
+       * Repeat the process, this time with an exponent of
+       * 1, stored as 128
+       */
+
+      fi.i = 0;
+      fi.i = (i << 16) | (128 << 23);
+      fi.f = sqrt(fi.f);
+      sqrttab[i+0x80] = (fi.i & 0x7fffff) >> 16;
+   }
+#else
+   (void) sqrttab;  /* silence compiler warnings */
+#endif /* USE_IEEE && !DEBUG */
+}
+
+
+/**
+ * Single precision square root; IEEE non-DEBUG builds use the low-precision sqrttab[] lookup.
+ */
+float
+_mesa_sqrtf( float x )
+{
+#if defined(USE_IEEE) && !defined(DEBUG)
+   fi_type num;
+                                /* to access the bits of a float in C
+                                 * we use a union from glheader.h     */
+                                /* sqrttab[] is only filled by _mesa_init_sqrt_table() */
+   short e;                     /* the exponent */
+   if (x == 0.0F) return 0.0F;  /* check for square root of 0 */
+   num.f = x;
+   e = (num.i >> 23) - 127;     /* get the exponent - on a SPARC the */
+                                /* exponent is stored with 127 added */
+   num.i &= 0x7fffff;           /* leave only the mantissa */
+   if (e & 0x01) num.i |= 0x800000;
+                                /* the exponent is odd so we have to */
+                                /* look it up in the second half of  */
+                                /* the lookup table, so we set the   */
+                                /* high bit                          */
+   e >>= 1;                     /* divide the exponent by two */
+                                /* note that in C the shift */
+                                /* operators are sign preserving */
+                                /* for signed operands */
+   /* Do the table lookup, indexed by the odd-exponent flag plus the top
+    * 7 mantissa bits, then reconstruct the result back into a float
+    */
+   num.i = ((sqrttab[num.i >> 16]) << 16) | ((e + 127) << 23);
+
+   return num.f;
+#else
+   return (float) _mesa_sqrtd((double) x);
+#endif
+}
+
+
+/**
+ inv_sqrt - A single precision 1/sqrt(n) routine for IEEE format floats,
+ written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
+ and Vesa Karvonen.  Without USE_IEEE, or with DEBUG, it calls sqrt() instead.
+*/
+float
+_mesa_inv_sqrtf(float n)
+{
+#if defined(USE_IEEE) && !defined(DEBUG)
+        float r0, x0, y0;
+        float r1, x1, y1;
+        float r2, x2, y2;
+#if 0 /* not used, see below -BP */
+        float r3, x3, y3;
+#endif
+        fi_type u;
+        unsigned int magic;
+
+        /*
+         Exponent part of the magic number -
+
+         We want to:
+         1. subtract the bias from the exponent,
+         2. negate it
+         3. divide by two (rounding towards -inf)
+         4. add the bias back
+
+         Which is the same as subtracting the exponent from 381 and dividing
+         by 2.
+
+         floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
+        */
+
+        magic = 381 << 23;
+
+        /*
+         Significand part of magic number -
+
+         With the current magic number, "(magic - u.i) >> 1" will give you:
+
+         for 1 <= u.f <= 2: 1.25 - u.f / 4
+         for 2 <= u.f <= 4: 1.00 - u.f / 8
+
+         This isn't a bad approximation of 1/sqrt.  The maximum difference from
+         1/sqrt will be around .06.  After three Newton-Raphson iterations, the
+         maximum difference is less than 4.5e-8.  (Which is actually close
+         enough to make the following bias academic...)
+
+         To get a better approximation you can add a bias to the magic
+         number.  For example, if you subtract 1/2 of the maximum difference in
+         the first approximation (.03), you will get the following function:
+
+         for 1 <= u.f <= 2:    1.22 - u.f / 4
+         for 2 <= u.f <= 3.76: 0.97 - u.f / 8
+         for 3.76 <= u.f <= 4: 0.72 - u.f / 16
+         (The 3.76 to 4 range is where the result is < .5.)
+
+         This is the closest possible initial approximation, but with a maximum
+         error of 8e-11 after three NR iterations, it is still not perfect.  If
+         you subtract 0.0332281 instead of .03, the maximum error will be
+         2.5e-11 after three NR iterations, which should be about as close as
+         is possible.
+
+         for 1 <= u.f <= 2:    1.2167719 - u.f / 4
+         for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
+         for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
+
+        */
+
+        magic -= (int)(0.0332281 * (1 << 25));
+
+        u.f = n;
+        u.i = (magic - u.i) >> 1;   /* u.f is now the initial estimate of 1/sqrt(n) */
+
+        /*
+         Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
+         allows more parallelism.  From what I understand, the parallelism
+         comes at the cost of less precision, because it lets error
+         accumulate across iterations.
+        */
+        x0 = 1.0f;
+        y0 = 0.5f * n;
+        r0 = u.f;
+
+        x1 = x0 * r0;
+        y1 = y0 * r0 * r0;
+        r1 = 1.5f - y1;
+
+        x2 = x1 * r1;
+        y2 = y1 * r1 * r1;
+        r2 = 1.5f - y2;
+
+#if 1
+        return x2 * r2;  /* we can stop here, and be conformant -BP */
+#else
+        x3 = x2 * r2;
+        y3 = y2 * r2 * r2;
+        r3 = 1.5f - y3;
+
+        return x3 * r3;
+#endif
+#else /* !USE_IEEE || DEBUG */
+        return (float) (1.0 / sqrt(n));
+#endif
+}
+
+#ifndef __GNUC__
+/**
+ * Return the 1-based position of the least-significant set bit of \p i, or 0 if \p i is 0.
+ */
+int
+_mesa_ffs(int32_t i)
+{
+#if (defined(_WIN32) ) || defined(__IBMC__) || defined(__IBMCPP__)
+   register int bit = 0;        /* stays 0 when no bit is set */
+   if (i != 0) {
+      if ((i & 0xffff) == 0) {  /* low 16 bits all clear: skip them */
+         bit += 16;
+         i >>= 16;
+      }
+      if ((i & 0xff) == 0) {    /* low 8 bits all clear: skip them */
+         bit += 8;
+         i >>= 8;
+      }
+      if ((i & 0xf) == 0) {     /* low 4 bits all clear: skip them */
+         bit += 4;
+         i >>= 4;
+      }
+      while ((i & 1) == 0) {    /* scan the remaining bits one at a time */
+         bit++;
+         i >>= 1;
+      }
+      bit++;                    /* turn the 0-based bit index into a 1-based position */
+   }
+   return bit;
+#else
+   return ffs(i);               /* other non-GCC builds defer to the C library's ffs() */
+#endif
+}
+
+
+/**
+ * Find position of first bit set in given 64-bit value, using _mesa_ffs() on each 32-bit half.
+ * XXX Warning: this function can only be used on 64-bit systems!
+ * \return  position of least-significant bit set, starting at 1, return zero
+ *          if no bits set.
+ */
+int
+_mesa_ffsll(int64_t val)
+{
+   int bit;
+
+   assert(sizeof(val) == 8);
+
+   bit = _mesa_ffs((int32_t)val);           /* search the low 32 bits first */
+   if (bit != 0)
+      return bit;
+
+   bit = _mesa_ffs((int32_t)(val >> 32));   /* then the high 32 bits */
+   if (bit != 0)
+      return 32 + bit;                      /* offset past the low half */
+
+   return 0;
+}
+
+
+#if ((__GNUC__ == 3 && __GNUC_MINOR__ < 4) || __GNUC__ < 4)
+/**
+ * Return the number of 1 bits in \p n (population count), testing one bit per iteration.
+ */
+unsigned int
+_mesa_bitcount(unsigned int n)
+{
+   unsigned int bits;
+   for (bits = 0; n > 0; n = n >> 1) {   /* stops as soon as no set bits remain */
+      bits += (n & 1);
+   }
+   return bits;
+}
+#endif
+#endif
+
+
+/**
+ * Convert a 4-byte float to a 2-byte half float (low mantissa bits are truncated, not rounded).
+ * Based on code from:
+ * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
+ */
+GLhalfARB
+_mesa_float_to_half(float val)
+{
+   const fi_type fi = {val};
+   const int flt_m = fi.i & 0x7fffff;          /* 23-bit float mantissa field */
+   const int flt_e = (fi.i >> 23) & 0xff;      /* 8-bit biased float exponent */
+   const int flt_s = (fi.i >> 31) & 0x1;       /* sign bit */
+   int s, e, m = 0;
+   GLhalfARB result;
+   
+   /* sign bit */
+   s = flt_s;
+
+   /* handle special cases */
+   if ((flt_e == 0) && (flt_m == 0)) {
+      /* zero */
+      /* m = 0; - already set */
+      e = 0;
+   }
+   else if ((flt_e == 0) && (flt_m != 0)) {
+      /* denorm -- denorm float maps to 0 half */
+      /* m = 0; - already set */
+      e = 0;
+   }
+   else if ((flt_e == 0xff) && (flt_m == 0)) {
+      /* infinity */
+      /* m = 0; - already set */
+      e = 31;
+   }
+   else if ((flt_e == 0xff) && (flt_m != 0)) {
+      /* NaN -- a fixed non-zero half mantissa is used; the float's payload is not kept */
+      m = 1;
+      e = 31;
+   }
+   else {
+      /* regular number */
+      const int new_exp = flt_e - 127;          /* unbiased exponent */
+      if (new_exp < -24) {
+         /* this maps to 0 */
+         /* m = 0; - already set */
+         e = 0;
+      }
+      else if (new_exp < -14) {
+         /* this maps to a denorm: the implicit leading 1 becomes an explicit mantissa bit */
+         unsigned int exp_val = (unsigned int) (-14 - new_exp); /* 1..10 here, as -24 <= new_exp <= -15 */
+         e = 0;
+         switch (exp_val) {
+            case 0:
+               _mesa_warning(NULL,
+                   "float_to_half: logical error in denorm creation!\n");
+               /* m = 0; - already set */
+               break;
+            case 1: m = 512 + (flt_m >> 14); break;
+            case 2: m = 256 + (flt_m >> 15); break;
+            case 3: m = 128 + (flt_m >> 16); break;
+            case 4: m = 64 + (flt_m >> 17); break;
+            case 5: m = 32 + (flt_m >> 18); break;
+            case 6: m = 16 + (flt_m >> 19); break;
+            case 7: m = 8 + (flt_m >> 20); break;
+            case 8: m = 4 + (flt_m >> 21); break;
+            case 9: m = 2 + (flt_m >> 22); break;
+            case 10: m = 1; break;
+         }
+      }
+      else if (new_exp > 15) {
+         /* map this value to infinity */
+         /* m = 0; - already set */
+         e = 31;
+      }
+      else {
+         /* regular: re-bias the exponent (+15) and keep the top 10 mantissa bits */
+         e = new_exp + 15;
+         m = flt_m >> 13;
+      }
+   }
+
+   result = (s << 15) | (e << 10) | m;   /* pack sign (1 bit), exponent (5 bits), mantissa (10 bits) */
+   return result;
+}
+
+
+/**
+ * Convert a 2-byte half float to a 4-byte float (zero, denorm, infinity and NaN handled separately).
+ * Based on code from:
+ * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
+ */
+float
+_mesa_half_to_float(GLhalfARB val)
+{
+   /* XXX could also use a 64K-entry lookup table */
+   const int m = val & 0x3ff;           /* 10-bit half mantissa */
+   const int e = (val >> 10) & 0x1f;    /* 5-bit biased half exponent */
+   const int s = (val >> 15) & 0x1;     /* sign bit */
+   int flt_m, flt_e, flt_s;
+   fi_type fi;
+   float result;
+
+   /* sign bit */
+   flt_s = s;
+
+   /* handle special cases */
+   if ((e == 0) && (m == 0)) {
+      /* zero */
+      flt_m = 0;
+      flt_e = 0;
+   }
+   else if ((e == 0) && (m != 0)) {
+      /* denorm -- denorm half will fit in non-denorm single */
+      const float half_denorm = 1.0f / 16384.0f; /* 2^-14 */
+      float mantissa = ((float) (m)) / 1024.0f;
+      float sign = s ? -1.0f : 1.0f;
+      return sign * mantissa * half_denorm;   /* computed arithmetically, bypassing the bit packing below */
+   }
+   else if ((e == 31) && (m == 0)) {
+      /* infinity */
+      flt_e = 0xff;
+      flt_m = 0;
+   }
+   else if ((e == 31) && (m != 0)) {
+      /* NaN -- a fixed non-zero mantissa is used; the half's payload is not kept */
+      flt_e = 0xff;
+      flt_m = 1;
+   }
+   else {
+      /* regular: re-bias the exponent (127 - 15 = 112) and widen the mantissa */
+      flt_e = e + 112;
+      flt_m = m << 13;
+   }
+   /* Assemble in unsigned arithmetic: shifting 1 into bit 31 of a signed int is undefined. */
+   fi.i = (GLint) (((unsigned int) flt_s << 31) | (unsigned int) (flt_e << 23) | (unsigned int) flt_m);
+   result = fi.f;
+   return result;
+}
+
+/*@}*/
+
+
+/**********************************************************************/
+/** \name Sort & Search */
+/*@{*/
+
+/**
+ * Wrapper for bsearch(); _WIN32_WCE builds use the built-in binary search below instead.
+ */
+void *
+_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, 
+               int (*compar)(const void *, const void *) )
+{
+#if defined(_WIN32_WCE)
+   void *mid;
+   int cmp;
+   while (nmemb) {
+      mid = (char *)base + (nmemb >> 1) * size;   /* probe the middle element */
+      cmp = (*compar)(key, mid);
+      if (cmp == 0)
+         return mid;
+      if (cmp > 0) {
+         base = (char *)mid + size;   /* continue above the probed element ... */
+         --nmemb;                     /* ... which is no longer a candidate */
+      }
+      nmemb >>= 1;   /* halve only after the adjustment so nmemb cannot wrap below zero */
+   }
+   return NULL;
+#else
+   return bsearch(key, base, nmemb, size, compar);
+#endif
+}
+
+/*@}*/
+
+
+/**********************************************************************/
+/** \name Environment vars */
+/*@{*/
+
+/**
+ * Wrapper for getenv(); always returns NULL on _XBOX and _WIN32_WCE builds.
+ */
+char *
+_mesa_getenv( const char *var )
+{
+#if defined(_XBOX) || defined(_WIN32_WCE)
+   return NULL;
+#else
+   return getenv(var);
+#endif
+}
+
+/*@}*/
+
+
+/**********************************************************************/
+/** \name String */
+/*@{*/
+
+/**
+ * Copy \p s into a new malloc()ed buffer of strlen(s) + 1 bytes using strcpy().
+ * Returns NULL when \p s is NULL or when malloc() fails.
+ */
+char *
+_mesa_strdup( const char *s )
+{
+   if (s) {
+      size_t l = strlen(s);
+      char *s2 = (char *) malloc(l + 1);   /* + 1 for the terminating NUL */
+      if (s2)
+         strcpy(s2, s);
+      return s2;                           /* NULL if the allocation failed */
+   }
+   else {
+      return NULL;
+   }
+}
+
+/** Wrapper around strtof(); uses strtof_l() with a cached "C" locale object on _GNU_SOURCE builds */
+float
+_mesa_strtof( const char *s, char **end )
+{
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
+   static locale_t loc = NULL;   /* created on the first call, then reused */
+   if (!loc) {
+      loc = newlocale(LC_CTYPE_MASK, "C", NULL);   /* NOTE(review): result is not checked for failure */
+   }
+   return strtof_l(s, end, loc);
+#elif defined(_ISOC99_SOURCE) || (defined(_XOPEN_SOURCE) && _XOPEN_SOURCE >= 600)
+   return strtof(s, end);
+#else
+   return (float)strtod(s, end);   /* fallback: parse as double and narrow to float */
+#endif
+}
+
+/** Compute a simple position-weighted checksum/hash of the NUL-terminated string \p str */
+unsigned int
+_mesa_str_checksum(const char *str)
+{
+   /* This could probably be much better */
+   unsigned int sum, i;
+   const char *c;
+   sum = i = 1;
+   for (c = str; *c; c++, i++)
+      sum += *c * (i % 100);   /* weight each char by its 1-based index modulo 100 */
+   return sum + i;             /* here i == string length + 1 */
+}
+
+
+/*@}*/
+
+
+/** printf-style formatting into \p str (buffer size \p size) via vsnprintf(); returns vsnprintf()'s result */
+int
+_mesa_snprintf( char *str, size_t size, const char *fmt, ... )
+{
+   int r;
+   va_list args;
+   va_start( args, fmt );  
+   r = vsnprintf( str, size, fmt, args );
+   va_end( args );
+   return r;
+}
+
+
+/**********************************************************************/
+/** \name Diagnostics */
+/*@{*/
+/** Print "prefixString: outputString" (plus optional newline) to stderr when debug output is enabled. */
+static void
+output_if_debug(const char *prefixString, const char *outputString,
+                GLboolean newline)
+{
+   static int debug = -1;   /* -1 = MESA_DEBUG not examined yet, otherwise 0 or 1 */
+
+   /* Check the MESA_DEBUG environment variable if it hasn't
+    * been checked yet.  We only have to check it once...
+    */
+   if (debug == -1) {
+      char *env = _mesa_getenv("MESA_DEBUG");
+
+      /* In a debug build, we print warning messages *unless*
+       * MESA_DEBUG is 0.  In a non-debug build, we don't
+       * print warning messages *unless* MESA_DEBUG is
+       * set *to any value*.
+       */
+#ifdef DEBUG
+      debug = (env != NULL && atoi(env) == 0) ? 0 : 1;   /* unset MESA_DEBUG leaves output on */
+#else
+      debug = (env != NULL) ? 1 : 0;
+#endif
+   }
+
+   /* Now only print the string if we're required to do so. */
+   if (debug) {
+      fprintf(stderr, "%s: %s", prefixString, outputString);
+      if (newline)
+         fprintf(stderr, "\n");
+
+#if defined(_WIN32) && !defined(_WIN32_WCE)
+      /* stderr from windows applications without console is not usually 
+       * visible, so also send the same text to the debugger */ 
+      {
+         char buf[4096];
+         _mesa_snprintf(buf, sizeof(buf), "%s: %s%s", prefixString, outputString, newline ? "\n" : "");
+         OutputDebugStringA(buf);
+      }
+#endif
+   }
+}
+
+
+/**
+ * Return the symbolic name of GL error code \p error as a string literal, or "unknown" if unrecognized.
+ */
+static const char *
+error_string( GLenum error )
+{
+   switch (error) {
+   case GL_NO_ERROR:
+      return "GL_NO_ERROR";
+   case GL_INVALID_VALUE:
+      return "GL_INVALID_VALUE";
+   case GL_INVALID_ENUM:
+      return "GL_INVALID_ENUM";
+   case GL_INVALID_OPERATION:
+      return "GL_INVALID_OPERATION";
+   case GL_STACK_OVERFLOW:
+      return "GL_STACK_OVERFLOW";
+   case GL_STACK_UNDERFLOW:
+      return "GL_STACK_UNDERFLOW";
+   case GL_OUT_OF_MEMORY:
+      return "GL_OUT_OF_MEMORY";
+   case GL_TABLE_TOO_LARGE:
+      return "GL_TABLE_TOO_LARGE";
+   case GL_INVALID_FRAMEBUFFER_OPERATION_EXT:
+      return "GL_INVALID_FRAMEBUFFER_OPERATION";   /* reported without the _EXT suffix */
+   default:
+      return "unknown";
+   }
+}
+
+
+/**
+ * If ctx->ErrorDebugCount is non-zero, print a "<count> similar <error> errors"
+ * summary for ctx->ErrorValue through output_if_debug() and reset the count to 0.
+ */
+static void
+flush_delayed_errors( struct gl_context *ctx )
+{
+   char s[MAXSTRING];
+
+   if (ctx->ErrorDebugCount) {
+      _mesa_snprintf(s, MAXSTRING, "%d similar %s errors", 
+                     ctx->ErrorDebugCount,
+                     error_string(ctx->ErrorValue));
+
+      output_if_debug("Mesa", s, GL_TRUE);
+
+      ctx->ErrorDebugCount = 0;
+   }
+}
+
+
+/**
+ * Report a warning (a recoverable error condition) to stderr if
+ * either DEBUG is defined or the MESA_DEBUG env var is set.
+ * The formatted message is limited to MAXSTRING bytes.
+ * \param ctx GL context; may be NULL, otherwise its delayed errors are flushed first.
+ * \param fmtString printf()-like format string.
+ */
+void
+_mesa_warning( struct gl_context *ctx, const char *fmtString, ... )
+{
+   char str[MAXSTRING];
+   va_list args;
+   va_start( args, fmtString );  
+   (void) vsnprintf( str, MAXSTRING, fmtString, args );
+   va_end( args );
+   
+   if (ctx)
+      flush_delayed_errors( ctx );
+
+   output_if_debug("Mesa warning", str, GL_TRUE);
+}
+
+
+/**
+ * Report an internal implementation problem.
+ * Prints the message to stderr via fprintf(), unconditionally (MESA_DEBUG is not consulted).
+ *
+ * \param ctx GL context (unused by this function).
+ * \param fmtString problem description string (printf()-style, followed by optional args).
+ */
+void
+_mesa_problem( const struct gl_context *ctx, const char *fmtString, ... )
+{
+   va_list args;
+   char str[MAXSTRING];
+   (void) ctx;
+
+   va_start( args, fmtString );  
+   vsnprintf( str, MAXSTRING, fmtString, args );
+   va_end( args );
+
+   fprintf(stderr, "Mesa %s implementation error: %s\n", MESA_VERSION_STRING, str);
+   fprintf(stderr, "Please report at bugs.freedesktop.org\n");
+}
+
+
+/**
+ * Record an OpenGL state error.  These usually occur when the user
+ * passes invalid parameters to a GL function.
+ *
+ * If debugging is enabled (either at compile-time via the DEBUG macro, or
+ * run-time via the MESA_DEBUG environment variable), report the error with
+ * output_if_debug(); an immediate repeat (same error, same fmtString) is only counted.
+ * 
+ * \param ctx the GL context (dereferenced when debugging is enabled).
+ * \param error the error value.
+ * \param fmtString printf() style format string, followed by optional args
+ */
+void
+_mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... )
+{
+   static GLint debug = -1;
+
+   /* Check debug environment variable only once:
+    */
+   if (debug == -1) {
+      const char *debugEnv = _mesa_getenv("MESA_DEBUG");
+
+#ifdef DEBUG
+      if (debugEnv && strstr(debugEnv, "silent"))
+         debug = GL_FALSE;
+      else
+         debug = GL_TRUE;
+#else
+      if (debugEnv)
+         debug = GL_TRUE;
+      else
+         debug = GL_FALSE;
+#endif
+   }
+
+   if (debug) {      
+      if (ctx->ErrorValue == error &&
+          ctx->ErrorDebugFmtString == fmtString) {   /* pointer comparison, not strcmp() */
+         ctx->ErrorDebugCount++;
+      }
+      else {
+         char s[MAXSTRING], s2[MAXSTRING];
+         va_list args;
+
+         flush_delayed_errors( ctx );   /* print the pending "similar errors" summary first */
+         
+         va_start(args, fmtString);
+         vsnprintf(s, MAXSTRING, fmtString, args);
+         va_end(args);
+
+         _mesa_snprintf(s2, MAXSTRING, "%s in %s", error_string(error), s);
+         output_if_debug("Mesa: User error", s2, GL_TRUE);
+         
+         ctx->ErrorDebugFmtString = fmtString;   /* remembered for the repeat check above */
+         ctx->ErrorDebugCount = 0;
+      }
+   }
+
+   _mesa_record_error(ctx, error);   /* always called, whether or not debugging is enabled */
+}
+
+
+/**
+ * Report debug information.  Formats the message (at most MAXSTRING bytes) and passes it to
+ * output_if_debug() with no newline appended.  No-op if DEBUG mode not enabled.
+ * 
+ * \param ctx GL context (unused by this function).
+ * \param fmtString printf()-style format string, followed by optional args.
+ */
+void
+_mesa_debug( const struct gl_context *ctx, const char *fmtString, ... )
+{
+#ifdef DEBUG
+   char s[MAXSTRING];
+   va_list args;
+   va_start(args, fmtString);
+   vsnprintf(s, MAXSTRING, fmtString, args);
+   va_end(args);
+   output_if_debug("Mesa", s, GL_FALSE);
+#endif /* DEBUG */
+   (void) ctx;
+   (void) fmtString;
+}
+
+/*@}*/
diff --git a/mesalib/src/mesa/main/imports.h b/mesalib/src/mesa/main/imports.h
index af7a8cc00..a994dbcae 100644
--- a/mesalib/src/mesa/main/imports.h
+++ b/mesalib/src/mesa/main/imports.h
@@ -1,602 +1,615 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.5
- *
- * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * \file imports.h
- * Standard C library function wrappers.
- *
- * This file provides wrappers for all the standard C library functions
- * like malloc(), free(), printf(), getenv(), etc.
- */
-
-
-#ifndef IMPORTS_H
-#define IMPORTS_H
-
-
-#include "compiler.h"
-#include "glheader.h"
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/**********************************************************************/
-/** Memory macros */
-/*@{*/
-
-/** Allocate \p BYTES bytes */
-#define MALLOC(BYTES)      malloc(BYTES)
-/** Allocate and zero \p BYTES bytes */
-#define CALLOC(BYTES)      calloc(1, BYTES)
-/** Allocate a structure of type \p T */
-#define MALLOC_STRUCT(T)   (struct T *) malloc(sizeof(struct T))
-/** Allocate and zero a structure of type \p T */
-#define CALLOC_STRUCT(T)   (struct T *) calloc(1, sizeof(struct T))
-/** Free memory */
-#define FREE(PTR)          free(PTR)
-
-/*@}*/
-
-
-/*
- * For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
- * as offsets into buffer stores.  Since the vertex array pointer and
- * buffer store pointer are both pointers and we need to add them, we use
- * this macro.
- * Both pointers/offsets are expressed in bytes.
- */
-#define ADD_POINTERS(A, B)  ( (GLubyte *) (A) + (uintptr_t) (B) )
-
-
-/**
- * Sometimes we treat GLfloats as GLints.  On x86 systems, moving a float
- * as a int (thereby using integer registers instead of FP registers) is
- * a performance win.  Typically, this can be done with ordinary casts.
- * But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
- * these casts generate warnings.
- * The following union typedef is used to solve that.
- */
-typedef union { GLfloat f; GLint i; } fi_type;
-
-
-
-/**********************************************************************
- * Math macros
- */
-
-#define MAX_GLUSHORT	0xffff
-#define MAX_GLUINT	0xffffffff
-
-/* Degrees to radians conversion: */
-#define DEG2RAD (M_PI/180.0)
-
-
-/***
- *** SQRTF: single-precision square root
- ***/
-#if 0 /* _mesa_sqrtf() not accurate enough - temporarily disabled */
-#  define SQRTF(X)  _mesa_sqrtf(X)
-#else
-#  define SQRTF(X)  (float) sqrt((float) (X))
-#endif
-
-
-/***
- *** INV_SQRTF: single-precision inverse square root
- ***/
-#if 0
-#define INV_SQRTF(X) _mesa_inv_sqrt(X)
-#else
-#define INV_SQRTF(X) (1.0F / SQRTF(X))  /* this is faster on a P4 */
-#endif
-
-
-/**
- * \name Work-arounds for platforms that lack C99 math functions
- */
-/*@{*/
-#if (!defined(_XOPEN_SOURCE) || (_XOPEN_SOURCE < 600)) && !defined(_ISOC99_SOURCE) \
-   && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)) \
-   && (!defined(_MSC_VER) || (_MSC_VER < 1400))
-#define acosf(f) ((float) acos(f))
-#define asinf(f) ((float) asin(f))
-#define atan2f(x,y) ((float) atan2(x,y))
-#define atanf(f) ((float) atan(f))
-#define cielf(f) ((float) ciel(f))
-#define cosf(f) ((float) cos(f))
-#define coshf(f) ((float) cosh(f))
-#define expf(f) ((float) exp(f))
-#define exp2f(f) ((float) exp2(f))
-#define floorf(f) ((float) floor(f))
-#define logf(f) ((float) log(f))
-#define log2f(f) ((float) log2(f))
-#define powf(x,y) ((float) pow(x,y))
-#define sinf(f) ((float) sin(f))
-#define sinhf(f) ((float) sinh(f))
-#define sqrtf(f) ((float) sqrt(f))
-#define tanf(f) ((float) tan(f))
-#define tanhf(f) ((float) tanh(f))
-#define acoshf(f) ((float) acosh(f))
-#define asinhf(f) ((float) asinh(f))
-#define atanhf(f) ((float) atanh(f))
-#endif
-
-#if defined(_MSC_VER)
-static INLINE float truncf(float x) { return x < 0.0f ? ceilf(x) : floorf(x); }
-static INLINE float exp2f(float x) { return powf(2.0f, x); }
-static INLINE float log2f(float x) { return logf(x) * 1.442695041f; }
-static INLINE float asinhf(float x) { return logf(x + sqrtf(x * x + 1.0f)); }
-static INLINE float acoshf(float x) { return logf(x + sqrtf(x * x - 1.0f)); }
-static INLINE float atanhf(float x) { return (logf(1.0f + x) - logf(1.0f - x)) / 2.0f; }
-static INLINE int isblank(int ch) { return ch == ' ' || ch == '\t'; }
-#define strtoll(p, e, b) _strtoi64(p, e, b)
-#endif
-/*@}*/
-
-/***
- *** LOG2: Log base 2 of float
- ***/
-#ifdef USE_IEEE
-#if 0
-/* This is pretty fast, but not accurate enough (only 2 fractional bits).
- * Based on code from http://www.stereopsis.com/log2.html
- */
-static INLINE GLfloat LOG2(GLfloat x)
-{
-   const GLfloat y = x * x * x * x;
-   const GLuint ix = *((GLuint *) &y);
-   const GLuint exp = (ix >> 23) & 0xFF;
-   const GLint log2 = ((GLint) exp) - 127;
-   return (GLfloat) log2 * (1.0 / 4.0);  /* 4, because of x^4 above */
-}
-#endif
-/* Pretty fast, and accurate.
- * Based on code from http://www.flipcode.com/totd/
- */
-static INLINE GLfloat LOG2(GLfloat val)
-{
-   fi_type num;
-   GLint log_2;
-   num.f = val;
-   log_2 = ((num.i >> 23) & 255) - 128;
-   num.i &= ~(255 << 23);
-   num.i += 127 << 23;
-   num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
-   return num.f + log_2;
-}
-#else
-/*
- * NOTE: log_base_2(x) = log(x) / log(2)
- * NOTE: 1.442695 = 1/log(2).
- */
-#define LOG2(x)  ((GLfloat) (log(x) * 1.442695F))
-#endif
-
-
-/***
- *** IS_INF_OR_NAN: test if float is infinite or NaN
- ***/
-#ifdef USE_IEEE
-static INLINE int IS_INF_OR_NAN( float x )
-{
-   fi_type tmp;
-   tmp.f = x;
-   return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
-}
-#elif defined(isfinite)
-#define IS_INF_OR_NAN(x)        (!isfinite(x))
-#elif defined(finite)
-#define IS_INF_OR_NAN(x)        (!finite(x))
-#elif defined(__VMS)
-#define IS_INF_OR_NAN(x)        (!finite(x))
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-#define IS_INF_OR_NAN(x)        (!isfinite(x))
-#else
-#define IS_INF_OR_NAN(x)        (!finite(x))
-#endif
-
-
-/***
- *** IS_NEGATIVE: test if float is negative
- ***/
-#if defined(USE_IEEE)
-static INLINE int GET_FLOAT_BITS( float x )
-{
-   fi_type fi;
-   fi.f = x;
-   return fi.i;
-}
-#define IS_NEGATIVE(x) (GET_FLOAT_BITS(x) < 0)
-#else
-#define IS_NEGATIVE(x) (x < 0.0F)
-#endif
-
-
-/***
- *** DIFFERENT_SIGNS: test if two floats have opposite signs
- ***/
-#if defined(USE_IEEE)
-#define DIFFERENT_SIGNS(x,y) ((GET_FLOAT_BITS(x) ^ GET_FLOAT_BITS(y)) & (1<<31))
-#else
-/* Could just use (x*y<0) except for the flatshading requirements.
- * Maybe there's a better way?
- */
-#define DIFFERENT_SIGNS(x,y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
-#endif
-
-
-/***
- *** CEILF: ceiling of float
- *** FLOORF: floor of float
- *** FABSF: absolute value of float
- *** LOGF: the natural logarithm (base e) of the value
- *** EXPF: raise e to the value
- *** LDEXPF: multiply value by an integral power of two
- *** FREXPF: extract mantissa and exponent from value
- ***/
-#if defined(__gnu_linux__)
-/* C99 functions */
-#define CEILF(x)   ceilf(x)
-#define FLOORF(x)  floorf(x)
-#define FABSF(x)   fabsf(x)
-#define LOGF(x)    logf(x)
-#define EXPF(x)    expf(x)
-#define LDEXPF(x,y)  ldexpf(x,y)
-#define FREXPF(x,y)  frexpf(x,y)
-#else
-#define CEILF(x)   ((GLfloat) ceil(x))
-#define FLOORF(x)  ((GLfloat) floor(x))
-#define FABSF(x)   ((GLfloat) fabs(x))
-#define LOGF(x)    ((GLfloat) log(x))
-#define EXPF(x)    ((GLfloat) exp(x))
-#define LDEXPF(x,y)  ((GLfloat) ldexp(x,y))
-#define FREXPF(x,y)  ((GLfloat) frexp(x,y))
-#endif
-
-
-/***
- *** IROUND: return (as an integer) float rounded to nearest integer
- ***/
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
-static INLINE int iround(float f)
-{
-   int r;
-   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
-   return r;
-}
-#define IROUND(x)  iround(x)
-#elif defined(USE_X86_ASM) && defined(_MSC_VER)
-static INLINE int iround(float f)
-{
-   int r;
-   _asm {
-	 fld f
-	 fistp r
-	}
-   return r;
-}
-#define IROUND(x)  iround(x)
-#elif defined(__WATCOMC__) && defined(__386__)
-long iround(float f);
-#pragma aux iround =                    \
-	"push   eax"                        \
-	"fistp  dword ptr [esp]"            \
-	"pop    eax"                        \
-	parm [8087]                         \
-	value [eax]                         \
-	modify exact [eax];
-#define IROUND(x)  iround(x)
-#else
-#define IROUND(f)  ((int) (((f) >= 0.0F) ? ((f) + 0.5F) : ((f) - 0.5F)))
-#endif
-
-#define IROUND64(f)  ((GLint64) (((f) >= 0.0F) ? ((f) + 0.5F) : ((f) - 0.5F)))
-
-/***
- *** IROUND_POS: return (as an integer) positive float rounded to nearest int
- ***/
-#ifdef DEBUG
-#define IROUND_POS(f) (assert((f) >= 0.0F), IROUND(f))
-#else
-#define IROUND_POS(f) (IROUND(f))
-#endif
-
-
-/***
- *** IFLOOR: return (as an integer) floor of float
- ***/
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
-/*
- * IEEE floor for computers that round to nearest or even.
- * 'f' must be between -4194304 and 4194303.
- * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
- * but uses some IEEE specific tricks for better speed.
- * Contributed by Josh Vanderhoof
- */
-static INLINE int ifloor(float f)
-{
-   int ai, bi;
-   double af, bf;
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   /* GCC generates an extra fstp/fld without this. */
-   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
-   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
-   return (ai - bi) >> 1;
-}
-#define IFLOOR(x)  ifloor(x)
-#elif defined(USE_IEEE)
-static INLINE int ifloor(float f)
-{
-   int ai, bi;
-   double af, bf;
-   fi_type u;
-
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   u.f = (float) af;  ai = u.i;
-   u.f = (float) bf;  bi = u.i;
-   return (ai - bi) >> 1;
-}
-#define IFLOOR(x)  ifloor(x)
-#else
-static INLINE int ifloor(float f)
-{
-   int i = IROUND(f);
-   return (i > f) ? i - 1 : i;
-}
-#define IFLOOR(x)  ifloor(x)
-#endif
-
-
-/***
- *** ICEIL: return (as an integer) ceiling of float
- ***/
-#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
-/*
- * IEEE ceil for computers that round to nearest or even.
- * 'f' must be between -4194304 and 4194303.
- * This ceil operation is done by "(iround(f + .5) + iround(f - .5) + 1) >> 1",
- * but uses some IEEE specific tricks for better speed.
- * Contributed by Josh Vanderhoof
- */
-static INLINE int iceil(float f)
-{
-   int ai, bi;
-   double af, bf;
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   /* GCC generates an extra fstp/fld without this. */
-   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
-   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
-   return (ai - bi + 1) >> 1;
-}
-#define ICEIL(x)  iceil(x)
-#elif defined(USE_IEEE)
-static INLINE int iceil(float f)
-{
-   int ai, bi;
-   double af, bf;
-   fi_type u;
-   af = (3 << 22) + 0.5 + (double)f;
-   bf = (3 << 22) + 0.5 - (double)f;
-   u.f = (float) af; ai = u.i;
-   u.f = (float) bf; bi = u.i;
-   return (ai - bi + 1) >> 1;
-}
-#define ICEIL(x)  iceil(x)
-#else
-static INLINE int iceil(float f)
-{
-   int i = IROUND(f);
-   return (i < f) ? i + 1 : i;
-}
-#define ICEIL(x)  iceil(x)
-#endif
-
-
-/**
- * Is x a power of two?
- */
-static INLINE int
-_mesa_is_pow_two(int x)
-{
-   return !(x & (x - 1));
-}
-
-/**
- * Round given integer to next higer power of two
- * If X is zero result is undefined.
- *
- * Source for the fallback implementation is
- * Sean Eron Anderson's webpage "Bit Twiddling Hacks"
- * http://graphics.stanford.edu/~seander/bithacks.html
- *
- * When using builtin function have to do some work
- * for case when passed values 1 to prevent hiting
- * undefined result from __builtin_clz. Undefined
- * results would be different depending on optimization
- * level used for build.
- */
-static INLINE int32_t
-_mesa_next_pow_two_32(uint32_t x)
-{
-#if defined(__GNUC__) && \
-	((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-	uint32_t y = (x != 1);
-	return (1 + y) << ((__builtin_clz(x - y) ^ 31) );
-#else
-	x--;
-	x |= x >> 1;
-	x |= x >> 2;
-	x |= x >> 4;
-	x |= x >> 8;
-	x |= x >> 16;
-	x++;
-	return x;
-#endif
-}
-
-static INLINE int64_t
-_mesa_next_pow_two_64(uint64_t x)
-{
-#if defined(__GNUC__) && \
-	((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-	uint64_t y = (x != 1);
-	if (sizeof(x) == sizeof(long))
-		return (1 + y) << ((__builtin_clzl(x - y) ^ 63));
-	else
-		return (1 + y) << ((__builtin_clzll(x - y) ^ 63));
-#else
-	x--;
-	x |= x >> 1;
-	x |= x >> 2;
-	x |= x >> 4;
-	x |= x >> 8;
-	x |= x >> 16;
-	x |= x >> 32;
-	x++;
-	return x;
-#endif
-}
-
-
-/**
- * Return 1 if this is a little endian machine, 0 if big endian.
- */
-static INLINE GLboolean
-_mesa_little_endian(void)
-{
-   const GLuint ui = 1; /* intentionally not static */
-   return *((const GLubyte *) &ui);
-}
-
-
-
-/**********************************************************************
- * Functions
- */
-
-extern void *
-_mesa_align_malloc( size_t bytes, unsigned long alignment );
-
-extern void *
-_mesa_align_calloc( size_t bytes, unsigned long alignment );
-
-extern void
-_mesa_align_free( void *ptr );
-
-extern void *
-_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
-                    unsigned long alignment);
-
-extern void *
-_mesa_exec_malloc( GLuint size );
-
-extern void 
-_mesa_exec_free( void *addr );
-
-extern void *
-_mesa_realloc( void *oldBuffer, size_t oldSize, size_t newSize );
-
-extern void
-_mesa_memset16( unsigned short *dst, unsigned short val, size_t n );
-
-extern double
-_mesa_sqrtd(double x);
-
-extern float
-_mesa_sqrtf(float x);
-
-extern float
-_mesa_inv_sqrtf(float x);
-
-extern void
-_mesa_init_sqrt_table(void);
-
-extern int
-_mesa_ffs(int32_t i);
-
-extern int
-_mesa_ffsll(int64_t i);
-
-extern unsigned int
-_mesa_bitcount(unsigned int n);
-
-extern GLhalfARB
-_mesa_float_to_half(float f);
-
-extern float
-_mesa_half_to_float(GLhalfARB h);
-
-
-extern void *
-_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, 
-               int (*compar)(const void *, const void *) );
-
-extern char *
-_mesa_getenv( const char *var );
-
-extern char *
-_mesa_strdup( const char *s );
-
-extern float
-_mesa_strtof( const char *s, char **end );
-
-extern unsigned int
-_mesa_str_checksum(const char *str);
-
-extern int
-_mesa_snprintf( char *str, size_t size, const char *fmt, ... ) PRINTFLIKE(3, 4);
-
-struct gl_context;
-
-extern void
-_mesa_warning( struct gl_context *gc, const char *fmtString, ... ) PRINTFLIKE(2, 3);
-
-extern void
-_mesa_problem( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3);
-
-extern void
-_mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... ) PRINTFLIKE(3, 4);
-
-extern void
-_mesa_debug( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3);
-
-
-#if defined(_MSC_VER) && !defined(snprintf)
-#define snprintf _snprintf
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* IMPORTS_H */
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.5
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file imports.h
+ * Standard C library function wrappers.
+ *
+ * This file provides wrappers for all the standard C library functions
+ * like malloc(), free(), printf(), getenv(), etc.
+ */
+
+
+#ifndef IMPORTS_H
+#define IMPORTS_H
+
+
+#include "compiler.h"
+#include "glheader.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**********************************************************************/
+/** Memory macros */
+/*@{*/
+
+/** Allocate \p BYTES bytes */
+#define MALLOC(BYTES)      malloc(BYTES)
+/** Allocate and zero \p BYTES bytes */
+#define CALLOC(BYTES)      calloc(1, BYTES)
+/** Allocate a structure of type \p T */
+#define MALLOC_STRUCT(T)   (struct T *) malloc(sizeof(struct T))
+/** Allocate and zero a structure of type \p T */
+#define CALLOC_STRUCT(T)   (struct T *) calloc(1, sizeof(struct T))
+/** Free memory */
+#define FREE(PTR)          free(PTR)
+
+/*@}*/
+
+
+/*
+ * For GL_ARB_vertex_buffer_object we need to treat vertex array pointers
+ * as offsets into buffer stores.  Since the vertex array pointer and
+ * buffer store pointer are both pointers and we need to add them, we use
+ * this macro.
+ * Both pointers/offsets are expressed in bytes.
+ */
+#define ADD_POINTERS(A, B)  ( (GLubyte *) (A) + (uintptr_t) (B) )
+
+
+/**
+ * Sometimes we treat GLfloats as GLints.  On x86 systems, moving a float
+ * as a int (thereby using integer registers instead of FP registers) is
+ * a performance win.  Typically, this can be done with ordinary casts.
+ * But with gcc's -fstrict-aliasing flag (which defaults to on in gcc 3.0)
+ * these casts generate warnings.
+ * The following union typedef is used to solve that.
+ */
+typedef union { GLfloat f; GLint i; } fi_type;
+
+
+
+/**********************************************************************
+ * Math macros
+ */
+
+#define MAX_GLUSHORT	0xffff
+#define MAX_GLUINT	0xffffffff
+
+/* Degrees to radians conversion: */
+#define DEG2RAD (M_PI/180.0)
+
+
+/***
+ *** SQRTF: single-precision square root
+ ***/
+#if 0 /* _mesa_sqrtf() not accurate enough - temporarily disabled */
+#  define SQRTF(X)  _mesa_sqrtf(X)
+#else
+#  define SQRTF(X)  (float) sqrt((float) (X))
+#endif
+
+
+/***
+ *** INV_SQRTF: single-precision inverse square root (1 / sqrt(X))
+ ***/
+#if 0 /* out-of-line _mesa_inv_sqrtf() path - disabled */
+#define INV_SQRTF(X) _mesa_inv_sqrtf(X)
+#else
+#define INV_SQRTF(X) (1.0F / SQRTF(X))  /* this is faster on a P4 */
+#endif
+
+
+/**
+ * \name Work-arounds for platforms that lack C99 math functions
+ */
+/*@{*/
+#if (!defined(_XOPEN_SOURCE) || (_XOPEN_SOURCE < 600)) && !defined(_ISOC99_SOURCE) \
+   && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)) \
+   && (!defined(_MSC_VER) || (_MSC_VER < 1400))
+#define acosf(f) ((float) acos(f))
+#define asinf(f) ((float) asin(f))
+#define atan2f(x,y) ((float) atan2(x,y))
+#define atanf(f) ((float) atan(f))
+#define ceilf(f) ((float) ceil(f))
+#define cosf(f) ((float) cos(f))
+#define coshf(f) ((float) cosh(f))
+#define expf(f) ((float) exp(f))
+#define exp2f(f) ((float) exp2(f))
+#define floorf(f) ((float) floor(f))
+#define logf(f) ((float) log(f))
+#define log2f(f) ((float) log2(f))
+#define powf(x,y) ((float) pow(x,y))
+#define sinf(f) ((float) sin(f))
+#define sinhf(f) ((float) sinh(f))
+#define sqrtf(f) ((float) sqrt(f))
+#define tanf(f) ((float) tan(f))
+#define tanhf(f) ((float) tanh(f))
+#define acoshf(f) ((float) acosh(f))
+#define asinhf(f) ((float) asinh(f))
+#define atanhf(f) ((float) atanh(f))
+#endif /* float math mapped onto the double-precision functions */
+
+#if defined(_MSC_VER)
+static INLINE float truncf(float x) { return x < 0.0f ? ceilf(x) : floorf(x); }
+static INLINE float exp2f(float x) { return powf(2.0f, x); }
+static INLINE float log2f(float x) { return logf(x) * 1.442695041f; }
+static INLINE float asinhf(float x) { return logf(x + sqrtf(x * x + 1.0f)); }
+static INLINE float acoshf(float x) { return logf(x + sqrtf(x * x - 1.0f)); }
+static INLINE float atanhf(float x) { return (logf(1.0f + x) - logf(1.0f - x)) / 2.0f; }
+static INLINE int isblank(int ch) { return ch == ' ' || ch == '\t'; }
+#define strtoll(p, e, b) _strtoi64(p, e, b)
+#endif
+/*@}*/
+
+/***
+ *** LOG2: Log base 2 of float
+ ***/
+#ifdef USE_IEEE
+#if 0
+/* This is pretty fast, but not accurate enough (only 2 fractional bits).
+ * Based on code from http://www.stereopsis.com/log2.html
+ */
+static INLINE GLfloat LOG2(GLfloat x)
+{
+   const GLfloat y = x * x * x * x;
+   const GLuint ix = *((GLuint *) &y);
+   const GLuint exp = (ix >> 23) & 0xFF;
+   const GLint log2 = ((GLint) exp) - 127;
+   return (GLfloat) log2 * (1.0 / 4.0);  /* 4, because of x^4 above */
+}
+#endif
+/* Pretty fast, and accurate.
+ * Based on code from http://www.flipcode.com/totd/
+ */
+static INLINE GLfloat LOG2(GLfloat val)
+{
+   fi_type num;
+   GLint log_2;
+   num.f = val;
+   log_2 = ((num.i >> 23) & 255) - 128;
+   num.i &= ~(255 << 23);
+   num.i += 127 << 23;
+   num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3;
+   return num.f + log_2;
+}
+#else
+/*
+ * NOTE: log_base_2(x) = log(x) / log(2)
+ * NOTE: 1.442695 = 1/log(2).
+ */
+#define LOG2(x)  ((GLfloat) (log(x) * 1.442695F))
+#endif
+
+
+/***
+ *** IS_INF_OR_NAN: test if float is infinite or NaN
+ ***/
+#ifdef USE_IEEE
+static INLINE int IS_INF_OR_NAN( float x )
+{
+   fi_type tmp;
+   tmp.f = x;
+   return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31);
+}
+#elif defined(isfinite)
+#define IS_INF_OR_NAN(x)        (!isfinite(x))
+#elif defined(finite)
+#define IS_INF_OR_NAN(x)        (!finite(x))
+#elif defined(__VMS)
+#define IS_INF_OR_NAN(x)        (!finite(x))
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define IS_INF_OR_NAN(x)        (!isfinite(x))
+#else
+#define IS_INF_OR_NAN(x)        (!finite(x))
+#endif
+
+
+/***
+ *** IS_NEGATIVE: test if float is negative
+ ***/
+#if defined(USE_IEEE)
+static INLINE int GET_FLOAT_BITS( float x )
+{
+   fi_type fi;
+   fi.f = x;   /* reinterpret the float's bits as an int through the union */
+   return fi.i;
+}
+#define IS_NEGATIVE(x) (GET_FLOAT_BITS(x) < 0)  /* tests the int's sign */
+#else
+#define IS_NEGATIVE(x) ((x) < 0.0F)  /* argument parenthesized for safety */
+#endif
+
+
+/***
+ *** DIFFERENT_SIGNS: test if two floats have opposite signs (nonzero if so)
+ ***/
+#if defined(USE_IEEE)
+#define DIFFERENT_SIGNS(x,y) ((GET_FLOAT_BITS(x) ^ GET_FLOAT_BITS(y)) & (1u<<31))
+#else
+/* Could just use (x*y<0) except for the flatshading requirements.
+ * Maybe there's a better way?
+ */
+#define DIFFERENT_SIGNS(x,y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
+#endif
+
+
+/***
+ *** CEILF: ceiling of float
+ *** FLOORF: floor of float
+ *** FABSF: absolute value of float
+ *** LOGF: the natural logarithm (base e) of the value
+ *** EXPF: raise e to the value
+ *** LDEXPF: multiply value by an integral power of two
+ *** FREXPF: extract mantissa and exponent from value
+ ***/
+#if defined(__gnu_linux__)
+/* C99 functions */
+#define CEILF(x)   ceilf(x)
+#define FLOORF(x)  floorf(x)
+#define FABSF(x)   fabsf(x)
+#define LOGF(x)    logf(x)
+#define EXPF(x)    expf(x)
+#define LDEXPF(x,y)  ldexpf(x,y)
+#define FREXPF(x,y)  frexpf(x,y)
+#else
+#define CEILF(x)   ((GLfloat) ceil(x))
+#define FLOORF(x)  ((GLfloat) floor(x))
+#define FABSF(x)   ((GLfloat) fabs(x))
+#define LOGF(x)    ((GLfloat) log(x))
+#define EXPF(x)    ((GLfloat) exp(x))
+#define LDEXPF(x,y)  ((GLfloat) ldexp(x,y))
+#define FREXPF(x,y)  ((GLfloat) frexp(x,y))
+#endif
+
+
+/***
+ *** IROUND: return (as an integer) float rounded to nearest integer
+ ***/
+#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
+static INLINE int iround(float f)
+{
+   int r;
+   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
+   return r;
+}
+#define IROUND(x)  iround(x)
+#elif defined(USE_X86_ASM) && defined(_MSC_VER)
+static INLINE int iround(float f)
+{
+   int r;
+   _asm {
+	 fld f
+	 fistp r
+	}
+   return r;
+}
+#define IROUND(x)  iround(x)
+#elif defined(__WATCOMC__) && defined(__386__)
+long iround(float f);
+#pragma aux iround =                    \
+	"push   eax"                        \
+	"fistp  dword ptr [esp]"            \
+	"pop    eax"                        \
+	parm [8087]                         \
+	value [eax]                         \
+	modify exact [eax];
+#define IROUND(x)  iround(x)
+#else
+#define IROUND(f)  ((int) (((f) >= 0.0F) ? ((f) + 0.5F) : ((f) - 0.5F)))
+#endif
+
+#define IROUND64(f)  ((GLint64) (((f) >= 0.0F) ? ((f) + 0.5F) : ((f) - 0.5F)))
+
+/***
+ *** IROUND_POS: return (as an integer) positive float rounded to nearest int
+ ***/
+#ifdef DEBUG
+#define IROUND_POS(f) (assert((f) >= 0.0F), IROUND(f))
+#else
+#define IROUND_POS(f) (IROUND(f))
+#endif
+
+
+/***
+ *** IFLOOR: return (as an integer) floor of float
+ ***/
+#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
+/*
+ * IEEE floor for computers that round to nearest or even.
+ * 'f' must be between -4194304 and 4194303.
+ * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
+ * but uses some IEEE specific tricks for better speed.
+ * Contributed by Josh Vanderhoof
+ */
+static INLINE int ifloor(float f)
+{
+   int ai, bi;
+   double af, bf;
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   /* GCC generates an extra fstp/fld without this. */
+   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
+   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
+   return (ai - bi) >> 1;
+}
+#define IFLOOR(x)  ifloor(x)
+#elif defined(USE_IEEE)
+static INLINE int ifloor(float f)
+{
+   int ai, bi;
+   double af, bf;
+   fi_type u;
+
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   u.f = (float) af;  ai = u.i;
+   u.f = (float) bf;  bi = u.i;
+   return (ai - bi) >> 1;
+}
+#define IFLOOR(x)  ifloor(x)
+#else
+static INLINE int ifloor(float f)
+{
+   int i = IROUND(f);
+   return (i > f) ? i - 1 : i;
+}
+#define IFLOOR(x)  ifloor(x)
+#endif
+
+
+/***
+ *** ICEIL: return (as an integer) ceiling of float
+ ***/
+#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
+/*
+ * IEEE ceil for computers that round to nearest or even.
+ * 'f' must be between -4194304 and 4194303.
+ * This ceil operation is done by "(iround(f + .5) + iround(f - .5) + 1) >> 1",
+ * but uses some IEEE specific tricks for better speed.
+ * Contributed by Josh Vanderhoof
+ */
+static INLINE int iceil(float f)
+{
+   int ai, bi;
+   double af, bf;
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   /* GCC generates an extra fstp/fld without this. */
+   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
+   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
+   return (ai - bi + 1) >> 1;
+}
+#define ICEIL(x)  iceil(x)
+#elif defined(USE_IEEE)
+static INLINE int iceil(float f)
+{
+   int ai, bi;
+   double af, bf;
+   fi_type u;
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   u.f = (float) af; ai = u.i;
+   u.f = (float) bf; bi = u.i;
+   return (ai - bi + 1) >> 1;
+}
+#define ICEIL(x)  iceil(x)
+#else
+static INLINE int iceil(float f)
+{
+   int i = IROUND(f);
+   return (i < f) ? i + 1 : i;
+}
+#define ICEIL(x)  iceil(x)
+#endif
+
+
+/**
+ * Is x a power of two?
+ */
+static INLINE int
+_mesa_is_pow_two(int x)
+{
+   return !(x & (x - 1));
+}
+
+/**
+ * Round the given integer up to the next power of two
+ * (powers of two are returned unchanged). If X is zero the result is undefined.
+ *
+ * Source for the fallback implementation is
+ * Sean Eron Anderson's webpage "Bit Twiddling Hacks"
+ * http://graphics.stanford.edu/~seander/bithacks.html
+ *
+ * When using the builtin function we have to do some extra
+ * work for an input of 1 to avoid hitting the undefined
+ * result of __builtin_clz(0). That undefined result
+ * would differ depending on the optimization
+ * level used for the build.
+ */
+static INLINE int32_t
+_mesa_next_pow_two_32(uint32_t x)
+{
+#if defined(__GNUC__) && \
+	((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+	uint32_t y = (x != 1);
+	return (1 + y) << ((__builtin_clz(x - y) ^ 31) );
+#else
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x++;
+	return x;
+#endif
+}
+
+static INLINE int64_t
+_mesa_next_pow_two_64(uint64_t x)
+{
+#if defined(__GNUC__) && \
+	((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+	uint64_t y = (x != 1);
+	if (sizeof(x) == sizeof(long))
+		return (1 + y) << ((__builtin_clzl(x - y) ^ 63));
+	else
+		return (1 + y) << ((__builtin_clzll(x - y) ^ 63));
+#else
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+	x |= x >> 32;
+	x++;
+	return x;
+#endif
+}
+
+
+/**
+ * Return 1 if this is a little endian machine, 0 if big endian.
+ */
+static INLINE GLboolean
+_mesa_little_endian(void)
+{
+   const GLuint ui = 1; /* intentionally not static */
+   return *((const GLubyte *) &ui);
+}
+
+
+
+/**********************************************************************
+ * Functions
+ */
+
+extern void *
+_mesa_align_malloc( size_t bytes, unsigned long alignment );
+
+extern void *
+_mesa_align_calloc( size_t bytes, unsigned long alignment );
+
+extern void
+_mesa_align_free( void *ptr );
+
+extern void *
+_mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
+                    unsigned long alignment);
+
+extern void *
+_mesa_exec_malloc( GLuint size );
+
+extern void 
+_mesa_exec_free( void *addr );
+
+extern void *
+_mesa_realloc( void *oldBuffer, size_t oldSize, size_t newSize );
+
+extern void
+_mesa_memset16( unsigned short *dst, unsigned short val, size_t n );
+
+extern double
+_mesa_sqrtd(double x);
+
+extern float
+_mesa_sqrtf(float x);
+
+extern float
+_mesa_inv_sqrtf(float x);
+
+extern void
+_mesa_init_sqrt_table(void);
+
+#ifdef __GNUC__ /* GCC: map the helpers onto ffs()/ffsll() and builtins */
+#define _mesa_ffs(i)  ffs(i)
+#define _mesa_ffsll(i)  ffsll(i)
+/* GCC 3.4 and later use __builtin_popcount; older GCC uses the extern function */
+#if ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+#define _mesa_bitcount(i) __builtin_popcount(i)
+#else
+extern unsigned int
+_mesa_bitcount(unsigned int n);
+#endif
+
+#else /* not GCC: all three are out-of-line functions */
+extern int
+_mesa_ffs(int32_t i);
+
+extern int
+_mesa_ffsll(int64_t i);
+
+extern unsigned int
+_mesa_bitcount(unsigned int n);
+#endif
+
+extern GLhalfARB
+_mesa_float_to_half(float f);
+
+extern float
+_mesa_half_to_float(GLhalfARB h);
+
+
+extern void *
+_mesa_bsearch( const void *key, const void *base, size_t nmemb, size_t size, 
+               int (*compar)(const void *, const void *) );
+
+extern char *
+_mesa_getenv( const char *var );
+
+extern char *
+_mesa_strdup( const char *s );
+
+extern float
+_mesa_strtof( const char *s, char **end );
+
+extern unsigned int
+_mesa_str_checksum(const char *str);
+
+extern int
+_mesa_snprintf( char *str, size_t size, const char *fmt, ... ) PRINTFLIKE(3, 4);
+
+struct gl_context;
+
+extern void
+_mesa_warning( struct gl_context *gc, const char *fmtString, ... ) PRINTFLIKE(2, 3);
+
+extern void
+_mesa_problem( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3);
+
+extern void
+_mesa_error( struct gl_context *ctx, GLenum error, const char *fmtString, ... ) PRINTFLIKE(3, 4);
+
+extern void
+_mesa_debug( const struct gl_context *ctx, const char *fmtString, ... ) PRINTFLIKE(2, 3);
+
+
+#if defined(_MSC_VER) && !defined(snprintf)
+#define snprintf _snprintf
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* IMPORTS_H */
diff --git a/mesalib/src/mesa/program/sampler.cpp b/mesalib/src/mesa/program/sampler.cpp
index 12c4a40a2..1457d1199 100644
--- a/mesalib/src/mesa/program/sampler.cpp
+++ b/mesalib/src/mesa/program/sampler.cpp
@@ -23,7 +23,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <cstdio>
 #include "ir.h"
 #include "glsl_types.h"
 #include "ir_visitor.h"
diff --git a/mesalib/src/mesa/state_tracker/st_atom_sampler.c b/mesalib/src/mesa/state_tracker/st_atom_sampler.c
index c9dfe248f..474cbd589 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_sampler.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_sampler.c
@@ -1,270 +1,228 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  *   Brian Paul
-  */
- 
-
-#include "main/macros.h"
-
-#include "st_context.h"
-#include "st_cb_texture.h"
-#include "st_atom.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-
-#include "cso_cache/cso_context.h"
-
-
-/**
- * Convert GLenum texcoord wrap tokens to pipe tokens.
- */
-static GLuint
-gl_wrap_xlate(GLenum wrap)
-{
-   switch (wrap) {
-   case GL_REPEAT:
-      return PIPE_TEX_WRAP_REPEAT;
-   case GL_CLAMP:
-      return PIPE_TEX_WRAP_CLAMP;
-   case GL_CLAMP_TO_EDGE:
-      return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   case GL_CLAMP_TO_BORDER:
-      return PIPE_TEX_WRAP_CLAMP_TO_BORDER;
-   case GL_MIRRORED_REPEAT:
-      return PIPE_TEX_WRAP_MIRROR_REPEAT;
-   case GL_MIRROR_CLAMP_EXT:
-      return PIPE_TEX_WRAP_MIRROR_CLAMP;
-   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
-      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
-   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
-      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER;
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-
-static GLuint
-gl_filter_to_mip_filter(GLenum filter)
-{
-   switch (filter) {
-   case GL_NEAREST:
-   case GL_LINEAR:
-      return PIPE_TEX_MIPFILTER_NONE;
-
-   case GL_NEAREST_MIPMAP_NEAREST:
-   case GL_LINEAR_MIPMAP_NEAREST:
-      return PIPE_TEX_MIPFILTER_NEAREST;
-
-   case GL_NEAREST_MIPMAP_LINEAR:
-   case GL_LINEAR_MIPMAP_LINEAR:
-      return PIPE_TEX_MIPFILTER_LINEAR;
-
-   default:
-      assert(0);
-      return PIPE_TEX_MIPFILTER_NONE;
-   }
-}
-
-
-static GLuint
-gl_filter_to_img_filter(GLenum filter)
-{
-   switch (filter) {
-   case GL_NEAREST:
-   case GL_NEAREST_MIPMAP_NEAREST:
-   case GL_NEAREST_MIPMAP_LINEAR:
-      return PIPE_TEX_FILTER_NEAREST;
-
-   case GL_LINEAR:
-   case GL_LINEAR_MIPMAP_NEAREST:
-   case GL_LINEAR_MIPMAP_LINEAR:
-      return PIPE_TEX_FILTER_LINEAR;
-
-   default:
-      assert(0);
-      return PIPE_TEX_FILTER_NEAREST;
-   }
-}
-
-
-static void
-xlate_border_color(const GLfloat *colorIn, GLenum baseFormat, GLfloat *colorOut)
-{
-   switch (baseFormat) {
-   case GL_RED:
-      colorOut[0] = colorIn[0];
-      colorOut[1] = 0.0F;
-      colorOut[2] = 0.0F;
-      colorOut[3] = 1.0F;
-      break;
-   case GL_RG:
-      colorOut[0] = colorIn[0];
-      colorOut[1] = colorIn[1];
-      colorOut[2] = 0.0F;
-      colorOut[3] = 1.0F;
-      break;
-   case GL_RGB:
-      colorOut[0] = colorIn[0];
-      colorOut[1] = colorIn[1];
-      colorOut[2] = colorIn[2];
-      colorOut[3] = 1.0F;
-      break;
-   case GL_ALPHA:
-      colorOut[0] = colorOut[1] = colorOut[2] = 0.0;
-      colorOut[3] = colorIn[3];
-      break;
-   case GL_LUMINANCE:
-      colorOut[0] = colorOut[1] = colorOut[2] = colorIn[0];
-      colorOut[3] = 1.0;
-      break;
-   case GL_LUMINANCE_ALPHA:
-      colorOut[0] = colorOut[1] = colorOut[2] = colorIn[0];
-      colorOut[3] = colorIn[3];
-      break;
-   case GL_INTENSITY:
-      colorOut[0] = colorOut[1] = colorOut[2] = colorOut[3] = colorIn[0];
-      break;
-   default:
-      COPY_4V(colorOut, colorIn);
-   }
-}
-
-
-static void 
-update_samplers(struct st_context *st)
-{
-   struct gl_vertex_program *vprog = st->ctx->VertexProgram._Current;
-   struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
-   const GLbitfield samplersUsed = (vprog->Base.SamplersUsed |
-                                    fprog->Base.SamplersUsed);
-   GLuint su;
-
-   st->state.num_samplers = 0;
-
-   /* loop over sampler units (aka tex image units) */
-   for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) {
-      struct pipe_sampler_state *sampler = st->state.samplers + su;
-
-      memset(sampler, 0, sizeof(*sampler));
-
-      if (samplersUsed & (1 << su)) {
-         struct gl_texture_object *texobj;
-         struct gl_texture_image *teximg;
-         GLuint texUnit;
-
-         if (fprog->Base.SamplersUsed & (1 << su))
-            texUnit = fprog->Base.SamplerUnits[su];
-         else
-            texUnit = vprog->Base.SamplerUnits[su];
-
-         texobj = st->ctx->Texture.Unit[texUnit]._Current;
-         if (!texobj) {
-            texobj = st_get_default_texture(st);
-         }
-
-         teximg = texobj->Image[0][texobj->BaseLevel];
-
-         sampler->wrap_s = gl_wrap_xlate(texobj->WrapS);
-         sampler->wrap_t = gl_wrap_xlate(texobj->WrapT);
-         sampler->wrap_r = gl_wrap_xlate(texobj->WrapR);
-
-         sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter);
-         sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter);
-         sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter);
-
-         if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
-            sampler->normalized_coords = 1;
-
-         sampler->lod_bias = st->ctx->Texture.Unit[su].LodBias;
-
-         sampler->min_lod = texobj->BaseLevel + texobj->MinLod;
-         if (sampler->min_lod < texobj->BaseLevel)
-            sampler->min_lod = texobj->BaseLevel;
-
-         sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel,
-                                 (texobj->MaxLod + texobj->BaseLevel));
-         if (sampler->max_lod < sampler->min_lod) {
-            /* The GL spec doesn't seem to specify what to do in this case.
-             * Swap the values.
-             */
-            float tmp = sampler->max_lod;
-            sampler->max_lod = sampler->min_lod;
-            sampler->min_lod = tmp;
-            assert(sampler->min_lod <= sampler->max_lod);
-         }
-
-         xlate_border_color(texobj->BorderColor.f,
-                            teximg ? teximg->_BaseFormat : GL_RGBA,
-                            sampler->border_color);
-
-	 sampler->max_anisotropy = (texobj->MaxAnisotropy == 1.0 ? 0 : (GLuint)texobj->MaxAnisotropy);
-
-         /* only care about ARB_shadow, not SGI shadow */
-         if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
-            sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
-            sampler->compare_func
-               = st_compare_func_to_pipe(texobj->CompareFunc);
-         }
-
-         st->state.num_samplers = su + 1;
-
-         /*printf("%s su=%u non-null\n", __FUNCTION__, su);*/
-         cso_single_sampler(st->cso_context, su, sampler);
-         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
-            cso_single_vertex_sampler(st->cso_context, su, sampler);
-         }
-      }
-      else {
-         /*printf("%s su=%u null\n", __FUNCTION__, su);*/
-         cso_single_sampler(st->cso_context, su, NULL);
-         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
-            cso_single_vertex_sampler(st->cso_context, su, NULL);
-         }
-      }
-   }
-
-   cso_single_sampler_done(st->cso_context);
-   if (st->ctx->Const.MaxVertexTextureImageUnits > 0) {
-      cso_single_vertex_sampler_done(st->cso_context);
-   }
-}
-
-
-const struct st_tracked_state st_update_sampler = {
-   "st_update_sampler",					/* name */
-   {							/* dirty */
-      _NEW_TEXTURE,					/* mesa */
-      0,						/* st */
-   },
-   update_samplers					/* update */
-};
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Brian Paul
+  */
+ 
+
+#include "main/macros.h"
+
+#include "st_context.h"
+#include "st_cb_texture.h"
+#include "st_format.h"
+#include "st_atom.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+#include "cso_cache/cso_context.h"
+
+
+/**
+ * Convert GLenum texcoord wrap tokens to pipe tokens.
+ */
+static GLuint
+gl_wrap_xlate(GLenum wrap)
+{
+   switch (wrap) {
+   case GL_REPEAT:
+      return PIPE_TEX_WRAP_REPEAT;
+   case GL_CLAMP:
+      return PIPE_TEX_WRAP_CLAMP;
+   case GL_CLAMP_TO_EDGE:
+      return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   case GL_CLAMP_TO_BORDER:
+      return PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return PIPE_TEX_WRAP_MIRROR_REPEAT;
+   case GL_MIRROR_CLAMP_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+
+static GLuint
+gl_filter_to_mip_filter(GLenum filter)
+{
+   switch (filter) {
+   case GL_NEAREST:
+   case GL_LINEAR:
+      return PIPE_TEX_MIPFILTER_NONE;
+
+   case GL_NEAREST_MIPMAP_NEAREST:
+   case GL_LINEAR_MIPMAP_NEAREST:
+      return PIPE_TEX_MIPFILTER_NEAREST;
+
+   case GL_NEAREST_MIPMAP_LINEAR:
+   case GL_LINEAR_MIPMAP_LINEAR:
+      return PIPE_TEX_MIPFILTER_LINEAR;
+
+   default:
+      assert(0);
+      return PIPE_TEX_MIPFILTER_NONE;
+   }
+}
+
+
+static GLuint
+gl_filter_to_img_filter(GLenum filter)
+{
+   switch (filter) {
+   case GL_NEAREST:
+   case GL_NEAREST_MIPMAP_NEAREST:
+   case GL_NEAREST_MIPMAP_LINEAR:
+      return PIPE_TEX_FILTER_NEAREST;
+
+   case GL_LINEAR:
+   case GL_LINEAR_MIPMAP_NEAREST:
+   case GL_LINEAR_MIPMAP_LINEAR:
+      return PIPE_TEX_FILTER_LINEAR;
+
+   default:
+      assert(0);
+      return PIPE_TEX_FILTER_NEAREST;
+   }
+}
+
+/** Rebuild the pipe sampler state for each sampler unit the current programs use. */
+static void 
+update_samplers(struct st_context *st)
+{
+   struct gl_vertex_program *vprog = st->ctx->VertexProgram._Current;
+   struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
+   const GLbitfield samplersUsed = (vprog->Base.SamplersUsed |
+                                    fprog->Base.SamplersUsed);
+   GLuint su;
+
+   st->state.num_samplers = 0;
+
+   /* loop over sampler units (aka tex image units) */
+   for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) {
+      struct pipe_sampler_state *sampler = st->state.samplers + su;
+
+      memset(sampler, 0, sizeof(*sampler));
+
+      if (samplersUsed & (1u << su)) {
+         struct gl_texture_object *texobj;
+         struct gl_texture_image *teximg;
+         GLuint texUnit;
+
+         if (fprog->Base.SamplersUsed & (1u << su))
+            texUnit = fprog->Base.SamplerUnits[su];
+         else
+            texUnit = vprog->Base.SamplerUnits[su];
+
+         texobj = st->ctx->Texture.Unit[texUnit]._Current;
+         if (!texobj) {
+            texobj = st_get_default_texture(st);
+         }
+
+         teximg = texobj->Image[0][texobj->BaseLevel];
+
+         sampler->wrap_s = gl_wrap_xlate(texobj->WrapS);
+         sampler->wrap_t = gl_wrap_xlate(texobj->WrapT);
+         sampler->wrap_r = gl_wrap_xlate(texobj->WrapR);
+
+         sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter);
+         sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter);
+         sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter);
+
+         if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
+            sampler->normalized_coords = 1;
+         /* read LodBias from the same texture unit the texture object came from */
+         sampler->lod_bias = st->ctx->Texture.Unit[texUnit].LodBias;
+
+         sampler->min_lod = texobj->BaseLevel + texobj->MinLod;
+         if (sampler->min_lod < texobj->BaseLevel)
+            sampler->min_lod = texobj->BaseLevel;
+
+         sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel,
+                                 (texobj->MaxLod + texobj->BaseLevel));
+         if (sampler->max_lod < sampler->min_lod) {
+            /* The GL spec doesn't seem to specify what to do in this case.
+             * Swap the values.
+             */
+            float tmp = sampler->max_lod;
+            sampler->max_lod = sampler->min_lod;
+            sampler->min_lod = tmp;
+            assert(sampler->min_lod <= sampler->max_lod);
+         }
+
+         st_translate_color(texobj->BorderColor.f,
+                            teximg ? teximg->_BaseFormat : GL_RGBA,
+                            sampler->border_color);
+
+         sampler->max_anisotropy = (texobj->MaxAnisotropy == 1.0 ? 0 : (GLuint)texobj->MaxAnisotropy);
+
+         /* only care about ARB_shadow, not SGI shadow */
+         if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
+            sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
+            sampler->compare_func
+               = st_compare_func_to_pipe(texobj->CompareFunc);
+         }
+
+         st->state.num_samplers = su + 1;
+
+         /* used unit: hand the filled-in state to the CSO context */
+         cso_single_sampler(st->cso_context, su, sampler);
+         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
+            cso_single_vertex_sampler(st->cso_context, su, sampler);
+         }
+      }
+      else {
+         /* unused unit: pass NULL to the CSO context for this slot */
+         cso_single_sampler(st->cso_context, su, NULL);
+         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
+            cso_single_vertex_sampler(st->cso_context, su, NULL);
+         }
+      }
+   }
+
+   cso_single_sampler_done(st->cso_context);
+   if (st->ctx->Const.MaxVertexTextureImageUnits > 0) {
+      cso_single_vertex_sampler_done(st->cso_context);
+   }
+}
+
+
+const struct st_tracked_state st_update_sampler = {
+   "st_update_sampler",					/* name */
+   {							/* dirty */
+      _NEW_TEXTURE,					/* mesa */
+      0,						/* st */
+   },
+   update_samplers					/* update */
+};
diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c
index d2e0cd73c..0e0c4326e 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_clear.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c
@@ -42,6 +42,7 @@
 #include "st_cb_accum.h"
 #include "st_cb_clear.h"
 #include "st_cb_fbo.h"
+#include "st_format.h"
 #include "st_program.h"
 
 #include "pipe/p_context.h"
@@ -204,6 +205,7 @@ clear_with_quad(struct gl_context *ctx,
    const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f;
    const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f;
    const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f;
+   float clearColor[4];
 
    /*
    printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, 
@@ -298,9 +300,25 @@ clear_with_quad(struct gl_context *ctx,
    cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
    cso_set_vertex_shader_handle(st->cso_context, st->clear.vs);
 
-   /* draw quad matching scissor rect (XXX verify coord round-off) */
-   draw_quad(st, x0, y0, x1, y1,
-             (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor);
+   /* Translate the clear color according to the base format of color
+    * buffer 0.  If there is no color buffer 0, pass the GL clear color
+    * through unchanged (as the code did before translation was added) so
+    * draw_quad() is never handed an uninitialized clearColor.
+    */
+   if (ctx->DrawBuffer->_ColorDrawBuffers[0]) {
+      st_translate_color(ctx->Color.ClearColor,
+                         ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat,
+                         clearColor);
+   }
+   else {
+      clearColor[0] = ctx->Color.ClearColor[0];
+      clearColor[1] = ctx->Color.ClearColor[1];
+      clearColor[2] = ctx->Color.ClearColor[2];
+      clearColor[3] = ctx->Color.ClearColor[3];
+   }
+
+   /* draw quad matching scissor rect */
+   draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, clearColor);
 
    /* Restore pipe state */
    cso_restore_blend(st->cso_context);
@@ -541,12 +548,31 @@ st_Clear(struct gl_context *ctx, GLbitfield mask)
        * required from the visual. Hence fix this up to avoid potential
        * read-modify-write in the driver.
        */
+      float clearColor[4];
+
       if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) &&
           ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
           (depthRb == stencilRb) &&
           (ctx->DrawBuffer->Visual.depthBits == 0 ||
            ctx->DrawBuffer->Visual.stencilBits == 0))
          clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
+
+      /* Translate the clear color according to the base format of color
+       * buffer 0.  If there is no color buffer 0, use the GL clear color
+       * unchanged so that clearColor is always initialized.
+       */
+      if (ctx->DrawBuffer->_ColorDrawBuffers[0]) {
+         st_translate_color(ctx->Color.ClearColor,
+                            ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat,
+                            clearColor);
+      }
+      else {
+         clearColor[0] = ctx->Color.ClearColor[0];
+         clearColor[1] = ctx->Color.ClearColor[1];
+         clearColor[2] = ctx->Color.ClearColor[2];
+         clearColor[3] = ctx->Color.ClearColor[3];
+      }
+
-      st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor,
-                      ctx->Depth.Clear, ctx->Stencil.Clear);
+      st->pipe->clear(st->pipe, clear_buffers, clearColor,
+                      ctx->Depth.Clear, ctx->Stencil.Clear);
    }
diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c
index c0d1bd94a..a40a79bb8 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c
@@ -1,1878 +1,1918 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "main/mfeatures.h"
-#include "main/bufferobj.h"
-#include "main/enums.h"
-#include "main/fbobject.h"
-#include "main/formats.h"
-#include "main/image.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "main/mipmap.h"
-#include "main/pack.h"
-#include "main/pixeltransfer.h"
-#include "main/texcompress.h"
-#include "main/texfetch.h"
-#include "main/texgetimage.h"
-#include "main/teximage.h"
-#include "main/texobj.h"
-#include "main/texstore.h"
-
-#include "state_tracker/st_debug.h"
-#include "state_tracker/st_context.h"
-#include "state_tracker/st_cb_fbo.h"
-#include "state_tracker/st_cb_flush.h"
-#include "state_tracker/st_cb_texture.h"
-#include "state_tracker/st_format.h"
-#include "state_tracker/st_texture.h"
-#include "state_tracker/st_gen_mipmap.h"
-#include "state_tracker/st_atom.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_tile.h"
-#include "util/u_blit.h"
-#include "util/u_format.h"
-#include "util/u_surface.h"
-#include "util/u_sampler.h"
-#include "util/u_math.h"
-#include "util/u_box.h"
-
-#define DBG if (0) printf
-
-
-static enum pipe_texture_target
-gl_target_to_pipe(GLenum target)
-{
-   switch (target) {
-   case GL_TEXTURE_1D:
-      return PIPE_TEXTURE_1D;
-   case GL_TEXTURE_2D:
-      return PIPE_TEXTURE_2D;
-   case GL_TEXTURE_RECTANGLE_NV:
-      return PIPE_TEXTURE_RECT;
-   case GL_TEXTURE_3D:
-      return PIPE_TEXTURE_3D;
-   case GL_TEXTURE_CUBE_MAP_ARB:
-      return PIPE_TEXTURE_CUBE;
-   case GL_TEXTURE_1D_ARRAY_EXT:
-      return PIPE_TEXTURE_1D_ARRAY;
-   case GL_TEXTURE_2D_ARRAY_EXT:
-      return PIPE_TEXTURE_2D_ARRAY;
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-
-/** called via ctx->Driver.NewTextureImage() */
-static struct gl_texture_image *
-st_NewTextureImage(struct gl_context * ctx)
-{
-   DBG("%s\n", __FUNCTION__);
-   (void) ctx;
-   return (struct gl_texture_image *) ST_CALLOC_STRUCT(st_texture_image);
-}
-
-
-/** called via ctx->Driver.NewTextureObject() */
-static struct gl_texture_object *
-st_NewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
-{
-   struct st_texture_object *obj = ST_CALLOC_STRUCT(st_texture_object);
-
-   DBG("%s\n", __FUNCTION__);
-   _mesa_initialize_texture_object(&obj->base, name, target);
-
-   return &obj->base;
-}
-
-/** called via ctx->Driver.DeleteTextureObject() */
-static void 
-st_DeleteTextureObject(struct gl_context *ctx,
-                       struct gl_texture_object *texObj)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_object *stObj = st_texture_object(texObj);
-   if (stObj->pt)
-      pipe_resource_reference(&stObj->pt, NULL);
-   if (stObj->sampler_view) {
-      if (stObj->sampler_view->context != st->pipe) {
-         /* Take "ownership" of this texture sampler view by setting
-          * its context pointer to this context.  This avoids potential
-          * crashes when the texture object is shared among contexts
-          * and the original/owner context has already been destroyed.
-          */
-         stObj->sampler_view->context = st->pipe;
-      }
-      pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-   }
-   _mesa_delete_texture_object(ctx, texObj);
-}
-
-
-/** called via ctx->Driver.FreeTexImageData() */
-static void
-st_FreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texImage)
-{
-   struct st_texture_image *stImage = st_texture_image(texImage);
-
-   DBG("%s\n", __FUNCTION__);
-
-   if (stImage->pt) {
-      pipe_resource_reference(&stImage->pt, NULL);
-   }
-
-   if (texImage->Data) {
-      _mesa_align_free(texImage->Data);
-      texImage->Data = NULL;
-   }
-}
-
-
-/**
- * From linux kernel i386 header files, copes with odd sizes better
- * than COPY_DWORDS would:
- * XXX Put this in src/mesa/main/imports.h ???
- */
-#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
-static INLINE void *
-__memcpy(void *to, const void *from, size_t n)
-{
-   int d0, d1, d2;
-   __asm__ __volatile__("rep ; movsl\n\t"
-                        "testb $2,%b4\n\t"
-                        "je 1f\n\t"
-                        "movsw\n"
-                        "1:\ttestb $1,%b4\n\t"
-                        "je 2f\n\t"
-                        "movsb\n" "2:":"=&c"(d0), "=&D"(d1), "=&S"(d2)
-                        :"0"(n / 4), "q"(n), "1"((long) to), "2"((long) from)
-                        :"memory");
-   return (to);
-}
-#else
-#define __memcpy(a,b,c) memcpy(a,b,c)
-#endif
-
-
-/**
- * The system memcpy (at least on ubuntu 5.10) has problems copying
- * to agp (writecombined) memory from a source which isn't 64-byte
- * aligned - there is a 4x performance falloff.
- *
- * The x86 __memcpy is immune to this but is slightly slower
- * (10%-ish) than the system memcpy.
- *
- * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
- * isn't much faster than x86_memcpy for agp copies.
- * 
- * TODO: switch dynamically.
- */
-static void *
-do_memcpy(void *dest, const void *src, size_t n)
-{
-   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) {
-      return __memcpy(dest, src, n);
-   }
-   else
-      return memcpy(dest, src, n);
-}
-
-
-/**
- * Return default texture resource binding bitmask for the given format.
- */
-static GLuint
-default_bindings(struct st_context *st, enum pipe_format format)
-{
-   struct pipe_screen *screen = st->pipe->screen;
-   const unsigned target = PIPE_TEXTURE_2D;
-   const unsigned geom = 0x0;
-   unsigned bindings;
-
-   if (util_format_is_depth_or_stencil(format))
-      bindings = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DEPTH_STENCIL;
-   else
-      bindings = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-
-   if (screen->is_format_supported(screen, format, target, 0, bindings, geom))
-      return bindings;
-   else
-      return PIPE_BIND_SAMPLER_VIEW;
-}
-
-
-/** Return number of image dimensions (1, 2 or 3) for a texture target. */
-static GLuint
-get_texture_dims(GLenum target)
-{
-   switch (target) {
-   case GL_TEXTURE_1D:
-   case GL_TEXTURE_1D_ARRAY_EXT:
-      return 1;
-   case GL_TEXTURE_2D:
-   case GL_TEXTURE_CUBE_MAP_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-   case GL_TEXTURE_RECTANGLE_NV:
-   case GL_TEXTURE_2D_ARRAY_EXT:
-      return 2;
-   case GL_TEXTURE_3D:
-      return 3;
-   default:
-      assert(0 && "invalid texture target in get_texture_dims()");
-      return 1;
-   }
-}
-
-
-/**
- * Try to allocate a pipe_resource object for the given st_texture_object.
- *
- * We use the given st_texture_image as a clue to determine the size of the
- * mipmap image at level=0.
- *
- * \return GL_TRUE for success, GL_FALSE if out of memory.
- */
-static GLboolean
-guess_and_alloc_texture(struct st_context *st,
-			struct st_texture_object *stObj,
-			const struct st_texture_image *stImage)
-{
-   const GLuint dims = get_texture_dims(stObj->base.Target);
-   GLuint level, lastLevel, width, height, depth;
-   GLuint bindings;
-   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
-   enum pipe_format fmt;
-
-   DBG("%s\n", __FUNCTION__);
-
-   assert(!stObj->pt);
-
-   level = stImage->level;
-   width = stImage->base.Width2;  /* size w/out border */
-   height = stImage->base.Height2;
-   depth = stImage->base.Depth2;
-
-   assert(width > 0);
-   assert(height > 0);
-   assert(depth > 0);
-
-   /* Depending on the image's size, we can't always make a guess here.
-    */
-   if (level > 0) {
-      if ( (dims >= 1 && width == 1) ||
-           (dims >= 2 && height == 1) ||
-           (dims >= 3 && depth == 1) ) {
-         /* we can't determine the image size at level=0 */
-         stObj->width0 = stObj->height0 = stObj->depth0 = 0;
-         /* this is not an out of memory error */
-         return GL_TRUE;
-      }
-   }
-
-   /* grow the image size until we hit level = 0 */
-   while (level > 0) {
-      if (width != 1)
-         width <<= 1;
-      if (height != 1)
-         height <<= 1;
-      if (depth != 1)
-         depth <<= 1;
-      level--;
-   }      
-
-   assert(level == 0);
-
-   /* At this point, (width x height x depth) is the expected size of
-    * the level=0 mipmap image.
-    */
-
-   /* Guess a reasonable value for lastLevel.  With OpenGL we have no
-    * idea how many mipmap levels will be in a texture until we start
-    * to render with it.  Make an educated guess here but be prepared
-    * to re-allocating a texture buffer with space for more (or fewer)
-    * mipmap levels later.
-    */
-   if ((stObj->base.MinFilter == GL_NEAREST ||
-        stObj->base.MinFilter == GL_LINEAR ||
-        stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
-        stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
-       !stObj->base.GenerateMipmap &&
-       stImage->level == 0) {
-      /* only alloc space for a single mipmap level */
-      lastLevel = 0;
-   }
-   else {
-      /* alloc space for a full mipmap */
-      GLuint l2width = util_logbase2(width);
-      GLuint l2height = util_logbase2(height);
-      GLuint l2depth = util_logbase2(depth);
-      lastLevel = MAX2(MAX2(l2width, l2height), l2depth);
-   }
-
-   /* Save the level=0 dimensions */
-   stObj->width0 = width;
-   stObj->height0 = height;
-   stObj->depth0 = depth;
-
-   fmt = st_mesa_format_to_pipe_format(stImage->base.TexFormat);
-
-   bindings = default_bindings(st, fmt);
-
-   st_gl_texture_dims_to_pipe_dims(stObj->base.Target,
-                                   width, height, depth,
-                                   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
-
-   stObj->pt = st_texture_create(st,
-                                 gl_target_to_pipe(stObj->base.Target),
-                                 fmt,
-                                 lastLevel,
-                                 ptWidth,
-                                 ptHeight,
-                                 ptDepth,
-                                 ptLayers,
-                                 bindings);
-
-   DBG("%s returning %d\n", __FUNCTION__, (stObj->pt != NULL));
-
-   return stObj->pt != NULL;
-}
-
-
-/**
- * Adjust pixel unpack params and image dimensions to strip off the
- * texture border.
- * Gallium doesn't support texture borders.  They've seldem been used
- * and seldom been implemented correctly anyway.
- * \param unpackNew  returns the new pixel unpack parameters
- */
-static void
-strip_texture_border(GLint border,
-                     GLint *width, GLint *height, GLint *depth,
-                     const struct gl_pixelstore_attrib *unpack,
-                     struct gl_pixelstore_attrib *unpackNew)
-{
-   assert(border > 0);  /* sanity check */
-
-   *unpackNew = *unpack;
-
-   if (unpackNew->RowLength == 0)
-      unpackNew->RowLength = *width;
-
-   if (depth && unpackNew->ImageHeight == 0)
-      unpackNew->ImageHeight = *height;
-
-   unpackNew->SkipPixels += border;
-   if (height)
-      unpackNew->SkipRows += border;
-   if (depth)
-      unpackNew->SkipImages += border;
-
-   assert(*width >= 3);
-   *width = *width - 2 * border;
-   if (height && *height >= 3)
-      *height = *height - 2 * border;
-   if (depth && *depth >= 3)
-      *depth = *depth - 2 * border;
-}
-
-
-/**
- * Do glTexImage1/2/3D().
- */
-static void
-st_TexImage(struct gl_context * ctx,
-            GLint dims,
-            GLenum target, GLint level,
-            GLint internalFormat,
-            GLint width, GLint height, GLint depth,
-            GLint border,
-            GLenum format, GLenum type, const void *pixels,
-            const struct gl_pixelstore_attrib *unpack,
-            struct gl_texture_object *texObj,
-            struct gl_texture_image *texImage,
-            GLsizei imageSize, GLboolean compressed_src)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_object *stObj = st_texture_object(texObj);
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   GLuint dstRowStride = 0;
-   struct gl_pixelstore_attrib unpackNB;
-   enum pipe_transfer_usage transfer_usage = 0;
-
-   DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
-       _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
-
-   /* switch to "normal" */
-   if (stObj->surface_based) {
-      gl_format texFormat;
-
-      _mesa_clear_texture_object(ctx, texObj);
-      pipe_resource_reference(&stObj->pt, NULL);
-
-      /* oops, need to init this image again */
-      texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
-                                              internalFormat, format, type);
-
-      _mesa_init_teximage_fields(ctx, target, texImage,
-                                 width, height, depth, border,
-                                 internalFormat, texFormat);
-
-      stObj->surface_based = GL_FALSE;
-   }
-
-   /* gallium does not support texture borders, strip it off */
-   if (border) {
-      strip_texture_border(border, &width, &height, &depth, unpack, &unpackNB);
-      unpack = &unpackNB;
-      texImage->Width = width;
-      texImage->Height = height;
-      texImage->Depth = depth;
-      texImage->Border = 0;
-      border = 0;
-   }
-   else {
-      assert(texImage->Width == width);
-      assert(texImage->Height == height);
-      assert(texImage->Depth == depth);
-   }
-
-   stImage->face = _mesa_tex_target_to_face(target);
-   stImage->level = level;
-
-   _mesa_set_fetch_functions(texImage, dims);
-
-   /* Release the reference to a potentially orphaned buffer.   
-    * Release any old malloced memory.
-    */
-   if (stImage->pt) {
-      pipe_resource_reference(&stImage->pt, NULL);
-      assert(!texImage->Data);
-   }
-   else if (texImage->Data) {
-      _mesa_align_free(texImage->Data);
-   }
-
-   /*
-    * See if the new image is somehow incompatible with the existing
-    * mipmap.  If so, free the old mipmap.
-    */
-   if (stObj->pt) {
-      if (level > (GLint) stObj->pt->last_level ||
-          !st_texture_match_image(stObj->pt, &stImage->base,
-                                  stImage->face, stImage->level)) {
-         DBG("release it\n");
-         pipe_resource_reference(&stObj->pt, NULL);
-         assert(!stObj->pt);
-         pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-      }
-   }
-
-   if (width == 0 || height == 0 || depth == 0) {
-      /* stop after freeing old image */
-      return;
-   }
-
-   if (!stObj->pt) {
-      if (!guess_and_alloc_texture(st, stObj, stImage)) {
-         /* Probably out of memory.
-          * Try flushing any pending rendering, then retry.
-          */
-         st_finish(st);
-         if (!guess_and_alloc_texture(st, stObj, stImage)) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-            return;
-         }
-      }
-   }
-
-   assert(!stImage->pt);
-
-   /* Check if this texture image can live inside the texture object's buffer.
-    * If so, store the image there.  Otherwise the image will temporarily live
-    * in its own buffer.
-    */
-   if (stObj->pt &&
-       st_texture_match_image(stObj->pt, &stImage->base,
-                              stImage->face, stImage->level)) {
-
-      pipe_resource_reference(&stImage->pt, stObj->pt);
-      assert(stImage->pt);
-   }
-
-   if (!stImage->pt)
-      DBG("XXX: Image did not fit into texture - storing in local memory!\n");
-
-   /* Pixel data may come from regular user memory or a PBO.  For the later,
-    * do bounds checking and map the PBO to read pixels data from it.
-    *
-    * XXX we should try to use a GPU-accelerated path to copy the image data
-    * from the PBO to the texture.
-    */
-   if (compressed_src) {
-      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels,
-						      unpack,
-						      "glCompressedTexImage");
-   }
-   else {
-      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1,
-					   format, type,
-					   pixels, unpack, "glTexImage");
-   }
-
-   /*
-    * Prepare to store the texture data.  Either map the gallium texture buffer
-    * memory or malloc space for it.
-    */
-   if (stImage->pt) {
-      /* Store the image in the gallium texture memory buffer */
-      if (format == GL_DEPTH_COMPONENT &&
-          util_format_is_depth_and_stencil(stImage->pt->format))
-         transfer_usage = PIPE_TRANSFER_READ_WRITE;
-      else
-         transfer_usage = PIPE_TRANSFER_WRITE;
-
-      texImage->Data = st_texture_image_map(st, stImage, 0,
-                                            transfer_usage, 0, 0, width, height);
-      if(stImage->transfer)
-         dstRowStride = stImage->transfer->stride;
-   }
-   else {
-      /* Allocate regular memory and store the image there temporarily.   */
-      GLuint imageSize = _mesa_format_image_size(texImage->TexFormat,
-                                                 width, height, depth);
-      dstRowStride = _mesa_format_row_stride(texImage->TexFormat, width);
-
-      texImage->Data = _mesa_align_malloc(imageSize, 16);
-   }
-
-   if (!texImage->Data) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-      return;
-   }
-
-   if (!pixels) {
-      /* We've allocated texture memory, but have no pixel data - all done. */
-      goto done;
-   }
-
-   DBG("Upload image %dx%dx%d row_len %x pitch %x\n",
-       width, height, depth, width, dstRowStride);
-
-   /* Copy user texture image into the texture buffer.
-    */
-   if (compressed_src) {
-      const GLuint srcRowStride =
-         _mesa_format_row_stride(texImage->TexFormat, width);
-      if (dstRowStride == srcRowStride) {
-         memcpy(texImage->Data, pixels, imageSize);
-      }
-      else {
-         char *dst = texImage->Data;
-         const char *src = pixels;
-         GLuint i, bw, bh, lines;
-         _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
-         lines = (height + bh - 1) / bh;
-
-         for (i = 0; i < lines; ++i) {
-            memcpy(dst, src, srcRowStride);
-            dst += dstRowStride;
-            src += srcRowStride;
-         }
-      }
-   }
-   else {
-      const GLuint srcImageStride =
-         _mesa_image_image_stride(unpack, width, height, format, type);
-      GLint i;
-      const GLubyte *src = (const GLubyte *) pixels;
-
-      for (i = 0; i < depth; i++) {
-	 if (!_mesa_texstore(ctx, dims, 
-                             texImage->_BaseFormat, 
-                             texImage->TexFormat, 
-                             texImage->Data,
-                             0, 0, 0, /* dstX/Y/Zoffset */
-                             dstRowStride,
-                             texImage->ImageOffsets,
-                             width, height, 1,
-                             format, type, src, unpack)) {
-	    _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-	 }
-
-	 if (stImage->pt && i + 1 < depth) {
-            /* unmap this slice */
-	    st_texture_image_unmap(st, stImage);
-            /* map next slice of 3D texture */
-	    texImage->Data = st_texture_image_map(st, stImage, i + 1,
-                                                  transfer_usage, 0, 0,
-                                                  width, height);
-	    src += srcImageStride;
-	 }
-      }
-   }
-
-done:
-   _mesa_unmap_teximage_pbo(ctx, unpack);
-
-   if (stImage->pt && texImage->Data) {
-      st_texture_image_unmap(st, stImage);
-      texImage->Data = NULL;
-   }
-}
-
-
-static void
-st_TexImage3D(struct gl_context * ctx,
-              GLenum target, GLint level,
-              GLint internalFormat,
-              GLint width, GLint height, GLint depth,
-              GLint border,
-              GLenum format, GLenum type, const void *pixels,
-              const struct gl_pixelstore_attrib *unpack,
-              struct gl_texture_object *texObj,
-              struct gl_texture_image *texImage)
-{
-   st_TexImage(ctx, 3, target, level, internalFormat, width, height, depth,
-               border, format, type, pixels, unpack, texObj, texImage,
-               0, GL_FALSE);
-}
-
-
-static void
-st_TexImage2D(struct gl_context * ctx,
-              GLenum target, GLint level,
-              GLint internalFormat,
-              GLint width, GLint height, GLint border,
-              GLenum format, GLenum type, const void *pixels,
-              const struct gl_pixelstore_attrib *unpack,
-              struct gl_texture_object *texObj,
-              struct gl_texture_image *texImage)
-{
-   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
-               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
-}
-
-
-static void
-st_TexImage1D(struct gl_context * ctx,
-              GLenum target, GLint level,
-              GLint internalFormat,
-              GLint width, GLint border,
-              GLenum format, GLenum type, const void *pixels,
-              const struct gl_pixelstore_attrib *unpack,
-              struct gl_texture_object *texObj,
-              struct gl_texture_image *texImage)
-{
-   st_TexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border,
-               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
-}
-
-
-static void
-st_CompressedTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                        GLint internalFormat,
-                        GLint width, GLint height, GLint border,
-                        GLsizei imageSize, const GLvoid *data,
-                        struct gl_texture_object *texObj,
-                        struct gl_texture_image *texImage)
-{
-   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
-               0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
-}
-
-
-
-/**
- * glGetTexImage() helper: decompress a compressed texture by rendering
- * a textured quad.  Store the results in the user's buffer.
- */
-static void
-decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level,
-                     GLenum format, GLenum type, GLvoid *pixels,
-                     struct gl_texture_object *texObj,
-                     struct gl_texture_image *texImage)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   struct st_texture_object *stObj = st_texture_object(texObj);
-   struct pipe_sampler_view *src_view =
-      st_get_texture_sampler_view(stObj, pipe);
-   const GLuint width = texImage->Width;
-   const GLuint height = texImage->Height;
-   struct pipe_surface *dst_surface;
-   struct pipe_resource *dst_texture;
-   struct pipe_transfer *tex_xfer;
-   unsigned bind = (PIPE_BIND_RENDER_TARGET | /* util_blit may choose to render */
-		    PIPE_BIND_TRANSFER_READ);
-
-   /* create temp / dest surface */
-   if (!util_create_rgba_surface(pipe, width, height, bind,
-                                 &dst_texture, &dst_surface)) {
-      _mesa_problem(ctx, "util_create_rgba_surface() failed "
-                    "in decompress_with_blit()");
-      return;
-   }
-
-   /* blit/render/decompress */
-   util_blit_pixels_tex(st->blit,
-                        src_view,      /* pipe_resource (src) */
-                        0, 0,             /* src x0, y0 */
-                        width, height,    /* src x1, y1 */
-                        dst_surface,      /* pipe_surface (dst) */
-                        0, 0,             /* dst x0, y0 */
-                        width, height,    /* dst x1, y1 */
-                        0.0,              /* z */
-                        PIPE_TEX_MIPFILTER_NEAREST);
-
-   /* map the dst_surface so we can read from it */
-   tex_xfer = pipe_get_transfer(st_context(ctx)->pipe,
-                                dst_texture, 0, 0,
-                                PIPE_TRANSFER_READ,
-                                0, 0, width, height);
-
-   pixels = _mesa_map_pbo_dest(ctx, &ctx->Pack, pixels);
-
-   /* copy/pack data into user buffer */
-   if (st_equal_formats(stImage->pt->format, format, type)) {
-      /* memcpy */
-      const uint bytesPerRow = width * util_format_get_blocksize(stImage->pt->format);
-      ubyte *map = pipe_transfer_map(pipe, tex_xfer);
-      GLuint row;
-      for (row = 0; row < height; row++) {
-         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
-                                              height, format, type, row, 0);
-         memcpy(dest, map, bytesPerRow);
-         map += tex_xfer->stride;
-      }
-      pipe_transfer_unmap(pipe, tex_xfer);
-   }
-   else {
-      /* format translation via floats */
-      GLuint row;
-      enum pipe_format format = util_format_linear(dst_texture->format);
-      for (row = 0; row < height; row++) {
-         const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */
-         GLfloat rgba[4 * MAX_WIDTH];
-         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
-                                              height, format, type, row, 0);
-
-         if (ST_DEBUG & DEBUG_FALLBACK)
-            debug_printf("%s: fallback format translation\n", __FUNCTION__);
-
-         /* get float[4] rgba row from surface */
-         pipe_get_tile_rgba_format(pipe, tex_xfer, 0, row, width, 1,
-                                   format, rgba);
-
-         _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
-                                    type, dest, &ctx->Pack, transferOps);
-      }
-   }
-
-   _mesa_unmap_pbo_dest(ctx, &ctx->Pack);
-
-   pipe->transfer_destroy(pipe, tex_xfer);
-
-   /* destroy the temp / dest surface */
-   util_destroy_rgba_surface(dst_texture, dst_surface);
-}
-
-
-
-/**
- * Need to map texture image into memory before copying image data,
- * then unmap it.
- */
-static void
-st_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
-                 GLenum format, GLenum type, GLvoid * pixels,
-                 struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage, GLboolean compressed_dst)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   const GLuint dstImageStride =
-      _mesa_image_image_stride(&ctx->Pack, texImage->Width, texImage->Height,
-                               format, type);
-   GLuint depth, i;
-   GLubyte *dest;
-
-   if (stImage->pt &&
-       util_format_is_s3tc(stImage->pt->format) &&
-       !compressed_dst) {
-      /* Need to decompress the texture.
-       * We'll do this by rendering a textured quad.
-       * Note that we only expect RGBA formats (no Z/depth formats).
-       */
-      decompress_with_blit(ctx, target, level, format, type, pixels,
-                           texObj, texImage);
-      return;
-   }
-
-   /* Map */
-   if (stImage->pt) {
-      /* Image is stored in hardware format in a buffer managed by the
-       * kernel.  Need to explicitly map and unmap it.
-       */
-      texImage->Data = st_texture_image_map(st, stImage, 0,
-                                            PIPE_TRANSFER_READ, 0, 0,
-                                            stImage->base.Width,
-                                            stImage->base.Height);
-      /* compute stride in texels from stride in bytes */
-      texImage->RowStride = stImage->transfer->stride
-         * util_format_get_blockwidth(stImage->pt->format)
-         / util_format_get_blocksize(stImage->pt->format);
-   }
-   else {
-      /* Otherwise, the image should actually be stored in
-       * texImage->Data.  This is pretty confusing for
-       * everybody, I'd much prefer to separate the two functions of
-       * texImage->Data - storage for texture images in main memory
-       * and access (ie mappings) of images.  In other words, we'd
-       * create a new texImage->Map field and leave Data simply for
-       * storage.
-       */
-      assert(texImage->Data);
-   }
-
-   depth = texImage->Depth;
-   texImage->Depth = 1;
-
-   dest = (GLubyte *) pixels;
-
-   _mesa_set_fetch_functions(texImage, get_texture_dims(target));
-
-   for (i = 0; i < depth; i++) {
-      if (compressed_dst) {
-	 _mesa_get_compressed_teximage(ctx, target, level, dest,
-				       texObj, texImage);
-      }
-      else {
-	 _mesa_get_teximage(ctx, target, level, format, type, dest,
-			    texObj, texImage);
-      }
-
-      if (stImage->pt && i + 1 < depth) {
-         /* unmap this slice */
-	 st_texture_image_unmap(st, stImage);
-         /* map next slice of 3D texture */
-	 texImage->Data = st_texture_image_map(st, stImage, i + 1,
-                                               PIPE_TRANSFER_READ, 0, 0,
-                                               stImage->base.Width,
-                                               stImage->base.Height);
-	 dest += dstImageStride;
-      }
-   }
-
-   texImage->Depth = depth;
-
-   /* Unmap */
-   if (stImage->pt) {
-      st_texture_image_unmap(st, stImage);
-      texImage->Data = NULL;
-   }
-}
-
-
-static void
-st_GetTexImage(struct gl_context * ctx, GLenum target, GLint level,
-               GLenum format, GLenum type, GLvoid * pixels,
-               struct gl_texture_object *texObj,
-               struct gl_texture_image *texImage)
-{
-   st_get_tex_image(ctx, target, level, format, type, pixels, texObj, texImage,
-                    GL_FALSE);
-}
-
-
-static void
-st_GetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint level,
-                         GLvoid *pixels,
-                         struct gl_texture_object *texObj,
-                         struct gl_texture_image *texImage)
-{
-   st_get_tex_image(ctx, target, level, 0, 0, pixels, texObj, texImage,
-                    GL_TRUE);
-}
-
-
-
-static void
-st_TexSubimage(struct gl_context *ctx, GLint dims, GLenum target, GLint level,
-               GLint xoffset, GLint yoffset, GLint zoffset,
-               GLint width, GLint height, GLint depth,
-               GLenum format, GLenum type, const void *pixels,
-               const struct gl_pixelstore_attrib *packing,
-               struct gl_texture_object *texObj,
-               struct gl_texture_image *texImage)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   GLuint dstRowStride;
-   const GLuint srcImageStride =
-      _mesa_image_image_stride(packing, width, height, format, type);
-   GLint i;
-   const GLubyte *src;
-   /* init to silence warning only: */
-   enum pipe_transfer_usage transfer_usage = PIPE_TRANSFER_WRITE;
-
-   DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
-       _mesa_lookup_enum_by_nr(target),
-       level, xoffset, yoffset, width, height);
-
-   pixels =
-      _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
-                                  type, pixels, packing, "glTexSubImage2D");
-   if (!pixels)
-      return;
-
-   /* Map buffer if necessary.  Need to lock to prevent other contexts
-    * from uploading the buffer under us.
-    */
-   if (stImage->pt) {
-      if (format == GL_DEPTH_COMPONENT &&
-          util_format_is_depth_and_stencil(stImage->pt->format))
-         transfer_usage = PIPE_TRANSFER_READ_WRITE;
-      else
-         transfer_usage = PIPE_TRANSFER_WRITE;
-
-      texImage->Data = st_texture_image_map(st, stImage, zoffset, 
-                                            transfer_usage,
-                                            xoffset, yoffset,
-                                            width, height);
-   }
-
-   if (!texImage->Data) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
-      goto done;
-   }
-
-   src = (const GLubyte *) pixels;
-   dstRowStride = stImage->transfer->stride;
-
-   for (i = 0; i < depth; i++) {
-      if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
-                          texImage->TexFormat,
-                          texImage->Data,
-                          0, 0, 0,
-                          dstRowStride,
-                          texImage->ImageOffsets,
-                          width, height, 1,
-                          format, type, src, packing)) {
-	 _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
-      }
-
-      if (stImage->pt && i + 1 < depth) {
-         /* unmap this slice */
-	 st_texture_image_unmap(st, stImage);
-         /* map next slice of 3D texture */
-	 texImage->Data = st_texture_image_map(st, stImage,
-                                               zoffset + i + 1,
-                                               transfer_usage,
-                                               xoffset, yoffset,
-                                               width, height);
-	 src += srcImageStride;
-      }
-   }
-
-done:
-   _mesa_unmap_teximage_pbo(ctx, packing);
-
-   if (stImage->pt && texImage->Data) {
-      st_texture_image_unmap(st, stImage);
-      texImage->Data = NULL;
-   }
-}
-
-
-
-static void
-st_TexSubImage3D(struct gl_context *ctx, GLenum target, GLint level,
-                 GLint xoffset, GLint yoffset, GLint zoffset,
-                 GLsizei width, GLsizei height, GLsizei depth,
-                 GLenum format, GLenum type, const GLvoid *pixels,
-                 const struct gl_pixelstore_attrib *packing,
-                 struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage)
-{
-   st_TexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset,
-                  width, height, depth, format, type,
-                  pixels, packing, texObj, texImage);
-}
-
-
-static void
-st_TexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                 GLint xoffset, GLint yoffset,
-                 GLsizei width, GLsizei height,
-                 GLenum format, GLenum type, const GLvoid * pixels,
-                 const struct gl_pixelstore_attrib *packing,
-                 struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage)
-{
-   st_TexSubimage(ctx, 2, target, level, xoffset, yoffset, 0,
-                  width, height, 1, format, type,
-                  pixels, packing, texObj, texImage);
-}
-
-
-static void
-st_TexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                 GLint xoffset, GLsizei width, GLenum format, GLenum type,
-                 const GLvoid * pixels,
-                 const struct gl_pixelstore_attrib *packing,
-                 struct gl_texture_object *texObj,
-                 struct gl_texture_image *texImage)
-{
-   st_TexSubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1,
-                  format, type, pixels, packing, texObj, texImage);
-}
-
-
-static void
-st_CompressedTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
-                           GLint xoffset, GLsizei width,
-                           GLenum format,
-                           GLsizei imageSize, const GLvoid *data,
-                           struct gl_texture_object *texObj,
-                           struct gl_texture_image *texImage)
-{
-   assert(0);
-}
-
-
-static void
-st_CompressedTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
-                           GLint xoffset, GLint yoffset,
-                           GLsizei width, GLint height,
-                           GLenum format,
-                           GLsizei imageSize, const GLvoid *data,
-                           struct gl_texture_object *texObj,
-                           struct gl_texture_image *texImage)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   int srcBlockStride;
-   int dstBlockStride;
-   int y;
-   enum pipe_format pformat;
-
-   if (stImage->pt) {
-      pformat = stImage->pt->format;
-
-      texImage->Data = st_texture_image_map(st, stImage, 0, 
-                                            PIPE_TRANSFER_WRITE,
-                                            xoffset, yoffset,
-                                            width, height);
-      
-      srcBlockStride = util_format_get_stride(pformat, width);
-      dstBlockStride = stImage->transfer->stride;
-   } else {
-      assert(stImage->pt);
-      /* TODO find good values for block and strides */
-      /* TODO also adjust texImage->data for yoffset/xoffset */
-      return;
-   }
-
-   if (!texImage->Data) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage");
-      return;
-   }
-
-   assert(xoffset % util_format_get_blockwidth(pformat) == 0);
-   assert(yoffset % util_format_get_blockheight(pformat) == 0);
-
-   for (y = 0; y < height; y += util_format_get_blockheight(pformat)) {
-      /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */
-      const char *src = (const char*)data + srcBlockStride * util_format_get_nblocksy(pformat, y);
-      char *dst = (char*)texImage->Data + dstBlockStride * util_format_get_nblocksy(pformat, y);
-      memcpy(dst, src, util_format_get_stride(pformat, width));
-   }
-
-   if (stImage->pt) {
-      st_texture_image_unmap(st, stImage);
-      texImage->Data = NULL;
-   }
-}
-
-
-static void
-st_CompressedTexSubImage3D(struct gl_context *ctx, GLenum target, GLint level,
-                           GLint xoffset, GLint yoffset, GLint zoffset,
-                           GLsizei width, GLint height, GLint depth,
-                           GLenum format,
-                           GLsizei imageSize, const GLvoid *data,
-                           struct gl_texture_object *texObj,
-                           struct gl_texture_image *texImage)
-{
-   assert(0);
-}
-
-
-
-/**
- * Do a CopyTexSubImage operation using a read transfer from the source,
- * a write transfer to the destination and get_tile()/put_tile() to access
- * the pixels/texels.
- *
- * Note: srcY=0=TOP of renderbuffer
- */
-static void
-fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level,
-                          struct st_renderbuffer *strb,
-                          struct st_texture_image *stImage,
-                          GLenum baseFormat,
-                          GLint destX, GLint destY, GLint destZ,
-                          GLint srcX, GLint srcY,
-                          GLsizei width, GLsizei height)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_transfer *src_trans;
-   GLvoid *texDest;
-   enum pipe_transfer_usage transfer_usage;
-
-   if (ST_DEBUG & DEBUG_FALLBACK)
-      debug_printf("%s: fallback processing\n", __FUNCTION__);
-
-   assert(width <= MAX_WIDTH);
-
-   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-      srcY = strb->Base.Height - srcY - height;
-   }
-
-   src_trans = pipe_get_transfer(st_context(ctx)->pipe,
-                                 strb->texture,
-                                 0, 0,
-                                 PIPE_TRANSFER_READ,
-                                 srcX, srcY,
-                                 width, height);
-
-   if ((baseFormat == GL_DEPTH_COMPONENT ||
-        baseFormat == GL_DEPTH_STENCIL) &&
-       util_format_is_depth_and_stencil(stImage->pt->format))
-      transfer_usage = PIPE_TRANSFER_READ_WRITE;
-   else
-      transfer_usage = PIPE_TRANSFER_WRITE;
-
-   /* XXX this used to ignore destZ param */
-   texDest = st_texture_image_map(st, stImage, destZ, transfer_usage,
-                                  destX, destY, width, height);
-
-   if (baseFormat == GL_DEPTH_COMPONENT ||
-       baseFormat == GL_DEPTH_STENCIL) {
-      const GLboolean scaleOrBias = (ctx->Pixel.DepthScale != 1.0F ||
-                                     ctx->Pixel.DepthBias != 0.0F);
-      GLint row, yStep;
-
-      /* determine bottom-to-top vs. top-to-bottom order for src buffer */
-      if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-         srcY = height - 1;
-         yStep = -1;
-      }
-      else {
-         srcY = 0;
-         yStep = 1;
-      }
-
-      /* To avoid a large temp memory allocation, do copy row by row */
-      for (row = 0; row < height; row++, srcY += yStep) {
-         uint data[MAX_WIDTH];
-         pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data);
-         if (scaleOrBias) {
-            _mesa_scale_and_bias_depth_uint(ctx, width, data);
-         }
-         pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data);
-      }
-   }
-   else {
-      /* RGBA format */
-      GLfloat *tempSrc =
-         (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
-
-      if (tempSrc && texDest) {
-         const GLint dims = 2;
-         const GLint dstRowStride = stImage->transfer->stride;
-         struct gl_texture_image *texImage = &stImage->base;
-         struct gl_pixelstore_attrib unpack = ctx->DefaultPacking;
-
-         if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-            unpack.Invert = GL_TRUE;
-         }
-
-         /* get float/RGBA image from framebuffer */
-         /* XXX this usually involves a lot of int/float conversion.
-          * try to avoid that someday.
-          */
-         pipe_get_tile_rgba_format(pipe, src_trans, 0, 0, width, height,
-                                   util_format_linear(strb->texture->format),
-                                   tempSrc);
-
-         /* Store into texture memory.
-          * Note that this does some special things such as pixel transfer
-          * ops and format conversion.  In particular, if the dest tex format
-          * is actually RGBA but the user created the texture as GL_RGB we
-          * need to fill-in/override the alpha channel with 1.0.
-          */
-         _mesa_texstore(ctx, dims,
-                        texImage->_BaseFormat, 
-                        texImage->TexFormat, 
-                        texDest,
-                        0, 0, 0,
-                        dstRowStride,
-                        texImage->ImageOffsets,
-                        width, height, 1,
-                        GL_RGBA, GL_FLOAT, tempSrc, /* src */
-                        &unpack);
-      }
-      else {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
-      }
-
-      if (tempSrc)
-         free(tempSrc);
-   }
-
-   st_texture_image_unmap(st, stImage);
-   pipe->transfer_destroy(pipe, src_trans);
-}
-
-
-
-/**
- * If the format of the src renderbuffer and the format of the dest
- * texture are compatible (in terms of blitting), return a TGSI writemask
- * to be used during the blit.
- * If the src/dest are incompatible, return 0.
- */
-static unsigned
-compatible_src_dst_formats(struct gl_context *ctx,
-                           const struct gl_renderbuffer *src,
-                           const struct gl_texture_image *dst)
-{
-   /* Get logical base formats for the src and dest.
-    * That is, use the user-requested formats and not the actual, device-
-    * chosen formats.
-    * For example, the user may have requested an A8 texture but the
-    * driver may actually be using an RGBA texture format.  When we
-    * copy/blit to that texture, we only want to copy the Alpha channel
-    * and not the RGB channels.
-    *
-    * Similarly, when the src FBO was created an RGB format may have been
-    * requested but the driver actually chose an RGBA format.  In that case,
-    * we don't want to copy the undefined Alpha channel to the dest texture
-    * (it should be 1.0).
-    */
-   const GLenum srcFormat = _mesa_base_fbo_format(ctx, src->InternalFormat);
-   const GLenum dstFormat = _mesa_base_tex_format(ctx, dst->InternalFormat);
-
-   /**
-    * XXX when we have red-only and red/green renderbuffers we'll need
-    * to add more cases here (or implement a general-purpose routine that
-    * queries the existance of the R,G,B,A channels in the src and dest).
-    */
-   if (srcFormat == dstFormat) {
-      /* This is the same as matching_base_formats, which should
-       * always pass, as it did previously.
-       */
-      return TGSI_WRITEMASK_XYZW;
-   }
-   else if (srcFormat == GL_RGB && dstFormat == GL_RGBA) {
-      /* Make sure that A in the dest is 1.  The actual src format
-       * may be RGBA and have undefined A values.
-       */
-      return TGSI_WRITEMASK_XYZ;
-   }
-   else if (srcFormat == GL_RGBA && dstFormat == GL_RGB) {
-      /* Make sure that A in the dest is 1.  The actual dst format
-       * may be RGBA and will need A=1 to provide proper alpha values
-       * when sampled later.
-       */
-      return TGSI_WRITEMASK_XYZ;
-   }
-   else {
-      if (ST_DEBUG & DEBUG_FALLBACK)
-         debug_printf("%s failed for src %s, dst %s\n",
-                      __FUNCTION__, 
-                      _mesa_lookup_enum_by_nr(srcFormat),
-                      _mesa_lookup_enum_by_nr(dstFormat));
-
-      /* Otherwise fail.
-       */
-      return 0;
-   }
-}
-
-
-
-/**
- * Do a CopyTex[Sub]Image1/2/3D() using a hardware (blit) path if possible.
- * Note that the region to copy has already been clipped so we know we
- * won't read from outside the source renderbuffer's bounds.
- *
- * Note: srcY=0=Bottom of renderbuffer (GL convention)
- */
-static void
-st_copy_texsubimage(struct gl_context *ctx,
-                    GLenum target, GLint level,
-                    GLint destX, GLint destY, GLint destZ,
-                    GLint srcX, GLint srcY,
-                    GLsizei width, GLsizei height)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-   struct st_texture_image *stImage = st_texture_image(texImage);
-   const GLenum texBaseFormat = texImage->_BaseFormat;
-   struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct st_renderbuffer *strb;
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_screen *screen = pipe->screen;
-   enum pipe_format dest_format, src_format;
-   GLboolean use_fallback = GL_TRUE;
-   GLboolean matching_base_formats;
-   GLuint format_writemask, sample_count;
-   struct pipe_surface *dest_surface = NULL;
-   GLboolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP);
-
-   /* make sure finalize_textures has been called? 
-    */
-   if (0) st_validate_state(st);
-
-   /* determine if copying depth or color data */
-   if (texBaseFormat == GL_DEPTH_COMPONENT ||
-       texBaseFormat == GL_DEPTH_STENCIL) {
-      strb = st_renderbuffer(fb->_DepthBuffer);
-      if (strb->Base.Wrapped) {
-         strb = st_renderbuffer(strb->Base.Wrapped);
-      }
-   }
-   else {
-      /* texBaseFormat == GL_RGB, GL_RGBA, GL_ALPHA, etc */
-      strb = st_renderbuffer(fb->_ColorReadBuffer);
-   }
-
-   if (!strb || !strb->surface || !stImage->pt) {
-      debug_printf("%s: null strb or stImage\n", __FUNCTION__);
-      return;
-   }
-
-   sample_count = strb->surface->texture->nr_samples;
-   /* I believe this would be legal, presumably would need to do a resolve
-      for color, and for depth/stencil spec says to just use one of the
-      depth/stencil samples per pixel? Need some transfer clarifications. */
-   assert(sample_count < 2);
-
-   if (srcX < 0) {
-      width -= -srcX;
-      destX += -srcX;
-      srcX = 0;
-   }
-
-   if (srcY < 0) {
-      height -= -srcY;
-      destY += -srcY;
-      srcY = 0;
-   }
-
-   if (destX < 0) {
-      width -= -destX;
-      srcX += -destX;
-      destX = 0;
-   }
-
-   if (destY < 0) {
-      height -= -destY;
-      srcY += -destY;
-      destY = 0;
-   }
-
-   if (width < 0 || height < 0)
-      return;
-
-
-   assert(strb);
-   assert(strb->surface);
-   assert(stImage->pt);
-
-   src_format = strb->surface->format;
-   dest_format = stImage->pt->format;
-
-   /*
-    * Determine if the src framebuffer and dest texture have the same
-    * base format.  We need this to detect a case such as the framebuffer
-    * being GL_RGBA but the texture being GL_RGB.  If the actual hardware
-    * texture format stores RGBA we need to set A=1 (overriding the
-    * framebuffer's alpha values).  We can't do that with the blit or
-    * textured-quad paths.
-    */
-   matching_base_formats =
-      (_mesa_get_format_base_format(strb->Base.Format) ==
-       _mesa_get_format_base_format(texImage->TexFormat));
-   format_writemask = compatible_src_dst_formats(ctx, &strb->Base, texImage);
-
-   if (ctx->_ImageTransferState == 0x0) {
-
-      if (matching_base_formats &&
-          src_format == dest_format &&
-          !do_flip)
-      {
-         /* use surface_copy() / blit */
-         struct pipe_box src_box;
-         u_box_2d_zslice(srcX, srcY, strb->surface->u.tex.first_layer,
-                         width, height, &src_box);
-
-         /* for resource_copy_region(), y=0=top, always */
-         pipe->resource_copy_region(pipe,
-                                    /* dest */
-                                    stImage->pt,
-                                    stImage->level,
-                                    destX, destY, destZ + stImage->face,
-                                    /* src */
-                                    strb->texture,
-                                    strb->surface->u.tex.level,
-                                    &src_box);
-         use_fallback = GL_FALSE;
-      }
-      else if (format_writemask &&
-               texBaseFormat != GL_DEPTH_COMPONENT &&
-               texBaseFormat != GL_DEPTH_STENCIL &&
-               screen->is_format_supported(screen, src_format,
-                                           PIPE_TEXTURE_2D, sample_count,
-                                           PIPE_BIND_SAMPLER_VIEW,
-                                           0) &&
-               screen->is_format_supported(screen, dest_format,
-                                           PIPE_TEXTURE_2D, 0,
-                                           PIPE_BIND_RENDER_TARGET,
-                                           0)) {
-         /* draw textured quad to do the copy */
-         GLint srcY0, srcY1;
-         struct pipe_surface surf_tmpl;
-         memset(&surf_tmpl, 0, sizeof(surf_tmpl));
-         surf_tmpl.format = stImage->pt->format;
-         surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
-         surf_tmpl.u.tex.level = stImage->level;
-         surf_tmpl.u.tex.first_layer = stImage->face + destZ;
-         surf_tmpl.u.tex.last_layer = stImage->face + destZ;
-
-         dest_surface = pipe->create_surface(pipe, stImage->pt,
-                                             &surf_tmpl);
-
-         if (do_flip) {
-            srcY1 = strb->Base.Height - srcY - height;
-            srcY0 = srcY1 + height;
-         }
-         else {
-            srcY0 = srcY;
-            srcY1 = srcY0 + height;
-         }
-
-         util_blit_pixels_writemask(st->blit,
-                                    strb->texture,
-                                    strb->surface->u.tex.level,
-                                    srcX, srcY0,
-                                    srcX + width, srcY1,
-                                    strb->surface->u.tex.first_layer,
-                                    dest_surface,
-                                    destX, destY,
-                                    destX + width, destY + height,
-                                    0.0, PIPE_TEX_MIPFILTER_NEAREST,
-                                    format_writemask);
-         use_fallback = GL_FALSE;
-      }
-
-      if (dest_surface)
-         pipe_surface_reference(&dest_surface, NULL);
-   }
-
-   if (use_fallback) {
-      /* software fallback */
-      fallback_copy_texsubimage(ctx, target, level,
-                                strb, stImage, texBaseFormat,
-                                destX, destY, destZ,
-                                srcX, srcY, width, height);
-   }
-}
-
-
-
-static void
-st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
-                          width, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, 1);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                  GLenum internalFormat,
-                  GLint x, GLint y, GLsizei width, GLsizei height,
-                  GLint border)
-{
-   struct gl_texture_unit *texUnit =
-      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
-   struct gl_texture_object *texObj =
-      _mesa_select_tex_object(ctx, texUnit, target);
-   struct gl_texture_image *texImage =
-      _mesa_select_tex_image(ctx, texObj, target, level);
-
-   /* Setup or redefine the texture object, texture and texture
-    * image.  Don't populate yet.  
-    */
-   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
-                          width, height, border,
-                          GL_RGBA, CHAN_TYPE, NULL,
-                          &ctx->DefaultPacking, texObj, texImage);
-
-   st_copy_texsubimage(ctx, target, level,
-                       0, 0, 0,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
-                     GLint xoffset, GLint x, GLint y, GLsizei width)
-{
-   const GLint yoffset = 0, zoffset = 0;
-   const GLsizei height = 1;
-   st_copy_texsubimage(ctx, target, level,
-                       xoffset, yoffset, zoffset,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
-                     GLint xoffset, GLint yoffset,
-                     GLint x, GLint y, GLsizei width, GLsizei height)
-{
-   const GLint zoffset = 0;
-   st_copy_texsubimage(ctx, target, level,
-                       xoffset, yoffset, zoffset,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
-static void
-st_CopyTexSubImage3D(struct gl_context * ctx, GLenum target, GLint level,
-                     GLint xoffset, GLint yoffset, GLint zoffset,
-                     GLint x, GLint y, GLsizei width, GLsizei height)
-{
-   st_copy_texsubimage(ctx, target, level,
-                       xoffset, yoffset, zoffset,  /* destX,Y,Z */
-                       x, y, width, height);  /* src X, Y, size */
-}
-
-
-/**
- * Copy image data from stImage into the texture object 'stObj' at level
- * 'dstLevel'.
- */
-static void
-copy_image_data_to_texture(struct st_context *st,
-			   struct st_texture_object *stObj,
-                           GLuint dstLevel,
-			   struct st_texture_image *stImage)
-{
-   /* debug checks */
-   {
-      const struct gl_texture_image *dstImage =
-         stObj->base.Image[stImage->face][stImage->level];
-      assert(dstImage);
-      assert(dstImage->Width == stImage->base.Width);
-      assert(dstImage->Height == stImage->base.Height);
-      assert(dstImage->Depth == stImage->base.Depth);
-   }
-
-   if (stImage->pt) {
-      /* Copy potentially with the blitter:
-       */
-      st_texture_image_copy(st->pipe,
-                            stObj->pt, dstLevel,  /* dest texture, level */
-                            stImage->pt, stImage->level, /* src texture, level */
-                            stImage->face);
-
-      pipe_resource_reference(&stImage->pt, NULL);
-   }
-   else if (stImage->base.Data) {
-      st_texture_image_data(st,
-                            stObj->pt,
-                            stImage->face,
-                            dstLevel,
-                            stImage->base.Data,
-                            stImage->base.RowStride * 
-                            util_format_get_blocksize(stObj->pt->format),
-                            stImage->base.RowStride *
-                            stImage->base.Height *
-                            util_format_get_blocksize(stObj->pt->format));
-      _mesa_align_free(stImage->base.Data);
-      stImage->base.Data = NULL;
-   }
-
-   pipe_resource_reference(&stImage->pt, stObj->pt);
-}
-
-
-/**
- * Called during state validation.  When this function is finished,
- * the texture object should be ready for rendering.
- * \return GL_TRUE for success, GL_FALSE for failure (out of mem)
- */
-GLboolean
-st_finalize_texture(struct gl_context *ctx,
-		    struct pipe_context *pipe,
-		    struct gl_texture_object *tObj)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_texture_object *stObj = st_texture_object(tObj);
-   const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
-   GLuint face;
-   struct st_texture_image *firstImage;
-   enum pipe_format firstImageFormat;
-   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
-
-   if (stObj->base._Complete) {
-      /* The texture is complete and we know exactly how many mipmap levels
-       * are present/needed.  This is conditional because we may be called
-       * from the st_generate_mipmap() function when the texture object is
-       * incomplete.  In that case, we'll have set stObj->lastLevel before
-       * we get here.
-       */
-      if (stObj->base.MinFilter == GL_LINEAR ||
-          stObj->base.MinFilter == GL_NEAREST)
-         stObj->lastLevel = stObj->base.BaseLevel;
-      else
-         stObj->lastLevel = stObj->base._MaxLevel;
-   }
-
-   firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
-   assert(firstImage);
-
-   /* If both firstImage and stObj point to a texture which can contain
-    * all active images, favour firstImage.  Note that because of the
-    * completeness requirement, we know that the image dimensions
-    * will match.
-    */
-   if (firstImage->pt &&
-       firstImage->pt != stObj->pt &&
-       (!stObj->pt || firstImage->pt->last_level >= stObj->pt->last_level)) {
-      pipe_resource_reference(&stObj->pt, firstImage->pt);
-      pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-   }
-
-   /* Find gallium format for the Mesa texture */
-   firstImageFormat = st_mesa_format_to_pipe_format(firstImage->base.TexFormat);
-   st_gl_texture_dims_to_pipe_dims(stObj->base.Target, stObj->width0,
-                                   stObj->height0, stObj->depth0,
-                                   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
-
-   /* If we already have a gallium texture, check that it matches the texture
-    * object's format, target, size, num_levels, etc.
-    */
-   if (stObj->pt) {
-      if (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) ||
-          !st_sampler_compat_formats(stObj->pt->format, firstImageFormat) ||
-          stObj->pt->last_level < stObj->lastLevel ||
-          stObj->pt->width0 != ptWidth ||
-          stObj->pt->height0 != ptHeight ||
-          stObj->pt->depth0 != ptDepth ||
-          stObj->pt->array_size != ptLayers)
-      {
-         /* The gallium texture does not match the Mesa texture so delete the
-          * gallium texture now.  We'll make a new one below.
-          */
-         pipe_resource_reference(&stObj->pt, NULL);
-         pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-         st->dirty.st |= ST_NEW_FRAMEBUFFER;
-      }
-   }
-
-   /* May need to create a new gallium texture:
-    */
-   if (!stObj->pt) {
-      GLuint bindings = default_bindings(st, firstImageFormat);
-
-      stObj->pt = st_texture_create(st,
-                                    gl_target_to_pipe(stObj->base.Target),
-                                    firstImageFormat,
-                                    stObj->lastLevel,
-                                    ptWidth,
-                                    ptHeight,
-                                    ptDepth,
-                                    ptLayers,
-                                    bindings);
-
-      if (!stObj->pt) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
-         return GL_FALSE;
-      }
-   }
-
-   /* Pull in any images not in the object's texture:
-    */
-   for (face = 0; face < nr_faces; face++) {
-      GLuint level;
-      for (level = stObj->base.BaseLevel; level <= stObj->lastLevel; level++) {
-         struct st_texture_image *stImage =
-            st_texture_image(stObj->base.Image[face][level]);
-
-         /* Need to import images in main memory or held in other textures.
-          */
-         if (stImage && stObj->pt != stImage->pt) {
-            copy_image_data_to_texture(st, stObj, level, stImage);
-         }
-      }
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Returns pointer to a default/dummy texture.
- * This is typically used when the current shader has tex/sample instructions
- * but the user has not provided a (any) texture(s).
- */
-struct gl_texture_object *
-st_get_default_texture(struct st_context *st)
-{
-   if (!st->default_texture) {
-      static const GLenum target = GL_TEXTURE_2D;
-      GLubyte pixels[16][16][4];
-      struct gl_texture_object *texObj;
-      struct gl_texture_image *texImg;
-      GLuint i, j;
-
-      /* The ARB_fragment_program spec says (0,0,0,1) should be returned
-       * when attempting to sample incomplete textures.
-       */
-      for (i = 0; i < 16; i++) {
-         for (j = 0; j < 16; j++) {
-            pixels[i][j][0] = 0;
-            pixels[i][j][1] = 0;
-            pixels[i][j][2] = 0;
-            pixels[i][j][3] = 255;
-         }
-      }
-
-      texObj = st->ctx->Driver.NewTextureObject(st->ctx, 0, target);
-
-      texImg = _mesa_get_tex_image(st->ctx, texObj, target, 0);
-
-      _mesa_init_teximage_fields(st->ctx, target, texImg,
-                                 16, 16, 1, 0,  /* w, h, d, border */
-                                 GL_RGBA, MESA_FORMAT_RGBA8888);
-
-      st_TexImage(st->ctx, 2, target,
-                  0, GL_RGBA,    /* level, intformat */
-                  16, 16, 1, 0,  /* w, h, d, border */
-                  GL_RGBA, GL_UNSIGNED_BYTE, pixels,
-                  &st->ctx->DefaultPacking,
-                  texObj, texImg,
-                  0, 0);
-
-      texObj->MinFilter = GL_NEAREST;
-      texObj->MagFilter = GL_NEAREST;
-      texObj->_Complete = GL_TRUE;
-
-      st->default_texture = texObj;
-   }
-   return st->default_texture;
-}
-
-
-void
-st_init_texture_functions(struct dd_function_table *functions)
-{
-   functions->ChooseTextureFormat = st_ChooseTextureFormat;
-   functions->TexImage1D = st_TexImage1D;
-   functions->TexImage2D = st_TexImage2D;
-   functions->TexImage3D = st_TexImage3D;
-   functions->TexSubImage1D = st_TexSubImage1D;
-   functions->TexSubImage2D = st_TexSubImage2D;
-   functions->TexSubImage3D = st_TexSubImage3D;
-   functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
-   functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
-   functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
-   functions->CopyTexImage1D = st_CopyTexImage1D;
-   functions->CopyTexImage2D = st_CopyTexImage2D;
-   functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
-   functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
-   functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
-   functions->GenerateMipmap = st_generate_mipmap;
-
-   functions->GetTexImage = st_GetTexImage;
-
-   /* compressed texture functions */
-   functions->CompressedTexImage2D = st_CompressedTexImage2D;
-   functions->GetCompressedTexImage = st_GetCompressedTexImage;
-
-   functions->NewTextureObject = st_NewTextureObject;
-   functions->NewTextureImage = st_NewTextureImage;
-   functions->DeleteTexture = st_DeleteTextureObject;
-   functions->FreeTexImageData = st_FreeTextureImageData;
-
-   functions->TextureMemCpy = do_memcpy;
-
-   /* XXX Temporary until we can query pipe's texture sizes */
-   functions->TestProxyTexImage = _mesa_test_proxy_teximage;
-}
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/mfeatures.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/fbobject.h"
+#include "main/formats.h"
+#include "main/image.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mipmap.h"
+#include "main/pack.h"
+#include "main/pixeltransfer.h"
+#include "main/texcompress.h"
+#include "main/texfetch.h"
+#include "main/texgetimage.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+
+#include "state_tracker/st_debug.h"
+#include "state_tracker/st_context.h"
+#include "state_tracker/st_cb_fbo.h"
+#include "state_tracker/st_cb_flush.h"
+#include "state_tracker/st_cb_texture.h"
+#include "state_tracker/st_format.h"
+#include "state_tracker/st_texture.h"
+#include "state_tracker/st_gen_mipmap.h"
+#include "state_tracker/st_atom.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_tile.h"
+#include "util/u_blit.h"
+#include "util/u_format.h"
+#include "util/u_surface.h"
+#include "util/u_sampler.h"
+#include "util/u_math.h"
+#include "util/u_box.h"
+
+#define DBG if (0) printf
+
+/** Map a GL texture target enum to the corresponding gallium PIPE_TEXTURE_x. */
+static enum pipe_texture_target
+gl_target_to_pipe(GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_1D:
+      return PIPE_TEXTURE_1D;
+   case GL_TEXTURE_2D:
+      return PIPE_TEXTURE_2D;
+   case GL_TEXTURE_RECTANGLE_NV:
+      return PIPE_TEXTURE_RECT;
+   case GL_TEXTURE_3D:
+      return PIPE_TEXTURE_3D;
+   case GL_TEXTURE_CUBE_MAP_ARB:
+      return PIPE_TEXTURE_CUBE;
+   case GL_TEXTURE_1D_ARRAY_EXT:
+      return PIPE_TEXTURE_1D_ARRAY;
+   case GL_TEXTURE_2D_ARRAY_EXT:
+      return PIPE_TEXTURE_2D_ARRAY;
+   default:
+      assert(0);   /* target has no mapping in this table */
+      return 0;    /* value returned if the assert is compiled out */
+   }
+}
+
+
+/** Called via ctx->Driver.NewTextureImage(); the allocation is returned unchecked. */
+static struct gl_texture_image *
+st_NewTextureImage(struct gl_context * ctx)
+{
+   DBG("%s\n", __FUNCTION__);
+   (void) ctx;   /* ctx is not used by this hook */
+   return (struct gl_texture_image *) ST_CALLOC_STRUCT(st_texture_image);
+}
+
+
+/** Called via ctx->Driver.NewTextureObject().  Returns NULL if allocation fails. */
+static struct gl_texture_object *
+st_NewTextureObject(struct gl_context * ctx, GLuint name, GLenum target)
+{
+   struct st_texture_object *obj = ST_CALLOC_STRUCT(st_texture_object);
+   DBG("%s\n", __FUNCTION__);
+   if (!obj)
+      return NULL;   /* don't initialize through a NULL pointer */
+   _mesa_initialize_texture_object(&obj->base, name, target);
+   return &obj->base;
+}
+
+/** Called via ctx->Driver.DeleteTexture(): drop gallium references, then delete. */
+static void 
+st_DeleteTextureObject(struct gl_context *ctx,
+                       struct gl_texture_object *texObj)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_object *stObj = st_texture_object(texObj);
+   if (stObj->pt)
+      pipe_resource_reference(&stObj->pt, NULL);   /* release the texture resource */
+   if (stObj->sampler_view) {
+      if (stObj->sampler_view->context != st->pipe) {
+         /* Take "ownership" of this texture sampler view by setting
+          * its context pointer to this context.  This avoids potential
+          * crashes when the texture object is shared among contexts
+          * and the original/owner context has already been destroyed.
+          */
+         stObj->sampler_view->context = st->pipe;
+      }
+      pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+   }
+   _mesa_delete_texture_object(ctx, texObj);   /* hand texObj to core Mesa last */
+}
+
+
+/** Called via ctx->Driver.FreeTexImageData(): release the image's storage. */
+static void
+st_FreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texImage)
+{
+   struct st_texture_image *stImage = st_texture_image(texImage);
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (stImage->pt) {
+      pipe_resource_reference(&stImage->pt, NULL);   /* drop the gallium resource ref */
+   }
+
+   if (texImage->Data) {
+      _mesa_align_free(texImage->Data);   /* free the aligned system-memory copy */
+      texImage->Data = NULL;              /* so a second call is harmless */
+   }
+}
+
+
+/**
+ * memcpy variant from the linux kernel i386 header files; copes with odd
+ * sizes better than COPY_DWORDS would.  Inline asm on GCC/x86 builds,
+ * a plain alias for memcpy() elsewhere.  XXX Put this in src/mesa/main/imports.h ???
+ */
+#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
+static INLINE void *
+__memcpy(void *to, const void *from, size_t n)
+{
+   int d0, d1, d2;   /* dummy outputs bound to the count/dest/src registers */
+   __asm__ __volatile__("rep ; movsl\n\t"         /* copy n / 4 32-bit words */
+                        "testb $2,%b4\n\t"        /* is bit 1 of n set? */
+                        "je 1f\n\t"
+                        "movsw\n"                 /* yes: copy 2 more bytes */
+                        "1:\ttestb $1,%b4\n\t"    /* is bit 0 of n set? */
+                        "je 2f\n\t"
+                        "movsb\n" "2:":"=&c"(d0), "=&D"(d1), "=&S"(d2)
+                        :"0"(n / 4), "q"(n), "1"((long) to), "2"((long) from)
+                        :"memory");
+   return (to);
+}
+#else
+#define __memcpy(a,b,c) memcpy(a,b,c)   /* no asm version: use the system memcpy */
+#endif
+
+
+/**
+ * Copy \p n bytes from \p src to \p dest; uses __memcpy when either pointer
+ * is not 64-byte aligned and the system memcpy otherwise.
+ *
+ * The system memcpy (at least on ubuntu 5.10) has problems copying
+ * to agp (writecombined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy.
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for agp copies.
+ * TODO: switch dynamically.
+ */
+static void *
+do_memcpy(void *dest, const void *src, size_t n)
+{
+   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) {
+      return __memcpy(dest, src, n);   /* low 6 address bits set: unaligned */
+   }
+   else
+      return memcpy(dest, src, n);
+}
+
+
+/**
+ * Return the default PIPE_BIND_x bitmask to create a texture of \p format with.
+ */
+static GLuint
+default_bindings(struct st_context *st, enum pipe_format format)
+{
+   struct pipe_screen *screen = st->pipe->screen;
+   const unsigned target = PIPE_TEXTURE_2D;   /* support is always queried for 2D */
+   const unsigned geom = 0x0;
+   unsigned bindings;
+   /* Prefer a binding that allows more than sampling from the texture. */
+   if (util_format_is_depth_or_stencil(format))
+      bindings = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DEPTH_STENCIL;
+   else
+      bindings = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   /* Fall back to sampling only if the screen rejects that combination. */
+   if (screen->is_format_supported(screen, format, target, 0, bindings, geom))
+      return bindings;
+   else
+      return PIPE_BIND_SAMPLER_VIEW;
+}
+
+
+/** Return number of image dimensions (1, 2 or 3) for a texture target. */
+static GLuint
+get_texture_dims(GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY_EXT:
+      return 1;   /* the array variant counts the same as plain 1D */
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_CUBE_MAP_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_2D_ARRAY_EXT:
+      return 2;   /* cube map, each cube face, rectangle and 2D array are all 2D */
+   case GL_TEXTURE_3D:
+      return 3;
+   default:
+      assert(0 && "invalid texture target in get_texture_dims()");
+      return 1;   /* value returned if the assert is compiled out */
+   }
+}
+
+
+/**
+ * Given the size of mipmap image \p level, try to compute the size of the
+ * level=0 mipmap image by doubling each dimension that is > 1, once per level.
+ *
+ * Note that this isn't always accurate for odd-sized, non-POW textures.
+ * For example, if level=1 and width=40 then the level=0 width may be 80 or 81.
+ *
+ * \return GL_TRUE and the size in width0/height0/depth0, GL_FALSE for failure
+ */
+static GLboolean
+guess_base_level_size(GLenum target,
+                      GLuint width, GLuint height, GLuint depth, GLuint level,
+                      GLuint *width0, GLuint *height0, GLuint *depth0)
+{ 
+   const GLuint dims = get_texture_dims(target);
+
+   assert(width >= 1);
+   assert(height >= 1);
+   assert(depth >= 1);
+
+   if (level > 0) {
+      /* Depending on the image's size, we can't always make a guess here */
+      if ((dims >= 1 && width == 1) ||
+          (dims >= 2 && height == 1) ||
+          (dims >= 3 && depth == 1)) {
+         /* we can't determine the image size at level=0 */
+         return GL_FALSE;   /* the output pointers are left untouched */
+      }
+
+      /* grow the image size until we hit level = 0 */
+      while (level > 0) {
+         if (width > 1)
+            width <<= 1;
+         if (height > 1)
+            height <<= 1;   /* a dimension of 1 (unused by the target) stays 1 */
+         if (depth > 1)
+            depth <<= 1;
+         level--;
+      }
+   }      
+
+   *width0 = width;   /* for level == 0 the inputs are passed through unchanged */
+   *height0 = height;
+   *depth0 = depth;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Try to allocate a pipe_resource object for the given st_texture_object.
+ *
+ * We use the given st_texture_image as a clue to determine the size of the
+ * mipmap image at level=0.  If that size can't be guessed, no resource is
+ * created (stObj->pt stays NULL) and GL_TRUE is still returned.
+ * \return GL_TRUE for success, GL_FALSE if out of memory.
+ */
+static GLboolean
+guess_and_alloc_texture(struct st_context *st,
+			struct st_texture_object *stObj,
+			const struct st_texture_image *stImage)
+{
+   GLuint lastLevel, width, height, depth;
+   GLuint bindings;
+   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
+   enum pipe_format fmt;
+
+   DBG("%s\n", __FUNCTION__);
+
+   assert(!stObj->pt);   /* caller must have released any previous resource */
+
+   if (!guess_base_level_size(stObj->base.Target,
+                              stImage->base.Width2,
+                              stImage->base.Height2,
+                              stImage->base.Depth2,
+                              stImage->level,
+                              &width, &height, &depth)) {
+      /* we can't determine the image size at level=0 */
+      stObj->width0 = stObj->height0 = stObj->depth0 = 0;
+      /* this is not an out of memory error */
+      return GL_TRUE;
+   }
+
+   /* At this point, (width x height x depth) is the expected size of
+    * the level=0 mipmap image.
+    */
+
+   /* Guess a reasonable value for lastLevel.  With OpenGL we have no
+    * idea how many mipmap levels will be in a texture until we start
+    * to render with it.  Make an educated guess here but be prepared
+    * to re-allocate a texture buffer with space for more (or fewer)
+    * mipmap levels later.
+    */
+   if ((stObj->base.MinFilter == GL_NEAREST ||
+        stObj->base.MinFilter == GL_LINEAR ||
+        stImage->base._BaseFormat == GL_DEPTH_COMPONENT ||
+        stImage->base._BaseFormat == GL_DEPTH_STENCIL_EXT) &&
+       !stObj->base.GenerateMipmap &&
+       stImage->level == 0) {
+      /* only alloc space for a single mipmap level */
+      lastLevel = 0;
+   }
+   else {
+      /* alloc space for a full mipmap */
+      GLuint l2width = util_logbase2(width);
+      GLuint l2height = util_logbase2(height);
+      GLuint l2depth = util_logbase2(depth);
+      lastLevel = MAX2(MAX2(l2width, l2height), l2depth);   /* largest dimension wins */
+   }
+
+   /* Save the level=0 dimensions */
+   stObj->width0 = width;
+   stObj->height0 = height;
+   stObj->depth0 = depth;
+
+   fmt = st_mesa_format_to_pipe_format(stImage->base.TexFormat);
+
+   bindings = default_bindings(st, fmt);
+
+   st_gl_texture_dims_to_pipe_dims(stObj->base.Target,
+                                   width, height, depth,
+                                   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
+
+   stObj->pt = st_texture_create(st,
+                                 gl_target_to_pipe(stObj->base.Target),
+                                 fmt,
+                                 lastLevel,
+                                 ptWidth,
+                                 ptHeight,
+                                 ptDepth,
+                                 ptLayers,
+                                 bindings);
+
+   DBG("%s returning %d\n", __FUNCTION__, (stObj->pt != NULL));
+
+   return stObj->pt != NULL;   /* a NULL resource is reported as failure */
+}
+
+
+/**
+ * Adjust pixel unpack params and image dimensions to strip off the
+ * texture border.  Gallium doesn't support texture borders.  They've seldom
+ * been used and seldom been implemented correctly anyway.
+ * \param width, height, depth  in/out sizes; height and depth may be NULL
+ * \param unpackNew  returns the new pixel unpack parameters
+ */
+static void
+strip_texture_border(GLint border,
+                     GLint *width, GLint *height, GLint *depth,
+                     const struct gl_pixelstore_attrib *unpack,
+                     struct gl_pixelstore_attrib *unpackNew)
+{
+   assert(border > 0);  /* sanity check */
+
+   *unpackNew = *unpack;   /* start from a copy of the caller's parameters */
+
+   if (unpackNew->RowLength == 0)
+      unpackNew->RowLength = *width;   /* pin the row length to the bordered width */
+
+   if (depth && unpackNew->ImageHeight == 0)
+      unpackNew->ImageHeight = *height;   /* likewise for the bordered height */
+
+   unpackNew->SkipPixels += border;   /* skip the border texels when unpacking */
+   if (height)
+      unpackNew->SkipRows += border;
+   if (depth)
+      unpackNew->SkipImages += border;
+
+   assert(*width >= 3);
+   *width = *width - 2 * border;
+   if (height && *height >= 3)   /* sizes below 3 are left unchanged */
+      *height = *height - 2 * border;
+   if (depth && *depth >= 3)
+      *depth = *depth - 2 * border;
+}
+
+
+/**
+ * Do glTexImage1/2/3D() and, when \p compressed_src is set,
+ * glCompressedTexImage2D().
+ * \param dims  number of image dimensions (1, 2 or 3)
+ * \param imageSize  size of \p pixels in bytes; only used for compressed data
+ * \param compressed_src  GL_TRUE if \p pixels holds already-compressed data
+ * Errors are reported through _mesa_error(); nothing is returned.
+ */
+static void
+st_TexImage(struct gl_context * ctx,
+            GLint dims,
+            GLenum target, GLint level,
+            GLint internalFormat,
+            GLint width, GLint height, GLint depth,
+            GLint border,
+            GLenum format, GLenum type, const void *pixels,
+            const struct gl_pixelstore_attrib *unpack,
+            struct gl_texture_object *texObj,
+            struct gl_texture_image *texImage,
+            GLsizei imageSize, GLboolean compressed_src)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_object *stObj = st_texture_object(texObj);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   GLuint dstRowStride = 0;
+   struct gl_pixelstore_attrib unpackNB;
+   enum pipe_transfer_usage transfer_usage = 0;
+
+   DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
+
+   /* switch to "normal" */
+   if (stObj->surface_based) {
+      gl_format texFormat;
+
+      _mesa_clear_texture_object(ctx, texObj);
+      pipe_resource_reference(&stObj->pt, NULL);
+
+      /* oops, need to init this image again */
+      texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
+                                              internalFormat, format, type);
+
+      _mesa_init_teximage_fields(ctx, target, texImage,
+                                 width, height, depth, border,
+                                 internalFormat, texFormat);
+
+      stObj->surface_based = GL_FALSE;
+   }
+
+   /* gallium does not support texture borders, strip it off */
+   if (border) {
+      strip_texture_border(border, &width, &height, &depth, unpack, &unpackNB);
+      unpack = &unpackNB;
+      texImage->Width = width;
+      texImage->Height = height;
+      texImage->Depth = depth;
+      texImage->Border = 0;
+      border = 0;
+   }
+   else {
+      assert(texImage->Width == width);
+      assert(texImage->Height == height);
+      assert(texImage->Depth == depth);
+   }
+
+   stImage->face = _mesa_tex_target_to_face(target);
+   stImage->level = level;
+
+   _mesa_set_fetch_functions(texImage, dims);
+
+   /* Release the reference to a potentially orphaned buffer.   
+    * Release any old malloced memory.
+    */
+   if (stImage->pt) {
+      pipe_resource_reference(&stImage->pt, NULL);
+      assert(!texImage->Data);
+   }
+   else if (texImage->Data) {
+      _mesa_align_free(texImage->Data);
+      texImage->Data = NULL;   /* no dangling pointer on the early returns below */
+   }
+
+   /* If the new image is incompatible with the existing mipmap, free the mipmap. */
+   if (stObj->pt) {
+      if (level > (GLint) stObj->pt->last_level ||
+          !st_texture_match_image(stObj->pt, &stImage->base,
+                                  stImage->face, stImage->level)) {
+         DBG("release it\n");
+         pipe_resource_reference(&stObj->pt, NULL);
+         assert(!stObj->pt);
+         pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+      }
+   }
+
+   if (width == 0 || height == 0 || depth == 0) {
+      /* stop after freeing old image */
+      return;
+   }
+
+   if (!stObj->pt) {
+      if (!guess_and_alloc_texture(st, stObj, stImage)) {
+         /* Probably out of memory.
+          * Try flushing any pending rendering, then retry.
+          */
+         st_finish(st);
+         if (!guess_and_alloc_texture(st, stObj, stImage)) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+            return;
+         }
+      }
+   }
+
+   assert(!stImage->pt);
+
+   /* Check if this texture image can live inside the texture object's buffer.
+    * If so, store the image there.  Otherwise the image will temporarily live
+    * in its own buffer. */
+   if (stObj->pt &&
+       st_texture_match_image(stObj->pt, &stImage->base,
+                              stImage->face, stImage->level)) {
+
+      pipe_resource_reference(&stImage->pt, stObj->pt);
+      assert(stImage->pt);
+   }
+
+   if (!stImage->pt)
+      DBG("XXX: Image did not fit into texture - storing in local memory!\n");
+
+   /* Pixel data may come from regular user memory or a PBO.  For the latter,
+    * do bounds checking (over all 'depth' slices) and map the PBO to read from.
+    * XXX we should try to use a GPU-accelerated path to copy the image data
+    * from the PBO to the texture.
+    */
+   if (compressed_src) {
+      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels,
+						      unpack,
+						      "glCompressedTexImage");
+   }
+   else {
+      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+					   format, type,
+					   pixels, unpack, "glTexImage");
+   }
+
+   /* Prepare to store the texture data.  Either map the gallium texture buffer
+    * memory or malloc space for it. */
+   if (stImage->pt) {
+      /* Store the image in the gallium texture memory buffer */
+      if (format == GL_DEPTH_COMPONENT &&
+          util_format_is_depth_and_stencil(stImage->pt->format))
+         transfer_usage = PIPE_TRANSFER_READ_WRITE;
+      else
+         transfer_usage = PIPE_TRANSFER_WRITE;
+
+      texImage->Data = st_texture_image_map(st, stImage, 0,
+                                            transfer_usage, 0, 0, width, height);
+      if(stImage->transfer)
+         dstRowStride = stImage->transfer->stride;
+   }
+   else {
+      /* Allocate regular memory and store the image there temporarily.   */
+      GLuint allocSize = _mesa_format_image_size(texImage->TexFormat,
+                                                 width, height, depth);
+      dstRowStride = _mesa_format_row_stride(texImage->TexFormat, width);
+
+      texImage->Data = _mesa_align_malloc(allocSize, 16);
+   }
+
+   if (!texImage->Data) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+      goto done;   /* a PBO mapped above must still be unmapped */
+   }
+
+   if (!pixels) {
+      /* We've allocated texture memory, but have no pixel data - all done. */
+      goto done;
+   }
+
+   DBG("Upload image %dx%dx%d row_len %x pitch %x\n",
+       width, height, depth, width, dstRowStride);
+
+   /* Copy user texture image into the texture buffer. */
+   if (compressed_src) {
+      const GLuint srcRowStride =
+         _mesa_format_row_stride(texImage->TexFormat, width);
+      if (dstRowStride == srcRowStride) {
+         memcpy(texImage->Data, pixels, imageSize);
+      }
+      else {
+         char *dst = texImage->Data;
+         const char *src = pixels;
+         GLuint i, bw, bh, lines;
+         _mesa_get_format_block_size(texImage->TexFormat, &bw, &bh);
+         lines = (height + bh - 1) / bh;
+
+         for (i = 0; i < lines; ++i) {
+            memcpy(dst, src, srcRowStride);
+            dst += dstRowStride;
+            src += srcRowStride;
+         }
+      }
+   }
+   else {
+      const GLuint srcImageStride =
+         _mesa_image_image_stride(unpack, width, height, format, type);
+      GLint i;
+      const GLubyte *src = (const GLubyte *) pixels;
+
+      for (i = 0; i < depth; i++) {
+	 if (!_mesa_texstore(ctx, dims, 
+                             texImage->_BaseFormat, 
+                             texImage->TexFormat, 
+                             texImage->Data,
+                             0, 0, 0, /* dstX/Y/Zoffset */
+                             dstRowStride,
+                             texImage->ImageOffsets,
+                             width, height, 1,
+                             format, type, src, unpack)) {
+	    _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+	 }
+
+	 /* NOTE(review): src and the destination only advance when stImage->pt is
+	  * set; malloc'ed storage looks like it gets slice 0 written every pass. */
+	 if (stImage->pt && i + 1 < depth) {
+            /* unmap this slice */
+	    st_texture_image_unmap(st, stImage);
+            /* map next slice of 3D texture */
+	    texImage->Data = st_texture_image_map(st, stImage, i + 1,
+                                                  transfer_usage, 0, 0,
+                                                  width, height);
+	    src += srcImageStride;
+	 }
+      }
+   }
+
+done:
+   _mesa_unmap_teximage_pbo(ctx, unpack);
+
+   if (stImage->pt && texImage->Data) {
+      st_texture_image_unmap(st, stImage);
+      texImage->Data = NULL;
+   }
+}
+
+/** 3D entry point: forwards to st_TexImage() with dims = 3, uncompressed source. */
+static void
+st_TexImage3D(struct gl_context * ctx,
+              GLenum target, GLint level,
+              GLint internalFormat,
+              GLint width, GLint height, GLint depth,
+              GLint border,
+              GLenum format, GLenum type, const void *pixels,
+              const struct gl_pixelstore_attrib *unpack,
+              struct gl_texture_object *texObj,
+              struct gl_texture_image *texImage)
+{
+   st_TexImage(ctx, 3, target, level, internalFormat, width, height, depth,
+               border, format, type, pixels, unpack, texObj, texImage,
+               0, GL_FALSE);   /* imageSize = 0, compressed_src = GL_FALSE */
+}
+
+/** 2D entry point: forwards to st_TexImage() with dims = 2 and depth = 1. */
+static void
+st_TexImage2D(struct gl_context * ctx,
+              GLenum target, GLint level,
+              GLint internalFormat,
+              GLint width, GLint height, GLint border,
+              GLenum format, GLenum type, const void *pixels,
+              const struct gl_pixelstore_attrib *unpack,
+              struct gl_texture_object *texObj,
+              struct gl_texture_image *texImage)
+{
+   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
+               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
+}
+
+/** 1D entry point: forwards to st_TexImage() with dims = 1, height = depth = 1. */
+static void
+st_TexImage1D(struct gl_context * ctx,
+              GLenum target, GLint level,
+              GLint internalFormat,
+              GLint width, GLint border,
+              GLenum format, GLenum type, const void *pixels,
+              const struct gl_pixelstore_attrib *unpack,
+              struct gl_texture_object *texObj,
+              struct gl_texture_image *texImage)
+{
+   st_TexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border,
+               format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE);
+}
+
+/** Compressed 2D entry point: st_TexImage() with compressed_src = GL_TRUE. */
+static void
+st_CompressedTexImage2D(struct gl_context *ctx, GLenum target, GLint level,
+                        GLint internalFormat,
+                        GLint width, GLint height, GLint border,
+                        GLsizei imageSize, const GLvoid *data,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage)
+{
+   st_TexImage(ctx, 2, target, level, internalFormat, width, height, 1, border,
+               0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
+}
+
+
+
+/**
+ * glGetTexImage() helper: decompress a compressed texture by rendering
+ * a textured quad into a temporary RGBA surface, then read that surface
+ * back and pack the texels into the user's buffer.
+ *
+ * \param ctx       GL context
+ * \param target    texture target (unused in this function)
+ * \param level     mipmap level (unused in this function)
+ * \param format    GL format of the data to return
+ * \param type      GL data type of the data to return
+ * \param pixels    destination; resolved through _mesa_map_pbo_dest()
+ * \param texObj    texture object whose sampler view is the blit source
+ * \param texImage  image whose Width/Height size the temporary surface
+ *
+ * NOTE(review): 'level' is never consulted here; confirm the sampler view
+ * addresses the intended mipmap level.
+ */
+static void
+decompress_with_blit(struct gl_context * ctx, GLenum target, GLint level,
+                     GLenum format, GLenum type, GLvoid *pixels,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct st_texture_object *stObj = st_texture_object(texObj);
+   struct pipe_sampler_view *src_view =
+      st_get_texture_sampler_view(stObj, pipe);
+   const GLuint width = texImage->Width;
+   const GLuint height = texImage->Height;
+   struct pipe_surface *dst_surface;
+   struct pipe_resource *dst_texture;
+   struct pipe_transfer *tex_xfer;
+   unsigned bind = (PIPE_BIND_RENDER_TARGET | /* util_blit may choose to render */
+                    PIPE_BIND_TRANSFER_READ);
+
+   /* create temp / dest surface */
+   if (!util_create_rgba_surface(pipe, width, height, bind,
+                                 &dst_texture, &dst_surface)) {
+      _mesa_problem(ctx, "util_create_rgba_surface() failed "
+                    "in decompress_with_blit()");
+      return;
+   }
+
+   /* blit/render/decompress */
+   util_blit_pixels_tex(st->blit,
+                        src_view,         /* pipe_sampler_view (src) */
+                        0, 0,             /* src x0, y0 */
+                        width, height,    /* src x1, y1 */
+                        dst_surface,      /* pipe_surface (dst) */
+                        0, 0,             /* dst x0, y0 */
+                        width, height,    /* dst x1, y1 */
+                        0.0,              /* z */
+                        PIPE_TEX_MIPFILTER_NEAREST);
+
+   /* get a read transfer on the dst texture so we can read from it */
+   tex_xfer = pipe_get_transfer(pipe,
+                                dst_texture, 0, 0,
+                                PIPE_TRANSFER_READ,
+                                0, 0, width, height);
+
+   pixels = _mesa_map_pbo_dest(ctx, &ctx->Pack, pixels);
+
+   /* copy/pack data into user buffer */
+   /* NOTE(review): this compares the source image's format, while the
+    * bytes copied below come from dst_texture -- confirm which format
+    * is intended.
+    */
+   if (st_equal_formats(stImage->pt->format, format, type)) {
+      /* memcpy */
+      const uint bytesPerRow = width * util_format_get_blocksize(stImage->pt->format);
+      ubyte *map = pipe_transfer_map(pipe, tex_xfer);
+      GLuint row;
+      for (row = 0; row < height; row++) {
+         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
+                                              height, format, type, row, 0);
+         memcpy(dest, map, bytesPerRow);
+         map += tex_xfer->stride;
+      }
+      pipe_transfer_unmap(pipe, tex_xfer);
+   }
+   else {
+      /* format translation via floats */
+      GLuint row;
+      /* Deliberately NOT named 'format': a local of that name would hide
+       * the GLenum parameter, and the GL packing helpers below would then
+       * be handed a pipe format instead of the GL format the caller
+       * requested.
+       */
+      const enum pipe_format pformat = util_format_linear(dst_texture->format);
+      for (row = 0; row < height; row++) {
+         const GLbitfield transferOps = 0x0; /* bypassed for glGetTexImage() */
+         GLfloat rgba[4 * MAX_WIDTH];
+         GLvoid *dest = _mesa_image_address2d(&ctx->Pack, pixels, width,
+                                              height, format, type, row, 0);
+
+         if (ST_DEBUG & DEBUG_FALLBACK)
+            debug_printf("%s: fallback format translation\n", __FUNCTION__);
+
+         /* get float[4] rgba row from surface (pipe format) */
+         pipe_get_tile_rgba_format(pipe, tex_xfer, 0, row, width, 1,
+                                   pformat, rgba);
+
+         /* pack the row into the user's GL format/type */
+         _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
+                                    type, dest, &ctx->Pack, transferOps);
+      }
+   }
+
+   _mesa_unmap_pbo_dest(ctx, &ctx->Pack);
+
+   pipe->transfer_destroy(pipe, tex_xfer);
+
+   /* destroy the temp / dest surface */
+   util_destroy_rgba_surface(dst_texture, dst_surface);
+}
+
+
+
+/**
+ * Shared implementation of st_GetTexImage() and st_GetCompressedTexImage().
+ *
+ * The texture image is mapped into memory (when it lives in a pipe
+ * resource), read back one depth slice at a time into 'pixels', and
+ * unmapped again.
+ *
+ * If the image is stored in an S3TC format and uncompressed data is
+ * requested, the work is handed to decompress_with_blit() instead.
+ *
+ * \param format, type    GL format/type of the returned data; the
+ *                        compressed entry point passes 0 for both
+ * \param pixels          destination for the image data
+ * \param compressed_dst  GL_TRUE: read back with
+ *                        _mesa_get_compressed_teximage();
+ *                        GL_FALSE: read back with _mesa_get_teximage()
+ */
+static void
+st_get_tex_image(struct gl_context * ctx, GLenum target, GLint level,
+                 GLenum format, GLenum type, GLvoid * pixels,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage, GLboolean compressed_dst)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   /* distance between consecutive slices in the destination buffer */
+   const GLuint dstImageStride =
+      _mesa_image_image_stride(&ctx->Pack, texImage->Width, texImage->Height,
+                               format, type);
+   GLuint depth, i;
+   GLubyte *dest;
+
+   if (stImage->pt &&
+       util_format_is_s3tc(stImage->pt->format) &&
+       !compressed_dst) {
+      /* Need to decompress the texture.
+       * We'll do this by rendering a textured quad.
+       * Note that we only expect RGBA formats (no Z/depth formats).
+       */
+      decompress_with_blit(ctx, target, level, format, type, pixels,
+                           texObj, texImage);
+      return;
+   }
+
+   /* Map */
+   if (stImage->pt) {
+      /* Image is stored in hardware format in a buffer managed by the
+       * kernel.  Need to explicitly map and unmap it.
+       */
+      /* NOTE(review): the result of the map is not checked before
+       * stImage->transfer is dereferenced below -- confirm that the map
+       * cannot fail here.
+       */
+      texImage->Data = st_texture_image_map(st, stImage, 0,
+                                            PIPE_TRANSFER_READ, 0, 0,
+                                            stImage->base.Width,
+                                            stImage->base.Height);
+      /* compute stride in texels from stride in bytes */
+      texImage->RowStride = stImage->transfer->stride
+         * util_format_get_blockwidth(stImage->pt->format)
+         / util_format_get_blocksize(stImage->pt->format);
+   }
+   else {
+      /* Otherwise, the image should actually be stored in
+       * texImage->Data.  This is pretty confusing for
+       * everybody, I'd much prefer to separate the two functions of
+       * texImage->Data - storage for texture images in main memory
+       * and access (ie mappings) of images.  In other words, we'd
+       * create a new texImage->Map field and leave Data simply for
+       * storage.
+       */
+      assert(texImage->Data);
+   }
+
+   /* Temporarily present the image as a single slice; the real depth is
+    * restored after the loop.  Presumably this makes each helper call
+    * below read back exactly one 2D slice.
+    */
+   depth = texImage->Depth;
+   texImage->Depth = 1;
+
+   dest = (GLubyte *) pixels;
+
+   _mesa_set_fetch_functions(texImage, get_texture_dims(target));
+
+   for (i = 0; i < depth; i++) {
+      if (compressed_dst) {
+	 _mesa_get_compressed_teximage(ctx, target, level, dest,
+				       texObj, texImage);
+      }
+      else {
+	 _mesa_get_teximage(ctx, target, level, format, type, dest,
+			    texObj, texImage);
+      }
+
+      /* NOTE(review): 'dest' is advanced only inside this branch, so an
+       * image without a pipe resource (stImage->pt == NULL) and depth > 1
+       * writes every slice to the same destination address -- confirm
+       * that case cannot occur.
+       */
+      if (stImage->pt && i + 1 < depth) {
+         /* unmap this slice */
+	 st_texture_image_unmap(st, stImage);
+         /* map next slice of 3D texture */
+	 texImage->Data = st_texture_image_map(st, stImage, i + 1,
+                                               PIPE_TRANSFER_READ, 0, 0,
+                                               stImage->base.Width,
+                                               stImage->base.Height);
+	 dest += dstImageStride;
+      }
+   }
+
+   /* restore the real depth saved before the loop */
+   texImage->Depth = depth;
+
+   /* Unmap */
+   if (stImage->pt) {
+      st_texture_image_unmap(st, stImage);
+      texImage->Data = NULL;
+   }
+}
+
+
+/**
+ * Read back a texture image in the caller's GL format/type.
+ * Thin wrapper around st_get_tex_image() with compressed_dst = GL_FALSE.
+ */
+static void
+st_GetTexImage(struct gl_context * ctx, GLenum target, GLint level,
+               GLenum format, GLenum type, GLvoid * pixels,
+               struct gl_texture_object *texObj,
+               struct gl_texture_image *texImage)
+{
+   const GLboolean compressed_dst = GL_FALSE;
+
+   st_get_tex_image(ctx, target, level, format, type, pixels,
+                    texObj, texImage, compressed_dst);
+}
+
+
+/**
+ * Read back a texture image as compressed data.
+ * Thin wrapper around st_get_tex_image() with compressed_dst = GL_TRUE;
+ * zero is passed for both the GL format and the GL type.
+ */
+static void
+st_GetCompressedTexImage(struct gl_context *ctx, GLenum target, GLint level,
+                         GLvoid *pixels,
+                         struct gl_texture_object *texObj,
+                         struct gl_texture_image *texImage)
+{
+   const GLboolean compressed_dst = GL_TRUE;
+
+   st_get_tex_image(ctx, target, level,
+                    0, 0,   /* format, type */
+                    pixels, texObj, texImage, compressed_dst);
+}
+
+
+
+/**
+ * Common implementation of st_TexSubImage1D/2D/3D(): store a sub-region
+ * of user pixel data into an existing texture image, one depth slice at
+ * a time.
+ *
+ * \param dims     dimensionality of the calling entry point (1, 2 or 3)
+ * \param xoffset, yoffset, zoffset  origin of the destination region
+ * \param width, height, depth       size of the destination region
+ * \param format, type               GL format/type of the source pixels
+ * \param pixels   source data; validated/resolved with
+ *                 _mesa_validate_pbo_teximage()
+ * \param packing  unpack state describing the layout of 'pixels'
+ *
+ * GL_OUT_OF_MEMORY is recorded if a slice cannot be mapped or if
+ * _mesa_texstore() fails.
+ */
+static void
+st_TexSubimage(struct gl_context *ctx, GLint dims, GLenum target, GLint level,
+               GLint xoffset, GLint yoffset, GLint zoffset,
+               GLint width, GLint height, GLint depth,
+               GLenum format, GLenum type, const void *pixels,
+               const struct gl_pixelstore_attrib *packing,
+               struct gl_texture_object *texObj,
+               struct gl_texture_image *texImage)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   GLuint dstRowStride;
+   /* distance between consecutive slices in the source data */
+   const GLuint srcImageStride =
+      _mesa_image_image_stride(packing, width, height, format, type);
+   GLint i;
+   const GLubyte *src;
+   /* init to silence warning only: */
+   enum pipe_transfer_usage transfer_usage = PIPE_TRANSFER_WRITE;
+
+   DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       level, xoffset, yoffset, width, height);
+
+   pixels =
+      _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format,
+                                  type, pixels, packing, "glTexSubImage2D");
+   if (!pixels)
+      return;
+
+   /* Map buffer if necessary.  Need to lock to prevent other contexts
+    * from uploading the buffer under us.
+    */
+   if (stImage->pt) {
+      /* Writing only depth into a combined depth/stencil format: the
+       * mapping is requested read/write rather than write-only.
+       */
+      if (format == GL_DEPTH_COMPONENT &&
+          util_format_is_depth_and_stencil(stImage->pt->format))
+         transfer_usage = PIPE_TRANSFER_READ_WRITE;
+      else
+         transfer_usage = PIPE_TRANSFER_WRITE;
+
+      /* only the destination rectangle of slice 'zoffset' is mapped */
+      texImage->Data = st_texture_image_map(st, stImage, zoffset,
+                                            transfer_usage,
+                                            xoffset, yoffset,
+                                            width, height);
+   }
+
+   if (!texImage->Data) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+      goto done;
+   }
+
+   src = (const GLubyte *) pixels;
+   dstRowStride = stImage->transfer->stride;
+
+   for (i = 0; i < depth; i++) {
+      /* The mapping already starts at (xoffset, yoffset), so the store
+       * goes to offset 0,0,0 of the mapped region, one slice deep.
+       */
+      if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
+                          texImage->TexFormat,
+                          texImage->Data,
+                          0, 0, 0,
+                          dstRowStride,
+                          texImage->ImageOffsets,
+                          width, height, 1,
+                          format, type, src, packing)) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+      }
+
+      if (stImage->pt && i + 1 < depth) {
+         /* unmap this slice */
+         st_texture_image_unmap(st, stImage);
+         /* map next slice of 3D texture */
+         texImage->Data = st_texture_image_map(st, stImage,
+                                               zoffset + i + 1,
+                                               transfer_usage,
+                                               xoffset, yoffset,
+                                               width, height);
+         if (!texImage->Data) {
+            /* Treat a failed re-map like a failed initial map: report it
+             * and stop, rather than storing through a NULL pointer on the
+             * next iteration.  The cleanup below skips the unmap because
+             * Data is NULL.
+             */
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+            goto done;
+         }
+         src += srcImageStride;
+      }
+   }
+
+done:
+   _mesa_unmap_teximage_pbo(ctx, packing);
+
+   /* unmap only if a mapping is currently held */
+   if (stImage->pt && texImage->Data) {
+      st_texture_image_unmap(st, stImage);
+      texImage->Data = NULL;
+   }
+}
+
+
+
+/**
+ * 3D sub-image upload: forwards every argument to st_TexSubimage()
+ * with dims = 3.
+ */
+static void
+st_TexSubImage3D(struct gl_context *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLsizei width, GLsizei height, GLsizei depth,
+                 GLenum format, GLenum type, const GLvoid *pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   const GLint dims = 3;
+
+   st_TexSubimage(ctx, dims, target, level,
+                  xoffset, yoffset, zoffset,
+                  width, height, depth,
+                  format, type, pixels, packing,
+                  texObj, texImage);
+}
+
+
+/**
+ * 2D sub-image upload: forwards to st_TexSubimage() with dims = 2,
+ * a z offset of 0 and a depth of 1.
+ */
+static void
+st_TexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset,
+                 GLsizei width, GLsizei height,
+                 GLenum format, GLenum type, const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   const GLint dims = 2;
+   const GLint zoffset = 0;
+   const GLint depth = 1;
+
+   st_TexSubimage(ctx, dims, target, level,
+                  xoffset, yoffset, zoffset,
+                  width, height, depth,
+                  format, type, pixels, packing,
+                  texObj, texImage);
+}
+
+
+/**
+ * 1D sub-image upload: forwards to st_TexSubimage() with dims = 1,
+ * y/z offsets of 0 and a height and depth of 1.
+ */
+static void
+st_TexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
+                 GLint xoffset, GLsizei width, GLenum format, GLenum type,
+                 const GLvoid * pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   const GLint dims = 1;
+   const GLint yoffset = 0, zoffset = 0;
+   const GLint height = 1, depth = 1;
+
+   st_TexSubimage(ctx, dims, target, level,
+                  xoffset, yoffset, zoffset,
+                  width, height, depth,
+                  format, type, pixels, packing,
+                  texObj, texImage);
+}
+
+
+/**
+ * Compressed 1D sub-image upload: not implemented.
+ * The body is an unconditional assert(0); none of the arguments are used.
+ */
+static void
+st_CompressedTexSubImage1D(struct gl_context *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLsizei width,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   /* unsupported path: trips in builds with assertions enabled */
+   assert(0);
+}
+
+
+/**
+ * Replace a rectangle of a compressed 2D texture image with
+ * already-compressed data, copying one row of blocks at a time.
+ *
+ * xoffset and yoffset must be multiples of the format's block width and
+ * block height (asserted).  'format' and 'imageSize' are not examined.
+ * Only images backed by a pipe resource are handled; otherwise the
+ * function asserts and returns without copying.
+ */
+static void
+st_CompressedTexSubImage2D(struct gl_context *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset,
+                           GLsizei width, GLint height,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   enum pipe_format pformat;
+   int srcBlockStride, dstBlockStride;
+   int y;
+
+   if (!stImage->pt) {
+      /* No pipe resource behind this image.
+       * TODO find good values for block and strides
+       * TODO also adjust texImage->data for yoffset/xoffset
+       */
+      assert(stImage->pt);
+      return;
+   }
+
+   pformat = stImage->pt->format;
+
+   /* map just the destination rectangle for writing */
+   texImage->Data = st_texture_image_map(st, stImage, 0,
+                                         PIPE_TRANSFER_WRITE,
+                                         xoffset, yoffset,
+                                         width, height);
+
+   /* bytes per row of blocks: tightly packed source vs. mapped dest */
+   srcBlockStride = util_format_get_stride(pformat, width);
+   dstBlockStride = stImage->transfer->stride;
+
+   if (!texImage->Data) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage");
+      return;
+   }
+
+   assert(xoffset % util_format_get_blockwidth(pformat) == 0);
+   assert(yoffset % util_format_get_blockheight(pformat) == 0);
+
+   /* 'y' counts texel rows and steps by one block height per copy.
+    * No xoffset/yoffset adjustment is needed: the mapping above already
+    * starts at the destination rectangle.
+    */
+   for (y = 0; y < height; y += util_format_get_blockheight(pformat)) {
+      const char *srcRow = (const char*)data +
+         srcBlockStride * util_format_get_nblocksy(pformat, y);
+      char *dstRow = (char*)texImage->Data +
+         dstBlockStride * util_format_get_nblocksy(pformat, y);
+
+      memcpy(dstRow, srcRow, util_format_get_stride(pformat, width));
+   }
+
+   /* stImage->pt is known to be non-NULL here */
+   st_texture_image_unmap(st, stImage);
+   texImage->Data = NULL;
+}
+
+
+/**
+ * Compressed 3D sub-image upload: not implemented.
+ * The body is an unconditional assert(0); none of the arguments are used.
+ */
+static void
+st_CompressedTexSubImage3D(struct gl_context *ctx, GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset, GLint zoffset,
+                           GLsizei width, GLint height, GLint depth,
+                           GLenum format,
+                           GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   /* unsupported path: trips in builds with assertions enabled */
+   assert(0);
+}
+
+
+
+/**
+ * Do a CopyTexSubImage operation using a read transfer from the source,
+ * a write transfer to the destination and get_tile()/put_tile() to access
+ * the pixels/texels.
+ *
+ * Depth and depth/stencil data is copied row by row through a stack
+ * buffer (with optional depth scale/bias).  Color data is read as float
+ * RGBA into a temporary heap buffer and stored with _mesa_texstore().
+ *
+ * If the destination image cannot be mapped, or the temporary color
+ * buffer cannot be allocated, GL_OUT_OF_MEMORY is recorded and nothing
+ * is copied.
+ *
+ * \param strb        source renderbuffer
+ * \param stImage     destination texture image
+ * \param baseFormat  base format of the destination texture
+ * \param destX, destY, destZ  origin of the destination region
+ * \param srcX, srcY           origin of the source region
+ * \param width, height        size of the region (width <= MAX_WIDTH)
+ *
+ * Note: srcY=0=TOP of renderbuffer
+ */
+static void
+fallback_copy_texsubimage(struct gl_context *ctx, GLenum target, GLint level,
+                          struct st_renderbuffer *strb,
+                          struct st_texture_image *stImage,
+                          GLenum baseFormat,
+                          GLint destX, GLint destY, GLint destZ,
+                          GLint srcX, GLint srcY,
+                          GLsizei width, GLsizei height)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_transfer *src_trans;
+   GLvoid *texDest;
+   enum pipe_transfer_usage transfer_usage;
+
+   if (ST_DEBUG & DEBUG_FALLBACK)
+      debug_printf("%s: fallback processing\n", __FUNCTION__);
+
+   /* the depth path below uses a MAX_WIDTH-sized row buffer */
+   assert(width <= MAX_WIDTH);
+
+   if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+      srcY = strb->Base.Height - srcY - height;
+   }
+
+   src_trans = pipe_get_transfer(pipe,
+                                 strb->texture,
+                                 0, 0,
+                                 PIPE_TRANSFER_READ,
+                                 srcX, srcY,
+                                 width, height);
+
+   /* Writing depth into a combined depth/stencil format: request a
+    * read/write mapping rather than write-only.
+    */
+   if ((baseFormat == GL_DEPTH_COMPONENT ||
+        baseFormat == GL_DEPTH_STENCIL) &&
+       util_format_is_depth_and_stencil(stImage->pt->format))
+      transfer_usage = PIPE_TRANSFER_READ_WRITE;
+   else
+      transfer_usage = PIPE_TRANSFER_WRITE;
+
+   /* XXX this used to ignore destZ param */
+   texDest = st_texture_image_map(st, stImage, destZ, transfer_usage,
+                                  destX, destY, width, height);
+   if (!texDest) {
+      /* Nothing was mapped, so there is nothing to write to and nothing
+       * to unmap; st_TexSubimage() likewise skips the unmap when its map
+       * fails.  Release the source transfer and bail out.
+       */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+      pipe->transfer_destroy(pipe, src_trans);
+      return;
+   }
+
+   if (baseFormat == GL_DEPTH_COMPONENT ||
+       baseFormat == GL_DEPTH_STENCIL) {
+      const GLboolean scaleOrBias = (ctx->Pixel.DepthScale != 1.0F ||
+                                     ctx->Pixel.DepthBias != 0.0F);
+      GLint row, yStep;
+
+      /* determine bottom-to-top vs. top-to-bottom order for src buffer;
+       * from here on srcY is a row index relative to the source transfer
+       */
+      if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+         srcY = height - 1;
+         yStep = -1;
+      }
+      else {
+         srcY = 0;
+         yStep = 1;
+      }
+
+      /* To avoid a large temp memory allocation, do copy row by row */
+      for (row = 0; row < height; row++, srcY += yStep) {
+         uint data[MAX_WIDTH];
+         pipe_get_tile_z(pipe, src_trans, 0, srcY, width, 1, data);
+         if (scaleOrBias) {
+            _mesa_scale_and_bias_depth_uint(ctx, width, data);
+         }
+         pipe_put_tile_z(pipe, stImage->transfer, 0, row, width, 1, data);
+      }
+   }
+   else {
+      /* RGBA format */
+      GLfloat *tempSrc =
+         (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+      if (tempSrc) {
+         const GLint dims = 2;
+         const GLint dstRowStride = stImage->transfer->stride;
+         struct gl_texture_image *texImage = &stImage->base;
+         struct gl_pixelstore_attrib unpack = ctx->DefaultPacking;
+
+         /* have the texstore read the temporary image upside down when
+          * the read buffer is stored top-to-bottom
+          */
+         if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
+            unpack.Invert = GL_TRUE;
+         }
+
+         /* get float/RGBA image from framebuffer */
+         /* XXX this usually involves a lot of int/float conversion.
+          * try to avoid that someday.
+          */
+         pipe_get_tile_rgba_format(pipe, src_trans, 0, 0, width, height,
+                                   util_format_linear(strb->texture->format),
+                                   tempSrc);
+
+         /* Store into texture memory.
+          * Note that this does some special things such as pixel transfer
+          * ops and format conversion.  In particular, if the dest tex format
+          * is actually RGBA but the user created the texture as GL_RGB we
+          * need to fill-in/override the alpha channel with 1.0.
+          */
+         _mesa_texstore(ctx, dims,
+                        texImage->_BaseFormat,
+                        texImage->TexFormat,
+                        texDest,
+                        0, 0, 0,
+                        dstRowStride,
+                        texImage->ImageOffsets,
+                        width, height, 1,
+                        GL_RGBA, GL_FLOAT, tempSrc, /* src */
+                        &unpack);
+      }
+      else {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+      }
+
+      /* free(NULL) is a no-op, so no guard is needed */
+      free(tempSrc);
+   }
+
+   st_texture_image_unmap(st, stImage);
+   pipe->transfer_destroy(pipe, src_trans);
+}
+
+
+
+/**
+ * Decide whether a blit from the src renderbuffer to the dest texture
+ * image is possible, judged by their base formats.
+ *
+ * \return a TGSI writemask to use for the blit, or 0 when the two
+ *         formats are incompatible.
+ */
+static unsigned
+compatible_src_dst_formats(struct gl_context *ctx,
+                           const struct gl_renderbuffer *src,
+                           const struct gl_texture_image *dst)
+{
+   /* Work with the logical base formats, i.e. what the user requested,
+    * not what the device actually chose.
+    *
+    * Example: the user asked for an A8 texture but the driver stores it
+    * as RGBA.  A copy/blit into that texture must touch only the Alpha
+    * channel, not RGB.
+    *
+    * Likewise, an FBO requested as RGB may really be RGBA.  Its Alpha
+    * channel is undefined and must not be copied to the dest texture
+    * (dest alpha should be 1.0).
+    */
+   const GLenum srcBase = _mesa_base_fbo_format(ctx, src->InternalFormat);
+   const GLenum dstBase = _mesa_base_tex_format(ctx, dst->InternalFormat);
+
+   /**
+    * XXX when we have red-only and red/green renderbuffers we'll need
+    * to add more cases here (or implement a general-purpose routine that
+    * queries the existence of the R,G,B,A channels in the src and dest).
+    */
+
+   /* Identical base formats: equivalent to matching_base_formats, which
+    * should always pass, as it did previously.
+    */
+   if (srcBase == dstBase) {
+      return TGSI_WRITEMASK_XYZW;
+   }
+
+   /* RGB <-> RGBA: write only X, Y and Z so that alpha is left alone.
+    *  - RGB src, RGBA dst: the real src may be RGBA with undefined A.
+    *  - RGBA src, RGB dst: the real dst may be RGBA and needs A=1 to
+    *    give proper alpha values when sampled later.
+    */
+   if ((srcBase == GL_RGB && dstBase == GL_RGBA) ||
+       (srcBase == GL_RGBA && dstBase == GL_RGB)) {
+      return TGSI_WRITEMASK_XYZ;
+   }
+
+   /* Anything else is incompatible. */
+   if (ST_DEBUG & DEBUG_FALLBACK)
+      debug_printf("%s failed for src %s, dst %s\n",
+                   __FUNCTION__,
+                   _mesa_lookup_enum_by_nr(srcBase),
+                   _mesa_lookup_enum_by_nr(dstBase));
+
+   return 0;
+}
+
+
+
+/**
+ * Do a CopyTex[Sub]Image1/2/3D() using a hardware (blit) path if possible.
+ * Note that the region to copy has already been clipped so we know we
+ * won't read from outside the source renderbuffer's bounds.  Negative
+ * source or destination origins are additionally adjusted here.
+ *
+ * Three paths are tried in order:
+ *  1. resource_copy_region(), when base formats and pipe formats match
+ *     and no vertical flip is needed;
+ *  2. a textured-quad blit with a writemask, for compatible color formats
+ *     the screen can sample from and render to;
+ *  3. fallback_copy_texsubimage() otherwise, or whenever image transfer
+ *     state is active.
+ *
+ * \param destX, destY, destZ  origin of the destination region
+ * \param srcX, srcY           origin of the source region
+ * \param width, height        size of the region
+ *
+ * Note: srcY=0=Bottom of renderbuffer (GL convention)
+ */
+static void
+st_copy_texsubimage(struct gl_context *ctx,
+                    GLenum target, GLint level,
+                    GLint destX, GLint destY, GLint destZ,
+                    GLint srcX, GLint srcY,
+                    GLsizei width, GLsizei height)
+{
+   struct gl_texture_unit *texUnit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   const GLenum texBaseFormat = texImage->_BaseFormat;
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct st_renderbuffer *strb;
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   enum pipe_format dest_format, src_format;
+   GLboolean use_fallback = GL_TRUE;
+   GLboolean matching_base_formats;
+   GLuint format_writemask, sample_count;
+   struct pipe_surface *dest_surface = NULL;
+   GLboolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP);
+
+   /* make sure finalize_textures has been called?
+    */
+   if (0) st_validate_state(st);
+
+   /* determine if copying depth or color data */
+   if (texBaseFormat == GL_DEPTH_COMPONENT ||
+       texBaseFormat == GL_DEPTH_STENCIL) {
+      strb = st_renderbuffer(fb->_DepthBuffer);
+      /* strb is only validated further down, so guard the dereference */
+      if (strb && strb->Base.Wrapped) {
+         strb = st_renderbuffer(strb->Base.Wrapped);
+      }
+   }
+   else {
+      /* texBaseFormat == GL_RGB, GL_RGBA, GL_ALPHA, etc */
+      strb = st_renderbuffer(fb->_ColorReadBuffer);
+   }
+
+   if (!strb || !strb->surface || !stImage->pt) {
+      debug_printf("%s: null strb or stImage\n", __FUNCTION__);
+      return;
+   }
+
+   sample_count = strb->surface->texture->nr_samples;
+   /* I believe this would be legal, presumably would need to do a resolve
+      for color, and for depth/stencil spec says to just use one of the
+      depth/stencil samples per pixel? Need some transfer clarifications. */
+   assert(sample_count < 2);
+
+   /* Shift a negative source origin to 0, shrinking the region and
+    * moving the destination origin by the same amount.
+    */
+   if (srcX < 0) {
+      width -= -srcX;
+      destX += -srcX;
+      srcX = 0;
+   }
+
+   if (srcY < 0) {
+      height -= -srcY;
+      destY += -srcY;
+      srcY = 0;
+   }
+
+   /* Same for a negative destination origin. */
+   if (destX < 0) {
+      width -= -destX;
+      srcX += -destX;
+      destX = 0;
+   }
+
+   if (destY < 0) {
+      height -= -destY;
+      srcY += -destY;
+      destY = 0;
+   }
+
+   if (width < 0 || height < 0)
+      return;
+
+
+   assert(strb);
+   assert(strb->surface);
+   assert(stImage->pt);
+
+   src_format = strb->surface->format;
+   dest_format = stImage->pt->format;
+
+   /*
+    * Determine if the src framebuffer and dest texture have the same
+    * base format.  We need this to detect a case such as the framebuffer
+    * being GL_RGBA but the texture being GL_RGB.  If the actual hardware
+    * texture format stores RGBA we need to set A=1 (overriding the
+    * framebuffer's alpha values).  We can't do that with the blit or
+    * textured-quad paths.
+    */
+   matching_base_formats =
+      (_mesa_get_format_base_format(strb->Base.Format) ==
+       _mesa_get_format_base_format(texImage->TexFormat));
+   format_writemask = compatible_src_dst_formats(ctx, &strb->Base, texImage);
+
+   /* the hardware paths are only attempted when no image transfer
+    * state is set
+    */
+   if (ctx->_ImageTransferState == 0x0) {
+
+      if (matching_base_formats &&
+          src_format == dest_format &&
+          !do_flip)
+      {
+         /* use surface_copy() / blit */
+         struct pipe_box src_box;
+         u_box_2d_zslice(srcX, srcY, strb->surface->u.tex.first_layer,
+                         width, height, &src_box);
+
+         /* for resource_copy_region(), y=0=top, always */
+         pipe->resource_copy_region(pipe,
+                                    /* dest */
+                                    stImage->pt,
+                                    stImage->level,
+                                    destX, destY, destZ + stImage->face,
+                                    /* src */
+                                    strb->texture,
+                                    strb->surface->u.tex.level,
+                                    &src_box);
+         use_fallback = GL_FALSE;
+      }
+      else if (format_writemask &&
+               texBaseFormat != GL_DEPTH_COMPONENT &&
+               texBaseFormat != GL_DEPTH_STENCIL &&
+               screen->is_format_supported(screen, src_format,
+                                           PIPE_TEXTURE_2D, sample_count,
+                                           PIPE_BIND_SAMPLER_VIEW,
+                                           0) &&
+               screen->is_format_supported(screen, dest_format,
+                                           PIPE_TEXTURE_2D, 0,
+                                           PIPE_BIND_RENDER_TARGET,
+                                           0)) {
+         /* draw textured quad to do the copy */
+         GLint srcY0, srcY1;
+         struct pipe_surface surf_tmpl;
+         memset(&surf_tmpl, 0, sizeof(surf_tmpl));
+         surf_tmpl.format = stImage->pt->format;
+         surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
+         surf_tmpl.u.tex.level = stImage->level;
+         surf_tmpl.u.tex.first_layer = stImage->face + destZ;
+         surf_tmpl.u.tex.last_layer = stImage->face + destZ;
+
+         dest_surface = pipe->create_surface(pipe, stImage->pt,
+                                             &surf_tmpl);
+
+         /* when flipping, srcY0 > srcY1 so the quad samples the source
+          * rows in reverse order
+          */
+         if (do_flip) {
+            srcY1 = strb->Base.Height - srcY - height;
+            srcY0 = srcY1 + height;
+         }
+         else {
+            srcY0 = srcY;
+            srcY1 = srcY0 + height;
+         }
+
+         util_blit_pixels_writemask(st->blit,
+                                    strb->texture,
+                                    strb->surface->u.tex.level,
+                                    srcX, srcY0,
+                                    srcX + width, srcY1,
+                                    strb->surface->u.tex.first_layer,
+                                    dest_surface,
+                                    destX, destY,
+                                    destX + width, destY + height,
+                                    0.0, PIPE_TEX_MIPFILTER_NEAREST,
+                                    format_writemask);
+         use_fallback = GL_FALSE;
+      }
+
+      /* drop the reference on the temporary destination surface */
+      if (dest_surface)
+         pipe_surface_reference(&dest_surface, NULL);
+   }
+
+   if (use_fallback) {
+      /* software fallback */
+      fallback_copy_texsubimage(ctx, target, level,
+                                strb, stImage, texBaseFormat,
+                                destX, destY, destZ,
+                                srcX, srcY, width, height);
+   }
+}
+
+
+
+/**
+ * CopyTexImage1D: (re)define the 1D texture image via
+ * ctx->Driver.TexImage1D() with no pixel data, then fill it with a
+ * width x 1 region of the read buffer starting at (x, y).
+ */
+static void
+st_CopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level,
+                  GLenum internalFormat,
+                  GLint x, GLint y, GLsizei width, GLint border)
+{
+   struct gl_texture_unit *const unit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *const obj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *const image =
+      _mesa_select_tex_image(ctx, obj, target, level);
+   const GLint destX = 0, destY = 0, destZ = 0;
+   const GLsizei height = 1;
+
+   /* Setup or redefine the texture object, texture and texture
+    * image.  Nothing is uploaded yet (NULL pixels).
+    */
+   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                          width, border,
+                          GL_RGBA, CHAN_TYPE, NULL,
+                          &ctx->DefaultPacking, obj, image);
+
+   /* now copy the framebuffer region into the new image */
+   st_copy_texsubimage(ctx, target, level,
+                       destX, destY, destZ,
+                       x, y, width, height);
+}
+
+
+/**
+ * CopyTexImage2D: (re)define the 2D texture image via
+ * ctx->Driver.TexImage2D() with no pixel data, then fill it with a
+ * width x height region of the read buffer starting at (x, y).
+ */
+static void
+st_CopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level,
+                  GLenum internalFormat,
+                  GLint x, GLint y, GLsizei width, GLsizei height,
+                  GLint border)
+{
+   struct gl_texture_unit *const unit =
+      &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+   struct gl_texture_object *const obj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *const image =
+      _mesa_select_tex_image(ctx, obj, target, level);
+   const GLint destX = 0, destY = 0, destZ = 0;
+
+   /* Setup or redefine the texture object, texture and texture
+    * image.  Nothing is uploaded yet (NULL pixels).
+    */
+   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                          width, height, border,
+                          GL_RGBA, CHAN_TYPE, NULL,
+                          &ctx->DefaultPacking, obj, image);
+
+   /* now copy the framebuffer region into the new image */
+   st_copy_texsubimage(ctx, target, level,
+                       destX, destY, destZ,
+                       x, y, width, height);
+}
+
+
+/**
+ * CopyTexSubImage1D: copy a width x 1 region of the read buffer into the
+ * texture at xoffset.  The y and z destination offsets are 0.
+ */
+static void
+st_CopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level,
+                     GLint xoffset, GLint x, GLint y, GLsizei width)
+{
+   st_copy_texsubimage(ctx, target, level,
+                       xoffset, 0, 0,    /* destX, destY, destZ */
+                       x, y,             /* srcX, srcY */
+                       width, 1);        /* width, height */
+}
+
+
+/**
+ * CopyTexSubImage2D: copy a width x height region of the read buffer
+ * into the texture at (xoffset, yoffset).  The z destination offset is 0.
+ */
+static void
+st_CopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level,
+                     GLint xoffset, GLint yoffset,
+                     GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   st_copy_texsubimage(ctx, target, level,
+                       xoffset, yoffset, 0,   /* destX, destY, destZ */
+                       x, y,                  /* srcX, srcY */
+                       width, height);
+}
+
+/** CopyTexSubImage3D driver hook: forwards every argument unchanged. */
+static void
+st_CopyTexSubImage3D(struct gl_context * ctx, GLenum target, GLint level,
+                     GLint xoffset, GLint yoffset, GLint zoffset,
+                     GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   st_copy_texsubimage(ctx, target, level,
+                       xoffset, yoffset, zoffset,  /* destX,Y,Z */
+                       x, y, width, height);  /* src X, Y, size */
+}
+
+
+/**
+ * Move the pixels owned by 'stImage' (held in its own resource, or else
+ * in base.Data) into stObj->pt at 'dstLevel', then point stImage at it.
+ */
+static void
+copy_image_data_to_texture(struct st_context *st,
+			   struct st_texture_object *stObj,
+                           GLuint dstLevel,
+			   struct st_texture_image *stImage)
+{
+   /* debug checks: the object's slot for this image has the same size */
+   {
+      const struct gl_texture_image *dstImage =
+         stObj->base.Image[stImage->face][stImage->level];
+      assert(dstImage);
+      assert(dstImage->Width == stImage->base.Width);
+      assert(dstImage->Height == stImage->base.Height);
+      assert(dstImage->Depth == stImage->base.Depth);
+   }
+
+   if (stImage->pt) {
+      /* The image has its own resource.  Copy potentially with the
+       * blitter, then drop stImage's reference to that resource. */
+      st_texture_image_copy(st->pipe,
+                            stObj->pt, dstLevel,  /* dest texture, level */
+                            stImage->pt, stImage->level, /* src texture, level */
+                            stImage->face);
+
+      pipe_resource_reference(&stImage->pt, NULL);
+   }
+   else if (stImage->base.Data) {
+      st_texture_image_data(st,
+                            stObj->pt,
+                            stImage->face,
+                            dstLevel,
+                            stImage->base.Data,
+                            stImage->base.RowStride * 
+                            util_format_get_blocksize(stObj->pt->format),
+                            stImage->base.RowStride *
+                            stImage->base.Height *
+                            util_format_get_blocksize(stObj->pt->format));
+      _mesa_align_free(stImage->base.Data);  /* buffer was passed on above */
+      stImage->base.Data = NULL;
+   }
+   /* In every case the image ends up referencing the object's resource. */
+   pipe_resource_reference(&stImage->pt, stObj->pt);
+}
+
+
+/**
+ * Called during state validation.  On success, stObj->pt exists and every
+ * present image from BaseLevel to lastLevel refers to it.  'pipe' is unused.
+ * \return GL_TRUE for success, GL_FALSE for failure (out of mem)
+ */
+GLboolean
+st_finalize_texture(struct gl_context *ctx,
+		    struct pipe_context *pipe,
+		    struct gl_texture_object *tObj)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_object *stObj = st_texture_object(tObj);
+   const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint face;
+   struct st_texture_image *firstImage;
+   enum pipe_format firstImageFormat;
+   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
+
+   if (stObj->base._Complete) {
+      /* The texture is complete and we know exactly how many mipmap levels
+       * are present/needed.  This is conditional because we may be called
+       * from the st_generate_mipmap() function when the texture object is
+       * incomplete.  In that case, we'll have set stObj->lastLevel before
+       * we get here.
+       */
+      if (stObj->base.MinFilter == GL_LINEAR ||
+          stObj->base.MinFilter == GL_NEAREST)
+         stObj->lastLevel = stObj->base.BaseLevel;  /* one level only */
+      else
+         stObj->lastLevel = stObj->base._MaxLevel;
+   }
+
+   firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
+   assert(firstImage);
+
+   /* If both firstImage and stObj point to a texture which can contain
+    * all active images, favour firstImage.  Note that because of the
+    * completeness requirement, we know that the image dimensions
+    * will match.
+    */
+   if (firstImage->pt &&
+       firstImage->pt != stObj->pt &&
+       (!stObj->pt || firstImage->pt->last_level >= stObj->pt->last_level)) {
+      pipe_resource_reference(&stObj->pt, firstImage->pt);
+      pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+   }
+
+   /* Find gallium format for the Mesa texture */
+   firstImageFormat = st_mesa_format_to_pipe_format(firstImage->base.TexFormat);
+
+   /* Find size of level=0 Gallium mipmap image, plus number of texture layers */
+   {
+      GLuint width, height, depth;
+      if (!guess_base_level_size(stObj->base.Target,
+                                 firstImage->base.Width2,
+                                 firstImage->base.Height2,
+                                 firstImage->base.Depth2,
+                                 stObj->base.BaseLevel,
+                                 &width, &height, &depth)) {
+         width = stObj->width0;   /* guess failed: use the object's own size */
+         height = stObj->height0;
+         depth = stObj->depth0;
+      }
+      /* convert GL dims to Gallium dims */
+      st_gl_texture_dims_to_pipe_dims(stObj->base.Target, width, height, depth,
+                                      &ptWidth, &ptHeight, &ptDepth, &ptLayers);
+   }
+
+   /* If we already have a gallium texture, check that it matches the texture
+    * object's format, target, size, num_levels, etc.
+    */
+   if (stObj->pt) {
+      if (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) ||
+          !st_sampler_compat_formats(stObj->pt->format, firstImageFormat) ||
+          stObj->pt->last_level < stObj->lastLevel ||
+          stObj->pt->width0 != ptWidth ||
+          stObj->pt->height0 != ptHeight ||
+          stObj->pt->depth0 != ptDepth ||
+          stObj->pt->array_size != ptLayers)
+      {
+         /* The gallium texture does not match the Mesa texture so delete the
+          * gallium texture now.  We'll make a new one below.
+          */
+         pipe_resource_reference(&stObj->pt, NULL);
+         pipe_sampler_view_reference(&stObj->sampler_view, NULL);
+         st->dirty.st |= ST_NEW_FRAMEBUFFER;
+      }
+   }
+
+   /* Create a gallium texture if there is none (or it was just dropped):
+    */
+   if (!stObj->pt) {
+      GLuint bindings = default_bindings(st, firstImageFormat);
+
+      stObj->pt = st_texture_create(st,
+                                    gl_target_to_pipe(stObj->base.Target),
+                                    firstImageFormat,
+                                    stObj->lastLevel,
+                                    ptWidth,
+                                    ptHeight,
+                                    ptDepth,
+                                    ptLayers,
+                                    bindings);
+
+      if (!stObj->pt) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+         return GL_FALSE;
+      }
+   }
+
+   /* Pull in any images not in the object's texture:
+    */
+   for (face = 0; face < nr_faces; face++) {
+      GLuint level;
+      for (level = stObj->base.BaseLevel; level <= stObj->lastLevel; level++) {
+         struct st_texture_image *stImage =
+            st_texture_image(stObj->base.Image[face][level]);
+
+         /* Need to import images in main memory or held in other textures.
+          */
+         if (stImage && stObj->pt != stImage->pt) {
+            copy_image_data_to_texture(st, stObj, level, stImage);
+         }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Return the default/dummy texture, creating and caching it on first use.
+ * Typically used when the current shader has tex/sample instructions but
+ * the user has not provided any texture.
+ */
+struct gl_texture_object *
+st_get_default_texture(struct st_context *st)
+{
+   static const GLenum target = GL_TEXTURE_2D;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImg;
+   GLubyte pixels[16][16][4];
+   GLuint row, col;
+
+   if (st->default_texture)
+      return st->default_texture;   /* built by an earlier call */
+
+   /* The ARB_fragment_program spec says (0,0,0,1) should be returned
+    * when attempting to sample incomplete textures.
+    */
+   for (row = 0; row < 16; row++) {
+      for (col = 0; col < 16; col++) {
+         GLubyte *texel = pixels[row][col];
+         texel[0] = 0;     /* R */
+         texel[1] = 0;     /* G */
+         texel[2] = 0;     /* B */
+         texel[3] = 255;   /* A */
+      }
+   }
+
+   /* Create the texture object and set up its level-0 image. */
+   texObj = st->ctx->Driver.NewTextureObject(st->ctx, 0, target);
+   texImg = _mesa_get_tex_image(st->ctx, texObj, target, 0);
+   _mesa_init_teximage_fields(st->ctx, target, texImg,
+                              16, 16, 1, 0,  /* w, h, d, border */
+                              GL_RGBA, MESA_FORMAT_RGBA8888);
+
+   st_TexImage(st->ctx, 2, target,
+               0, GL_RGBA,    /* level, intformat */
+               16, 16, 1, 0,  /* w, h, d, border */
+               GL_RGBA, GL_UNSIGNED_BYTE, pixels,
+               &st->ctx->DefaultPacking, texObj, texImg,
+               0, 0);
+
+   texObj->MinFilter = GL_NEAREST;
+   texObj->MagFilter = GL_NEAREST;
+   texObj->_Complete = GL_TRUE;
+
+   st->default_texture = texObj;
+   return texObj;
+}
+
+/** Fill in the texture-related entries of 'functions'. */
+void
+st_init_texture_functions(struct dd_function_table *functions)
+{
+   /* texture object / image lifetime and format selection */
+   functions->NewTextureObject = st_NewTextureObject;
+   functions->NewTextureImage = st_NewTextureImage;
+   functions->DeleteTexture = st_DeleteTextureObject;
+   functions->FreeTexImageData = st_FreeTextureImageData;
+   functions->ChooseTextureFormat = st_ChooseTextureFormat;
+
+   /* image definition and sub-image update */
+   functions->TexImage1D = st_TexImage1D;
+   functions->TexImage2D = st_TexImage2D;
+   functions->TexImage3D = st_TexImage3D;
+   functions->TexSubImage1D = st_TexSubImage1D;
+   functions->TexSubImage2D = st_TexSubImage2D;
+   functions->TexSubImage3D = st_TexSubImage3D;
+
+   /* compressed texture functions */
+   functions->CompressedTexImage2D = st_CompressedTexImage2D;
+   functions->CompressedTexSubImage1D = st_CompressedTexSubImage1D;
+   functions->CompressedTexSubImage2D = st_CompressedTexSubImage2D;
+   functions->CompressedTexSubImage3D = st_CompressedTexSubImage3D;
+   functions->GetCompressedTexImage = st_GetCompressedTexImage;
+
+   functions->CopyTexImage1D = st_CopyTexImage1D;
+   functions->CopyTexImage2D = st_CopyTexImage2D;
+   functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
+   functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
+   functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
+   functions->GetTexImage = st_GetTexImage;
+   functions->GenerateMipmap = st_generate_mipmap;
+   functions->TextureMemCpy = do_memcpy;
+   /* XXX Temporary until we can query pipe's texture sizes */
+   functions->TestProxyTexImage = _mesa_test_proxy_teximage;
+}
diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c
index 564d63483..11ebd067e 100644
--- a/mesalib/src/mesa/state_tracker/st_draw.c
+++ b/mesalib/src/mesa/state_tracker/st_draw.c
@@ -1,751 +1,759 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/*
- * This file implements the st_draw_vbo() function which is called from
- * Mesa's VBO module.  All point/line/triangle rendering is done through
- * this function whether the user called glBegin/End, glDrawArrays,
- * glDrawElements, glEvalMesh, or glCalList, etc.
- *
- * We basically convert the VBO's vertex attribute/array information into
- * Gallium vertex state, bind the vertex buffer objects and call
- * pipe->draw_elements(), pipe->draw_range_elements() or pipe->draw_arrays().
- *
- * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "program/prog_uniform.h"
-
-#include "vbo/vbo.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_cb_bufferobjects.h"
-#include "st_draw.h"
-#include "st_program.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_prim.h"
-#include "util/u_draw_quad.h"
-#include "draw/draw_context.h"
-#include "cso_cache/cso_context.h"
-
-
-static GLuint double_types[4] = {
-   PIPE_FORMAT_R64_FLOAT,
-   PIPE_FORMAT_R64G64_FLOAT,
-   PIPE_FORMAT_R64G64B64_FLOAT,
-   PIPE_FORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[4] = {
-   PIPE_FORMAT_R32_FLOAT,
-   PIPE_FORMAT_R32G32_FLOAT,
-   PIPE_FORMAT_R32G32B32_FLOAT,
-   PIPE_FORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint half_float_types[4] = {
-   PIPE_FORMAT_R16_FLOAT,
-   PIPE_FORMAT_R16G16_FLOAT,
-   PIPE_FORMAT_R16G16B16_FLOAT,
-   PIPE_FORMAT_R16G16B16A16_FLOAT
-};
-
-static GLuint uint_types_norm[4] = {
-   PIPE_FORMAT_R32_UNORM,
-   PIPE_FORMAT_R32G32_UNORM,
-   PIPE_FORMAT_R32G32B32_UNORM,
-   PIPE_FORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[4] = {
-   PIPE_FORMAT_R32_USCALED,
-   PIPE_FORMAT_R32G32_USCALED,
-   PIPE_FORMAT_R32G32B32_USCALED,
-   PIPE_FORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint int_types_norm[4] = {
-   PIPE_FORMAT_R32_SNORM,
-   PIPE_FORMAT_R32G32_SNORM,
-   PIPE_FORMAT_R32G32B32_SNORM,
-   PIPE_FORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[4] = {
-   PIPE_FORMAT_R32_SSCALED,
-   PIPE_FORMAT_R32G32_SSCALED,
-   PIPE_FORMAT_R32G32B32_SSCALED,
-   PIPE_FORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint ushort_types_norm[4] = {
-   PIPE_FORMAT_R16_UNORM,
-   PIPE_FORMAT_R16G16_UNORM,
-   PIPE_FORMAT_R16G16B16_UNORM,
-   PIPE_FORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[4] = {
-   PIPE_FORMAT_R16_USCALED,
-   PIPE_FORMAT_R16G16_USCALED,
-   PIPE_FORMAT_R16G16B16_USCALED,
-   PIPE_FORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint short_types_norm[4] = {
-   PIPE_FORMAT_R16_SNORM,
-   PIPE_FORMAT_R16G16_SNORM,
-   PIPE_FORMAT_R16G16B16_SNORM,
-   PIPE_FORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[4] = {
-   PIPE_FORMAT_R16_SSCALED,
-   PIPE_FORMAT_R16G16_SSCALED,
-   PIPE_FORMAT_R16G16B16_SSCALED,
-   PIPE_FORMAT_R16G16B16A16_SSCALED
-};
-
-static GLuint ubyte_types_norm[4] = {
-   PIPE_FORMAT_R8_UNORM,
-   PIPE_FORMAT_R8G8_UNORM,
-   PIPE_FORMAT_R8G8B8_UNORM,
-   PIPE_FORMAT_R8G8B8A8_UNORM
-};
-
-static GLuint ubyte_types_scale[4] = {
-   PIPE_FORMAT_R8_USCALED,
-   PIPE_FORMAT_R8G8_USCALED,
-   PIPE_FORMAT_R8G8B8_USCALED,
-   PIPE_FORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint byte_types_norm[4] = {
-   PIPE_FORMAT_R8_SNORM,
-   PIPE_FORMAT_R8G8_SNORM,
-   PIPE_FORMAT_R8G8B8_SNORM,
-   PIPE_FORMAT_R8G8B8A8_SNORM
-};
-
-static GLuint byte_types_scale[4] = {
-   PIPE_FORMAT_R8_SSCALED,
-   PIPE_FORMAT_R8G8_SSCALED,
-   PIPE_FORMAT_R8G8B8_SSCALED,
-   PIPE_FORMAT_R8G8B8A8_SSCALED
-};
-
-static GLuint fixed_types[4] = {
-   PIPE_FORMAT_R32_FIXED,
-   PIPE_FORMAT_R32G32_FIXED,
-   PIPE_FORMAT_R32G32B32_FIXED,
-   PIPE_FORMAT_R32G32B32A32_FIXED
-};
-
-
-
-/**
- * Return a PIPE_FORMAT_x for the given GL datatype and size.
- */
-GLuint
-st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
-                      GLboolean normalized)
-{
-   assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
-          type == GL_FIXED || type == GL_HALF_FLOAT);
-   assert(size >= 1);
-   assert(size <= 4);
-   assert(format == GL_RGBA || format == GL_BGRA);
-
-   if (format == GL_BGRA) {
-      /* this is an odd-ball case */
-      assert(type == GL_UNSIGNED_BYTE);
-      assert(normalized);
-      return PIPE_FORMAT_B8G8R8A8_UNORM;
-   }
-
-   if (normalized) {
-      switch (type) {
-      case GL_DOUBLE: return double_types[size-1];
-      case GL_FLOAT: return float_types[size-1];
-      case GL_HALF_FLOAT: return half_float_types[size-1];
-      case GL_INT: return int_types_norm[size-1];
-      case GL_SHORT: return short_types_norm[size-1];
-      case GL_BYTE: return byte_types_norm[size-1];
-      case GL_UNSIGNED_INT: return uint_types_norm[size-1];
-      case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1];
-      case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1];
-      case GL_FIXED: return fixed_types[size-1];
-      default: assert(0); return 0;
-      }      
-   }
-   else {
-      switch (type) {
-      case GL_DOUBLE: return double_types[size-1];
-      case GL_FLOAT: return float_types[size-1];
-      case GL_HALF_FLOAT: return half_float_types[size-1];
-      case GL_INT: return int_types_scale[size-1];
-      case GL_SHORT: return short_types_scale[size-1];
-      case GL_BYTE: return byte_types_scale[size-1];
-      case GL_UNSIGNED_INT: return uint_types_scale[size-1];
-      case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1];
-      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1];
-      case GL_FIXED: return fixed_types[size-1];
-      default: assert(0); return 0;
-      }      
-   }
-   return 0; /* silence compiler warning */
-}
-
-
-
-
-
-/**
- * Examine the active arrays to determine if we have interleaved
- * vertex arrays all living in one VBO, or all living in user space.
- * \param userSpace  returns whether the arrays are in user space.
- */
-static GLboolean
-is_interleaved_arrays(const struct st_vertex_program *vp,
-                      const struct st_vp_variant *vpv,
-                      const struct gl_client_array **arrays)
-{
-   GLuint attr;
-   const struct gl_buffer_object *firstBufObj = NULL;
-   GLint firstStride = -1;
-   const GLubyte *client_addr = NULL;
-
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
-      const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
-
-      if (firstStride < 0) {
-         firstStride = stride;
-      }
-      else if (firstStride != stride) {
-         return GL_FALSE;
-      }
-
-      if (!bufObj || !bufObj->Name) {
-         /* Try to detect if the client-space arrays are
-          * "close" to each other.
-          */
-         if (!client_addr) {
-            client_addr = arrays[mesaAttr]->Ptr;
-         }
-         else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) {
-            /* arrays start too far apart */
-            return GL_FALSE;
-         }
-      }
-      else if (!firstBufObj) {
-         firstBufObj = bufObj;
-      }
-      else if (bufObj != firstBufObj) {
-         return GL_FALSE;
-      }
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Set up for drawing interleaved arrays that all live in one VBO
- * or all live in user space.
- * \param vbuffer  returns vertex buffer info
- * \param velements  returns vertex element info
- */
-static void
-setup_interleaved_attribs(struct gl_context *ctx,
-                          const struct st_vertex_program *vp,
-                          const struct st_vp_variant *vpv,
-                          const struct gl_client_array **arrays,
-                          struct pipe_vertex_buffer *vbuffer,
-                          struct pipe_vertex_element velements[],
-                          unsigned max_index)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   GLuint attr;
-   const GLubyte *low_addr = NULL;
-
-   /* Find the lowest address. */
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
-
-      low_addr = !low_addr ? start : MIN2(low_addr, start);
-   }
-
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
-      struct st_buffer_object *stobj = st_buffer_object(bufobj);
-      GLsizei stride = arrays[mesaAttr]->StrideB;
-
-      if (attr == 0) {
-         if (bufobj && bufobj->Name) {
-            vbuffer->buffer = NULL;
-            pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
-            vbuffer->buffer_offset = pointer_to_offset(low_addr);
-         } else {
-            vbuffer->buffer =
-               pipe_user_buffer_create(pipe->screen, (void*)low_addr,
-                                       stride * (max_index + 1),
-				       PIPE_BIND_VERTEX_BUFFER);
-            vbuffer->buffer_offset = 0;
-
-            /* Track user vertex buffers. */
-            pipe_resource_reference(&st->user_vb[0], vbuffer->buffer);
-            st->user_vb_stride[0] = stride;
-            st->num_user_vbs = 1;
-         }
-         vbuffer->stride = stride; /* in bytes */
-      }
-
-      velements[attr].src_offset =
-         (unsigned) (arrays[mesaAttr]->Ptr - low_addr);
-      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
-      velements[attr].vertex_buffer_index = 0;
-      velements[attr].src_format =
-         st_pipe_vertex_format(arrays[mesaAttr]->Type,
-                               arrays[mesaAttr]->Size,
-                               arrays[mesaAttr]->Format,
-                               arrays[mesaAttr]->Normalized);
-      assert(velements[attr].src_format);
-   }
-}
-
-
-/**
- * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
- * vertex attribute.
- * \param vbuffer  returns vertex buffer info
- * \param velements  returns vertex element info
- */
-static void
-setup_non_interleaved_attribs(struct gl_context *ctx,
-                              const struct st_vertex_program *vp,
-                              const struct st_vp_variant *vpv,
-                              const struct gl_client_array **arrays,
-                              struct pipe_vertex_buffer vbuffer[],
-                              struct pipe_vertex_element velements[],
-                              unsigned max_index)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   GLuint attr;
-
-   for (attr = 0; attr < vpv->num_inputs; attr++) {
-      const GLuint mesaAttr = vp->index_to_input[attr];
-      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
-      GLsizei stride = arrays[mesaAttr]->StrideB;
-
-      if (bufobj && bufobj->Name) {
-         /* Attribute data is in a VBO.
-          * Recall that for VBOs, the gl_client_array->Ptr field is
-          * really an offset from the start of the VBO, not a pointer.
-          */
-         struct st_buffer_object *stobj = st_buffer_object(bufobj);
-         assert(stobj->buffer);
-
-         vbuffer[attr].buffer = NULL;
-         pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
-         vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
-      }
-      else {
-         /* wrap user data */
-         if (arrays[mesaAttr]->Ptr) {
-            vbuffer[attr].buffer = 
-	       pipe_user_buffer_create(pipe->screen,
-				       (void *) arrays[mesaAttr]->Ptr,
-				       stride * (max_index + 1),
-				       PIPE_BIND_VERTEX_BUFFER);
-         }
-         else {
-            /* no array, use ctx->Current.Attrib[] value */
-            uint bytes = sizeof(ctx->Current.Attrib[0]);
-            vbuffer[attr].buffer = 
-	       pipe_user_buffer_create(pipe->screen,
-				       (void *) ctx->Current.Attrib[mesaAttr],
-				       bytes,
-				       PIPE_BIND_VERTEX_BUFFER);
-            stride = 0;
-         }
-
-         vbuffer[attr].buffer_offset = 0;
-
-         /* Track user vertex buffers. */
-         pipe_resource_reference(&st->user_vb[attr], vbuffer->buffer);
-         st->user_vb_stride[attr] = stride;
-         st->num_user_vbs = MAX2(st->num_user_vbs, attr+1);
-      }
-
-      /* common-case setup */
-      vbuffer[attr].stride = stride; /* in bytes */
-
-      velements[attr].src_offset = 0;
-      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
-      velements[attr].vertex_buffer_index = attr;
-      velements[attr].src_format
-         = st_pipe_vertex_format(arrays[mesaAttr]->Type,
-                                 arrays[mesaAttr]->Size,
-                                 arrays[mesaAttr]->Format,
-                                 arrays[mesaAttr]->Normalized);
-      assert(velements[attr].src_format);
-   }
-}
-
-
-static void
-setup_index_buffer(struct gl_context *ctx,
-                   const struct _mesa_index_buffer *ib,
-                   struct pipe_index_buffer *ibuffer)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-
-   memset(ibuffer, 0, sizeof(*ibuffer));
-   if (ib) {
-      struct gl_buffer_object *bufobj = ib->obj;
-
-      switch (ib->type) {
-      case GL_UNSIGNED_INT:
-         ibuffer->index_size = 4;
-         break;
-      case GL_UNSIGNED_SHORT:
-         ibuffer->index_size = 2;
-         break;
-      case GL_UNSIGNED_BYTE:
-         ibuffer->index_size = 1;
-         break;
-      default:
-         assert(0);
-	 return;
-      }
-
-      /* get/create the index buffer object */
-      if (bufobj && bufobj->Name) {
-         /* elements/indexes are in a real VBO */
-         struct st_buffer_object *stobj = st_buffer_object(bufobj);
-         pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
-         ibuffer->offset = pointer_to_offset(ib->ptr);
-      }
-      else {
-         /* element/indicies are in user space memory */
-         ibuffer->buffer =
-            pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
-                                    ib->count * ibuffer->index_size,
-                                    PIPE_BIND_INDEX_BUFFER);
-      }
-   }
-}
-
-/**
- * Prior to drawing, check that any uniforms referenced by the
- * current shader have been set.  If a uniform has not been set,
- * issue a warning.
- */
-static void
-check_uniforms(struct gl_context *ctx)
-{
-   struct gl_shader_program *shProg[3] = {
-      ctx->Shader.CurrentVertexProgram,
-      ctx->Shader.CurrentGeometryProgram,
-      ctx->Shader.CurrentFragmentProgram,
-   };
-   unsigned j;
-
-   for (j = 0; j < 3; j++) {
-      unsigned i;
-
-      if (shProg[j] == NULL || !shProg[j]->LinkStatus)
-	 continue;
-
-      for (i = 0; i < shProg[j]->Uniforms->NumUniforms; i++) {
-         const struct gl_uniform *u = &shProg[j]->Uniforms->Uniforms[i];
-         if (!u->Initialized) {
-            _mesa_warning(ctx,
-                          "Using shader with uninitialized uniform: %s",
-                          u->Name);
-         }
-      }
-   }
-}
-
-
-/**
- * Translate OpenGL primtive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
- * the corresponding Gallium type.
- */
-static unsigned
-translate_prim(const struct gl_context *ctx, unsigned prim)
-{
-   /* GL prims should match Gallium prims, spot-check a few */
-   assert(GL_POINTS == PIPE_PRIM_POINTS);
-   assert(GL_QUADS == PIPE_PRIM_QUADS);
-   assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
-
-   /* Avoid quadstrips if it's easy to do so:
-    * Note: it's imporant to do the correct trimming if we change the prim type!
-    * We do that wherever this function is called.
-    */
-   if (prim == GL_QUAD_STRIP &&
-       ctx->Light.ShadeModel != GL_FLAT &&
-       ctx->Polygon.FrontMode == GL_FILL &&
-       ctx->Polygon.BackMode == GL_FILL)
-      prim = GL_TRIANGLE_STRIP;
-
-   return prim;
-}
-
-
-static void
-st_validate_varrays(struct gl_context *ctx,
-                    const struct gl_client_array **arrays,
-                    unsigned max_index)
-{
-   struct st_context *st = st_context(ctx);
-   const struct st_vertex_program *vp;
-   const struct st_vp_variant *vpv;
-   struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
-   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
-   unsigned num_vbuffers, num_velements;
-   GLuint attr;
-   unsigned i;
-
-   /* must get these after state validation! */
-   vp = st->vp;
-   vpv = st->vp_variant;
-
-   memset(velements, 0, sizeof(struct pipe_vertex_element) * vpv->num_inputs);
-
-   /* Unreference any user vertex buffers. */
-   for (i = 0; i < st->num_user_vbs; i++) {
-      pipe_resource_reference(&st->user_vb[i], NULL);
-   }
-   st->num_user_vbs = 0;
-
-   /*
-    * Setup the vbuffer[] and velements[] arrays.
-    */
-   if (is_interleaved_arrays(vp, vpv, arrays)) {
-      setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
-                                max_index);
-      num_vbuffers = 1;
-      num_velements = vpv->num_inputs;
-      if (num_velements == 0)
-         num_vbuffers = 0;
-   }
-   else {
-      setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
-                                    vbuffer, velements, max_index);
-      num_vbuffers = vpv->num_inputs;
-      num_velements = vpv->num_inputs;
-   }
-
-   cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
-   cso_set_vertex_elements(st->cso_context, num_velements, velements);
-
-   /* unreference buffers (frees wrapped user-space buffer objects)
-    * This is OK, because the pipe driver should reference buffers by itself
-    * in set_vertex_buffers. */
-   for (attr = 0; attr < num_vbuffers; attr++) {
-      pipe_resource_reference(&vbuffer[attr].buffer, NULL);
-      assert(!vbuffer[attr].buffer);
-   }
-}
-
-
-/**
- * This function gets plugged into the VBO module and is called when
- * we have something to render.
- * Basically, translate the information into the format expected by gallium.
- */
-void
-st_draw_vbo(struct gl_context *ctx,
-            const struct gl_client_array **arrays,
-            const struct _mesa_prim *prims,
-            GLuint nr_prims,
-            const struct _mesa_index_buffer *ib,
-	    GLboolean index_bounds_valid,
-            GLuint min_index,
-            GLuint max_index)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_index_buffer ibuffer;
-   struct pipe_draw_info info;
-   unsigned i;
-   GLboolean new_array = GL_TRUE;
-   /* Fix this (Bug 34378):
-   GLboolean new_array =
-         st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;*/
-
-   /* Mesa core state should have been validated already */
-   assert(ctx->NewState == 0x0);
-
-   if (ib) {
-      /* Gallium probably doesn't want this in some cases. */
-      if (!index_bounds_valid)
-         if (!vbo_all_varyings_in_vbos(arrays))
-            vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
-   } else {
-      /* Get min/max index for non-indexed drawing. */
-      min_index = ~0;
-      max_index = 0;
-
-      for (i = 0; i < nr_prims; i++) {
-         min_index = MIN2(min_index, prims[i].start);
-         max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
-      }
-   }
-
-   /* Validate state. */
-   if (st->dirty.st) {
-      GLboolean vertDataEdgeFlags;
-
-      /* sanity check for pointer arithmetic below */
-      assert(sizeof(arrays[0]->Ptr[0]) == 1);
-
-      vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
-                          arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
-      if (vertDataEdgeFlags != st->vertdata_edgeflags) {
-         st->vertdata_edgeflags = vertDataEdgeFlags;
-         st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
-      }
-
-      st_validate_state(st);
-
-      if (new_array) {
-         st_validate_varrays(ctx, arrays, max_index);
-      }
-
-#if 0
-      if (MESA_VERBOSE & VERBOSE_GLSL) {
-         check_uniforms(ctx);
-      }
-#else
-      (void) check_uniforms;
-#endif
-   }
-
-   /* Notify the driver that the content of user buffers may have been
-    * changed. */
-   if (!new_array && st->num_user_vbs) {
-      for (i = 0; i < st->num_user_vbs; i++) {
-         if (st->user_vb[i]) {
-            unsigned stride = st->user_vb_stride[i];
-
-            if (stride) {
-               pipe->redefine_user_buffer(pipe, st->user_vb[i],
-                                          min_index * stride,
-                                          (max_index + 1 - min_index) * stride);
-            } else {
-               /* stride == 0 */
-               pipe->redefine_user_buffer(pipe, st->user_vb[i],
-                                          0, st->user_vb[i]->width0);
-            }
-         }
-      }
-   }
-
-   setup_index_buffer(ctx, ib, &ibuffer);
-   pipe->set_index_buffer(pipe, &ibuffer);
-
-   util_draw_init_info(&info);
-   if (ib) {
-      info.indexed = TRUE;
-      if (min_index != ~0 && max_index != ~0) {
-         info.min_index = min_index;
-         info.max_index = max_index;
-      }
-   }
-
-   info.primitive_restart = st->ctx->Array.PrimitiveRestart;
-   info.restart_index = st->ctx->Array.RestartIndex;
-
-   /* do actual drawing */
-   for (i = 0; i < nr_prims; i++) {
-      info.mode = translate_prim( ctx, prims[i].mode );
-      info.start = prims[i].start;
-      info.count = prims[i].count;
-      info.instance_count = prims[i].num_instances;
-      info.index_bias = prims[i].basevertex;
-      if (!ib) {
-         info.min_index = info.start;
-         info.max_index = info.start + info.count - 1;
-      }
-
-      if (u_trim_pipe_prim(info.mode, &info.count))
-         pipe->draw_vbo(pipe, &info);
-   }
-
-   pipe_resource_reference(&ibuffer.buffer, NULL);
-}
-
-
-void st_init_draw( struct st_context *st )
-{
-   struct gl_context *ctx = st->ctx;
-
-   vbo_set_draw_func(ctx, st_draw_vbo);
-
-#if FEATURE_feedback || FEATURE_rastpos
-   st->draw = draw_create(st->pipe); /* for selection/feedback */
-
-   /* Disable draw options that might convert points/lines to tris, etc.
-    * as that would foul-up feedback/selection mode.
-    */
-   draw_wide_line_threshold(st->draw, 1000.0f);
-   draw_wide_point_threshold(st->draw, 1000.0f);
-   draw_enable_line_stipple(st->draw, FALSE);
-   draw_enable_point_sprites(st->draw, FALSE);
-#endif
-}
-
-
-void st_destroy_draw( struct st_context *st )
-{
-#if FEATURE_feedback || FEATURE_rastpos
-   draw_destroy(st->draw);
-#endif
-}
-
-
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/*
+ * This file implements the st_draw_vbo() function which is called from
+ * Mesa's VBO module.  All point/line/triangle rendering is done through
+ * this function whether the user called glBegin/End, glDrawArrays,
+ * glDrawElements, glEvalMesh, or glCallList, etc.
+ *
+ * We basically convert the VBO's vertex attribute/array information into
+ * Gallium vertex state, bind the vertex buffer objects and call
+ * pipe->draw_vbo() for each primitive.
+ *
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "program/prog_uniform.h"
+
+#include "vbo/vbo.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_cb_bufferobjects.h"
+#include "st_draw.h"
+#include "st_program.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_prim.h"
+#include "util/u_draw_quad.h"
+#include "draw/draw_context.h"
+#include "cso_cache/cso_context.h"
+
+
+static const GLuint double_types[4] = { /* all tables: index is size-1 */
+   PIPE_FORMAT_R64_FLOAT,
+   PIPE_FORMAT_R64G64_FLOAT,
+   PIPE_FORMAT_R64G64B64_FLOAT,
+   PIPE_FORMAT_R64G64B64A64_FLOAT
+};
+
+static const GLuint float_types[4] = {
+   PIPE_FORMAT_R32_FLOAT,
+   PIPE_FORMAT_R32G32_FLOAT,
+   PIPE_FORMAT_R32G32B32_FLOAT,
+   PIPE_FORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint half_float_types[4] = {
+   PIPE_FORMAT_R16_FLOAT,
+   PIPE_FORMAT_R16G16_FLOAT,
+   PIPE_FORMAT_R16G16B16_FLOAT,
+   PIPE_FORMAT_R16G16B16A16_FLOAT
+};
+
+static const GLuint uint_types_norm[4] = {
+   PIPE_FORMAT_R32_UNORM,
+   PIPE_FORMAT_R32G32_UNORM,
+   PIPE_FORMAT_R32G32B32_UNORM,
+   PIPE_FORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[4] = {
+   PIPE_FORMAT_R32_USCALED,
+   PIPE_FORMAT_R32G32_USCALED,
+   PIPE_FORMAT_R32G32B32_USCALED,
+   PIPE_FORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_norm[4] = {
+   PIPE_FORMAT_R32_SNORM,
+   PIPE_FORMAT_R32G32_SNORM,
+   PIPE_FORMAT_R32G32B32_SNORM,
+   PIPE_FORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[4] = {
+   PIPE_FORMAT_R32_SSCALED,
+   PIPE_FORMAT_R32G32_SSCALED,
+   PIPE_FORMAT_R32G32B32_SSCALED,
+   PIPE_FORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_norm[4] = {
+   PIPE_FORMAT_R16_UNORM,
+   PIPE_FORMAT_R16G16_UNORM,
+   PIPE_FORMAT_R16G16B16_UNORM,
+   PIPE_FORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[4] = {
+   PIPE_FORMAT_R16_USCALED,
+   PIPE_FORMAT_R16G16_USCALED,
+   PIPE_FORMAT_R16G16B16_USCALED,
+   PIPE_FORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_norm[4] = {
+   PIPE_FORMAT_R16_SNORM,
+   PIPE_FORMAT_R16G16_SNORM,
+   PIPE_FORMAT_R16G16B16_SNORM,
+   PIPE_FORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[4] = {
+   PIPE_FORMAT_R16_SSCALED,
+   PIPE_FORMAT_R16G16_SSCALED,
+   PIPE_FORMAT_R16G16B16_SSCALED,
+   PIPE_FORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_norm[4] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8G8_UNORM,
+   PIPE_FORMAT_R8G8B8_UNORM,
+   PIPE_FORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[4] = {
+   PIPE_FORMAT_R8_USCALED,
+   PIPE_FORMAT_R8G8_USCALED,
+   PIPE_FORMAT_R8G8B8_USCALED,
+   PIPE_FORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_norm[4] = {
+   PIPE_FORMAT_R8_SNORM,
+   PIPE_FORMAT_R8G8_SNORM,
+   PIPE_FORMAT_R8G8B8_SNORM,
+   PIPE_FORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[4] = {
+   PIPE_FORMAT_R8_SSCALED,
+   PIPE_FORMAT_R8G8_SSCALED,
+   PIPE_FORMAT_R8G8B8_SSCALED,
+   PIPE_FORMAT_R8G8B8A8_SSCALED
+};
+
+static const GLuint fixed_types[4] = {
+   PIPE_FORMAT_R32_FIXED,
+   PIPE_FORMAT_R32G32_FIXED,
+   PIPE_FORMAT_R32G32B32_FIXED,
+   PIPE_FORMAT_R32G32B32A32_FIXED
+};
+
+
+
+/**
+ * Return a PIPE_FORMAT_x for the given GL datatype and size.
+ * \param type  GL component type: GL_BYTE..GL_DOUBLE, GL_FIXED, GL_HALF_FLOAT
+ * \param size  number of components, 1 to 4
+ * \param format  GL_RGBA or GL_BGRA (BGRA is asserted to be normalized ubyte)
+ * \param normalized  use the *_norm tables instead of the *_scale tables
+ * \return the pipe format; 0 if the type is not handled
+ */
+GLuint
+st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
+                      GLboolean normalized)
+{
+   const GLuint *table;
+
+   assert((type >= GL_BYTE && type <= GL_DOUBLE) ||
+          type == GL_FIXED || type == GL_HALF_FLOAT);
+   assert(size >= 1);
+   assert(size <= 4);
+   assert(format == GL_RGBA || format == GL_BGRA);
+
+   if (format == GL_BGRA) {
+      /* this is an odd-ball case */
+      assert(type == GL_UNSIGNED_BYTE);
+      assert(normalized);
+      return PIPE_FORMAT_B8G8R8A8_UNORM;
+   }
+
+   /* Only the integer types have separate normalized/scaled tables. */
+   switch (type) {
+   case GL_DOUBLE: table = double_types; break;
+   case GL_FLOAT: table = float_types; break;
+   case GL_HALF_FLOAT: table = half_float_types; break;
+   case GL_FIXED: table = fixed_types; break;
+   case GL_INT:
+      table = normalized ? int_types_norm : int_types_scale; break;
+   case GL_SHORT:
+      table = normalized ? short_types_norm : short_types_scale; break;
+   case GL_BYTE:
+      table = normalized ? byte_types_norm : byte_types_scale; break;
+   case GL_UNSIGNED_INT:
+      table = normalized ? uint_types_norm : uint_types_scale; break;
+   case GL_UNSIGNED_SHORT:
+      table = normalized ? ushort_types_norm : ushort_types_scale; break;
+   case GL_UNSIGNED_BYTE:
+      table = normalized ? ubyte_types_norm : ubyte_types_scale; break;
+   default:
+      assert(0);
+      return 0;
+   }
+
+   return table[size-1];
+}
+
+
+
+
+
+/**
+ * Examine the active arrays to determine if we have interleaved
+ * vertex arrays all living in one VBO, or all living in user space.
+ * \return GL_TRUE if the arrays can be set up as one interleaved buffer.
+ */
+static GLboolean
+is_interleaved_arrays(const struct st_vertex_program *vp,
+                      const struct st_vp_variant *vpv,
+                      const struct gl_client_array **arrays)
+{
+   GLuint attr;
+   const struct gl_buffer_object *firstBufObj = NULL;
+   GLint firstStride = -1;
+   const GLubyte *client_addr = NULL;
+   GLboolean user_memory = GL_FALSE;
+
+   for (attr = 0; attr < vpv->num_inputs; attr++) {
+      const GLuint mesaAttr = vp->index_to_input[attr];
+      const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj;
+      const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */
+
+      if (firstStride < 0) {
+         firstStride = stride;
+         user_memory = !bufObj || !bufObj->Name;
+      }
+      else if (firstStride != stride) {
+         return GL_FALSE;
+      }
+
+      if (!bufObj || !bufObj->Name) {
+         /* Try to detect if the client-space arrays are "close" to each
+          * other.  Compare pointer differences directly: abs() takes an
+          * int and would truncate the distance. */
+         if (!user_memory) {
+            return GL_FALSE;
+         }
+         if (!client_addr) {
+            client_addr = arrays[mesaAttr]->Ptr;
+         }
+         else if (arrays[mesaAttr]->Ptr - client_addr > firstStride ||
+                  client_addr - arrays[mesaAttr]->Ptr > firstStride) {
+            return GL_FALSE; /* arrays start too far apart */
+         }
+      }
+      else if (!firstBufObj) {
+         if (user_memory) {
+            return GL_FALSE;
+         }
+         firstBufObj = bufObj;
+      }
+      else if (bufObj != firstBufObj) {
+         return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Set up for drawing interleaved arrays that all live in one VBO or all
+ * live in user space: one buffer at the lowest array address is bound.
+ * \param vbuffer  returns vertex buffer info
+ * \param velements  returns vertex element info
+ */
+static void
+setup_interleaved_attribs(struct gl_context *ctx,
+                          const struct st_vertex_program *vp,
+                          const struct st_vp_variant *vpv,
+                          const struct gl_client_array **arrays,
+                          struct pipe_vertex_buffer *vbuffer,
+                          struct pipe_vertex_element velements[],
+                          unsigned max_index)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   GLuint attr;
+   const GLubyte *low_addr = NULL;
+
+   /* Find the lowest address; NULL is valid (offset 0 into a VBO). */
+   for (attr = 0; attr < vpv->num_inputs; attr++) {
+      const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
+
+      low_addr = (attr == 0) ? start : MIN2(low_addr, start);
+   }
+
+   for (attr = 0; attr < vpv->num_inputs; attr++) {
+      const GLuint mesaAttr = vp->index_to_input[attr];
+      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
+      struct st_buffer_object *stobj = st_buffer_object(bufobj);
+      GLsizei stride = arrays[mesaAttr]->StrideB;
+
+      if (attr == 0) {
+         if (bufobj && bufobj->Name) {
+            vbuffer->buffer = NULL;
+            pipe_resource_reference(&vbuffer->buffer, stobj->buffer);
+            vbuffer->buffer_offset = pointer_to_offset(low_addr);
+         } else {
+            vbuffer->buffer =
+               pipe_user_buffer_create(pipe->screen, (void*)low_addr,
+                                       stride * (max_index + 1),
+                                       PIPE_BIND_VERTEX_BUFFER);
+            vbuffer->buffer_offset = 0;
+
+            /* Track user vertex buffers. */
+            pipe_resource_reference(&st->user_vb[0], vbuffer->buffer);
+            st->user_vb_stride[0] = stride;
+            st->num_user_vbs = 1;
+         }
+         vbuffer->stride = stride; /* in bytes */
+      }
+
+      velements[attr].src_offset =
+         (unsigned) (arrays[mesaAttr]->Ptr - low_addr);
+      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+      velements[attr].vertex_buffer_index = 0;
+      velements[attr].src_format =
+         st_pipe_vertex_format(arrays[mesaAttr]->Type,
+                               arrays[mesaAttr]->Size,
+                               arrays[mesaAttr]->Format,
+                               arrays[mesaAttr]->Normalized);
+      assert(velements[attr].src_format);
+   }
+}
+
+
+/**
+ * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each
+ * vertex attribute.
+ * \param vbuffer  returns vertex buffer info
+ * \param velements  returns vertex element info
+ */
+static void
+setup_non_interleaved_attribs(struct gl_context *ctx,
+                              const struct st_vertex_program *vp,
+                              const struct st_vp_variant *vpv,
+                              const struct gl_client_array **arrays,
+                              struct pipe_vertex_buffer vbuffer[],
+                              struct pipe_vertex_element velements[],
+                              unsigned max_index)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   GLuint attr;
+
+   for (attr = 0; attr < vpv->num_inputs; attr++) {
+      const GLuint mesaAttr = vp->index_to_input[attr];
+      struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj;
+      GLsizei stride = arrays[mesaAttr]->StrideB;
+
+      if (bufobj && bufobj->Name) {
+         /* Attribute data is in a VBO.
+          * Recall that for VBOs, the gl_client_array->Ptr field is
+          * really an offset from the start of the VBO, not a pointer.
+          */
+         struct st_buffer_object *stobj = st_buffer_object(bufobj);
+         assert(stobj->buffer);
+
+         vbuffer[attr].buffer = NULL;
+         pipe_resource_reference(&vbuffer[attr].buffer, stobj->buffer);
+         vbuffer[attr].buffer_offset = pointer_to_offset(arrays[mesaAttr]->Ptr);
+      }
+      else {
+         /* wrap user data */
+         if (arrays[mesaAttr]->Ptr) {
+            vbuffer[attr].buffer =
+               pipe_user_buffer_create(pipe->screen,
+                                       (void *) arrays[mesaAttr]->Ptr,
+                                       stride * (max_index + 1),
+                                       PIPE_BIND_VERTEX_BUFFER);
+         }
+         else {
+            /* no array, use ctx->Current.Attrib[] value */
+            uint bytes = sizeof(ctx->Current.Attrib[0]);
+            vbuffer[attr].buffer =
+               pipe_user_buffer_create(pipe->screen,
+                                       (void *) ctx->Current.Attrib[mesaAttr],
+                                       bytes,
+                                       PIPE_BIND_VERTEX_BUFFER);
+            stride = 0;
+         }
+
+         vbuffer[attr].buffer_offset = 0;
+
+         /* Track user vertex buffers (this attribute's, not vbuffer[0]). */
+         pipe_resource_reference(&st->user_vb[attr], vbuffer[attr].buffer);
+         st->user_vb_stride[attr] = stride;
+         st->num_user_vbs = MAX2(st->num_user_vbs, attr+1);
+      }
+
+      /* common-case setup */
+      vbuffer[attr].stride = stride; /* in bytes */
+
+      velements[attr].src_offset = 0;
+      velements[attr].instance_divisor = arrays[mesaAttr]->InstanceDivisor;
+      velements[attr].vertex_buffer_index = attr;
+      velements[attr].src_format
+         = st_pipe_vertex_format(arrays[mesaAttr]->Type,
+                                 arrays[mesaAttr]->Size,
+                                 arrays[mesaAttr]->Format,
+                                 arrays[mesaAttr]->Normalized);
+      assert(velements[attr].src_format);
+   }
+}
+
+
+static void
+setup_index_buffer(struct gl_context *ctx,
+                   const struct _mesa_index_buffer *ib,
+                   struct pipe_index_buffer *ibuffer)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+
+   /* Start from an all-zero descriptor; it is left that way when there
+    * is no index buffer or the index type is not recognized. */
+   memset(ibuffer, 0, sizeof(*ibuffer));
+   if (!ib)
+      return;
+
+   switch (ib->type) {
+   case GL_UNSIGNED_INT:
+      ibuffer->index_size = 4;
+      break;
+   case GL_UNSIGNED_SHORT:
+      ibuffer->index_size = 2;
+      break;
+   case GL_UNSIGNED_BYTE:
+      ibuffer->index_size = 1;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   if (!ib->obj || !ib->obj->Name) {
+      /* elements/indices are in user space memory */
+      ibuffer->buffer =
+         pipe_user_buffer_create(pipe->screen, (void *) ib->ptr,
+                                 ib->count * ibuffer->index_size,
+                                 PIPE_BIND_INDEX_BUFFER);
+   }
+   else {
+      /* elements/indexes are in a real VBO */
+      struct st_buffer_object *stobj = st_buffer_object(ib->obj);
+      pipe_resource_reference(&ibuffer->buffer, stobj->buffer);
+      ibuffer->offset = pointer_to_offset(ib->ptr);
+   }
+}
+
+/**
+ * Walk the uniforms of the current vertex, geometry and fragment shader
+ * programs before drawing and emit a warning for each one that has not
+ * been initialized.  Programs that are unset or not linked are skipped.
+ */
+static void
+check_uniforms(struct gl_context *ctx)
+{
+   struct gl_shader_program *programs[3];
+   unsigned stage, u;
+
+   programs[0] = ctx->Shader.CurrentVertexProgram;
+   programs[1] = ctx->Shader.CurrentGeometryProgram;
+   programs[2] = ctx->Shader.CurrentFragmentProgram;
+
+   for (stage = 0; stage < 3; stage++) {
+      const struct gl_shader_program *prog = programs[stage];
+
+      if (prog != NULL && prog->LinkStatus) {
+         for (u = 0; u < prog->Uniforms->NumUniforms; u++) {
+            const struct gl_uniform *uniform = &prog->Uniforms->Uniforms[u];
+
+            if (uniform->Initialized)
+               continue;
+            _mesa_warning(ctx,
+                          "Using shader with uninitialized uniform: %s",
+                          uniform->Name);
+         }
+      }
+   }
+}
+
+
+/**
+ * Translate OpenGL primitive type (GL_POINTS, GL_TRIANGLE_STRIP, etc) to
+ * the corresponding Gallium type.
+ */
+static unsigned
+translate_prim(const struct gl_context *ctx, unsigned prim)
+{
+   /* GL prims should match Gallium prims, spot-check a few */
+   assert(GL_POINTS == PIPE_PRIM_POINTS);
+   assert(GL_QUADS == PIPE_PRIM_QUADS);
+   assert(GL_TRIANGLE_STRIP_ADJACENCY == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY);
+
+   if (prim != GL_QUAD_STRIP)
+      return prim;
+
+   /* Avoid quadstrips if it's easy to do so.  It's important to do the
+    * correct trimming when the prim type changes; callers handle that. */
+   if (ctx->Light.ShadeModel == GL_FLAT ||
+       ctx->Polygon.FrontMode != GL_FILL ||
+       ctx->Polygon.BackMode != GL_FILL)
+      return GL_QUAD_STRIP;
+
+   return GL_TRIANGLE_STRIP;
+}
+
+
+/**
+ * Rebuild the gallium vertex buffer and vertex element state from the
+ * given client arrays and bind it through the CSO context.
+ * \param arrays  the vertex arrays, indexed by Mesa attribute
+ * \param max_index  highest vertex index used; sizes user-space buffers
+ */
+static void
+st_validate_varrays(struct gl_context *ctx,
+                    const struct gl_client_array **arrays,
+                    unsigned max_index)
+{
+   struct st_context *st = st_context(ctx);
+   /* The program and variant must be fetched after state validation,
+    * so the caller has to validate state before calling this. */
+   const struct st_vertex_program *vp = st->vp;
+   const struct st_vp_variant *vpv = st->vp_variant;
+   const unsigned num_velements = vpv->num_inputs;
+   struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS];
+   struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS];
+   unsigned num_vbuffers;
+   unsigned n;
+
+   memset(velements, 0, sizeof(velements[0]) * num_velements);
+
+   /* Drop the references to the user vertex buffers tracked so far;
+    * the setup helpers below record the new ones. */
+   for (n = 0; n < st->num_user_vbs; n++) {
+      pipe_resource_reference(&st->user_vb[n], NULL);
+   }
+   st->num_user_vbs = 0;
+
+   /* Fill in vbuffer[] and velements[]: interleaved arrays share a single
+    * vertex buffer, otherwise there is one buffer per input. */
+   if (!is_interleaved_arrays(vp, vpv, arrays)) {
+      setup_non_interleaved_attribs(ctx, vp, vpv, arrays,
+                                    vbuffer, velements, max_index);
+      num_vbuffers = num_velements;
+   }
+   else {
+      setup_interleaved_attribs(ctx, vp, vpv, arrays, vbuffer, velements,
+                                max_index);
+      num_vbuffers = (num_velements == 0) ? 0 : 1;
+   }
+
+   cso_set_vertex_buffers(st->cso_context, num_vbuffers, vbuffer);
+   cso_set_vertex_elements(st->cso_context, num_velements, velements);
+
+   /* unreference buffers (frees wrapped user-space buffer objects)
+    * This is OK, because the pipe driver should reference buffers by itself
+    * in set_vertex_buffers. */
+   for (n = 0; n < num_vbuffers; n++) {
+      pipe_resource_reference(&vbuffer[n].buffer, NULL);
+      assert(!vbuffer[n].buffer);
+   }
+}
+
+
+/**
+ * Draw callback plugged into the VBO module (see st_init_draw()); called
+ * when there is something to render.  Validates state, sets the index
+ * buffer and issues at most one pipe->draw_vbo() per entry of prims[].
+ */
+void
+st_draw_vbo(struct gl_context *ctx,
+            const struct gl_client_array **arrays,
+            const struct _mesa_prim *prims,
+            GLuint nr_prims,
+            const struct _mesa_index_buffer *ib,
+	    GLboolean index_bounds_valid,
+            GLuint min_index,
+            GLuint max_index)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_index_buffer ibuffer;
+   struct pipe_draw_info info;
+   unsigned i;
+   GLboolean new_array = GL_TRUE;
+   /* Fix this (Bug 34378):
+   GLboolean new_array =
+         st->dirty.st && (st->dirty.mesa & (_NEW_ARRAY | _NEW_PROGRAM)) != 0;*/
+
+   /* Mesa core state should have been validated already */
+   assert(ctx->NewState == 0x0);
+
+   if (ib) {
+      /* Gallium probably doesn't want this in some cases. */
+      if (!index_bounds_valid)
+         if (!vbo_all_varyings_in_vbos(arrays))
+            vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
+   } else {
+      /* Get min/max index for non-indexed drawing. */
+      min_index = ~0;
+      max_index = 0;
+
+      for (i = 0; i < nr_prims; i++) {
+         min_index = MIN2(min_index, prims[i].start);
+         max_index = MAX2(max_index, prims[i].start + prims[i].count - 1);
+      }
+   }
+
+   /* Validate state. */
+   if (st->dirty.st) {
+      GLboolean vertDataEdgeFlags;
+
+      /* sanity check for the byte-wise pointer arithmetic on array Ptrs */
+      assert(sizeof(arrays[0]->Ptr[0]) == 1);
+
+      vertDataEdgeFlags = arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj &&
+                          arrays[VERT_ATTRIB_EDGEFLAG]->BufferObj->Name;
+      if (vertDataEdgeFlags != st->vertdata_edgeflags) {
+         st->vertdata_edgeflags = vertDataEdgeFlags;
+         st->dirty.st |= ST_NEW_EDGEFLAGS_DATA;
+      }
+
+      st_validate_state(st);
+
+      if (new_array) {
+         st_validate_varrays(ctx, arrays, max_index);
+      }
+
+#if 0
+      if (MESA_VERBOSE & VERBOSE_GLSL) {
+         check_uniforms(ctx);
+      }
+#else
+      (void) check_uniforms;
+#endif
+   }
+
+   /* Notify the driver that the content of user buffers may have been
+    * changed. */
+   if (!new_array && st->num_user_vbs) {
+      for (i = 0; i < st->num_user_vbs; i++) {
+         if (st->user_vb[i]) {
+            unsigned stride = st->user_vb_stride[i];
+
+            if (stride) {
+               pipe->redefine_user_buffer(pipe, st->user_vb[i],
+                                          min_index * stride,
+                                          (max_index + 1 - min_index) * stride);
+            } else {
+               /* stride == 0 */
+               pipe->redefine_user_buffer(pipe, st->user_vb[i],
+                                          0, st->user_vb[i]->width0);
+            }
+         }
+      }
+   }
+
+   setup_index_buffer(ctx, ib, &ibuffer);
+   pipe->set_index_buffer(pipe, &ibuffer);
+
+   util_draw_init_info(&info);
+   if (ib) {
+      info.indexed = TRUE;
+      if (min_index != ~0 && max_index != ~0) {
+         info.min_index = min_index;
+         info.max_index = max_index;
+      }
+   }
+
+   info.primitive_restart = st->ctx->Array.PrimitiveRestart;
+   info.restart_index = st->ctx->Array.RestartIndex;
+
+   /* do actual drawing */
+   for (i = 0; i < nr_prims; i++) {
+      info.mode = translate_prim( ctx, prims[i].mode );
+      info.start = prims[i].start;
+      info.count = prims[i].count;
+      info.instance_count = prims[i].num_instances;
+      info.index_bias = prims[i].basevertex;
+      if (!ib) {
+         info.min_index = info.start;
+         info.max_index = info.start + info.count - 1;
+      }
+
+      if (u_trim_pipe_prim(info.mode, &info.count))
+         pipe->draw_vbo(pipe, &info);
+   }
+
+   pipe_resource_reference(&ibuffer.buffer, NULL);
+}
+
+
+/** Hook st_draw_vbo() into the VBO module; set up st->draw if needed. */
+void st_init_draw( struct st_context *st )
+{
+   vbo_set_draw_func(st->ctx, st_draw_vbo);
+
+#if FEATURE_feedback || FEATURE_rastpos
+   /* for selection/feedback */
+   st->draw = draw_create(st->pipe);
+
+   /* Disable draw options that might convert points/lines to tris, etc.
+    * as that would foul-up feedback/selection mode.
+    */
+   draw_wide_line_threshold(st->draw, 1000.0f);
+   draw_wide_point_threshold(st->draw, 1000.0f);
+   draw_enable_line_stipple(st->draw, FALSE);
+   draw_enable_point_sprites(st->draw, FALSE);
+#endif /* FEATURE_feedback || FEATURE_rastpos */
+}
+
+
+void st_destroy_draw( struct st_context *st )
+{
+#if FEATURE_feedback || FEATURE_rastpos
+   draw_destroy(st->draw); /* created by st_init_draw() */
+#endif
+}
+
+
diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c
index a532e089c..577ee6189 100644
--- a/mesalib/src/mesa/state_tracker/st_format.c
+++ b/mesalib/src/mesa/state_tracker/st_format.c
@@ -113,12 +113,9 @@ st_format_datatype(enum pipe_format format)
       return GL_UNSIGNED_SHORT;
    }
    else {
-      /* compressed format? */
-      assert(0);
+      /* probably a compressed format, unsupported anyway */
+      return GL_NONE;
    }
-
-   assert(0);
-   return GL_NONE;
 }
 
 
@@ -1098,3 +1095,55 @@ st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2)
 
    return GL_FALSE;
 }
+
+
+
+/**
+ * Expand a color given in terms of a GL base format into a full RGBA
+ * color.  This is used for translating the texture border color and
+ * the clear color, which are interpreted according to the base format.
+ * For example, for GL_LUMINANCE the luminance in colorIn[0] is
+ * replicated to R, G and B, and alpha becomes 1.
+ */
+void
+st_translate_color(const GLfloat colorIn[4], GLenum baseFormat,
+                   GLfloat colorOut[4])
+{
+   /* Start from a plain copy; formats not listed below keep it as is. */
+   GLfloat r = colorIn[0], g = colorIn[1];
+   GLfloat b = colorIn[2], a = colorIn[3];
+
+   switch (baseFormat) {
+   case GL_RED:
+      g = b = 0.0F;
+      a = 1.0F;
+      break;
+   case GL_RG:
+      b = 0.0F;
+      a = 1.0F;
+      break;
+   case GL_RGB:
+      a = 1.0F;
+      break;
+   case GL_ALPHA:
+      r = g = b = 0.0F;
+      break;
+   case GL_LUMINANCE:
+      g = b = r;
+      a = 1.0F;
+      break;
+   case GL_LUMINANCE_ALPHA:
+      g = b = r;
+      break;
+   case GL_INTENSITY:
+      g = b = a = r;
+      break;
+   default:
+      break;
+   }
+
+   colorOut[0] = r;
+   colorOut[1] = g;
+   colorOut[2] = b;
+   colorOut[3] = a;
+}
diff --git a/mesalib/src/mesa/state_tracker/st_format.h b/mesalib/src/mesa/state_tracker/st_format.h
index ffcaf402a..0fb570f6e 100644
--- a/mesalib/src/mesa/state_tracker/st_format.h
+++ b/mesalib/src/mesa/state_tracker/st_format.h
@@ -1,80 +1,86 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * Copyright (c) 2010 VMware, Inc.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef ST_FORMAT_H
-#define ST_FORMAT_H
-
-#include "main/formats.h"
-#include "main/glheader.h"
-
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
-
-struct gl_context;
-struct pipe_screen;
-
-extern GLenum
-st_format_datatype(enum pipe_format format);
-
-
-extern enum pipe_format
-st_mesa_format_to_pipe_format(gl_format mesaFormat);
-
-extern gl_format
-st_pipe_format_to_mesa_format(enum pipe_format pipeFormat);
-
-
-extern enum pipe_format
-st_choose_format(struct pipe_screen *screen, GLenum internalFormat,
-                 enum pipe_texture_target target, unsigned sample_count,
-                 unsigned tex_usage);
-
-extern enum pipe_format
-st_choose_renderbuffer_format(struct pipe_screen *screen,
-                              GLenum internalFormat, unsigned sample_count);
-
-
-gl_format
-st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat,
-				  GLenum format, GLenum type, GLboolean renderable);
-
-extern gl_format
-st_ChooseTextureFormat(struct gl_context * ctx, GLint internalFormat,
-                       GLenum format, GLenum type);
-
-
-extern GLboolean
-st_equal_formats(enum pipe_format pFormat, GLenum format, GLenum type);
-
-/* can we use a sampler view to translate these formats
-   only used to make TFP so far */
-extern GLboolean
-st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2);
-
-#endif /* ST_FORMAT_H */
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright (c) 2010 VMware, Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef ST_FORMAT_H
+#define ST_FORMAT_H
+
+#include "main/formats.h"
+#include "main/glheader.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+
+struct gl_context;
+struct pipe_screen;
+
+extern GLenum
+st_format_datatype(enum pipe_format format);
+
+
+extern enum pipe_format
+st_mesa_format_to_pipe_format(gl_format mesaFormat);
+
+extern gl_format
+st_pipe_format_to_mesa_format(enum pipe_format pipeFormat);
+
+
+extern enum pipe_format
+st_choose_format(struct pipe_screen *screen, GLenum internalFormat,
+                 enum pipe_texture_target target, unsigned sample_count,
+                 unsigned tex_usage);
+
+extern enum pipe_format
+st_choose_renderbuffer_format(struct pipe_screen *screen,
+                              GLenum internalFormat, unsigned sample_count);
+
+
+gl_format
+st_ChooseTextureFormat_renderable(struct gl_context *ctx, GLint internalFormat,
+				  GLenum format, GLenum type, GLboolean renderable);
+
+extern gl_format
+st_ChooseTextureFormat(struct gl_context * ctx, GLint internalFormat,
+                       GLenum format, GLenum type);
+
+
+extern GLboolean
+st_equal_formats(enum pipe_format pFormat, GLenum format, GLenum type);
+
+/* Can we use a sampler view to translate between these formats?
+   Only used to make TFP so far. */
+extern GLboolean
+st_sampler_compat_formats(enum pipe_format format1, enum pipe_format format2);
+
+
+extern void
+st_translate_color(const GLfloat colorIn[4], GLenum baseFormat,
+                   GLfloat colorOut[4]);
+
+
+#endif /* ST_FORMAT_H */
diff --git a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
index 18eb3be68..4bf682808 100644
--- a/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/mesalib/src/mesa/state_tracker/st_gen_mipmap.c
@@ -336,6 +336,11 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
    if (lastLevel == 0)
       return;
 
+   /* The texture isn't in a "complete" state yet so set the expected
+    * lastLevel here, since it won't get done in st_finalize_texture().
+    */
+   stObj->lastLevel = lastLevel;
+
    if (pt->last_level < lastLevel) {
       /* The current gallium texture doesn't have space for all the
        * mipmap levels we need to generate.  So allocate a new texture.
@@ -353,11 +358,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
                                     oldTex->array_size,
                                     oldTex->bind);
 
-      /* The texture isn't in a "complete" state yet so set the expected
-       * lastLevel here, since it won't get done in st_finalize_texture().
-       */
-      stObj->lastLevel = lastLevel;
-
       /* This will copy the old texture's base image into the new texture
        * which we just allocated.
        */
@@ -366,8 +366,6 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
       /* release the old tex (will likely be freed too) */
       pipe_resource_reference(&oldTex, NULL);
       pipe_sampler_view_reference(&stObj->sampler_view, NULL);
-
-      pt = stObj->pt;
    }
    else {
       /* Make sure that the base texture image data is present in the
@@ -376,6 +374,8 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
       st_finalize_texture(ctx, st->pipe, texObj);
    }
 
+   pt = stObj->pt;
+
    assert(pt->last_level >= lastLevel);
 
    /* Try to generate the mipmap by rendering/texturing.  If that fails,
diff --git a/mesalib/src/mesa/vbo/vbo_exec.h b/mesalib/src/mesa/vbo/vbo_exec.h
index 2508016c0..ca100e428 100644
--- a/mesalib/src/mesa/vbo/vbo_exec.h
+++ b/mesalib/src/mesa/vbo/vbo_exec.h
@@ -1,199 +1,198 @@
-/**************************************************************************
-
-Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
- *
- */
-
-#ifndef __VBO_EXEC_H__
-#define __VBO_EXEC_H__
-
-#include "main/mfeatures.h"
-#include "main/mtypes.h"
-#include "vbo.h"
-#include "vbo_attrib.h"
-
-
-#define VBO_MAX_PRIM 64
-
-/* Wierd implementation stuff:
- */
-#define VBO_VERT_BUFFER_SIZE (1024*64)	/* bytes */
-#define VBO_MAX_ATTR_CODEGEN 16 
-#define ERROR_ATTRIB 16
-
-
-/** Current vertex program mode */
-enum vp_mode {
-   VP_NONE,   /**< fixed function */
-   VP_NV,     /**< NV vertex program */
-   VP_ARB     /**< ARB vertex program or GLSL vertex shader */
-};
-
-
-struct vbo_exec_eval1_map {
-   struct gl_1d_map *map;
-   GLuint sz;
-};
-
-struct vbo_exec_eval2_map {
-   struct gl_2d_map *map;
-   GLuint sz;
-};
-
-
-
-struct vbo_exec_copied_vtx {
-   GLfloat buffer[VBO_ATTRIB_MAX * 4 * VBO_MAX_COPIED_VERTS];
-   GLuint nr;
-};
-
-
-typedef void (*vbo_attrfv_func)( const GLfloat * );
-
-
-struct vbo_exec_context
-{
-   struct gl_context *ctx;   
-   GLvertexformat vtxfmt;
-
-   struct {
-      struct gl_buffer_object *bufferobj;
-
-      GLuint vertex_size;       /* in dwords */
-
-      struct _mesa_prim prim[VBO_MAX_PRIM];
-      GLuint prim_count;
-
-      GLfloat *buffer_map;
-      GLfloat *buffer_ptr;              /* cursor, points into buffer */
-      GLuint   buffer_used;             /* in bytes */
-      GLfloat vertex[VBO_ATTRIB_MAX*4]; /* current vertex */
-
-      GLuint vert_count;
-      GLuint max_vert;
-      struct vbo_exec_copied_vtx copied;
-
-      GLubyte attrsz[VBO_ATTRIB_MAX];
-      GLubyte active_sz[VBO_ATTRIB_MAX];
-
-      GLfloat *attrptr[VBO_ATTRIB_MAX]; 
-      struct gl_client_array arrays[VERT_ATTRIB_MAX];
-
-      /* According to program mode, the values above plus current
-       * values are squashed down to the 32 attributes passed to the
-       * vertex program below:
-       */
-      enum vp_mode program_mode;
-      GLuint enabled_flags;
-      const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
-   } vtx;
-
-   
-   struct {
-      GLboolean recalculate_maps;
-      struct vbo_exec_eval1_map map1[VERT_ATTRIB_MAX];
-      struct vbo_exec_eval2_map map2[VERT_ATTRIB_MAX];
-   } eval;
-
-   struct {
-      enum vp_mode program_mode;
-      GLuint enabled_flags;
-      GLuint array_obj;
-
-      /* These just mirror the current arrayobj (todo: make arrayobj
-       * look like this and remove the mirror):
-       */
-      const struct gl_client_array *legacy_array[16];
-      const struct gl_client_array *generic_array[16];
-
-      /* Arrays and current values manipulated according to program
-       * mode, etc.  These are the attributes as seen by vertex
-       * programs:
-       */
-      const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
-   } array;
-
-#ifdef DEBUG
-   GLint flush_call_depth;
-#endif
-};
-
-
-
-/* External API:
- */
-void vbo_exec_init( struct gl_context *ctx );
-void vbo_exec_destroy( struct gl_context *ctx );
-void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
-void vbo_exec_FlushVertices_internal( struct gl_context *ctx, GLboolean unmap );
-
-void vbo_exec_BeginVertices( struct gl_context *ctx );
-void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags );
-
-
-/* Internal functions:
- */
-void vbo_exec_array_init( struct vbo_exec_context *exec );
-void vbo_exec_array_destroy( struct vbo_exec_context *exec );
-
-
-void vbo_exec_vtx_init( struct vbo_exec_context *exec );
-void vbo_exec_vtx_destroy( struct vbo_exec_context *exec );
-
-#if FEATURE_beginend
-
-void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
-void vbo_exec_vtx_map( struct vbo_exec_context *exec );
-
-#else /* FEATURE_beginend */
-
-static INLINE void
-vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
-{
-}
-
-static INLINE void
-vbo_exec_vtx_map( struct vbo_exec_context *exec )
-{
-}
-
-#endif /* FEATURE_beginend */
-
-void vbo_exec_vtx_wrap( struct vbo_exec_context *exec );
-
-void vbo_exec_eval_update( struct vbo_exec_context *exec );
-
-void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec, 
-				     GLfloat u, GLfloat v );
-
-void vbo_exec_do_EvalCoord1f( struct vbo_exec_context *exec,
-				     GLfloat u);
-
-#endif
+/**************************************************************************
+
+Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __VBO_EXEC_H__
+#define __VBO_EXEC_H__
+
+#include "main/mfeatures.h"
+#include "main/mtypes.h"
+#include "vbo.h"
+#include "vbo_attrib.h"
+
+
+#define VBO_MAX_PRIM 64
+
+/* Weird implementation stuff:
+ */
+#define VBO_VERT_BUFFER_SIZE (1024*64)	/* bytes */
+#define VBO_MAX_ATTR_CODEGEN 16 
+#define ERROR_ATTRIB 16
+
+
+/** Current vertex program mode */
+enum vp_mode {
+   VP_NONE,   /**< fixed function */
+   VP_NV,     /**< NV vertex program */
+   VP_ARB     /**< ARB vertex program or GLSL vertex shader */
+};
+
+
+struct vbo_exec_eval1_map {
+   struct gl_1d_map *map;
+   GLuint sz;
+};
+
+struct vbo_exec_eval2_map {
+   struct gl_2d_map *map;
+   GLuint sz;
+};
+
+
+
+struct vbo_exec_copied_vtx {
+   GLfloat buffer[VBO_ATTRIB_MAX * 4 * VBO_MAX_COPIED_VERTS];
+   GLuint nr;
+};
+
+
+typedef void (*vbo_attrfv_func)( const GLfloat * );
+
+
+struct vbo_exec_context
+{
+   struct gl_context *ctx;   
+   GLvertexformat vtxfmt;
+
+   struct {
+      struct gl_buffer_object *bufferobj;
+
+      GLuint vertex_size;       /* in dwords */
+
+      struct _mesa_prim prim[VBO_MAX_PRIM];
+      GLuint prim_count;
+
+      GLfloat *buffer_map;
+      GLfloat *buffer_ptr;              /* cursor, points into buffer */
+      GLuint   buffer_used;             /* in bytes */
+      GLfloat vertex[VBO_ATTRIB_MAX*4]; /* current vertex */
+
+      GLuint vert_count;
+      GLuint max_vert;
+      struct vbo_exec_copied_vtx copied;
+
+      GLubyte attrsz[VBO_ATTRIB_MAX];
+      GLubyte active_sz[VBO_ATTRIB_MAX];
+
+      GLfloat *attrptr[VBO_ATTRIB_MAX]; 
+      struct gl_client_array arrays[VERT_ATTRIB_MAX];
+
+      /* According to program mode, the values above plus current
+       * values are squashed down to the 32 attributes passed to the
+       * vertex program below:
+       */
+      enum vp_mode program_mode;
+      GLuint enabled_flags;
+      const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
+   } vtx;
+
+   
+   struct {
+      GLboolean recalculate_maps;
+      struct vbo_exec_eval1_map map1[VERT_ATTRIB_MAX];
+      struct vbo_exec_eval2_map map2[VERT_ATTRIB_MAX];
+   } eval;
+
+   struct {
+      enum vp_mode program_mode;
+      GLuint enabled_flags;
+      GLuint array_obj;
+
+      /* These just mirror the current arrayobj (todo: make arrayobj
+       * look like this and remove the mirror):
+       */
+      const struct gl_client_array *legacy_array[16];
+      const struct gl_client_array *generic_array[16];
+
+      /* Arrays and current values manipulated according to program
+       * mode, etc.  These are the attributes as seen by vertex
+       * programs:
+       */
+      const struct gl_client_array *inputs[VERT_ATTRIB_MAX];
+   } array;
+
+#ifdef DEBUG
+   GLint flush_call_depth;
+#endif
+};
+
+
+
+/* External API:
+ */
+void vbo_exec_init( struct gl_context *ctx );
+void vbo_exec_destroy( struct gl_context *ctx );
+void vbo_exec_invalidate_state( struct gl_context *ctx, GLuint new_state );
+
+void vbo_exec_BeginVertices( struct gl_context *ctx );
+void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags );
+
+
+/* Internal functions:
+ */
+void vbo_exec_array_init( struct vbo_exec_context *exec );
+void vbo_exec_array_destroy( struct vbo_exec_context *exec );
+
+
+void vbo_exec_vtx_init( struct vbo_exec_context *exec );
+void vbo_exec_vtx_destroy( struct vbo_exec_context *exec );
+
+#if FEATURE_beginend
+
+void vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap );
+void vbo_exec_vtx_map( struct vbo_exec_context *exec );
+
+#else /* FEATURE_beginend */
+
+static INLINE void
+vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
+{
+}
+
+static INLINE void
+vbo_exec_vtx_map( struct vbo_exec_context *exec )
+{
+}
+
+#endif /* FEATURE_beginend */
+
+void vbo_exec_vtx_wrap( struct vbo_exec_context *exec );
+
+void vbo_exec_eval_update( struct vbo_exec_context *exec );
+
+void vbo_exec_do_EvalCoord2f( struct vbo_exec_context *exec, 
+				     GLfloat u, GLfloat v );
+
+void vbo_exec_do_EvalCoord1f( struct vbo_exec_context *exec,
+				     GLfloat u);
+
+#endif
diff --git a/mesalib/src/mesa/vbo/vbo_exec_api.c b/mesalib/src/mesa/vbo/vbo_exec_api.c
index 0c59c41ba..c4d39d8f1 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_api.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_api.c
@@ -1,1080 +1,1107 @@
-/**************************************************************************
-
-Copyright 2002-2008 Tungsten Graphics Inc., Cedar Park, Texas.
-
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the "Software"),
-to deal in the Software without restriction, including without limitation
-on the rights to use, copy, modify, merge, publish, distribute, sub
-license, and/or sell copies of the Software, and to permit persons to whom
-the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice (including the next
-paragraph) shall be included in all copies or substantial portions of the
-Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
-TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
-DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
-OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
-USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "main/vtxfmt.h"
-#include "main/dlist.h"
-#include "main/eval.h"
-#include "main/state.h"
-#include "main/light.h"
-#include "main/api_arrayelt.h"
-#include "main/api_noop.h"
-#include "main/dispatch.h"
-
-#include "vbo_context.h"
-
-#ifdef ERROR
-#undef ERROR
-#endif
-
-
-/** ID/name for immediate-mode VBO */
-#define IMM_BUFFER_NAME 0xaabbccdd
-
-
-static void reset_attrfv( struct vbo_exec_context *exec );
-
-
-/**
- * Close off the last primitive, execute the buffer, restart the
- * primitive.  
- */
-static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
-{
-   if (exec->vtx.prim_count == 0) {
-      exec->vtx.copied.nr = 0;
-      exec->vtx.vert_count = 0;
-      exec->vtx.buffer_ptr = exec->vtx.buffer_map;
-   }
-   else {
-      GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
-      GLuint last_count;
-
-      if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
-	 GLint i = exec->vtx.prim_count - 1;
-	 assert(i >= 0);
-	 exec->vtx.prim[i].count = (exec->vtx.vert_count - 
-				    exec->vtx.prim[i].start);
-      }
-
-      last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
-
-      /* Execute the buffer and save copied vertices.
-       */
-      if (exec->vtx.vert_count)
-	 vbo_exec_vtx_flush( exec, GL_FALSE );
-      else {
-	 exec->vtx.prim_count = 0;
-	 exec->vtx.copied.nr = 0;
-      }
-
-      /* Emit a glBegin to start the new list.
-       */
-      assert(exec->vtx.prim_count == 0);
-
-      if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
-	 exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
-	 exec->vtx.prim[0].start = 0;
-	 exec->vtx.prim[0].count = 0;
-	 exec->vtx.prim_count++;
-      
-	 if (exec->vtx.copied.nr == last_count)
-	    exec->vtx.prim[0].begin = last_begin;
-      }
-   }
-}
-
-
-/**
- * Deal with buffer wrapping where provoked by the vertex buffer
- * filling up, as opposed to upgrade_vertex().
- */
-void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
-{
-   GLfloat *data = exec->vtx.copied.buffer;
-   GLuint i;
-
-   /* Run pipeline on current vertices, copy wrapped vertices
-    * to exec->vtx.copied.
-    */
-   vbo_exec_wrap_buffers( exec );
-   
-   /* Copy stored stored vertices to start of new list. 
-    */
-   assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
-
-   for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
-      memcpy( exec->vtx.buffer_ptr, data, 
-	      exec->vtx.vertex_size * sizeof(GLfloat));
-      exec->vtx.buffer_ptr += exec->vtx.vertex_size;
-      data += exec->vtx.vertex_size;
-      exec->vtx.vert_count++;
-   }
-
-   exec->vtx.copied.nr = 0;
-}
-
-
-/**
- * Copy the active vertex's values to the ctx->Current fields.
- */
-static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
-{
-   struct gl_context *ctx = exec->ctx;
-   struct vbo_context *vbo = vbo_context(ctx);
-   GLuint i;
-
-   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
-      if (exec->vtx.attrsz[i]) {
-         /* Note: the exec->vtx.current[i] pointers point into the
-          * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
-          */
-	 GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
-         GLfloat tmp[4];
-
-         COPY_CLEAN_4V(tmp, 
-                       exec->vtx.attrsz[i], 
-                       exec->vtx.attrptr[i]);
-         
-         if (memcmp(current, tmp, sizeof(tmp)) != 0)
-         { 
-            memcpy(current, tmp, sizeof(tmp));
-	 
-            /* Given that we explicitly state size here, there is no need
-             * for the COPY_CLEAN above, could just copy 16 bytes and be
-             * done.  The only problem is when Mesa accesses ctx->Current
-             * directly.
-             */
-            vbo->currval[i].Size = exec->vtx.attrsz[i];
-
-            /* This triggers rather too much recalculation of Mesa state
-             * that doesn't get used (eg light positions).
-             */
-            if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
-                i <= VBO_ATTRIB_MAT_BACK_INDEXES)
-               ctx->NewState |= _NEW_LIGHT;
-            
-            ctx->NewState |= _NEW_CURRENT_ATTRIB;
-         }
-      }
-   }
-
-   /* Colormaterial -- this kindof sucks.
-    */
-   if (ctx->Light.ColorMaterialEnabled &&
-       exec->vtx.attrsz[VBO_ATTRIB_COLOR0]) {
-      _mesa_update_color_material(ctx, 
-				  ctx->Current.Attrib[VBO_ATTRIB_COLOR0]);
-   }
-}
-
-
-static void vbo_exec_copy_from_current( struct vbo_exec_context *exec )
-{
-   struct gl_context *ctx = exec->ctx;
-   struct vbo_context *vbo = vbo_context(ctx);
-   GLint i;
-
-   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
-      const GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
-      switch (exec->vtx.attrsz[i]) {
-      case 4: exec->vtx.attrptr[i][3] = current[3];
-      case 3: exec->vtx.attrptr[i][2] = current[2];
-      case 2: exec->vtx.attrptr[i][1] = current[1];
-      case 1: exec->vtx.attrptr[i][0] = current[0];
-	 break;
-      }
-   }
-}
-
-
-/**
- * Flush existing data, set new attrib size, replay copied vertices.
- */ 
-static void vbo_exec_wrap_upgrade_vertex( struct vbo_exec_context *exec,
-					  GLuint attr,
-					  GLuint newsz )
-{
-   struct gl_context *ctx = exec->ctx;
-   struct vbo_context *vbo = vbo_context(ctx);
-   GLint lastcount = exec->vtx.vert_count;
-   GLfloat *old_attrptr[VBO_ATTRIB_MAX];
-   GLuint old_vtx_size = exec->vtx.vertex_size;
-   GLuint oldsz = exec->vtx.attrsz[attr];
-   GLuint i;
-
-   /* Run pipeline on current vertices, copy wrapped vertices
-    * to exec->vtx.copied.
-    */
-   vbo_exec_wrap_buffers( exec );
-
-   if (unlikely(exec->vtx.copied.nr)) {
-      /* We're in the middle of a primitive, keep the old vertex
-       * format around to be able to translate the copied vertices to
-       * the new format.
-       */
-      memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
-   }
-
-   if (unlikely(oldsz)) {
-      /* Do a COPY_TO_CURRENT to ensure back-copying works for the
-       * case when the attribute already exists in the vertex and is
-       * having its size increased.
-       */
-      vbo_exec_copy_to_current( exec );
-   }
-
-   /* Heuristic: Attempt to isolate attributes received outside
-    * begin/end so that they don't bloat the vertices.
-    */
-   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
-       !oldsz && lastcount > 8 && exec->vtx.vertex_size) {
-      vbo_exec_copy_to_current( exec );
-      reset_attrfv( exec );
-   }
-
-   /* Fix up sizes:
-    */
-   exec->vtx.attrsz[attr] = newsz;
-   exec->vtx.vertex_size += newsz - oldsz;
-   exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / 
-                         (exec->vtx.vertex_size * sizeof(GLfloat)));
-   exec->vtx.vert_count = 0;
-   exec->vtx.buffer_ptr = exec->vtx.buffer_map;
-
-   if (unlikely(oldsz)) {
-      /* Size changed, recalculate all the attrptr[] values
-       */
-      GLfloat *tmp = exec->vtx.vertex;
-
-      for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
-	 if (exec->vtx.attrsz[i]) {
-	    exec->vtx.attrptr[i] = tmp;
-	    tmp += exec->vtx.attrsz[i];
-	 }
-	 else
-	    exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
-      }
-
-      /* Copy from current to repopulate the vertex with correct
-       * values.
-       */
-      vbo_exec_copy_from_current( exec );
-
-   } else {
-      /* Just have to append the new attribute at the end */
-      exec->vtx.attrptr[attr] = exec->vtx.vertex +
-	 exec->vtx.vertex_size - newsz;
-   }
-
-   /* Replay stored vertices to translate them
-    * to new format here.
-    *
-    * -- No need to replay - just copy piecewise
-    */
-   if (unlikely(exec->vtx.copied.nr)) {
-      GLfloat *data = exec->vtx.copied.buffer;
-      GLfloat *dest = exec->vtx.buffer_ptr;
-      GLuint j;
-
-      assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
-
-      for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
-	 for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
-	    GLuint sz = exec->vtx.attrsz[j];
-
-	    if (sz) {
-	       GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
-	       GLint new_offset = exec->vtx.attrptr[j] - exec->vtx.vertex;
-
-	       if (j == attr) {
-		  if (oldsz) {
-		     GLfloat tmp[4];
-		     COPY_CLEAN_4V(tmp, oldsz, data + old_offset);
-		     COPY_SZ_4V(dest + new_offset, newsz, tmp);
-		  } else {
-		     GLfloat *current = (GLfloat *)vbo->currval[j].Ptr;
-		     COPY_SZ_4V(dest + new_offset, sz, current);
-		  }
-	       }
-	       else {
-		  COPY_SZ_4V(dest + new_offset, sz, data + old_offset);
-	       }
-	    }
-	 }
-
-	 data += old_vtx_size;
-	 dest += exec->vtx.vertex_size;
-      }
-
-      exec->vtx.buffer_ptr = dest;
-      exec->vtx.vert_count += exec->vtx.copied.nr;
-      exec->vtx.copied.nr = 0;
-   }
-}
-
-
-static void vbo_exec_fixup_vertex( struct gl_context *ctx,
-				   GLuint attr, GLuint sz )
-{
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-   int i;
-
-   if (sz > exec->vtx.attrsz[attr]) {
-      /* New size is larger.  Need to flush existing vertices and get
-       * an enlarged vertex format.
-       */
-      vbo_exec_wrap_upgrade_vertex( exec, attr, sz );
-   }
-   else if (sz < exec->vtx.active_sz[attr]) {
-      static const GLfloat id[4] = { 0, 0, 0, 1 };
-
-      /* New size is smaller - just need to fill in some
-       * zeros.  Don't need to flush or wrap.
-       */
-      for (i = sz ; i <= exec->vtx.attrsz[attr] ; i++)
-	 exec->vtx.attrptr[attr][i-1] = id[i-1];
-   }
-
-   exec->vtx.active_sz[attr] = sz;
-
-   /* Does setting NeedFlush belong here?  Necessitates resetting
-    * vtxfmt on each flush (otherwise flags won't get reset
-    * afterwards).
-    */
-   if (attr == 0) 
-      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
-}
-
-
-/* 
- */
-#define ATTR( A, N, V0, V1, V2, V3 )				\
-do {								\
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;	\
-									\
-   if (unlikely(!(exec->ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT))) \
-      ctx->Driver.BeginVertices( ctx );                                 \
-   if (unlikely(exec->vtx.active_sz[A] != N))				\
-      vbo_exec_fixup_vertex(ctx, A, N);					\
-   									\
-   {								\
-      GLfloat *dest = exec->vtx.attrptr[A];			\
-      if (N>0) dest[0] = V0;					\
-      if (N>1) dest[1] = V1;					\
-      if (N>2) dest[2] = V2;					\
-      if (N>3) dest[3] = V3;					\
-   }								\
-								\
-   if ((A) == 0) {						\
-      GLuint i;							\
-								\
-      for (i = 0; i < exec->vtx.vertex_size; i++)		\
-	 exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i];		\
-								\
-      exec->vtx.buffer_ptr += exec->vtx.vertex_size;			\
-      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;	\
-								\
-      if (++exec->vtx.vert_count >= exec->vtx.max_vert)		\
-	 vbo_exec_vtx_wrap( exec );				\
-   }								\
-} while (0)
-
-
-#define ERROR() _mesa_error( ctx, GL_INVALID_ENUM, __FUNCTION__ )
-#define TAG(x) vbo_##x
-
-#include "vbo_attrib_tmp.h"
-
-
-#if FEATURE_beginend
-
-
-#if FEATURE_evaluators
-
-static void GLAPIENTRY vbo_exec_EvalCoord1f( GLfloat u )
-{
-   GET_CURRENT_CONTEXT( ctx );
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
-   {
-      GLint i;
-      if (exec->eval.recalculate_maps) 
-	 vbo_exec_eval_update( exec );
-
-      for (i = 0; i <= VBO_ATTRIB_TEX7; i++) {
-	 if (exec->eval.map1[i].map) 
-	    if (exec->vtx.active_sz[i] != exec->eval.map1[i].sz)
-	       vbo_exec_fixup_vertex( ctx, i, exec->eval.map1[i].sz );
-      }
-   }
-
-
-   memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, 
-           exec->vtx.vertex_size * sizeof(GLfloat));
-
-   vbo_exec_do_EvalCoord1f( exec, u );
-
-   memcpy( exec->vtx.vertex, exec->vtx.copied.buffer,
-           exec->vtx.vertex_size * sizeof(GLfloat));
-}
-
-static void GLAPIENTRY vbo_exec_EvalCoord2f( GLfloat u, GLfloat v )
-{
-   GET_CURRENT_CONTEXT( ctx );
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
-   {
-      GLint i;
-      if (exec->eval.recalculate_maps) 
-	 vbo_exec_eval_update( exec );
-
-      for (i = 0; i <= VBO_ATTRIB_TEX7; i++) {
-	 if (exec->eval.map2[i].map) 
-	    if (exec->vtx.active_sz[i] != exec->eval.map2[i].sz)
-	       vbo_exec_fixup_vertex( ctx, i, exec->eval.map2[i].sz );
-      }
-
-      if (ctx->Eval.AutoNormal) 
-	 if (exec->vtx.active_sz[VBO_ATTRIB_NORMAL] != 3)
-	    vbo_exec_fixup_vertex( ctx, VBO_ATTRIB_NORMAL, 3 );
-   }
-
-   memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, 
-           exec->vtx.vertex_size * sizeof(GLfloat));
-
-   vbo_exec_do_EvalCoord2f( exec, u, v );
-
-   memcpy( exec->vtx.vertex, exec->vtx.copied.buffer, 
-           exec->vtx.vertex_size * sizeof(GLfloat));
-}
-
-static void GLAPIENTRY vbo_exec_EvalCoord1fv( const GLfloat *u )
-{
-   vbo_exec_EvalCoord1f( u[0] );
-}
-
-static void GLAPIENTRY vbo_exec_EvalCoord2fv( const GLfloat *u )
-{
-   vbo_exec_EvalCoord2f( u[0], u[1] );
-}
-
-static void GLAPIENTRY vbo_exec_EvalPoint1( GLint i )
-{
-   GET_CURRENT_CONTEXT( ctx );
-   GLfloat du = ((ctx->Eval.MapGrid1u2 - ctx->Eval.MapGrid1u1) /
-		 (GLfloat) ctx->Eval.MapGrid1un);
-   GLfloat u = i * du + ctx->Eval.MapGrid1u1;
-
-   vbo_exec_EvalCoord1f( u );
-}
-
-
-static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j )
-{
-   GET_CURRENT_CONTEXT( ctx );
-   GLfloat du = ((ctx->Eval.MapGrid2u2 - ctx->Eval.MapGrid2u1) / 
-		 (GLfloat) ctx->Eval.MapGrid2un);
-   GLfloat dv = ((ctx->Eval.MapGrid2v2 - ctx->Eval.MapGrid2v1) / 
-		 (GLfloat) ctx->Eval.MapGrid2vn);
-   GLfloat u = i * du + ctx->Eval.MapGrid2u1;
-   GLfloat v = j * dv + ctx->Eval.MapGrid2v1;
-
-   vbo_exec_EvalCoord2f( u, v );
-}
-
-/* use noop eval mesh */
-#define vbo_exec_EvalMesh1 _mesa_noop_EvalMesh1
-#define vbo_exec_EvalMesh2 _mesa_noop_EvalMesh2
-
-#endif /* FEATURE_evaluators */
-
-
-/**
- * Called via glBegin.
- */
-static void GLAPIENTRY vbo_exec_Begin( GLenum mode )
-{
-   GET_CURRENT_CONTEXT( ctx ); 
-
-   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END) {
-      struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-      int i;
-
-      if (ctx->NewState) {
-	 _mesa_update_state( ctx );
-
-	 CALL_Begin(ctx->Exec, (mode));
-	 return;
-      }
-
-      if (!_mesa_valid_to_render(ctx, "glBegin")) {
-         return;
-      }
-
-      /* Heuristic: attempt to isolate attributes occuring outside
-       * begin/end pairs.
-       */
-      if (exec->vtx.vertex_size && !exec->vtx.attrsz[0]) 
-	 vbo_exec_FlushVertices_internal( ctx, GL_FALSE );
-
-      i = exec->vtx.prim_count++;
-      exec->vtx.prim[i].mode = mode;
-      exec->vtx.prim[i].begin = 1;
-      exec->vtx.prim[i].end = 0;
-      exec->vtx.prim[i].indexed = 0;
-      exec->vtx.prim[i].weak = 0;
-      exec->vtx.prim[i].pad = 0;
-      exec->vtx.prim[i].start = exec->vtx.vert_count;
-      exec->vtx.prim[i].count = 0;
-      exec->vtx.prim[i].num_instances = 1;
-
-      ctx->Driver.CurrentExecPrimitive = mode;
-   }
-   else 
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
-      
-}
-
-
-/**
- * Called via glEnd.
- */
-static void GLAPIENTRY vbo_exec_End( void )
-{
-   GET_CURRENT_CONTEXT( ctx ); 
-
-   if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
-      struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-      int idx = exec->vtx.vert_count;
-      int i = exec->vtx.prim_count - 1;
-
-      exec->vtx.prim[i].end = 1; 
-      exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start;
-
-      ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
-
-      if (exec->vtx.prim_count == VBO_MAX_PRIM)
-	 vbo_exec_vtx_flush( exec, GL_FALSE );
-   }
-   else 
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
-}
-
-
-/**
- * Called via glPrimitiveRestartNV()
- */
-static void GLAPIENTRY
-vbo_exec_PrimitiveRestartNV(void)
-{
-   GLenum curPrim;
-   GET_CURRENT_CONTEXT( ctx ); 
-
-   curPrim = ctx->Driver.CurrentExecPrimitive;
-
-   if (curPrim == PRIM_OUTSIDE_BEGIN_END) {
-      _mesa_error( ctx, GL_INVALID_OPERATION, "glPrimitiveRestartNV" );
-   }
-   else {
-      vbo_exec_End();
-      vbo_exec_Begin(curPrim);
-   }
-}
-
-
-
-static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec )
-{
-   GLvertexformat *vfmt = &exec->vtxfmt;
-
-   _MESA_INIT_ARRAYELT_VTXFMT(vfmt, _ae_);
-
-   vfmt->Begin = vbo_exec_Begin;
-   vfmt->End = vbo_exec_End;
-   vfmt->PrimitiveRestartNV = vbo_exec_PrimitiveRestartNV;
-
-   _MESA_INIT_DLIST_VTXFMT(vfmt, _mesa_);
-   _MESA_INIT_EVAL_VTXFMT(vfmt, vbo_exec_);
-
-   vfmt->Rectf = _mesa_noop_Rectf;
-
-   /* from attrib_tmp.h:
-    */
-   vfmt->Color3f = vbo_Color3f;
-   vfmt->Color3fv = vbo_Color3fv;
-   vfmt->Color4f = vbo_Color4f;
-   vfmt->Color4fv = vbo_Color4fv;
-   vfmt->FogCoordfEXT = vbo_FogCoordfEXT;
-   vfmt->FogCoordfvEXT = vbo_FogCoordfvEXT;
-   vfmt->MultiTexCoord1fARB = vbo_MultiTexCoord1f;
-   vfmt->MultiTexCoord1fvARB = vbo_MultiTexCoord1fv;
-   vfmt->MultiTexCoord2fARB = vbo_MultiTexCoord2f;
-   vfmt->MultiTexCoord2fvARB = vbo_MultiTexCoord2fv;
-   vfmt->MultiTexCoord3fARB = vbo_MultiTexCoord3f;
-   vfmt->MultiTexCoord3fvARB = vbo_MultiTexCoord3fv;
-   vfmt->MultiTexCoord4fARB = vbo_MultiTexCoord4f;
-   vfmt->MultiTexCoord4fvARB = vbo_MultiTexCoord4fv;
-   vfmt->Normal3f = vbo_Normal3f;
-   vfmt->Normal3fv = vbo_Normal3fv;
-   vfmt->SecondaryColor3fEXT = vbo_SecondaryColor3fEXT;
-   vfmt->SecondaryColor3fvEXT = vbo_SecondaryColor3fvEXT;
-   vfmt->TexCoord1f = vbo_TexCoord1f;
-   vfmt->TexCoord1fv = vbo_TexCoord1fv;
-   vfmt->TexCoord2f = vbo_TexCoord2f;
-   vfmt->TexCoord2fv = vbo_TexCoord2fv;
-   vfmt->TexCoord3f = vbo_TexCoord3f;
-   vfmt->TexCoord3fv = vbo_TexCoord3fv;
-   vfmt->TexCoord4f = vbo_TexCoord4f;
-   vfmt->TexCoord4fv = vbo_TexCoord4fv;
-   vfmt->Vertex2f = vbo_Vertex2f;
-   vfmt->Vertex2fv = vbo_Vertex2fv;
-   vfmt->Vertex3f = vbo_Vertex3f;
-   vfmt->Vertex3fv = vbo_Vertex3fv;
-   vfmt->Vertex4f = vbo_Vertex4f;
-   vfmt->Vertex4fv = vbo_Vertex4fv;
-   
-   vfmt->VertexAttrib1fARB = vbo_VertexAttrib1fARB;
-   vfmt->VertexAttrib1fvARB = vbo_VertexAttrib1fvARB;
-   vfmt->VertexAttrib2fARB = vbo_VertexAttrib2fARB;
-   vfmt->VertexAttrib2fvARB = vbo_VertexAttrib2fvARB;
-   vfmt->VertexAttrib3fARB = vbo_VertexAttrib3fARB;
-   vfmt->VertexAttrib3fvARB = vbo_VertexAttrib3fvARB;
-   vfmt->VertexAttrib4fARB = vbo_VertexAttrib4fARB;
-   vfmt->VertexAttrib4fvARB = vbo_VertexAttrib4fvARB;
-
-   vfmt->VertexAttrib1fNV = vbo_VertexAttrib1fNV;
-   vfmt->VertexAttrib1fvNV = vbo_VertexAttrib1fvNV;
-   vfmt->VertexAttrib2fNV = vbo_VertexAttrib2fNV;
-   vfmt->VertexAttrib2fvNV = vbo_VertexAttrib2fvNV;
-   vfmt->VertexAttrib3fNV = vbo_VertexAttrib3fNV;
-   vfmt->VertexAttrib3fvNV = vbo_VertexAttrib3fvNV;
-   vfmt->VertexAttrib4fNV = vbo_VertexAttrib4fNV;
-   vfmt->VertexAttrib4fvNV = vbo_VertexAttrib4fvNV;
-
-   /* integer-valued */
-   vfmt->VertexAttribI1i = vbo_VertexAttribI1i;
-   vfmt->VertexAttribI2i = vbo_VertexAttribI2i;
-   vfmt->VertexAttribI3i = vbo_VertexAttribI3i;
-   vfmt->VertexAttribI4i = vbo_VertexAttribI4i;
-   vfmt->VertexAttribI2iv = vbo_VertexAttribI2iv;
-   vfmt->VertexAttribI3iv = vbo_VertexAttribI3iv;
-   vfmt->VertexAttribI4iv = vbo_VertexAttribI4iv;
-
-   /* unsigned integer-valued */
-   vfmt->VertexAttribI1ui = vbo_VertexAttribI1ui;
-   vfmt->VertexAttribI2ui = vbo_VertexAttribI2ui;
-   vfmt->VertexAttribI3ui = vbo_VertexAttribI3ui;
-   vfmt->VertexAttribI4ui = vbo_VertexAttribI4ui;
-   vfmt->VertexAttribI2uiv = vbo_VertexAttribI2uiv;
-   vfmt->VertexAttribI3uiv = vbo_VertexAttribI3uiv;
-   vfmt->VertexAttribI4uiv = vbo_VertexAttribI4uiv;
-
-   vfmt->Materialfv = vbo_Materialfv;
-
-   vfmt->EdgeFlag = vbo_EdgeFlag;
-   vfmt->Indexf = vbo_Indexf;
-   vfmt->Indexfv = vbo_Indexfv;
-
-}
-
-
-#else /* FEATURE_beginend */
-
-
-static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec )
-{
-   /* silence warnings */
-   (void) vbo_Color3f;
-   (void) vbo_Color3fv;
-   (void) vbo_Color4f;
-   (void) vbo_Color4fv;
-   (void) vbo_FogCoordfEXT;
-   (void) vbo_FogCoordfvEXT;
-   (void) vbo_MultiTexCoord1f;
-   (void) vbo_MultiTexCoord1fv;
-   (void) vbo_MultiTexCoord2f;
-   (void) vbo_MultiTexCoord2fv;
-   (void) vbo_MultiTexCoord3f;
-   (void) vbo_MultiTexCoord3fv;
-   (void) vbo_MultiTexCoord4f;
-   (void) vbo_MultiTexCoord4fv;
-   (void) vbo_Normal3f;
-   (void) vbo_Normal3fv;
-   (void) vbo_SecondaryColor3fEXT;
-   (void) vbo_SecondaryColor3fvEXT;
-   (void) vbo_TexCoord1f;
-   (void) vbo_TexCoord1fv;
-   (void) vbo_TexCoord2f;
-   (void) vbo_TexCoord2fv;
-   (void) vbo_TexCoord3f;
-   (void) vbo_TexCoord3fv;
-   (void) vbo_TexCoord4f;
-   (void) vbo_TexCoord4fv;
-   (void) vbo_Vertex2f;
-   (void) vbo_Vertex2fv;
-   (void) vbo_Vertex3f;
-   (void) vbo_Vertex3fv;
-   (void) vbo_Vertex4f;
-   (void) vbo_Vertex4fv;
-
-   (void) vbo_VertexAttrib1fARB;
-   (void) vbo_VertexAttrib1fvARB;
-   (void) vbo_VertexAttrib2fARB;
-   (void) vbo_VertexAttrib2fvARB;
-   (void) vbo_VertexAttrib3fARB;
-   (void) vbo_VertexAttrib3fvARB;
-   (void) vbo_VertexAttrib4fARB;
-   (void) vbo_VertexAttrib4fvARB;
-
-   (void) vbo_VertexAttrib1fNV;
-   (void) vbo_VertexAttrib1fvNV;
-   (void) vbo_VertexAttrib2fNV;
-   (void) vbo_VertexAttrib2fvNV;
-   (void) vbo_VertexAttrib3fNV;
-   (void) vbo_VertexAttrib3fvNV;
-   (void) vbo_VertexAttrib4fNV;
-   (void) vbo_VertexAttrib4fvNV;
-
-   (void) vbo_Materialfv;
-
-   (void) vbo_EdgeFlag;
-   (void) vbo_Indexf;
-   (void) vbo_Indexfv;
-}
-
-
-#endif /* FEATURE_beginend */
-
-
-/**
- * Tell the VBO module to use a real OpenGL vertex buffer object to
- * store accumulated immediate-mode vertex data.
- * This replaces the malloced buffer which was created in
- * vb_exec_vtx_init() below.
- */
-void vbo_use_buffer_objects(struct gl_context *ctx)
-{
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-   /* Any buffer name but 0 can be used here since this bufferobj won't
-    * go into the bufferobj hashtable.
-    */
-   GLuint bufName = IMM_BUFFER_NAME;
-   GLenum target = GL_ARRAY_BUFFER_ARB;
-   GLenum usage = GL_STREAM_DRAW_ARB;
-   GLsizei size = VBO_VERT_BUFFER_SIZE;
-
-   /* Make sure this func is only used once */
-   assert(exec->vtx.bufferobj == ctx->Shared->NullBufferObj);
-   if (exec->vtx.buffer_map) {
-      _mesa_align_free(exec->vtx.buffer_map);
-      exec->vtx.buffer_map = NULL;
-      exec->vtx.buffer_ptr = NULL;
-   }
-
-   /* Allocate a real buffer object now */
-   _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
-   exec->vtx.bufferobj = ctx->Driver.NewBufferObject(ctx, bufName, target);
-   ctx->Driver.BufferData(ctx, target, size, NULL, usage, exec->vtx.bufferobj);
-}
-
-
-
-void vbo_exec_vtx_init( struct vbo_exec_context *exec )
-{
-   struct gl_context *ctx = exec->ctx;
-   struct vbo_context *vbo = vbo_context(ctx);
-   GLuint i;
-
-   /* Allocate a buffer object.  Will just reuse this object
-    * continuously, unless vbo_use_buffer_objects() is called to enable
-    * use of real VBOs.
-    */
-   _mesa_reference_buffer_object(ctx,
-                                 &exec->vtx.bufferobj,
-                                 ctx->Shared->NullBufferObj);
-
-   ASSERT(!exec->vtx.buffer_map);
-   exec->vtx.buffer_map = (GLfloat *)_mesa_align_malloc(VBO_VERT_BUFFER_SIZE, 64);
-   exec->vtx.buffer_ptr = exec->vtx.buffer_map;
-
-   vbo_exec_vtxfmt_init( exec );
-
-   /* Hook our functions into the dispatch table.
-    */
-   _mesa_install_exec_vtxfmt( exec->ctx, &exec->vtxfmt );
-
-   for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
-      ASSERT(i < Elements(exec->vtx.attrsz));
-      exec->vtx.attrsz[i] = 0;
-      ASSERT(i < Elements(exec->vtx.active_sz));
-      exec->vtx.active_sz[i] = 0;
-   }
-   for (i = 0 ; i < VERT_ATTRIB_MAX; i++) {
-      ASSERT(i < Elements(exec->vtx.inputs));
-      ASSERT(i < Elements(exec->vtx.arrays));
-      exec->vtx.inputs[i] = &exec->vtx.arrays[i];
-   }
-   
-   {
-      struct gl_client_array *arrays = exec->vtx.arrays;
-      unsigned i;
-
-      memcpy(arrays,      vbo->legacy_currval,  16 * sizeof(arrays[0]));
-      memcpy(arrays + 16, vbo->generic_currval, 16 * sizeof(arrays[0]));
-
-      for (i = 0; i < 16; ++i) {
-         arrays[i     ].BufferObj = NULL;
-         arrays[i + 16].BufferObj = NULL;
-         _mesa_reference_buffer_object(ctx, &arrays[i     ].BufferObj,
-                                       vbo->legacy_currval[i].BufferObj);
-         _mesa_reference_buffer_object(ctx, &arrays[i + 16].BufferObj,
-                                       vbo->generic_currval[i].BufferObj);
-      }
-   }
-
-   exec->vtx.vertex_size = 0;
-}
-
-
-void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
-{
-   /* using a real VBO for vertex data */
-   struct gl_context *ctx = exec->ctx;
-   unsigned i;
-
-   /* True VBOs should already be unmapped
-    */
-   if (exec->vtx.buffer_map) {
-      ASSERT(exec->vtx.bufferobj->Name == 0 ||
-             exec->vtx.bufferobj->Name == IMM_BUFFER_NAME);
-      if (exec->vtx.bufferobj->Name == 0) {
-         _mesa_align_free(exec->vtx.buffer_map);
-         exec->vtx.buffer_map = NULL;
-         exec->vtx.buffer_ptr = NULL;
-      }
-   }
-
-   /* Drop any outstanding reference to the vertex buffer
-    */
-   for (i = 0; i < Elements(exec->vtx.arrays); i++) {
-      _mesa_reference_buffer_object(ctx,
-                                    &exec->vtx.arrays[i].BufferObj,
-                                    NULL);
-   }
-
-   /* Free the vertex buffer.  Unmap first if needed.
-    */
-   if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
-      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
-   }
-   _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
-}
-
-void vbo_exec_BeginVertices( struct gl_context *ctx )
-{
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-   if (0) printf("%s\n", __FUNCTION__);
-   vbo_exec_vtx_map( exec );
-
-   assert((exec->ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
-   exec->ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
-}
-
-void vbo_exec_FlushVertices_internal( struct gl_context *ctx, GLboolean unmap )
-{
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
-   if (exec->vtx.vert_count || unmap) {
-      vbo_exec_vtx_flush( exec, unmap );
-   }
-
-   if (exec->vtx.vertex_size) {
-      vbo_exec_copy_to_current( exec );
-      reset_attrfv( exec );
-   }
-}
-
-
-/**
- * \param flags  bitmask of FLUSH_STORED_VERTICES, FLUSH_UPDATE_CURRENT
- */
-void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
-{
-   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
-
-#ifdef DEBUG
-   /* debug check: make sure we don't get called recursively */
-   exec->flush_call_depth++;
-   assert(exec->flush_call_depth == 1);
-#endif
-
-   if (0) printf("%s\n", __FUNCTION__);
-
-   if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
-      if (0) printf("%s - inside begin/end\n", __FUNCTION__);
-#ifdef DEBUG
-      exec->flush_call_depth--;
-      assert(exec->flush_call_depth == 0);
-#endif
-      return;
-   }
-
-   vbo_exec_FlushVertices_internal( ctx, GL_TRUE );
-
-   /* Need to do this to ensure BeginVertices gets called again:
-    */
-   if (exec->ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)
-      exec->ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
-
-   exec->ctx->Driver.NeedFlush &= ~flags;
-
-#ifdef DEBUG
-   exec->flush_call_depth--;
-   assert(exec->flush_call_depth == 0);
-#endif
-}
-
-
-static void reset_attrfv( struct vbo_exec_context *exec )
-{   
-   GLuint i;
-
-   for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
-      exec->vtx.attrsz[i] = 0;
-      exec->vtx.active_sz[i] = 0;
-   }
-
-   exec->vtx.vertex_size = 0;
-}
-      
-
-void GLAPIENTRY
-_es_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a)
-{
-   vbo_Color4f(r, g, b, a);
-}
-
-
-void GLAPIENTRY
-_es_Normal3f(GLfloat x, GLfloat y, GLfloat z)
-{
-   vbo_Normal3f(x, y, z);
-}
-
-
-void GLAPIENTRY
-_es_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q)
-{
-   vbo_MultiTexCoord4f(target, s, t, r, q);
-}
-
-
-void GLAPIENTRY
-_es_Materialfv(GLenum face, GLenum pname, const GLfloat *params)
-{
-   vbo_Materialfv(face, pname, params);
-}
-
-
-void GLAPIENTRY
-_es_Materialf(GLenum face, GLenum pname, GLfloat param)
-{
-   GLfloat p[4];
-   p[0] = param;
-   p[1] = p[2] = p[3] = 0.0F;
-   vbo_Materialfv(face, pname, p);
-}
-
-
-/**
- * A special version of glVertexAttrib4f that does not treat index 0 as
- * VBO_ATTRIB_POS.
- */
-static void
-VertexAttrib4f_nopos(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   if (index < MAX_VERTEX_GENERIC_ATTRIBS)
-      ATTR(VBO_ATTRIB_GENERIC0 + index, 4, x, y, z, w);
-   else
-      ERROR();
-}
-
-void GLAPIENTRY
-_es_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
-{
-   VertexAttrib4f_nopos(index, x, y, z, w);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib1f(GLuint indx, GLfloat x)
-{
-   VertexAttrib4f_nopos(indx, x, 0.0f, 0.0f, 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib1fv(GLuint indx, const GLfloat* values)
-{
-   VertexAttrib4f_nopos(indx, values[0], 0.0f, 0.0f, 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y)
-{
-   VertexAttrib4f_nopos(indx, x, y, 0.0f, 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib2fv(GLuint indx, const GLfloat* values)
-{
-   VertexAttrib4f_nopos(indx, values[0], values[1], 0.0f, 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z)
-{
-   VertexAttrib4f_nopos(indx, x, y, z, 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib3fv(GLuint indx, const GLfloat* values)
-{
-   VertexAttrib4f_nopos(indx, values[0], values[1], values[2], 1.0f);
-}
-
-
-void GLAPIENTRY
-_es_VertexAttrib4fv(GLuint indx, const GLfloat* values)
-{
-   VertexAttrib4f_nopos(indx, values[0], values[1], values[2], values[3]);
-}
+/**************************************************************************
+
+Copyright 2002-2008 Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "main/vtxfmt.h"
+#include "main/dlist.h"
+#include "main/eval.h"
+#include "main/state.h"
+#include "main/light.h"
+#include "main/api_arrayelt.h"
+#include "main/api_noop.h"
+#include "main/dispatch.h"
+
+#include "vbo_context.h"
+
+#ifdef ERROR
+#undef ERROR
+#endif
+
+
+/** ID/name for immediate-mode VBO */
+#define IMM_BUFFER_NAME 0xaabbccdd
+
+
+static void reset_attrfv( struct vbo_exec_context *exec );
+
+
+/**
+ * Close off the last primitive, execute the buffer, restart the
+ * primitive.  
+ */
+static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
+{
+   if (exec->vtx.prim_count == 0) {
+      exec->vtx.copied.nr = 0;
+      exec->vtx.vert_count = 0;
+      exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+   }
+   else {
+      GLuint last_begin = exec->vtx.prim[exec->vtx.prim_count-1].begin;
+      GLuint last_count;
+
+      if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
+	 GLint i = exec->vtx.prim_count - 1;
+	 assert(i >= 0);
+	 exec->vtx.prim[i].count = (exec->vtx.vert_count - 
+				    exec->vtx.prim[i].start);
+      }
+
+      last_count = exec->vtx.prim[exec->vtx.prim_count-1].count;
+
+      /* Execute the buffer and save copied vertices.
+       */
+      if (exec->vtx.vert_count)
+	 vbo_exec_vtx_flush( exec, GL_FALSE );
+      else {
+	 exec->vtx.prim_count = 0;
+	 exec->vtx.copied.nr = 0;
+      }
+
+      /* Emit a glBegin to start the new list.
+       */
+      assert(exec->vtx.prim_count == 0);
+
+      if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
+	 exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
+	 exec->vtx.prim[0].start = 0;
+	 exec->vtx.prim[0].count = 0;
+	 exec->vtx.prim_count++;
+      
+	 if (exec->vtx.copied.nr == last_count)
+	    exec->vtx.prim[0].begin = last_begin;
+      }
+   }
+}
+
+
+/**
+ * Deal with buffer wrapping where provoked by the vertex buffer
+ * filling up, as opposed to upgrade_vertex().
+ */
+void vbo_exec_vtx_wrap( struct vbo_exec_context *exec )
+{
+   GLfloat *data = exec->vtx.copied.buffer;
+   GLuint i;
+
+   /* Run pipeline on current vertices, copy wrapped vertices
+    * to exec->vtx.copied.
+    */
+   vbo_exec_wrap_buffers( exec );
+   
+   /* Copy stored vertices to start of new list. 
+    */
+   assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr);
+
+   for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
+      memcpy( exec->vtx.buffer_ptr, data, 
+	      exec->vtx.vertex_size * sizeof(GLfloat));
+      exec->vtx.buffer_ptr += exec->vtx.vertex_size;
+      data += exec->vtx.vertex_size;
+      exec->vtx.vert_count++;
+   }
+
+   exec->vtx.copied.nr = 0;
+}
+
+
+/**
+ * Copy the active vertex's values to the ctx->Current fields.
+ */
+static void vbo_exec_copy_to_current( struct vbo_exec_context *exec )
+{
+   struct gl_context *ctx = exec->ctx;
+   struct vbo_context *vbo = vbo_context(ctx);
+   GLuint i;
+
+   for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) {
+      if (exec->vtx.attrsz[i]) {
+         /* Note: the vbo->currval[i].Ptr pointers point into the
+          * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays.
+          */
+	 GLfloat *current = (GLfloat *)vbo->currval[i].Ptr;
+         GLfloat tmp[4];
+
+         COPY_CLEAN_4V(tmp, 
+                       exec->vtx.attrsz[i], 
+                       exec->vtx.attrptr[i]);
+         
+         if (memcmp(current, tmp, sizeof(tmp)) != 0) { 
+            memcpy(current, tmp, sizeof(tmp));
+	 
+            /* Given that we explicitly state size here, there is no need
+             * for the COPY_CLEAN above, could just copy 16 bytes and be
+             * done.  The only problem is when Mesa accesses ctx->Current
+             * directly.
+             */
+            vbo->currval[i].Size = exec->vtx.attrsz[i];
+
+            /* This triggers rather too much recalculation of Mesa state
+             * that doesn't get used (eg light positions).
+             */
+            if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT &&
+                i <= VBO_ATTRIB_MAT_BACK_INDEXES)
+               ctx->NewState |= _NEW_LIGHT;
+            
+            ctx->NewState |= _NEW_CURRENT_ATTRIB;
+         }
+      }
+   }
+
+   /* Colormaterial -- this kind of sucks.
+    */
+   if (ctx->Light.ColorMaterialEnabled &&
+       exec->vtx.attrsz[VBO_ATTRIB_COLOR0]) {
+      _mesa_update_color_material(ctx, 
+				  ctx->Current.Attrib[VBO_ATTRIB_COLOR0]);
+   }
+}
+
+
+/**
+ * Copy current vertex attribute values into the current vertex.
+ */
+static void
+vbo_exec_copy_from_current(struct vbo_exec_context *exec)
+{
+   struct gl_context *ctx = exec->ctx;
+   struct vbo_context *vbo = vbo_context(ctx);
+   GLint i;
+
+   for (i = VBO_ATTRIB_POS + 1; i < VBO_ATTRIB_MAX; i++) {
+      const GLfloat *current = (GLfloat *) vbo->currval[i].Ptr;
+      switch (exec->vtx.attrsz[i]) {
+      case 4: exec->vtx.attrptr[i][3] = current[3];
+      case 3: exec->vtx.attrptr[i][2] = current[2];
+      case 2: exec->vtx.attrptr[i][1] = current[1];
+      case 1: exec->vtx.attrptr[i][0] = current[0];
+	 break;
+      }
+   }
+}
+
+
+/**
+ * Flush existing data, set new attrib size, replay copied vertices.
+ * This is called when we transition from a small vertex attribute size
+ * to a larger one.  Ex: glTexCoord2f -> glTexCoord4f.
+ * We need to go back over the previous 2-component texcoords and insert
+ * zero and one values.
+ */ 
+static void
+vbo_exec_wrap_upgrade_vertex(struct vbo_exec_context *exec,
+                             GLuint attr, GLuint newSize )
+{
+   struct gl_context *ctx = exec->ctx;
+   struct vbo_context *vbo = vbo_context(ctx);
+   const GLint lastcount = exec->vtx.vert_count;
+   GLfloat *old_attrptr[VBO_ATTRIB_MAX];
+   const GLuint old_vtx_size = exec->vtx.vertex_size; /* floats per vertex */
+   const GLuint oldSize = exec->vtx.attrsz[attr];
+   GLuint i;
+
+   /* Run pipeline on current vertices, copy wrapped vertices
+    * to exec->vtx.copied.
+    */
+   vbo_exec_wrap_buffers( exec );
+
+   if (unlikely(exec->vtx.copied.nr)) {
+      /* We're in the middle of a primitive, keep the old vertex
+       * format around to be able to translate the copied vertices to
+       * the new format.
+       */
+      memcpy(old_attrptr, exec->vtx.attrptr, sizeof(old_attrptr));
+   }
+
+   if (unlikely(oldSize)) {
+      /* Do a COPY_TO_CURRENT to ensure back-copying works for the
+       * case when the attribute already exists in the vertex and is
+       * having its size increased.
+       */
+      vbo_exec_copy_to_current( exec );
+   }
+
+   /* Heuristic: Attempt to isolate attributes received outside
+    * begin/end so that they don't bloat the vertices.
+    */
+   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END &&
+       !oldSize && lastcount > 8 && exec->vtx.vertex_size) {
+      vbo_exec_copy_to_current( exec );
+      reset_attrfv( exec );
+   }
+
+   /* Fix up sizes:
+    */
+   exec->vtx.attrsz[attr] = newSize;
+   exec->vtx.vertex_size += newSize - oldSize;
+   exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / 
+                         (exec->vtx.vertex_size * sizeof(GLfloat)));
+   exec->vtx.vert_count = 0;
+   exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+
+   if (unlikely(oldSize)) {
+      /* Size changed, recalculate all the attrptr[] values
+       */
+      GLfloat *tmp = exec->vtx.vertex;
+
+      for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
+	 if (exec->vtx.attrsz[i]) {
+	    exec->vtx.attrptr[i] = tmp;
+	    tmp += exec->vtx.attrsz[i];
+	 }
+	 else
+	    exec->vtx.attrptr[i] = NULL; /* will not be dereferenced */
+      }
+
+      /* Copy from current to repopulate the vertex with correct
+       * values.
+       */
+      vbo_exec_copy_from_current( exec );
+   }
+   else {
+      /* Just have to append the new attribute at the end */
+      exec->vtx.attrptr[attr] = exec->vtx.vertex +
+	 exec->vtx.vertex_size - newSize;
+   }
+
+   /* Replay stored vertices to translate them
+    * to new format here.
+    *
+    * -- No need to replay - just copy piecewise
+    */
+   if (unlikely(exec->vtx.copied.nr)) {
+      GLfloat *data = exec->vtx.copied.buffer;
+      GLfloat *dest = exec->vtx.buffer_ptr;
+      GLuint j;
+
+      assert(exec->vtx.buffer_ptr == exec->vtx.buffer_map);
+
+      for (i = 0 ; i < exec->vtx.copied.nr ; i++) {
+	 for (j = 0 ; j < VBO_ATTRIB_MAX ; j++) {
+	    GLuint sz = exec->vtx.attrsz[j];
+
+	    if (sz) {
+	       GLint old_offset = old_attrptr[j] - exec->vtx.vertex;
+	       GLint new_offset = exec->vtx.attrptr[j] - exec->vtx.vertex;
+
+	       if (j == attr) {
+		  if (oldSize) {
+		     GLfloat tmp[4];
+		     COPY_CLEAN_4V(tmp, oldSize, data + old_offset);
+		     COPY_SZ_4V(dest + new_offset, newSize, tmp);
+		  } else {
+		     GLfloat *current = (GLfloat *)vbo->currval[j].Ptr;
+		     COPY_SZ_4V(dest + new_offset, sz, current);
+		  }
+	       }
+	       else {
+		  COPY_SZ_4V(dest + new_offset, sz, data + old_offset);
+	       }
+	    }
+	 }
+
+	 data += old_vtx_size;
+	 dest += exec->vtx.vertex_size;
+      }
+
+      exec->vtx.buffer_ptr = dest;
+      exec->vtx.vert_count += exec->vtx.copied.nr;
+      exec->vtx.copied.nr = 0;
+   }
+}
+
+
+/**
+ * This is when a vertex attribute transitions to a different size.
+ * For example, we saw a bunch of glTexCoord2f() calls and now we got a
+ * glTexCoord4f() call.  We promote the array from size=2 to size=4.
+ */
+static void
+vbo_exec_fixup_vertex(struct gl_context *ctx, GLuint attr, GLuint newSize)
+{
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+   if (newSize > exec->vtx.attrsz[attr]) {
+      /* New size is larger.  Need to flush existing vertices and get
+       * an enlarged vertex format.
+       */
+      vbo_exec_wrap_upgrade_vertex( exec, attr, newSize );
+   }
+   else if (newSize < exec->vtx.active_sz[attr]) {
+      static const GLfloat id[4] = { 0, 0, 0, 1 };
+      GLuint i;
+
+      /* New size is smaller - just need to reset the trailing components
+       * to their defaults (0, 0, 0, 1).  Don't need to flush or wrap.
+       */
+      for (i = newSize; i <= exec->vtx.attrsz[attr]; i++)
+	 exec->vtx.attrptr[attr][i-1] = id[i-1];
+   }
+
+   exec->vtx.active_sz[attr] = newSize;
+
+   /* Does setting NeedFlush belong here?  Necessitates resetting
+    * vtxfmt on each flush (otherwise flags won't get reset
+    * afterwards).
+    */
+   if (attr == 0) 
+      exec->ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+}
+
+
+/**
+ * Implements all the glVertex, glColor, glTexCoord, glVertexAttrib, etc
+ * functions.  A = attribute index, N = component count, V0..V3 = values.
+ */
+#define ATTR( A, N, V0, V1, V2, V3 )					\
+do {									\
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;		\
+									\
+   if (unlikely(!(ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)))	\
+      ctx->Driver.BeginVertices( ctx );					\
+   									\
+   if (unlikely(exec->vtx.active_sz[A] != N))				\
+      vbo_exec_fixup_vertex(ctx, A, N);					\
+   									\
+   {									\
+      GLfloat *dest = exec->vtx.attrptr[A];				\
+      if (N>0) dest[0] = V0;						\
+      if (N>1) dest[1] = V1;						\
+      if (N>2) dest[2] = V2;						\
+      if (N>3) dest[3] = V3;						\
+   }									\
+									\
+   if ((A) == 0) {							\
+      /* This is a glVertex call */					\
+      GLuint i;								\
+									\
+      for (i = 0; i < exec->vtx.vertex_size; i++)			\
+	 exec->vtx.buffer_ptr[i] = exec->vtx.vertex[i];			\
+									\
+      exec->vtx.buffer_ptr += exec->vtx.vertex_size;			\
+									\
+      /* Set FLUSH_STORED_VERTICES to indicate that there's now */	\
+      /* something to draw (not just updating a color or texcoord).*/	\
+      ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;			\
+									\
+      if (++exec->vtx.vert_count >= exec->vtx.max_vert)			\
+	 vbo_exec_vtx_wrap( exec );					\
+   }									\
+} while (0)
+
+
+#define ERROR() _mesa_error( ctx, GL_INVALID_ENUM, __FUNCTION__ )
+#define TAG(x) vbo_##x
+
+#include "vbo_attrib_tmp.h"
+
+
+#if FEATURE_beginend
+
+
+#if FEATURE_evaluators
+
+static void GLAPIENTRY vbo_exec_EvalCoord1f( GLfloat u )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+   {
+      GLint i;
+      if (exec->eval.recalculate_maps) 
+	 vbo_exec_eval_update( exec );
+
+      for (i = 0; i <= VBO_ATTRIB_TEX7; i++) {
+	 if (exec->eval.map1[i].map) 
+	    if (exec->vtx.active_sz[i] != exec->eval.map1[i].sz)
+	       vbo_exec_fixup_vertex( ctx, i, exec->eval.map1[i].sz );
+      }
+   }
+
+   /* Save the current vertex; it is restored after the evaluator call. */
+   memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, 
+           exec->vtx.vertex_size * sizeof(GLfloat));
+
+   vbo_exec_do_EvalCoord1f( exec, u );
+
+   memcpy( exec->vtx.vertex, exec->vtx.copied.buffer,
+           exec->vtx.vertex_size * sizeof(GLfloat));
+}
+
+static void GLAPIENTRY vbo_exec_EvalCoord2f( GLfloat u, GLfloat v )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+   {
+      GLint i;
+      if (exec->eval.recalculate_maps) 
+	 vbo_exec_eval_update( exec );
+
+      for (i = 0; i <= VBO_ATTRIB_TEX7; i++) {
+	 if (exec->eval.map2[i].map) 
+	    if (exec->vtx.active_sz[i] != exec->eval.map2[i].sz)
+	       vbo_exec_fixup_vertex( ctx, i, exec->eval.map2[i].sz );
+      }
+
+      if (ctx->Eval.AutoNormal) 
+	 if (exec->vtx.active_sz[VBO_ATTRIB_NORMAL] != 3)
+	    vbo_exec_fixup_vertex( ctx, VBO_ATTRIB_NORMAL, 3 );
+   }
+   /* Save the current vertex; it is restored after the evaluator call. */
+   memcpy( exec->vtx.copied.buffer, exec->vtx.vertex, 
+           exec->vtx.vertex_size * sizeof(GLfloat));
+
+   vbo_exec_do_EvalCoord2f( exec, u, v );
+
+   memcpy( exec->vtx.vertex, exec->vtx.copied.buffer, 
+           exec->vtx.vertex_size * sizeof(GLfloat));
+}
+
+static void GLAPIENTRY vbo_exec_EvalCoord1fv( const GLfloat *u )
+{
+   vbo_exec_EvalCoord1f( u[0] );
+}
+
+static void GLAPIENTRY vbo_exec_EvalCoord2fv( const GLfloat *u )
+{
+   vbo_exec_EvalCoord2f( u[0], u[1] );
+}
+
+static void GLAPIENTRY vbo_exec_EvalPoint1( GLint i )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLfloat du = ((ctx->Eval.MapGrid1u2 - ctx->Eval.MapGrid1u1) /
+		 (GLfloat) ctx->Eval.MapGrid1un);
+   GLfloat u = i * du + ctx->Eval.MapGrid1u1;
+
+   vbo_exec_EvalCoord1f( u );
+}
+
+
+static void GLAPIENTRY vbo_exec_EvalPoint2( GLint i, GLint j )
+{
+   GET_CURRENT_CONTEXT( ctx );
+   GLfloat du = ((ctx->Eval.MapGrid2u2 - ctx->Eval.MapGrid2u1) / 
+		 (GLfloat) ctx->Eval.MapGrid2un);
+   GLfloat dv = ((ctx->Eval.MapGrid2v2 - ctx->Eval.MapGrid2v1) / 
+		 (GLfloat) ctx->Eval.MapGrid2vn);
+   GLfloat u = i * du + ctx->Eval.MapGrid2u1;
+   GLfloat v = j * dv + ctx->Eval.MapGrid2v1;
+
+   vbo_exec_EvalCoord2f( u, v );
+}
+
+/* use noop eval mesh */
+#define vbo_exec_EvalMesh1 _mesa_noop_EvalMesh1
+#define vbo_exec_EvalMesh2 _mesa_noop_EvalMesh2
+
+#endif /* FEATURE_evaluators */
+
+
+/**
+ * Flush (draw) vertices, then copy to current and reset the attribute sizes.
+ * \param unmap  if true, leave the VBO unmapped after flushing.
+ */
+static void
+vbo_exec_FlushVertices_internal(struct vbo_exec_context *exec, GLboolean unmap)
+{
+   if (exec->vtx.vert_count || unmap) {
+      vbo_exec_vtx_flush( exec, unmap );
+   }
+
+   if (exec->vtx.vertex_size) {
+      vbo_exec_copy_to_current( exec );
+      reset_attrfv( exec );
+   }
+}
+
+
+/**
+ * Called via glBegin.  Starts a new entry in exec->vtx.prim[].
+ */
+static void GLAPIENTRY vbo_exec_Begin( GLenum mode )
+{
+   GET_CURRENT_CONTEXT( ctx ); 
+
+   if (ctx->Driver.CurrentExecPrimitive == PRIM_OUTSIDE_BEGIN_END) {
+      struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+      int i;
+
+      if (ctx->NewState) {
+	 _mesa_update_state( ctx );
+	 /* Re-dispatch through ctx->Exec now that state has been updated. */
+	 CALL_Begin(ctx->Exec, (mode));
+	 return;
+      }
+
+      if (!_mesa_valid_to_render(ctx, "glBegin")) {
+         return;
+      }
+
+      /* Heuristic: attempt to isolate attributes occurring outside
+       * begin/end pairs.
+       */
+      if (exec->vtx.vertex_size && !exec->vtx.attrsz[0]) 
+	 vbo_exec_FlushVertices_internal(exec, GL_FALSE);
+
+      i = exec->vtx.prim_count++;
+      exec->vtx.prim[i].mode = mode;
+      exec->vtx.prim[i].begin = 1;
+      exec->vtx.prim[i].end = 0;
+      exec->vtx.prim[i].indexed = 0;
+      exec->vtx.prim[i].weak = 0;
+      exec->vtx.prim[i].pad = 0;
+      exec->vtx.prim[i].start = exec->vtx.vert_count;
+      exec->vtx.prim[i].count = 0;
+      exec->vtx.prim[i].num_instances = 1;
+
+      ctx->Driver.CurrentExecPrimitive = mode;
+   }
+   else 
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glBegin" );
+      
+}
+
+
+/**
+ * Called via glEnd.  Closes the current prim and flushes if prim[] is full.
+ */
+static void GLAPIENTRY vbo_exec_End( void )
+{
+   GET_CURRENT_CONTEXT( ctx ); 
+
+   if (ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
+      struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+      int idx = exec->vtx.vert_count;
+      int i = exec->vtx.prim_count - 1;
+
+      exec->vtx.prim[i].end = 1; 
+      exec->vtx.prim[i].count = idx - exec->vtx.prim[i].start;
+
+      ctx->Driver.CurrentExecPrimitive = PRIM_OUTSIDE_BEGIN_END;
+
+      if (exec->vtx.prim_count == VBO_MAX_PRIM)
+	 vbo_exec_vtx_flush( exec, GL_FALSE );
+   }
+   else 
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glEnd" );
+}
+
+
+/**
+ * Called via glPrimitiveRestartNV(): does an End, then a Begin in the same mode.
+ */
+static void GLAPIENTRY
+vbo_exec_PrimitiveRestartNV(void)
+{
+   GLenum curPrim;
+   GET_CURRENT_CONTEXT( ctx ); 
+
+   curPrim = ctx->Driver.CurrentExecPrimitive;
+
+   if (curPrim == PRIM_OUTSIDE_BEGIN_END) {
+      _mesa_error( ctx, GL_INVALID_OPERATION, "glPrimitiveRestartNV" );
+   }
+   else {
+      vbo_exec_End();
+      vbo_exec_Begin(curPrim);
+   }
+}
+
+
+
+static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec )
+{
+   GLvertexformat *vfmt = &exec->vtxfmt;
+
+   _MESA_INIT_ARRAYELT_VTXFMT(vfmt, _ae_);
+
+   vfmt->Begin = vbo_exec_Begin;
+   vfmt->End = vbo_exec_End;
+   vfmt->PrimitiveRestartNV = vbo_exec_PrimitiveRestartNV;
+
+   _MESA_INIT_DLIST_VTXFMT(vfmt, _mesa_);
+   _MESA_INIT_EVAL_VTXFMT(vfmt, vbo_exec_);
+
+   vfmt->Rectf = _mesa_noop_Rectf;
+
+   /* from vbo_attrib_tmp.h:
+    */
+   vfmt->Color3f = vbo_Color3f;
+   vfmt->Color3fv = vbo_Color3fv;
+   vfmt->Color4f = vbo_Color4f;
+   vfmt->Color4fv = vbo_Color4fv;
+   vfmt->FogCoordfEXT = vbo_FogCoordfEXT;
+   vfmt->FogCoordfvEXT = vbo_FogCoordfvEXT;
+   vfmt->MultiTexCoord1fARB = vbo_MultiTexCoord1f;
+   vfmt->MultiTexCoord1fvARB = vbo_MultiTexCoord1fv;
+   vfmt->MultiTexCoord2fARB = vbo_MultiTexCoord2f;
+   vfmt->MultiTexCoord2fvARB = vbo_MultiTexCoord2fv;
+   vfmt->MultiTexCoord3fARB = vbo_MultiTexCoord3f;
+   vfmt->MultiTexCoord3fvARB = vbo_MultiTexCoord3fv;
+   vfmt->MultiTexCoord4fARB = vbo_MultiTexCoord4f;
+   vfmt->MultiTexCoord4fvARB = vbo_MultiTexCoord4fv;
+   vfmt->Normal3f = vbo_Normal3f;
+   vfmt->Normal3fv = vbo_Normal3fv;
+   vfmt->SecondaryColor3fEXT = vbo_SecondaryColor3fEXT;
+   vfmt->SecondaryColor3fvEXT = vbo_SecondaryColor3fvEXT;
+   vfmt->TexCoord1f = vbo_TexCoord1f;
+   vfmt->TexCoord1fv = vbo_TexCoord1fv;
+   vfmt->TexCoord2f = vbo_TexCoord2f;
+   vfmt->TexCoord2fv = vbo_TexCoord2fv;
+   vfmt->TexCoord3f = vbo_TexCoord3f;
+   vfmt->TexCoord3fv = vbo_TexCoord3fv;
+   vfmt->TexCoord4f = vbo_TexCoord4f;
+   vfmt->TexCoord4fv = vbo_TexCoord4fv;
+   vfmt->Vertex2f = vbo_Vertex2f;
+   vfmt->Vertex2fv = vbo_Vertex2fv;
+   vfmt->Vertex3f = vbo_Vertex3f;
+   vfmt->Vertex3fv = vbo_Vertex3fv;
+   vfmt->Vertex4f = vbo_Vertex4f;
+   vfmt->Vertex4fv = vbo_Vertex4fv;
+   
+   vfmt->VertexAttrib1fARB = vbo_VertexAttrib1fARB;
+   vfmt->VertexAttrib1fvARB = vbo_VertexAttrib1fvARB;
+   vfmt->VertexAttrib2fARB = vbo_VertexAttrib2fARB;
+   vfmt->VertexAttrib2fvARB = vbo_VertexAttrib2fvARB;
+   vfmt->VertexAttrib3fARB = vbo_VertexAttrib3fARB;
+   vfmt->VertexAttrib3fvARB = vbo_VertexAttrib3fvARB;
+   vfmt->VertexAttrib4fARB = vbo_VertexAttrib4fARB;
+   vfmt->VertexAttrib4fvARB = vbo_VertexAttrib4fvARB;
+
+   vfmt->VertexAttrib1fNV = vbo_VertexAttrib1fNV;
+   vfmt->VertexAttrib1fvNV = vbo_VertexAttrib1fvNV;
+   vfmt->VertexAttrib2fNV = vbo_VertexAttrib2fNV;
+   vfmt->VertexAttrib2fvNV = vbo_VertexAttrib2fvNV;
+   vfmt->VertexAttrib3fNV = vbo_VertexAttrib3fNV;
+   vfmt->VertexAttrib3fvNV = vbo_VertexAttrib3fvNV;
+   vfmt->VertexAttrib4fNV = vbo_VertexAttrib4fNV;
+   vfmt->VertexAttrib4fvNV = vbo_VertexAttrib4fvNV;
+
+   /* integer-valued */
+   vfmt->VertexAttribI1i = vbo_VertexAttribI1i;
+   vfmt->VertexAttribI2i = vbo_VertexAttribI2i;
+   vfmt->VertexAttribI3i = vbo_VertexAttribI3i;
+   vfmt->VertexAttribI4i = vbo_VertexAttribI4i;
+   vfmt->VertexAttribI2iv = vbo_VertexAttribI2iv;
+   vfmt->VertexAttribI3iv = vbo_VertexAttribI3iv;
+   vfmt->VertexAttribI4iv = vbo_VertexAttribI4iv;
+
+   /* unsigned integer-valued */
+   vfmt->VertexAttribI1ui = vbo_VertexAttribI1ui;
+   vfmt->VertexAttribI2ui = vbo_VertexAttribI2ui;
+   vfmt->VertexAttribI3ui = vbo_VertexAttribI3ui;
+   vfmt->VertexAttribI4ui = vbo_VertexAttribI4ui;
+   vfmt->VertexAttribI2uiv = vbo_VertexAttribI2uiv;
+   vfmt->VertexAttribI3uiv = vbo_VertexAttribI3uiv;
+   vfmt->VertexAttribI4uiv = vbo_VertexAttribI4uiv;
+
+   vfmt->Materialfv = vbo_Materialfv;
+
+   vfmt->EdgeFlag = vbo_EdgeFlag;
+   vfmt->Indexf = vbo_Indexf;
+   vfmt->Indexfv = vbo_Indexfv;
+
+}
+
+
+#else /* FEATURE_beginend */
+
+
+static void vbo_exec_vtxfmt_init( struct vbo_exec_context *exec )
+{
+   /* silence warnings */
+   (void) vbo_Color3f;
+   (void) vbo_Color3fv;
+   (void) vbo_Color4f;
+   (void) vbo_Color4fv;
+   (void) vbo_FogCoordfEXT;
+   (void) vbo_FogCoordfvEXT;
+   (void) vbo_MultiTexCoord1f;
+   (void) vbo_MultiTexCoord1fv;
+   (void) vbo_MultiTexCoord2f;
+   (void) vbo_MultiTexCoord2fv;
+   (void) vbo_MultiTexCoord3f;
+   (void) vbo_MultiTexCoord3fv;
+   (void) vbo_MultiTexCoord4f;
+   (void) vbo_MultiTexCoord4fv;
+   (void) vbo_Normal3f;
+   (void) vbo_Normal3fv;
+   (void) vbo_SecondaryColor3fEXT;
+   (void) vbo_SecondaryColor3fvEXT;
+   (void) vbo_TexCoord1f;
+   (void) vbo_TexCoord1fv;
+   (void) vbo_TexCoord2f;
+   (void) vbo_TexCoord2fv;
+   (void) vbo_TexCoord3f;
+   (void) vbo_TexCoord3fv;
+   (void) vbo_TexCoord4f;
+   (void) vbo_TexCoord4fv;
+   (void) vbo_Vertex2f;
+   (void) vbo_Vertex2fv;
+   (void) vbo_Vertex3f;
+   (void) vbo_Vertex3fv;
+   (void) vbo_Vertex4f;
+   (void) vbo_Vertex4fv;
+
+   (void) vbo_VertexAttrib1fARB;
+   (void) vbo_VertexAttrib1fvARB;
+   (void) vbo_VertexAttrib2fARB;
+   (void) vbo_VertexAttrib2fvARB;
+   (void) vbo_VertexAttrib3fARB;
+   (void) vbo_VertexAttrib3fvARB;
+   (void) vbo_VertexAttrib4fARB;
+   (void) vbo_VertexAttrib4fvARB;
+
+   (void) vbo_VertexAttrib1fNV;
+   (void) vbo_VertexAttrib1fvNV;
+   (void) vbo_VertexAttrib2fNV;
+   (void) vbo_VertexAttrib2fvNV;
+   (void) vbo_VertexAttrib3fNV;
+   (void) vbo_VertexAttrib3fvNV;
+   (void) vbo_VertexAttrib4fNV;
+   (void) vbo_VertexAttrib4fvNV;
+
+   (void) vbo_Materialfv;
+
+   (void) vbo_EdgeFlag;
+   (void) vbo_Indexf;
+   (void) vbo_Indexfv;
+}
+
+
+#endif /* FEATURE_beginend */
+
+
+/**
+ * Tell the VBO module to use a real OpenGL vertex buffer object to
+ * store accumulated immediate-mode vertex data.
+ * This replaces the malloced buffer which was created in
+ * vbo_exec_vtx_init() below.
+ */
+void vbo_use_buffer_objects(struct gl_context *ctx)
+{
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+   /* Any buffer name but 0 can be used here since this bufferobj won't
+    * go into the bufferobj hashtable.
+    */
+   GLuint bufName = IMM_BUFFER_NAME;
+   GLenum target = GL_ARRAY_BUFFER_ARB;
+   GLenum usage = GL_STREAM_DRAW_ARB;
+   GLsizei size = VBO_VERT_BUFFER_SIZE;
+
+   /* Make sure this func is only used once */
+   assert(exec->vtx.bufferobj == ctx->Shared->NullBufferObj);
+   if (exec->vtx.buffer_map) {
+      _mesa_align_free(exec->vtx.buffer_map);
+      exec->vtx.buffer_map = NULL;
+      exec->vtx.buffer_ptr = NULL;
+   }
+
+   /* Allocate a real buffer object now */
+   _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
+   exec->vtx.bufferobj = ctx->Driver.NewBufferObject(ctx, bufName, target);
+   ctx->Driver.BufferData(ctx, target, size, NULL, usage, exec->vtx.bufferobj);
+}
+
+
+
+void vbo_exec_vtx_init( struct vbo_exec_context *exec )
+{
+   struct gl_context *ctx = exec->ctx;
+   struct vbo_context *vbo = vbo_context(ctx);
+   GLuint i;
+
+   /* Start with the null buffer object and an aligned malloc'ed vertex
+    * store.  The store is reused continuously, unless
+    * vbo_use_buffer_objects() is called to enable use of real VBOs.
+    */
+   _mesa_reference_buffer_object(ctx,
+                                 &exec->vtx.bufferobj,
+                                 ctx->Shared->NullBufferObj);
+
+   ASSERT(!exec->vtx.buffer_map);
+   exec->vtx.buffer_map = (GLfloat *)_mesa_align_malloc(VBO_VERT_BUFFER_SIZE, 64);
+   exec->vtx.buffer_ptr = exec->vtx.buffer_map;
+
+   vbo_exec_vtxfmt_init( exec );
+
+   /* Hook our functions into the dispatch table.
+    */
+   _mesa_install_exec_vtxfmt( exec->ctx, &exec->vtxfmt );
+
+   for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
+      ASSERT(i < Elements(exec->vtx.attrsz));
+      exec->vtx.attrsz[i] = 0;
+      ASSERT(i < Elements(exec->vtx.active_sz));
+      exec->vtx.active_sz[i] = 0;
+   }
+   for (i = 0 ; i < VERT_ATTRIB_MAX; i++) {
+      ASSERT(i < Elements(exec->vtx.inputs));
+      ASSERT(i < Elements(exec->vtx.arrays));
+      exec->vtx.inputs[i] = &exec->vtx.arrays[i];
+   }
+   /* Seed the arrays from the current values and reference their buffers */
+   {
+      struct gl_client_array *arrays = exec->vtx.arrays;
+      unsigned i;
+
+      memcpy(arrays,      vbo->legacy_currval,  16 * sizeof(arrays[0]));
+      memcpy(arrays + 16, vbo->generic_currval, 16 * sizeof(arrays[0]));
+
+      for (i = 0; i < 16; ++i) {
+         arrays[i     ].BufferObj = NULL;
+         arrays[i + 16].BufferObj = NULL;
+         _mesa_reference_buffer_object(ctx, &arrays[i     ].BufferObj,
+                                       vbo->legacy_currval[i].BufferObj);
+         _mesa_reference_buffer_object(ctx, &arrays[i + 16].BufferObj,
+                                       vbo->generic_currval[i].BufferObj);
+      }
+   }
+
+   exec->vtx.vertex_size = 0;
+}
+
+
+void vbo_exec_vtx_destroy( struct vbo_exec_context *exec )
+{
+   /* Free the malloc'ed vertex store (if any) and drop buffer references */
+   struct gl_context *ctx = exec->ctx;
+   unsigned i;
+
+   /* True VBOs should already be unmapped; free only the malloc'ed store
+    */
+   if (exec->vtx.buffer_map) {
+      ASSERT(exec->vtx.bufferobj->Name == 0 ||
+             exec->vtx.bufferobj->Name == IMM_BUFFER_NAME);
+      if (exec->vtx.bufferobj->Name == 0) {
+         _mesa_align_free(exec->vtx.buffer_map);
+         exec->vtx.buffer_map = NULL;
+         exec->vtx.buffer_ptr = NULL;
+      }
+   }
+
+   /* Drop any outstanding reference to the vertex buffer
+    */
+   for (i = 0; i < Elements(exec->vtx.arrays); i++) {
+      _mesa_reference_buffer_object(ctx,
+                                    &exec->vtx.arrays[i].BufferObj,
+                                    NULL);
+   }
+
+   /* Free the vertex buffer.  Unmap first if needed.
+    */
+   if (_mesa_bufferobj_mapped(exec->vtx.bufferobj)) {
+      ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, exec->vtx.bufferobj);
+   }
+   _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, NULL);
+}
+
+
+/**
+ * Called upon first glVertex, glColor, glTexCoord, etc.; maps the vertex store.
+ */
+void vbo_exec_BeginVertices( struct gl_context *ctx )
+{
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+   vbo_exec_vtx_map( exec );
+
+   assert((exec->ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT) == 0);
+   exec->ctx->Driver.NeedFlush |= FLUSH_UPDATE_CURRENT;
+}
+
+
+/**
+ * Called via ctx->Driver.FlushVertices().  Does nothing inside glBegin/glEnd.
+ * \param flags  bitmask of FLUSH_STORED_VERTICES, FLUSH_UPDATE_CURRENT
+ */
+void vbo_exec_FlushVertices( struct gl_context *ctx, GLuint flags )
+{
+   struct vbo_exec_context *exec = &vbo_context(ctx)->exec;
+
+#ifdef DEBUG
+   /* debug check: make sure we don't get called recursively */
+   exec->flush_call_depth++;
+   assert(exec->flush_call_depth == 1);
+#endif
+
+   if (exec->ctx->Driver.CurrentExecPrimitive != PRIM_OUTSIDE_BEGIN_END) {
+      /* We've had glBegin but not glEnd! */
+#ifdef DEBUG
+      exec->flush_call_depth--;
+      assert(exec->flush_call_depth == 0);
+#endif
+      return;
+   }
+
+   /* Flush (draw), and make sure VBO is left unmapped when done */
+   vbo_exec_FlushVertices_internal(exec, GL_TRUE);
+
+   /* Need to do this to ensure BeginVertices gets called again:
+    */
+   if (exec->ctx->Driver.NeedFlush & FLUSH_UPDATE_CURRENT)
+      exec->ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
+
+   exec->ctx->Driver.NeedFlush &= ~flags;
+
+#ifdef DEBUG
+   exec->flush_call_depth--;
+   assert(exec->flush_call_depth == 0);
+#endif
+}
+
+
+static void reset_attrfv( struct vbo_exec_context *exec )
+{   
+   GLuint i;
+
+   for (i = 0 ; i < VBO_ATTRIB_MAX ; i++) {
+      exec->vtx.attrsz[i] = 0;
+      exec->vtx.active_sz[i] = 0;
+   }
+
+   exec->vtx.vertex_size = 0;
+}
+      
+
+void GLAPIENTRY
+_es_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a)
+{
+   vbo_Color4f(r, g, b, a);
+}
+
+
+void GLAPIENTRY
+_es_Normal3f(GLfloat x, GLfloat y, GLfloat z)
+{
+   vbo_Normal3f(x, y, z);
+}
+
+
+void GLAPIENTRY
+_es_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q)
+{
+   vbo_MultiTexCoord4f(target, s, t, r, q);
+}
+
+
+void GLAPIENTRY
+_es_Materialfv(GLenum face, GLenum pname, const GLfloat *params)
+{
+   vbo_Materialfv(face, pname, params);
+}
+
+
+void GLAPIENTRY
+_es_Materialf(GLenum face, GLenum pname, GLfloat param)
+{
+   GLfloat p[4];
+   p[0] = param;
+   p[1] = p[2] = p[3] = 0.0F;
+   vbo_Materialfv(face, pname, p);
+}
+
+
+/**
+ * A special version of glVertexAttrib4f that does not treat index 0 as
+ * VBO_ATTRIB_POS.  An out-of-range index raises GL_INVALID_VALUE.
+ */
+static void
+VertexAttrib4f_nopos(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (index < MAX_VERTEX_GENERIC_ATTRIBS)
+      ATTR(VBO_ATTRIB_GENERIC0 + index, 4, x, y, z, w);
+   else
+      _mesa_error(ctx, GL_INVALID_VALUE, __FUNCTION__);
+}
+
+void GLAPIENTRY
+_es_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+   VertexAttrib4f_nopos(index, x, y, z, w);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib1f(GLuint indx, GLfloat x)
+{
+   VertexAttrib4f_nopos(indx, x, 0.0f, 0.0f, 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib1fv(GLuint indx, const GLfloat* values)
+{
+   VertexAttrib4f_nopos(indx, values[0], 0.0f, 0.0f, 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib2f(GLuint indx, GLfloat x, GLfloat y)
+{
+   VertexAttrib4f_nopos(indx, x, y, 0.0f, 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib2fv(GLuint indx, const GLfloat* values)
+{
+   VertexAttrib4f_nopos(indx, values[0], values[1], 0.0f, 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib3f(GLuint indx, GLfloat x, GLfloat y, GLfloat z)
+{
+   VertexAttrib4f_nopos(indx, x, y, z, 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib3fv(GLuint indx, const GLfloat* values)
+{
+   VertexAttrib4f_nopos(indx, values[0], values[1], values[2], 1.0f);
+}
+
+
+void GLAPIENTRY
+_es_VertexAttrib4fv(GLuint indx, const GLfloat* values)
+{
+   VertexAttrib4f_nopos(indx, values[0], values[1], values[2], values[3]);
+}
diff --git a/mesalib/src/mesa/vbo/vbo_exec_draw.c b/mesalib/src/mesa/vbo/vbo_exec_draw.c
index f8be83ea8..539658021 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_draw.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_draw.c
@@ -253,6 +253,9 @@ vbo_exec_bind_arrays( struct gl_context *ctx )
 }
 
 
+/**
+ * Unmap the VBO.  This is called before drawing.
+ */
 static void
 vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
 {
@@ -285,6 +288,9 @@ vbo_exec_vtx_unmap( struct vbo_exec_context *exec )
 }
 
 
+/**
+ * Map the vertex buffer to begin storing glVertex, glColor, etc data.
+ */
 void
 vbo_exec_vtx_map( struct vbo_exec_context *exec )
 {
@@ -301,14 +307,12 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
    if (!_mesa_is_bufferobj(exec->vtx.bufferobj))
       return;
 
-   if (exec->vtx.buffer_map != NULL) {
-      assert(0);
-      exec->vtx.buffer_map = NULL;
-      exec->vtx.buffer_ptr = NULL;
-   }
+   assert(!exec->vtx.buffer_map);
+   assert(!exec->vtx.buffer_ptr);
 
    if (VBO_VERT_BUFFER_SIZE > exec->vtx.buffer_used + 1024 &&
        ctx->Driver.MapBufferRange) {
+      /* The VBO exists and there's room for more */
       exec->vtx.buffer_map = 
          (GLfloat *)ctx->Driver.MapBufferRange(ctx, 
                                                target, 
@@ -321,6 +325,7 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
    }
    
    if (!exec->vtx.buffer_map) {
+      /* Need to allocate a new VBO */
       exec->vtx.buffer_used = 0;
 
       ctx->Driver.BufferData(ctx, target, 
@@ -349,9 +354,10 @@ vbo_exec_vtx_map( struct vbo_exec_context *exec )
 
 /**
  * Execute the buffer and save copied verts.
+ * \param keepUnmapped  if true, leave the VBO unmapped when we're done.
  */
 void
-vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
+vbo_exec_vtx_flush(struct vbo_exec_context *exec, GLboolean keepUnmapped)
 {
    if (0)
       vbo_exec_debug_verts( exec );
@@ -391,7 +397,7 @@ vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
 
 	 /* If using a real VBO, get new storage -- unless asked not to.
           */
-         if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !unmap) {
+         if (_mesa_is_bufferobj(exec->vtx.bufferobj) && !keepUnmapped) {
             vbo_exec_vtx_map( exec );
          }
       }
@@ -399,14 +405,13 @@ vbo_exec_vtx_flush( struct vbo_exec_context *exec, GLboolean unmap )
 
    /* May have to unmap explicitly if we didn't draw:
     */
-   if (unmap && 
+   if (keepUnmapped &&
        _mesa_is_bufferobj(exec->vtx.bufferobj) &&
        exec->vtx.buffer_map) {
       vbo_exec_vtx_unmap( exec );
    }
 
-
-   if (unmap || exec->vtx.vertex_size == 0)
+   if (keepUnmapped || exec->vtx.vertex_size == 0)
       exec->vtx.max_vert = 0;
    else
       exec->vtx.max_vert = ((VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / 
diff --git a/pixman/configure.ac b/pixman/configure.ac
index 5242799bb..8d96647f9 100644
--- a/pixman/configure.ac
+++ b/pixman/configure.ac
@@ -326,7 +326,7 @@ if test "x$SSE2_CFLAGS" = "x" ; then
          SSE2_CFLAGS="-xarch=sse2"
       fi
    else
-      SSE2_CFLAGS="-mmmx -msse2 -Winline"
+      SSE2_CFLAGS="-msse2 -Winline"
    fi
 fi
 
diff --git a/pixman/pixman/pixman-sse2.c b/pixman/pixman/pixman-sse2.c
index 2e135e2fe..88287b453 100644
--- a/pixman/pixman/pixman-sse2.c
+++ b/pixman/pixman/pixman-sse2.c
@@ -30,36 +30,12 @@
 #include <config.h>
 #endif
 
-#include <mmintrin.h>
 #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
 #include <emmintrin.h> /* for SSE2 intrinsics */
 #include "pixman-private.h"
 #include "pixman-combine32.h"
 #include "pixman-fast-path.h"
 
-#if defined(_MSC_VER) && defined(_M_AMD64)
-/* Windows 64 doesn't allow MMX to be used, so
- * the pixman-x64-mmx-emulation.h file contains
- * implementations of those MMX intrinsics that
- * are used in the SSE2 implementation.
- */
-#   include "pixman-x64-mmx-emulation.h"
-#endif
-
-#ifdef USE_SSE2
-
-/* --------------------------------------------------------------------
- * Locals
- */
-
-static __m64 mask_x0080;
-static __m64 mask_x00ff;
-static __m64 mask_x0101;
-static __m64 mask_x_alpha;
-
-static __m64 mask_x565_rgb;
-static __m64 mask_x565_unpack;
-
 static __m128i mask_0080;
 static __m128i mask_00ff;
 static __m128i mask_0101;
@@ -77,9 +53,6 @@ static __m128i mask_blue;
 static __m128i mask_565_fix_rb;
 static __m128i mask_565_fix_g;
 
-/* ----------------------------------------------------------------------
- * SSE2 Inlines
- */
 static force_inline __m128i
 unpack_32_1x128 (uint32_t data)
 {
@@ -397,146 +370,104 @@ save_128_unaligned (__m128i* dst,
     _mm_storeu_si128 (dst, data);
 }
 
-/* ------------------------------------------------------------------
- * MMX inlines
- */
-
-static force_inline __m64
-load_32_1x64 (uint32_t data)
-{
-    return _mm_cvtsi32_si64 (data);
-}
-
-static force_inline __m64
-unpack_32_1x64 (uint32_t data)
-{
-    return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
-}
-
-static force_inline __m64
-expand_alpha_1x64 (__m64 data)
+static force_inline __m128i
+load_32_1x128 (uint32_t data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
+    return _mm_cvtsi32_si128 (data);
 }
 
-static force_inline __m64
-expand_alpha_rev_1x64 (__m64 data)
+static force_inline __m128i
+expand_alpha_rev_1x128 (__m128i data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
+    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
 }
 
-static force_inline __m64
-expand_pixel_8_1x64 (uint8_t data)
+static force_inline __m128i
+expand_pixel_8_1x128 (uint8_t data)
 {
-    return _mm_shuffle_pi16 (
-	unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
+    return _mm_shufflelo_epi16 (
+	unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
 }
 
-static force_inline __m64
-pix_multiply_1x64 (__m64 data,
-                   __m64 alpha)
+static force_inline __m128i
+pix_multiply_1x128 (__m128i data,
+		    __m128i alpha)
 {
-    return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
-                                          mask_x0080),
-                           mask_x0101);
+    return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha),
+					    mask_0080),
+			    mask_0101);
 }
 
-static force_inline __m64
-pix_add_multiply_1x64 (__m64* src,
-                       __m64* alpha_dst,
-                       __m64* dst,
-                       __m64* alpha_src)
+static force_inline __m128i
+pix_add_multiply_1x128 (__m128i* src,
+			__m128i* alpha_dst,
+			__m128i* dst,
+			__m128i* alpha_src)
 {
-    __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
-    __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
+    __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst);
+    __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src);
 
-    return _mm_adds_pu8 (t1, t2);
+    return _mm_adds_epu8 (t1, t2);
 }
 
-static force_inline __m64
-negate_1x64 (__m64 data)
+static force_inline __m128i
+negate_1x128 (__m128i data)
 {
-    return _mm_xor_si64 (data, mask_x00ff);
+    return _mm_xor_si128 (data, mask_00ff);
 }
 
-static force_inline __m64
-invert_colors_1x64 (__m64 data)
+static force_inline __m128i
+invert_colors_1x128 (__m128i data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
+    return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
 }
 
-static force_inline __m64
-over_1x64 (__m64 src, __m64 alpha, __m64 dst)
+static force_inline __m128i
+over_1x128 (__m128i src, __m128i alpha, __m128i dst)
 {
-    return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
+    return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha)));
 }
 
-static force_inline __m64
-in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
+static force_inline __m128i
+in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst)
 {
-    return over_1x64 (pix_multiply_1x64 (*src, *mask),
-                      pix_multiply_1x64 (*alpha, *mask),
-                      *dst);
+    return over_1x128 (pix_multiply_1x128 (*src, *mask),
+		       pix_multiply_1x128 (*alpha, *mask),
+		       *dst);
 }
 
-static force_inline __m64
-over_rev_non_pre_1x64 (__m64 src, __m64 dst)
+static force_inline __m128i
+over_rev_non_pre_1x128 (__m128i src, __m128i dst)
 {
-    __m64 alpha = expand_alpha_1x64 (src);
+    __m128i alpha = expand_alpha_1x128 (src);
 
-    return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
-                                         _mm_or_si64 (alpha, mask_x_alpha)),
-                      alpha,
-                      dst);
+    return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src),
+					   _mm_or_si128 (alpha, mask_alpha)),
+		       alpha,
+		       dst);
 }
 
 static force_inline uint32_t
-pack_1x64_32 (__m64 data)
+pack_1x128_32 (__m128i data)
 {
-    return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
+    return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ()));
 }
 
-/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
- *
- *    00RR00GG00BB
- *
- * --- Expanding 565 in the low word ---
- *
- * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
- * m = m & (01f0003f001f);
- * m = m * (008404100840);
- * m = m >> 8;
- *
- * Note the trick here - the top word is shifted by another nibble to
- * avoid it bumping into the middle word
- */
-static force_inline __m64
-expand565_16_1x64 (uint16_t pixel)
+static force_inline __m128i
+expand565_16_1x128 (uint16_t pixel)
 {
-    __m64 p;
-    __m64 t1, t2;
+    __m128i m = _mm_cvtsi32_si128 (pixel);
 
-    p = _mm_cvtsi32_si64 ((uint32_t) pixel);
+    m = unpack_565_to_8888 (m);
 
-    t1 = _mm_slli_si64 (p, 36 - 11);
-    t2 = _mm_slli_si64 (p, 16 - 5);
-
-    p = _mm_or_si64 (t1, p);
-    p = _mm_or_si64 (t2, p);
-    p = _mm_and_si64 (p, mask_x565_rgb);
-    p = _mm_mullo_pi16 (p, mask_x565_unpack);
-
-    return _mm_srli_pi16 (p, 8);
+    return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
 }
 
-/* ----------------------------------------------------------------------------
- * Compose Core transformations
- */
 static force_inline uint32_t
 core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
 {
     uint8_t a;
-    __m64 ms;
+    __m128i xmms;
 
     a = src >> 24;
 
@@ -546,9 +477,10 @@ core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
     }
     else if (src)
     {
-	ms = unpack_32_1x64 (src);
-	return pack_1x64_32 (
-	    over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
+	xmms = unpack_32_1x128 (src);
+	return pack_1x128_32 (
+	    over_1x128 (xmms, expand_alpha_1x128 (xmms),
+			unpack_32_1x128 (dst)));
     }
 
     return dst;
@@ -561,15 +493,15 @@ combine1 (const uint32_t *ps, const uint32_t *pm)
 
     if (pm)
     {
-	__m64 ms, mm;
+	__m128i ms, mm;
 
-	mm = unpack_32_1x64 (*pm);
-	mm = expand_alpha_1x64 (mm);
+	mm = unpack_32_1x128 (*pm);
+	mm = expand_alpha_1x128 (mm);
 
-	ms = unpack_32_1x64 (s);
-	ms = pix_multiply_1x64 (ms, mm);
+	ms = unpack_32_1x128 (s);
+	ms = pix_multiply_1x128 (ms, mm);
 
-	s = pack_1x64_32 (ms);
+	s = pack_1x128_32 (ms);
     }
 
     return s;
@@ -766,10 +698,12 @@ core_combine_over_u_sse2_no_mask (uint32_t *	  pd,
 }
 
 static force_inline void
-core_combine_over_u_sse2 (uint32_t*       pd,
-                          const uint32_t* ps,
-                          const uint32_t* pm,
-                          int             w)
+sse2_combine_over_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     if (pm)
 	core_combine_over_u_sse2_mask (pd, ps, pm, w);
@@ -777,11 +711,13 @@ core_combine_over_u_sse2 (uint32_t*       pd,
 	core_combine_over_u_sse2_no_mask (pd, ps, w);
 }
 
-static force_inline void
-core_combine_over_reverse_u_sse2 (uint32_t*       pd,
-                                  const uint32_t* ps,
-                                  const uint32_t* pm,
-                                  int             w)
+static void
+sse2_combine_over_reverse_u (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               pd,
+                             const uint32_t *         ps,
+                             const uint32_t *         pm,
+                             int                      w)
 {
     uint32_t s, d;
 
@@ -847,7 +783,7 @@ core_combine_over_reverse_u_sse2 (uint32_t*       pd,
 }
 
 static force_inline uint32_t
-core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
+core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst)
 {
     uint32_t maska = src >> 24;
 
@@ -857,19 +793,21 @@ core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
     }
     else if (maska != 0xff)
     {
-	return pack_1x64_32 (
-	    pix_multiply_1x64 (unpack_32_1x64 (dst),
-			       expand_alpha_1x64 (unpack_32_1x64 (src))));
+	return pack_1x128_32 (
+	    pix_multiply_1x128 (unpack_32_1x128 (dst),
+				expand_alpha_1x128 (unpack_32_1x128 (src))));
     }
 
     return dst;
 }
 
-static force_inline void
-core_combine_in_u_sse2 (uint32_t*       pd,
-                        const uint32_t* ps,
-                        const uint32_t* pm,
-                        int             w)
+static void
+sse2_combine_in_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               pd,
+                   const uint32_t *         ps,
+                   const uint32_t *         pm,
+                   int                      w)
 {
     uint32_t s, d;
 
@@ -881,7 +819,7 @@ core_combine_in_u_sse2 (uint32_t*       pd,
 	s = combine1 (ps, pm);
 	d = *pd;
 
-	*pd++ = core_combine_in_u_pixelsse2 (d, s);
+	*pd++ = core_combine_in_u_pixel_sse2 (d, s);
 	w--;
 	ps++;
 	if (pm)
@@ -916,7 +854,7 @@ core_combine_in_u_sse2 (uint32_t*       pd,
 	s = combine1 (ps, pm);
 	d = *pd;
 
-	*pd++ = core_combine_in_u_pixelsse2 (d, s);
+	*pd++ = core_combine_in_u_pixel_sse2 (d, s);
 	w--;
 	ps++;
 	if (pm)
@@ -924,11 +862,13 @@ core_combine_in_u_sse2 (uint32_t*       pd,
     }
 }
 
-static force_inline void
-core_combine_reverse_in_u_sse2 (uint32_t*       pd,
-                                const uint32_t* ps,
-                                const uint32_t *pm,
-                                int             w)
+static void
+sse2_combine_in_reverse_u (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               pd,
+                           const uint32_t *         ps,
+                           const uint32_t *         pm,
+                           int                      w)
 {
     uint32_t s, d;
 
@@ -940,7 +880,7 @@ core_combine_reverse_in_u_sse2 (uint32_t*       pd,
 	s = combine1 (ps, pm);
 	d = *pd;
 
-	*pd++ = core_combine_in_u_pixelsse2 (s, d);
+	*pd++ = core_combine_in_u_pixel_sse2 (s, d);
 	ps++;
 	w--;
 	if (pm)
@@ -975,7 +915,7 @@ core_combine_reverse_in_u_sse2 (uint32_t*       pd,
 	s = combine1 (ps, pm);
 	d = *pd;
 
-	*pd++ = core_combine_in_u_pixelsse2 (s, d);
+	*pd++ = core_combine_in_u_pixel_sse2 (s, d);
 	w--;
 	ps++;
 	if (pm)
@@ -983,21 +923,23 @@ core_combine_reverse_in_u_sse2 (uint32_t*       pd,
     }
 }
 
-static force_inline void
-core_combine_reverse_out_u_sse2 (uint32_t*       pd,
-                                 const uint32_t* ps,
-                                 const uint32_t* pm,
-                                 int             w)
+static void
+sse2_combine_out_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               pd,
+                            const uint32_t *         ps,
+                            const uint32_t *         pm,
+                            int                      w)
 {
     while (w && ((unsigned long) pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d), negate_1x64 (
-		    expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d), negate_1x128 (
+		    expand_alpha_1x128 (unpack_32_1x128 (s)))));
 
 	if (pm)
 	    pm++;
@@ -1039,10 +981,10 @@ core_combine_reverse_out_u_sse2 (uint32_t*       pd,
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d), negate_1x64 (
-		    expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d), negate_1x128 (
+		    expand_alpha_1x128 (unpack_32_1x128 (s)))));
 	ps++;
 	if (pm)
 	    pm++;
@@ -1050,21 +992,23 @@ core_combine_reverse_out_u_sse2 (uint32_t*       pd,
     }
 }
 
-static force_inline void
-core_combine_out_u_sse2 (uint32_t*       pd,
-                         const uint32_t* ps,
-                         const uint32_t* pm,
-                         int             w)
+static void
+sse2_combine_out_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               pd,
+                    const uint32_t *         ps,
+                    const uint32_t *         pm,
+                    int                      w)
 {
     while (w && ((unsigned long) pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (s), negate_1x64 (
-		    expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (s), negate_1x128 (
+		    expand_alpha_1x128 (unpack_32_1x128 (d)))));
 	w--;
 	ps++;
 	if (pm)
@@ -1104,10 +1048,10 @@ core_combine_out_u_sse2 (uint32_t*       pd,
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (s), negate_1x64 (
-		    expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (s), negate_1x128 (
+		    expand_alpha_1x128 (unpack_32_1x128 (d)))));
 	w--;
 	ps++;
 	if (pm)
@@ -1119,20 +1063,22 @@ static force_inline uint32_t
 core_combine_atop_u_pixel_sse2 (uint32_t src,
                                 uint32_t dst)
 {
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
 
-    __m64 sa = negate_1x64 (expand_alpha_1x64 (s));
-    __m64 da = expand_alpha_1x64 (d);
+    __m128i sa = negate_1x128 (expand_alpha_1x128 (s));
+    __m128i da = expand_alpha_1x128 (d);
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
+    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
 }
 
-static force_inline void
-core_combine_atop_u_sse2 (uint32_t*       pd,
-                          const uint32_t* ps,
-                          const uint32_t* pm,
-                          int             w)
+static void
+sse2_combine_atop_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     uint32_t s, d;
 
@@ -1201,20 +1147,22 @@ static force_inline uint32_t
 core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
                                         uint32_t dst)
 {
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
 
-    __m64 sa = expand_alpha_1x64 (s);
-    __m64 da = negate_1x64 (expand_alpha_1x64 (d));
+    __m128i sa = expand_alpha_1x128 (s);
+    __m128i da = negate_1x128 (expand_alpha_1x128 (d));
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
+    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa));
 }
 
-static force_inline void
-core_combine_reverse_atop_u_sse2 (uint32_t*       pd,
-                                  const uint32_t* ps,
-                                  const uint32_t* pm,
-                                  int             w)
+static void
+sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               pd,
+                             const uint32_t *         ps,
+                             const uint32_t *         pm,
+                             int                      w)
 {
     uint32_t s, d;
 
@@ -1283,20 +1231,22 @@ static force_inline uint32_t
 core_combine_xor_u_pixel_sse2 (uint32_t src,
                                uint32_t dst)
 {
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
 
-    __m64 neg_d = negate_1x64 (expand_alpha_1x64 (d));
-    __m64 neg_s = negate_1x64 (expand_alpha_1x64 (s));
+    __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d));
+    __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s));
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&s, &neg_d, &d, &neg_s));
+    return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s));
 }
 
-static force_inline void
-core_combine_xor_u_sse2 (uint32_t*       dst,
-                         const uint32_t* src,
-                         const uint32_t *mask,
-                         int             width)
+static void
+sse2_combine_xor_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int w = width;
     uint32_t s, d;
@@ -1368,10 +1318,12 @@ core_combine_xor_u_sse2 (uint32_t*       dst,
 }
 
 static force_inline void
-core_combine_add_u_sse2 (uint32_t*       dst,
-                         const uint32_t* src,
-                         const uint32_t* mask,
-                         int             width)
+sse2_combine_add_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int w = width;
     uint32_t s, d;
@@ -1387,8 +1339,8 @@ core_combine_add_u_sse2 (uint32_t*       dst,
 	ps++;
 	if (pm)
 	    pm++;
-	*pd++ = _mm_cvtsi64_si32 (
-	    _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+	*pd++ = _mm_cvtsi128_si32 (
+	    _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
 	w--;
     }
 
@@ -1414,8 +1366,8 @@ core_combine_add_u_sse2 (uint32_t*       dst,
 	d = *pd;
 
 	ps++;
-	*pd++ = _mm_cvtsi64_si32 (
-	    _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+	*pd++ = _mm_cvtsi128_si32 (
+	    _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d)));
 	if (pm)
 	    pm++;
     }
@@ -1425,25 +1377,27 @@ static force_inline uint32_t
 core_combine_saturate_u_pixel_sse2 (uint32_t src,
                                     uint32_t dst)
 {
-    __m64 ms = unpack_32_1x64 (src);
-    __m64 md = unpack_32_1x64 (dst);
+    __m128i ms = unpack_32_1x128 (src);
+    __m128i md = unpack_32_1x128 (dst);
     uint32_t sa = src >> 24;
     uint32_t da = ~dst >> 24;
 
     if (sa > da)
     {
-	ms = pix_multiply_1x64 (
-	    ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24)));
+	ms = pix_multiply_1x128 (
+	    ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa) << 24)));
     }
 
-    return pack_1x64_32 (_mm_adds_pu16 (md, ms));
+    return pack_1x128_32 (_mm_adds_epu16 (md, ms));
 }
 
-static force_inline void
-core_combine_saturate_u_sse2 (uint32_t *      pd,
-                              const uint32_t *ps,
-                              const uint32_t *pm,
-                              int             w)
+static void
+sse2_combine_saturate_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t *               pd,
+                         const uint32_t *         ps,
+                         const uint32_t *         pm,
+                         int                      w)
 {
     uint32_t s, d;
 
@@ -1524,11 +1478,13 @@ core_combine_saturate_u_sse2 (uint32_t *      pd,
     }
 }
 
-static force_inline void
-core_combine_src_ca_sse2 (uint32_t*       pd,
-                          const uint32_t* ps,
-                          const uint32_t *pm,
-                          int             w)
+static void
+sse2_combine_src_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     uint32_t s, m;
 
@@ -1540,8 +1496,8 @@ core_combine_src_ca_sse2 (uint32_t*       pd,
     {
 	s = *ps++;
 	m = *pm++;
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
 	w--;
     }
 
@@ -1570,8 +1526,8 @@ core_combine_src_ca_sse2 (uint32_t*       pd,
     {
 	s = *ps++;
 	m = *pm++;
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)));
 	w--;
     }
 }
@@ -1581,19 +1537,21 @@ core_combine_over_ca_pixel_sse2 (uint32_t src,
                                  uint32_t mask,
                                  uint32_t dst)
 {
-    __m64 s = unpack_32_1x64 (src);
-    __m64 expAlpha = expand_alpha_1x64 (s);
-    __m64 unpk_mask = unpack_32_1x64 (mask);
-    __m64 unpk_dst  = unpack_32_1x64 (dst);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i expAlpha = expand_alpha_1x128 (s);
+    __m128i unpk_mask = unpack_32_1x128 (mask);
+    __m128i unpk_dst  = unpack_32_1x128 (dst);
 
-    return pack_1x64_32 (in_over_1x64 (&s, &expAlpha, &unpk_mask, &unpk_dst));
+    return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst));
 }
 
-static force_inline void
-core_combine_over_ca_sse2 (uint32_t*       pd,
-                           const uint32_t* ps,
-                           const uint32_t *pm,
-                           int             w)
+static void
+sse2_combine_over_ca (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      uint32_t *               pd,
+                      const uint32_t *         ps,
+                      const uint32_t *         pm,
+                      int                      w)
 {
     uint32_t s, m, d;
 
@@ -1655,19 +1613,21 @@ core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
                                          uint32_t mask,
                                          uint32_t dst)
 {
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i d = unpack_32_1x128 (dst);
 
-    return pack_1x64_32 (
-	over_1x64 (d, expand_alpha_1x64 (d),
-		   pix_multiply_1x64 (unpack_32_1x64 (src),
-				      unpack_32_1x64 (mask))));
+    return pack_1x128_32 (
+	over_1x128 (d, expand_alpha_1x128 (d),
+		    pix_multiply_1x128 (unpack_32_1x128 (src),
+					unpack_32_1x128 (mask))));
 }
 
-static force_inline void
-core_combine_over_reverse_ca_sse2 (uint32_t*       pd,
-                                   const uint32_t* ps,
-                                   const uint32_t *pm,
-                                   int             w)
+static void
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              uint32_t *               pd,
+                              const uint32_t *         ps,
+                              const uint32_t *         pm,
+                              int                      w)
 {
     uint32_t s, m, d;
 
@@ -1726,11 +1686,13 @@ core_combine_over_reverse_ca_sse2 (uint32_t*       pd,
     }
 }
 
-static force_inline void
-core_combine_in_ca_sse2 (uint32_t *      pd,
-                         const uint32_t *ps,
-                         const uint32_t *pm,
-                         int             w)
+static void
+sse2_combine_in_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               pd,
+                    const uint32_t *         ps,
+                    const uint32_t *         pm,
+                    int                      w)
 {
     uint32_t s, m, d;
 
@@ -1745,10 +1707,10 @@ core_combine_in_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
-		expand_alpha_1x64 (unpack_32_1x64 (d))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)),
+		expand_alpha_1x128 (unpack_32_1x128 (d))));
 
 	w--;
     }
@@ -1789,21 +1751,23 @@ core_combine_in_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		pix_multiply_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
-		expand_alpha_1x64 (unpack_32_1x64 (d))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		pix_multiply_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
+		expand_alpha_1x128 (unpack_32_1x128 (d))));
 
 	w--;
     }
 }
 
-static force_inline void
-core_combine_in_reverse_ca_sse2 (uint32_t *      pd,
-                                 const uint32_t *ps,
-                                 const uint32_t *pm,
-                                 int             w)
+static void
+sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               pd,
+                            const uint32_t *         ps,
+                            const uint32_t *         pm,
+                            int                      w)
 {
     uint32_t s, m, d;
 
@@ -1818,11 +1782,11 @@ core_combine_in_reverse_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d),
-		pix_multiply_1x64 (unpack_32_1x64 (m),
-				   expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d),
+		pix_multiply_1x128 (unpack_32_1x128 (m),
+				   expand_alpha_1x128 (unpack_32_1x128 (s)))));
 	w--;
     }
 
@@ -1861,20 +1825,22 @@ core_combine_in_reverse_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d),
-		pix_multiply_1x64 (unpack_32_1x64 (m),
-				   expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d),
+		pix_multiply_1x128 (unpack_32_1x128 (m),
+				   expand_alpha_1x128 (unpack_32_1x128 (s)))));
 	w--;
     }
 }
 
-static force_inline void
-core_combine_out_ca_sse2 (uint32_t *      pd,
-                          const uint32_t *ps,
-                          const uint32_t *pm,
-                          int             w)
+static void
+sse2_combine_out_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     uint32_t s, m, d;
 
@@ -1889,11 +1855,11 @@ core_combine_out_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		pix_multiply_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
-		negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		pix_multiply_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
+		negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
 	w--;
     }
 
@@ -1934,21 +1900,23 @@ core_combine_out_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		pix_multiply_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
-		negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		pix_multiply_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (m)),
+		negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d)))));
 
 	w--;
     }
 }
 
-static force_inline void
-core_combine_out_reverse_ca_sse2 (uint32_t *      pd,
-                                  const uint32_t *ps,
-                                  const uint32_t *pm,
-                                  int             w)
+static void
+sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               pd,
+                             const uint32_t *         ps,
+                             const uint32_t *         pm,
+                             int                      w)
 {
     uint32_t s, m, d;
 
@@ -1963,12 +1931,12 @@ core_combine_out_reverse_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d),
-		negate_1x64 (pix_multiply_1x64 (
-				 unpack_32_1x64 (m),
-				 expand_alpha_1x64 (unpack_32_1x64 (s))))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d),
+		negate_1x128 (pix_multiply_1x128 (
+				 unpack_32_1x128 (m),
+				 expand_alpha_1x128 (unpack_32_1x128 (s))))));
 	w--;
     }
 
@@ -2011,12 +1979,12 @@ core_combine_out_reverse_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    pix_multiply_1x64 (
-		unpack_32_1x64 (d),
-		negate_1x64 (pix_multiply_1x64 (
-				 unpack_32_1x64 (m),
-				 expand_alpha_1x64 (unpack_32_1x64 (s))))));
+	*pd++ = pack_1x128_32 (
+	    pix_multiply_1x128 (
+		unpack_32_1x128 (d),
+		negate_1x128 (pix_multiply_1x128 (
+				 unpack_32_1x128 (m),
+				 expand_alpha_1x128 (unpack_32_1x128 (s))))));
 	w--;
     }
 }
@@ -2026,23 +1994,25 @@ core_combine_atop_ca_pixel_sse2 (uint32_t src,
                                  uint32_t mask,
                                  uint32_t dst)
 {
-    __m64 m = unpack_32_1x64 (mask);
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
-    __m64 sa = expand_alpha_1x64 (s);
-    __m64 da = expand_alpha_1x64 (d);
+    __m128i m = unpack_32_1x128 (mask);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
+    __m128i sa = expand_alpha_1x128 (s);
+    __m128i da = expand_alpha_1x128 (d);
 
-    s = pix_multiply_1x64 (s, m);
-    m = negate_1x64 (pix_multiply_1x64 (m, sa));
+    s = pix_multiply_1x128 (s, m);
+    m = negate_1x128 (pix_multiply_1x128 (m, sa));
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
+    return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
 }
 
-static force_inline void
-core_combine_atop_ca_sse2 (uint32_t *      pd,
-                           const uint32_t *ps,
-                           const uint32_t *pm,
-                           int             w)
+static void
+sse2_combine_atop_ca (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      uint32_t *               pd,
+                      const uint32_t *         ps,
+                      const uint32_t *         pm,
+                      int                      w)
 {
     uint32_t s, m, d;
 
@@ -2116,24 +2086,26 @@ core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
                                          uint32_t mask,
                                          uint32_t dst)
 {
-    __m64 m = unpack_32_1x64 (mask);
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i m = unpack_32_1x128 (mask);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
 
-    __m64 da = negate_1x64 (expand_alpha_1x64 (d));
-    __m64 sa = expand_alpha_1x64 (s);
+    __m128i da = negate_1x128 (expand_alpha_1x128 (d));
+    __m128i sa = expand_alpha_1x128 (s);
 
-    s = pix_multiply_1x64 (s, m);
-    m = pix_multiply_1x64 (m, sa);
+    s = pix_multiply_1x128 (s, m);
+    m = pix_multiply_1x128 (m, sa);
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
+    return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da));
 }
 
-static force_inline void
-core_combine_reverse_atop_ca_sse2 (uint32_t *      pd,
-                                   const uint32_t *ps,
-                                   const uint32_t *pm,
-                                   int             w)
+static void
+sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              uint32_t *               pd,
+                              const uint32_t *         ps,
+                              const uint32_t *         pm,
+                              int                      w)
 {
     uint32_t s, m, d;
 
@@ -2208,26 +2180,28 @@ core_combine_xor_ca_pixel_sse2 (uint32_t src,
                                 uint32_t mask,
                                 uint32_t dst)
 {
-    __m64 a = unpack_32_1x64 (mask);
-    __m64 s = unpack_32_1x64 (src);
-    __m64 d = unpack_32_1x64 (dst);
+    __m128i a = unpack_32_1x128 (mask);
+    __m128i s = unpack_32_1x128 (src);
+    __m128i d = unpack_32_1x128 (dst);
 
-    __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (
-				       a, expand_alpha_1x64 (s)));
-    __m64 dest      = pix_multiply_1x64 (s, a);
-    __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d));
+    __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 (
+				       a, expand_alpha_1x128 (s)));
+    __m128i dest      = pix_multiply_1x128 (s, a);
+    __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d));
 
-    return pack_1x64_32 (pix_add_multiply_1x64 (&d,
+    return pack_1x128_32 (pix_add_multiply_1x128 (&d,
                                                 &alpha_dst,
                                                 &dest,
                                                 &alpha_src));
 }
 
-static force_inline void
-core_combine_xor_ca_sse2 (uint32_t *      pd,
-                          const uint32_t *ps,
-                          const uint32_t *pm,
-                          int             w)
+static void
+sse2_combine_xor_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     uint32_t s, m, d;
 
@@ -2299,11 +2273,13 @@ core_combine_xor_ca_sse2 (uint32_t *      pd,
     }
 }
 
-static force_inline void
-core_combine_add_ca_sse2 (uint32_t *      pd,
-                          const uint32_t *ps,
-                          const uint32_t *pm,
-                          int             w)
+static void
+sse2_combine_add_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               pd,
+                     const uint32_t *         ps,
+                     const uint32_t *         pm,
+                     int                      w)
 {
     uint32_t s, m, d;
 
@@ -2317,10 +2293,10 @@ core_combine_add_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
-					     unpack_32_1x64 (m)),
-			  unpack_32_1x64 (d)));
+	*pd++ = pack_1x128_32 (
+	    _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+					       unpack_32_1x128 (m)),
+			   unpack_32_1x128 (d)));
 	w--;
     }
 
@@ -2355,36 +2331,20 @@ core_combine_add_ca_sse2 (uint32_t *      pd,
 	m = *pm++;
 	d = *pd;
 
-	*pd++ = pack_1x64_32 (
-	    _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
-					     unpack_32_1x64 (m)),
-			  unpack_32_1x64 (d)));
+	*pd++ = pack_1x128_32 (
+	    _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s),
+					       unpack_32_1x128 (m)),
+			   unpack_32_1x128 (d)));
 	w--;
     }
 }
 
-/* ---------------------------------------------------
- * fb_compose_setup_sSE2
- */
-static force_inline __m64
-create_mask_16_64 (uint16_t mask)
-{
-    return _mm_set1_pi16 (mask);
-}
-
 static force_inline __m128i
 create_mask_16_128 (uint16_t mask)
 {
     return _mm_set1_epi16 (mask);
 }
 
-static force_inline __m64
-create_mask_2x32_64 (uint32_t mask0,
-                     uint32_t mask1)
-{
-    return _mm_set_pi32 (mask0, mask1);
-}
-
 /* Work around a code generation bug in Sun Studio 12. */
 #if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
 # define create_mask_2x32_128(mask0, mask1)				\
@@ -2398,276 +2358,6 @@ create_mask_2x32_128 (uint32_t mask0,
 }
 #endif
 
-/* SSE2 code patch for fbcompose.c */
-
-static void
-sse2_combine_over_u (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_over_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dst,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    core_combine_over_reverse_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_in_u (pixman_implementation_t *imp,
-                   pixman_op_t              op,
-                   uint32_t *               dst,
-                   const uint32_t *         src,
-                   const uint32_t *         mask,
-                   int                      width)
-{
-    core_combine_in_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp,
-                           pixman_op_t              op,
-                           uint32_t *               dst,
-                           const uint32_t *         src,
-                           const uint32_t *         mask,
-                           int                      width)
-{
-    core_combine_reverse_in_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_out_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    core_combine_out_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dst,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    core_combine_reverse_out_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_atop_u (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_atop_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dst,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_xor_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    core_combine_xor_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_add_u (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    core_combine_add_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_saturate_u (pixman_implementation_t *imp,
-                         pixman_op_t              op,
-                         uint32_t *               dst,
-                         const uint32_t *         src,
-                         const uint32_t *         mask,
-                         int                      width)
-{
-    core_combine_saturate_u_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_src_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_src_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_over_ca (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               dst,
-                      const uint32_t *         src,
-                      const uint32_t *         mask,
-                      int                      width)
-{
-    core_combine_over_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              uint32_t *               dst,
-                              const uint32_t *         src,
-                              const uint32_t *         mask,
-                              int                      width)
-{
-    core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_in_ca (pixman_implementation_t *imp,
-                    pixman_op_t              op,
-                    uint32_t *               dst,
-                    const uint32_t *         src,
-                    const uint32_t *         mask,
-                    int                      width)
-{
-    core_combine_in_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
-                            pixman_op_t              op,
-                            uint32_t *               dst,
-                            const uint32_t *         src,
-                            const uint32_t *         mask,
-                            int                      width)
-{
-    core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_out_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_out_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
-                             pixman_op_t              op,
-                             uint32_t *               dst,
-                             const uint32_t *         src,
-                             const uint32_t *         mask,
-                             int                      width)
-{
-    core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_atop_ca (pixman_implementation_t *imp,
-                      pixman_op_t              op,
-                      uint32_t *               dst,
-                      const uint32_t *         src,
-                      const uint32_t *         mask,
-                      int                      width)
-{
-    core_combine_atop_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
-                              pixman_op_t              op,
-                              uint32_t *               dst,
-                              const uint32_t *         src,
-                              const uint32_t *         mask,
-                              int                      width)
-{
-    core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_xor_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_xor_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-static void
-sse2_combine_add_ca (pixman_implementation_t *imp,
-                     pixman_op_t              op,
-                     uint32_t *               dst,
-                     const uint32_t *         src,
-                     const uint32_t *         mask,
-                     int                      width)
-{
-    core_combine_add_ca_sse2 (dst, src, mask, width);
-    _mm_empty ();
-}
-
-/* -------------------------------------------------------------------
- * composite_over_n_8888
- */
-
 static void
 sse2_composite_over_n_8888 (pixman_implementation_t *imp,
                             pixman_op_t              op,
@@ -2711,9 +2401,9 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
 	while (w && (unsigned long)dst & 15)
 	{
 	    d = *dst;
-	    *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-	                                      _mm_movepi64_pi64 (xmm_alpha),
-	                                      unpack_32_1x64 (d)));
+	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+						xmm_alpha,
+						unpack_32_1x128 (d)));
 	    w--;
 	}
 
@@ -2738,19 +2428,15 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
 	while (w)
 	{
 	    d = *dst;
-	    *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-	                                      _mm_movepi64_pi64 (xmm_alpha),
-	                                      unpack_32_1x64 (d)));
+	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
+						xmm_alpha,
+						unpack_32_1x128 (d)));
 	    w--;
 	}
 
     }
-    _mm_empty ();
 }
 
-/* ---------------------------------------------------------------------
- * composite_over_n_0565
- */
 static void
 sse2_composite_over_n_0565 (pixman_implementation_t *imp,
                             pixman_op_t              op,
@@ -2796,9 +2482,9 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 	    d = *dst;
 
 	    *dst++ = pack_565_32_16 (
-		pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-					 _mm_movepi64_pi64 (xmm_alpha),
-					 expand565_16_1x64 (d))));
+		pack_1x128_32 (over_1x128 (xmm_src,
+					   xmm_alpha,
+					   expand565_16_1x128 (d))));
 	    w--;
 	}
 
@@ -2829,18 +2515,13 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 	{
 	    d = *dst;
 	    *dst++ = pack_565_32_16 (
-		pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-					 _mm_movepi64_pi64 (xmm_alpha),
-					 expand565_16_1x64 (d))));
+		pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha,
+					   expand565_16_1x128 (d))));
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ------------------------------
- * composite_add_n_8888_8888_ca
- */
 static void
 sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 				   pixman_op_t              op,
@@ -2866,7 +2547,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
     __m128i xmm_dst;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
     src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
     srca = src >> 24;
@@ -2882,8 +2563,8 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
     xmm_src = _mm_unpacklo_epi8 (
 	create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
     xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src   = _mm_movepi64_pi64 (xmm_src);
-    mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+    mmx_src   = xmm_src;
+    mmx_alpha = xmm_alpha;
 
     while (height--)
     {
@@ -2902,11 +2583,12 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 	    {
 		d = *pd;
 
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*pd = pack_1x64_32 (
-		    _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
+		*pd = pack_1x128_32 (
+		    _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+				   mmx_dest));
 	    }
 
 	    pd++;
@@ -2950,11 +2632,12 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 	    {
 		d = *pd;
 
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*pd = pack_1x64_32 (
-		    _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
+		*pd = pack_1x128_32 (
+		    _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+				   mmx_dest));
 	    }
 
 	    pd++;
@@ -2962,13 +2645,8 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ---------------------------------------------------------------------------
- * composite_over_n_8888_8888_ca
- */
-
 static void
 sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                     pixman_op_t              op,
@@ -2994,7 +2672,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
     src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
@@ -3009,8 +2687,8 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
     xmm_src = _mm_unpacklo_epi8 (
 	create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
     xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src   = _mm_movepi64_pi64 (xmm_src);
-    mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+    mmx_src   = xmm_src;
+    mmx_alpha = xmm_alpha;
 
     while (height--)
     {
@@ -3028,10 +2706,10 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *pd;
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		*pd = pack_1x128_32 (in_over_1x128 (&mmx_src,
 		                                  &mmx_alpha,
 		                                  &mmx_mask,
 		                                  &mmx_dest));
@@ -3078,11 +2756,11 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *pd;
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*pd = pack_1x64_32 (
-		    in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
+		*pd = pack_1x128_32 (
+		    in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
 	    }
 
 	    pd++;
@@ -3090,13 +2768,8 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
 static void
 sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
                                  pixman_op_t              op,
@@ -3148,13 +2821,13 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	    {
 		uint32_t d = *dst;
 		
-		__m64 ms = unpack_32_1x64 (s);
-		__m64 alpha    = expand_alpha_1x64 (ms);
-		__m64 dest     = _mm_movepi64_pi64 (xmm_mask);
-		__m64 alpha_dst = unpack_32_1x64 (d);
+		__m128i ms = unpack_32_1x128 (s);
+		__m128i alpha    = expand_alpha_1x128 (ms);
+		__m128i dest     = xmm_mask;
+		__m128i alpha_dst = unpack_32_1x128 (d);
 		
-		*dst = pack_1x64_32 (
-		    in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+		*dst = pack_1x128_32 (
+		    in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
 	    }
 	    dst++;
 	    w--;
@@ -3195,13 +2868,13 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	    {
 		uint32_t d = *dst;
 		
-		__m64 ms = unpack_32_1x64 (s);
-		__m64 alpha = expand_alpha_1x64 (ms);
-		__m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-		__m64 dest  = unpack_32_1x64 (d);
+		__m128i ms = unpack_32_1x128 (s);
+		__m128i alpha = expand_alpha_1x128 (ms);
+		__m128i mask  = xmm_mask;
+		__m128i dest  = unpack_32_1x128 (d);
 		
-		*dst = pack_1x64_32 (
-		    in_over_1x64 (&ms, &alpha, &mask, &dest));
+		*dst = pack_1x128_32 (
+		    in_over_1x128 (&ms, &alpha, &mask, &dest));
 	    }
 
 	    dst++;
@@ -3209,13 +2882,8 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/*---------------------------------------------------------------------
- * composite_over_8888_n_8888
- */
-
 static void
 sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
 			      pixman_op_t              op,
@@ -3282,12 +2950,8 @@ sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ---------------------------------------------------------------------
- * composite_over_x888_n_8888
- */
 static void
 sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
                                  pixman_op_t              op,
@@ -3336,13 +3000,13 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	    uint32_t s = (*src++) | 0xff000000;
 	    uint32_t d = *dst;
 
-	    __m64 src   = unpack_32_1x64 (s);
-	    __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
-	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 dest  = unpack_32_1x64 (d);
+	    __m128i src   = unpack_32_1x128 (s);
+	    __m128i alpha = xmm_alpha;
+	    __m128i mask  = xmm_mask;
+	    __m128i dest  = unpack_32_1x128 (d);
 
-	    *dst++ = pack_1x64_32 (
-		in_over_1x64 (&src, &alpha, &mask, &dest));
+	    *dst++ = pack_1x128_32 (
+		in_over_1x128 (&src, &alpha, &mask, &dest));
 
 	    w--;
 	}
@@ -3375,24 +3039,20 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	    uint32_t s = (*src++) | 0xff000000;
 	    uint32_t d = *dst;
 
-	    __m64 src  = unpack_32_1x64 (s);
-	    __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
-	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 dest  = unpack_32_1x64 (d);
+	    __m128i src  = unpack_32_1x128 (s);
+	    __m128i alpha = xmm_alpha;
+	    __m128i mask  = xmm_mask;
+	    __m128i dest  = unpack_32_1x128 (d);
 
-	    *dst++ = pack_1x64_32 (
-		in_over_1x64 (&src, &alpha, &mask, &dest));
+	    *dst++ = pack_1x128_32 (
+		in_over_1x128 (&src, &alpha, &mask, &dest));
 
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* --------------------------------------------------------------------
- * composite_over_8888_8888
- */
 static void
 sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
                                pixman_op_t              op,
@@ -3422,27 +3082,23 @@ sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
 
     while (height--)
     {
-	core_combine_over_u_sse2 (dst, src, NULL, width);
+	sse2_combine_over_u (imp, op, dst, src, NULL, width);
 
 	dst += dst_stride;
 	src += src_stride;
     }
-    _mm_empty ();
 }
 
-/* ------------------------------------------------------------------
- * composite_over_8888_0565
- */
 static force_inline uint16_t
 composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
 {
-    __m64 ms;
+    __m128i ms;
 
-    ms = unpack_32_1x64 (src);
+    ms = unpack_32_1x128 (src);
     return pack_565_32_16 (
-	pack_1x64_32 (
-	    over_1x64 (
-		ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst))));
+	pack_1x128_32 (
+	    over_1x128 (
+		ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst))));
 }
 
 static void
@@ -3474,15 +3130,6 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-#if 0
-    /* FIXME
-     *
-     * I copy the code from MMX one and keep the fixme.
-     * If it's a problem there, probably is a problem here.
-     */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
     while (height--)
     {
 	dst = dst_line;
@@ -3555,13 +3202,8 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
 static void
 sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
                               pixman_op_t              op,
@@ -3588,7 +3230,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
     src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
@@ -3604,8 +3246,8 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
     xmm_def = create_mask_2x32_128 (src, src);
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src   = _mm_movepi64_pi64 (xmm_src);
-    mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+    mmx_src   = xmm_src;
+    mmx_alpha = xmm_alpha;
 
     while (height--)
     {
@@ -3622,10 +3264,10 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = expand_pixel_8_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = expand_pixel_8_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
 		                                   &mmx_alpha,
 		                                   &mmx_mask,
 		                                   &mmx_dest));
@@ -3677,10 +3319,10 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = expand_pixel_8_1x64 (m);
-		mmx_dest = unpack_32_1x64 (d);
+		mmx_mask = expand_pixel_8_1x128 (m);
+		mmx_dest = unpack_32_1x128 (d);
 
-		*dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		*dst = pack_1x128_32 (in_over_1x128 (&mmx_src,
 		                                   &mmx_alpha,
 		                                   &mmx_mask,
 		                                   &mmx_dest));
@@ -3691,14 +3333,9 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ----------------------------------------------------------------
- * composite_over_n_8_8888
- */
-
-pixman_bool_t
+static pixman_bool_t
 pixman_fill_sse2 (uint32_t *bits,
                   int       stride,
                   int       bpp,
@@ -3845,7 +3482,6 @@ pixman_fill_sse2 (uint32_t *bits,
 	}
     }
 
-    _mm_empty ();
     return TRUE;
 }
 
@@ -3907,9 +3543,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		*dst = pack_1x64_32 (
-		    pix_multiply_1x64 (
-			_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+		*dst = pack_1x128_32 (
+		    pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)));
 	    }
 	    else
 	    {
@@ -3962,9 +3597,9 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		*dst = pack_1x64_32 (
-		    pix_multiply_1x64 (
-			_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+		*dst = pack_1x128_32 (
+		    pix_multiply_1x128 (
+			xmm_src, expand_pixel_8_1x128 (m)));
 	    }
 	    else
 	    {
@@ -3976,13 +3611,8 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/*-----------------------------------------------------------------------
- * composite_over_n_8_0565
- */
-
 static void
 sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
                               pixman_op_t              op,
@@ -4004,7 +3634,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
     int dst_stride, mask_stride;
     int32_t w;
     uint32_t m;
-    __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
@@ -4023,8 +3653,8 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src = _mm_movepi64_pi64 (xmm_src);
-    mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+    mmx_src = xmm_src;
+    mmx_alpha = xmm_alpha;
 
     while (height--)
     {
@@ -4041,12 +3671,12 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-		mmx_dest = expand565_16_1x64 (d);
+		mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+		mmx_dest = expand565_16_1x128 (d);
 
 		*dst = pack_565_32_16 (
-		    pack_1x64_32 (
-			in_over_1x64 (
+		    pack_1x128_32 (
+			in_over_1x128 (
 			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
 	    }
 
@@ -4114,12 +3744,12 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-		mmx_dest = expand565_16_1x64 (d);
+		mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+		mmx_dest = expand565_16_1x128 (d);
 
 		*dst = pack_565_32_16 (
-		    pack_1x64_32 (
-			in_over_1x64 (
+		    pack_1x128_32 (
+			in_over_1x128 (
 			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
 	    }
 
@@ -4128,13 +3758,8 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -----------------------------------------------------------------------
- * composite_over_pixbuf_0565
- */
-
 static void
 sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
                                  pixman_op_t              op,
@@ -4156,7 +3781,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
     int32_t w;
     uint32_t opaque, zero;
 
-    __m64 ms;
+    __m128i ms;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
@@ -4165,15 +3790,6 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-#if 0
-    /* FIXME
-     *
-     * I copy the code from MMX one and keep the fixme.
-     * If it's a problem there, probably is a problem here.
-     */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
     while (height--)
     {
 	dst = dst_line;
@@ -4187,11 +3803,11 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	    s = *src++;
 	    d = *dst;
 
-	    ms = unpack_32_1x64 (s);
+	    ms = unpack_32_1x128 (s);
 
 	    *dst++ = pack_565_32_16 (
-		pack_1x64_32 (
-		    over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+		pack_1x128_32 (
+		    over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
 	    w--;
 	}
 
@@ -4253,22 +3869,17 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	    s = *src++;
 	    d = *dst;
 
-	    ms = unpack_32_1x64 (s);
+	    ms = unpack_32_1x128 (s);
 
 	    *dst++ = pack_565_32_16 (
-		pack_1x64_32 (
-		    over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+		pack_1x128_32 (
+		    over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d))));
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------
- * composite_over_pixbuf_8888
- */
-
 static void
 sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
                                  pixman_op_t              op,
@@ -4298,15 +3909,6 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (
 	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-#if 0
-    /* FIXME
-     *
-     * I copy the code from MMX one and keep the fixme.
-     * If it's a problem there, probably is a problem here.
-     */
-    assert (src_image->drawable == mask_image->drawable);
-#endif
-
     while (height--)
     {
 	dst = dst_line;
@@ -4320,9 +3922,9 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	    s = *src++;
 	    d = *dst;
 
-	    *dst++ = pack_1x64_32 (
-		over_rev_non_pre_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = pack_1x128_32 (
+		over_rev_non_pre_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
 	    w--;
 	}
@@ -4367,21 +3969,16 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	    s = *src++;
 	    d = *dst;
 
-	    *dst++ = pack_1x64_32 (
-		over_rev_non_pre_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = pack_1x128_32 (
+		over_rev_non_pre_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
 
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------------------------------
- * composite_over_n_8888_0565_ca
- */
-
 static void
 sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                     pixman_op_t              op,
@@ -4408,7 +4005,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
+    __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
     src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
 
@@ -4422,8 +4019,8 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
-    mmx_src = _mm_movepi64_pi64 (xmm_src);
-    mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
+    mmx_src = xmm_src;
+    mmx_alpha = xmm_alpha;
 
     while (height--)
     {
@@ -4440,12 +4037,12 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = expand565_16_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = expand565_16_1x128 (d);
 
 		*dst = pack_565_32_16 (
-		    pack_1x64_32 (
-			in_over_1x64 (
+		    pack_1x128_32 (
+			in_over_1x128 (
 			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
 	    }
 
@@ -4509,12 +4106,12 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		d = *dst;
-		mmx_mask = unpack_32_1x64 (m);
-		mmx_dest = expand565_16_1x64 (d);
+		mmx_mask = unpack_32_1x128 (m);
+		mmx_dest = expand565_16_1x128 (d);
 
 		*dst = pack_565_32_16 (
-		    pack_1x64_32 (
-			in_over_1x64 (
+		    pack_1x128_32 (
+			in_over_1x128 (
 			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
 	    }
 
@@ -4524,13 +4121,8 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -----------------------------------------------------------------------
- * composite_in_n_8_8
- */
-
 static void
 sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
                          pixman_op_t              op,
@@ -4582,11 +4174,11 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (
-		    pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha),
-				       unpack_32_1x64 (m)),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (
+		    pix_multiply_1x128 (xmm_alpha,
+				       unpack_32_1x128 (m)),
+		    unpack_32_1x128 (d)));
 	    w--;
 	}
 
@@ -4619,22 +4211,17 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (
-		    pix_multiply_1x64 (
-			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (
+		    pix_multiply_1x128 (
+			xmm_alpha, unpack_32_1x128 (m)),
+		    unpack_32_1x128 (d)));
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -----------------------------------------------------------------------
- * composite_in_n_8
- */
-
 static void
 sse2_composite_in_n_8 (pixman_implementation_t *imp,
 		       pixman_op_t              op,
@@ -4689,10 +4276,10 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
 	{
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (
-		    _mm_movepi64_pi64 (xmm_alpha),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (
+		    xmm_alpha,
+		    unpack_32_1x128 (d)));
 	    w--;
 	}
 
@@ -4717,21 +4304,16 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
 	{
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (
-		    _mm_movepi64_pi64 (xmm_alpha),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (
+		    xmm_alpha,
+		    unpack_32_1x128 (d)));
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ---------------------------------------------------------------------------
- * composite_in_8_8
- */
-
 static void
 sse2_composite_in_8_8 (pixman_implementation_t *imp,
                        pixman_op_t              op,
@@ -4774,9 +4356,9 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
 	    s = (uint32_t) *src++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (
-		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (
+		    unpack_32_1x128 (s), unpack_32_1x128 (d)));
 	    w--;
 	}
 
@@ -4805,19 +4387,14 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
 	    s = (uint32_t) *src++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d)));
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
 static void
 sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
 			  pixman_op_t              op,
@@ -4869,11 +4446,11 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		_mm_adds_pu16 (
-		    pix_multiply_1x64 (
-			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		_mm_adds_epu16 (
+		    pix_multiply_1x128 (
+			xmm_alpha, unpack_32_1x128 (m)),
+		    unpack_32_1x128 (d)));
 	    w--;
 	}
 
@@ -4905,23 +4482,18 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
 
-	    *dst++ = (uint8_t) pack_1x64_32 (
-		_mm_adds_pu16 (
-		    pix_multiply_1x64 (
-			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-		    unpack_32_1x64 (d)));
+	    *dst++ = (uint8_t) pack_1x128_32 (
+		_mm_adds_epu16 (
+		    pix_multiply_1x128 (
+			xmm_alpha, unpack_32_1x128 (m)),
+		    unpack_32_1x128 (d)));
 
 	    w--;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------
- * composite_add_n_8_8
- */
-
 static void
 sse2_composite_add_n_8 (pixman_implementation_t *imp,
 			pixman_op_t              op,
@@ -4973,10 +4545,10 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
 
 	while (w && ((unsigned long)dst & 15))
 	{
-	    *dst = (uint8_t)_mm_cvtsi64_si32 (
-		_mm_adds_pu8 (
-		    _mm_movepi64_pi64 (xmm_src),
-		    _mm_cvtsi32_si64 (*dst)));
+	    *dst = (uint8_t)_mm_cvtsi128_si32 (
+		_mm_adds_epu8 (
+		    xmm_src,
+		    _mm_cvtsi32_si128 (*dst)));
 
 	    w--;
 	    dst++;
@@ -4993,23 +4565,18 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
 
 	while (w)
 	{
-	    *dst = (uint8_t)_mm_cvtsi64_si32 (
-		_mm_adds_pu8 (
-		    _mm_movepi64_pi64 (xmm_src),
-		    _mm_cvtsi32_si64 (*dst)));
+	    *dst = (uint8_t)_mm_cvtsi128_si32 (
+		_mm_adds_epu8 (
+		    xmm_src,
+		    _mm_cvtsi32_si128 (*dst)));
 
 	    w--;
 	    dst++;
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ----------------------------------------------------------------------
- * composite_add_8_8
- */
-
 static void
 sse2_composite_add_8_8 (pixman_implementation_t *imp,
 			pixman_op_t              op,
@@ -5053,7 +4620,8 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
 	    w--;
 	}
 
-	core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+	sse2_combine_add_u (imp, op,
+			    (uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
 
 	/* Small tail */
 	dst += w & 0xfffc;
@@ -5069,12 +4637,8 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty ();
 }
 
-/* ---------------------------------------------------------------------
- * composite_add_8888_8888
- */
 static void
 sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
                               pixman_op_t              op,
@@ -5106,16 +4670,11 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
 	src = src_line;
 	src_line += src_stride;
 
-	core_combine_add_u_sse2 (dst, src, NULL, width);
+	sse2_combine_add_u (imp, op, dst, src, NULL, width);
     }
 
-    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------------------------------
- * sse2_composite_copy_area
- */
-
 static pixman_bool_t
 pixman_blt_sse2 (uint32_t *src_bits,
                  uint32_t *dst_bits,
@@ -5234,7 +4793,6 @@ pixman_blt_sse2 (uint32_t *src_bits,
 	}
     }
 
-    _mm_empty ();
 
     return TRUE;
 }
@@ -5284,7 +4842,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
     uint32_t m;
     int src_stride, mask_stride, dst_stride;
     int32_t w;
-    __m64 ms;
+    __m128i ms;
 
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -5313,24 +4871,25 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
             s = 0xff000000 | *src++;
             m = (uint32_t) *mask++;
             d = *dst;
-            ms = unpack_32_1x64 (s);
+            ms = unpack_32_1x128 (s);
 
             if (m != 0xff)
             {
-		__m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-		__m64 md = unpack_32_1x64 (d);
+		__m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+		__m128i md = unpack_32_1x128 (d);
 
-                ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md);
+                ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md);
             }
 
-            *dst++ = pack_1x64_32 (ms);
+            *dst++ = pack_1x128_32 (ms);
             w--;
         }
 
         while (w >= 4)
         {
             m = *(uint32_t*) mask;
-            xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+            xmm_src = _mm_or_si128 (
+		load_128_unaligned ((__m128i*)src), mask_ff000000);
 
             if (m == 0xffffffff)
             {
@@ -5346,9 +4905,12 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
                 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+                expand_alpha_rev_2x128 (
+		    xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-                in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+                in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			       &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst_lo, &xmm_dst_hi);
 
                 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
             }
@@ -5373,15 +4935,15 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                 }
                 else
                 {
-		    __m64 ma, md, ms;
+		    __m128i ma, md, ms;
 
                     d = *dst;
 
-		    ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-		    md = unpack_32_1x64 (d);
-		    ms = unpack_32_1x64 (s);
+		    ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m));
+		    md = unpack_32_1x128 (d);
+		    ms = unpack_32_1x128 (s);
 
-                    *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md));
+                    *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md));
                 }
 
             }
@@ -5392,7 +4954,6 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
         }
     }
 
-    _mm_empty ();
 }
 
 static void
@@ -5457,15 +5018,15 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
 		}
 		else
 		{
-		    __m64 ms, md, ma, msa;
+		    __m128i ms, md, ma, msa;
 
-		    ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
-		    ms = unpack_32_1x64 (s);
-		    md = unpack_32_1x64 (d);
+		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+		    ms = unpack_32_1x128 (s);
+		    md = unpack_32_1x128 (d);
 
-		    msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
-		    *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
 		}
 	    }
 
@@ -5529,15 +5090,15 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
 		}
 		else
 		{
-		    __m64 ms, md, ma, msa;
+		    __m128i ms, md, ma, msa;
 
-		    ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
-		    ms = unpack_32_1x64 (s);
-		    md = unpack_32_1x64 (d);
+		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+		    ms = unpack_32_1x128 (s);
+		    md = unpack_32_1x128 (d);
 
-		    msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
-		    *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
 		}
 	    }
 
@@ -5546,7 +5107,6 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
         }
     }
 
-    _mm_empty ();
 }
 
 static void
@@ -5591,12 +5151,12 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 
 	while (w && (unsigned long)dst & 15)
 	{
-	    __m64 vd;
+	    __m128i vd;
 
-	    vd = unpack_32_1x64 (*dst);
+	    vd = unpack_32_1x128 (*dst);
 
-	    *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
-					    _mm_movepi64_pi64 (xmm_src)));
+	    *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+					      xmm_src));
 	    w--;
 	    dst++;
 	}
@@ -5626,19 +5186,18 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 
 	while (w)
 	{
-	    __m64 vd;
+	    __m128i vd;
 
-	    vd = unpack_32_1x64 (*dst);
+	    vd = unpack_32_1x128 (*dst);
 
-	    *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
-					    _mm_movepi64_pi64 (xmm_src)));
+	    *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd),
+					      xmm_src));
 	    w--;
 	    dst++;
 	}
 
     }
 
-    _mm_empty ();
 }
 
 static void
@@ -5703,15 +5262,15 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 		}
 		else
 		{
-		    __m64 ms, md, ma, msa;
+		    __m128i ms, md, ma, msa;
 
-		    ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
-		    ms = unpack_32_1x64 (s);
-		    md = unpack_32_1x64 (d);
+		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+		    ms = unpack_32_1x128 (s);
+		    md = unpack_32_1x128 (d);
 
-		    msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
-		    *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
 		}
 	    }
 
@@ -5773,15 +5332,15 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 		}
 		else
 		{
-		    __m64 ms, md, ma, msa;
+		    __m128i ms, md, ma, msa;
 
-		    ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
-		    ms = unpack_32_1x64 (s);
-		    md = unpack_32_1x64 (d);
+		    ma = expand_alpha_rev_1x128 (load_32_1x128 (m));
+		    ms = unpack_32_1x128 (s);
+		    md = unpack_32_1x128 (d);
 
-		    msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
+		    msa = expand_alpha_rev_1x128 (load_32_1x128 (sa));
 
-		    *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
+		    *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md));
 		}
 	    }
 
@@ -5790,10 +5349,9 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
         }
     }
 
-    _mm_empty ();
 }
 
-/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
+/* A variant of 'sse2_combine_over_u' with minor tweaks */
 static force_inline void
 scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
                                              const uint32_t* ps,
@@ -5885,7 +5443,6 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
 
 	w--;
     }
-    _mm_empty ();
 }
 
 FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
@@ -5927,13 +5484,13 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
 	{
 	    uint32_t d = *dst;
 
-	    __m64 ms = unpack_32_1x64 (s);
-	    __m64 alpha     = expand_alpha_1x64 (ms);
-	    __m64 dest      = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 alpha_dst = unpack_32_1x64 (d);
+	    __m128i ms = unpack_32_1x128 (s);
+	    __m128i alpha     = expand_alpha_1x128 (ms);
+	    __m128i dest      = xmm_mask;
+	    __m128i alpha_dst = unpack_32_1x128 (d);
 
-	    *dst = pack_1x64_32 (
-		in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+	    *dst = pack_1x128_32 (
+		in_over_1x128 (&ms, &alpha, &dest, &alpha_dst));
 	}
 	dst++;
 	w--;
@@ -5985,20 +5542,19 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
 	{
 	    uint32_t d = *dst;
 
-	    __m64 ms = unpack_32_1x64 (s);
-	    __m64 alpha = expand_alpha_1x64 (ms);
-	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-	    __m64 dest  = unpack_32_1x64 (d);
+	    __m128i ms = unpack_32_1x128 (s);
+	    __m128i alpha = expand_alpha_1x128 (ms);
+	    __m128i mask  = xmm_mask;
+	    __m128i dest  = unpack_32_1x128 (d);
 
-	    *dst = pack_1x64_32 (
-		in_over_1x64 (&ms, &alpha, &mask, &dest));
+	    *dst = pack_1x128_32 (
+		in_over_1x128 (&ms, &alpha, &mask, &dest));
 	}
 
 	dst++;
 	w--;
     }
 
-    _mm_empty ();
 }
 
 FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
@@ -6374,20 +5930,7 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
     mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
     mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
 
-    /* MMX constants */
-    mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
-    mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
-
-    mask_x0080 = create_mask_16_64 (0x0080);
-    mask_x00ff = create_mask_16_64 (0x00ff);
-    mask_x0101 = create_mask_16_64 (0x0101);
-    mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
-
-    _mm_empty ();
-
     /* Set up function pointers */
-
-    /* SSE code patch for fbcompose.c */
     imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
     imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
     imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
@@ -6420,5 +5963,3 @@ _pixman_implementation_create_sse2 (pixman_implementation_t *fallback)
 
     return imp;
 }
-
-#endif /* USE_SSE2 */
diff --git a/xorg-server/dix/eventconvert.c b/xorg-server/dix/eventconvert.c
index dff1a5631..dd1ca460b 100644
--- a/xorg-server/dix/eventconvert.c
+++ b/xorg-server/dix/eventconvert.c
@@ -1,734 +1,746 @@
-/*
- * Copyright © 2009 Red Hat, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-/**
- * @file eventconvert.c
- * This file contains event conversion routines from InternalEvent to the
- * matching protocol events.
- */
-
-#ifdef HAVE_DIX_CONFIG_H
-#include <dix-config.h>
-#endif
-
-#include <stdint.h>
-#include <X11/X.h>
-#include <X11/extensions/XIproto.h>
-#include <X11/extensions/XI2proto.h>
-#include <X11/extensions/XI.h>
-#include <X11/extensions/XI2.h>
-
-#include "dix.h"
-#include "inputstr.h"
-#include "misc.h"
-#include "eventstr.h"
-#include "exglobals.h"
-#include "eventconvert.h"
-#include "xiquerydevice.h"
-#include "xkbsrv.h"
-
-
-static int countValuators(DeviceEvent *ev, int *first);
-static int getValuatorEvents(DeviceEvent *ev, deviceValuator *xv);
-static int eventToKeyButtonPointer(DeviceEvent *ev, xEvent **xi, int *count);
-static int eventToDeviceChanged(DeviceChangedEvent *ev, xEvent **dcce);
-static int eventToDeviceEvent(DeviceEvent *ev, xEvent **xi);
-static int eventToRawEvent(RawDeviceEvent *ev, xEvent **xi);
-
-/* Do not use, read comments below */
-BOOL EventIsKeyRepeat(xEvent *event);
-
-/**
- * Hack to allow detectable autorepeat for core and XI1 events.
- * The sequence number is unused until we send to the client and can be
- * misused to store data. More or less, anyway.
- *
- * Do not use this. It may change any time without warning, eat your babies
- * and piss on your cat.
- */
-static void
-EventSetKeyRepeatFlag(xEvent *event, BOOL on)
-{
-    event->u.u.sequenceNumber = on;
-}
-
-/**
- * Check if the event was marked as a repeat event before.
- * NOTE: This is a nasty hack and should NOT be used by anyone else but
- * TryClientEvents.
- */
-BOOL
-EventIsKeyRepeat(xEvent *event)
-{
-    return !!event->u.u.sequenceNumber;
-}
-
-/**
- * Convert the given event to the respective core event.
- *
- * Return values:
- * Success ... core contains the matching core event.
- * BadValue .. One or more values in the internal event are invalid.
- * BadMatch .. The event has no core equivalent.
- *
- * @param[in] event The event to convert into a core event.
- * @param[in] core The memory location to store the core event at.
- * @return Success or the matching error code.
- */
-int
-EventToCore(InternalEvent *event, xEvent *core)
-{
-    switch(event->any.type)
-    {
-        case ET_Motion:
-            {
-                DeviceEvent *e = &event->device_event;
-                /* Don't create core motion event if neither x nor y are
-                 * present */
-                if (!BitIsOn(e->valuators.mask, 0) &&
-                    !BitIsOn(e->valuators.mask, 1))
-                    return BadMatch;
-            }
-            /* fallthrough */
-        case ET_ButtonPress:
-        case ET_ButtonRelease:
-        case ET_KeyPress:
-        case ET_KeyRelease:
-            {
-                DeviceEvent *e = &event->device_event;
-
-                if (e->detail.key > 0xFF)
-                    return BadMatch;
-
-                memset(core, 0, sizeof(xEvent));
-                core->u.u.type = e->type - ET_KeyPress + KeyPress;
-                core->u.u.detail = e->detail.key & 0xFF;
-                core->u.keyButtonPointer.time = e->time;
-                core->u.keyButtonPointer.rootX = e->root_x;
-                core->u.keyButtonPointer.rootY = e->root_y;
-                core->u.keyButtonPointer.state = e->corestate;
-                core->u.keyButtonPointer.root = e->root;
-                EventSetKeyRepeatFlag(core, (e->type == ET_KeyPress && e->key_repeat));
-            }
-            break;
-        case ET_ProximityIn:
-        case ET_ProximityOut:
-        case ET_RawKeyPress:
-        case ET_RawKeyRelease:
-        case ET_RawButtonPress:
-        case ET_RawButtonRelease:
-        case ET_RawMotion:
-            return BadMatch;
-        default:
-            /* XXX: */
-            ErrorF("[dix] EventToCore: Not implemented yet \n");
-            return BadImplementation;
-    }
-    return Success;
-}
-
-/**
- * Convert the given event to the respective XI 1.x event and store it in
- * xi. xi is allocated on demand and must be freed by the caller.
- * count returns the number of events in xi. If count is 1, and the type of
- * xi is GenericEvent, then xi may be larger than 32 bytes.
- *
- * Return values:
- * Success ... core contains the matching core event.
- * BadValue .. One or more values in the internal event are invalid.
- * BadMatch .. The event has no XI equivalent.
- *
- * @param[in] ev The event to convert into an XI 1 event.
- * @param[out] xi Future memory location for the XI event.
- * @param[out] count Number of elements in xi.
- *
- * @return Success or the error code.
- */
-int
-EventToXI(InternalEvent *ev, xEvent **xi, int *count)
-{
-    switch (ev->any.type)
-    {
-        case ET_Motion:
-        case ET_ButtonPress:
-        case ET_ButtonRelease:
-        case ET_KeyPress:
-        case ET_KeyRelease:
-        case ET_ProximityIn:
-        case ET_ProximityOut:
-            return eventToKeyButtonPointer(&ev->device_event, xi, count);
-        case ET_DeviceChanged:
-        case ET_RawKeyPress:
-        case ET_RawKeyRelease:
-        case ET_RawButtonPress:
-        case ET_RawButtonRelease:
-        case ET_RawMotion:
-            *count = 0;
-            *xi = NULL;
-            return BadMatch;
-        default:
-            break;
-    }
-
-    ErrorF("[dix] EventToXI: Not implemented for %d \n", ev->any.type);
-    return BadImplementation;
-}
-
-/**
- * Convert the given event to the respective XI 2.x event and store it in xi.
- * xi is allocated on demand and must be freed by the caller.
- *
- * Return values:
- * Success ... core contains the matching core event.
- * BadValue .. One or more values in the internal event are invalid.
- * BadMatch .. The event has no XI2 equivalent.
- *
- * @param[in] ev The event to convert into an XI2 event
- * @param[out] xi Future memory location for the XI2 event.
- *
- * @return Success or the error code.
- */
-int
-EventToXI2(InternalEvent *ev, xEvent **xi)
-{
-    switch (ev->any.type)
-    {
-        /* Enter/FocusIn are for grabs. We don't need an actual event, since
-         * the real events delivered are triggered elsewhere */
-        case ET_Enter:
-        case ET_FocusIn:
-            *xi = NULL;
-            return Success;
-        case ET_Motion:
-        case ET_ButtonPress:
-        case ET_ButtonRelease:
-        case ET_KeyPress:
-        case ET_KeyRelease:
-            return eventToDeviceEvent(&ev->device_event, xi);
-        case ET_ProximityIn:
-        case ET_ProximityOut:
-            *xi = NULL;
-            return BadMatch;
-        case ET_DeviceChanged:
-            return eventToDeviceChanged(&ev->changed_event, xi);
-        case ET_RawKeyPress:
-        case ET_RawKeyRelease:
-        case ET_RawButtonPress:
-        case ET_RawButtonRelease:
-        case ET_RawMotion:
-            return eventToRawEvent(&ev->raw_event, xi);
-        default:
-            break;
-    }
-
-    ErrorF("[dix] EventToXI2: Not implemented for %d \n", ev->any.type);
-    return BadImplementation;
-}
-
-static int
-eventToKeyButtonPointer(DeviceEvent *ev, xEvent **xi, int *count)
-{
-    int num_events;
-    int first; /* dummy */
-    deviceKeyButtonPointer *kbp;
-
-    /* Sorry, XI 1.x protocol restrictions. */
-    if (ev->detail.button > 0xFF || ev->deviceid >= 0x80)
-    {
-        *count = 0;
-        return Success;
-    }
-
-    num_events = (countValuators(ev, &first) + 5)/6; /* valuator ev */
-    if (num_events <= 0)
-    {
-        *count = 0;
-        return BadMatch;
-    }
-
-    num_events++; /* the actual event event */
-
-    *xi = calloc(num_events, sizeof(xEvent));
-    if (!(*xi))
-    {
-        return BadAlloc;
-    }
-
-    kbp           = (deviceKeyButtonPointer*)(*xi);
-    kbp->detail   = ev->detail.button;
-    kbp->time     = ev->time;
-    kbp->root     = ev->root;
-    kbp->root_x   = ev->root_x;
-    kbp->root_y   = ev->root_y;
-    kbp->deviceid = ev->deviceid;
-    kbp->state    = ev->corestate;
-    EventSetKeyRepeatFlag((xEvent*)kbp,
-                          (ev->type == ET_KeyPress && ev->key_repeat));
-
-    if (num_events > 1)
-        kbp->deviceid |= MORE_EVENTS;
-
-    switch(ev->type)
-    {
-        case ET_Motion:        kbp->type = DeviceMotionNotify;  break;
-        case ET_ButtonPress:   kbp->type = DeviceButtonPress;   break;
-        case ET_ButtonRelease: kbp->type = DeviceButtonRelease; break;
-        case ET_KeyPress:      kbp->type = DeviceKeyPress;      break;
-        case ET_KeyRelease:    kbp->type = DeviceKeyRelease;    break;
-        case ET_ProximityIn:   kbp->type = ProximityIn;         break;
-        case ET_ProximityOut:  kbp->type = ProximityOut;        break;
-        default:
-            break;
-    }
-
-    if (num_events > 1)
-    {
-        getValuatorEvents(ev, (deviceValuator*)(kbp + 1));
-    }
-
-    *count = num_events;
-    return Success;
-}
-
-
-/**
- * Set first to the first valuator in the event ev and return the number of
- * valuators from first to the last set valuator.
- */
-static int
-countValuators(DeviceEvent *ev, int *first)
-{
-    int first_valuator = -1, last_valuator = -1, num_valuators = 0;
-    int i;
-
-    for (i = 0; i < sizeof(ev->valuators.mask) * 8; i++)
-    {
-        if (BitIsOn(ev->valuators.mask, i))
-        {
-            if (first_valuator == -1)
-                first_valuator = i;
-            last_valuator = i;
-        }
-    }
-
-    if (first_valuator != -1)
-    {
-        num_valuators = last_valuator - first_valuator + 1;
-        *first = first_valuator;
-    }
-
-    return num_valuators;
-}
-
-static int
-getValuatorEvents(DeviceEvent *ev, deviceValuator *xv)
-{
-    int i;
-    int state = 0;
-    int first_valuator, num_valuators;
-
-
-    num_valuators = countValuators(ev, &first_valuator);
-    if (num_valuators > 0)
-    {
-        DeviceIntPtr dev = NULL;
-        dixLookupDevice(&dev, ev->deviceid, serverClient, DixUseAccess);
-        /* State needs to be assembled BEFORE the device is updated. */
-        state = (dev && dev->key) ? XkbStateFieldFromRec(&dev->key->xkbInfo->state) : 0;
-        state |= (dev && dev->button) ? (dev->button->state) : 0;
-    }
-
-    /* FIXME: non-continuous valuator data in internal events*/
-    for (i = 0; i < num_valuators; i += 6, xv++) {
-        xv->type = DeviceValuator;
-        xv->first_valuator = first_valuator + i;
-        xv->num_valuators = ((num_valuators - i) > 6) ? 6 : (num_valuators - i);
-        xv->deviceid = ev->deviceid;
-        xv->device_state = state;
-        switch (xv->num_valuators) {
-        case 6:
-            xv->valuator5 = ev->valuators.data[xv->first_valuator + 5];
-        case 5:
-            xv->valuator4 = ev->valuators.data[xv->first_valuator + 4];
-        case 4:
-            xv->valuator3 = ev->valuators.data[xv->first_valuator + 3];
-        case 3:
-            xv->valuator2 = ev->valuators.data[xv->first_valuator + 2];
-        case 2:
-            xv->valuator1 = ev->valuators.data[xv->first_valuator + 1];
-        case 1:
-            xv->valuator0 = ev->valuators.data[xv->first_valuator + 0];
-        }
-
-        if (i + 6 < num_valuators)
-            xv->deviceid |= MORE_EVENTS;
-    }
-
-    return (num_valuators + 5) / 6;
-}
-
-
-static int
-appendKeyInfo(DeviceChangedEvent *dce, xXIKeyInfo* info)
-{
-    uint32_t *kc;
-    int i;
-
-    info->type = XIKeyClass;
-    info->num_keycodes = dce->keys.max_keycode - dce->keys.min_keycode + 1;
-    info->length = sizeof(xXIKeyInfo)/4 + info->num_keycodes;
-    info->sourceid = dce->sourceid;
-
-    kc = (uint32_t*)&info[1];
-    for (i = 0; i < info->num_keycodes; i++)
-        *kc++ = i + dce->keys.min_keycode;
-
-    return info->length * 4;
-}
-
-static int
-appendButtonInfo(DeviceChangedEvent *dce, xXIButtonInfo *info)
-{
-    unsigned char *bits;
-    int mask_len;
-
-    mask_len = bytes_to_int32(bits_to_bytes(dce->buttons.num_buttons));
-
-    info->type = XIButtonClass;
-    info->num_buttons = dce->buttons.num_buttons;
-    info->length = bytes_to_int32(sizeof(xXIButtonInfo)) +
-                   info->num_buttons + mask_len;
-    info->sourceid = dce->sourceid;
-
-    bits = (unsigned char*)&info[1];
-    memset(bits, 0, mask_len * 4);
-    /* FIXME: is_down? */
-
-    bits += mask_len * 4;
-    memcpy(bits, dce->buttons.names, dce->buttons.num_buttons * sizeof(Atom));
-
-    return info->length * 4;
-}
-
-static int
-appendValuatorInfo(DeviceChangedEvent *dce, xXIValuatorInfo *info, int axisnumber)
-{
-    info->type = XIValuatorClass;
-    info->length = sizeof(xXIValuatorInfo)/4;
-    info->label = dce->valuators[axisnumber].name;
-    info->min.integral = dce->valuators[axisnumber].min;
-    info->min.frac = 0;
-    info->max.integral = dce->valuators[axisnumber].max;
-    info->max.frac = 0;
-    /* FIXME: value */
-    info->value.integral = 0;
-    info->value.frac = 0;
-    info->resolution = dce->valuators[axisnumber].resolution;
-    info->number = axisnumber;
-    info->mode = dce->valuators[axisnumber].mode;
-    info->sourceid = dce->sourceid;
-
-    return info->length * 4;
-}
-
-static int
-eventToDeviceChanged(DeviceChangedEvent *dce, xEvent **xi)
-{
-    xXIDeviceChangedEvent *dcce;
-    int len = sizeof(xXIDeviceChangedEvent);
-    int nkeys;
-    char *ptr;
-
-    if (dce->buttons.num_buttons)
-    {
-        len += sizeof(xXIButtonInfo);
-        len += dce->buttons.num_buttons * sizeof(Atom); /* button names */
-        len += pad_to_int32(bits_to_bytes(dce->buttons.num_buttons));
-    }
-    if (dce->num_valuators)
-        len += sizeof(xXIValuatorInfo) * dce->num_valuators;
-
-    nkeys = (dce->keys.max_keycode > 0) ?
-                dce->keys.max_keycode - dce->keys.min_keycode + 1 : 0;
-    if (nkeys > 0)
-    {
-        len += sizeof(xXIKeyInfo);
-        len += sizeof(CARD32) * nkeys; /* keycodes */
-    }
-
-    dcce = calloc(1, len);
-    if (!dcce)
-    {
-        ErrorF("[Xi] BadAlloc in SendDeviceChangedEvent.\n");
-        return BadAlloc;
-    }
-
-    dcce->type         = GenericEvent;
-    dcce->extension    = IReqCode;
-    dcce->evtype       = XI_DeviceChanged;
-    dcce->time         = dce->time;
-    dcce->deviceid     = dce->deviceid;
-    dcce->sourceid     = dce->sourceid;
-    dcce->reason       = (dce->flags & DEVCHANGE_DEVICE_CHANGE) ? XIDeviceChange : XISlaveSwitch;
-    dcce->num_classes  = 0;
-    dcce->length = bytes_to_int32(len - sizeof(xEvent));
-
-    ptr = (char*)&dcce[1];
-    if (dce->buttons.num_buttons)
-    {
-        dcce->num_classes++;
-        ptr += appendButtonInfo(dce, (xXIButtonInfo*)ptr);
-    }
-
-    if (nkeys)
-    {
-        dcce->num_classes++;
-        ptr += appendKeyInfo(dce, (xXIKeyInfo*)ptr);
-    }
-
-    if (dce->num_valuators)
-    {
-        int i;
-
-        dcce->num_classes += dce->num_valuators;
-        for (i = 0; i < dce->num_valuators; i++)
-            ptr += appendValuatorInfo(dce, (xXIValuatorInfo*)ptr, i);
-    }
-
-    *xi = (xEvent*)dcce;
-
-    return Success;
-}
-
-static int count_bits(unsigned char* ptr, int len)
-{
-    int bits = 0;
-    unsigned int i;
-    unsigned char x;
-
-    for (i = 0; i < len; i++)
-    {
-        x = ptr[i];
-        while(x > 0)
-        {
-            bits += (x & 0x1);
-            x >>= 1;
-        }
-    }
-    return bits;
-}
-
-static int
-eventToDeviceEvent(DeviceEvent *ev, xEvent **xi)
-{
-    int len = sizeof(xXIDeviceEvent);
-    xXIDeviceEvent *xde;
-    int i, btlen, vallen;
-    char *ptr;
-    FP3232 *axisval;
-
-    /* FIXME: this should just send the buttons we have, not MAX_BUTTONs. Same
-     * with MAX_VALUATORS below */
-    /* btlen is in 4 byte units */
-    btlen = bytes_to_int32(bits_to_bytes(MAX_BUTTONS));
-    len += btlen * 4; /* buttonmask len */
-
-
-    vallen = count_bits(ev->valuators.mask, sizeof(ev->valuators.mask)/sizeof(ev->valuators.mask[0]));
-    len += vallen * 2 * sizeof(uint32_t); /* axisvalues */
-    vallen = bytes_to_int32(bits_to_bytes(MAX_VALUATORS));
-    len += vallen * 4; /* valuators mask */
-
-    *xi = calloc(1, len);
-    xde = (xXIDeviceEvent*)*xi;
-    xde->type           = GenericEvent;
-    xde->extension      = IReqCode;
-    xde->evtype         = GetXI2Type((InternalEvent*)ev);
-    xde->time           = ev->time;
-    xde->length         = bytes_to_int32(len - sizeof(xEvent));
-    xde->detail         = ev->detail.button;
-    xde->root           = ev->root;
-    xde->buttons_len    = btlen;
-    xde->valuators_len  = vallen;
-    xde->deviceid       = ev->deviceid;
-    xde->sourceid       = ev->sourceid;
-    xde->root_x         = FP1616(ev->root_x, ev->root_x_frac);
-    xde->root_y         = FP1616(ev->root_y, ev->root_y_frac);
-
-    if (ev->key_repeat)
-        xde->flags      |= XIKeyRepeat;
-
-    xde->mods.base_mods         = ev->mods.base;
-    xde->mods.latched_mods      = ev->mods.latched;
-    xde->mods.locked_mods       = ev->mods.locked;
-    xde->mods.effective_mods    = ev->mods.effective;
-
-    xde->group.base_group       = ev->group.base;
-    xde->group.latched_group    = ev->group.latched;
-    xde->group.locked_group     = ev->group.locked;
-    xde->group.effective_group  = ev->group.effective;
-
-    ptr = (char*)&xde[1];
-    for (i = 0; i < sizeof(ev->buttons) * 8; i++)
-    {
-        if (BitIsOn(ev->buttons, i))
-            SetBit(ptr, i);
-    }
-
-    ptr += xde->buttons_len * 4;
-    axisval = (FP3232*)(ptr + xde->valuators_len * 4);
-    for (i = 0; i < sizeof(ev->valuators.mask) * 8; i++)
-    {
-        if (BitIsOn(ev->valuators.mask, i))
-        {
-            SetBit(ptr, i);
-            axisval->integral = ev->valuators.data[i];
-            axisval->frac = ev->valuators.data_frac[i];
-            axisval++;
-        }
-    }
-
-    return Success;
-}
-
-static int
-eventToRawEvent(RawDeviceEvent *ev, xEvent **xi)
-{
-    xXIRawEvent* raw;
-    int vallen, nvals;
-    int i, len = sizeof(xXIRawEvent);
-    char *ptr;
-    FP3232 *axisval;
-
-    nvals = count_bits(ev->valuators.mask, sizeof(ev->valuators.mask));
-    len += nvals * sizeof(FP3232) * 2; /* 8 byte per valuator, once
-                                    raw, once processed */
-    vallen = bytes_to_int32(bits_to_bytes(MAX_VALUATORS));
-    len += vallen * 4; /* valuators mask */
-
-    *xi = calloc(1, len);
-    raw = (xXIRawEvent*)*xi;
-    raw->type           = GenericEvent;
-    raw->extension      = IReqCode;
-    raw->evtype         = GetXI2Type((InternalEvent*)ev);
-    raw->time           = ev->time;
-    raw->length         = bytes_to_int32(len - sizeof(xEvent));
-    raw->detail         = ev->detail.button;
-    raw->deviceid       = ev->deviceid;
-    raw->valuators_len  = vallen;
-
-    ptr = (char*)&raw[1];
-    axisval = (FP3232*)(ptr + raw->valuators_len * 4);
-    for (i = 0; i < sizeof(ev->valuators.mask) * 8; i++)
-    {
-        if (BitIsOn(ev->valuators.mask, i))
-        {
-            SetBit(ptr, i);
-            axisval->integral = ev->valuators.data[i];
-            axisval->frac = ev->valuators.data_frac[i];
-            (axisval + nvals)->integral = ev->valuators.data_raw[i];
-            (axisval + nvals)->frac = ev->valuators.data_raw_frac[i];
-            axisval++;
-        }
-    }
-
-    return Success;
-}
-
-/**
- * Return the corresponding core type for the given event or 0 if no core
- * equivalent exists.
- */
-int
-GetCoreType(InternalEvent *event)
-{
-    int coretype = 0;
-    switch(event->any.type)
-    {
-        case ET_Motion:         coretype = MotionNotify;  break;
-        case ET_ButtonPress:    coretype = ButtonPress;   break;
-        case ET_ButtonRelease:  coretype = ButtonRelease; break;
-        case ET_KeyPress:       coretype = KeyPress;      break;
-        case ET_KeyRelease:     coretype = KeyRelease;    break;
-        default:
-            break;
-    }
-    return coretype;
-}
-
-/**
- * Return the corresponding XI 1.x type for the given event or 0 if no
- * equivalent exists.
- */
-int
-GetXIType(InternalEvent *event)
-{
-    int xitype = 0;
-    switch(event->any.type)
-    {
-        case ET_Motion:         xitype = DeviceMotionNotify;  break;
-        case ET_ButtonPress:    xitype = DeviceButtonPress;   break;
-        case ET_ButtonRelease:  xitype = DeviceButtonRelease; break;
-        case ET_KeyPress:       xitype = DeviceKeyPress;      break;
-        case ET_KeyRelease:     xitype = DeviceKeyRelease;    break;
-        case ET_ProximityIn:    xitype = ProximityIn;         break;
-        case ET_ProximityOut:   xitype = ProximityOut;        break;
-        default:
-            break;
-    }
-    return xitype;
-}
-
-/**
- * Return the corresponding XI 2.x type for the given event or 0 if no
- * equivalent exists.
- */
-int
-GetXI2Type(InternalEvent *event)
-{
-    int xi2type = 0;
-
-    switch(event->any.type)
-    {
-        case ET_Motion:         xi2type = XI_Motion;           break;
-        case ET_ButtonPress:    xi2type = XI_ButtonPress;      break;
-        case ET_ButtonRelease:  xi2type = XI_ButtonRelease;    break;
-        case ET_KeyPress:       xi2type = XI_KeyPress;         break;
-        case ET_KeyRelease:     xi2type = XI_KeyRelease;       break;
-        case ET_Enter:          xi2type = XI_Enter;            break;
-        case ET_Leave:          xi2type = XI_Leave;            break;
-        case ET_Hierarchy:      xi2type = XI_HierarchyChanged; break;
-        case ET_DeviceChanged:  xi2type = XI_DeviceChanged;    break;
-        case ET_RawKeyPress:    xi2type = XI_RawKeyPress;      break;
-        case ET_RawKeyRelease:  xi2type = XI_RawKeyRelease;    break;
-        case ET_RawButtonPress: xi2type = XI_RawButtonPress;   break;
-        case ET_RawButtonRelease: xi2type = XI_RawButtonRelease; break;
-        case ET_RawMotion:      xi2type = XI_RawMotion;        break;
-        case ET_FocusIn:        xi2type = XI_FocusIn;          break;
-        case ET_FocusOut:       xi2type = XI_FocusOut;         break;
-        default:
-            break;
-    }
-    return xi2type;
-}
+/*
+ * Copyright © 2009 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file eventconvert.c
+ * This file contains event conversion routines from InternalEvent to the
+ * matching protocol events.
+ */
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#endif
+
+#include <stdint.h>
+#include <X11/X.h>
+#include <X11/extensions/XIproto.h>
+#include <X11/extensions/XI2proto.h>
+#include <X11/extensions/XI.h>
+#include <X11/extensions/XI2.h>
+
+#include "dix.h"
+#include "inputstr.h"
+#include "misc.h"
+#include "eventstr.h"
+#include "exglobals.h"
+#include "eventconvert.h"
+#include "xiquerydevice.h"
+#include "xkbsrv.h"
+
+
+static int countValuators(DeviceEvent *ev, int *first);
+static int getValuatorEvents(DeviceEvent *ev, deviceValuator *xv);
+static int eventToKeyButtonPointer(DeviceEvent *ev, xEvent **xi, int *count);
+static int eventToDeviceChanged(DeviceChangedEvent *ev, xEvent **dcce);
+static int eventToDeviceEvent(DeviceEvent *ev, xEvent **xi);
+static int eventToRawEvent(RawDeviceEvent *ev, xEvent **xi);
+
+/* Do not use, read comments below */
+BOOL EventIsKeyRepeat(xEvent *event);
+
+/**
+ * Hack to allow detectable autorepeat for core and XI1 events.
+ * The sequence number is unused until we send to the client and can be
+ * misused to store data. More or less, anyway.
+ *
+ * Do not use this. It may change any time without warning, eat your babies
+ * and piss on your cat.
+ */
+static void
+EventSetKeyRepeatFlag(xEvent *event, BOOL on)
+{
+    event->u.u.sequenceNumber = on; /* read back by EventIsKeyRepeat() */
+}
+
+/**
+ * Check if the event was marked as a repeat event before.
+ * NOTE: This is a nasty hack and should NOT be used by anyone else but
+ * TryClientEvents.
+ */
+BOOL
+EventIsKeyRepeat(xEvent *event)
+{
+    return event->u.u.sequenceNumber != 0; /* non-zero marks a repeat */
+}
+
+/**
+ * Convert the given event to the respective core event.
+ *
+ * Return values:
+ * Success ... core contains the matching core event.
+ * BadValue .. One or more values in the internal event are invalid.
+ * BadMatch .. The event has no core equivalent.
+ *
+ * @param[in] event The event to convert into a core event.
+ * @param[out] core The memory location to store the core event at.
+ * @return Success or the matching error code (BadImplementation if unhandled).
+ */
+int
+EventToCore(InternalEvent *event, xEvent *core)
+{
+    switch(event->any.type)
+    {
+        case ET_Motion:
+            {
+                DeviceEvent *e = &event->device_event;
+                /* Don't create core motion event if neither x nor y are
+                 * present */
+                if (!BitIsOn(e->valuators.mask, 0) &&
+                    !BitIsOn(e->valuators.mask, 1))
+                    return BadMatch;
+            }
+            /* fallthrough */
+        case ET_ButtonPress:
+        case ET_ButtonRelease:
+        case ET_KeyPress:
+        case ET_KeyRelease:
+            {
+                DeviceEvent *e = &event->device_event;
+
+                if (e->detail.key > 0xFF) /* detail is stored masked to 8 bits */
+                    return BadMatch;
+
+                memset(core, 0, sizeof(xEvent));
+                core->u.u.type = e->type - ET_KeyPress + KeyPress; /* assumes ET_* order matches core codes */
+                core->u.u.detail = e->detail.key & 0xFF;
+                core->u.keyButtonPointer.time = e->time;
+                core->u.keyButtonPointer.rootX = e->root_x;
+                core->u.keyButtonPointer.rootY = e->root_y;
+                core->u.keyButtonPointer.state = e->corestate;
+                core->u.keyButtonPointer.root = e->root;
+                EventSetKeyRepeatFlag(core, (e->type == ET_KeyPress && e->key_repeat));
+            }
+            break;
+        case ET_ProximityIn:
+        case ET_ProximityOut:
+        case ET_RawKeyPress:
+        case ET_RawKeyRelease:
+        case ET_RawButtonPress:
+        case ET_RawButtonRelease:
+        case ET_RawMotion:
+            return BadMatch;
+        default:
+            /* XXX: */
+            ErrorF("[dix] EventToCore: Not implemented yet \n");
+            return BadImplementation; /* core is left untouched here */
+    }
+    return Success;
+}
+
+/**
+ * Convert the given event to the respective XI 1.x event and store it in
+ * xi. xi is allocated on demand and must be freed by the caller.
+ * count returns the number of events in xi. If count is 1, and the type of
+ * xi is GenericEvent, then xi may be larger than 32 bytes.
+ *
+ * Return values:
+ * Success ... xi contains the matching XI 1.x event(s).
+ * BadValue .. One or more values in the internal event are invalid.
+ * BadMatch .. The event has no XI equivalent.
+ *
+ * @param[in] ev The event to convert into an XI 1 event.
+ * @param[out] xi Future memory location for the XI event.
+ * @param[out] count Number of elements in xi.
+ *
+ * @return Success or the error code (BadImplementation if unhandled).
+ */
+int
+EventToXI(InternalEvent *ev, xEvent **xi, int *count)
+{
+    switch (ev->any.type)
+    {
+        case ET_Motion:
+        case ET_ButtonPress:
+        case ET_ButtonRelease:
+        case ET_KeyPress:
+        case ET_KeyRelease:
+        case ET_ProximityIn:
+        case ET_ProximityOut:
+            return eventToKeyButtonPointer(&ev->device_event, xi, count);
+        case ET_DeviceChanged:
+        case ET_RawKeyPress:
+        case ET_RawKeyRelease:
+        case ET_RawButtonPress:
+        case ET_RawButtonRelease:
+        case ET_RawMotion:
+            *count = 0;
+            *xi = NULL;
+            return BadMatch;
+        default:
+            break;
+    }
+
+    ErrorF("[dix] EventToXI: Not implemented for %d \n", ev->any.type);
+    return BadImplementation; /* *xi and *count are not written on this path */
+}
+
+/**
+ * Convert the given event to the respective XI 2.x event and store it in xi.
+ * xi is allocated on demand and must be freed by the caller.
+ *
+ * Return values:
+ * Success ... xi contains the matching XI2 event (NULL for Enter/FocusIn).
+ * BadValue .. One or more values in the internal event are invalid.
+ * BadMatch .. The event has no XI2 equivalent.
+ *
+ * @param[in] ev The event to convert into an XI2 event
+ * @param[out] xi Future memory location for the XI2 event.
+ *
+ * @return Success or the error code (BadImplementation if unhandled).
+ */
+int
+EventToXI2(InternalEvent *ev, xEvent **xi)
+{
+    switch (ev->any.type)
+    {
+        /* Enter/FocusIn are for grabs. We don't need an actual event, since
+         * the real events delivered are triggered elsewhere */
+        case ET_Enter:
+        case ET_FocusIn:
+            *xi = NULL;
+            return Success;
+        case ET_Motion:
+        case ET_ButtonPress:
+        case ET_ButtonRelease:
+        case ET_KeyPress:
+        case ET_KeyRelease:
+            return eventToDeviceEvent(&ev->device_event, xi);
+        case ET_ProximityIn:
+        case ET_ProximityOut:
+            *xi = NULL;
+            return BadMatch;
+        case ET_DeviceChanged:
+            return eventToDeviceChanged(&ev->changed_event, xi);
+        case ET_RawKeyPress:
+        case ET_RawKeyRelease:
+        case ET_RawButtonPress:
+        case ET_RawButtonRelease:
+        case ET_RawMotion:
+            return eventToRawEvent(&ev->raw_event, xi);
+        default:
+            break;
+    }
+
+    ErrorF("[dix] EventToXI2: Not implemented for %d \n", ev->any.type);
+    return BadImplementation; /* *xi is not written on this path */
+}
+
+/**
+ * Build the XI 1.x wire events for ev: one deviceKeyButtonPointer, followed
+ * by the deviceValuator events needed to carry the valuator range.
+ *
+ * Both outputs are written on every path: *xi is NULL and *count is 0
+ * unless events were generated, so callers never read stale values.
+ *
+ * @param[in] ev The device event to convert.
+ * @param[out] xi Newly allocated event array, to be freed by the caller.
+ * @param[out] count Number of events stored in xi.
+ *
+ * @return Success (possibly with *count == 0), BadMatch or BadAlloc.
+ */
+static int
+eventToKeyButtonPointer(DeviceEvent *ev, xEvent **xi, int *count)
+{
+    int num_events;
+    int first; /* dummy */
+    deviceKeyButtonPointer *kbp;
+
+    /* Defined outputs for every early return below. */
+    *xi = NULL;
+    *count = 0;
+
+    /* Sorry, XI 1.x protocol restrictions. */
+    if (ev->detail.button > 0xFF || ev->deviceid >= 0x80)
+        return Success;
+
+    num_events = (countValuators(ev, &first) + 5)/6; /* valuator ev */
+
+    /* Key and button events may come without axes; the others may not. */
+    if (num_events <= 0 &&
+        (ev->type == ET_Motion || ev->type == ET_ProximityIn ||
+         ev->type == ET_ProximityOut))
+        return BadMatch;
+
+    num_events++; /* the actual event event */
+
+    *xi = calloc(num_events, sizeof(xEvent));
+    if (!(*xi))
+        return BadAlloc;
+
+    kbp           = (deviceKeyButtonPointer*)(*xi);
+    kbp->detail   = ev->detail.button;
+    kbp->time     = ev->time;
+    kbp->root     = ev->root;
+    kbp->root_x   = ev->root_x;
+    kbp->root_y   = ev->root_y;
+    kbp->deviceid = ev->deviceid;
+    kbp->state    = ev->corestate;
+    EventSetKeyRepeatFlag((xEvent*)kbp,
+                          (ev->type == ET_KeyPress && ev->key_repeat));
+
+    switch(ev->type)
+    {
+        case ET_Motion:        kbp->type = DeviceMotionNotify;  break;
+        case ET_ButtonPress:   kbp->type = DeviceButtonPress;   break;
+        case ET_ButtonRelease: kbp->type = DeviceButtonRelease; break;
+        case ET_KeyPress:      kbp->type = DeviceKeyPress;      break;
+        case ET_KeyRelease:    kbp->type = DeviceKeyRelease;    break;
+        case ET_ProximityIn:   kbp->type = ProximityIn;         break;
+        case ET_ProximityOut:  kbp->type = ProximityOut;        break;
+        default:
+            break;
+    }
+
+    /* Valuator events trail the main event, which gets flagged for it. */
+    if (num_events > 1)
+    {
+        kbp->deviceid |= MORE_EVENTS;
+        getValuatorEvents(ev, (deviceValuator*)(kbp + 1));
+    }
+
+    *count = num_events;
+    return Success;
+}
+
+
+/**
+ * Store the lowest set valuator of ev in *first and return the number of
+ * valuators from there to the highest set one (0 if the mask is empty).
+ */
+static int
+countValuators(DeviceEvent *ev, int *first)
+{
+    int lowest = -1;
+    int highest = -1;
+    int bit;
+
+    /* Track the lowest and highest set bit of the valuator mask. */
+    for (bit = 0; bit < sizeof(ev->valuators.mask) * 8; bit++)
+    {
+        if (!BitIsOn(ev->valuators.mask, bit))
+            continue;
+        highest = bit;
+        if (lowest < 0)
+            lowest = bit;
+    }
+
+    /* Empty mask: nothing to report, *first is left alone. */
+    if (lowest < 0)
+        return 0;
+
+    *first = lowest;
+    return highest - lowest + 1;
+}
+
+static int
+getValuatorEvents(DeviceEvent *ev, deviceValuator *xv)
+{
+    int i;
+    int state = 0;
+    int first_valuator, num_valuators;
+
+    /* Fill xv[] with one deviceValuator event per six valuators of ev. */
+    num_valuators = countValuators(ev, &first_valuator);
+    if (num_valuators > 0)
+    {
+        DeviceIntPtr dev = NULL;
+        dixLookupDevice(&dev, ev->deviceid, serverClient, DixUseAccess);
+        /* State needs to be assembled BEFORE the device is updated. */
+        state = (dev && dev->key) ? XkbStateFieldFromRec(&dev->key->xkbInfo->state) : 0;
+        state |= (dev && dev->button) ? (dev->button->state) : 0;
+    }
+
+    /* FIXME: non-continuous valuator data in internal events*/
+    for (i = 0; i < num_valuators; i += 6, xv++) {
+        xv->type = DeviceValuator;
+        xv->first_valuator = first_valuator + i;
+        xv->num_valuators = ((num_valuators - i) > 6) ? 6 : (num_valuators - i);
+        xv->deviceid = ev->deviceid;
+        xv->device_state = state;
+        switch (xv->num_valuators) { /* each case copies one value and falls through */
+        case 6:
+            xv->valuator5 = ev->valuators.data[xv->first_valuator + 5]; /* fallthrough */
+        case 5:
+            xv->valuator4 = ev->valuators.data[xv->first_valuator + 4]; /* fallthrough */
+        case 4:
+            xv->valuator3 = ev->valuators.data[xv->first_valuator + 3]; /* fallthrough */
+        case 3:
+            xv->valuator2 = ev->valuators.data[xv->first_valuator + 2]; /* fallthrough */
+        case 2:
+            xv->valuator1 = ev->valuators.data[xv->first_valuator + 1]; /* fallthrough */
+        case 1:
+            xv->valuator0 = ev->valuators.data[xv->first_valuator + 0];
+        }
+
+        if (i + 6 < num_valuators) /* another valuator event follows this one */
+            xv->deviceid |= MORE_EVENTS;
+    }
+
+    return (num_valuators + 5) / 6; /* number of deviceValuator events written */
+}
+
+
+static int
+appendKeyInfo(DeviceChangedEvent *dce, xXIKeyInfo* info)
+{
+    uint32_t *keycodes = (uint32_t*)&info[1]; /* list follows the header */
+    int idx;
+
+    info->type = XIKeyClass;
+    info->sourceid = dce->sourceid;
+    info->num_keycodes = dce->keys.max_keycode - dce->keys.min_keycode + 1;
+    info->length = sizeof(xXIKeyInfo)/4 + info->num_keycodes;
+
+    /* One entry per keycode, min_keycode first; returns the bytes used. */
+    for (idx = 0; idx < info->num_keycodes; idx++)
+        keycodes[idx] = idx + dce->keys.min_keycode;
+
+    return info->length * 4;
+}
+
+static int
+appendButtonInfo(DeviceChangedEvent *dce, xXIButtonInfo *info)
+{
+    /* Size of the button state mask, in 4-byte units. */
+    int mask_len = bytes_to_int32(bits_to_bytes(dce->buttons.num_buttons));
+    unsigned char *mask = (unsigned char*)&info[1];
+    unsigned char *labels = mask + mask_len * 4;
+
+    info->type = XIButtonClass;
+    info->sourceid = dce->sourceid;
+    info->num_buttons = dce->buttons.num_buttons;
+    info->length = bytes_to_int32(sizeof(xXIButtonInfo)) +
+                   info->num_buttons + mask_len;
+
+    /* The mask is sent all-zero. */
+    /* FIXME: is_down? */
+    memset(mask, 0, mask_len * 4);
+
+    /* The button names go right behind the mask. */
+    memcpy(labels, dce->buttons.names, dce->buttons.num_buttons * sizeof(Atom));
+
+    return info->length * 4;
+}
+
+static int
+appendValuatorInfo(DeviceChangedEvent *dce, xXIValuatorInfo *info, int axisnumber)
+{
+    /* Identification of the axis. */
+    info->type = XIValuatorClass;
+    info->number = axisnumber;
+    info->sourceid = dce->sourceid;
+    info->label = dce->valuators[axisnumber].name;
+    info->mode = dce->valuators[axisnumber].mode;
+    info->resolution = dce->valuators[axisnumber].resolution;
+    /* Range; only the integral parts are filled in. */
+    info->min.integral = dce->valuators[axisnumber].min;
+    info->max.integral = dce->valuators[axisnumber].max;
+    info->min.frac = info->max.frac = 0;
+    /* FIXME: value */
+    info->value.integral = info->value.frac = 0;
+
+    info->length = sizeof(xXIValuatorInfo)/4;
+    return info->length * 4;
+}
+
+static int
+eventToDeviceChanged(DeviceChangedEvent *dce, xEvent **xi)
+{
+    xXIDeviceChangedEvent *dcce;
+    int len = sizeof(xXIDeviceChangedEvent);
+    int nkeys;
+    char *ptr;
+    /* First size the event (fixed part plus every class info), then fill it. */
+    if (dce->buttons.num_buttons)
+    {
+        len += sizeof(xXIButtonInfo);
+        len += dce->buttons.num_buttons * sizeof(Atom); /* button names */
+        len += pad_to_int32(bits_to_bytes(dce->buttons.num_buttons)); /* button mask */
+    }
+    if (dce->num_valuators)
+        len += sizeof(xXIValuatorInfo) * dce->num_valuators;
+
+    nkeys = (dce->keys.max_keycode > 0) ? /* max_keycode of 0 means no key class */
+                dce->keys.max_keycode - dce->keys.min_keycode + 1 : 0;
+    if (nkeys > 0)
+    {
+        len += sizeof(xXIKeyInfo);
+        len += sizeof(CARD32) * nkeys; /* keycodes */
+    }
+
+    dcce = calloc(1, len);
+    if (!dcce)
+    {
+        ErrorF("[Xi] BadAlloc in SendDeviceChangedEvent.\n");
+        return BadAlloc; /* *xi is not written on this path */
+    }
+
+    dcce->type         = GenericEvent;
+    dcce->extension    = IReqCode;
+    dcce->evtype       = XI_DeviceChanged;
+    dcce->time         = dce->time;
+    dcce->deviceid     = dce->deviceid;
+    dcce->sourceid     = dce->sourceid;
+    dcce->reason       = (dce->flags & DEVCHANGE_DEVICE_CHANGE) ? XIDeviceChange : XISlaveSwitch;
+    dcce->num_classes  = 0;
+    dcce->length = bytes_to_int32(len - sizeof(xEvent)); /* size beyond the first sizeof(xEvent) bytes */
+
+    ptr = (char*)&dcce[1]; /* class infos start right after the fixed part */
+    if (dce->buttons.num_buttons)
+    {
+        dcce->num_classes++;
+        ptr += appendButtonInfo(dce, (xXIButtonInfo*)ptr);
+    }
+
+    if (nkeys)
+    {
+        dcce->num_classes++;
+        ptr += appendKeyInfo(dce, (xXIKeyInfo*)ptr);
+    }
+
+    if (dce->num_valuators)
+    {
+        int i;
+
+        dcce->num_classes += dce->num_valuators; /* one class info per axis */
+        for (i = 0; i < dce->num_valuators; i++)
+            ptr += appendValuatorInfo(dce, (xXIValuatorInfo*)ptr, i);
+    }
+
+    *xi = (xEvent*)dcce; /* handed to the caller as a generic xEvent pointer */
+
+    return Success;
+}
+
+static int count_bits(unsigned char* ptr, int len)
+{
+    int total = 0;
+    unsigned int idx = 0;
+    unsigned char byte;
+
+    /* Add up the set bits of each of the len bytes at ptr. */
+    while (idx < len)
+    {
+        for (byte = ptr[idx]; byte != 0; byte >>= 1)
+        {
+            total += (byte & 0x1);
+        }
+        idx++;
+    }
+    return total;
+}
+
+/* Convert ev into a newly allocated XI2 device event stored in *xi: the fixed
+ * part, the button mask, the valuator mask and one FP3232 per set valuator.
+ * Returns Success, or BadAlloc (with *xi == NULL) if the allocation fails. */
+static int
+eventToDeviceEvent(DeviceEvent *ev, xEvent **xi)
+{
+    int len = sizeof(xXIDeviceEvent);
+    xXIDeviceEvent *xde;
+    int i, btlen, vallen;
+    char *ptr;
+    FP3232 *axisval;
+
+    /* FIXME: this should just send the buttons we have, not MAX_BUTTONs. Same
+     * with MAX_VALUATORS below */
+    btlen = bytes_to_int32(bits_to_bytes(MAX_BUTTONS)); /* in 4 byte units */
+    len += btlen * 4; /* buttonmask len */
+
+    vallen = count_bits(ev->valuators.mask, sizeof(ev->valuators.mask)/sizeof(ev->valuators.mask[0]));
+    len += vallen * 2 * sizeof(uint32_t); /* axisvalues */
+    vallen = bytes_to_int32(bits_to_bytes(MAX_VALUATORS));
+    len += vallen * 4; /* valuators mask */
+
+    *xi = calloc(1, len);
+    if (!*xi)
+        return BadAlloc;
+    xde = (xXIDeviceEvent*)*xi;
+    xde->type           = GenericEvent;
+    xde->extension      = IReqCode;
+    xde->evtype         = GetXI2Type((InternalEvent*)ev);
+    xde->time           = ev->time;
+    xde->length         = bytes_to_int32(len - sizeof(xEvent));
+    xde->detail         = ev->detail.button;
+    xde->root           = ev->root;
+    xde->buttons_len    = btlen;
+    xde->valuators_len  = vallen;
+    xde->deviceid       = ev->deviceid;
+    xde->sourceid       = ev->sourceid;
+    xde->root_x         = FP1616(ev->root_x, ev->root_x_frac);
+    xde->root_y         = FP1616(ev->root_y, ev->root_y_frac);
+
+    if (ev->key_repeat)
+        xde->flags      |= XIKeyRepeat;
+
+    xde->mods.base_mods         = ev->mods.base;
+    xde->mods.latched_mods      = ev->mods.latched;
+    xde->mods.locked_mods       = ev->mods.locked;
+    xde->mods.effective_mods    = ev->mods.effective;
+
+    xde->group.base_group       = ev->group.base;
+    xde->group.latched_group    = ev->group.latched;
+    xde->group.locked_group     = ev->group.locked;
+    xde->group.effective_group  = ev->group.effective;
+
+    ptr = (char*)&xde[1]; /* button mask sits right behind the fixed part */
+    for (i = 0; i < sizeof(ev->buttons) * 8; i++)
+        if (BitIsOn(ev->buttons, i))
+            SetBit(ptr, i);
+
+    ptr += xde->buttons_len * 4; /* now at the valuator mask */
+    axisval = (FP3232*)(ptr + xde->valuators_len * 4);
+    for (i = 0; i < sizeof(ev->valuators.mask) * 8; i++)
+    {
+        if (!BitIsOn(ev->valuators.mask, i))
+            continue;
+        SetBit(ptr, i);
+        axisval->integral = ev->valuators.data[i];
+        axisval->frac = ev->valuators.data_frac[i];
+        axisval++;
+    }
+
+    return Success;
+}
+
+/* Store a newly allocated xXIRawEvent for ev in *xi; BadAlloc if that fails. */
+static int
+eventToRawEvent(RawDeviceEvent *ev, xEvent **xi)
+{
+    xXIRawEvent* raw;
+    int i, vallen, nvals, len = sizeof(xXIRawEvent);
+    char *ptr;
+    FP3232 *axisval;
+
+    nvals = count_bits(ev->valuators.mask, sizeof(ev->valuators.mask));
+    len += nvals * sizeof(FP3232) * 2; /* 8 byte each, once raw, once processed */
+    vallen = bytes_to_int32(bits_to_bytes(MAX_VALUATORS));
+    len += vallen * 4; /* valuators mask */
+
+    *xi = calloc(1, len);
+    if (!*xi)
+        return BadAlloc;
+    raw = (xXIRawEvent*)*xi;
+    raw->type           = GenericEvent;
+    raw->extension      = IReqCode;
+    raw->evtype         = GetXI2Type((InternalEvent*)ev);
+    raw->time           = ev->time;
+    raw->length         = bytes_to_int32(len - sizeof(xEvent));
+    raw->detail         = ev->detail.button;
+    raw->deviceid       = ev->deviceid;
+    raw->valuators_len  = vallen;
+
+    ptr = (char*)&raw[1]; /* valuator mask sits right behind the fixed part */
+    axisval = (FP3232*)(ptr + raw->valuators_len * 4);
+    for (i = 0; i < sizeof(ev->valuators.mask) * 8; i++)
+    {
+        if (!BitIsOn(ev->valuators.mask, i))
+            continue;
+        SetBit(ptr, i);
+        axisval->integral = ev->valuators.data[i];
+        axisval->frac = ev->valuators.data_frac[i];
+        (axisval + nvals)->integral = ev->valuators.data_raw[i];
+        (axisval + nvals)->frac = ev->valuators.data_raw_frac[i];
+        axisval++;
+    }
+
+    return Success;
+}
+
+/**
+ * Return the corresponding core type for the given event or 0 if no core
+ * equivalent exists.
+ */
+int
+GetCoreType(InternalEvent *event)
+{
+    /* Motion, button and key events are mapped; anything else yields 0. */
+    switch(event->any.type)
+    {
+        case ET_Motion:         return MotionNotify;
+        case ET_ButtonPress:    return ButtonPress;
+        case ET_ButtonRelease:  return ButtonRelease;
+        case ET_KeyPress:       return KeyPress;
+        case ET_KeyRelease:     return KeyRelease;
+        default:
+            break;
+    }
+    return 0;
+}
+
+/**
+ * Return the corresponding XI 1.x type for the given event or 0 if no
+ * equivalent exists.
+ */
+int
+GetXIType(InternalEvent *event)
+{
+    /* Unmapped event types fall out of the switch and yield 0. */
+    switch(event->any.type)
+    {
+        case ET_Motion:         return DeviceMotionNotify;
+        case ET_ButtonPress:    return DeviceButtonPress;
+        case ET_ButtonRelease:  return DeviceButtonRelease;
+        case ET_KeyPress:       return DeviceKeyPress;
+        case ET_KeyRelease:     return DeviceKeyRelease;
+        case ET_ProximityIn:    return ProximityIn;
+        case ET_ProximityOut:   return ProximityOut;
+        default:
+            break;
+    }
+    return 0;
+}
+
+/**
+ * Return the corresponding XI 2.x type for the given event or 0 if no
+ * equivalent exists.
+ */
+int
+GetXI2Type(InternalEvent *event)
+{
+    /* Unmapped event types fall out of the switch and yield 0. */
+
+    switch(event->any.type)
+    {
+        case ET_Motion:         return XI_Motion;
+        case ET_ButtonPress:    return XI_ButtonPress;
+        case ET_ButtonRelease:  return XI_ButtonRelease;
+        case ET_KeyPress:       return XI_KeyPress;
+        case ET_KeyRelease:     return XI_KeyRelease;
+        case ET_Enter:          return XI_Enter;
+        case ET_Leave:          return XI_Leave;
+        case ET_Hierarchy:      return XI_HierarchyChanged;
+        case ET_DeviceChanged:  return XI_DeviceChanged;
+        case ET_RawKeyPress:    return XI_RawKeyPress;
+        case ET_RawKeyRelease:  return XI_RawKeyRelease;
+        case ET_RawButtonPress: return XI_RawButtonPress;
+        case ET_RawButtonRelease: return XI_RawButtonRelease;
+        case ET_RawMotion:      return XI_RawMotion;
+        case ET_FocusIn:        return XI_FocusIn;
+        case ET_FocusOut:       return XI_FocusOut;
+        default:
+            break;
+    }
+    return 0;
+}
diff --git a/xorg-server/test/input.c b/xorg-server/test/input.c
index 879e14f2f..e0e9e6af7 100644
--- a/xorg-server/test/input.c
+++ b/xorg-server/test/input.c
@@ -36,6 +36,7 @@
 #include "inputstr.h"
 #include "eventconvert.h"
 #include "exevents.h"
+#include "exglobals.h"
 #include "dixgrabs.h"
 #include "eventstr.h"
 #include "inpututils.h"
@@ -285,6 +286,143 @@ static void dix_event_to_core_conversion(void)
     dix_event_to_core(ET_Motion);
 }
 
+/* Convert ev with EventToXI and check the return code, the event count,
+ * the leading deviceKeyButtonPointer and any trailing deviceValuators. */
+static void
+_dix_test_xi_convert(DeviceEvent *ev, int expected_rc, int expected_count)
+{
+    xEvent *events;
+    deviceKeyButtonPointer *head;
+    int nevents = 0;
+    int i;
+    int rc = EventToXI((InternalEvent*)ev, &events, &nevents);
+
+    g_assert(rc == expected_rc);
+    g_assert(nevents >= expected_count);
+    if (nevents <= 0)
+        return;
+    head = (deviceKeyButtonPointer*)events;
+    g_assert(head->type == IEventBase + ev->type);
+    g_assert(head->detail == ev->detail.key);
+    g_assert(head->time == ev->time);
+    g_assert((head->deviceid & ~MORE_EVENTS) == ev->deviceid);
+    g_assert(head->root_x == ev->root_x);
+    g_assert(head->root_y == ev->root_y);
+    g_assert(head->state == ev->corestate);
+    g_assert(head->event_x == 0);
+    g_assert(head->event_y == 0);
+    g_assert(head->root == ev->root);
+    g_assert(head->event == 0);
+    g_assert(head->child == 0);
+    g_assert(head->same_screen == FALSE);
+    for (i = nevents - 1; i > 0; i--) {
+        deviceValuator *v = (deviceValuator*)&events[i];
+        g_assert(v->type == DeviceValuator);
+        g_assert(v->num_valuators <= 6);
+    }
+    free(events);
+}
+
+/**
+ * This tests for internal event → XI1 event conversion
+ * - all conversions should generate the right XI event type
+ * - right number of events generated
+ * - extra events are valuators
+ */
+static void dix_event_to_xi1_conversion(void)
+{
+    DeviceEvent ev = {0};
+    int time;
+    int x, y;
+    int state;
+    int detail;
+    const int ROOT_WINDOW_ID = 0x100;
+    int deviceid;
+
+    /* Give the XI1 event type globals values the checks can predict */
+    IEventBase = 80;
+    DeviceValuator      = IEventBase - 1;
+    DeviceKeyPress      = IEventBase + ET_KeyPress;
+    DeviceKeyRelease    = IEventBase + ET_KeyRelease;
+    DeviceButtonPress   = IEventBase + ET_ButtonPress;
+    DeviceButtonRelease = IEventBase + ET_ButtonRelease;
+    DeviceMotionNotify  = IEventBase + ET_Motion;
+    DeviceFocusIn       = IEventBase + ET_FocusIn;
+    DeviceFocusOut      = IEventBase + ET_FocusOut;
+    ProximityIn         = IEventBase + ET_ProximityIn;
+    ProximityOut        = IEventBase + ET_ProximityOut;
+
+    /* Field values for the template event (EventToXI callocs its output) */
+    x = 0;
+    y = 0;
+    time = 12345;
+    state = 0;
+    detail = 0;
+    deviceid = 4;
+
+    /* Template event reused by every conversion below; axes 0 and 1 set */
+    ev.header           = 0xFF;
+    ev.length           = sizeof(DeviceEvent);
+    ev.time             = time;
+    ev.root_x           = x;
+    ev.root_y           = y;
+    SetBit(ev.valuators.mask, 0);
+    SetBit(ev.valuators.mask, 1);
+    ev.root             = ROOT_WINDOW_ID;
+    ev.corestate        = state;
+    ev.detail.key       = detail;
+    ev.deviceid         = deviceid;
+
+    /* two axes set: every type must convert without BadMatch */
+    ev.type = ET_KeyPress;         _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_KeyRelease;       _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ButtonPress;      _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ButtonRelease;    _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_Motion;           _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ProximityIn;      _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ProximityOut;     _dix_test_xi_convert(&ev, Success, 1);
+
+    /* No axes: motion and proximity are expected to fail with BadMatch */
+    ClearBit(ev.valuators.mask, 0);
+    ClearBit(ev.valuators.mask, 1);
+    ev.type = ET_KeyPress;         _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_KeyRelease;       _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ButtonPress;      _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_ButtonRelease;    _dix_test_xi_convert(&ev, Success, 1);
+    ev.type = ET_Motion;           _dix_test_xi_convert(&ev, BadMatch, 0);
+    ev.type = ET_ProximityIn;      _dix_test_xi_convert(&ev, BadMatch, 0);
+    ev.type = ET_ProximityOut;     _dix_test_xi_convert(&ev, BadMatch, 0);
+
+    /* more than 6 axes → 2 valuator events */
+    SetBit(ev.valuators.mask, 0);
+    SetBit(ev.valuators.mask, 1);
+    SetBit(ev.valuators.mask, 2);
+    SetBit(ev.valuators.mask, 3);
+    SetBit(ev.valuators.mask, 4);
+    SetBit(ev.valuators.mask, 5);
+    SetBit(ev.valuators.mask, 6);
+    ev.type = ET_KeyPress;         _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_KeyRelease;       _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_ButtonPress;      _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_ButtonRelease;    _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_Motion;           _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_ProximityIn;      _dix_test_xi_convert(&ev, Success, 2);
+    ev.type = ET_ProximityOut;     _dix_test_xi_convert(&ev, Success, 2);
+
+
+    /* keycode too high */
+    ev.type = ET_KeyPress;
+    ev.detail.key = 256;
+    _dix_test_xi_convert(&ev, Success, 0);
+
+    /* deviceid too high */
+    ev.type = ET_KeyPress;
+    ev.detail.key = 18;
+    ev.deviceid = 128;
+    _dix_test_xi_convert(&ev, Success, 0);
+}
+
+
 static void xi2_struct_sizes(void)
 {
 #define compare(req) \
@@ -1070,6 +1208,7 @@ int main(int argc, char** argv)
     g_test_add_func("/dix/input/attributes", dix_input_attributes);
     g_test_add_func("/dix/input/init-valuators", dix_init_valuators);
     g_test_add_func("/dix/input/event-core-conversion", dix_event_to_core_conversion);
+    g_test_add_func("/dix/input/event-xi1-conversion", dix_event_to_xi1_conversion);
     g_test_add_func("/dix/input/check-grab-values", dix_check_grab_values);
     g_test_add_func("/dix/input/xi2-struct-sizes", xi2_struct_sizes);
     g_test_add_func("/dix/input/grab_matching", dix_grab_matching);
-- 
cgit v1.2.3