Imported nx-X11-3.1.0-1.tar.gz (nx-X11/3.1.0-1)

Summary: Imported nx-X11-3.1.0-1.tar.gz
Keywords: Imported nx-X11-3.1.0-1.tar.gz into Git repository
author: Reinhard Tartler <siretart@tauware.de> 2011-10-10 17:43:39 +0200
committer: Reinhard Tartler <siretart@tauware.de> 2011-10-10 17:43:39 +0200
commit: f4092abdf94af6a99aff944d6264bc1284e8bdd4 (patch)
tree: 2ac1c9cc16ceb93edb2c4382c088dac5aeafdf0f /nx-X11/extras/Mesa/src/mesa/x86
parent: a840692edc9c6d19cd7c057f68e39c7d95eb767d (diff)
download: nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.tar.gz
nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.tar.bz2
nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.zip
39 files changed, 14248 insertions, 0 deletions
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow.c b/nx-X11/extras/Mesa/src/mesa/x86/3dnow.c
new file mode 100644
index 000000000..d1f827c3a
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow.c
@@ -0,0 +1,92 @@
+/* $Id: 3dnow.c,v 1.1.1.1 2004/06/16 09:19:34 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.0.1
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * 3DNow! optimizations contributed by
+ * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+
+#include "3dnow.h"
+#include "common_x86_macros.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+
+
+#ifdef USE_3DNOW_ASM
+DECLARE_XFORM_GROUP( 3dnow, 2 )
+DECLARE_XFORM_GROUP( 3dnow, 3 )
+DECLARE_XFORM_GROUP( 3dnow, 4 )
+/* The groups above use the same sizes (2, 3, 4) as the ASSIGN_XFORM_GROUP calls below; both macros are defined outside this file. */
+DECLARE_NORM_GROUP( 3dnow )
+/* NOTE(review): the matching ASSIGN_NORM_GROUP( 3dnow ) call is commented out in _mesa_init_3dnow_transform_asm(). */
+/* The three prototypes that follow are not referenced anywhere else in this file. */
+extern void _ASMAPI
+_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
+			       const GLfloat *m,
+			       const GLfloat *src,
+			       GLuint src_stride,
+			       GLuint count );
+
+extern void _ASMAPI
+_mesa_3dnow_project_vertices( GLfloat *first,
+			      GLfloat *last,
+			      const GLfloat *m,
+			      GLuint stride );
+
+extern void _ASMAPI
+_mesa_3dnow_project_clipped_vertices( GLfloat *first,
+				      GLfloat *last,
+				      const GLfloat *m,
+				      GLuint stride,
+				      const GLubyte *clipmask );
+#endif
+
+/* Selects the 3DNow! transform groups through ASSIGN_XFORM_GROUP (a macro defined outside this file); the body is empty unless USE_3DNOW_ASM is defined. */
+void _mesa_init_3dnow_transform_asm( void )
+{
+#ifdef USE_3DNOW_ASM
+   ASSIGN_XFORM_GROUP( 3dnow, 2 );
+   ASSIGN_XFORM_GROUP( 3dnow, 3 );
+   ASSIGN_XFORM_GROUP( 3dnow, 4 );
+
+   /* There's a bug somewhere in the 3dnow_normal.S file that causes
+    * bad shading.  Disable for now.
+   ASSIGN_NORM_GROUP( 3dnow );
+   */
+   /* NOTE(review): the normal-transform self-test below still runs in DEBUG builds although the 3DNow! normal group is not assigned above. */
+#ifdef DEBUG
+   _math_test_all_transform_functions( "3DNow!" );
+   _math_test_all_normal_transform_functions( "3DNow!" );
+#endif
+#endif
+}
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow.h b/nx-X11/extras/Mesa/src/mesa/x86/3dnow.h
new file mode 100644
index 000000000..0ff0888ff
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow.h
@@ -0,0 +1,39 @@
+/* $Id: 3dnow.h,v 1.1.1.1 2004/06/16 09:19:34 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * 3DNow! optimizations contributed by
+ * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ */
+
+#ifndef __3DNOW_H__
+#define __3DNOW_H__
+/* NOTE(review): identifiers containing "__" are reserved for the C implementation; the guard name is left unchanged here. */
+#include "math/m_xform.h"
+/* Defined in 3dnow.c; its body is compiled only when USE_3DNOW_ASM is defined, otherwise the function does nothing. */
+void _mesa_init_3dnow_transform_asm( void );
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow_normal.S b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_normal.S
new file mode 100644
index 000000000..15fc569bc
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_normal.S
@@ -0,0 +1,848 @@
+/* $Id: 3dnow_normal.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.1
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * 3Dnow assembly code by Holger Waechtler
+ */
+
+#ifdef USE_3DNOW_ASM
+/* NOTE(review): ARG_*, V4F_*, MATRIX_INV and the instruction macros used below are not defined in this file; presumably they come from the two headers included next. */
+#include "matypes.h"
+#include "norm_args.h"
+/* M(i) addresses float number i (4 bytes each) relative to ECX; i is not parenthesised, but every visible use passes a plain literal. */
+        SEG_TEXT
+/* STRIDE is the operand at byte offset 12 from ESI; NOTE(review): presumably the stride field of the input vector -- the offset is hard-coded, confirm against matypes.h. */
+#define M(i)    REGOFF(i * 4, ECX)
+#define STRIDE  REGOFF(12, ESI)
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
+HIDDEN(_mesa_3dnow_transform_normalize_normals)
+GLNAME(_mesa_3dnow_transform_normalize_normals):
+/* Two passes: (1) dest[i] = in[i] times elements m0-m2, m4-m6, m8-m10 of mat->inv; (2) every dest[i] is rescaled in place. */
+#define FRAME_OFFSET 12
+/* FRAME_OFFSET matches the bytes pushed so far (three registers); NOTE(review): presumably the ARG_* macros use it to reach the stack arguments -- confirm in norm_args.h. */
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+
+    MOV_L      ( ARG_LENGTHS, EDI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+/* From here on: EDI = lengths, ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EBP = count. */
+    CMP_L      ( CONST(0), EBP )        /*   count > 0 ??  */
+    JE         ( LLBL (G3TN_end) )
+/* EBP was already loaded from in->count above; the next instruction reads the same field again. */
+    MOV_L      ( REGOFF (V4F_COUNT, ESI), EBP )
+    FEMMS
+
+    PUSH_L     ( EBP )
+    PUSH_L     ( EAX )
+    PUSH_L     ( EDX )                  /*  save counter & pointer for   */
+                                        /*  the normalize pass           */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 24
+
+    MOVQ       ( M(0), MM3 )            /*  m1              | m0         */
+    MOVQ       ( M(4), MM4 )            /*  m5              | m4         */
+
+    MOVD       ( M(2), MM5 )            /*                  | m2         */
+    PUNPCKLDQ  ( M(6), MM5 )            /*  m6              | m2         */
+
+    MOVQ       ( M(8), MM6 )            /*  m9              | m8         */
+    MOVQ       ( M(10), MM7 )           /*  m11 (never stored) | m10     */
+/* NOTE(review): the matrix is multiplied by scale only when lengths == 0, i.e. on the path that renormalizes afterwards; with lengths != 0 scale is never applied -- confirm this is intended. */
+    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
+    JNE        ( LLBL (G3TN_scale_end ) )
+
+    MOVD       ( ARG_SCALE, MM0 )       /*               | scale           */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale           */
+
+    PFMUL      ( MM0, MM3 )             /* scale * m1    | scale * m0      */
+    PFMUL      ( MM0, MM4 )             /* scale * m5    | scale * m4      */
+    PFMUL      ( MM0, MM5 )             /* scale * m6    | scale * m2      */
+    PFMUL      ( MM0, MM6 )             /* scale * m9    | scale * m8      */
+    PFMUL      ( MM0, MM7 )             /*               | scale * m10     */
+/* Pass 1: r0 = x0*m0+x1*m1+x2*m2, r1 = x0*m4+x1*m5+x2*m6, r2 = x0*m8+x1*m9+x2*m10; dest advances 16 bytes per normal, in advances by STRIDE. */
+ALIGNTEXT32
+LLBL (G3TN_scale_end):
+LLBL (G3TN_transform):
+    MOVQ       ( REGIND (EDX), MM0 )    /*  x1              | x0         */
+    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2         */
+
+    MOVQ       ( MM0, MM1 )             /*  x1              | x0           */
+    PUNPCKLDQ  ( MM2, MM2 )             /*  x2              | x2           */
+
+    PFMUL      ( MM3, MM0 )             /*  x1*m1           | x0*m0        */
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    PFMUL      ( MM4, MM1 )             /*  x1*m5           | x0*m4        */
+    PFACC      ( MM1, MM0 )             /*  x0*m4+x1*m5     | x0*m0+x1*m1  */
+
+    PFMUL      ( MM5, MM2 )             /*  x2*m6           | x2*m2        */
+    PFADD      ( MM2, MM0 )             /* x0*m4+x1*m5+x2*m6| x0*m0+x1*m1+x2*m2 */
+
+    MOVQ       ( REGIND (EDX), MM1 )    /*  x1           | x0              */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                   */
+
+    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8           */
+    MOVD       ( REGOFF (8, EDX), MM2 ) /*               | x2              */
+
+    PFMUL      ( MM7, MM2 )             /*               | x2*m10          */
+    PFACC      ( MM1, MM1 )             /*  *not used*   | x0*m8+x1*m9     */
+
+    PFADD      ( MM2, MM1 )             /*  *not used*   | x0*m8+x1*m9+x2*m10 */
+    ADD_L      ( STRIDE, EDX )          /*  next normal                    */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write r2                       */
+    DEC_L      ( EBP )                  /*  decrement normal counter       */
+    JA         ( LLBL (G3TN_transform) )
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so every JA in this routine also depends on the carry left by the last ADD_L; it loops until EBP == 0 as long as that add did not carry. */
+
+    POP_L      ( EDX )                  /*  end of transform ---           */
+    POP_L      ( EAX )                  /*    now normalizing ...          */
+    POP_L      ( EBP )
+
+    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
+    JE         ( LLBL (G3TN_norm ) )    /*  calculate lengths              */
+/* Pass 2a (lengths != 0): multiply dest[i] by lengths[i] in place; NOTE(review): lengths[] therefore presumably holds reciprocal lengths -- confirm with the caller. */
+
+ALIGNTEXT32
+LLBL (G3TN_norm_w_lengths):
+
+    PREFETCHW  ( REGOFF(12,EAX) )
+
+    MOVQ       ( REGIND(EAX), MM0 )     /*  x1              | x0           */
+    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | x2           */
+
+    MOVD       ( REGIND (EDI), MM3 )    /*                  | length (x)   */
+    PFMUL      ( MM3, MM1 )             /*                  | x2 (normalized) */
+
+    PUNPCKLDQ  ( MM3, MM3 )             /*  length (x)      | length (x)   */
+    PFMUL      ( MM3, MM0 )             /*  x1 (normalized) | x0 (normalized) */
+
+    ADD_L      ( STRIDE, EDX )          /*  next normal (EDX is not read again in this pass) */
+    ADD_L      ( CONST(4), EDI )        /*  next length                    */
+
+    PREFETCH   ( REGIND(EDI) )
+
+    MOVQ       ( MM0, REGIND(EAX) )     /*  write new x0, x1               */
+    MOVD       ( MM1, REGOFF(8, EAX) )  /*  write new x2                   */
+
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+    DEC_L      ( EBP )                  /*  decrement normal counter       */
+
+    JA         ( LLBL (G3TN_norm_w_lengths) )
+    JMP        ( LLBL (G3TN_exit_3dnow) )
+/* Pass 2b (lengths == 0): multiply dest[i] in place by 1/sqrt(x0*x0+x1*x1+x2*x2); the loop contains no test for a zero sum of squares. */
+ALIGNTEXT32
+LLBL (G3TN_norm):
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND (EAX), MM0 )    /*  x1             | x0           */
+    MOVD       ( REGOFF(8, EAX), MM1 )  /*                 | x2           */
+
+    MOVQ       ( MM0, MM3 )             /*  x1              | x0           */
+    MOVQ       ( MM1, MM4 )             /*                  | x2           */
+
+    PFMUL      ( MM0, MM3 )             /*  x1*x1           | x0*x0        */
+    ADD_L      ( CONST(16), EAX )       /*  next r                         */
+
+    PFMUL      ( MM1, MM4 )             /*                  | x2*x2        */
+    PFADD      ( MM4, MM3 )             /*  x1*x1           | x0*x0+x2*x2  */
+
+    PFACC      ( MM3, MM3 )             /* **not used**    | x0*x0+x1*x1+x2*x2 */
+    PFRSQRT    ( MM3, MM5 )             /*  1/sqrt (x0*x0+x1*x1+x2*x2)     */
+/* PFRSQRT yields an estimate; the PFMUL / PFRSQIT1 / PFRCPIT2 steps below refine it (see the AMD 3DNow! manual for the sequence). */
+    MOVQ       ( MM5, MM4 )
+    PUNPCKLDQ  ( MM3, MM3 )
+
+    DEC_L      ( EBP )                  /*  decrement normal counter       */
+    PFMUL      ( MM5, MM5 )
+
+    PFRSQIT1   ( MM3, MM5 )
+    PFRCPIT2   ( MM4, MM5 )
+
+    PFMUL      ( MM5, MM0 )             /*  x1 (normalized) | x0 (normalized) */
+
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /*  write new x0, x1              */
+    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized) */
+
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write new x2                  */
+    JA         ( LLBL (G3TN_norm) )
+
+LLBL (G3TN_exit_3dnow):
+    FEMMS
+/* G3TN_end is also the target of the count == 0 exit, which is taken before the first FEMMS. */
+LLBL (G3TN_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
+/* Single pass: multiply each input normal component-wise by m0, m5, m10 of mat->inv, rescale it by lengths[i] or by a computed 1/sqrt(x.x), and store it to dest. */
+#undef FRAME_OFFSET
+#define FRAME_OFFSET 12
+
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+
+    MOV_L      ( ARG_LENGTHS, EDI )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+/* From here on: EDI = lengths, ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EBP = count. */
+    CMP_L      ( CONST(0), EBP ) /*   count > 0 ??  */
+    JE         ( LLBL (G3TNNR_end) )
+
+    FEMMS
+
+    MOVD       ( M(0), MM0 )            /*               | m0                 */
+    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */
+
+    MOVD       ( M(10), MM2 )           /*               | m10                */
+    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */
+/* NOTE(review): scale is multiplied in only when lengths == 0, the path that renormalizes afterwards; with lengths != 0 scale is never applied -- confirm this is intended. */
+    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                    */
+    JNE        ( LLBL (G3TNNR_scale_end ) )
+
+    MOVD       ( ARG_SCALE, MM7 )       /*               | scale              */
+    PUNPCKLDQ  ( MM7, MM7 )             /* scale         | scale              */
+
+    PFMUL      ( MM7, MM0 )             /* scale * m5    | scale * m0         */
+    PFMUL      ( MM7, MM2 )             /* scale * m10   | scale * m10        */
+
+ALIGNTEXT32
+LLBL (G3TNNR_scale_end):
+    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                     */
+    JE         ( LLBL (G3TNNR_norm) )   /* need to calculate lengths          */
+/* Preload lengths[0]; the loop below reloads MM3 at its end for the following iteration. */
+    MOVD       ( REGIND(EDI), MM3 )     /*                 | length (x)       */
+
+
+ALIGNTEXT32
+LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths          */
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */
+
+    PFMUL      ( MM0, MM6 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX )          /* next normal                        */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    PFMUL      ( MM2, MM7 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    PFMUL      ( MM3, MM7 )             /*               | x2 (normalized)  */
+    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)    | length (x)       */
+
+    ADD_L      ( CONST(4), EDI )        /* next length                        */
+    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized)  */
+
+    DEC_L      ( EBP )                  /* decrement normal counter           */
+    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
+
+    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
+    MOVD       ( REGIND(EDI), MM3 )     /* next length; NOTE(review): on the last iteration this reads lengths[count] */
+
+    JA         ( LLBL (G3TNNR_norm_w_lengths) )
+    JMP        ( LLBL (G3TNNR_exit_3dnow) )
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so each JA here also depends on the carry left by the last ADD_L; it loops until EBP == 0 as long as that add did not carry. */
+ALIGNTEXT32
+LLBL (G3TNNR_norm):                     /* need to calculate lengths          */
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2               */
+
+    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0            */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    PFMUL      ( MM2, MM7 )             /*                 | x2*m10           */
+    MOVQ       ( MM6, MM3 )             /* x1 (transformed)| x0 (transformed) */
+
+    MOVQ       ( MM7, MM4 )             /*                 | x2 (transformed) */
+    PFMUL      ( MM6, MM3 )             /* x1*x1           | x0*x0            */
+
+
+    PFMUL      ( MM7, MM4 )             /*                 | x2*x2            */
+    PFACC      ( MM3, MM3 )             /* **not used**    | x0*x0+x1*x1      */
+
+    PFADD      ( MM4, MM3 )             /*                 | x0*x0+x1*x1+x2*x2*/
+    ADD_L      ( STRIDE, EDX )          /* next normal            */
+
+    PREFETCH   ( REGIND(EDX) )
+/* PFRSQRT yields an estimate that PFMUL / PFRSQIT1 / PFRCPIT2 refine (see the AMD 3DNow! manual); there is no test for a zero sum of squares. */
+    PFRSQRT    ( MM3, MM5 )             /* 1/sqrt (x0*x0+x1*x1+x2*x2)         */
+    MOVQ       ( MM5, MM4 )
+
+    PUNPCKLDQ  ( MM3, MM3 )
+    PFMUL      ( MM5, MM5 )
+
+    PFRSQIT1   ( MM3, MM5 )
+    DEC_L      ( EBP )                  /* decrement normal counter           */
+
+    PFRCPIT2   ( MM4, MM5 )
+    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized)  */
+
+    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized)  */
+
+    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */
+    JA         ( LLBL (G3TNNR_norm) )
+
+
+LLBL (G3TNNR_exit_3dnow):
+    FEMMS
+
+LLBL (G3TNNR_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
+/* dest[i] = (x0*scale*m0, x1*scale*m5, x2*scale*m10) with m taken from mat->inv; the result is not normalized and ARG_LENGTHS is not read. */
+#undef FRAME_OFFSET
+#define FRAME_OFFSET 12
+
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+/* EDI is saved and restored but not otherwise used in this routine. */
+    MOV_L      ( ARG_IN, EAX )
+    MOV_L      ( ARG_DEST, EDX )
+    MOV_L      ( REGOFF(V4F_COUNT, EAX), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EDX) )
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+    MOV_L      ( REGOFF(V4F_START, EDX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+/* From here on: ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EBP = count. */
+    CMP_L      ( CONST(0), EBP )
+    JE         ( LLBL (G3TRNR_end) )
+
+    FEMMS
+
+    MOVD       ( ARG_SCALE, MM6 )       /*               | scale              */
+    PUNPCKLDQ  ( MM6, MM6 )             /* scale         | scale              */
+
+    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
+    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */
+
+    PFMUL      ( MM6, MM0 )             /* scale*m5      | scale*m0           */
+    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */
+
+    PFMUL      ( MM6, MM2 )             /*               | scale*m10          */
+
+ALIGNTEXT32
+LLBL (G3TRNR_rescale):
+
+    PREFETCHW  ( REGIND(EAX) )
+	
+    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */
+	
+    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX )          /* next normal                        */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    DEC_L      ( EBP )                  /* decrement normal counter           */
+    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
+
+    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
+    JA         ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal  */
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so the JA above also depends on the carry left by the last ADD_L; it loops until EBP == 0 as long as that add did not carry. */
+    FEMMS
+
+LLBL (G3TRNR_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
+HIDDEN(_mesa_3dnow_transform_rescale_normals)
+GLNAME(_mesa_3dnow_transform_rescale_normals):
+/* dest[i] = in[i] times scale * (m0-m2, m4-m6, m8-m10 of mat->inv); the result is not normalized and ARG_LENGTHS is not read. */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+/* From here on: ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EDI = count (only two registers are pushed, hence FRAME_OFFSET 8). */
+    CMP_L      ( CONST(0), EDI )
+    JE         ( LLBL (G3TR_end) )
+
+    FEMMS
+
+    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */
+
+    MOVQ       ( REGOFF(16,ECX), MM4 )  /* m5            | m4                 */
+    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
+
+    MOVD       ( REGOFF(8,ECX), MM5 )   /*               | m2                 */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */
+
+    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */
+    PFMUL      ( MM0, MM3 )             /* scale*m1      | scale*m0           */
+
+    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
+    PFMUL      ( MM0, MM4 )             /* scale*m5      | scale*m4           */
+
+    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */
+    PFMUL      ( MM0, MM5 )             /* scale*m6      | scale*m2           */
+
+    PFMUL      ( MM0, MM6 )             /* scale*m9      | scale*m8           */
+
+    PFMUL      ( MM0, MM7 )             /*               | scale*m10          */
+
+ALIGNTEXT32
+LLBL (G3TR_rescale):
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+
+    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
+    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */
+
+    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
+    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
+
+    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
+
+    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
+    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */
+
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+    ADD_L      ( STRIDE, EDX )          /* next normal                    */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
+
+    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
+    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
+
+    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
+
+    DEC_L      ( EDI )                  /* decrement normal counter           */
+    JA         ( LLBL (G3TR_rescale) )
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so the JA above also depends on the carry left by the last ADD_L; it loops until EDI == 0 as long as that add did not carry. */
+    FEMMS
+
+LLBL (G3TR_end):
+    POP_L       ( ESI )
+    POP_L       ( EDI )
+    RET
+
+
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
+HIDDEN(_mesa_3dnow_transform_normals_no_rot)
+GLNAME(_mesa_3dnow_transform_normals_no_rot):
+/* dest[i] = (x0*m0, x1*m5, x2*m10) with m taken from mat->inv; ARG_SCALE and ARG_LENGTHS are not read. */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+/* From here on: ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EDI = count. */
+    CMP_L      ( CONST(0), EDI )
+    JE         ( LLBL (G3TNR_end) )
+
+    FEMMS
+
+    MOVD       ( REGIND(ECX), MM0 )     /*               | m0                 */
+    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 ) /* m5            | m0                 */
+
+    MOVD       ( REGOFF(40, ECX), MM2 ) /*               | m10                */
+    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */
+
+ALIGNTEXT32
+LLBL (G3TNR_transform):
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */
+
+    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
+    ADD_L      ( STRIDE, EDX)           /* next normal      */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    DEC_L      ( EDI )                  /* decrement normal counter           */
+    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
+
+    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */
+    JA         ( LLBL (G3TNR_transform) )
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so the JA above also depends on the carry left by the last ADD_L; it loops until EDI == 0 as long as that add did not carry. */
+    FEMMS
+
+LLBL (G3TNR_end):
+    POP_L       ( ESI )
+    POP_L       ( EDI )
+    RET
+
+
+
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_transform_normals)
+HIDDEN(_mesa_3dnow_transform_normals)
+GLNAME(_mesa_3dnow_transform_normals):
+/* dest[i] = in[i] times elements m0-m2, m4-m6, m8-m10 of mat->inv; ARG_SCALE and ARG_LENGTHS are not read. */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( ARG_MAT, ECX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
+    MOV_L      ( EDI, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), EDX ) /*  in->start    */
+    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
+/* From here on: ESI = in, EAX = dest->start, EDX = in->start, ECX = mat->inv, EDI = count. */
+    CMP_L      ( CONST(0), EDI )        /* count > 0 ??                       */
+    JE         ( LLBL (G3T_end) )
+
+    FEMMS
+
+    MOVQ       ( REGIND(ECX), MM3 )     /* m1            | m0                 */
+    MOVQ       ( REGOFF(16, ECX), MM4 ) /* m5            | m4                 */
+
+    MOVD       ( REGOFF(8, ECX), MM5 )  /*               | m2                 */
+    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 ) /* m6            | m2                 */
+
+    MOVQ       ( REGOFF(32, ECX), MM6 ) /* m9            | m8                 */
+    MOVD       ( REGOFF(40, ECX), MM7 ) /*               | m10                */
+
+ALIGNTEXT32
+LLBL (G3T_transform):
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+
+    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
+    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */
+
+    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
+    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */
+
+    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
+    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */
+
+    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
+
+    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
+    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
+
+    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
+    ADD_L      ( STRIDE, EDX )          /* next normal               */
+
+    PREFETCH   ( REGIND(EDX) )
+
+    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
+    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */
+
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
+    DEC_L      ( EDI )                  /* decrement normal counter           */
+
+    JA         ( LLBL (G3T_transform) )
+/* NOTE(review): assuming DEC_L/JA map to dec/ja, DEC leaves CF untouched, so the JA above also depends on the carry left by the last ADD_L; it loops until EDI == 0 as long as that add did not carry. */
+    FEMMS
+
+LLBL (G3T_end):
+    POP_L  ( ESI )
+    POP_L  ( EDI )
+    RET
+
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_normalize_normals)
+HIDDEN(_mesa_3dnow_normalize_normals)
+GLNAME(_mesa_3dnow_normalize_normals):
+/* Scale each normal of ARG_IN into ARG_DEST: by lengths[i] if ARG_LENGTHS != 0, else by 1/sqrt(x.x). */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 12
+/* FRAME_OFFSET matches the three 4-byte pushes below. */
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EBP )
+
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
+    MOV_L      ( EBP, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  in->start    */
+    MOV_L      ( ARG_LENGTHS, EDX )
+
+    CMP_L      ( CONST(0), EBP )        /* count == 0 ? -> nothing to do      */
+    JE         ( LLBL (G3N_end) )
+
+    FEMMS
+
+    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
+    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                  */
+
+ALIGNTEXT32
+LLBL (G3N_norm1):                       /* use precalculated lengths          */
+
+    PREFETCH   ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */
+
+    MOVD       ( REGIND(EDX), MM3 )     /*                 | length (x)       */
+    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)  */
+
+    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)       */
+    ADD_L      ( STRIDE, ECX )          /* next normal            */
+
+    PREFETCH   ( REGIND(ECX) )
+
+    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)  */
+    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */
+
+    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    ADD_L      ( CONST(4), EDX )        /* next length                        */
+    DEC_L      ( EBP )                  /* decrement normal counter           */
+    /* DEC does not update CF, so loop on ZF alone (JNZ) instead of JA. */
+    JNZ        ( LLBL (G3N_norm1) )     /* cnt > 0 ? -> process next normal   */
+
+    JMP        ( LLBL (G3N_end1) )
+
+ALIGNTEXT32
+LLBL (G3N_norm2):                       /* need to calculate lengths          */
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    PREFETCH   ( REGIND(ECX) )
+
+    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
+    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */
+
+    MOVQ       ( MM0, MM3 )             /* x1              | x0               */
+    ADD_L      ( STRIDE, ECX )          /* next normal    */
+
+    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0            */
+    MOVQ       ( MM1, MM4 )             /*                 | x2               */
+
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+    PFMUL      ( MM1, MM4 )             /*                 | x2*x2            */
+
+    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2      */
+    PFACC      ( MM3, MM3 )             /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2*/
+
+    PFRSQRT    ( MM3, MM5 )             /* 1/sqrt (x0*x0+x1*x1+x2*x2)         */
+    MOVQ       ( MM5, MM4 )             /* keep the first estimate            */
+
+    PUNPCKLDQ  ( MM3, MM3 )             /* squared length in both halves      */
+    PFMUL      ( MM5, MM5 )             /* estimate squared                   */
+
+    PFRSQIT1   ( MM3, MM5 )             /* Newton-Raphson refinement, step 1  */
+    DEC_L      ( EBP )                  /* decrement normal counter           */
+
+    PFRCPIT2   ( MM4, MM5 )             /* step 2: refined 1/sqrt             */
+
+    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)  */
+    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */
+
+    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)  */
+    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */
+    /* ZF still comes from the DEC above (MMX/3DNow! ops leave EFLAGS alone). */
+    JNZ        ( LLBL (G3N_norm2) )     /* cnt > 0 ? -> process next normal   */
+
+LLBL (G3N_end1):
+    FEMMS
+
+LLBL (G3N_end):
+    POP_L      ( EBP )
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+
+
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_3dnow_rescale_normals)
+HIDDEN(_mesa_3dnow_rescale_normals)
+GLNAME(_mesa_3dnow_rescale_normals):
+/* Multiply x0, x1, x2 of every normal in ARG_IN by ARG_SCALE and store the result to ARG_DEST. */
+#undef  FRAME_OFFSET
+#define FRAME_OFFSET 8
+    PUSH_L     ( EDI )
+    PUSH_L     ( ESI )
+
+    MOV_L      ( ARG_IN, ESI )
+    MOV_L      ( ARG_DEST, EAX )
+    MOV_L      ( REGOFF(V4F_COUNT, ESI), EDX ) /*  dest->count = in->count   */
+    MOV_L      ( EDX, REGOFF(V4F_COUNT, EAX) )
+    MOV_L      ( REGOFF(V4F_START, EAX), EAX ) /*  dest->start  */
+    MOV_L      ( REGOFF(V4F_START, ESI), ECX ) /*  in->start    */
+
+    CMP_L      ( CONST(0), EDX )        /* count == 0 ? -> nothing to do      */
+    JE         ( LLBL (G3R_end) )
+
+    FEMMS
+
+    MOVD       ( ARG_SCALE, MM0 )       /* scale                              */
+    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */
+
+ALIGNTEXT32
+LLBL (G3R_rescale):
+
+    PREFETCHW  ( REGIND(EAX) )
+
+    MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
+    MOVD       ( REGOFF(8, ECX), MM2 )  /*               | x2                 */
+
+    PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
+    ADD_L      ( STRIDE, ECX )          /* next normal                  */
+
+    PREFETCH   ( REGIND(ECX) )
+
+    PFMUL      ( MM0, MM2 )             /*               | x2*scale           */
+    ADD_L      ( CONST(16), EAX )       /* next r                             */
+
+    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                      */
+    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                           */
+    /* DEC does not update CF, so loop on ZF alone (JNZ) instead of JA. */
+    DEC_L      ( EDX )                  /* decrement normal counter           */
+    JNZ        ( LLBL (G3R_rescale) )   /* cnt > 0 ? -> process next normal   */
+
+    FEMMS
+
+LLBL (G3R_end):
+    POP_L      ( ESI )
+    POP_L      ( EDI )
+    RET
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform1.S b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform1.S
new file mode 100644
index 000000000..f1e4c7b67
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform1.S
@@ -0,0 +1,433 @@
+/* $Id: 3dnow_xform1.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef USE_3DNOW_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+    SEG_TEXT
+
+#define FRAME_OFFSET	4
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
+HIDDEN(_mesa_3dnow_transform_points1_general)
+GLNAME( _mesa_3dnow_transform_points1_general ):
+/* For each 1-component source vertex x0, write r[j] = x0*m0j + m3j (j = 0..3) to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 4, OR VEC_SIZE_4 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
+    MOVQ      ( REGOFF(8, ECX), MM1 )	/* m03             | m02             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+    MOVQ      ( REGOFF(56, ECX), MM3 )	/* m33             | m32             */
+
+ALIGNTEXT16
+LLBL( G3TPGR_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
+
+    MOVQ      ( MM4, MM5 )		/* x0              | x0              */
+    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
+
+    PFMUL     ( MM1, MM5 )		/* x0*m03          | x0*m02          */
+    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
+
+    PFADD     ( MM3, MM5 )		/* x0*m03+m33      | x0*m02+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+
+    MOVQ      ( MM5, REGOFF(8, EDX) )	/* write r3, r2                      */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TPGR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPGR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
+HIDDEN(_mesa_3dnow_transform_points1_identity)
+GLNAME( _mesa_3dnow_transform_points1_identity ):
+/* Copy x0 of each source vertex to r0 of dest; the matrix is loaded but never read. */
+    PUSH_L    ( ESI )
+    /* dest->size = 1, OR VEC_SIZE_1 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(1), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix (unused here)        */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_4) )	/* count == 0 ? -> done              */
+
+ALIGNTEXT16
+LLBL( G3TPIR_3 ):
+
+    MOVD      ( REGIND(EAX), MM0 )	/*                 | x0              */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    MOVD      ( MM0, REGIND(EDX) )	/*                 | r0              */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_3 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPIR_4 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):
+/* For each source x0 write r0 = x0*m00+m30, r1 = m31, r2 = m32 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 3, OR VEC_SIZE_3 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
+
+ALIGNTEXT16
+LLBL( G3TP3NRR_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    PFMUL     ( MM0, MM4 )		/*                 | x0*m00          */
+
+    PFADD     ( MM2, MM4 )		/* m31             | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+
+    MOVD      ( MM3, REGOFF(8, EDX) )	/* write r2                          */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP3NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3NRR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
+HIDDEN(_mesa_3dnow_transform_points1_perspective)
+GLNAME( _mesa_3dnow_transform_points1_perspective ):
+/* For each source x0 write r0 = x0*m00, r1 = 0, r2 = m32, r3 = 0 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 4, OR VEC_SIZE_4 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/* 0               | m00             */
+    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */
+
+ALIGNTEXT16
+LLBL( G3TPPR_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/* 0               | x0              */
+    PFMUL     ( MM0, MM4 )		/* 0               | x0*m00          */
+
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+    MOVQ      ( MM3, REGOFF(8, EDX) )	/* write r2  (=m32), r3 (=0)         */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPPR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
+HIDDEN(_mesa_3dnow_transform_points1_2d)
+GLNAME( _mesa_3dnow_transform_points1_2d ):
+/* For each source x0 write r0 = x0*m00+m30, r1 = x0*m01+m31 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 2, OR VEC_SIZE_2 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3 ) )	/* count == 0 ? -> done              */
+
+    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
+
+    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
+    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
+
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2R_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):
+/* For each source x0 write r0 = x0*m00+m30, r1 = m31 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 2, OR VEC_SIZE_2 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    PFMUL     ( MM0, MM4 )		/*                 | x0*m00          */
+    PFADD     ( MM2, MM4 )		/* m31             | x0*m00+m30      */
+
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2NRR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
+HIDDEN(_mesa_3dnow_transform_points1_3d)
+GLNAME( _mesa_3dnow_transform_points1_3d ):
+/* For each source x0 write r[j] = x0*m0j + m3j (j = 0..2) to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 3, OR VEC_SIZE_3 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start (named offset) */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_3 ) )	/* count == 0 ? -> done              */
+
+    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
+    MOVD      ( REGOFF(8, ECX), MM1 )	/*                 | m02             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
+
+ALIGNTEXT16
+LLBL( G3TP3R_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
+
+    MOVQ      ( MM4, MM5 )		/* x0              | x0              */
+    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
+
+    PFMUL     ( MM1, MM5 )		/*                 | x0*m02          */
+    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
+
+    PFADD     ( MM3, MM5 )		/*                 | x0*m02+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+
+    MOVD      ( MM5, REGOFF(8, EDX) )	/* write r2                          */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3R_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform2.S b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform2.S
new file mode 100644
index 000000000..49a602b83
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform2.S
@@ -0,0 +1,473 @@
+/* $Id: 3dnow_xform2.S,v 1.1.1.2 2005/07/31 16:46:38 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef USE_3DNOW_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+    SEG_TEXT
+
+#define FRAME_OFFSET	4
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
+HIDDEN(_mesa_3dnow_transform_points2_general)
+GLNAME( _mesa_3dnow_transform_points2_general ):
+/* For each source (x0, x1) write r[j] = x0*m0j + x1*m1j + m3j (j = 0..3) to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 4, OR VEC_SIZE_4 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
+
+    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
+
+    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
+
+    MOVD      ( REGOFF(12, ECX), MM3 )	/*                 | m03             */
+    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )	/* m13             | m03             */
+
+    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
+    MOVQ      ( REGOFF(56, ECX), MM5 )	/* m33             | m32             */
+
+ALIGNTEXT16
+LLBL( G3TPGR_2 ):
+
+    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
+    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
+
+    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
+    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
+
+    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
+
+    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
+    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
+
+    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
+    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
+
+    PFMUL     ( MM3, MM7 )		/* x1*m13          | x0*m03          */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    PFACC     ( MM7, MM6 )		/* x0*m03+x1*m13   | x0*m02+x1*m12   */
+    PFADD     ( MM5, MM6 )		/* x0*...*m13+m33  | x0*...*m12+m32  */
+
+    MOVQ      ( MM6, REGOFF(8, EDX) )	/* write r3, r2                      */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPGR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
+HIDDEN(_mesa_3dnow_transform_points2_perspective)
+GLNAME( _mesa_3dnow_transform_points2_perspective ):
+/* For each source (x0, x1) write r0 = x0*m00, r1 = x1*m11, r2 = m32, r3 = 0 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 4, OR VEC_SIZE_4 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVD      ( REGOFF(56, ECX), MM3 )	/* 0               | m32             */
+
+ALIGNTEXT16
+LLBL( G3TPPR_2 ):
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+    MOVQ      ( MM3, REGOFF(8, EDX) )	/* write r2  (=m32), r3 (=0)         */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPPR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
+HIDDEN(_mesa_3dnow_transform_points2_3d)
+GLNAME( _mesa_3dnow_transform_points2_3d ):
+/* For each source (x0, x1) write r[j] = x0*m0j + x1*m1j + m3j (j = 0..2) to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 3, OR VEC_SIZE_3 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
+
+    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
+
+    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */
+
+    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM5 )	/*                 | m32             */
+
+ALIGNTEXT16
+LLBL( G3TP3R_2 ):
+
+    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
+    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
+
+    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
+    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */
+
+    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
+
+    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
+    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
+
+    MOVQ      ( MM6, MM7 )		/* x1              | x0              */
+    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */
+
+    PFACC     ( MM7, MM6 )		/* ***trash***     | x0*m02+x1*m12   */
+    PFADD     ( MM5, MM6 )		/* ***trash***     | x0*...*m12+m32  */
+
+    MOVD      ( MM6, REGOFF(8, EDX) )	/* write r2                          */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3R_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
+/* For each source (x0, x1) write r0 = x0*m00+m30, r1 = x1*m11+m31, r2 = m32 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 3, OR VEC_SIZE_3 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
+
+ALIGNTEXT16
+LLBL( G3TP3NRR_2 ):
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+
+    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+
+    MOVD      ( MM3, REGOFF(8, EDX) )	/* write r2                          */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP3NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3NRR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
+HIDDEN(_mesa_3dnow_transform_points2_2d)
+GLNAME( _mesa_3dnow_transform_points2_2d ):
+/* For each source (x0, x1) write r[j] = x0*m0j + x1*m1j + m3j (j = 0..1) to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 2, OR VEC_SIZE_2 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3 ) )	/* count == 0 ? -> done              */
+
+    MOVQ      ( REGIND(ECX), MM0 )	/* m01             | m00             */
+    MOVQ      ( REGOFF(16, ECX), MM1 )	/* m11             | m10             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+
+    MOVD      ( REGIND(EAX), MM4 )	/*                 | x0              */
+    MOVD      ( REGOFF(4, EAX), MM5 )	/*                 | x1              */
+
+    PUNPCKLDQ ( MM4, MM4 )		/* x0              | x0              */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    PFMUL     ( MM0, MM4 )		/* x0*m01          | x0*m00          */
+    PUNPCKLDQ ( MM5, MM5 )		/* x1              | x1              */
+
+    PFMUL     ( MM1, MM5 )		/* x1*m11          | x1*m10          */
+    PFADD     ( MM2, MM4 )		/* x0*m01+m31      | x0*m00+m30      */
+
+    PFADD     ( MM5, MM4 )		/* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2R_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
+/* For each source (x0, x1) write r0 = x0*m00+m30, r1 = x1*m11+m31 to dest. */
+    PUSH_L    ( ESI )
+    /* dest->size = 2, OR VEC_SIZE_2 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )	/* count == 0 ? -> done              */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+    PFADD     ( MM2, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
+
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r1, r0                      */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2NRR_3 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
+HIDDEN(_mesa_3dnow_transform_points2_identity)
+GLNAME( _mesa_3dnow_transform_points2_identity ):
+/* Copy (x0, x1) of each source vertex to (r0, r1) of dest; the matrix is loaded but never read. */
+    PUSH_L    ( ESI )
+    /* dest->size = 2, OR VEC_SIZE_2 into dest->flags, dest->count = source->count */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest->start                 */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix (unused here)        */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex count                */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source->start               */
+    /* Skip to the exit label: jumping to the loop head with count 0 would wrap the counter. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_4 ) )	/* count == 0 ? -> done              */
+
+ALIGNTEXT16
+LLBL( G3TPIR_3 ):
+
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+
+    MOVQ      ( MM0, REGIND(EDX) )	/* r1              | r0              */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_3 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPIR_4 ):
+
+    FEMMS
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform3.S b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform3.S
new file mode 100644
index 000000000..d9fac6612
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform3.S
@@ -0,0 +1,557 @@
+/* $Id: 3dnow_xform3.S,v 1.1.1.2 2005/07/31 16:46:40 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef USE_3DNOW_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+    SEG_TEXT
+
+#define FRAME_OFFSET	4
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
+HIDDEN(_mesa_3dnow_transform_points3_general)
+GLNAME( _mesa_3dnow_transform_points3_general ):
+/* General 4x4 transform of 3-component points: r[j] = x0*m[j] + x1*m[4+j] + x2*m[8+j] + m[12+j], j = 0..3. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 4 / VEC_SIZE_4 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_2 ) )
+
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+
+ALIGNTEXT16
+LLBL( G3TPGR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM2 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+
+    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
+    PUNPCKLDQ ( MM2, MM2 )		/* x2              | x2              */
+
+    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
+    MOVQ      ( MM2, MM5 )		/* x2              | x2              */
+
+    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
+    PFMUL     ( REGOFF(32, ECX), MM2 )	/* x2*m9           | x2*m8           */
+
+    MOVQ      ( MM0, MM3 )		/* x0              | x0              */
+    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
+
+    MOVQ      ( MM1, MM4 )		/* x1              | x1              */
+    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
+
+    PFADD     ( REGOFF(48, ECX), MM2 )	/* x2*m9+m13       | x2*m8+m12       */
+    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
+
+    PFADD     ( REGOFF(56, ECX), MM5 )	/* x2*m11+m15      | x2*m10+m14      */
+    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
+
+    PFMUL     ( REGOFF(8, ECX), MM3 )	/* x0*m3           | x0*m2           */
+    PFADD     ( MM1, MM2 )		/* r1              | r0              */
+
+    PFMUL     ( REGOFF(24, ECX), MM4 )	/* x1*m7           | x1*m6           */
+    ADD_L     ( CONST(16), EDX )	/* next output vertex                */
+/* EDX already points at the next output, so the stores below use negative offsets. */
+    PFADD     ( MM3, MM4 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
+    MOVQ      ( MM2, REGOFF(-16, EDX) )	/* write r0, r1                      */
+
+    PFADD     ( MM4, MM5 )		/* r3              | r2              */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPGR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
+HIDDEN(_mesa_3dnow_transform_points3_perspective)
+GLNAME( _mesa_3dnow_transform_points3_perspective ):
+/* Perspective-matrix transform of 3-component points: r0 = x0*m00+x2*m20, r1 = x1*m11+x2*m21, r2 = x2*m22+m32, r3 = -x2. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 4 / VEC_SIZE_4 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_2 ) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+/* Loop-invariant matrix elements are kept in MM0..MM3 for the whole loop. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVQ      ( REGOFF(32, ECX), MM1 )	/* m21             | m20             */
+    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
+
+    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
+
+ALIGNTEXT16
+LLBL( G3TPPR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+
+    PXOR      ( MM7, MM7 )		/* 0               | 0               */
+    MOVQ      ( MM5, MM6 )		/*                 | x2              */
+
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+    PFSUB     ( MM5, MM7 )		/*                 | -x2             */
+
+    PFMUL     ( MM2, MM6 )		/*                 | x2*m22          */
+    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    PFMUL     ( MM1, MM5 )		/* x2*m21          | x2*m20          */
+
+    PFADD     ( MM3, MM6 )		/*                 | x2*m22+m32      */
+    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */
+/* EDX already points at the next output, so the stores below use negative offsets. */
+    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    MOVD      ( MM6, REGOFF(-8, EDX) )	/* write r2                          */
+
+    MOVD      ( MM7, REGOFF(-4, EDX) )	/* write r3                          */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPPR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
+HIDDEN(_mesa_3dnow_transform_points3_3d)
+GLNAME( _mesa_3dnow_transform_points3_3d ):
+/* 3D-matrix transform of 3-component points: r[j] = x0*m[j] + x1*m[4+j] + x2*m[8+j] + m[12+j], j = 0..2. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 3 / VEC_SIZE_3 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_2 ) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+
+    MOVD      ( REGOFF(8, ECX), MM7 )	/*                 | m2              */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM7 )	/* m6              | m2              */
+
+
+ALIGNTEXT16
+LLBL( G3TP3R_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+
+    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PUNPCKLDQ ( MM2, MM2 )		/* x0              | x0              */
+    MOVQ      ( MM0, MM3 )		/* x1              | x0              */
+
+    PFMUL     ( REGIND(ECX), MM2 )	/* x0*m1           | x0*m0           */
+    PUNPCKHDQ ( MM3, MM3 )		/* x1              | x1              */
+
+    MOVQ      ( MM1, MM4 )		/*                 | x2              */
+    PFMUL     ( REGOFF(16, ECX), MM3 )	/* x1*m5           | x1*m4           */
+
+    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
+    PFADD     ( MM2, MM3 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
+
+    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
+    PFADD     ( REGOFF(48, ECX), MM3 )	/* x0*m1+...+m13   | x0*m0+x1*m4+m12 */
+
+    PFMUL     ( MM7, MM0 )		/* x1*m6           | x0*m2           */
+    PFADD     ( MM4, MM3 )		/* r1              | r0              */
+
+    PFMUL     ( REGOFF(40, ECX), MM1 )	/*                 | x2*m10          */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m14             | x2*m10          */
+
+    PFACC     ( MM0, MM1 )		/* x0*m2+x1*m6     | x2*m10+m14      */
+
+    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    PFACC     ( MM1, MM1 )		/*                 | r2              */
+
+    MOVD      ( MM1, REGOFF(-8, EDX) )	/* write r2                          */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3R_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
+/* Scale+translate transform of 3-component points: r0 = x0*m00+m30, r1 = x1*m11+m31, r2 = x2*m22+m32. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 3 / VEC_SIZE_3 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_2 ) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+/* Loop-invariant matrix elements are kept in MM0..MM3 for the whole loop. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
+    PUNPCKLDQ ( MM2, MM2 )		/* m22             | m22             */
+
+    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
+    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
+
+    PUNPCKLDQ ( MM3, MM3 )		/* m32             | m32             */
+
+
+ALIGNTEXT16
+LLBL( G3TP3NRR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* source is only read: no write intent */
+
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+
+    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
+    PFMUL     ( MM2, MM5 )		/*                 | x2*m22          */
+
+    PFADD     ( MM3, MM5 )		/*                 | x2*m22+m32      */
+    MOVQ      ( MM4, REGIND(EDX) )	/* write r0, r1                      */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+/* The MMX store below leaves EFLAGS alone, so JNZ still tests the DEC_L result. */
+    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2                          */
+    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3NRR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
+HIDDEN(_mesa_3dnow_transform_points3_2d)
+GLNAME( _mesa_3dnow_transform_points3_2d ):
+/* 2D-matrix transform of 3-component points: r0 = x0*m00+x1*m10+m30, r1 = x0*m01+x1*m11+m31, r2 = x2 (copied). */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 3 / VEC_SIZE_3 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_3) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+/* Loop-invariant matrix elements are kept in MM0..MM2 for the whole loop. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
+
+    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2R_2 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+
+    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
+    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
+/* PFACC sums each register's two halves: dest pair -> low result, source pair -> high result. */
+    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
+
+    PFADD     ( MM2, MM3 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
+    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2R_3 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
+/* 2D scale+translate transform of 3-component points: r0 = x0*m00+m30, r1 = x1*m11+m31, r2 = x2 (copied). */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 3 / VEC_SIZE_3 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_2 ) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+/* Loop-invariant matrix elements are kept in MM0 and MM1 for the whole loop. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
+
+
+ALIGNTEXT16
+LLBL( G3TP2NRR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
+/* EDX already points at the next output, so the stores below use negative offsets. */
+    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2NRR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
+HIDDEN(_mesa_3dnow_transform_points3_identity)
+GLNAME( _mesa_3dnow_transform_points3_identity ):
+/* Identity transform of 3-component points: copies x0,x1,x2 of every source vertex to dest. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 3 / VEC_SIZE_3 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )			/* matrix pointer; never dereferenced */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_2 ) )
+
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+
+ALIGNTEXT16
+LLBL( G3TPIR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
+    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+/* The MMX stores below leave EFLAGS alone, so JNZ still tests the DEC_L result. */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */
+
+    MOVD      ( MM1, REGOFF(-8, EDX) )	/*                 | r2              */
+    JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPIR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform4.S b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform4.S
new file mode 100644
index 000000000..4df8ae8e2
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/3dnow_xform4.S
@@ -0,0 +1,566 @@
+/* $Id: 3dnow_xform4.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef USE_3DNOW_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+    SEG_TEXT
+
+#define FRAME_OFFSET	4
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
+HIDDEN(_mesa_3dnow_transform_points4_general)
+GLNAME( _mesa_3dnow_transform_points4_general ):
+/* General 4x4 transform of 4-component points: r[j] = x0*m[j] + x1*m[4+j] + x2*m[8+j] + x3*m[12+j], j = 0..3. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 4 / VEC_SIZE_4 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPGR_2 ) )
+
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+
+ALIGNTEXT16
+LLBL( G3TPGR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1            | x0                */
+    MOVQ      ( REGOFF(8, EAX), MM4 )	/* x3            | x2                */
+	
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* next input vertex                 */
+	
+    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
+    MOVQ      ( MM4, MM6 )		/* x3              | x2              */
+
+    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
+    PUNPCKHDQ ( MM2, MM2 )		/* x1              | x1              */
+
+    MOVQ      ( MM0, MM1 )		/* x0              | x0              */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
+    MOVQ      ( MM2, MM3 )		/* x1              | x1              */
+
+    PFMUL     ( REGOFF(8, ECX), MM1 )	/* x0*m3           | x0*m2           */
+    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
+
+    PFMUL     ( REGOFF(16, ECX), MM2 )	/* x1*m5           | x1*m4           */
+    MOVQ      ( MM4, MM5 )		/* x2              | x2              */
+
+    PFMUL     ( REGOFF(24, ECX), MM3 )	/* x1*m7           | x1*m6           */
+    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
+
+    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
+    MOVQ      ( MM6, MM7 )		/* x3              | x3              */
+
+    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
+    PFADD     ( MM0, MM2 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
+
+    PFMUL     ( REGOFF(48, ECX), MM6 )	/* x3*m13          | x3*m12          */
+    PFADD     ( MM1, MM3 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
+
+    PFMUL     ( REGOFF(56, ECX), MM7 )	/* x3*m15          | x3*m14          */
+    PFADD     ( MM4, MM6 )		/* x2*m9+x3*m13    | x2*m8+x3*m12    */
+
+    PFADD     ( MM5, MM7 )		/* x2*m11+x3*m15   | x2*m10+x3*m14   */
+    PFADD     ( MM2, MM6 )		/* r1              | r0              */
+
+    PFADD     ( MM3, MM7 )		/* r3              | r2              */
+    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */
+
+    MOVQ      ( MM7, REGOFF(-8, EDX) )	/* write r2, r3                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPGR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
+HIDDEN(_mesa_3dnow_transform_points4_perspective)
+GLNAME( _mesa_3dnow_transform_points4_perspective ):
+/* Perspective-matrix transform of 4-component points: r0 = x0*m00+x2*m20, r1 = x1*m11+x2*m21, r2 = x2*m22+x3*m32, r3 = -x2. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 4 / VEC_SIZE_4 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPPR_2 ) )
+
+    PREFETCH  ( REGIND(EAX) )		/* first input vertex                */
+    PREFETCHW ( REGIND(EDX) )		/* first output vertex, write intent */
+/* Loop invariants: MM0..MM2 hold matrix elements, MM7 stays zero for the negation. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVD      ( REGOFF(40, ECX), MM1 )	/*                 | m22             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m32             | m22             */
+
+    MOVQ      ( REGOFF(32, ECX), MM2 )	/* m21             | m20             */
+    PXOR      ( MM7, MM7 )		/* 0               | 0               */
+
+ALIGNTEXT16
+LLBL( G3TPPR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
+    MOVD      ( REGOFF(8, EAX), MM3 )	/*                 | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex     */
+
+    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+
+    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PFMUL     ( MM2, MM5 )		/* x2*m21          | x2*m20          */
+    PFSUBR    ( MM7, MM3 )		/*                 | -x2             */
+
+    PFMUL     ( MM1, MM6 )		/* x3*m32          | x2*m22          */
+    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */
+/* PFACC sums each register's two halves; MM3's upper half is zero, so the high result is -x2. */
+    PFACC     ( MM3, MM6 )		/* -x2             | x2*m22+x3*m32   */
+    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
+
+    MOVQ      ( MM6, REGOFF(-8, EDX) )	/* write r2, r3                      */
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPPR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
+HIDDEN(_mesa_3dnow_transform_points4_3d)
+GLNAME( _mesa_3dnow_transform_points4_3d ):
+/* 3D-matrix transform of 4-component points: r[j] = x0*m[j] + x1*m[4+j] + x2*m[8+j] + x3*m[12+j] for j = 0..2, r3 = x3. */
+    PUSH_L    ( ESI )
+/* ECX = dest, ESI = matrix, EAX = source; record size 4 / VEC_SIZE_4 and copy the vertex count to dest. */
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3R_2 ) )
+/* Loop-invariant third-column elements are kept in MM6 and MM7 for the whole loop. */
+    MOVD      ( REGOFF(8, ECX), MM6 )	/*                 | m2              */
+    PUNPCKLDQ ( REGOFF(24, ECX), MM6 )	/* m6              | m2              */
+
+    MOVD      ( REGOFF(40, ECX), MM7 )	/*                 | m10             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM7 )	/* m14             | m10             */
+
+ALIGNTEXT16
+LLBL( G3TP3R_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+    PREFETCH  ( REGOFF(32, EAX) )	/* hopefully array is tightly packed */
+
+    MOVQ      ( REGIND(EAX), MM2 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM3 )	/* x3              | x2              */
+
+    MOVQ      ( MM2, MM0 )		/* x1              | x0              */
+    MOVQ      ( MM3, MM4 )		/* x3              | x2              */
+
+    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
+    MOVQ      ( MM4, MM5 )		/* x3              | x2              */
+
+    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
+    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
+
+    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
+    PUNPCKLDQ ( MM3, MM3 )		/* x2              | x2              */
+
+    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
+    PUNPCKHDQ ( MM4, MM4 )		/* x3              | x3              */
+
+    PFMUL     ( MM6, MM2 )		/* x1*m6           | x0*m2           */
+    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
+
+    PFMUL     ( REGOFF(32, ECX), MM3 )	/* x2*m9           | x2*m8           */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PFMUL     ( REGOFF(48, ECX), MM4 )	/* x3*m13          | x3*m12          */
+    PFADD     ( MM1, MM3 )		/* x0*m1+..+x2*m9  | x0*m0+...+x2*m8 */
+
+    PFMUL     ( MM7, MM5 )		/* x3*m14          | x2*m10          */
+    PFADD     ( MM3, MM4 )		/* r1              | r0              */
+/* MM0 is free again here; it is reloaded with x3 (upper half zero) so the final PFACC yields r3 = x3. */
+    PFACC     ( MM2, MM5 )		/* x0*m2+x1*m6     | x2*m10+x3*m14   */
+    MOVD      ( REGOFF(12, EAX), MM0 )	/*                 | x3              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PFACC     ( MM0, MM5 )		/* r3              | r2              */
+
+    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3R_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
+GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
+/* Scale+translate transform of 4-component points: r0 = x0*m00+x3*m30, r1 = x1*m11+x3*m31, r2 = x2*m22+x3*m32, r3 = x3. */
+    PUSH_L    ( ESI )
+    MOV_L     ( ARG_DEST, ECX )
+    MOV_L     ( ARG_MATRIX, ESI )
+    MOV_L     ( ARG_SOURCE, EAX )
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+/* NOTE(review): ARG_* are presumably ESP-relative via FRAME_OFFSET (one push) - keep them above this push. */
+    PUSH_L    ( EDI )
+/* EDX = dest start, ECX = matrix, ESI = vertex count, EDI = source stride (bytes), EAX = source start. */
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )
+    MOV_L     ( ESI, ECX )
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )
+/* Skip the loop entirely when the source holds no vertices. */
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP3NRR_2 ) )
+/* Loop-invariant matrix elements are kept in MM0..MM2 for the whole loop. */
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
+    PUNPCKLDQ ( REGOFF(56, ECX), MM2 )	/* m32             | m22             */
+
+    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP3NRR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
+    MOVD      ( REGOFF(12, EAX), MM7 )	/*                 | x3              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGOFF(32, EAX) )	/* 32 bytes past the next vertex     */
+
+    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+
+    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
+    PFMUL     ( MM2, MM5 )		/* x3*m32          | x2*m22          */
+/* PFACC sums each register's two halves; MM7's upper half is zero, so the high result is x3. */
+    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
+    PFACC     ( MM7, MM5 )		/* x3              | x2*m22+x3*m32   */
+
+    PFADD     ( MM6, MM4 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP3NRR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state            */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+/* _mesa_3dnow_transform_points4_2d: for each 4-float source vertex x, writes
+ * r0 = x0*m00 + x1*m10 + x3*m30, r1 = x0*m01 + x1*m11 + x3*m31, r2 = x2 and
+ * r3 = x3 to dest; matrix floats are read at byte offsets 0,4,16,20,48,52. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
+HIDDEN(_mesa_3dnow_transform_points4_2d)
+GLNAME( _mesa_3dnow_transform_points4_2d ):
+
+    PUSH_L    ( ESI )
+
+    MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* flag it     */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* dest count = source count */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer  */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source byte stride  */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2R_2 ) )	/* count == 0 -> nothing to transform */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
+
+    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
+
+    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2R_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* prefetch the next source vertex   */
+
+    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
+    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
+
+    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
+    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
+
+    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+
+    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
+    PFMUL     ( MM2, MM6 )		/* x3*m31          | x3*m30          */
+
+    PFADD     ( MM6, MM3 )		/* r1              | r0              */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */
+
+    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TP2R_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2R_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state before returning */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+/* _mesa_3dnow_transform_points4_2d_no_rot: for each 4-float source vertex x,
+ * writes r0 = x0*m00 + x3*m30, r1 = x1*m11 + x3*m31, r2 = x2 and r3 = x3 to
+ * dest; matrix floats are read at byte offsets 0, 20, 48 and 52. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
+HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
+GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
+
+    PUSH_L    ( ESI )
+
+    MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (moved to ECX below) */
+    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* flag it     */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* dest count = source count */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer  */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix                      */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source byte stride  */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TP2NRR_3 ) )	/* count == 0 -> nothing to transform */
+
+    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
+    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
+
+    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
+
+ALIGNTEXT16
+LLBL( G3TP2NRR_2 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
+
+    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM5 )	/* x3              | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* prefetch the next source vertex   */
+
+    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
+    MOVQ      ( MM5, MM6 )		/* x3              | x2              */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    PUNPCKHDQ ( MM6, MM6 )		/* x3              | x3              */
+
+    PFMUL     ( MM1, MM6 )		/* x3*m31          | x3*m30          */
+    PFADD     ( MM4, MM6 )		/* x1*m11+x3*m31   | x0*m00+x3*m30   */
+
+    MOVQ      ( MM6, REGOFF(-16, EDX) )	/* write r0, r1                      */
+    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3 (x2, x3 unchanged)   */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+
+    JNZ       ( LLBL( G3TP2NRR_2 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TP2NRR_3 ):
+
+    FEMMS				/* leave MMX/3DNow! state before returning */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+
+/* _mesa_3dnow_transform_points4_identity: copies each 4-float source vertex
+ * (16 bytes) unchanged to dest, stepping the source by its byte stride.  The
+ * matrix pointer is loaded into ECX but never dereferenced in this routine. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
+HIDDEN(_mesa_3dnow_transform_points4_identity)
+GLNAME( _mesa_3dnow_transform_points4_identity ):
+
+    PUSH_L    ( ESI )
+
+    MOV_L     ( ARG_DEST, ECX )		/* ECX = dest vector                 */
+    MOV_L     ( ARG_MATRIX, ESI )	/* ESI = matrix (unused afterwards)  */
+    MOV_L     ( ARG_SOURCE, EAX )	/* EAX = source vector               */
+    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* dest size = 4       */
+    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )	/* flag it     */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )	/* dest count = source count */
+    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )
+
+    PUSH_L    ( EDI )
+
+    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = dest write pointer  */
+    MOV_L     ( ESI, ECX )			/* ECX = matrix (not read)           */
+    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter      */
+    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source byte stride  */
+    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source read pointer */
+
+    TEST_L    ( ESI, ESI )
+    JZ        ( LLBL( G3TPIR_2 ) )	/* count == 0 -> nothing to copy     */
+
+ALIGNTEXT16
+LLBL( G3TPIR_1 ):
+
+    PREFETCHW ( REGOFF(32, EDX) )       /* prefetch 2 vertices ahead         */
+	
+    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
+    MOVQ      ( REGOFF(8, EAX), MM1 )	/* x3              | x2              */
+
+    ADD_L     ( EDI, EAX )		/* next vertex                       */
+    PREFETCH  ( REGIND(EAX) )		/* prefetch the next source vertex   */
+
+    ADD_L     ( CONST(16), EDX )	/* next r                            */
+    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */
+
+    MOVQ      ( MM1, REGOFF(-8, EDX) )	/* r3              | r2              */
+
+    DEC_L     ( ESI )			/* decrement vertex counter          */
+    JNZ       ( LLBL( G3TPIR_1 ) )	/* cnt > 0 ? -> process next vertex  */
+
+LLBL( G3TPIR_2 ):
+
+    FEMMS				/* leave MMX/3DNow! state before returning */
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/Makefile b/nx-X11/extras/Mesa/src/mesa/x86/Makefile
new file mode 100644
index 000000000..3c6a6b11c
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/Makefile
@@ -0,0 +1,46 @@
+# src/mesa/x86/Makefile
+# Builds gen_matypes and uses it to generate matypes.h, which the objects below need.
+TOP = ../../..
+include $(TOP)/configs/current
+
+# Include path used when compiling gen_matypes.c.
+INCLUDE_DIRS = \
+	-I$(TOP)/include/GL \
+	-I$(TOP)/include \
+	-I.. \
+	-I../main \
+	-I../math \
+	-I../glapi \
+	-I../tnl
+
+.PHONY: default clean
+default: gen_matypes matypes.h
+
+clean:
+	rm -f matypes.h matypes.h.tmp gen_matypes
+
+# Generator program; the matypes.h rule below runs it.
+gen_matypes: gen_matypes.c
+	$(CC) $(INCLUDE_DIRS) $(CFLAGS) gen_matypes.c -o gen_matypes
+
+# Special rule: generate into a temp file so a failed run cannot leave a truncated matypes.h
+matypes.h: ../main/mtypes.h ../tnl/t_context.h gen_matypes
+	./gen_matypes > matypes.h.tmp && mv -f matypes.h.tmp matypes.h
+
+common_x86_asm.o: matypes.h
+3dnow_normal.o: matypes.h
+3dnow_xform1.o: matypes.h
+3dnow_xform2.o: matypes.h
+3dnow_xform3.o: matypes.h
+3dnow_xform4.o: matypes.h
+mmx_blend.o: matypes.h
+sse_normal.o: matypes.h
+sse_xform1.o: matypes.h
+sse_xform2.o: matypes.h
+sse_xform3.o: matypes.h
+sse_xform4.o: matypes.h
+x86_cliptest.o: matypes.h
+x86_xform2.o: matypes.h
+x86_xform3.o: matypes.h
+x86_xform4.o: matypes.h
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/assyntax.h b/nx-X11/extras/Mesa/src/mesa/x86/assyntax.h
new file mode 100644
index 000000000..9bceacebd
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/assyntax.h
@@ -0,0 +1,1746 @@
+
+#ifndef __ASSYNTAX_H__
+#define __ASSYNTAX_H__
+
+/*
+ * Copyright 1992 Vrije Universiteit, The Netherlands
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the Vrije Universiteit not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  The Vrije Universiteit makes no
+ * representations about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ * The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
+ * IN NO EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL,
+ * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * assyntax.h
+ *
+ * Select the syntax appropriate to the 386 assembler being used
+ * To add support for more assemblers add more columns to the CHOICE
+ * macro.  Note that register names must also have uppercase names
+ * to avoid macro recursion. e.g., #define ah %ah recurses!
+ *
+ * NB 1.  Some of the macros for certain assemblers imply that the code is to
+ *	  run in protected mode!!  Caveat emptor.
+ *
+ * NB 2.  486 specific instructions are not included.  This is to discourage
+ *	  their accidental use in code that is intended to run on 386 and 486
+ *	  systems.
+ *
+ * Supported assemblers:
+ *
+ * (a) AT&T SysVr4 as(1):	define ATT_ASSEMBLER
+ * (b) GNU Assembler gas:	define GNU_ASSEMBLER (default)
+ * (c) Amsterdam Compiler kit:	define ACK_ASSEMBLER
+ * (d) The Netwide Assembler:	define NASM_ASSEMBLER
+ * (e) Microsoft Assembler:	define MASM_ASSEMBLER (UNTESTED!)
+ *
+ * The following naming conventions have been used to identify the various
+ * data types:
+ *		_SR = segment register version
+ *	Integer:
+ *		_Q = quadword	= 64 bits
+ *		_L = long	= 32 bits
+ *		_W = short	= 16 bits
+ *		_B = byte	=  8 bits
+ *	Floating-point:
+ *		_X = m80real	= 80 bits
+ *		_D = double	= 64 bits
+ *		_S = single	= 32 bits
+ *
+ * Author: Gregory J. Sharp, Sept 1992
+ *         Vrije Universiteit, Amsterdam, The Netherlands
+ *
+ *         [support for Intel syntax added by Josh Vanderhoof, 1999]
+ */
+
+#if !(defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER))
+
+/* Default to ATT_ASSEMBLER when SVR4 or SYSV is defined, unless GNU_ASSEMBLER was requested */
+#if (defined(SVR4) || defined(SYSV)) && !defined(GNU_ASSEMBLER)
+#define ATT_ASSEMBLER
+#endif
+/* If none of ATT/GNU/ACK has been selected by now, fall back to GNU_ASSEMBLER. */
+#if !defined(ATT_ASSEMBLER) && !defined(GNU_ASSEMBLER) && !defined(ACK_ASSEMBLER)
+#define GNU_ASSEMBLER
+#endif
+/* Token pasting: the ## operator where selected below, else the empty-comment form. */
+#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__))
+#define CONCAT(x, y)		x ## y
+#define CONCAT3(x, y, z)	x ## y ## z
+#else
+#define CONCAT(x, y)		x/**/y
+#define CONCAT3(x, y, z)	x/**/y/**/z
+#endif
+
+#ifdef ACK_ASSEMBLER
+
+/* Assume we write code for 32-bit protected mode! */
+
+/* Register names for the ACK assembler: no % prefix, and the E-registers map to the plain 16-bit names */
+#define AL		al
+#define AH		ah
+#define AX		ax
+#define EAX		ax
+#define BL		bl
+#define BH		bh
+#define BX		bx
+#define EBX		bx
+#define CL		cl
+#define CH		ch
+#define CX		cx
+#define ECX		cx
+#define DL		dl
+#define DH		dh
+#define DX		dx
+#define EDX		dx
+#define BP		bp
+#define EBP		bp
+#define SI		si
+#define ESI		si
+#define DI		di
+#define EDI		di
+#define SP		sp
+#define ESP		sp
+#define CS		cs
+#define SS		ss
+#define DS		ds
+#define ES		es
+#define FS		fs
+#define GS		gs
+/* Control Registers */
+#define CR0		cr0
+#define CR1		cr1
+#define CR2		cr2
+#define CR3		cr3
+/* Debug Registers */
+#define DR0		dr0
+#define DR1		dr1
+#define DR2		dr2
+#define DR3		dr3
+#define DR4		dr4
+#define DR5		dr5
+#define DR6		dr6
+#define DR7		dr7
+/* Floating-point Stack */
+#define ST		st
+
+#define AS_BEGIN	.sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text
+
+
+#define _WTOG		o16	/* word toggle for _W instructions */
+#define _LTOG			/* long toggle for _L instructions */
+#define ADDR_TOGGLE	a16
+#define OPSZ_TOGGLE	o16
+#define USE16		.use16
+#define USE32		.use32
+
+#define CHOICE(a,b,c)	c	/* ACK: select the third alternative */
+
+#else /* AT&T or GAS */
+
+/* Redefine register names for GAS & AT&T assemblers */
+#define AL		%al
+#define AH		%ah
+#define AX		%ax
+#define EAX		%eax
+#define BL		%bl
+#define BH		%bh
+#define BX		%bx
+#define EBX		%ebx
+#define CL		%cl
+#define CH		%ch
+#define CX		%cx
+#define ECX		%ecx
+#define DL		%dl
+#define DH		%dh
+#define DX		%dx
+#define EDX		%edx
+#define BP		%bp
+#define EBP		%ebp
+#define SI		%si
+#define ESI		%esi
+#define DI		%di
+#define EDI		%edi
+#define SP		%sp
+#define ESP		%esp
+#define CS		%cs
+#define SS		%ss
+#define DS		%ds
+#define ES		%es
+#define FS		%fs
+#define GS		%gs
+/* Control Registers */
+#define CR0		%cr0
+#define CR1		%cr1
+#define CR2		%cr2
+#define CR3		%cr3
+/* Debug Registers */
+#define DR0		%db0
+#define DR1		%db1
+#define DR2		%db2
+#define DR3		%db3
+#define DR4		%db4
+#define DR5		%db5
+#define DR6		%db6
+#define DR7		%db7
+/* Floating-point Stack: ST(n) pastes n onto _STX to pick one of the names below */
+#define _STX0		%st(0)
+#define _STX1		%st(1)
+#define _STX2		%st(2)
+#define _STX3		%st(3)
+#define _STX4		%st(4)
+#define _STX5		%st(5)
+#define _STX6		%st(6)
+#define _STX7		%st(7)
+#define ST(x)		CONCAT(_STX,x)
+#ifdef GNU_ASSEMBLER
+#define ST0		%st(0)
+#else
+#define ST0		%st
+#endif
+/* MMX Registers */
+#define MM0		%mm0
+#define MM1		%mm1
+#define MM2		%mm2
+#define MM3		%mm3
+#define MM4		%mm4
+#define MM5		%mm5
+#define MM6		%mm6
+#define MM7		%mm7
+/* SSE Registers */
+#define XMM0		%xmm0
+#define XMM1		%xmm1
+#define XMM2		%xmm2
+#define XMM3		%xmm3
+#define XMM4		%xmm4
+#define XMM5		%xmm5
+#define XMM6		%xmm6
+#define XMM7		%xmm7
+
+#define AS_BEGIN
+#define USE16
+#define USE32
+
+#ifdef GNU_ASSEMBLER
+
+#define ADDR_TOGGLE	aword
+#define OPSZ_TOGGLE	word
+
+#define CHOICE(a,b,c)	b	/* GNU: select the second alternative */
+
+#else
+/*
+ * AT&T ASSEMBLER SYNTAX
+ * *********************
+ */
+#define CHOICE(a,b,c)	a	/* AT&T: select the first alternative */
+
+#define ADDR_TOGGLE	addr16
+#define OPSZ_TOGGLE	data16
+
+#endif /* GNU_ASSEMBLER */
+#endif /* ACK_ASSEMBLER */
+
+/* GLNAME(a) is a itself when the test below holds, otherwise _a.  && binds tighter than || in the test. */
+#if defined(__QNX__) || defined(Lynx) || (defined(SYSV) || defined(SVR4)) && !defined(ACK_ASSEMBLER) || defined(__ELF__) || defined(__GNU__) || defined(__GNUC__) && !defined(__DJGPP__) && !defined(__MINGW32__)
+#define GLNAME(a)	a
+#else
+#define GLNAME(a)	CONCAT(_,a)
+#endif
+
+
+	/****************************************/
+	/*					*/
+	/*	Select the various choices	*/
+	/*					*/
+	/****************************************/
+
+
+/* Redefine assembler directives; CHOICE arguments are (AT&T, GNU, ACK) */
+/*********************************/
+#define GLOBL		CHOICE(.globl, .globl, .extern)
+#define GLOBAL		GLOBL
+#define EXTERN		GLOBL
+#ifndef __AOUT__
+#define ALIGNTEXT32	CHOICE(.align 32, .balign 32, .align 32)
+#define ALIGNTEXT16	CHOICE(.align 16, .balign 16, .align 16)
+#define ALIGNTEXT8	CHOICE(.align 8, .balign 8, .align 8)
+#define ALIGNTEXT4	CHOICE(.align 4, .balign 4, .align 4)
+#define ALIGNTEXT2	CHOICE(.align 2, .balign 2, .align 2)
+/* ALIGNTEXTnifNOP is the same as ALIGNTEXTn, but only if the space is
+ * guaranteed to be filled with NOPs.  Otherwise (ACK column) it expands to nothing.
+ */
+#define ALIGNTEXT32ifNOP	CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/)
+#define ALIGNTEXT16ifNOP	CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/)
+#define ALIGNTEXT8ifNOP	CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/)
+#define ALIGNTEXT4ifNOP	CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/)
+#define ALIGNDATA32	CHOICE(.align 32, .balign ARG2(32,0x0), .align 32)
+#define ALIGNDATA16	CHOICE(.align 16, .balign ARG2(16,0x0), .align 16)
+#define ALIGNDATA8	CHOICE(.align 8, .balign ARG2(8,0x0), .align 8)
+#define ALIGNDATA4	CHOICE(.align 4, .balign ARG2(4,0x0), .align 4)
+#define ALIGNDATA2	CHOICE(.align 2, .balign ARG2(2,0x0), .align 2)
+#else
+/* 'as -aout' on FreeBSD doesn't have .balign; the GNU column passes log2 of the alignment to .align (5 for 32, 4 for 16, ...) */
+#define ALIGNTEXT32	CHOICE(.align 32, .align ARG2(5,0x90), .align 32)
+#define ALIGNTEXT16	CHOICE(.align 16, .align ARG2(4,0x90), .align 16)
+#define ALIGNTEXT8	CHOICE(.align 8, .align ARG2(3,0x90), .align 8)
+#define ALIGNTEXT4	CHOICE(.align 4, .align ARG2(2,0x90), .align 4)
+#define ALIGNTEXT2	CHOICE(.align 2, .align ARG2(1,0x90), .align 2)
+/* ALIGNTEXTnifNOP is the same as ALIGNTEXTn, but only if the space is
+ * guaranteed to be filled with NOPs.  Otherwise (ACK column) it expands to nothing.
+ */
+#define ALIGNTEXT32ifNOP	CHOICE(.align 32, .align ARG2(5,0x90), /*can't do it*/)
+#define ALIGNTEXT16ifNOP	CHOICE(.align 16, .align ARG2(4,0x90), /*can't do it*/)
+#define ALIGNTEXT8ifNOP	CHOICE(.align 8, .align ARG2(3,0x90), /*can't do it*/)
+#define ALIGNTEXT4ifNOP	CHOICE(.align 4, .align ARG2(2,0x90), /*can't do it*/)
+#define ALIGNDATA32	CHOICE(.align 32, .align ARG2(5,0x0), .align 32)
+#define ALIGNDATA16	CHOICE(.align 16, .align ARG2(4,0x0), .align 16)
+#define ALIGNDATA8	CHOICE(.align 8, .align ARG2(3,0x0), .align 8)
+#define ALIGNDATA4	CHOICE(.align 4, .align ARG2(2,0x0), .align 4)
+#define ALIGNDATA2	CHOICE(.align 2, .align ARG2(1,0x0), .align 2)
+#endif /* __AOUT__ */
+#define FILE(s)		CHOICE(.file s, .file s, .file s)
+#define STRING(s)	CHOICE(.string s, .asciz s, .asciz s)
+#define D_LONG		CHOICE(.long, .long, .data4)
+#define D_WORD		CHOICE(.value, .short, .data2)
+#define D_BYTE		CHOICE(.byte, .byte, .data1)
+#define SPACE		CHOICE(.comm, .space, .space)
+#define COMM		CHOICE(.comm, .comm, .comm)
+#define SEG_DATA	CHOICE(.data, .data, .sect .data)
+#define SEG_TEXT	CHOICE(.text, .text, .sect .text)
+#define SEG_BSS		CHOICE(.bss, .bss, .sect .bss)
+
+#ifdef GNU_ASSEMBLER
+#define D_SPACE(n)	. = . + n
+#else
+#define D_SPACE(n)	.space n
+#endif
+
+/* Addressing Modes */
+/* Immediate Mode: AT&T and GNU prefix the operand with $, ACK uses it as is */
+#define ADDR(a)		CHOICE(CONCAT($,a), $a, a)
+#define CONST(a)	CHOICE(CONCAT($,a), $a, a)
+
+/* Indirect Mode */
+#define CONTENT(a)	CHOICE(a, a, (a))	 /* take contents of variable */
+#define REGIND(a)	CHOICE((a), (a), (a))	 /* Register a indirect */
+/* Register b indirect plus displacement a */
+#define REGOFF(a, b)	CHOICE(a(b), a(b), a(b))
+/* Reg indirect Base + Index + Displacement  - this is mainly for 16-bit mode
+ * which has no scaling
+ */
+#define REGBID(b,i,d)	CHOICE(d(b,i), d(b,i), d(b)(i))
+/* Reg indirect Base + (Index * Scale) */
+#define REGBIS(b,i,s)	CHOICE((b,i,s), (b,i,s), (b)(i*s))
+/* Reg indirect Base + (Index * Scale) + Displacement */
+#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s))
+/* Displaced Scaled Index: */
+#define REGDIS(d,i,s)	CHOICE(d(,i,s), d(,i,s), d(i * s))
+/* Indexed Base: */
+#define REGBI(b,i)	CHOICE((b,i), (b,i), (b)(i))
+/* Displaced Base: */
+#define REGDB(d,b)	CHOICE(d(b), d(b), d(b))
+/* Variable indirect: */
+#define VARINDIRECT(var) CHOICE(*var, *var, (var))
+/* Use register contents as jump/call target: */
+#define CODEPTR(reg)	CHOICE(*reg, *reg, reg)
+
+/* For expressions requiring bracketing,
+ * e.g. (CRT0_PM | CRT_EM)
+ */
+
+#define EXPR(a)		CHOICE([a], (a), [a])
+#define ENOT(a)		CHOICE(0!a, ~a, ~a)
+#define EMUL(a,b)	CHOICE(a\*b, a*b, a*b)
+#define EDIV(a,b)	CHOICE(a\/b, a/b, a/b)
+
+/*
+ * We have to beat the problem of commas within arguments to CHOICE.
+ * E.g. CHOICE(add a,b, add b,a) will get an argument mismatch.  Luckily ANSI
+ * and other known cpp definitions evaluate arguments before substitution,
+ * so wrapping the operand list in ARG2()/ARG3() works.
+ */
+#define ARG2(a, b)	a,b
+#define ARG3(a,b,c)	a,b,c
+
+/* Redefine assembler commands: CHOICE(AT&T, GNU, ACK); two-operand macros take (a,b) in AT&T order and emit (b,a) in the ACK column */
+#define AAA		CHOICE(aaa, aaa, aaa)
+#define AAD		CHOICE(aad, aad, aad)
+#define AAM		CHOICE(aam, aam, aam)
+#define AAS		CHOICE(aas, aas, aas)
+#define ADC_L(a, b)	CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a))
+#define ADC_W(a, b)	CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a))
+#define ADC_B(a, b)	CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a))
+#define ADD_L(a, b)	CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a))
+#define ADD_W(a, b)	CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a))
+#define ADD_B(a, b)	CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a))
+#define AND_L(a, b)	CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a))
+#define AND_W(a, b)	CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a))
+#define AND_B(a, b)	CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a))
+#define ARPL(a,b)	CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a))
+#define BOUND_L(a, b)	CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a))	/* NOTE(review): GNU column uses (b,a) unlike its neighbours - confirm intended */
+#define BOUND_W(a, b)	CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a))	/* NOTE(review): same (b,a) GNU order as BOUND_L */
+#define BSF_L(a, b)	CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a))
+#define BSF_W(a, b)	CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a))
+#define BSR_L(a, b)	CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a))
+#define BSR_W(a, b)	CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a))
+#define BT_L(a, b)	CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a))
+#define BT_W(a, b)	CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a))
+#define BTC_L(a, b)	CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a))
+#define BTC_W(a, b)	CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a))
+#define BTR_L(a, b)	CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a))
+#define BTR_W(a, b)	CHOICE(btrw ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a))
+#define BTS_L(a, b)	CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a))
+#define BTS_W(a, b)	CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a))
+#define CALL(a)		CHOICE(call a, call a, call a)
+#define CALLF(s,a)	CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a)
+#define CBW		CHOICE(cbtw, cbw, cbw)	/* sign-extend AL into AX */
+#define CWDE		CHOICE(cwtl, cwde, cwde)	/* sign-extend AX into EAX; AT&T name is cwtl */
+#define CLC		CHOICE(clc, clc, clc)
+#define CLD		CHOICE(cld, cld, cld)
+#define CLI		CHOICE(cli, cli, cli)
+#define CLTS		CHOICE(clts, clts, clts)
+#define CMC		CHOICE(cmc, cmc, cmc)
+#define CMP_L(a, b)	CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a))
+#define CMP_W(a, b)	CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a))
+#define CMP_B(a, b)	CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a))
+#define CMPS_L		CHOICE(cmpsl, cmpsl, _LTOG cmps)
+#define CMPS_W		CHOICE(cmpsw, cmpsw, _WTOG cmps)
+#define CMPS_B		CHOICE(cmpsb, cmpsb, cmpsb)
+#define CWD		CHOICE(cwtd, cwd, cwd)	/* sign-extend AX into DX:AX; AT&T name is cwtd */
+#define CDQ		CHOICE(cltd, cdq, cdq)	/* sign-extend EAX into EDX:EAX */
+#define DAA		CHOICE(daa, daa, daa)
+#define DAS		CHOICE(das, das, das)
+#define DEC_L(a)	CHOICE(decl a, decl a, _LTOG dec a)
+#define DEC_W(a)	CHOICE(decw a, decw a, _WTOG dec a)
+#define DEC_B(a)	CHOICE(decb a, decb a, decb a)
+#define DIV_L(a)	CHOICE(divl a, divl a, _LTOG div a)	/* _LTOG added for consistency with MUL_L/IDIV_L */
+#define DIV_W(a)	CHOICE(divw a, divw a, _WTOG div a)	/* ACK needs the _WTOG prefix for a 16-bit divide, as in MUL_W/IDIV_W */
+#define DIV_B(a)	CHOICE(divb a, divb a, divb a)
+#define ENTER(a,b)	CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a))
+#define HLT		CHOICE(hlt, hlt, hlt)
+#define IDIV_L(a)	CHOICE(idivl a, idivl a, _LTOG idiv a)
+#define IDIV_W(a)	CHOICE(idivw a, idivw a, _WTOG idiv a)
+#define IDIV_B(a)	CHOICE(idivb a, idivb a, idivb a)
+/* Only some forms of imul are covered here; the instruction has more. */
+#define IMUL_L(a, b)	CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a))
+#define IMUL_W(a, b)	CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a))
+#define IMUL_B(a)	CHOICE(imulb a, imulb a, imulb a)
+#define IN_L		CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX)
+#define IN_W		CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX)
+#define IN_B		CHOICE(inb (DX), inb ARG2(DX,AL), inb DX)
+/* Note to assembly authors: use the IN1_* macros ONLY for port numbers < 256
+ * given directly; do not write e.g. IN1_W(DX), even if IN1_ looks nicer.
+ */
+#if defined (sun)
+#define IN1_L(a)	CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a)
+#define IN1_W(a)	CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a)
+#define IN1_B(a)	CHOICE(inb (a), inb ARG2(a,AL), inb a)
+#else
+#define IN1_L(a)	CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a)
+#define IN1_W(a)	CHOICE(inw a, inw ARG2(a,AX), _WTOG in a)
+#define IN1_B(a)	CHOICE(inb a, inb ARG2(a,AL), inb a)
+#endif
+#define INC_L(a)	CHOICE(incl a, incl a, _LTOG inc a)
+#define INC_W(a)	CHOICE(incw a, incw a, _WTOG inc a)
+#define INC_B(a)	CHOICE(incb a, incb a, incb a)
+#define INS_L		CHOICE(insl, insl, _LTOG ins)
+#define INS_W		CHOICE(insw, insw, _WTOG ins)
+#define INS_B		CHOICE(insb, insb, insb)
+#define INT(a)		CHOICE(int a, int a, int a)
+#define INT3		CHOICE(int CONST(3), int3, int CONST(3))
+#define INTO		CHOICE(into, into, into)
+#define IRET		CHOICE(iret, iret, iret)
+#define IRETD		CHOICE(iret, iret, iretd)
+#define JA(a)		CHOICE(ja a, ja a, ja a)
+#define JAE(a)		CHOICE(jae a, jae a, jae a)
+#define JB(a)		CHOICE(jb a, jb a, jb a)
+#define JBE(a)		CHOICE(jbe a, jbe a, jbe a)
+#define JC(a)		CHOICE(jc a, jc a, jc a)
+#define JE(a)		CHOICE(je a, je a, je a)
+#define JG(a)		CHOICE(jg a, jg a, jg a)
+#define JGE(a)		CHOICE(jge a, jge a, jge a)
+#define JL(a)		CHOICE(jl a, jl a, jl a)
+#define JLE(a)		CHOICE(jle a, jle a, jle a)
+#define JNA(a)		CHOICE(jna a, jna a, jna a)
+#define JNAE(a)		CHOICE(jnae a, jnae a, jnae a)
+#define JNB(a)		CHOICE(jnb a, jnb a, jnb a)
+#define JNBE(a)		CHOICE(jnbe a, jnbe a, jnbe a)
+#define JNC(a)		CHOICE(jnc a, jnc a, jnc a)
+#define JNE(a)		CHOICE(jne a, jne a, jne a)
+#define JNG(a)		CHOICE(jng a, jng a, jng a)
+#define JNGE(a)		CHOICE(jnge a, jnge a, jnge a)
+#define JNL(a)		CHOICE(jnl a, jnl a, jnl a)
+#define JNLE(a)		CHOICE(jnle a, jnle a, jnle a)
+#define JNO(a)		CHOICE(jno a, jno a, jno a)
+#define JNP(a)		CHOICE(jnp a, jnp a, jnp a)
+#define JNS(a)		CHOICE(jns a, jns a, jns a)
+#define JNZ(a)		CHOICE(jnz a, jnz a, jnz a)
+#define JO(a)		CHOICE(jo a, jo a, jo a)
+#define JP(a)		CHOICE(jp a, jp a, jp a)
+#define JPE(a)		CHOICE(jpe a, jpe a, jpe a)
+#define JPO(a)		CHOICE(jpo a, jpo a, jpo a)
+#define JS(a)		CHOICE(js a, js a, js a)
+#define JZ(a)		CHOICE(jz a, jz a, jz a)
+#define JMP(a)		CHOICE(jmp a, jmp a, jmp a)
+#define JMPF(s,a)	CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a)
+#define LAHF		CHOICE(lahf, lahf, lahf)
+#if !defined(_REAL_MODE) && !defined(_V86_MODE)	/* LAR is not defined for real/V86 mode builds */
+#define LAR(a, b)	CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a))
+#endif /* !_REAL_MODE && !_V86_MODE */
+#define LEA_L(a, b)	CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a))
+#define LEA_W(a, b)	CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a))
+#define LEAVE		CHOICE(leave, leave, leave)
+#define LGDT(a)		CHOICE(lgdt a, lgdt a, lgdt a)
+#define LIDT(a)		CHOICE(lidt a, lidt a, lidt a)
+#define LDS(a, b)	CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a))
+#define LES(a, b)	CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a))
+#define LFS(a, b)	CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a))
+#define LGS(a, b)	CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a))
+#define LSS(a, b)	CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a))
+#define LLDT(a)		CHOICE(lldt a, lldt a, lldt a)
+#define LMSW(a)		CHOICE(lmsw a, lmsw a, lmsw a)
+#define LOCK		CHOICE(lock, lock, lock)
+#define LODS_L		CHOICE(lodsl, lodsl, _LTOG lods)
+#define LODS_W		CHOICE(lodsw, lodsw, _WTOG lods)
+#define LODS_B		CHOICE(lodsb, lodsb, lodsb)
+#define LOOP(a)		CHOICE(loop a, loop a, loop a)
+#define LOOPE(a)	CHOICE(loope a, loope a, loope a)
+#define LOOPZ(a)	CHOICE(loopz a, loopz a, loopz a)
+#define LOOPNE(a)	CHOICE(loopne a, loopne a, loopne a)
+#define LOOPNZ(a)	CHOICE(loopnz a, loopnz a, loopnz a)
+#if !defined(_REAL_MODE) && !defined(_V86_MODE)	/* LSL is not defined for real/V86 mode builds */
+#define LSL(a, b)	CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a))
+#endif /* !_REAL_MODE && !_V86_MODE */
+#define LTR(a)		CHOICE(ltr a, ltr a, ltr a)
+#define MOV_SR(a, b)	CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a))
+#define MOV_L(a, b)	CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a))
+#define MOV_W(a, b)	CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a))
+#define MOV_B(a, b)	CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a))
+#define MOVS_L		CHOICE(movsl, movsl, _LTOG movs)
+#define MOVS_W		CHOICE(movsw, movsw, _WTOG movs)
+#define MOVS_B		CHOICE(movsb, movsb, movsb)
+#define MOVSX_BL(a, b)	CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a))
+#define MOVSX_BW(a, b)	CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a))
+#define MOVSX_WL(a, b)	CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a))
+#define MOVZX_BL(a, b)	CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a))
+#define MOVZX_BW(a, b)	CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a))
+#define MOVZX_WL(a, b)	CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a))
+#define MUL_L(a)	CHOICE(mull a, mull a, _LTOG mul a)
+#define MUL_W(a)	CHOICE(mulw a, mulw a, _WTOG mul a)
+#define MUL_B(a)	CHOICE(mulb a, mulb a, mulb a)
+#define NEG_L(a)	CHOICE(negl a, negl a, _LTOG neg a)
+#define NEG_W(a)	CHOICE(negw a, negw a, _WTOG neg a)
+#define NEG_B(a)	CHOICE(negb a, negb a, negb a)
+#define NOP		CHOICE(nop, nop, nop)
+#define NOT_L(a)	CHOICE(notl a, notl a, _LTOG not a)
+#define NOT_W(a)	CHOICE(notw a, notw a, _WTOG not a)
+#define NOT_B(a)	CHOICE(notb a, notb a, notb a)
+#define OR_L(a,b)	CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a))
+#define OR_W(a,b)	CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a))
+#define OR_B(a,b)	CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a))
+#define OUT_L		CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX)
+#define OUT_W		CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX)
+#define OUT_B		CHOICE(outb (DX), outb ARG2(AL,DX), outb DX)
+/* Note to assembly authors: use the OUT1_* macros ONLY for port numbers < 256
+ * given directly; do not write e.g. OUT1_W(DX), even if OUT1_ looks nicer.
+ */
+#define OUT1_L(a)	CHOICE(outl (a), outl ARG2(EAX,a), _LTOG out a)	/* NOTE(review): IN1_* drops the AT&T parentheses when sun is undefined; OUT1_* does not - confirm */
+#define OUT1_W(a)	CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a)
+#define OUT1_B(a)	CHOICE(outb (a), outb ARG2(AL,a), outb a)
+#define OUTS_L		CHOICE(outsl, outsl, _LTOG outs)
+#define OUTS_W		CHOICE(outsw, outsw, _WTOG outs)
+#define OUTS_B		CHOICE(outsb, outsb, outsb)
+#define POP_SR(a)	CHOICE(pop a, pop a, pop a)
+#define POP_L(a)	CHOICE(popl a, popl a, _LTOG pop a)
+#define POP_W(a)	CHOICE(popw a, popw a, _WTOG pop a)
+#define POPA_L		CHOICE(popal, popal, _LTOG popa)
+#define POPA_W		CHOICE(popaw, popaw, _WTOG popa)
+#define POPF_L		CHOICE(popfl, popfl, _LTOG popf)
+#define POPF_W		CHOICE(popfw, popfw, _WTOG popf)
+#define PUSH_SR(a)	CHOICE(push a, push a, push a)
+#define PUSH_L(a)	CHOICE(pushl a, pushl a, _LTOG push a)
+#define PUSH_W(a)	CHOICE(pushw a, pushw a, _WTOG push a)
+#define PUSH_B(a)	CHOICE(push a, pushb a, push a)
+#define PUSHA_L		CHOICE(pushal, pushal, _LTOG pusha)
+#define PUSHA_W		CHOICE(pushaw, pushaw, _WTOG pusha)
+#define PUSHF_L		CHOICE(pushfl, pushfl, _LTOG pushf)
+#define PUSHF_W		CHOICE(pushfw, pushfw, _WTOG pushf)
+#define RCL_L(a, b)	CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a))
+#define RCL_W(a, b)	CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a))
+#define RCL_B(a, b)	CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a))
+#define RCR_L(a, b)	CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a))
+#define RCR_W(a, b)	CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a))
+#define RCR_B(a, b)	CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a))
+#define ROL_L(a, b)	CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a))
+#define ROL_W(a, b)	CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a))
+#define ROL_B(a, b)	CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a))
+#define ROR_L(a, b)	CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a))
+#define ROR_W(a, b)	CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a))
+#define ROR_B(a, b)	CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), rorb ARG2(b,a))
+#define REP		CHOICE(rep ;, rep ;, repe)
+#define REPE		CHOICE(repz ;, repe ;, repe)
+#define REPNE		CHOICE(repnz ;, repne ;, repne)
+#define REPNZ		REPNE	/* alias of REPNE */
+#define REPZ		REPE	/* alias of REPE */
+#define RET		CHOICE(ret, ret, ret)
+#define SAHF		CHOICE(sahf, sahf, sahf)
+#define SAL_L(a, b)	CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a))
+#define SAL_W(a, b)	CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a))
+#define SAL_B(a, b)	CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a))
+#define SAR_L(a, b)	CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a))
+#define SAR_W(a, b)	CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a))
+#define SAR_B(a, b)	CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a))
+#define SBB_L(a, b)	CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a))
+#define SBB_W(a, b)	CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a))
+#define SBB_B(a, b)	CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a))
+#define SCAS_L		CHOICE(scasl, scasl, _LTOG scas)
+#define SCAS_W		CHOICE(scasw, scasw, _WTOG scas)
+#define SCAS_B		CHOICE(scasb, scasb, scasb)
+#define SETA(a)		CHOICE(seta a, seta a, seta a)
+#define SETAE(a)	CHOICE(setae a, setae a, setae a)
+#define SETB(a)		CHOICE(setb a, setb a, setb a)
+#define SETBE(a)	CHOICE(setbe a, setbe a, setbe a)
+#define SETC(a)		CHOICE(setc a, setb a, setb a)
+#define SETE(a)		CHOICE(sete a, sete a, sete a)
+#define SETG(a)		CHOICE(setg a, setg a, setg a)
+#define SETGE(a)	CHOICE(setge a, setge a, setge a)
+#define SETL(a)		CHOICE(setl a, setl a, setl a)
+#define SETLE(a)	CHOICE(setle a, setle a, setle a)
+#define SETNA(a)	CHOICE(setna a, setna a, setna a)
+#define SETNAE(a)	CHOICE(setnae a, setnae a, setnae a)
+#define SETNB(a)	CHOICE(setnb a, setnb a, setnb a)
+#define SETNBE(a)	CHOICE(setnbe a, setnbe a, setnbe a)
+#define SETNC(a)	CHOICE(setnc a, setnb a, setnb a)
+#define SETNE(a)	CHOICE(setne a, setne a, setne a)
+#define SETNG(a)	CHOICE(setng a, setng a, setng a)
+#define SETNGE(a)	CHOICE(setnge a, setnge a, setnge a)
+#define SETNL(a)	CHOICE(setnl a, setnl a, setnl a)
+#define SETNLE(a)	CHOICE(setnle a, setnle a, setnle a)
+#define SETNO(a)	CHOICE(setno a, setno a, setno a)
+#define SETNP(a)	CHOICE(setnp a, setnp a, setnp a)
+#define SETNS(a)	CHOICE(setns a, setns a, setns a) /* was setna in the third variant */
+#define SETNZ(a)	CHOICE(setnz a, setnz a, setnz a)
+#define SETO(a)		CHOICE(seto a, seto a, seto a)
+#define SETP(a)		CHOICE(setp a, setp a, setp a)
+#define SETPE(a)	CHOICE(setpe a, setpe a, setpe a)
+#define SETPO(a)	CHOICE(setpo a, setpo a, setpo a)
+#define SETS(a)		CHOICE(sets a, sets a, sets a) /* was seta in the third variant */
+#define SETZ(a)		CHOICE(setz a, setz a, setz a)
+#define SGDT(a)		CHOICE(sgdt a, sgdt a, sgdt a)
+#define SIDT(a)		CHOICE(sidt a, sidt a, sidt a)
+#define SHL_L(a, b)	CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a))
+#define SHL_W(a, b)	CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a))
+#define SHL_B(a, b)	CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a))
+#define SHLD_L(a,b,c)	CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a))
+#define SHLD2_L(a,b)	CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL))
+#define SHLD_W(a,b,c)	CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a))
+#define SHLD2_W(a,b)	CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL))
+#define SHR_L(a, b)	CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a))
+#define SHR_W(a, b)	CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a))
+#define SHR_B(a, b)	CHOICE(shrb ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a))
+#define SHRD_L(a,b,c)	CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a))
+#define SHRD2_L(a,b)	CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL))
+#define SHRD_W(a,b,c)	CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a))
+#define SHRD2_W(a,b)	CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL))
+#define SLDT(a)		CHOICE(sldt a, sldt a, sldt a)
+#define SMSW(a)		CHOICE(smsw a, smsw a, smsw a)
+#define STC		CHOICE(stc, stc, stc)
+#define STD		CHOICE(std, std, std)
+#define STI		CHOICE(sti, sti, sti)
+#define STOS_L		CHOICE(stosl, stosl, _LTOG stos)
+#define STOS_W		CHOICE(stosw, stosw, _WTOG stos)
+#define STOS_B		CHOICE(stosb, stosb, stosb)
+#define STR(a)		CHOICE(str a, str a, str a)
+#define SUB_L(a, b)	CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a))
+#define SUB_W(a, b)	CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a))
+#define SUB_B(a, b)	CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a))
+#define TEST_L(a, b)	CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a))
+#define TEST_W(a, b)	CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a))
+#define TEST_B(a, b)	CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a))
+#define VERR(a)		CHOICE(verr a, verr a, verr a)
+#define VERW(a)		CHOICE(verw a, verw a, verw a)
+#define WAIT		CHOICE(wait, wait, wait)
+#define XCHG_L(a, b)	CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a))
+#define XCHG_W(a, b)	CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a))
+#define XCHG_B(a, b)	CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a))
+#define XLAT		CHOICE(xlat, xlat, xlat)
+#define XOR_L(a, b)	CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a))
+#define XOR_W(a, b)	CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a))
+#define XOR_B(a, b)	CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a))
+
+
+/* Floating Point Instructions */
+#define F2XM1		CHOICE(f2xm1, f2xm1, f2xm1)
+#define FABS		CHOICE(fabs, fabs, fabs)
+#define FADD_D(a)	CHOICE(faddl a, faddl a, faddd a)
+#define FADD_S(a)	CHOICE(fadds a, fadds a, fadds a)
+#define FADD2(a, b)	CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a))
+#define FADDP(a, b)	CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a))
+#define FIADD_L(a)	CHOICE(fiaddl a, fiaddl a, fiaddl a)
+#define FIADD_W(a)	CHOICE(fiadd a, fiadds a, fiadds a)
+#define FBLD(a)		CHOICE(fbld a, fbld a, fbld a)
+#define FBSTP(a)	CHOICE(fbstp a, fbstp a, fbstp a)
+#define FCHS		CHOICE(fchs, fchs, fchs)
+#define FCLEX		CHOICE(fclex, wait; fnclex, wait; fclex)
+#define FNCLEX		CHOICE(fnclex, fnclex, fclex)
+#define FCOM(a)		CHOICE(fcom a, fcom a, fcom a)
+#define FCOM_D(a)	CHOICE(fcoml a, fcoml a, fcomd a)
+#define FCOM_S(a)	CHOICE(fcoms a, fcoms a, fcoms a)
+#define FCOMP(a)	CHOICE(fcomp a, fcomp a, fcomp a)
+#define FCOMP_D(a)	CHOICE(fcompl a, fcompl a, fcompd a)
+#define FCOMP_S(a)	CHOICE(fcomps a, fcomps a, fcomps a)
+#define FCOMPP		CHOICE(fcompp, fcompp, fcompp)
+#define FCOS		CHOICE(fcos, fcos, fcos)
+#define FDECSTP		CHOICE(fdecstp, fdecstp, fdecstp)
+#define FDIV_D(a)	CHOICE(fdivl a, fdivl a, fdivd a)
+#define FDIV_S(a)	CHOICE(fdivs a, fdivs a, fdivs a)
+#define FDIV2(a, b)	CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a))
+#define FDIVP(a, b)	CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a))
+#define FIDIV_L(a)	CHOICE(fidivl a, fidivl a, fidivl a)
+#define FIDIV_W(a)	CHOICE(fidiv a, fidivs a, fidivs a)
+#define FDIVR_D(a)	CHOICE(fdivrl a, fdivrl a, fdivrd a)
+#define FDIVR_S(a)	CHOICE(fdivrs a, fdivrs a, fdivrs a)
+#define FDIVR2(a, b)	CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a))
+#define FDIVRP(a, b)	CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a))
+#define FIDIVR_L(a)	CHOICE(fidivrl a, fidivrl a, fidivrl a)
+#define FIDIVR_W(a)	CHOICE(fidivr a, fidivrs a, fidivrs a)
+#define FFREE(a)	CHOICE(ffree a, ffree a, ffree a)
+#define FICOM_L(a)	CHOICE(ficoml a, ficoml a, ficoml a)
+#define FICOM_W(a)	CHOICE(ficom a, ficoms a, ficoms a)
+#define FICOMP_L(a)	CHOICE(ficompl a, ficompl a, ficompl a)
+#define FICOMP_W(a)	CHOICE(ficomp a, ficomps a, ficomps a)
+#define FILD_Q(a)	CHOICE(fildll a, fildq a, fildq a)
+#define FILD_L(a)	CHOICE(fildl a, fildl a, fildl a)
+#define FILD_W(a)	CHOICE(fild a, filds a, filds a)
+#define FINCSTP		CHOICE(fincstp, fincstp, fincstp)
+#define FINIT		CHOICE(finit, wait; fninit, wait; finit)
+#define FNINIT		CHOICE(fninit, fninit, finit)
+#define FIST_L(a)	CHOICE(fistl a, fistl a, fistl a)
+#define FIST_W(a)	CHOICE(fist a, fists a, fists a)
+#define FISTP_Q(a)	CHOICE(fistpll a, fistpq a, fistpq a)
+#define FISTP_L(a)	CHOICE(fistpl a, fistpl a, fistpl a)
+#define FISTP_W(a)	CHOICE(fistp a, fistps a, fistps a)
+#define FLD_X(a)	CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */
+#define FLD_D(a)	CHOICE(fldl a, fldl a, fldd a)
+#define FLD_S(a)	CHOICE(flds a, flds a, flds a)
+#define FLD1		CHOICE(fld1, fld1, fld1)
+#define FLDL2T		CHOICE(fldl2t, fldl2t, fldl2t)
+#define FLDL2E		CHOICE(fldl2e, fldl2e, fldl2e)
+#define FLDPI		CHOICE(fldpi, fldpi, fldpi)
+#define FLDLG2		CHOICE(fldlg2, fldlg2, fldlg2)
+#define FLDLN2		CHOICE(fldln2, fldln2, fldln2)
+#define FLDZ		CHOICE(fldz, fldz, fldz)
+#define FLDCW(a)	CHOICE(fldcw a, fldcw a, fldcw a)
+#define FLDENV(a)	CHOICE(fldenv a, fldenv a, fldenv a)
+#define FMUL_S(a)	CHOICE(fmuls a, fmuls a, fmuls a)
+#define FMUL_D(a)	CHOICE(fmull a, fmull a, fmuld a)
+#define FMUL2(a, b)	CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a))
+#define FMULP(a, b)	CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a))
+#define FIMUL_L(a)	CHOICE(fimull a, fimull a, fimull a)
+#define FIMUL_W(a)	CHOICE(fimul a, fimuls a, fimuls a)
+#define FNOP		CHOICE(fnop, fnop, fnop)
+#define FPATAN		CHOICE(fpatan, fpatan, fpatan)
+#define FPREM		CHOICE(fprem, fprem, fprem)
+#define FPREM1		CHOICE(fprem1, fprem1, fprem1)
+#define FPTAN		CHOICE(fptan, fptan, fptan)
+#define FRNDINT		CHOICE(frndint, frndint, frndint)
+#define FRSTOR(a)	CHOICE(frstor a, frstor a, frstor a)
+#define FSAVE(a)	CHOICE(fsave a, wait; fnsave a, wait; fsave a)
+#define FNSAVE(a)	CHOICE(fnsave a, fnsave a, fsave a)
+#define FSCALE		CHOICE(fscale, fscale, fscale)
+#define FSIN		CHOICE(fsin, fsin, fsin)
+#define FSINCOS		CHOICE(fsincos, fsincos, fsincos)
+#define FSQRT		CHOICE(fsqrt, fsqrt, fsqrt)
+#define FST_D(a)	CHOICE(fstl a, fstl a, fstd a)
+#define FST_S(a)	CHOICE(fsts a, fsts a, fsts a)
+#define FSTP_X(a)	CHOICE(fstpt a, fstpt a, fstpx a)
+#define FSTP_D(a)	CHOICE(fstpl a, fstpl a, fstpd a)
+#define FSTP_S(a)	CHOICE(fstps a, fstps a, fstps a)
+#define FSTP(a)		CHOICE(fstp a, fstp a, fstp a)
+#define FSTCW(a)	CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a)
+#define FNSTCW(a)	CHOICE(fnstcw a, fnstcw a, fstcw a)
+#define FSTENV(a)	CHOICE(fstenv a, wait; fnstenv a, fstenv a)
+#define FNSTENV(a)	CHOICE(fnstenv a, fnstenv a, fstenv a)
+#define FSTSW(a)	CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a)
+#define FNSTSW(a)	CHOICE(fnstsw a, fnstsw a, fstsw a)
+#define FSUB_S(a)	CHOICE(fsubs a, fsubs a, fsubs a)
+#define FSUB_D(a)	CHOICE(fsubl a, fsubl a, fsubd a)
+#define FSUB2(a, b)	CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a))
+#define FSUBP(a, b)	CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a))
+#define FISUB_L(a)	CHOICE(fisubl a, fisubl a, fisubl a)
+#define FISUB_W(a)	CHOICE(fisub a, fisubs a, fisubs a)
+#define FSUBR_S(a)	CHOICE(fsubrs a, fsubrs a, fsubrs a)
+#define FSUBR_D(a)	CHOICE(fsubrl a, fsubrl a, fsubrd a)
+#define FSUBR2(a, b)	CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a))
+#define FSUBRP(a, b)	CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a))
+#define FISUBR_L(a)	CHOICE(fisubrl a, fisubrl a, fisubrl a)
+#define FISUBR_W(a)	CHOICE(fisubr a, fisubrs a, fisubrs a)
+#define FTST		CHOICE(ftst, ftst, ftst)
+#define FUCOM(a)	CHOICE(fucom a, fucom a, fucom a)
+#define FUCOMP(a)	CHOICE(fucomp a, fucomp a, fucomp a)
+#define FUCOMPP		CHOICE(fucompp, fucompp, fucompp)
+#define FWAIT		CHOICE(wait, wait, wait)
+#define FXAM		CHOICE(fxam, fxam, fxam)
+#define FXCH(a)		CHOICE(fxch a, fxch a, fxch a)
+#define FXTRACT		CHOICE(fxtract, fxtract, fxtract)
+#define FYL2X		CHOICE(fyl2x, fyl2x, fyl2x)
+#define FYL2XP1		CHOICE(fyl2xp1, fyl2xp1, fyl2xp1)
+
+/* New instructions */
+#define CPUID		CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162))
+#define RDTSC		CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49))
+
+#else /* NASM_ASSEMBLER || MASM_ASSEMBLER is defined */
+
+	/****************************************/
+	/*					*/
+	/*	Intel style assemblers.		*/
+	/*	(NASM and MASM)			*/
+	/*					*/
+	/****************************************/
+
+#define P_EAX		EAX
+#define L_EAX		EAX
+#define W_AX		AX
+#define B_AH		AH
+#define B_AL		AL
+
+#define P_EBX		EBX
+#define L_EBX		EBX
+#define W_BX		BX
+#define B_BH		BH
+#define B_BL		BL
+
+#define P_ECX		ECX
+#define L_ECX		ECX
+#define W_CX		CX
+#define B_CH		CH
+#define B_CL		CL
+
+#define P_EDX		EDX
+#define L_EDX		EDX
+#define W_DX		DX
+#define B_DH		DH
+#define B_DL		DL
+
+#define P_EBP		EBP
+#define L_EBP		EBP
+#define W_BP		BP
+
+#define P_ESI		ESI
+#define L_ESI		ESI
+#define W_SI		SI
+
+#define P_EDI		EDI
+#define L_EDI		EDI
+#define W_DI		DI
+
+#define P_ESP		ESP
+#define L_ESP		ESP
+#define W_SP		SP
+
+#define W_CS		CS
+#define W_SS		SS
+#define W_DS		DS
+#define W_ES		ES
+#define W_FS		FS
+#define W_GS		GS
+
+#define X_ST		ST
+#define D_ST		ST
+#define L_ST		ST
+
+#define P_MM0		mm0
+#define P_MM1		mm1
+#define P_MM2		mm2
+#define P_MM3		mm3
+#define P_MM4		mm4
+#define P_MM5		mm5
+#define P_MM6		mm6
+#define P_MM7		mm7
+
+#define P_XMM0		xmm0
+#define P_XMM1		xmm1
+#define P_XMM2		xmm2
+#define P_XMM3		xmm3
+#define P_XMM4		xmm4
+#define P_XMM5		xmm5
+#define P_XMM6		xmm6
+#define P_XMM7		xmm7
+
+#define CONCAT(x, y)		x ## y
+#define CONCAT3(x, y, z)	x ## y ## z
+
+#if defined(NASM_ASSEMBLER)
+
+#define ST(n)		st ## n
+#define ST0		st0
+
+#define TBYTE_PTR	tword
+#define QWORD_PTR	qword
+#define DWORD_PTR	dword
+#define WORD_PTR	word
+#define BYTE_PTR	byte
+
+#define OFFSET
+
+#define GLOBL			GLOBAL
+#define ALIGNTEXT32		ALIGN 32
+#define ALIGNTEXT16		ALIGN 16
+#define ALIGNTEXT8		ALIGN 8
+#define ALIGNTEXT4		ALIGN 4
+#define ALIGNTEXT2		ALIGN 2
+#define ALIGNTEXT32ifNOP	ALIGN 32
+#define ALIGNTEXT16ifNOP	ALIGN 16
+#define ALIGNTEXT8ifNOP		ALIGN 8
+#define ALIGNTEXT4ifNOP		ALIGN 4
+#define ALIGNDATA32		ALIGN 32
+#define ALIGNDATA16		ALIGN 16
+#define ALIGNDATA8		ALIGN 8
+#define ALIGNDATA4		ALIGN 4
+#define ALIGNDATA2		ALIGN 2
+#define FILE(s)
+#define STRING(s)	db s
+#define D_LONG		dd
+#define D_WORD		dw
+#define D_BYTE		db
+/* #define SPACE */
+/* #define COMM */
+#if defined(__WATCOMC__)
+SECTION _TEXT public align=16 class=CODE use32 flat
+SECTION _DATA public align=16 class=DATA use32 flat
+#define SEG_TEXT	SECTION _TEXT
+#define SEG_DATA	SECTION _DATA
+#define SEG_BSS		SECTION .bss
+#else
+#define SEG_DATA	SECTION .data
+#define SEG_TEXT	SECTION .text
+#define SEG_BSS		SECTION .bss
+#endif
+
+#define D_SPACE(n)	times n db 0	/* reserve n zero bytes (NASM TIMES prefix) */
+
+#define AS_BEGIN
+
+/* Jcc's should be handled better than this... */
+#define NEAR		near
+
+#else /* MASM */
+
+#define TBYTE_PTR	tbyte ptr
+#define QWORD_PTR	qword ptr
+#define DWORD_PTR	dword ptr
+#define WORD_PTR	word ptr
+#define BYTE_PTR	byte ptr
+
+#define OFFSET		offset
+
+#define GLOBL			GLOBAL
+#define ALIGNTEXT32		ALIGN 32
+#define ALIGNTEXT16		ALIGN 16
+#define ALIGNTEXT8		ALIGN 8
+#define ALIGNTEXT4		ALIGN 4
+#define ALIGNTEXT2		ALIGN 2
+#define ALIGNTEXT32ifNOP	ALIGN 32
+#define ALIGNTEXT16ifNOP	ALIGN 16
+#define ALIGNTEXT8ifNOP		ALIGN 8
+#define ALIGNTEXT4ifNOP		ALIGN 4
+#define ALIGNDATA32		ALIGN 32
+#define ALIGNDATA16		ALIGN 16
+#define ALIGNDATA8		ALIGN 8
+#define ALIGNDATA4		ALIGN 4
+#define ALIGNDATA2		ALIGN 2
+#define FILE(s)
+#define STRING(s)	db s
+#define D_LONG		dd
+#define D_WORD		dw
+#define D_BYTE		db
+/* #define SPACE */
+/* #define COMM */
+#define SEG_DATA	.DATA
+#define SEG_TEXT	.CODE
+#define SEG_BSS		.DATA
+
+#define D_SPACE(n)	db n dup (0)	/* reserve n zero bytes (MASM DUP operator) */
+
+#define AS_BEGIN
+
+#define NEAR
+
+#endif
+
+#if defined(Lynx) || defined(SYSV) || defined(SVR4) \
+ || ((defined(__linux__) || defined(__OS2ELF__)) && defined(__ELF__)) \
+ || (defined(__FreeBSD__) && (__FreeBSD__ >= 3))
+#define GLNAME(a)	a	/* global symbol name is used as written */
+#else	/* all other targets */
+#define GLNAME(a)	CONCAT(_, a)	/* global symbol name gets a leading underscore */
+#endif	/* GLNAME platform selection */
+
+/*
+ *	Addressing Modes
+ */
+
+/* Immediate Mode */
+#define P_ADDR(a)		OFFSET a
+#define X_ADDR(a)		OFFSET a
+#define D_ADDR(a)		OFFSET a
+#define L_ADDR(a)		OFFSET a
+#define W_ADDR(a)		OFFSET a
+#define B_ADDR(a)		OFFSET a
+
+#define P_CONST(a)		a
+#define X_CONST(a)		a
+#define D_CONST(a)		a
+#define L_CONST(a)		a
+#define W_CONST(a)		a
+#define B_CONST(a)		a
+
+/* Indirect Mode */
+#ifdef NASM_ASSEMBLER
+#define P_CONTENT(a)		[a]
+#define X_CONTENT(a)		TBYTE_PTR [a]
+#define D_CONTENT(a)		QWORD_PTR [a]
+#define L_CONTENT(a)		DWORD_PTR [a]
+#define W_CONTENT(a)		WORD_PTR [a]
+#define B_CONTENT(a)		BYTE_PTR [a]
+#else
+#define P_CONTENT(a)		a
+#define X_CONTENT(a)		TBYTE_PTR a
+#define D_CONTENT(a)		QWORD_PTR a
+#define L_CONTENT(a)		DWORD_PTR a
+#define W_CONTENT(a)		WORD_PTR a
+#define B_CONTENT(a)		BYTE_PTR a
+#endif
+
+/* Register a indirect */
+#define P_REGIND(a)		[a]
+#define X_REGIND(a)		TBYTE_PTR [a]
+#define D_REGIND(a)		QWORD_PTR [a]
+#define L_REGIND(a)		DWORD_PTR [a]
+#define W_REGIND(a)		WORD_PTR [a]
+#define B_REGIND(a)		BYTE_PTR [a]
+
+/* Register b indirect plus displacement a */
+#define P_REGOFF(a, b)		[b + a]
+#define X_REGOFF(a, b)		TBYTE_PTR [b + a]
+#define D_REGOFF(a, b)		QWORD_PTR [b + a]
+#define L_REGOFF(a, b)		DWORD_PTR [b + a]
+#define W_REGOFF(a, b)		WORD_PTR [b + a]
+#define B_REGOFF(a, b)		BYTE_PTR [b + a]
+
+/* Reg indirect Base + Index + Displacement  - this is mainly for 16-bit mode
+ * which has no scaling
+ */
+#define P_REGBID(b, i, d)	[b + i + d]
+#define X_REGBID(b, i, d)	TBYTE_PTR [b + i + d]
+#define D_REGBID(b, i, d)	QWORD_PTR [b + i + d]
+#define L_REGBID(b, i, d)	DWORD_PTR [b + i + d]
+#define W_REGBID(b, i, d)	WORD_PTR [b + i + d]
+#define B_REGBID(b, i, d)	BYTE_PTR [b + i + d]
+
+/* Reg indirect Base + (Index * Scale) */
+#define P_REGBIS(b, i, s)	[b + i * s]
+#define X_REGBIS(b, i, s)	TBYTE_PTR [b + i * s]
+#define D_REGBIS(b, i, s)	QWORD_PTR [b + i * s]
+#define L_REGBIS(b, i, s)	DWORD_PTR [b + i * s]
+#define W_REGBIS(b, i, s)	WORD_PTR [b + i * s]
+#define B_REGBIS(b, i, s)	BYTE_PTR [b + i * s]
+
+/* Reg indirect Base + (Index * Scale) + Displacement */
+#define P_REGBISD(b, i, s, d)	[b + i * s + d]
+#define X_REGBISD(b, i, s, d)	TBYTE_PTR [b + i * s + d]
+#define D_REGBISD(b, i, s, d)	QWORD_PTR [b + i * s + d]
+#define L_REGBISD(b, i, s, d)	DWORD_PTR [b + i * s + d]
+#define W_REGBISD(b, i, s, d)	WORD_PTR [b + i * s + d]
+#define B_REGBISD(b, i, s, d)	BYTE_PTR [b + i * s + d]
+
+/* Displaced Scaled Index: */
+#define P_REGDIS(d, i, s)	[i * s + d]
+#define X_REGDIS(d, i, s)	TBYTE_PTR [i * s + d]
+#define D_REGDIS(d, i, s)	QWORD_PTR [i * s + d]
+#define L_REGDIS(d, i, s)	DWORD_PTR [i * s + d]
+#define W_REGDIS(d, i, s)	WORD_PTR [i * s + d]
+#define B_REGDIS(d, i, s)	BYTE_PTR [i * s + d]
+
+/* Indexed Base: */
+#define P_REGBI(b, i)		[b + i]
+#define X_REGBI(b, i)		TBYTE_PTR [b + i]
+#define D_REGBI(b, i)		QWORD_PTR [b + i]
+#define L_REGBI(b, i)		DWORD_PTR [b + i]
+#define W_REGBI(b, i)		WORD_PTR [b + i]
+#define B_REGBI(b, i)		BYTE_PTR [b + i]
+
+/* Displaced Base: */
+#define P_REGDB(d, b)		[b + d]
+#define X_REGDB(d, b)		TBYTE_PTR [b + d]
+#define D_REGDB(d, b)		QWORD_PTR [b + d]
+#define L_REGDB(d, b)		DWORD_PTR [b + d]
+#define W_REGDB(d, b)		WORD_PTR [b + d]
+#define B_REGDB(d, b)		BYTE_PTR [b + d]
+
+/* Variable indirect: */
+#define VARINDIRECT(var)	[var]
+
+/* Use register contents as jump/call target: */
+#define CODEPTR(reg)		P_(reg)
+
+/* Redefine assembler commands.
+ * Each helper below pastes a size prefix onto the first token of its argument, e.g. L_(EAX) -> L_EAX.
+ */
+
+#define P_(a)			P_ ## a	/* P_ names: memory forms carry no size keyword */
+#define X_(a)			X_ ## a	/* X_ names: memory forms use TBYTE_PTR */
+#define D_(a)			D_ ## a	/* D_ names: memory forms use QWORD_PTR */
+#define SR_(a)			W_ ## a	/* segment registers resolve through the W_ names (W_CS, W_DS, ...) */
+#define S_(a)			L_ ## a	/* S_ resolves through the L_ names; used by the *_S float macros */
+#define L_(a)			L_ ## a	/* L_ names: memory forms use DWORD_PTR */
+#define W_(a)			W_ ## a	/* W_ names: memory forms use WORD_PTR */
+#define B_(a)			B_ ## a	/* B_ names: memory forms use BYTE_PTR */
+
+#define AAA			aaa
+#define AAD			aad
+#define AAM			aam
+#define AAS			aas
+#define ADC_L(a, b)		adc L_(b), L_(a)
+#define ADC_W(a, b)		adc W_(b), W_(a)
+#define ADC_B(a, b)		adc B_(b), B_(a)
+#define ADD_L(a, b)		add L_(b), L_(a)
+#define ADD_W(a, b)		add W_(b), W_(a)
+#define ADD_B(a, b)		add B_(b), B_(a)
+#define AND_L(a, b)		and L_(b), L_(a)
+#define AND_W(a, b)		and W_(b), W_(a)
+#define AND_B(a, b)		and B_(b), B_(a)
+#define ARPL(a,b)		arpl W_(b), a
+#define BOUND_L(a, b)		bound L_(b), L_(a)
+#define BOUND_W(a, b)		bound W_(b), W_(a)
+#define BSF_L(a, b)		bsf L_(b), L_(a)
+#define BSF_W(a, b)		bsf W_(b), W_(a)
+#define BSR_L(a, b)		bsr L_(b), L_(a)
+#define BSR_W(a, b)		bsr W_(b), W_(a)
+#define BT_L(a, b)		bt L_(b), L_(a)
+#define BT_W(a, b)		bt W_(b), W_(a)
+#define BTC_L(a, b)		btc L_(b), L_(a)
+#define BTC_W(a, b)		btc W_(b), W_(a)
+#define BTR_L(a, b)		btr L_(b), L_(a)
+#define BTR_W(a, b)		btr W_(b), W_(a)
+#define BTS_L(a, b)		bts L_(b), L_(a)
+#define BTS_W(a, b)		bts W_(b), W_(a)
+#define CALL(a)			call a
+#define CALLF(s,a)		call far s:a
+#define CBW			cbw
+#define CWDE			cwde
+#define CLC			clc
+#define CLD			cld
+#define CLI			cli
+#define CLTS			clts
+#define CMC			cmc
+#define CMP_L(a, b)		cmp L_(b), L_(a)
+#define CMP_W(a, b)		cmp W_(b), W_(a)
+#define CMP_B(a, b)		cmp B_(b), B_(a)
+#define CMPS_L			cmpsd
+#define CMPS_W			cmpsw
+#define CMPS_B			cmpsb
+#define CPUID			cpuid
+#define CWD			cwd
+#define CDQ			cdq
+#define DAA			daa
+#define DAS			das
+#define DEC_L(a)		dec L_(a)
+#define DEC_W(a)		dec W_(a)
+#define DEC_B(a)		dec B_(a)
+#define DIV_L(a)		div L_(a)
+#define DIV_W(a)		div W_(a)
+#define DIV_B(a)		div B_(a)
+#define ENTER(a,b)		enter b, a
+#define HLT			hlt
+#define IDIV_L(a)		idiv L_(a)
+#define IDIV_W(a)		idiv W_(a)
+#define IDIV_B(a)		idiv B_(a)
+#define IMUL_L(a, b)		imul L_(b), L_(a)
+#define IMUL_W(a, b)		imul W_(b), W_(a)
+#define IMUL_B(a)		imul B_(a)
+#define IN_L			in EAX, DX
+#define IN_W			in AX, DX
+#define IN_B			in AL, DX
+#define IN1_L(a)		in EAX, L_(a)	/* a: port number given directly (< 256) */
+#define IN1_W(a)		in AX, W_(a)	/* 16-bit read into AX */
+#define IN1_B(a)		in AL, B_(a)	/* 8-bit read into AL */
+#define INC_L(a)		inc L_(a)
+#define INC_W(a)		inc W_(a)
+#define INC_B(a)		inc B_(a)
+#define INS_L			insd	/* size-suffixed like OUTS_L/MOVS_L/STOS_L */
+#define INS_W			insw
+#define INS_B			insb
+#define INT(a)			int B_(a)
+#define INT3			int3
+#define INTO			into
+#define IRET			iret
+#define IRETD			iretd
+#define JA(a)			ja NEAR a
+#define JAE(a)			jae NEAR a
+#define JB(a)			jb NEAR a
+#define JBE(a)			jbe NEAR a
+#define JC(a)			jc NEAR a
+#define JE(a)			je NEAR a
+#define JG(a)			jg NEAR a
+#define JGE(a)			jge NEAR a
+#define JL(a)			jl NEAR a
+#define JLE(a)			jle NEAR a
+#define JNA(a)			jna NEAR a
+#define JNAE(a)			jnae NEAR a
+#define JNB(a)			jnb NEAR a
+#define JNBE(a)			jnbe NEAR a
+#define JNC(a)			jnc NEAR a
+#define JNE(a)			jne NEAR a
+#define JNG(a)			jng NEAR a
+#define JNGE(a)			jnge NEAR a
+#define JNL(a)			jnl NEAR a
+#define JNLE(a)			jnle NEAR a
+#define JNO(a)			jno NEAR a
+#define JNP(a)			jnp NEAR a
+#define JNS(a)			jns NEAR a
+#define JNZ(a)			jnz NEAR a
+#define JO(a)			jo NEAR a
+#define JP(a)			jp NEAR a
+#define JPE(a)			jpe NEAR a
+#define JPO(a)			jpo NEAR a
+#define JS(a)			js NEAR a
+#define JZ(a)			jz NEAR a
+#define JMP(a)			jmp a
+#define JMPF(s,a)		jmp far s:a
+#define LAHF			lahf
+#define LAR(a, b)		lar b, a
+#define LEA_L(a, b)		lea P_(b), P_(a)
+#define LEA_W(a, b)		lea P_(b), P_(a)
+#define LEAVE			leave
+#define LGDT(a)			lgdt a
+#define LIDT(a)			lidt a
+#define LDS(a, b)		lds b, P_(a)
+#define LES(a, b)		les b, P_(a)
+#define LFS(a, b)		lfs b, P_(a)
+#define LGS(a, b)		lgs b, P_(a)
+#define LSS(a, b)		lss b, P_(a)
+#define LLDT(a)			lldt a
+#define LMSW(a)			lmsw a
+#define LOCK			lock
+#define LODS_L			lodsd
+#define LODS_W			lodsw
+#define LODS_B			lodsb
+#define LOOP(a)			loop a
+#define LOOPE(a)		loope a
+#define LOOPZ(a)		loopz a
+#define LOOPNE(a)		loopne a
+#define LOOPNZ(a)		loopnz a
+#define LSL(a, b)		lsl b, a
+#define LTR(a)			ltr a
+#define MOV_SR(a, b)		mov SR_(b), SR_(a)
+#define MOV_L(a, b)		mov L_(b), L_(a)
+#define MOV_W(a, b)		mov W_(b), W_(a)
+#define MOV_B(a, b)		mov B_(b), B_(a)
+#define MOVS_L			movsd
+#define MOVS_W			movsw
+#define MOVS_B			movsb
+#define MOVSX_BL(a, b)		movsx L_(b), B_(a)	/* sign-extend byte a into long b */
+#define MOVSX_BW(a, b)		movsx W_(b), B_(a)	/* sign-extend byte a into word b */
+#define MOVSX_WL(a, b)		movsx L_(b), W_(a)	/* sign-extend word a into long b */
+#define MOVZX_BL(a, b)		movzx L_(b), B_(a)	/* zero-extend byte a into long b */
+#define MOVZX_BW(a, b)		movzx W_(b), B_(a)	/* zero-extend byte a into word b */
+#define MOVZX_WL(a, b)		movzx L_(b), W_(a)	/* zero-extend word a into long b */
+#define MUL_L(a)		mul L_(a)
+#define MUL_W(a)		mul W_(a)
+#define MUL_B(a)		mul B_(a)
+#define NEG_L(a)		neg L_(a)
+#define NEG_W(a)		neg W_(a)
+#define NEG_B(a)		neg B_(a)
+#define NOP			nop
+#define NOT_L(a)		not L_(a)
+#define NOT_W(a)		not W_(a)
+#define NOT_B(a)		not B_(a)
+#define OR_L(a,b)		or L_(b), L_(a)
+#define OR_W(a,b)		or W_(b), W_(a)
+#define OR_B(a,b)		or B_(b), B_(a)
+#define OUT_L			out DX, EAX
+#define OUT_W			out DX, AX
+#define OUT_B			out DX, AL
+#define OUT1_L(a)		out L_(a), EAX	/* a: port number given directly (< 256) */
+#define OUT1_W(a)		out W_(a), AX	/* 16-bit write from AX */
+#define OUT1_B(a)		out B_(a), AL	/* 8-bit write from AL */
+#define OUTS_L			outsd
+#define OUTS_W			outsw
+#define OUTS_B			outsb
+#define POP_SR(a)		pop SR_(a)
+#define POP_L(a)		pop L_(a)
+#define POP_W(a)		pop W_(a)
+#define POPA_L			popad
+#define POPA_W			popa
+#define POPF_L			popfd
+#define POPF_W			popf
+#define PUSH_SR(a)		push SR_(a)
+#define PUSH_L(a)		push L_(a)
+#define PUSH_W(a)		push W_(a)
+#define PUSH_B(a)		push B_(a)
+#define PUSHA_L			pushad
+#define PUSHA_W			pusha
+#define PUSHF_L			pushfd
+#define PUSHF_W			pushf
+#define RCL_L(a, b)		rcl L_(b), L_(a)
+#define RCL_W(a, b)		rcl W_(b), W_(a)
+#define RCL_B(a, b)		rcl B_(b), B_(a)
+#define RCR_L(a, b)		rcr L_(b), L_(a)
+#define RCR_W(a, b)		rcr W_(b), W_(a)
+#define RCR_B(a, b)		rcr B_(b), B_(a)
+#define RDTSC			rdtsc
+#define ROL_L(a, b)		rol L_(b), L_(a)
+#define ROL_W(a, b)		rol W_(b), W_(a)
+#define ROL_B(a, b)		rol B_(b), B_(a)
+#define ROR_L(a, b)		ror L_(b), L_(a)
+#define ROR_W(a, b)		ror W_(b), W_(a)
+#define ROR_B(a, b)		ror B_(b), B_(a)
+#define REP			rep
+#define REPE			repe
+#define REPNE			repne
+#define REPNZ			REPNE
+#define REPZ			REPE
+#define RET			ret
+#define SAHF			sahf
+#define SAL_L(a, b)		sal L_(b), B_(a)
+#define SAL_W(a, b)		sal W_(b), B_(a)
+#define SAL_B(a, b)		sal B_(b), B_(a)
+#define SAR_L(a, b)		sar L_(b), B_(a)
+#define SAR_W(a, b)		sar W_(b), B_(a)
+#define SAR_B(a, b)		sar B_(b), B_(a)
+#define SBB_L(a, b)		sbb L_(b), L_(a)
+#define SBB_W(a, b)		sbb W_(b), W_(a)
+#define SBB_B(a, b)		sbb B_(b), B_(a)
+#define SCAS_L			scasd	/* size-suffixed like CMPS_L/LODS_L/STOS_L */
+#define SCAS_W			scasw
+#define SCAS_B			scasb
+#define SETA(a)			seta a
+#define SETAE(a)		setae a
+#define SETB(a)			setb a
+#define SETBE(a)		setbe a
+#define SETC(a)			setc a
+#define SETE(a)			sete a
+#define SETG(a)			setg a
+#define SETGE(a)		setge a
+#define SETL(a)			setl a
+#define SETLE(a)		setle a
+#define SETNA(a)		setna a
+#define SETNAE(a)		setnae a
+#define SETNB(a)		setnb a
+#define SETNBE(a)		setnbe a
+#define SETNC(a)		setnc a
+#define SETNE(a)		setne a
+#define SETNG(a)		setng a
+#define SETNGE(a)		setnge a
+#define SETNL(a)		setnl a
+#define SETNLE(a)		setnle a
+#define SETNO(a)		setno a
+#define SETNP(a)		setnp a
+#define SETNS(a)		setns a
+#define SETNZ(a)		setnz a
+#define SETO(a)			seto a
+#define SETP(a)			setp a
+#define SETPE(a)		setpe a
+#define SETPO(a)		setpo a
+#define SETS(a)			sets a
+#define SETZ(a)			setz a
+#define SGDT(a)			sgdt a
+#define SIDT(a)			sidt a
+#define SHL_L(a, b)		shl L_(b), B_(a)
+#define SHL_W(a, b)		shl W_(b), B_(a)
+#define SHL_B(a, b)		shl B_(b), B_(a)
+#define SHLD_L(a,b,c)		shld L_(c), L_(b), B_(a)	/* a = count, b = source, c = destination */
+#define SHLD2_L(a,b)		shld L_(b), L_(a), CL	/* a = source, b = destination, count in CL */
+#define SHLD_W(a,b,c)		shld W_(c), W_(b), B_(a)	/* 16-bit form of SHLD_L */
+#define SHLD2_W(a,b)		shld W_(b), W_(a), CL	/* 16-bit form of SHLD2_L */
+#define SHR_L(a, b)		shr L_(b), B_(a)
+#define SHR_W(a, b)		shr W_(b), B_(a)
+#define SHR_B(a, b)		shr B_(b), B_(a)
+#define SHRD_L(a,b,c)		shrd L_(c), L_(b), B_(a)	/* a = count, b = source, c = destination */
+#define SHRD2_L(a,b)		shrd L_(b), L_(a), CL	/* a = source, b = destination, count in CL */
+#define SHRD_W(a,b,c)		shrd W_(c), W_(b), B_(a)	/* 16-bit form of SHRD_L */
+#define SHRD2_W(a,b)		shrd W_(b), W_(a), CL	/* 16-bit form of SHRD2_L */
+#define SLDT(a)			sldt a
+#define SMSW(a)			smsw a
+#define STC			stc
+#define STD			std
+#define STI			sti
+#define STOS_L			stosd
+#define STOS_W			stosw
+#define STOS_B			stosb
+#define STR(a)			str a
+#define SUB_L(a, b)		sub L_(b), L_(a)
+#define SUB_W(a, b)		sub W_(b), W_(a)
+#define SUB_B(a, b)		sub B_(b), B_(a)
+#define TEST_L(a, b)		test L_(b), L_(a)
+#define TEST_W(a, b)		test W_(b), W_(a)
+#define TEST_B(a, b)		test B_(b), B_(a)
+#define VERR(a)			verr a
+#define VERW(a)			verw a
+#define WAIT			wait
+#define XCHG_L(a, b)		xchg L_(b), L_(a)
+#define XCHG_W(a, b)		xchg W_(b), W_(a)
+#define XCHG_B(a, b)		xchg B_(b), B_(a)
+#define XLAT			xlat
+#define XOR_L(a, b)		xor L_(b), L_(a)
+#define XOR_W(a, b)		xor W_(b), W_(a)
+#define XOR_B(a, b)		xor B_(b), B_(a)
+
+
+/* Floating Point Instructions */
+#define F2XM1			f2xm1
+#define FABS			fabs
+#define FADD_D(a)		fadd D_(a)
+#define FADD_S(a)		fadd S_(a)
+#define FADD2(a, b)		fadd b, a
+#define FADDP(a, b)		faddp b, a
+#define FIADD_L(a)		fiadd L_(a)
+#define FIADD_W(a)		fiadd W_(a)
+#define FBLD(a)			fbld a
+#define FBSTP(a)		fbstp a
+#define FCHS			fchs
+#define FCLEX			fclex
+#define FNCLEX			fnclex
+#define FCOM(a)			fcom a
+#define FCOM_D(a)		fcom D_(a)
+#define FCOM_S(a)		fcom S_(a)
+#define FCOMP(a)		fcomp a
+#define FCOMP_D(a)		fcomp D_(a)
+#define FCOMP_S(a)		fcomp S_(a)
+#define FCOMPP			fcompp
+#define FCOS			fcos
+#define FDECSTP			fdecstp
+#define FDIV_D(a)		fdiv D_(a)
+#define FDIV_S(a)		fdiv S_(a)
+#define FDIV2(a, b)		fdiv b, a
+#define FDIVP(a, b)		fdivp b, a
+#define FIDIV_L(a)		fidiv L_(a)
+#define FIDIV_W(a)		fidiv W_(a)
+#define FDIVR_D(a)		fdivr D_(a)
+#define FDIVR_S(a)		fdivr S_(a)
+#define FDIVR2(a, b)		fdivr b, a
+#define FDIVRP(a, b)		fdivrp b, a
+#define FIDIVR_L(a)		fidivr L_(a)
+#define FIDIVR_W(a)		fidivr W_(a)
+#define FFREE(a)		ffree a
+#define FICOM_L(a)		ficom L_(a)
+#define FICOM_W(a)		ficom W_(a)
+#define FICOMP_L(a)		ficomp L_(a)
+#define FICOMP_W(a)		ficomp W_(a)
+#define FILD_Q(a)		fild D_(a)
+#define FILD_L(a)		fild L_(a)
+#define FILD_W(a)		fild W_(a)
+#define FINCSTP			fincstp
+#define FINIT			finit
+#define FNINIT			fninit
+#define FIST_L(a)		fist L_(a)
+#define FIST_W(a)		fist W_(a)
+#define FISTP_Q(a)		fistp D_(a)
+#define FISTP_L(a)		fistp L_(a)
+#define FISTP_W(a)		fistp W_(a)
+#define FLD_X(a)		fld X_(a)
+#define FLD_D(a)		fld D_(a)
+#define FLD_S(a)		fld S_(a)
+#define FLD1			fld1
+#define FLDL2T			fldl2t
+#define FLDL2E			fldl2e
+#define FLDPI			fldpi
+#define FLDLG2			fldlg2
+#define FLDLN2			fldln2
+#define FLDZ			fldz
+#define FLDCW(a)		fldcw a
+#define FLDENV(a)		fldenv a
+#define FMUL_S(a)		fmul S_(a)
+#define FMUL_D(a)		fmul D_(a)
+#define FMUL2(a, b)		fmul b, a
+#define FMULP(a, b)		fmulp b, a
+#define FIMUL_L(a)		fimul L_(a)
+#define FIMUL_W(a)		fimul W_(a)
+#define FNOP			fnop
+#define FPATAN			fpatan
+#define FPREM			fprem
+#define FPREM1			fprem1
+#define FPTAN			fptan
+#define FRNDINT			frndint
+#define FRSTOR(a)		frstor a
+#define FSAVE(a)		fsave a
+#define FNSAVE(a)		fnsave a
+#define FSCALE			fscale
+#define FSIN			fsin
+#define FSINCOS			fsincos
+#define FSQRT			fsqrt
+#define FST_D(a)		fst D_(a)
+#define FST_S(a)		fst S_(a)
+#define FSTP_X(a)		fstp X_(a)
+#define FSTP_D(a)		fstp D_(a)
+#define FSTP_S(a)		fstp S_(a)
+#define FSTP(a)			fstp a
+#define FSTCW(a)		fstcw a
+#define FNSTCW(a)		fnstcw a
+#define FSTENV(a)		fstenv a
+#define FNSTENV(a)		fnstenv a
+#define FSTSW(a)		fstsw a
+#define FNSTSW(a)		fnstsw a
+#define FSUB_S(a)		fsub S_(a)
+#define FSUB_D(a)		fsub D_(a)
+#define FSUB2(a, b)		fsub b, a
+#define FSUBP(a, b)		fsubp b, a
+#define FISUB_L(a)		fisub L_(a)
+#define FISUB_W(a)		fisub W_(a)
+#define FSUBR_S(a)		fsubr S_(a)
+#define FSUBR_D(a)		fsubr D_(a)
+#define FSUBR2(a, b)		fsubr b, a
+#define FSUBRP(a, b)		fsubrp b, a
+#define FISUBR_L(a)		fisubr L_(a)
+#define FISUBR_W(a)		fisubr W_(a)
+#define FTST			ftst
+#define FUCOM(a)		fucom a
+#define FUCOMP(a)		fucomp a
+#define FUCOMPP			fucompp
+#define FWAIT			fwait
+#define FXAM			fxam
+#define FXCH(a)			fxch a
+#define FXTRACT			fxtract
+#define FYL2X			fyl2x
+#define FYL2XP1			fyl2xp1
+
+#endif /* NASM_ASSEMBLER, MASM_ASSEMBLER */
+
+	/****************************************/
+	/*					*/
+	/*	Extensions to x86 insn set -	*/
+	/*	MMX, 3DNow!			*/
+	/*					*/
+	/****************************************/
+
+#if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER)	/* reversed operand order, P_-prefixed names */
+#define P_ARG1(a)		P_ ## a
+#define P_ARG2(a, b)		P_ ## b, P_ ## a
+#define P_ARG3(a, b, c)		P_ ## c, P_ ## b, P_ ## a
+#else	/* operands are emitted unchanged, in the order given */
+#define P_ARG1(a)		a
+#define P_ARG2(a, b)		a, b
+#define P_ARG3(a, b, c)		a, b, c
+#endif
+
+/* MMX */
+#define MOVD(a, b)		movd P_ARG2(a, b)
+#define MOVQ(a, b)		movq P_ARG2(a, b)
+
+#define PADDB(a, b)		paddb P_ARG2(a, b)
+#define PADDW(a, b)		paddw P_ARG2(a, b)
+#define PADDD(a, b)		paddd P_ARG2(a, b)
+
+#define PADDSB(a, b)		paddsb P_ARG2(a, b)
+#define PADDSW(a, b)		paddsw P_ARG2(a, b)
+
+#define PADDUSB(a, b)		paddusb P_ARG2(a, b)
+#define PADDUSW(a, b)		paddusw P_ARG2(a, b)
+
+#define PSUBB(a, b)		psubb P_ARG2(a, b)
+#define PSUBW(a, b)		psubw P_ARG2(a, b)
+#define PSUBD(a, b)		psubd P_ARG2(a, b)
+
+#define PSUBSB(a, b)		psubsb P_ARG2(a, b)
+#define PSUBSW(a, b)		psubsw P_ARG2(a, b)
+
+#define PSUBUSB(a, b)		psubusb P_ARG2(a, b)
+#define PSUBUSW(a, b)		psubusw P_ARG2(a, b)
+
+#define PCMPEQB(a, b)		pcmpeqb P_ARG2(a, b)
+#define PCMPEQW(a, b)		pcmpeqw P_ARG2(a, b)
+#define PCMPEQD(a, b)		pcmpeqd P_ARG2(a, b)
+
+#define PCMPGTB(a, b)		pcmpgtb P_ARG2(a, b)
+#define PCMPGTW(a, b)		pcmpgtw P_ARG2(a, b)
+#define PCMPGTD(a, b)		pcmpgtd P_ARG2(a, b)
+
+#define PMULHW(a, b)		pmulhw P_ARG2(a, b)
+#define PMULLW(a, b)		pmullw P_ARG2(a, b)
+
+#define PMADDWD(a, b)		pmaddwd P_ARG2(a, b)
+
+#define PAND(a, b)		pand P_ARG2(a, b)
+
+#define PANDN(a, b)		pandn P_ARG2(a, b)
+
+#define POR(a, b)		por P_ARG2(a, b)
+
+#define PXOR(a, b)		pxor P_ARG2(a, b)
+
+#define PSRAW(a, b)		psraw P_ARG2(a, b)
+#define PSRAD(a, b)		psrad P_ARG2(a, b)
+
+#define PSRLW(a, b)		psrlw P_ARG2(a, b)
+#define PSRLD(a, b)		psrld P_ARG2(a, b)
+#define PSRLQ(a, b)		psrlq P_ARG2(a, b)
+
+#define PSLLW(a, b)		psllw P_ARG2(a, b)
+#define PSLLD(a, b)		pslld P_ARG2(a, b)
+#define PSLLQ(a, b)		psllq P_ARG2(a, b)
+
+#define PACKSSWB(a, b)		packsswb P_ARG2(a, b)
+#define PACKSSDW(a, b)		packssdw P_ARG2(a, b)
+#define PACKUSWB(a, b)		packuswb P_ARG2(a, b)
+
+#define PUNPCKHBW(a, b)		punpckhbw P_ARG2(a, b)
+#define PUNPCKHWD(a, b)		punpckhwd P_ARG2(a, b)
+#define PUNPCKHDQ(a, b)		punpckhdq P_ARG2(a, b)
+#define PUNPCKLBW(a, b)		punpcklbw P_ARG2(a, b)
+#define PUNPCKLWD(a, b)		punpcklwd P_ARG2(a, b)
+#define PUNPCKLDQ(a, b)		punpckldq P_ARG2(a, b)
+
+#define EMMS			emms
+
+/* AMD 3DNow! */
+#define PAVGUSB(a, b)		pavgusb P_ARG2(a, b)
+#define PFADD(a, b)		pfadd P_ARG2(a, b)
+#define PFSUB(a, b)		pfsub P_ARG2(a, b)
+#define PFSUBR(a, b)		pfsubr P_ARG2(a, b)
+#define PFACC(a, b)		pfacc P_ARG2(a, b)
+#define PFCMPGE(a, b)		pfcmpge P_ARG2(a, b)
+#define PFCMPGT(a, b)		pfcmpgt P_ARG2(a, b)
+#define PFCMPEQ(a, b)		pfcmpeq P_ARG2(a, b)
+#define PFMIN(a, b)		pfmin P_ARG2(a, b)
+#define PFMAX(a, b)		pfmax P_ARG2(a, b)
+#define PI2FD(a, b)		pi2fd P_ARG2(a, b)
+#define PF2ID(a, b)		pf2id P_ARG2(a, b)
+#define PFRCP(a, b)		pfrcp P_ARG2(a, b)
+#define PFRSQRT(a, b)		pfrsqrt P_ARG2(a, b)
+#define PFMUL(a, b)		pfmul P_ARG2(a, b)
+#define PFRCPIT1(a, b)		pfrcpit1 P_ARG2(a, b)
+#define PFRSQIT1(a, b)		pfrsqit1 P_ARG2(a, b)
+#define PFRCPIT2(a, b)		pfrcpit2 P_ARG2(a, b)
+#define PMULHRW(a, b)		pmulhrw P_ARG2(a, b)
+
+#define FEMMS			femms
+#define PREFETCH(a)		prefetch P_ARG1(a)
+#define PREFETCHW(a)		prefetchw P_ARG1(a)
+
+/* Intel SSE */
+#define ADDPS(a, b)		addps P_ARG2(a, b)
+#define ADDSS(a, b)		addss P_ARG2(a, b)
+#define ANDNPS(a, b)		andnps P_ARG2(a, b)
+#define ANDPS(a, b)		andps P_ARG2(a, b)
+/* NASM only knows the pseudo ops for these.
+#define CMPPS(a, b, c)		cmpps P_ARG3(a, b, c)
+#define CMPSS(a, b, c)		cmpss P_ARG3(a, b, c)
+*/
+#define CMPEQPS(a, b)		cmpeqps P_ARG2(a, b)
+#define CMPLTPS(a, b)		cmpltps P_ARG2(a, b)
+#define CMPLEPS(a, b)		cmpleps P_ARG2(a, b)
+#define CMPUNORDPS(a, b)	cmpunordps P_ARG2(a, b)
+#define CMPNEQPS(a, b)		cmpneqps P_ARG2(a, b)
+#define CMPNLTPS(a, b)		cmpnltps P_ARG2(a, b)
+#define CMPNLEPS(a, b)		cmpnleps P_ARG2(a, b)
+#define CMPORDPS(a, b)		cmpordps P_ARG2(a, b)
+#define CMPEQSS(a, b)		cmpeqss P_ARG2(a, b)
+#define CMPLTSS(a, b)		cmpltss P_ARG2(a, b)
+#define CMPLESS(a, b)		cmpless P_ARG2(a, b)
+#define CMPUNORDSS(a, b)	cmpunordss P_ARG2(a, b)
+#define CMPNEQSS(a, b)		cmpneqss P_ARG2(a, b)
+#define CMPNLTSS(a, b)		cmpnltss P_ARG2(a, b)
+#define CMPNLESS(a, b)		cmpnless P_ARG2(a, b)
+#define CMPORDSS(a, b)		cmpordss P_ARG2(a, b)
+#define COMISS(a, b)		comiss P_ARG2(a, b)
+#define CVTPI2PS(a, b)		cvtpi2ps P_ARG2(a, b)
+#define CVTPS2PI(a, b)		cvtps2pi P_ARG2(a, b)
+#define CVTSI2SS(a, b)		cvtsi2ss P_ARG2(a, b)
+#define CVTSS2SI(a, b)		cvtss2si P_ARG2(a, b)
+#define CVTTPS2PI(a, b)		cvttps2pi P_ARG2(a, b)
+#define CVTTSS2SI(a, b)		cvttss2si P_ARG2(a, b)
+#define DIVPS(a, b)		divps P_ARG2(a, b)
+#define DIVSS(a, b)		divss P_ARG2(a, b)
+#define FXRSTOR(a)		fxrstor P_ARG1(a)
+#define FXSAVE(a)		fxsave P_ARG1(a)
+#define LDMXCSR(a)		ldmxcsr P_ARG1(a)
+#define MAXPS(a, b)		maxps P_ARG2(a, b)
+#define MAXSS(a, b)		maxss P_ARG2(a, b)
+#define MINPS(a, b)		minps P_ARG2(a, b)
+#define MINSS(a, b)		minss P_ARG2(a, b)
+#define MOVAPS(a, b)		movaps P_ARG2(a, b)
+#define MOVHLPS(a, b)		movhlps P_ARG2(a, b)
+#define MOVHPS(a, b)		movhps P_ARG2(a, b)
+#define MOVLHPS(a, b)		movlhps P_ARG2(a, b)
+#define MOVLPS(a, b)		movlps P_ARG2(a, b)
+#define MOVMSKPS(a, b)		movmskps P_ARG2(a, b)
+#define MOVNTPS(a, b)		movntps P_ARG2(a, b)
+#define MOVNTQ(a, b)		movntq P_ARG2(a, b)
+#define MOVSS(a, b)		movss P_ARG2(a, b)
+#define MOVUPS(a, b)		movups P_ARG2(a, b)
+#define MULPS(a, b)		mulps P_ARG2(a, b)
+#define MULSS(a, b)		mulss P_ARG2(a, b)
+#define ORPS(a, b)		orps P_ARG2(a, b)
+#define RCPPS(a, b)		rcpps P_ARG2(a, b)
+#define RCPSS(a, b)		rcpss P_ARG2(a, b)
+#define RSQRTPS(a, b)		rsqrtps P_ARG2(a, b)
+#define RSQRTSS(a, b)		rsqrtss P_ARG2(a, b)
+#define SHUFPS(a, b, c)		shufps P_ARG3(a, b, c)
+#define SQRTPS(a, b)		sqrtps P_ARG2(a, b)
+#define SQRTSS(a, b)		sqrtss P_ARG2(a, b)
+#define STMXCSR(a)		stmxcsr P_ARG1(a)
+#define SUBPS(a, b)		subps P_ARG2(a, b)
+#define UCOMISS(a, b)		ucomiss P_ARG2(a, b)
+#define UNPCKHPS(a, b)		unpckhps P_ARG2(a, b)
+#define UNPCKLPS(a, b)		unpcklps P_ARG2(a, b)
+#define XORPS(a, b)		xorps P_ARG2(a, b)
+
+#define PREFETCHNTA(a)		prefetchnta P_ARG1(a)
+#define PREFETCHT0(a)		prefetcht0 P_ARG1(a)
+#define PREFETCHT1(a)		prefetcht1 P_ARG1(a)
+#define PREFETCHT2(a)		prefetcht2 P_ARG1(a)
+#define SFENCE			sfence
+
+/* Added by BrianP for FreeBSD (per David Dawes) */
+#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) && !defined(__bsdi__)
+#define LLBL(a)		CONCAT(.L,a)
+#define LLBL2(a,b)	CONCAT3(.L,a,b)
+#else
+#define LLBL(a)		a
+#define LLBL2(a,b)	CONCAT(a,b)
+#endif
+
+/* Segment overrides */
+#define SEGCS		D_BYTE	46
+#define SEGDS		D_BYTE	62
+#define SEGES		D_BYTE	38
+#define SEGFS		D_BYTE	100
+#define SEGGS		D_BYTE	101
+
+/* Temporary labels: valid until next non-local label */
+#ifdef NASM_ASSEMBLER
+#define TLBL(a)		CONCAT(.,a)
+#else
+#define TLBL(a)		CONCAT(a,$)
+#endif
+
+/* Hidden symbol visibility support.
+ * If we build with gcc's -fvisibility=hidden flag, we'll need to change
+ * the symbol visibility mode to 'default'.
+ */
+#if defined(GNU_ASSEMBLER) && !defined(__DJGPP__) && !defined(__MINGW32__)
+#  define HIDDEN(x) .hidden x
+#elif defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 && !defined(__MINGW32__)
+#  pragma GCC visibility push(default)
+#  define HIDDEN(x) .hidden x
+#else
+#  define HIDDEN(x)
+#endif
+
+#endif /* __ASSYNTAX_H__ */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/clip_args.h b/nx-X11/extras/Mesa/src/mesa/x86/clip_args.h
new file mode 100644
index 000000000..ff9aa377f
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/clip_args.h
@@ -0,0 +1,60 @@
+/* $Id: clip_args.h,v 1.1.1.1 2004/06/16 09:19:35 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Clip test function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes
+ */
+
+#ifndef __CLIP_ARGS_H__
+#define __CLIP_ARGS_H__
+
+/*
+ * Offsets for clip_func arguments
+ *
+ * typedef GLvector4f *(*clip_func)( GLvector4f *clip_vec,
+ *	                             GLvector4f *proj_vec,
+ *	                             GLubyte clipMask[],
+ *	                             GLubyte *orMask,
+ *	                             GLubyte *andMask );
+ */
+
+#define OFFSET_SOURCE	4
+#define OFFSET_DEST	8
+#define OFFSET_CLIP	12
+#define OFFSET_OR	16
+#define OFFSET_AND	20
+
+#define ARG_SOURCE	REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+#define ARG_DEST	REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_CLIP	REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
+#define ARG_OR		REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
+#define ARG_AND		REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/common_x86.c b/nx-X11/extras/Mesa/src/mesa/x86/common_x86.c
new file mode 100644
index 000000000..0ff7fd51a
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/common_x86.c
@@ -0,0 +1,448 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.0.1
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file common_x86.c
+ *
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
+ *
+ * Changed by Andre Werthmann for using the new SSE functions.
+ *
+ * \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+
+/* XXX these includes should probably go into imports.h or glheader.h */
+#if defined(USE_SSE_ASM) && defined(__linux__)
+#include <signal.h>
+#endif
+#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#include "common_x86_asm.h"
+#include "imports.h"
+
+
+int _mesa_x86_cpu_features = 0;
+
+/* No reason for this to be public.
+ */
+extern GLuint	_ASMAPI _mesa_x86_has_cpuid(void);
+extern void	_ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
+extern GLuint	_ASMAPI _mesa_x86_cpuid_eax(GLuint op);
+extern GLuint	_ASMAPI _mesa_x86_cpuid_ebx(GLuint op);
+extern GLuint	_ASMAPI _mesa_x86_cpuid_ecx(GLuint op);
+extern GLuint	_ASMAPI _mesa_x86_cpuid_edx(GLuint op);
+
+static void message( const char *msg )
+{
+   /* Debug builds always print; other builds only when MESA_DEBUG is set. */
+#ifdef DEBUG
+   const GLboolean verbose = GL_TRUE;
+#else
+   const GLboolean verbose = _mesa_getenv( "MESA_DEBUG" ) ? GL_TRUE : GL_FALSE;
+#endif
+
+   if ( !verbose ) {
+      return;
+   }
+
+   /* msg is passed as data, never as the format string. */
+   fprintf( stderr, "%s", msg );
+}
+
+#if defined(USE_SSE_ASM)
+/*
+ * We must verify that the Streaming SIMD Extensions are truly supported
+ * on this processor before we go ahead and hook out the optimized code.
+ * Unfortunately, the CPUID bit isn't enough, as the OS must set the
+ * OSFXSR bit in CR4 if it supports the extended FPU save and restore
+ * required to use SSE.  Unfortunately, we can't just go ahead and read
+ * this register, as only the kernel can do that.  Similarly, we must
+ * verify that the OSXMMEXCPT bit in CR4 has been set by the OS,
+ * signifying that it supports unmasked SIMD FPU exceptions.  If we take
+ * an unmasked exception and the OS doesn't correctly support them, the
+ * best we'll get is a SIGILL and the worst we'll get is an infinite
+ * loop in the signal delivery from the kernel as we can't interact with
+ * the SIMD FPU state to clear the exception bits.  Either way, this is
+ * not good.
+ *
+ * However, I have been told by Alan Cox that all 2.4 (and later) Linux
+ * kernels provide full SSE support on all processors that expose SSE via
+ * the CPUID mechanism.  It just so happens that this is the exact set of
+ * kernels supported by DRI.  Therefore, when building for DRI the funky SSE
+ * exception test is omitted.
+ */
+
+extern void _mesa_test_os_sse_support( void );
+extern void _mesa_test_os_sse_exception_support( void );
+/* SIGILL/SIGFPE handlers that check_os_sse_support() installs around its SSE probes. */
+#if defined(__linux__) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) \
+   && !defined(IN_DRI_DRIVER)
+static void sigill_handler( int signal, struct sigcontext sc )
+{
+   message( "SIGILL, " );
+
+   /* Both the "xorps %xmm0,%xmm0" and "divps %xmm0,%xmm1"
+    * instructions are 3 bytes long.  We must increment the instruction
+    * pointer manually to avoid repeated execution of the offending
+    * instruction.  NOTE(review): sc arrives by value; presumably it
+    * overlays the kernel's signal frame so this write sticks -- confirm.
+    * If the SIGILL is caused by a divide-by-zero when unmasked
+    * exceptions aren't supported, the SIMD FPU status and control
+    * word will be restored at the end of the test, so we don't need
+    * to worry about doing it here.  Besides, we may not be able to...
+    */
+   sc.eip += 3;
+   /* Either way SSE is unusable: drop the XMM feature bit. */
+   _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+}
+
+static void sigfpe_handler( int signal, struct sigcontext sc )
+{
+   message( "SIGFPE, " );
+
+   if ( sc.fpstate->magic != 0xffff ) {
+      /* Our signal context has the extended FPU state, so reset the
+       * divide-by-zero exception mask and clear the divide-by-zero
+       * exception bit.
+       */
+      sc.fpstate->mxcsr |= 0x00000200;   /* set bit 9 (0x200) */
+      sc.fpstate->mxcsr &= 0xfffffffb;   /* clear bit 2 (0x4) */
+   } else {
+      /* If we ever get here, we're completely hosed.
+       */
+      message( "\n\n" );
+      _mesa_problem( NULL, "SSE enabling test failed badly!" );
+   }
+}
+#endif /* __linux__ && _POSIX_SOURCE && X86_FXSR_MAGIC && !IN_DRI_DRIVER */
+/* Win32 filter for the SSE probes: note the outcome, then skip the 3-byte probe insn. */
+#if defined(WIN32)
+#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
+# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
+#endif
+static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
+{
+   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
+   PCONTEXT ctx = exp->ContextRecord;
+   char buf[64];   /* large enough for the fixed text plus a hex exception code */
+
+   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
+      message( "EXCEPTION_ILLEGAL_INSTRUCTION, " );
+      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
+      message( "STATUS_FLOAT_MULTIPLE_TRAPS, " );
+      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
+   } else {
+      sprintf( buf, "UNEXPECTED EXCEPTION (0x%08lx), terminating!", (unsigned long) rec->ExceptionCode );
+      message( buf );   /* message() takes no varargs, hence the explicit formatting above */
+      return EXCEPTION_EXECUTE_HANDLER;
+   }
+   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
+      message( "Context does not contain control registers, terminating!" );
+      return EXCEPTION_EXECUTE_HANDLER;
+   }
+   ctx->Eip += 3;   /* resume after the faulting probe instruction */
+   return EXCEPTION_CONTINUE_EXECUTION;
+}
+#endif /* WIN32 */
+
+
+/* If we're running on a processor that can do SSE, let's see if we
+ * are allowed to or not.  This will catch 2.4.0 or later kernels that
+ * haven't been configured for a Pentium III but are running on one,
+ * and RedHat patched 2.2 kernels that have broken exception handling
+ * support for user space apps that do SSE.  On failure the XMM bit is
+ * cleared from _mesa_x86_cpu_features; nothing is returned.
+ * GH: Isn't this just awful?
+ */
+static void check_os_sse_support( void )
+{
+#if defined(__linux__) && !defined(IN_DRI_DRIVER)
+#if defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+   struct sigaction saved_sigill;
+   struct sigaction saved_sigfpe;
+
+   /* Save the original signal handlers.
+    */
+   sigaction( SIGILL, NULL, &saved_sigill );
+   sigaction( SIGFPE, NULL, &saved_sigfpe );
+
+   signal( SIGILL, (void (*)(int))sigill_handler );
+   signal( SIGFPE, (void (*)(int))sigfpe_handler );
+
+   /* Emulate test for OSFXSR in CR4.  The OS will set this bit if it
+    * supports the extended FPU save and restore required for SSE.  If
+    * we execute an SSE instruction on a PIII and get a SIGILL, the OS
+    * doesn't support Streaming SIMD Extensions, even if the processor
+    * does.
+    */
+   if ( cpu_has_xmm ) {
+      message( "Testing OS support for SSE... " );
+
+      _mesa_test_os_sse_support();
+
+      if ( cpu_has_xmm ) {
+	 message( "yes.\n" );
+      } else {
+	 message( "no!\n" );
+      }
+   }
+
+   /* Emulate test for OSXMMEXCPT in CR4.  The OS will set this bit if
+    * it supports unmasked SIMD FPU exceptions.  If we unmask the
+    * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS
+    * doesn't support unmasked SIMD FPU exceptions.  If we get a SIGFPE
+    * as expected, we're okay but we need to clean up after it.
+    *
+    * Are we being too stringent in our requirement that the OS support
+    * unmasked exceptions?  Certain RedHat 2.2 kernels enable SSE by
+    * setting CR4.OSFXSR but don't support unmasked exceptions.  Win98
+    * doesn't even support them.  We at least know the user-space SSE
+    * support is good in kernels that do support unmasked exceptions,
+    * and therefore to be safe I'm going to leave this test in here.
+    */
+   if ( cpu_has_xmm ) {
+      message( "Testing OS support for SSE unmasked exceptions... " );
+
+      _mesa_test_os_sse_exception_support();
+
+      if ( cpu_has_xmm ) {
+	 message( "yes.\n" );
+      } else {
+	 message( "no!\n" );
+      }
+   }
+
+   /* Restore the original signal handlers.
+    */
+   sigaction( SIGILL, &saved_sigill, NULL );
+   sigaction( SIGFPE, &saved_sigfpe, NULL );
+
+   /* If we've gotten to here and the XMM CPUID bit is still set, we're
+    * safe to go ahead and hook out the SSE code throughout Mesa.
+    */
+   if ( cpu_has_xmm ) {
+      message( "Tests of OS support for SSE passed.\n" );
+   } else {
+      message( "Tests of OS support for SSE failed!\n" );
+   }
+#else
+   /* We can't use POSIX signal handling to test the availability of
+    * SSE, so we disable it by default.
+    */
+   message( "Cannot test OS support for SSE, disabling to be safe.\n" );
+   _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+#endif /* _POSIX_SOURCE && X86_FXSR_MAGIC */
+#elif defined(__FreeBSD__)
+   {
+      int ret, enabled;
+      size_t len;   /* sysctlbyname() takes a size_t * for the old-value length */
+      len = sizeof(enabled);
+      ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
+      if (ret || !enabled)
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+   }
+#elif defined(WIN32)
+   LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;
+
+   /* Install our ExceptionFilter */
+   oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );
+
+   if ( cpu_has_xmm ) {
+      message( "Testing OS support for SSE... " );
+
+      _mesa_test_os_sse_support();
+
+      if ( cpu_has_xmm ) {
+	 message( "yes.\n" );
+      } else {
+	 message( "no!\n" );
+      }
+   }
+
+   if ( cpu_has_xmm ) {
+      message( "Testing OS support for SSE unmasked exceptions... " );
+
+      _mesa_test_os_sse_exception_support();
+
+      if ( cpu_has_xmm ) {
+	 message( "yes.\n" );
+      } else {
+	 message( "no!\n" );
+      }
+   }
+
+   /* Restore previous exception filter */
+   SetUnhandledExceptionFilter( oldFilter );
+
+   if ( cpu_has_xmm ) {
+      message( "Tests of OS support for SSE passed.\n" );
+   } else {
+      message( "Tests of OS support for SSE failed!\n" );
+   }
+#else
+   /* Do nothing on other platforms for now.
+    */
+   message( "Not testing OS support for SSE, leaving enabled.\n" );
+#endif /* __linux__ */
+}
+
+#endif /* USE_SSE_ASM */
+
+/* Detect CPU features with CPUID, honour the MESA_NO_* overrides, install asm paths. */
+void _mesa_init_all_x86_transform_asm( void )
+{
+   (void) message; /* silence warning */
+#ifdef USE_X86_ASM
+   _mesa_x86_cpu_features = 0;
+
+   if (!_mesa_x86_has_cpuid()) {
+       message("CPUID not detected");
+   }
+   else {
+       GLuint cpu_features;
+       GLuint cpu_ext_features;
+       GLuint cpu_ext_info;
+       char cpu_vendor[13];
+       GLuint result;
+
+       /* get vendor name: the string is assembled in EBX, EDX, ECX order */
+       _mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
+       cpu_vendor[12] = '\0';
+
+       message("cpu vendor: ");
+       message(cpu_vendor);
+       message("\n");
+
+       /* get cpu features */
+       cpu_features = _mesa_x86_cpuid_edx(1);
+
+       if (cpu_features & X86_CPU_FPU)
+	   _mesa_x86_cpu_features |= X86_FEATURE_FPU;
+       if (cpu_features & X86_CPU_CMOV)
+	   _mesa_x86_cpu_features |= X86_FEATURE_CMOV;
+
+#ifdef USE_MMX_ASM
+       if (cpu_features & X86_CPU_MMX)
+	   _mesa_x86_cpu_features |= X86_FEATURE_MMX;
+#endif
+
+#ifdef USE_SSE_ASM
+       if (cpu_features & X86_CPU_XMM)
+	   _mesa_x86_cpu_features |= X86_FEATURE_XMM;
+       if (cpu_features & X86_CPU_XMM2)
+	   _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
+#endif
+
+       /* query extended cpu features */
+       if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
+	   if (cpu_ext_info >= 0x80000001) {
+
+	       cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
+
+	       if (cpu_features & X86_CPU_MMX) {
+
+#ifdef USE_3DNOW_ASM
+		   if (cpu_ext_features & X86_CPUEXT_3DNOW)
+		       _mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
+		   if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
+		       _mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
+#endif
+
+#ifdef USE_MMX_ASM
+		   if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
+		       _mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
+#endif
+	       }
+	   }
+
+	   /* query cpu name: the loop below reads leaves 0x80000002..0x80000004 */
+	   if (cpu_ext_info >= 0x80000004) {
+	       GLuint ofs;
+	       char cpu_name[49];
+	       for (ofs = 0; ofs < 3; ofs++)
+		   _mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
+	       cpu_name[48] = '\0'; /* the name should be NUL terminated, but just to be sure */
+
+	       message("cpu name: ");
+	       message(cpu_name);
+	       message("\n");
+	   }
+       }
+
+   }
+
+   if ( _mesa_getenv( "MESA_NO_ASM" ) ) {
+      _mesa_x86_cpu_features = 0;
+   }
+
+   if ( _mesa_x86_cpu_features ) {
+      _mesa_init_x86_transform_asm();
+   }
+
+#ifdef USE_MMX_ASM
+   if ( cpu_has_mmx ) {
+      if ( _mesa_getenv( "MESA_NO_MMX" ) == 0 ) {
+         message( "MMX cpu detected.\n" );
+      } else {
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
+      }
+   }
+#endif
+
+#ifdef USE_3DNOW_ASM
+   if ( cpu_has_3dnow ) {
+      if ( _mesa_getenv( "MESA_NO_3DNOW" ) == 0 ) {
+         message( "3DNow! cpu detected.\n" );
+         _mesa_init_3dnow_transform_asm();
+      } else {
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
+      }
+   }
+#endif
+
+#ifdef USE_SSE_ASM
+   if ( cpu_has_xmm ) {
+      if ( _mesa_getenv( "MESA_NO_SSE" ) == 0 ) {
+         message( "SSE cpu detected.\n" );
+         if ( _mesa_getenv( "MESA_FORCE_SSE" ) == 0 ) {
+            check_os_sse_support();   /* may clear the XMM bit, hence the re-test below */
+         }
+         if ( cpu_has_xmm ) {
+            _mesa_init_sse_transform_asm();
+         }
+      } else {
+         message( "SSE cpu detected, but switched off by user.\n" );
+         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
+      }
+   }
+#endif
+#endif
+}
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.S b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.S
new file mode 100644
index 000000000..654b3469d
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.S
@@ -0,0 +1,215 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Check extended CPU capabilities.  Now just returns the raw CPUID
+ * feature information, allowing the higher level code to interpret the
+ * results.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ *
+ * Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+
+#include "matypes.h"
+#include "common_x86_features.h"
+
+	SEG_TEXT
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_has_cpuid)
+HIDDEN(_mesa_x86_has_cpuid)
+GLNAME(_mesa_x86_has_cpuid):
+	/* Returns 1 in EAX when CPUID is available, 0 otherwise. */
+	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
+	 * (bit 21) is writable, the CPUID command is present */
+	PUSHF_L
+	POP_L	(EAX)
+	MOV_L	(EAX, ECX)			/* keep the original EFLAGS for the compare */
+	XOR_L	(CONST(0x00200000), EAX)	/* toggle the ID bit */
+	PUSH_L	(EAX)
+	POPF_L
+	PUSHF_L
+	POP_L	(EAX)
+
+	/* AL = 1 if the ID bit took the new value; mask so EAX is exactly 0 or 1. */
+	CMP_L	(ECX, EAX)
+	SETNE	(AL)
+	AND_L	(CONST(0xff), EAX)
+
+	RET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid)
+HIDDEN(_mesa_x86_cpuid)
+GLNAME(_mesa_x86_cpuid):
+	/* Run CPUID for the op at 4(ESP) and store EAX..EDX through the four pointer args. */
+	MOV_L	(REGOFF(4, ESP), EAX)		/* cpuid op */
+	PUSH_L	(EDI)
+	PUSH_L	(EBX)
+
+	CPUID
+	/* The two pushes above shifted the arguments by 8, so the pointers start at 16(ESP). */
+	MOV_L	(REGOFF(16, ESP), EDI)	/* *eax */
+	MOV_L	(EAX, REGIND(EDI))
+	MOV_L	(REGOFF(20, ESP), EDI)	/* *ebx */
+	MOV_L	(EBX, REGIND(EDI))
+	MOV_L	(REGOFF(24, ESP), EDI)	/* *ecx */
+	MOV_L	(ECX, REGIND(EDI))
+	MOV_L	(REGOFF(28, ESP), EDI)	/* *edx */
+	MOV_L	(EDX, REGIND(EDI))
+
+	POP_L	(EBX)
+	POP_L	(EDI)
+	RET
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_eax)
+HIDDEN(_mesa_x86_cpuid_eax)
+GLNAME(_mesa_x86_cpuid_eax):
+	/* Return the EAX result of CPUID for the given op; EBX is saved and restored. */
+	MOV_L	(REGOFF(4, ESP), EAX)		/* cpuid op */
+	PUSH_L	(EBX)
+
+	CPUID
+	/* EAX already holds the value to return. */
+	POP_L	(EBX)
+	RET
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_ebx)
+HIDDEN(_mesa_x86_cpuid_ebx)
+GLNAME(_mesa_x86_cpuid_ebx):
+	/* Return the EBX result of CPUID for the given op; the caller's EBX is restored. */
+	MOV_L	(REGOFF(4, ESP), EAX)		/* cpuid op */
+	PUSH_L	(EBX)
+
+	CPUID
+	MOV_L	(EBX, EAX)			/* return EBX */
+
+	POP_L	(EBX)
+	RET
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_ecx)
+HIDDEN(_mesa_x86_cpuid_ecx)
+GLNAME(_mesa_x86_cpuid_ecx):
+	/* Return the ECX result of CPUID for the given op; EBX is saved and restored. */
+	MOV_L	(REGOFF(4, ESP), EAX)		/* cpuid op */
+	PUSH_L	(EBX)
+
+	CPUID
+	MOV_L	(ECX, EAX)			/* return ECX */
+
+	POP_L	(EBX)
+	RET
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_x86_cpuid_edx)
+HIDDEN(_mesa_x86_cpuid_edx)
+GLNAME(_mesa_x86_cpuid_edx):
+	/* Return the EDX result of CPUID for the given op; EBX is saved and restored. */
+	MOV_L	(REGOFF(4, ESP), EAX)		/* cpuid op */
+	PUSH_L	(EBX)
+
+	CPUID
+	MOV_L	(EDX, EAX)			/* return EDX */
+
+	POP_L	(EBX)
+	RET
+
+#ifdef USE_SSE_ASM
+/* Execute an SSE instruction to see if the operating system correctly
+ * supports SSE.  A signal handler for SIGILL should have been set
+ * before calling this function, otherwise this could kill the client
+ * application.
+ *
+ *        -----> !!!! ATTENTION DEVELOPERS !!!! <-----
+ *
+ * If you're debugging with gdb and you get stopped in this function,
+ * just type 'continue'!  Execution will proceed normally.
+ * See freedesktop.org bug #1709 for more info.
+ */
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_test_os_sse_support )
+HIDDEN(_mesa_test_os_sse_support)
+GLNAME( _mesa_test_os_sse_support ):
+	/* The probe is this single SSE instruction; the caller's handler observes any fault. */
+	XORPS	( XMM0, XMM0 )
+
+	RET
+
+
+/* Perform an SSE divide-by-zero to see if the operating system
+ * correctly supports unmasked SIMD FPU exceptions.  Signal handlers for
+ * SIGILL and SIGFPE should have been set before calling this function,
+ * otherwise this could kill the client application.
+ */
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_test_os_sse_exception_support )
+HIDDEN(_mesa_test_os_sse_exception_support)
+GLNAME( _mesa_test_os_sse_exception_support ):
+	/* Frame with two 4-byte slots: -4(EBP) saved MXCSR, -8(EBP) working copy. */
+	PUSH_L	( EBP )
+	MOV_L	( ESP, EBP )
+	SUB_L	( CONST( 8 ), ESP )
+
+	/* Save the original MXCSR register value.
+	 */
+	STMXCSR	( REGOFF( -4, EBP ) )
+
+	/* Unmask the divide-by-zero exception and perform one.
+	 */
+	STMXCSR	( REGOFF( -8, EBP ) )
+	AND_L	( CONST( 0xfffffdff ), REGOFF( -8, EBP ) )	/* clear bit 9 (0x200) */
+	LDMXCSR	( REGOFF( -8, EBP ) )
+
+	XORPS	( XMM0, XMM0 )
+	/* Build the dividend on the stack: four copies of 0x3f800000 (1.0f). */
+	PUSH_L	( CONST( 0x3f800000 ) )
+	PUSH_L	( CONST( 0x3f800000 ) )
+	PUSH_L	( CONST( 0x3f800000 ) )
+	PUSH_L	( CONST( 0x3f800000 ) )
+
+	MOVUPS	( REGIND( ESP ), XMM1 )
+
+	DIVPS	( XMM0, XMM1 )
+
+	/* Restore the original MXCSR register value.
+	 */
+	LDMXCSR	( REGOFF( -4, EBP ) )
+	/* LEAVE resets ESP from EBP, which also discards the four pushed dwords. */
+	LEAVE
+	RET
+
+#endif
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.h b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.h
new file mode 100644
index 000000000..a59585abe
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_asm.h
@@ -0,0 +1,60 @@
+/* $Id: common_x86_asm.h,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Check CPU capabilities & initialize optimized functions for this particular
+ * processor.
+ *
+ * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
+ * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
+ * new SSE functions
+ *
+ * Reimplemented by Gareth Hughes in a more
+ * future-proof manner, based on code in the Linux kernel.
+ */
+
+#ifndef __COMMON_X86_ASM_H__
+#define __COMMON_X86_ASM_H__
+
+/* Do not reference mtypes.h from this file.
+ */
+#include "common_x86_features.h"
+
+#ifdef USE_X86_ASM
+#include "x86.h"
+#ifdef USE_3DNOW_ASM
+#include "3dnow.h"
+#endif
+#ifdef USE_SSE_ASM
+#include "sse.h"
+#endif
+#endif
+
+extern int _mesa_x86_cpu_features;
+
+extern void _mesa_init_all_x86_transform_asm( void );
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/common_x86_features.h b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_features.h
new file mode 100644
index 000000000..d6f488fc7
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_features.h
@@ -0,0 +1,68 @@
+/* $Id: common_x86_features.h,v 1.1.1.1 2004/06/16 09:19:36 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.1
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * x86 CPUID feature information.  The raw data is returned by
+ * _mesa_identify_x86_cpu_features() and interpreted with the cpu_has_*
+ * helper macros.
+ *
+ * Gareth Hughes
+ */
+
+#ifndef __COMMON_X86_FEATURES_H__
+#define __COMMON_X86_FEATURES_H__
+
+#define X86_FEATURE_FPU		(1<<0)
+#define X86_FEATURE_CMOV	(1<<1)
+#define X86_FEATURE_MMXEXT	(1<<2)
+#define X86_FEATURE_MMX		(1<<3)
+#define X86_FEATURE_FXSR	(1<<4)
+#define X86_FEATURE_XMM		(1<<5)
+#define X86_FEATURE_XMM2	(1<<6)
+#define X86_FEATURE_3DNOWEXT	(1<<7)
+#define X86_FEATURE_3DNOW	(1<<8)
+
+/* standard X86 CPU features (bit numbers as reported by CPUID function 1 in EDX) */
+#define X86_CPU_FPU		(1<<0)
+#define X86_CPU_CMOV		(1<<15)
+#define X86_CPU_MMX		(1<<23)
+#define X86_CPU_XMM		(1<<25)
+#define X86_CPU_XMM2		(1<<26)
+
+/* extended X86 CPU features; bit 31 uses 1U because 1<<31 overflows a signed int */
+#define X86_CPUEXT_MMX_EXT	(1<<22)
+#define X86_CPUEXT_3DNOW_EXT	(1<<30)
+#define X86_CPUEXT_3DNOW	(1U<<31)
+/* Each cpu_has_* is nonzero when that X86_FEATURE_* bit is set in _mesa_x86_cpu_features. */
+#define cpu_has_mmx		(_mesa_x86_cpu_features & X86_FEATURE_MMX)
+#define cpu_has_mmxext		(_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
+#define cpu_has_xmm		(_mesa_x86_cpu_features & X86_FEATURE_XMM)
+#define cpu_has_xmm2		(_mesa_x86_cpu_features & X86_FEATURE_XMM2)
+#define cpu_has_3dnow		(_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
+#define cpu_has_3dnowext	(_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)
+
+#endif
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/common_x86_macros.h b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_macros.h
new file mode 100644
index 000000000..8741ff11b
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/common_x86_macros.h
@@ -0,0 +1,107 @@
+/* $Id: common_x86_macros.h,v 1.1.1.1 2004/06/16 09:19:36 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes
+ */
+
+#ifndef __COMMON_X86_MACROS_H__
+#define __COMMON_X86_MACROS_H__
+
+
+/* =============================================================
+ * Transformation function declarations.  XFORM_ARGS is the parameter
+ * list shared by all of them: destination, 16-float matrix, source.
+ */
+#define XFORM_ARGS	GLvector4f *to_vec,				\
+			const GLfloat m[16],				\
+			const GLvector4f *from_vec
+/* Declares the seven _mesa_<pfx>_transform_points<sz>_* routines as externs. */
+#define DECLARE_XFORM_GROUP( pfx, sz ) \
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS );		\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS );	\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS );	\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS );	\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS );		\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS );	\
+extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
+/* Stores them in _mesa_transform_tab[sz]; expands to 7 statements, so brace any if/else body. */
+#define ASSIGN_XFORM_GROUP( pfx, sz )					\
+   _mesa_transform_tab[sz][MATRIX_GENERAL] =				\
+      _mesa_##pfx##_transform_points##sz##_general;			\
+   _mesa_transform_tab[sz][MATRIX_IDENTITY] =				\
+      _mesa_##pfx##_transform_points##sz##_identity;			\
+   _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_3d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] =			\
+      _mesa_##pfx##_transform_points##sz##_perspective;			\
+   _mesa_transform_tab[sz][MATRIX_2D] =					\
+      _mesa_##pfx##_transform_points##sz##_2d;				\
+   _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_2d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_3D] =					\
+      _mesa_##pfx##_transform_points##sz##_3d;
+
+
+/* =============================================================
+ * Normal transformation function declarations.  NORM_ARGS is the
+ * parameter list shared by all of them.
+ */
+#define NORM_ARGS	const GLmatrix *mat,				\
+			GLfloat scale,					\
+			const GLvector4f *in,				\
+			const GLfloat *lengths,				\
+			GLvector4f *dest
+/* Declares the eight _mesa_<pfx>_*_normals* routines as externs. */
+#define DECLARE_NORM_GROUP( pfx ) \
+extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS );				\
+extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS );			\
+extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS );			\
+extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS );		\
+extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS );		\
+extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS );	\
+extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS );		\
+extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
+/* Stores them in _mesa_normal_tab[]; expands to 8 statements, so brace any if/else body. */
+#define ASSIGN_NORM_GROUP( pfx )					\
+   _mesa_normal_tab[NORM_RESCALE] =					\
+      _mesa_##pfx##_rescale_normals;					\
+   _mesa_normal_tab[NORM_NORMALIZE] =					\
+      _mesa_##pfx##_normalize_normals;					\
+   _mesa_normal_tab[NORM_TRANSFORM] =					\
+      _mesa_##pfx##_transform_normals;					\
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =				\
+      _mesa_##pfx##_transform_normals_no_rot;				\
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =			\
+      _mesa_##pfx##_transform_rescale_normals;				\
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =		\
+      _mesa_##pfx##_transform_rescale_normals_no_rot;			\
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =			\
+      _mesa_##pfx##_transform_normalize_normals;			\
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =		\
+      _mesa_##pfx##_transform_normalize_normals_no_rot;
+
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/gen_matypes.c b/nx-X11/extras/Mesa/src/mesa/x86/gen_matypes.c
new file mode 100644
index 000000000..d5cee5347
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/gen_matypes.c
@@ -0,0 +1,251 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.1
+ *
+ * Copyright (C) 1999-2003  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes
+ */
+
+/*
+ * This generates an asm version of mtypes.h (called matypes.h), so that
+ * Mesa's x86 assembly code can access the internal structures easily.
+ * This will be particularly useful when developing new x86 asm code for
+ * Mesa, including lighting, clipping, texture image conversion etc.
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "tnl/t_context.h"
+
+
+#undef offsetof
+#define offsetof( type, member ) ((size_t) &((type *)0)->member)
+
+/* Print the banner comment that opens a section of offsets for x. */
+#define OFFSET_HEADER( x )						\
+do {									\
+   printf( "\n"								\
+	   "\n"								\
+	   "/* ====================================================="	\
+	   "========\n"							\
+	   " * Offsets for %s\n"					\
+	   " */\n"							\
+	   "\n", x );							\
+} while (0)
+/* Print the banner comment that opens a section of flag values for x. */
+#define DEFINE_HEADER( x )						\
+do {									\
+   printf( "\n"								\
+	   "/*\n"							\
+	   " * Flags for %s\n"						\
+	   " */\n"							\
+	   "\n", x );							\
+} while (0)
+
+/* offsetof() and sizeof yield a size_t, whose width differs between data
+ * models (ILP32, LP64, LLP64).  Cast to unsigned long and print with %lu so
+ * the conversion always matches its argument, instead of choosing between
+ * %d and %ld from platform macros.  The emitted text is unchanged for the
+ * small values these macros print.
+ *
+ * OFFSET( s, t, m ) prints "#define <s>\t<byte offset of member m in type t>".
+ */
+#define OFFSET( s, t, m )						\
+   printf( "#define %s\t%lu\n", s, (unsigned long) offsetof( t, m ) );
+
+/* SIZEOF( s, t ) prints "#define <s>\t<sizeof(t)>" in decimal. */
+#define SIZEOF( s, t )							\
+   printf( "#define %s\t%lu\n", s, (unsigned long) sizeof(t) );
+/* DEFINE( s, d ) prints "#define <s>\t0x<d in hex>"; d must be int-sized. */
+#define DEFINE( s, d )							\
+   printf( "#define %s\t0x%x\n", s, d );
+
+
+/* Prints the generated header text (offsets, sizes and flag values as
+ * #define lines) to stdout and returns 0.  argc and argv are unused. */
+int main( int argc, char **argv )
+{
+   printf( "/*\n" );
+   printf( " * This file is automatically generated from the Mesa internal type\n" );
+   printf( " * definitions.  Do not edit directly.\n" );
+   printf( " */\n" );
+   printf( "\n" );
+   printf( "#ifndef __ASM_TYPES_H__\n" );
+   printf( "#define __ASM_TYPES_H__\n" );
+   printf( "\n" );
+   printf( "#include \"assyntax.h\"\n" );
+
+
+   /* GLcontext offsets:
+    */
+   OFFSET_HEADER( "GLcontext" );
+
+   OFFSET( "CTX_DRIVER_CTX              ", GLcontext, DriverCtx );
+   printf( "\n" );
+   OFFSET( "CTX_LIGHT_ENABLED           ", GLcontext, Light.Enabled );
+   OFFSET( "CTX_LIGHT_SHADE_MODEL       ", GLcontext, Light.ShadeModel );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_FACE    ", GLcontext, Light.ColorMaterialFace );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_MODE    ", GLcontext, Light.ColorMaterialMode );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_MASK    ", GLcontext, Light.ColorMaterialBitmask );
+   OFFSET( "CTX_LIGHT_COLOR_MAT_ENABLED ", GLcontext, Light.ColorMaterialEnabled );
+   OFFSET( "CTX_LIGHT_ENABLED_LIST      ", GLcontext, Light.EnabledList );
+   OFFSET( "CTX_LIGHT_NEED_VERTS        ", GLcontext, Light._NeedVertices );
+   OFFSET( "CTX_LIGHT_FLAGS             ", GLcontext, Light._Flags );
+   OFFSET( "CTX_LIGHT_BASE_COLOR        ", GLcontext, Light._BaseColor );
+
+
+   /* struct vertex_buffer offsets:
+    */
+   OFFSET_HEADER( "struct vertex_buffer" );
+
+   OFFSET( "VB_SIZE                ", struct vertex_buffer, Size );
+   OFFSET( "VB_COUNT               ", struct vertex_buffer, Count );
+   printf( "\n" );
+   OFFSET( "VB_ELTS                ", struct vertex_buffer, Elts );
+   OFFSET( "VB_OBJ_PTR             ", struct vertex_buffer, ObjPtr );
+   OFFSET( "VB_EYE_PTR             ", struct vertex_buffer, EyePtr );
+   OFFSET( "VB_CLIP_PTR            ", struct vertex_buffer, ClipPtr );
+   OFFSET( "VB_PROJ_CLIP_PTR       ", struct vertex_buffer, NdcPtr );
+   OFFSET( "VB_CLIP_OR_MASK        ", struct vertex_buffer, ClipOrMask );
+   OFFSET( "VB_CLIP_MASK           ", struct vertex_buffer, ClipMask );
+   OFFSET( "VB_NORMAL_PTR          ", struct vertex_buffer, NormalPtr );
+   OFFSET( "VB_EDGE_FLAG           ", struct vertex_buffer, EdgeFlag );
+   OFFSET( "VB_TEX0_COORD_PTR      ", struct vertex_buffer, TexCoordPtr[0] );
+   OFFSET( "VB_TEX1_COORD_PTR      ", struct vertex_buffer, TexCoordPtr[1] );
+   OFFSET( "VB_TEX2_COORD_PTR      ", struct vertex_buffer, TexCoordPtr[2] );
+   OFFSET( "VB_TEX3_COORD_PTR      ", struct vertex_buffer, TexCoordPtr[3] );
+   OFFSET( "VB_INDEX_PTR           ", struct vertex_buffer, IndexPtr );
+   OFFSET( "VB_COLOR_PTR           ", struct vertex_buffer, ColorPtr );
+   OFFSET( "VB_SECONDARY_COLOR_PTR ", struct vertex_buffer, SecondaryColorPtr );
+   OFFSET( "VB_FOG_COORD_PTR       ", struct vertex_buffer, FogCoordPtr );
+   OFFSET( "VB_POINT_SIZE_PTR      ", struct vertex_buffer, PointSizePtr );
+   OFFSET( "VB_PRIMITIVE           ", struct vertex_buffer, Primitive );
+   printf( "\n" );
+   OFFSET( "VB_LAST_CLIPPED        ", struct vertex_buffer, LastClipped );
+
+   DEFINE_HEADER( "struct vertex_buffer" );
+
+   /* XXX use new labels here someday after vertex program is done */
+   DEFINE( "VERT_BIT_OBJ           ", VERT_BIT_POS );
+   DEFINE( "VERT_BIT_NORM          ", VERT_BIT_NORMAL );
+   DEFINE( "VERT_BIT_RGBA          ", VERT_BIT_COLOR0 );
+   DEFINE( "VERT_BIT_SPEC_RGB      ", VERT_BIT_COLOR1 );
+   DEFINE( "VERT_BIT_FOG_COORD     ", VERT_BIT_FOG );
+   DEFINE( "VERT_BIT_TEX0          ", VERT_BIT_TEX0 );
+   DEFINE( "VERT_BIT_TEX1          ", VERT_BIT_TEX1 );
+   DEFINE( "VERT_BIT_TEX2          ", VERT_BIT_TEX2 );
+   DEFINE( "VERT_BIT_TEX3          ", VERT_BIT_TEX3 );
+
+
+   /* GLvector4f offsets:
+    */
+   OFFSET_HEADER( "GLvector4f" );
+
+   OFFSET( "V4F_DATA          ", GLvector4f, data );
+   OFFSET( "V4F_START         ", GLvector4f, start );
+   OFFSET( "V4F_COUNT         ", GLvector4f, count );
+   OFFSET( "V4F_STRIDE        ", GLvector4f, stride );
+   OFFSET( "V4F_SIZE          ", GLvector4f, size );
+   OFFSET( "V4F_FLAGS         ", GLvector4f, flags );
+
+   DEFINE_HEADER( "GLvector4f" );
+
+   DEFINE( "VEC_MALLOC        ", VEC_MALLOC );
+   DEFINE( "VEC_NOT_WRITEABLE ", VEC_NOT_WRITEABLE );
+   DEFINE( "VEC_BAD_STRIDE    ", VEC_BAD_STRIDE );
+   printf( "\n" );
+   DEFINE( "VEC_SIZE_1        ", VEC_SIZE_1 );
+   DEFINE( "VEC_SIZE_2        ", VEC_SIZE_2 );
+   DEFINE( "VEC_SIZE_3        ", VEC_SIZE_3 );
+   DEFINE( "VEC_SIZE_4        ", VEC_SIZE_4 );
+
+
+   /* GLmatrix offsets:
+    */
+   OFFSET_HEADER( "GLmatrix" );
+
+   OFFSET( "MATRIX_DATA   ", GLmatrix, m );
+   OFFSET( "MATRIX_INV    ", GLmatrix, inv );
+   OFFSET( "MATRIX_FLAGS  ", GLmatrix, flags );
+   OFFSET( "MATRIX_TYPE   ", GLmatrix, type );
+
+
+   /* struct gl_light offsets:
+    */
+   OFFSET_HEADER( "struct gl_light" );
+
+   OFFSET( "LIGHT_NEXT              ", struct gl_light, next );
+   OFFSET( "LIGHT_PREV              ", struct gl_light, prev );
+   printf( "\n" );
+   OFFSET( "LIGHT_AMBIENT           ", struct gl_light, Ambient );
+   OFFSET( "LIGHT_DIFFUSE           ", struct gl_light, Diffuse );
+   OFFSET( "LIGHT_SPECULAR          ", struct gl_light, Specular );
+   OFFSET( "LIGHT_EYE_POSITION      ", struct gl_light, EyePosition );
+   OFFSET( "LIGHT_EYE_DIRECTION     ", struct gl_light, EyeDirection );
+   OFFSET( "LIGHT_SPOT_EXPONENT     ", struct gl_light, SpotExponent );
+   OFFSET( "LIGHT_SPOT_CUTOFF       ", struct gl_light, SpotCutoff );
+   OFFSET( "LIGHT_COS_CUTOFF        ", struct gl_light, _CosCutoff );
+   OFFSET( "LIGHT_CONST_ATTEN       ", struct gl_light, ConstantAttenuation );
+   OFFSET( "LIGHT_LINEAR_ATTEN      ", struct gl_light, LinearAttenuation );
+   OFFSET( "LIGHT_QUADRATIC_ATTEN   ", struct gl_light, QuadraticAttenuation );
+   OFFSET( "LIGHT_ENABLED           ", struct gl_light, Enabled );
+   printf( "\n" );
+   OFFSET( "LIGHT_FLAGS             ", struct gl_light, _Flags );
+   printf( "\n" );
+   OFFSET( "LIGHT_POSITION          ", struct gl_light, _Position );
+   OFFSET( "LIGHT_VP_INF_NORM       ", struct gl_light, _VP_inf_norm );
+   OFFSET( "LIGHT_H_INF_NORM        ", struct gl_light, _h_inf_norm );
+   OFFSET( "LIGHT_NORM_DIRECTION    ", struct gl_light, _NormDirection );
+   OFFSET( "LIGHT_VP_INF_SPOT_ATTEN ", struct gl_light, _VP_inf_spot_attenuation );
+   printf( "\n" );
+   OFFSET( "LIGHT_SPOT_EXP_TABLE    ", struct gl_light, _SpotExpTable );
+   OFFSET( "LIGHT_MAT_AMBIENT       ", struct gl_light, _MatAmbient );
+   OFFSET( "LIGHT_MAT_DIFFUSE       ", struct gl_light, _MatDiffuse );
+   OFFSET( "LIGHT_MAT_SPECULAR      ", struct gl_light, _MatSpecular );
+   printf( "\n" );
+   SIZEOF( "SIZEOF_GL_LIGHT         ", struct gl_light );
+
+   DEFINE_HEADER( "struct gl_light" );
+
+   DEFINE( "LIGHT_SPOT              ", LIGHT_SPOT );
+   DEFINE( "LIGHT_LOCAL_VIEWER      ", LIGHT_LOCAL_VIEWER );
+   DEFINE( "LIGHT_POSITIONAL        ", LIGHT_POSITIONAL );
+   printf( "\n" );
+   DEFINE( "LIGHT_NEED_VERTICES     ", LIGHT_NEED_VERTICES );
+
+
+   /* struct gl_lightmodel offsets:
+    */
+   OFFSET_HEADER( "struct gl_lightmodel" );
+
+   OFFSET( "LIGHT_MODEL_AMBIENT       ", struct gl_lightmodel, Ambient );
+   OFFSET( "LIGHT_MODEL_LOCAL_VIEWER  ", struct gl_lightmodel, LocalViewer );
+   OFFSET( "LIGHT_MODEL_TWO_SIDE      ", struct gl_lightmodel, TwoSide );
+   OFFSET( "LIGHT_MODEL_COLOR_CONTROL ", struct gl_lightmodel, ColorControl );
+
+
+   printf( "\n" );
+   printf( "\n" );
+   printf( "#endif /* __ASM_TYPES_H__ */\n" );
+
+   return 0;
+}
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/glapi_x86.S b/nx-X11/extras/Mesa/src/mesa/x86/glapi_x86.S
new file mode 100644
index 000000000..231027ed6
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/glapi_x86.S
@@ -0,0 +1,1155 @@
+/* DO NOT EDIT - This file generated automatically by gl_x86_asm.py (from Mesa) script */
+
+/*
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ * (C) Copyright IBM Corporation 2004, 2005
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL, IBM,
+ * AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "assyntax.h"
+#include "glapioffsets.h"
+
+#if defined(STDCALL_API) /* symbol is gl/mgl + n2, the name@N spelling the stubs pass */
+# if defined(USE_MGL_NAMESPACE)
+#  define GL_PREFIX(n,n2) GLNAME(CONCAT(mgl,n2))
+# else
+#  define GL_PREFIX(n,n2) GLNAME(CONCAT(gl,n2))
+# endif
+#else /* symbol is gl/mgl + n, the plain name */
+# if defined(USE_MGL_NAMESPACE)
+#  define GL_PREFIX(n,n2) GLNAME(CONCAT(mgl,n))
+# else
+#  define GL_PREFIX(n,n2) GLNAME(CONCAT(gl,n))
+# endif
+#endif
+/* GL_OFFSET(x): operand for the code pointer stored 4*x bytes past EAX. */
+#define GL_OFFSET(x) CODEPTR(REGOFF(4 * x, EAX))
+/* GLOBL_FN(x): export x; on GNU as (not DJGPP/MinGW) also give it type "function". */
+#if defined(GNU_ASSEMBLER) && !defined(__DJGPP__) && !defined(__MINGW32__)
+#define GLOBL_FN(x) GLOBL x ; .type x, function
+#else
+#define GLOBL_FN(x) GLOBL x
+#endif
+/* THREADS is defined when any one of the thread packages below is selected. */
+#if defined(PTHREADS) || defined(USE_XTHREADS) || defined(SOLARIS_THREADS) || defined(WIN32_THREADS) || defined(BEOS_THREADS)
+#  define THREADS
+#endif
+
+#ifdef GLX_USE_TLS
+/* GL_STUB(fn,off,fn_alt): one exported GL entry point; gets the dispatch table in EAX, jumps via slot off. */
+#  define GL_STUB(fn,off,fn_alt)			\
+ALIGNTEXT16;						\
+GLOBL_FN(GL_PREFIX(fn, fn_alt));			\
+GL_PREFIX(fn, fn_alt):					\
+	CALL(_x86_get_dispatch) ;			\
+	NOP ;						\
+	JMP(GL_OFFSET(off))
+
+#elif defined(PTHREADS) /* try the global _glapi_Dispatch; if it is NULL, call _x86_get_dispatch */
+#  define GL_STUB(fn,off,fn_alt)			\
+ALIGNTEXT16;						\
+GLOBL_FN(GL_PREFIX(fn, fn_alt));			\
+GL_PREFIX(fn, fn_alt):					\
+	MOV_L(CONTENT(GLNAME(_glapi_Dispatch)), EAX) ;	\
+	TEST_L(EAX, EAX) ;				\
+	JE(1f) ;					\
+	JMP(GL_OFFSET(off)) ;				\
+1:	CALL(_x86_get_dispatch) ;			\
+	JMP(GL_OFFSET(off))
+#elif defined(THREADS) /* same shape, but the NULL fallback calls _glapi_get_dispatch */
+#  define GL_STUB(fn,off,fn_alt)			\
+ALIGNTEXT16;						\
+GLOBL_FN(GL_PREFIX(fn, fn_alt));			\
+GL_PREFIX(fn, fn_alt):					\
+	MOV_L(CONTENT(GLNAME(_glapi_Dispatch)), EAX) ;	\
+	TEST_L(EAX, EAX) ;				\
+	JE(1f) ;					\
+	JMP(GL_OFFSET(off)) ;				\
+1:	CALL(_glapi_get_dispatch) ;			\
+	JMP(GL_OFFSET(off))
+#else /* Non-threaded version. */
+#  define GL_STUB(fn,off,fn_alt)			\
+ALIGNTEXT16;						\
+GLOBL_FN(GL_PREFIX(fn, fn_alt));			\
+GL_PREFIX(fn, fn_alt):					\
+	MOV_L(CONTENT(GLNAME(_glapi_Dispatch)), EAX) ;	\
+	JMP(GL_OFFSET(off))
+#endif
+/* GL_STUB_ALIAS: with HAVE_ALIAS, export fn as a second name for alias; otherwise emit a full stub. */
+#ifdef HAVE_ALIAS
+#  define GL_STUB_ALIAS(fn,off,fn_alt,alias,alias_alt)	\
+	.globl	GL_PREFIX(fn, fn_alt) ;			\
+	.set	GL_PREFIX(fn, fn_alt), GL_PREFIX(alias, alias_alt)
+#else
+#  define GL_STUB_ALIAS(fn,off,fn_alt,alias,alias_alt)	\
+    GL_STUB(fn, off, fn_alt)
+#endif
+
+SEG_TEXT
+
+#ifdef GLX_USE_TLS
+/* TLS build: load _glapi_tls_Dispatch from the thread's %gs-relative storage into EAX. */
+	GLOBL	GLNAME(_x86_get_dispatch)
+	HIDDEN(GLNAME(_x86_get_dispatch))
+ALIGNTEXT16
+GLNAME(_x86_get_dispatch):
+	movl	%gs:_glapi_tls_Dispatch@NTPOFF, %eax
+	ret
+
+#elif defined(PTHREADS)
+EXTERN GLNAME(_glapi_Dispatch)
+EXTERN GLNAME(_gl_DispatchTSD)
+EXTERN GLNAME(pthread_getspecific)
+/* pthreads build: calls pthread_getspecific with the word at _gl_DispatchTSD; SUB 24 + PUSH 4 is undone by ADD 28. */
+ALIGNTEXT16
+GLNAME(_x86_get_dispatch):
+	SUB_L(CONST(24), ESP)
+	PUSH_L(GLNAME(_gl_DispatchTSD))
+	CALL(GLNAME(pthread_getspecific))
+	ADD_L(CONST(28), ESP)
+	RET
+#elif defined(THREADS)
+EXTERN GLNAME(_glapi_get_dispatch)
+#endif
+
+#if defined( GLX_USE_TLS )
+		.section	wtext, "awx", @progbits
+#endif /* defined( GLX_USE_TLS ) */
+
+		ALIGNTEXT16
+		GLOBL GLNAME(gl_dispatch_functions_start)
+		HIDDEN(GLNAME(gl_dispatch_functions_start))
+GLNAME(gl_dispatch_functions_start):
+
+	GL_STUB(NewList, _gloffset_NewList, NewList@8)
+	GL_STUB(EndList, _gloffset_EndList, EndList@0)
+	GL_STUB(CallList, _gloffset_CallList, CallList@4)
+	GL_STUB(CallLists, _gloffset_CallLists, CallLists@12)
+	GL_STUB(DeleteLists, _gloffset_DeleteLists, DeleteLists@8)
+	GL_STUB(GenLists, _gloffset_GenLists, GenLists@4)
+	GL_STUB(ListBase, _gloffset_ListBase, ListBase@4)
+	GL_STUB(Begin, _gloffset_Begin, Begin@4)
+	GL_STUB(Bitmap, _gloffset_Bitmap, Bitmap@28)
+	GL_STUB(Color3b, _gloffset_Color3b, Color3b@12)
+	GL_STUB(Color3bv, _gloffset_Color3bv, Color3bv@4)
+	GL_STUB(Color3d, _gloffset_Color3d, Color3d@24)
+	GL_STUB(Color3dv, _gloffset_Color3dv, Color3dv@4)
+	GL_STUB(Color3f, _gloffset_Color3f, Color3f@12)
+	GL_STUB(Color3fv, _gloffset_Color3fv, Color3fv@4)
+	GL_STUB(Color3i, _gloffset_Color3i, Color3i@12)
+	GL_STUB(Color3iv, _gloffset_Color3iv, Color3iv@4)
+	GL_STUB(Color3s, _gloffset_Color3s, Color3s@12)
+	GL_STUB(Color3sv, _gloffset_Color3sv, Color3sv@4)
+	GL_STUB(Color3ub, _gloffset_Color3ub, Color3ub@12)
+	GL_STUB(Color3ubv, _gloffset_Color3ubv, Color3ubv@4)
+	GL_STUB(Color3ui, _gloffset_Color3ui, Color3ui@12)
+	GL_STUB(Color3uiv, _gloffset_Color3uiv, Color3uiv@4)
+	GL_STUB(Color3us, _gloffset_Color3us, Color3us@12)
+	GL_STUB(Color3usv, _gloffset_Color3usv, Color3usv@4)
+	GL_STUB(Color4b, _gloffset_Color4b, Color4b@16)
+	GL_STUB(Color4bv, _gloffset_Color4bv, Color4bv@4)
+	GL_STUB(Color4d, _gloffset_Color4d, Color4d@32)
+	GL_STUB(Color4dv, _gloffset_Color4dv, Color4dv@4)
+	GL_STUB(Color4f, _gloffset_Color4f, Color4f@16)
+	GL_STUB(Color4fv, _gloffset_Color4fv, Color4fv@4)
+	GL_STUB(Color4i, _gloffset_Color4i, Color4i@16)
+	GL_STUB(Color4iv, _gloffset_Color4iv, Color4iv@4)
+	GL_STUB(Color4s, _gloffset_Color4s, Color4s@16)
+	GL_STUB(Color4sv, _gloffset_Color4sv, Color4sv@4)
+	GL_STUB(Color4ub, _gloffset_Color4ub, Color4ub@16)
+	GL_STUB(Color4ubv, _gloffset_Color4ubv, Color4ubv@4)
+	GL_STUB(Color4ui, _gloffset_Color4ui, Color4ui@16)
+	GL_STUB(Color4uiv, _gloffset_Color4uiv, Color4uiv@4)
+	GL_STUB(Color4us, _gloffset_Color4us, Color4us@16)
+	GL_STUB(Color4usv, _gloffset_Color4usv, Color4usv@4)
+	GL_STUB(EdgeFlag, _gloffset_EdgeFlag, EdgeFlag@4)
+	GL_STUB(EdgeFlagv, _gloffset_EdgeFlagv, EdgeFlagv@4)
+	GL_STUB(End, _gloffset_End, End@0)
+	GL_STUB(Indexd, _gloffset_Indexd, Indexd@8)
+	GL_STUB(Indexdv, _gloffset_Indexdv, Indexdv@4)
+	GL_STUB(Indexf, _gloffset_Indexf, Indexf@4)
+	GL_STUB(Indexfv, _gloffset_Indexfv, Indexfv@4)
+	GL_STUB(Indexi, _gloffset_Indexi, Indexi@4)
+	GL_STUB(Indexiv, _gloffset_Indexiv, Indexiv@4)
+	GL_STUB(Indexs, _gloffset_Indexs, Indexs@4)
+	GL_STUB(Indexsv, _gloffset_Indexsv, Indexsv@4)
+	GL_STUB(Normal3b, _gloffset_Normal3b, Normal3b@12)
+	GL_STUB(Normal3bv, _gloffset_Normal3bv, Normal3bv@4)
+	GL_STUB(Normal3d, _gloffset_Normal3d, Normal3d@24)
+	GL_STUB(Normal3dv, _gloffset_Normal3dv, Normal3dv@4)
+	GL_STUB(Normal3f, _gloffset_Normal3f, Normal3f@12)
+	GL_STUB(Normal3fv, _gloffset_Normal3fv, Normal3fv@4)
+	GL_STUB(Normal3i, _gloffset_Normal3i, Normal3i@12)
+	GL_STUB(Normal3iv, _gloffset_Normal3iv, Normal3iv@4)
+	GL_STUB(Normal3s, _gloffset_Normal3s, Normal3s@12)
+	GL_STUB(Normal3sv, _gloffset_Normal3sv, Normal3sv@4)
+	GL_STUB(RasterPos2d, _gloffset_RasterPos2d, RasterPos2d@16)
+	GL_STUB(RasterPos2dv, _gloffset_RasterPos2dv, RasterPos2dv@4)
+	GL_STUB(RasterPos2f, _gloffset_RasterPos2f, RasterPos2f@8)
+	GL_STUB(RasterPos2fv, _gloffset_RasterPos2fv, RasterPos2fv@4)
+	GL_STUB(RasterPos2i, _gloffset_RasterPos2i, RasterPos2i@8)
+	GL_STUB(RasterPos2iv, _gloffset_RasterPos2iv, RasterPos2iv@4)
+	GL_STUB(RasterPos2s, _gloffset_RasterPos2s, RasterPos2s@8)
+	GL_STUB(RasterPos2sv, _gloffset_RasterPos2sv, RasterPos2sv@4)
+	GL_STUB(RasterPos3d, _gloffset_RasterPos3d, RasterPos3d@24)
+	GL_STUB(RasterPos3dv, _gloffset_RasterPos3dv, RasterPos3dv@4)
+	GL_STUB(RasterPos3f, _gloffset_RasterPos3f, RasterPos3f@12)
+	GL_STUB(RasterPos3fv, _gloffset_RasterPos3fv, RasterPos3fv@4)
+	GL_STUB(RasterPos3i, _gloffset_RasterPos3i, RasterPos3i@12)
+	GL_STUB(RasterPos3iv, _gloffset_RasterPos3iv, RasterPos3iv@4)
+	GL_STUB(RasterPos3s, _gloffset_RasterPos3s, RasterPos3s@12)
+	GL_STUB(RasterPos3sv, _gloffset_RasterPos3sv, RasterPos3sv@4)
+	GL_STUB(RasterPos4d, _gloffset_RasterPos4d, RasterPos4d@32)
+	GL_STUB(RasterPos4dv, _gloffset_RasterPos4dv, RasterPos4dv@4)
+	GL_STUB(RasterPos4f, _gloffset_RasterPos4f, RasterPos4f@16)
+	GL_STUB(RasterPos4fv, _gloffset_RasterPos4fv, RasterPos4fv@4)
+	GL_STUB(RasterPos4i, _gloffset_RasterPos4i, RasterPos4i@16)
+	GL_STUB(RasterPos4iv, _gloffset_RasterPos4iv, RasterPos4iv@4)
+	GL_STUB(RasterPos4s, _gloffset_RasterPos4s, RasterPos4s@16)
+	GL_STUB(RasterPos4sv, _gloffset_RasterPos4sv, RasterPos4sv@4)
+	GL_STUB(Rectd, _gloffset_Rectd, Rectd@32)
+	GL_STUB(Rectdv, _gloffset_Rectdv, Rectdv@8)
+	GL_STUB(Rectf, _gloffset_Rectf, Rectf@16)
+	GL_STUB(Rectfv, _gloffset_Rectfv, Rectfv@8)
+	GL_STUB(Recti, _gloffset_Recti, Recti@16)
+	GL_STUB(Rectiv, _gloffset_Rectiv, Rectiv@8)
+	GL_STUB(Rects, _gloffset_Rects, Rects@16)
+	GL_STUB(Rectsv, _gloffset_Rectsv, Rectsv@8)
+	GL_STUB(TexCoord1d, _gloffset_TexCoord1d, TexCoord1d@8)
+	GL_STUB(TexCoord1dv, _gloffset_TexCoord1dv, TexCoord1dv@4)
+	GL_STUB(TexCoord1f, _gloffset_TexCoord1f, TexCoord1f@4)
+	GL_STUB(TexCoord1fv, _gloffset_TexCoord1fv, TexCoord1fv@4)
+	GL_STUB(TexCoord1i, _gloffset_TexCoord1i, TexCoord1i@4)
+	GL_STUB(TexCoord1iv, _gloffset_TexCoord1iv, TexCoord1iv@4)
+	GL_STUB(TexCoord1s, _gloffset_TexCoord1s, TexCoord1s@4)
+	GL_STUB(TexCoord1sv, _gloffset_TexCoord1sv, TexCoord1sv@4)
+	GL_STUB(TexCoord2d, _gloffset_TexCoord2d, TexCoord2d@16)
+	GL_STUB(TexCoord2dv, _gloffset_TexCoord2dv, TexCoord2dv@4)
+	GL_STUB(TexCoord2f, _gloffset_TexCoord2f, TexCoord2f@8)
+	GL_STUB(TexCoord2fv, _gloffset_TexCoord2fv, TexCoord2fv@4)
+	GL_STUB(TexCoord2i, _gloffset_TexCoord2i, TexCoord2i@8)
+	GL_STUB(TexCoord2iv, _gloffset_TexCoord2iv, TexCoord2iv@4)
+	GL_STUB(TexCoord2s, _gloffset_TexCoord2s, TexCoord2s@8)
+	GL_STUB(TexCoord2sv, _gloffset_TexCoord2sv, TexCoord2sv@4)
+	GL_STUB(TexCoord3d, _gloffset_TexCoord3d, TexCoord3d@24)
+	GL_STUB(TexCoord3dv, _gloffset_TexCoord3dv, TexCoord3dv@4)
+	GL_STUB(TexCoord3f, _gloffset_TexCoord3f, TexCoord3f@12)
+	GL_STUB(TexCoord3fv, _gloffset_TexCoord3fv, TexCoord3fv@4)
+	GL_STUB(TexCoord3i, _gloffset_TexCoord3i, TexCoord3i@12)
+	GL_STUB(TexCoord3iv, _gloffset_TexCoord3iv, TexCoord3iv@4)
+	GL_STUB(TexCoord3s, _gloffset_TexCoord3s, TexCoord3s@12)
+	GL_STUB(TexCoord3sv, _gloffset_TexCoord3sv, TexCoord3sv@4)
+	GL_STUB(TexCoord4d, _gloffset_TexCoord4d, TexCoord4d@32)
+	GL_STUB(TexCoord4dv, _gloffset_TexCoord4dv, TexCoord4dv@4)
+	GL_STUB(TexCoord4f, _gloffset_TexCoord4f, TexCoord4f@16)
+	GL_STUB(TexCoord4fv, _gloffset_TexCoord4fv, TexCoord4fv@4)
+	GL_STUB(TexCoord4i, _gloffset_TexCoord4i, TexCoord4i@16)
+	GL_STUB(TexCoord4iv, _gloffset_TexCoord4iv, TexCoord4iv@4)
+	GL_STUB(TexCoord4s, _gloffset_TexCoord4s, TexCoord4s@16)
+	GL_STUB(TexCoord4sv, _gloffset_TexCoord4sv, TexCoord4sv@4)
+	GL_STUB(Vertex2d, _gloffset_Vertex2d, Vertex2d@16)
+	GL_STUB(Vertex2dv, _gloffset_Vertex2dv, Vertex2dv@4)
+	GL_STUB(Vertex2f, _gloffset_Vertex2f, Vertex2f@8)
+	GL_STUB(Vertex2fv, _gloffset_Vertex2fv, Vertex2fv@4)
+	GL_STUB(Vertex2i, _gloffset_Vertex2i, Vertex2i@8)
+	GL_STUB(Vertex2iv, _gloffset_Vertex2iv, Vertex2iv@4)
+	GL_STUB(Vertex2s, _gloffset_Vertex2s, Vertex2s@8)
+	GL_STUB(Vertex2sv, _gloffset_Vertex2sv, Vertex2sv@4)
+	GL_STUB(Vertex3d, _gloffset_Vertex3d, Vertex3d@24)
+	GL_STUB(Vertex3dv, _gloffset_Vertex3dv, Vertex3dv@4)
+	GL_STUB(Vertex3f, _gloffset_Vertex3f, Vertex3f@12)
+	GL_STUB(Vertex3fv, _gloffset_Vertex3fv, Vertex3fv@4)
+	GL_STUB(Vertex3i, _gloffset_Vertex3i, Vertex3i@12)
+	GL_STUB(Vertex3iv, _gloffset_Vertex3iv, Vertex3iv@4)
+	GL_STUB(Vertex3s, _gloffset_Vertex3s, Vertex3s@12)
+	GL_STUB(Vertex3sv, _gloffset_Vertex3sv, Vertex3sv@4)
+	GL_STUB(Vertex4d, _gloffset_Vertex4d, Vertex4d@32)
+	GL_STUB(Vertex4dv, _gloffset_Vertex4dv, Vertex4dv@4)
+	GL_STUB(Vertex4f, _gloffset_Vertex4f, Vertex4f@16)
+	GL_STUB(Vertex4fv, _gloffset_Vertex4fv, Vertex4fv@4)
+	GL_STUB(Vertex4i, _gloffset_Vertex4i, Vertex4i@16)
+	GL_STUB(Vertex4iv, _gloffset_Vertex4iv, Vertex4iv@4)
+	GL_STUB(Vertex4s, _gloffset_Vertex4s, Vertex4s@16)
+	GL_STUB(Vertex4sv, _gloffset_Vertex4sv, Vertex4sv@4)
+	GL_STUB(ClipPlane, _gloffset_ClipPlane, ClipPlane@8)
+	GL_STUB(ColorMaterial, _gloffset_ColorMaterial, ColorMaterial@8)
+	GL_STUB(CullFace, _gloffset_CullFace, CullFace@4)
+	GL_STUB(Fogf, _gloffset_Fogf, Fogf@8)
+	GL_STUB(Fogfv, _gloffset_Fogfv, Fogfv@8)
+	GL_STUB(Fogi, _gloffset_Fogi, Fogi@8)
+	GL_STUB(Fogiv, _gloffset_Fogiv, Fogiv@8)
+	GL_STUB(FrontFace, _gloffset_FrontFace, FrontFace@4)
+	GL_STUB(Hint, _gloffset_Hint, Hint@8)
+	GL_STUB(Lightf, _gloffset_Lightf, Lightf@12)
+	GL_STUB(Lightfv, _gloffset_Lightfv, Lightfv@12)
+	GL_STUB(Lighti, _gloffset_Lighti, Lighti@12)
+	GL_STUB(Lightiv, _gloffset_Lightiv, Lightiv@12)
+	GL_STUB(LightModelf, _gloffset_LightModelf, LightModelf@8)
+	GL_STUB(LightModelfv, _gloffset_LightModelfv, LightModelfv@8)
+	GL_STUB(LightModeli, _gloffset_LightModeli, LightModeli@8)
+	GL_STUB(LightModeliv, _gloffset_LightModeliv, LightModeliv@8)
+	GL_STUB(LineStipple, _gloffset_LineStipple, LineStipple@8)
+	GL_STUB(LineWidth, _gloffset_LineWidth, LineWidth@4)
+	GL_STUB(Materialf, _gloffset_Materialf, Materialf@12)
+	GL_STUB(Materialfv, _gloffset_Materialfv, Materialfv@12)
+	GL_STUB(Materiali, _gloffset_Materiali, Materiali@12)
+	GL_STUB(Materialiv, _gloffset_Materialiv, Materialiv@12)
+	GL_STUB(PointSize, _gloffset_PointSize, PointSize@4)
+	GL_STUB(PolygonMode, _gloffset_PolygonMode, PolygonMode@8)
+	GL_STUB(PolygonStipple, _gloffset_PolygonStipple, PolygonStipple@4)
+	GL_STUB(Scissor, _gloffset_Scissor, Scissor@16)
+	GL_STUB(ShadeModel, _gloffset_ShadeModel, ShadeModel@4)
+	GL_STUB(TexParameterf, _gloffset_TexParameterf, TexParameterf@12)
+	GL_STUB(TexParameterfv, _gloffset_TexParameterfv, TexParameterfv@12)
+	GL_STUB(TexParameteri, _gloffset_TexParameteri, TexParameteri@12)
+	GL_STUB(TexParameteriv, _gloffset_TexParameteriv, TexParameteriv@12)
+	GL_STUB(TexImage1D, _gloffset_TexImage1D, TexImage1D@32)
+	GL_STUB(TexImage2D, _gloffset_TexImage2D, TexImage2D@36)
+	GL_STUB(TexEnvf, _gloffset_TexEnvf, TexEnvf@12)
+	GL_STUB(TexEnvfv, _gloffset_TexEnvfv, TexEnvfv@12)
+	GL_STUB(TexEnvi, _gloffset_TexEnvi, TexEnvi@12)
+	GL_STUB(TexEnviv, _gloffset_TexEnviv, TexEnviv@12)
+	GL_STUB(TexGend, _gloffset_TexGend, TexGend@16)
+	GL_STUB(TexGendv, _gloffset_TexGendv, TexGendv@12)
+	GL_STUB(TexGenf, _gloffset_TexGenf, TexGenf@12)
+	GL_STUB(TexGenfv, _gloffset_TexGenfv, TexGenfv@12)
+	GL_STUB(TexGeni, _gloffset_TexGeni, TexGeni@12)
+	GL_STUB(TexGeniv, _gloffset_TexGeniv, TexGeniv@12)
+	GL_STUB(FeedbackBuffer, _gloffset_FeedbackBuffer, FeedbackBuffer@12)
+	GL_STUB(SelectBuffer, _gloffset_SelectBuffer, SelectBuffer@8)
+	GL_STUB(RenderMode, _gloffset_RenderMode, RenderMode@4)
+	GL_STUB(InitNames, _gloffset_InitNames, InitNames@0)
+	GL_STUB(LoadName, _gloffset_LoadName, LoadName@4)
+	GL_STUB(PassThrough, _gloffset_PassThrough, PassThrough@4)
+	GL_STUB(PopName, _gloffset_PopName, PopName@0)
+	GL_STUB(PushName, _gloffset_PushName, PushName@4)
+	GL_STUB(DrawBuffer, _gloffset_DrawBuffer, DrawBuffer@4)
+	GL_STUB(Clear, _gloffset_Clear, Clear@4)
+	GL_STUB(ClearAccum, _gloffset_ClearAccum, ClearAccum@16)
+	GL_STUB(ClearIndex, _gloffset_ClearIndex, ClearIndex@4)
+	GL_STUB(ClearColor, _gloffset_ClearColor, ClearColor@16)
+	GL_STUB(ClearStencil, _gloffset_ClearStencil, ClearStencil@4)
+	GL_STUB(ClearDepth, _gloffset_ClearDepth, ClearDepth@8)
+	GL_STUB(StencilMask, _gloffset_StencilMask, StencilMask@4)
+	GL_STUB(ColorMask, _gloffset_ColorMask, ColorMask@16)
+	GL_STUB(DepthMask, _gloffset_DepthMask, DepthMask@4)
+	GL_STUB(IndexMask, _gloffset_IndexMask, IndexMask@4)
+	GL_STUB(Accum, _gloffset_Accum, Accum@8)
+	GL_STUB(Disable, _gloffset_Disable, Disable@4)
+	GL_STUB(Enable, _gloffset_Enable, Enable@4)
+	GL_STUB(Finish, _gloffset_Finish, Finish@0)
+	GL_STUB(Flush, _gloffset_Flush, Flush@0)
+	GL_STUB(PopAttrib, _gloffset_PopAttrib, PopAttrib@0)
+	GL_STUB(PushAttrib, _gloffset_PushAttrib, PushAttrib@4)
+	GL_STUB(Map1d, _gloffset_Map1d, Map1d@32)
+	GL_STUB(Map1f, _gloffset_Map1f, Map1f@24)
+	GL_STUB(Map2d, _gloffset_Map2d, Map2d@56)
+	GL_STUB(Map2f, _gloffset_Map2f, Map2f@40)
+	GL_STUB(MapGrid1d, _gloffset_MapGrid1d, MapGrid1d@20)
+	GL_STUB(MapGrid1f, _gloffset_MapGrid1f, MapGrid1f@12)
+	GL_STUB(MapGrid2d, _gloffset_MapGrid2d, MapGrid2d@40)
+	GL_STUB(MapGrid2f, _gloffset_MapGrid2f, MapGrid2f@24)
+	GL_STUB(EvalCoord1d, _gloffset_EvalCoord1d, EvalCoord1d@8)
+	GL_STUB(EvalCoord1dv, _gloffset_EvalCoord1dv, EvalCoord1dv@4)
+	GL_STUB(EvalCoord1f, _gloffset_EvalCoord1f, EvalCoord1f@4)
+	GL_STUB(EvalCoord1fv, _gloffset_EvalCoord1fv, EvalCoord1fv@4)
+	GL_STUB(EvalCoord2d, _gloffset_EvalCoord2d, EvalCoord2d@16)
+	GL_STUB(EvalCoord2dv, _gloffset_EvalCoord2dv, EvalCoord2dv@4)
+	GL_STUB(EvalCoord2f, _gloffset_EvalCoord2f, EvalCoord2f@8)
+	GL_STUB(EvalCoord2fv, _gloffset_EvalCoord2fv, EvalCoord2fv@4)
+	GL_STUB(EvalMesh1, _gloffset_EvalMesh1, EvalMesh1@12)
+	GL_STUB(EvalPoint1, _gloffset_EvalPoint1, EvalPoint1@4)
+	GL_STUB(EvalMesh2, _gloffset_EvalMesh2, EvalMesh2@20)
+	GL_STUB(EvalPoint2, _gloffset_EvalPoint2, EvalPoint2@8)
+	GL_STUB(AlphaFunc, _gloffset_AlphaFunc, AlphaFunc@8)
+	GL_STUB(BlendFunc, _gloffset_BlendFunc, BlendFunc@8)
+	GL_STUB(LogicOp, _gloffset_LogicOp, LogicOp@4)
+	GL_STUB(StencilFunc, _gloffset_StencilFunc, StencilFunc@12)
+	GL_STUB(StencilOp, _gloffset_StencilOp, StencilOp@12)
+	GL_STUB(DepthFunc, _gloffset_DepthFunc, DepthFunc@4)
+	GL_STUB(PixelZoom, _gloffset_PixelZoom, PixelZoom@8)
+	GL_STUB(PixelTransferf, _gloffset_PixelTransferf, PixelTransferf@8)
+	GL_STUB(PixelTransferi, _gloffset_PixelTransferi, PixelTransferi@8)
+	GL_STUB(PixelStoref, _gloffset_PixelStoref, PixelStoref@8)
+	GL_STUB(PixelStorei, _gloffset_PixelStorei, PixelStorei@8)
+	GL_STUB(PixelMapfv, _gloffset_PixelMapfv, PixelMapfv@12)
+	GL_STUB(PixelMapuiv, _gloffset_PixelMapuiv, PixelMapuiv@12)
+	GL_STUB(PixelMapusv, _gloffset_PixelMapusv, PixelMapusv@12)
+	GL_STUB(ReadBuffer, _gloffset_ReadBuffer, ReadBuffer@4)
+	GL_STUB(CopyPixels, _gloffset_CopyPixels, CopyPixels@20)
+	GL_STUB(ReadPixels, _gloffset_ReadPixels, ReadPixels@28)
+	GL_STUB(DrawPixels, _gloffset_DrawPixels, DrawPixels@20)
+	GL_STUB(GetBooleanv, _gloffset_GetBooleanv, GetBooleanv@8)
+	GL_STUB(GetClipPlane, _gloffset_GetClipPlane, GetClipPlane@8)
+	GL_STUB(GetDoublev, _gloffset_GetDoublev, GetDoublev@8)
+	GL_STUB(GetError, _gloffset_GetError, GetError@0)
+	GL_STUB(GetFloatv, _gloffset_GetFloatv, GetFloatv@8)
+	GL_STUB(GetIntegerv, _gloffset_GetIntegerv, GetIntegerv@8)
+	GL_STUB(GetLightfv, _gloffset_GetLightfv, GetLightfv@12)
+	GL_STUB(GetLightiv, _gloffset_GetLightiv, GetLightiv@12)
+	GL_STUB(GetMapdv, _gloffset_GetMapdv, GetMapdv@12)
+	GL_STUB(GetMapfv, _gloffset_GetMapfv, GetMapfv@12)
+	GL_STUB(GetMapiv, _gloffset_GetMapiv, GetMapiv@12)
+	GL_STUB(GetMaterialfv, _gloffset_GetMaterialfv, GetMaterialfv@12)
+	GL_STUB(GetMaterialiv, _gloffset_GetMaterialiv, GetMaterialiv@12)
+	GL_STUB(GetPixelMapfv, _gloffset_GetPixelMapfv, GetPixelMapfv@8)
+	GL_STUB(GetPixelMapuiv, _gloffset_GetPixelMapuiv, GetPixelMapuiv@8)
+	GL_STUB(GetPixelMapusv, _gloffset_GetPixelMapusv, GetPixelMapusv@8)
+	GL_STUB(GetPolygonStipple, _gloffset_GetPolygonStipple, GetPolygonStipple@4)
+	GL_STUB(GetString, _gloffset_GetString, GetString@4)
+	GL_STUB(GetTexEnvfv, _gloffset_GetTexEnvfv, GetTexEnvfv@12)
+	GL_STUB(GetTexEnviv, _gloffset_GetTexEnviv, GetTexEnviv@12)
+	GL_STUB(GetTexGendv, _gloffset_GetTexGendv, GetTexGendv@12)
+	GL_STUB(GetTexGenfv, _gloffset_GetTexGenfv, GetTexGenfv@12)
+	GL_STUB(GetTexGeniv, _gloffset_GetTexGeniv, GetTexGeniv@12)
+	GL_STUB(GetTexImage, _gloffset_GetTexImage, GetTexImage@20)
+	GL_STUB(GetTexParameterfv, _gloffset_GetTexParameterfv, GetTexParameterfv@12)
+	GL_STUB(GetTexParameteriv, _gloffset_GetTexParameteriv, GetTexParameteriv@12)
+	GL_STUB(GetTexLevelParameterfv, _gloffset_GetTexLevelParameterfv, GetTexLevelParameterfv@16)
+	GL_STUB(GetTexLevelParameteriv, _gloffset_GetTexLevelParameteriv, GetTexLevelParameteriv@16)
+	GL_STUB(IsEnabled, _gloffset_IsEnabled, IsEnabled@4)
+	GL_STUB(IsList, _gloffset_IsList, IsList@4)
+	GL_STUB(DepthRange, _gloffset_DepthRange, DepthRange@16)
+	GL_STUB(Frustum, _gloffset_Frustum, Frustum@48)
+	GL_STUB(LoadIdentity, _gloffset_LoadIdentity, LoadIdentity@0)
+	GL_STUB(LoadMatrixf, _gloffset_LoadMatrixf, LoadMatrixf@4)
+	GL_STUB(LoadMatrixd, _gloffset_LoadMatrixd, LoadMatrixd@4)
+	GL_STUB(MatrixMode, _gloffset_MatrixMode, MatrixMode@4)
+	GL_STUB(MultMatrixf, _gloffset_MultMatrixf, MultMatrixf@4)
+	GL_STUB(MultMatrixd, _gloffset_MultMatrixd, MultMatrixd@4)
+	GL_STUB(Ortho, _gloffset_Ortho, Ortho@48)
+	GL_STUB(PopMatrix, _gloffset_PopMatrix, PopMatrix@0)
+	GL_STUB(PushMatrix, _gloffset_PushMatrix, PushMatrix@0)
+	GL_STUB(Rotated, _gloffset_Rotated, Rotated@32)
+	GL_STUB(Rotatef, _gloffset_Rotatef, Rotatef@16)
+	GL_STUB(Scaled, _gloffset_Scaled, Scaled@24)
+	GL_STUB(Scalef, _gloffset_Scalef, Scalef@12)
+	GL_STUB(Translated, _gloffset_Translated, Translated@24)
+	GL_STUB(Translatef, _gloffset_Translatef, Translatef@12)
+	GL_STUB(Viewport, _gloffset_Viewport, Viewport@16)
+	GL_STUB(ArrayElement, _gloffset_ArrayElement, ArrayElement@4)
+	GL_STUB(BindTexture, _gloffset_BindTexture, BindTexture@8)
+	GL_STUB(ColorPointer, _gloffset_ColorPointer, ColorPointer@16)
+	GL_STUB(DisableClientState, _gloffset_DisableClientState, DisableClientState@4)
+	GL_STUB(DrawArrays, _gloffset_DrawArrays, DrawArrays@12)
+	GL_STUB(DrawElements, _gloffset_DrawElements, DrawElements@16)
+	GL_STUB(EdgeFlagPointer, _gloffset_EdgeFlagPointer, EdgeFlagPointer@8)
+	GL_STUB(EnableClientState, _gloffset_EnableClientState, EnableClientState@4)
+	GL_STUB(IndexPointer, _gloffset_IndexPointer, IndexPointer@12)
+	GL_STUB(Indexub, _gloffset_Indexub, Indexub@4)
+	GL_STUB(Indexubv, _gloffset_Indexubv, Indexubv@4)
+	GL_STUB(InterleavedArrays, _gloffset_InterleavedArrays, InterleavedArrays@12)
+	GL_STUB(NormalPointer, _gloffset_NormalPointer, NormalPointer@12)
+	GL_STUB(PolygonOffset, _gloffset_PolygonOffset, PolygonOffset@8)
+	GL_STUB(TexCoordPointer, _gloffset_TexCoordPointer, TexCoordPointer@16)
+	GL_STUB(VertexPointer, _gloffset_VertexPointer, VertexPointer@16)
+	GL_STUB(AreTexturesResident, _gloffset_AreTexturesResident, AreTexturesResident@12)
+	GL_STUB(CopyTexImage1D, _gloffset_CopyTexImage1D, CopyTexImage1D@28)
+	GL_STUB(CopyTexImage2D, _gloffset_CopyTexImage2D, CopyTexImage2D@32)
+	GL_STUB(CopyTexSubImage1D, _gloffset_CopyTexSubImage1D, CopyTexSubImage1D@24)
+	GL_STUB(CopyTexSubImage2D, _gloffset_CopyTexSubImage2D, CopyTexSubImage2D@32)
+	GL_STUB(DeleteTextures, _gloffset_DeleteTextures, DeleteTextures@8)
+	GL_STUB(GenTextures, _gloffset_GenTextures, GenTextures@8)
+	GL_STUB(GetPointerv, _gloffset_GetPointerv, GetPointerv@8)
+	GL_STUB(IsTexture, _gloffset_IsTexture, IsTexture@4)
+	GL_STUB(PrioritizeTextures, _gloffset_PrioritizeTextures, PrioritizeTextures@12)
+	GL_STUB(TexSubImage1D, _gloffset_TexSubImage1D, TexSubImage1D@28)
+	GL_STUB(TexSubImage2D, _gloffset_TexSubImage2D, TexSubImage2D@36)
+	GL_STUB(PopClientAttrib, _gloffset_PopClientAttrib, PopClientAttrib@0)
+	GL_STUB(PushClientAttrib, _gloffset_PushClientAttrib, PushClientAttrib@4)
+	GL_STUB(BlendColor, _gloffset_BlendColor, BlendColor@16)
+	GL_STUB(BlendEquation, _gloffset_BlendEquation, BlendEquation@4)
+	GL_STUB(DrawRangeElements, _gloffset_DrawRangeElements, DrawRangeElements@24)
+	GL_STUB(ColorTable, _gloffset_ColorTable, ColorTable@24)
+	GL_STUB(ColorTableParameterfv, _gloffset_ColorTableParameterfv, ColorTableParameterfv@12)
+	GL_STUB(ColorTableParameteriv, _gloffset_ColorTableParameteriv, ColorTableParameteriv@12)
+	GL_STUB(CopyColorTable, _gloffset_CopyColorTable, CopyColorTable@20)
+	GL_STUB(GetColorTable, _gloffset_GetColorTable, GetColorTable@16)
+	GL_STUB(GetColorTableParameterfv, _gloffset_GetColorTableParameterfv, GetColorTableParameterfv@12)
+	GL_STUB(GetColorTableParameteriv, _gloffset_GetColorTableParameteriv, GetColorTableParameteriv@12)
+	GL_STUB(ColorSubTable, _gloffset_ColorSubTable, ColorSubTable@24)
+	GL_STUB(CopyColorSubTable, _gloffset_CopyColorSubTable, CopyColorSubTable@20)
+	GL_STUB(ConvolutionFilter1D, _gloffset_ConvolutionFilter1D, ConvolutionFilter1D@24)
+	GL_STUB(ConvolutionFilter2D, _gloffset_ConvolutionFilter2D, ConvolutionFilter2D@28)
+	GL_STUB(ConvolutionParameterf, _gloffset_ConvolutionParameterf, ConvolutionParameterf@12)
+	GL_STUB(ConvolutionParameterfv, _gloffset_ConvolutionParameterfv, ConvolutionParameterfv@12)
+	GL_STUB(ConvolutionParameteri, _gloffset_ConvolutionParameteri, ConvolutionParameteri@12)
+	GL_STUB(ConvolutionParameteriv, _gloffset_ConvolutionParameteriv, ConvolutionParameteriv@12)
+	GL_STUB(CopyConvolutionFilter1D, _gloffset_CopyConvolutionFilter1D, CopyConvolutionFilter1D@20)
+	GL_STUB(CopyConvolutionFilter2D, _gloffset_CopyConvolutionFilter2D, CopyConvolutionFilter2D@24)
+	GL_STUB(GetConvolutionFilter, _gloffset_GetConvolutionFilter, GetConvolutionFilter@16)
+	GL_STUB(GetConvolutionParameterfv, _gloffset_GetConvolutionParameterfv, GetConvolutionParameterfv@12)
+	GL_STUB(GetConvolutionParameteriv, _gloffset_GetConvolutionParameteriv, GetConvolutionParameteriv@12)
+	GL_STUB(GetSeparableFilter, _gloffset_GetSeparableFilter, GetSeparableFilter@24)
+	GL_STUB(SeparableFilter2D, _gloffset_SeparableFilter2D, SeparableFilter2D@32)
+	GL_STUB(GetHistogram, _gloffset_GetHistogram, GetHistogram@20)
+	GL_STUB(GetHistogramParameterfv, _gloffset_GetHistogramParameterfv, GetHistogramParameterfv@12)
+	GL_STUB(GetHistogramParameteriv, _gloffset_GetHistogramParameteriv, GetHistogramParameteriv@12)
+	GL_STUB(GetMinmax, _gloffset_GetMinmax, GetMinmax@20)
+	GL_STUB(GetMinmaxParameterfv, _gloffset_GetMinmaxParameterfv, GetMinmaxParameterfv@12)
+	GL_STUB(GetMinmaxParameteriv, _gloffset_GetMinmaxParameteriv, GetMinmaxParameteriv@12)
+	GL_STUB(Histogram, _gloffset_Histogram, Histogram@16)
+	GL_STUB(Minmax, _gloffset_Minmax, Minmax@12)
+	GL_STUB(ResetHistogram, _gloffset_ResetHistogram, ResetHistogram@4)
+	GL_STUB(ResetMinmax, _gloffset_ResetMinmax, ResetMinmax@4)
+	GL_STUB(TexImage3D, _gloffset_TexImage3D, TexImage3D@40)
+	GL_STUB(TexSubImage3D, _gloffset_TexSubImage3D, TexSubImage3D@44)
+	GL_STUB(CopyTexSubImage3D, _gloffset_CopyTexSubImage3D, CopyTexSubImage3D@36)
+	GL_STUB(ActiveTextureARB, _gloffset_ActiveTextureARB, ActiveTextureARB@4)
+	GL_STUB(ClientActiveTextureARB, _gloffset_ClientActiveTextureARB, ClientActiveTextureARB@4)
+	GL_STUB(MultiTexCoord1dARB, _gloffset_MultiTexCoord1dARB, MultiTexCoord1dARB@12)
+	GL_STUB(MultiTexCoord1dvARB, _gloffset_MultiTexCoord1dvARB, MultiTexCoord1dvARB@8)
+	GL_STUB(MultiTexCoord1fARB, _gloffset_MultiTexCoord1fARB, MultiTexCoord1fARB@8)
+	GL_STUB(MultiTexCoord1fvARB, _gloffset_MultiTexCoord1fvARB, MultiTexCoord1fvARB@8)
+	GL_STUB(MultiTexCoord1iARB, _gloffset_MultiTexCoord1iARB, MultiTexCoord1iARB@8)
+	GL_STUB(MultiTexCoord1ivARB, _gloffset_MultiTexCoord1ivARB, MultiTexCoord1ivARB@8)
+	GL_STUB(MultiTexCoord1sARB, _gloffset_MultiTexCoord1sARB, MultiTexCoord1sARB@8)
+	GL_STUB(MultiTexCoord1svARB, _gloffset_MultiTexCoord1svARB, MultiTexCoord1svARB@8)
+	GL_STUB(MultiTexCoord2dARB, _gloffset_MultiTexCoord2dARB, MultiTexCoord2dARB@20)
+	GL_STUB(MultiTexCoord2dvARB, _gloffset_MultiTexCoord2dvARB, MultiTexCoord2dvARB@8)
+	GL_STUB(MultiTexCoord2fARB, _gloffset_MultiTexCoord2fARB, MultiTexCoord2fARB@12)
+	GL_STUB(MultiTexCoord2fvARB, _gloffset_MultiTexCoord2fvARB, MultiTexCoord2fvARB@8)
+	GL_STUB(MultiTexCoord2iARB, _gloffset_MultiTexCoord2iARB, MultiTexCoord2iARB@12)
+	GL_STUB(MultiTexCoord2ivARB, _gloffset_MultiTexCoord2ivARB, MultiTexCoord2ivARB@8)
+	GL_STUB(MultiTexCoord2sARB, _gloffset_MultiTexCoord2sARB, MultiTexCoord2sARB@12)
+	GL_STUB(MultiTexCoord2svARB, _gloffset_MultiTexCoord2svARB, MultiTexCoord2svARB@8)
+	GL_STUB(MultiTexCoord3dARB, _gloffset_MultiTexCoord3dARB, MultiTexCoord3dARB@28)
+	GL_STUB(MultiTexCoord3dvARB, _gloffset_MultiTexCoord3dvARB, MultiTexCoord3dvARB@8)
+	GL_STUB(MultiTexCoord3fARB, _gloffset_MultiTexCoord3fARB, MultiTexCoord3fARB@16)
+	GL_STUB(MultiTexCoord3fvARB, _gloffset_MultiTexCoord3fvARB, MultiTexCoord3fvARB@8)
+	GL_STUB(MultiTexCoord3iARB, _gloffset_MultiTexCoord3iARB, MultiTexCoord3iARB@16)
+	GL_STUB(MultiTexCoord3ivARB, _gloffset_MultiTexCoord3ivARB, MultiTexCoord3ivARB@8)
+	GL_STUB(MultiTexCoord3sARB, _gloffset_MultiTexCoord3sARB, MultiTexCoord3sARB@16)
+	GL_STUB(MultiTexCoord3svARB, _gloffset_MultiTexCoord3svARB, MultiTexCoord3svARB@8)
+	GL_STUB(MultiTexCoord4dARB, _gloffset_MultiTexCoord4dARB, MultiTexCoord4dARB@36)
+	GL_STUB(MultiTexCoord4dvARB, _gloffset_MultiTexCoord4dvARB, MultiTexCoord4dvARB@8)
+	GL_STUB(MultiTexCoord4fARB, _gloffset_MultiTexCoord4fARB, MultiTexCoord4fARB@20)
+	GL_STUB(MultiTexCoord4fvARB, _gloffset_MultiTexCoord4fvARB, MultiTexCoord4fvARB@8)
+	GL_STUB(MultiTexCoord4iARB, _gloffset_MultiTexCoord4iARB, MultiTexCoord4iARB@20)
+	GL_STUB(MultiTexCoord4ivARB, _gloffset_MultiTexCoord4ivARB, MultiTexCoord4ivARB@8)
+	GL_STUB(MultiTexCoord4sARB, _gloffset_MultiTexCoord4sARB, MultiTexCoord4sARB@20)
+	GL_STUB(MultiTexCoord4svARB, _gloffset_MultiTexCoord4svARB, MultiTexCoord4svARB@8)
+	GL_STUB(LoadTransposeMatrixfARB, _gloffset_LoadTransposeMatrixfARB, LoadTransposeMatrixfARB@4)
+	GL_STUB(LoadTransposeMatrixdARB, _gloffset_LoadTransposeMatrixdARB, LoadTransposeMatrixdARB@4)
+	GL_STUB(MultTransposeMatrixfARB, _gloffset_MultTransposeMatrixfARB, MultTransposeMatrixfARB@4)
+	GL_STUB(MultTransposeMatrixdARB, _gloffset_MultTransposeMatrixdARB, MultTransposeMatrixdARB@4)
+	GL_STUB(SampleCoverageARB, _gloffset_SampleCoverageARB, SampleCoverageARB@8)
+	GL_STUB(DrawBuffersARB, _gloffset_DrawBuffersARB, DrawBuffersARB@8)
+	GL_STUB(PolygonOffsetEXT, _gloffset_PolygonOffsetEXT, PolygonOffsetEXT@8)
+	GL_STUB(GetTexFilterFuncSGIS, _gloffset_GetTexFilterFuncSGIS, GetTexFilterFuncSGIS@12)
+	GL_STUB(TexFilterFuncSGIS, _gloffset_TexFilterFuncSGIS, TexFilterFuncSGIS@16)
+	GL_STUB(GetHistogramEXT, _gloffset_GetHistogramEXT, GetHistogramEXT@20)
+	GL_STUB(GetHistogramParameterfvEXT, _gloffset_GetHistogramParameterfvEXT, GetHistogramParameterfvEXT@12)
+	GL_STUB(GetHistogramParameterivEXT, _gloffset_GetHistogramParameterivEXT, GetHistogramParameterivEXT@12)
+	GL_STUB(GetMinmaxEXT, _gloffset_GetMinmaxEXT, GetMinmaxEXT@20)
+	GL_STUB(GetMinmaxParameterfvEXT, _gloffset_GetMinmaxParameterfvEXT, GetMinmaxParameterfvEXT@12)
+	GL_STUB(GetMinmaxParameterivEXT, _gloffset_GetMinmaxParameterivEXT, GetMinmaxParameterivEXT@12)
+	GL_STUB(GetConvolutionFilterEXT, _gloffset_GetConvolutionFilterEXT, GetConvolutionFilterEXT@16)
+	GL_STUB(GetConvolutionParameterfvEXT, _gloffset_GetConvolutionParameterfvEXT, GetConvolutionParameterfvEXT@12)
+	GL_STUB(GetConvolutionParameterivEXT, _gloffset_GetConvolutionParameterivEXT, GetConvolutionParameterivEXT@12)
+	GL_STUB(GetSeparableFilterEXT, _gloffset_GetSeparableFilterEXT, GetSeparableFilterEXT@24)
+	GL_STUB(GetColorTableSGI, _gloffset_GetColorTableSGI, GetColorTableSGI@16)
+	GL_STUB(GetColorTableParameterfvSGI, _gloffset_GetColorTableParameterfvSGI, GetColorTableParameterfvSGI@12)
+	GL_STUB(GetColorTableParameterivSGI, _gloffset_GetColorTableParameterivSGI, GetColorTableParameterivSGI@12)
+	GL_STUB(PixelTexGenSGIX, _gloffset_PixelTexGenSGIX, PixelTexGenSGIX@4)
+	GL_STUB(PixelTexGenParameteriSGIS, _gloffset_PixelTexGenParameteriSGIS, PixelTexGenParameteriSGIS@8)
+	GL_STUB(PixelTexGenParameterivSGIS, _gloffset_PixelTexGenParameterivSGIS, PixelTexGenParameterivSGIS@8)
+	GL_STUB(PixelTexGenParameterfSGIS, _gloffset_PixelTexGenParameterfSGIS, PixelTexGenParameterfSGIS@8)
+	GL_STUB(PixelTexGenParameterfvSGIS, _gloffset_PixelTexGenParameterfvSGIS, PixelTexGenParameterfvSGIS@8)
+	GL_STUB(GetPixelTexGenParameterivSGIS, _gloffset_GetPixelTexGenParameterivSGIS, GetPixelTexGenParameterivSGIS@8)
+	GL_STUB(GetPixelTexGenParameterfvSGIS, _gloffset_GetPixelTexGenParameterfvSGIS, GetPixelTexGenParameterfvSGIS@8)
+	GL_STUB(TexImage4DSGIS, _gloffset_TexImage4DSGIS, TexImage4DSGIS@44)
+	GL_STUB(TexSubImage4DSGIS, _gloffset_TexSubImage4DSGIS, TexSubImage4DSGIS@52)
+	GL_STUB(AreTexturesResidentEXT, _gloffset_AreTexturesResidentEXT, AreTexturesResidentEXT@12)
+	GL_STUB(GenTexturesEXT, _gloffset_GenTexturesEXT, GenTexturesEXT@8)
+	GL_STUB(IsTextureEXT, _gloffset_IsTextureEXT, IsTextureEXT@4)
+	GL_STUB(DetailTexFuncSGIS, _gloffset_DetailTexFuncSGIS, DetailTexFuncSGIS@12)
+	GL_STUB(GetDetailTexFuncSGIS, _gloffset_GetDetailTexFuncSGIS, GetDetailTexFuncSGIS@8)
+	GL_STUB(SharpenTexFuncSGIS, _gloffset_SharpenTexFuncSGIS, SharpenTexFuncSGIS@12)
+	GL_STUB(GetSharpenTexFuncSGIS, _gloffset_GetSharpenTexFuncSGIS, GetSharpenTexFuncSGIS@8)
+	GL_STUB(SampleMaskSGIS, _gloffset_SampleMaskSGIS, SampleMaskSGIS@8)
+	GL_STUB(SamplePatternSGIS, _gloffset_SamplePatternSGIS, SamplePatternSGIS@4)
+	GL_STUB(ColorPointerEXT, _gloffset_ColorPointerEXT, ColorPointerEXT@20)
+	GL_STUB(EdgeFlagPointerEXT, _gloffset_EdgeFlagPointerEXT, EdgeFlagPointerEXT@12)
+	GL_STUB(IndexPointerEXT, _gloffset_IndexPointerEXT, IndexPointerEXT@16)
+	GL_STUB(NormalPointerEXT, _gloffset_NormalPointerEXT, NormalPointerEXT@16)
+	GL_STUB(TexCoordPointerEXT, _gloffset_TexCoordPointerEXT, TexCoordPointerEXT@20)
+	GL_STUB(VertexPointerEXT, _gloffset_VertexPointerEXT, VertexPointerEXT@20)
+	GL_STUB(SpriteParameterfSGIX, _gloffset_SpriteParameterfSGIX, SpriteParameterfSGIX@8)
+	GL_STUB(SpriteParameterfvSGIX, _gloffset_SpriteParameterfvSGIX, SpriteParameterfvSGIX@8)
+	GL_STUB(SpriteParameteriSGIX, _gloffset_SpriteParameteriSGIX, SpriteParameteriSGIX@8)
+	GL_STUB(SpriteParameterivSGIX, _gloffset_SpriteParameterivSGIX, SpriteParameterivSGIX@8)
+	GL_STUB(PointParameterfEXT, _gloffset_PointParameterfEXT, PointParameterfEXT@8)
+	GL_STUB(PointParameterfvEXT, _gloffset_PointParameterfvEXT, PointParameterfvEXT@8)
+	GL_STUB(GetInstrumentsSGIX, _gloffset_GetInstrumentsSGIX, GetInstrumentsSGIX@0)
+	GL_STUB(InstrumentsBufferSGIX, _gloffset_InstrumentsBufferSGIX, InstrumentsBufferSGIX@8)
+	GL_STUB(PollInstrumentsSGIX, _gloffset_PollInstrumentsSGIX, PollInstrumentsSGIX@4)
+	GL_STUB(ReadInstrumentsSGIX, _gloffset_ReadInstrumentsSGIX, ReadInstrumentsSGIX@4)
+	GL_STUB(StartInstrumentsSGIX, _gloffset_StartInstrumentsSGIX, StartInstrumentsSGIX@0)
+	GL_STUB(StopInstrumentsSGIX, _gloffset_StopInstrumentsSGIX, StopInstrumentsSGIX@4)
+	GL_STUB(FrameZoomSGIX, _gloffset_FrameZoomSGIX, FrameZoomSGIX@4)
+	GL_STUB(TagSampleBufferSGIX, _gloffset_TagSampleBufferSGIX, TagSampleBufferSGIX@0)
+	GL_STUB(ReferencePlaneSGIX, _gloffset_ReferencePlaneSGIX, ReferencePlaneSGIX@4)
+	GL_STUB(FlushRasterSGIX, _gloffset_FlushRasterSGIX, FlushRasterSGIX@0)
+	GL_STUB(GetListParameterfvSGIX, _gloffset_GetListParameterfvSGIX, GetListParameterfvSGIX@12)
+	GL_STUB(GetListParameterivSGIX, _gloffset_GetListParameterivSGIX, GetListParameterivSGIX@12)
+	GL_STUB(ListParameterfSGIX, _gloffset_ListParameterfSGIX, ListParameterfSGIX@12)
+	GL_STUB(ListParameterfvSGIX, _gloffset_ListParameterfvSGIX, ListParameterfvSGIX@12)
+	GL_STUB(ListParameteriSGIX, _gloffset_ListParameteriSGIX, ListParameteriSGIX@12)
+	GL_STUB(ListParameterivSGIX, _gloffset_ListParameterivSGIX, ListParameterivSGIX@12)
+	GL_STUB(FragmentColorMaterialSGIX, _gloffset_FragmentColorMaterialSGIX, FragmentColorMaterialSGIX@8)
+	GL_STUB(FragmentLightfSGIX, _gloffset_FragmentLightfSGIX, FragmentLightfSGIX@12)
+	GL_STUB(FragmentLightfvSGIX, _gloffset_FragmentLightfvSGIX, FragmentLightfvSGIX@12)
+	GL_STUB(FragmentLightiSGIX, _gloffset_FragmentLightiSGIX, FragmentLightiSGIX@12)
+	GL_STUB(FragmentLightivSGIX, _gloffset_FragmentLightivSGIX, FragmentLightivSGIX@12)
+	GL_STUB(FragmentLightModelfSGIX, _gloffset_FragmentLightModelfSGIX, FragmentLightModelfSGIX@8)
+	GL_STUB(FragmentLightModelfvSGIX, _gloffset_FragmentLightModelfvSGIX, FragmentLightModelfvSGIX@8)
+	GL_STUB(FragmentLightModeliSGIX, _gloffset_FragmentLightModeliSGIX, FragmentLightModeliSGIX@8)
+	GL_STUB(FragmentLightModelivSGIX, _gloffset_FragmentLightModelivSGIX, FragmentLightModelivSGIX@8)
+	GL_STUB(FragmentMaterialfSGIX, _gloffset_FragmentMaterialfSGIX, FragmentMaterialfSGIX@12)
+	GL_STUB(FragmentMaterialfvSGIX, _gloffset_FragmentMaterialfvSGIX, FragmentMaterialfvSGIX@12)
+	GL_STUB(FragmentMaterialiSGIX, _gloffset_FragmentMaterialiSGIX, FragmentMaterialiSGIX@12)
+	GL_STUB(FragmentMaterialivSGIX, _gloffset_FragmentMaterialivSGIX, FragmentMaterialivSGIX@12)
+	GL_STUB(GetFragmentLightfvSGIX, _gloffset_GetFragmentLightfvSGIX, GetFragmentLightfvSGIX@12)
+	GL_STUB(GetFragmentLightivSGIX, _gloffset_GetFragmentLightivSGIX, GetFragmentLightivSGIX@12)
+	GL_STUB(GetFragmentMaterialfvSGIX, _gloffset_GetFragmentMaterialfvSGIX, GetFragmentMaterialfvSGIX@12)
+	GL_STUB(GetFragmentMaterialivSGIX, _gloffset_GetFragmentMaterialivSGIX, GetFragmentMaterialivSGIX@12)
+	GL_STUB(LightEnviSGIX, _gloffset_LightEnviSGIX, LightEnviSGIX@8)
+	GL_STUB(VertexWeightfEXT, _gloffset_VertexWeightfEXT, VertexWeightfEXT@4)
+	GL_STUB(VertexWeightfvEXT, _gloffset_VertexWeightfvEXT, VertexWeightfvEXT@4)
+	GL_STUB(VertexWeightPointerEXT, _gloffset_VertexWeightPointerEXT, VertexWeightPointerEXT@16)
+	GL_STUB(FlushVertexArrayRangeNV, _gloffset_FlushVertexArrayRangeNV, FlushVertexArrayRangeNV@0)
+	GL_STUB(VertexArrayRangeNV, _gloffset_VertexArrayRangeNV, VertexArrayRangeNV@8)
+	GL_STUB(CombinerParameterfvNV, _gloffset_CombinerParameterfvNV, CombinerParameterfvNV@8)
+	GL_STUB(CombinerParameterfNV, _gloffset_CombinerParameterfNV, CombinerParameterfNV@8)
+	GL_STUB(CombinerParameterivNV, _gloffset_CombinerParameterivNV, CombinerParameterivNV@8)
+	GL_STUB(CombinerParameteriNV, _gloffset_CombinerParameteriNV, CombinerParameteriNV@8)
+	GL_STUB(CombinerInputNV, _gloffset_CombinerInputNV, CombinerInputNV@24)
+	GL_STUB(CombinerOutputNV, _gloffset_CombinerOutputNV, CombinerOutputNV@40)
+	GL_STUB(FinalCombinerInputNV, _gloffset_FinalCombinerInputNV, FinalCombinerInputNV@16)
+	GL_STUB(GetCombinerInputParameterfvNV, _gloffset_GetCombinerInputParameterfvNV, GetCombinerInputParameterfvNV@20)
+	GL_STUB(GetCombinerInputParameterivNV, _gloffset_GetCombinerInputParameterivNV, GetCombinerInputParameterivNV@20)
+	GL_STUB(GetCombinerOutputParameterfvNV, _gloffset_GetCombinerOutputParameterfvNV, GetCombinerOutputParameterfvNV@16)
+	GL_STUB(GetCombinerOutputParameterivNV, _gloffset_GetCombinerOutputParameterivNV, GetCombinerOutputParameterivNV@16)
+	GL_STUB(GetFinalCombinerInputParameterfvNV, _gloffset_GetFinalCombinerInputParameterfvNV, GetFinalCombinerInputParameterfvNV@12)
+	GL_STUB(GetFinalCombinerInputParameterivNV, _gloffset_GetFinalCombinerInputParameterivNV, GetFinalCombinerInputParameterivNV@12)
+	GL_STUB(ResizeBuffersMESA, _gloffset_ResizeBuffersMESA, ResizeBuffersMESA@0)
+	GL_STUB(WindowPos2dMESA, _gloffset_WindowPos2dMESA, WindowPos2dMESA@16)
+	GL_STUB(WindowPos2dvMESA, _gloffset_WindowPos2dvMESA, WindowPos2dvMESA@4)
+	GL_STUB(WindowPos2fMESA, _gloffset_WindowPos2fMESA, WindowPos2fMESA@8)
+	GL_STUB(WindowPos2fvMESA, _gloffset_WindowPos2fvMESA, WindowPos2fvMESA@4)
+	GL_STUB(WindowPos2iMESA, _gloffset_WindowPos2iMESA, WindowPos2iMESA@8)
+	GL_STUB(WindowPos2ivMESA, _gloffset_WindowPos2ivMESA, WindowPos2ivMESA@4)
+	GL_STUB(WindowPos2sMESA, _gloffset_WindowPos2sMESA, WindowPos2sMESA@8)
+	GL_STUB(WindowPos2svMESA, _gloffset_WindowPos2svMESA, WindowPos2svMESA@4)
+	GL_STUB(WindowPos3dMESA, _gloffset_WindowPos3dMESA, WindowPos3dMESA@24)
+	GL_STUB(WindowPos3dvMESA, _gloffset_WindowPos3dvMESA, WindowPos3dvMESA@4)
+	GL_STUB(WindowPos3fMESA, _gloffset_WindowPos3fMESA, WindowPos3fMESA@12)
+	GL_STUB(WindowPos3fvMESA, _gloffset_WindowPos3fvMESA, WindowPos3fvMESA@4)
+	GL_STUB(WindowPos3iMESA, _gloffset_WindowPos3iMESA, WindowPos3iMESA@12)
+	GL_STUB(WindowPos3ivMESA, _gloffset_WindowPos3ivMESA, WindowPos3ivMESA@4)
+	GL_STUB(WindowPos3sMESA, _gloffset_WindowPos3sMESA, WindowPos3sMESA@12)
+	GL_STUB(WindowPos3svMESA, _gloffset_WindowPos3svMESA, WindowPos3svMESA@4)
+	GL_STUB(WindowPos4dMESA, _gloffset_WindowPos4dMESA, WindowPos4dMESA@32)
+	GL_STUB(WindowPos4dvMESA, _gloffset_WindowPos4dvMESA, WindowPos4dvMESA@4)
+	GL_STUB(WindowPos4fMESA, _gloffset_WindowPos4fMESA, WindowPos4fMESA@16)
+	GL_STUB(WindowPos4fvMESA, _gloffset_WindowPos4fvMESA, WindowPos4fvMESA@4)
+	GL_STUB(WindowPos4iMESA, _gloffset_WindowPos4iMESA, WindowPos4iMESA@16)
+	GL_STUB(WindowPos4ivMESA, _gloffset_WindowPos4ivMESA, WindowPos4ivMESA@4)
+	GL_STUB(WindowPos4sMESA, _gloffset_WindowPos4sMESA, WindowPos4sMESA@16)
+	GL_STUB(WindowPos4svMESA, _gloffset_WindowPos4svMESA, WindowPos4svMESA@4)
+	GL_STUB(BlendFuncSeparateEXT, _gloffset_BlendFuncSeparateEXT, BlendFuncSeparateEXT@16)
+	GL_STUB(IndexMaterialEXT, _gloffset_IndexMaterialEXT, IndexMaterialEXT@8)
+	GL_STUB(IndexFuncEXT, _gloffset_IndexFuncEXT, IndexFuncEXT@8)
+	GL_STUB(LockArraysEXT, _gloffset_LockArraysEXT, LockArraysEXT@8)
+	GL_STUB(UnlockArraysEXT, _gloffset_UnlockArraysEXT, UnlockArraysEXT@0)
+	GL_STUB(CullParameterdvEXT, _gloffset_CullParameterdvEXT, CullParameterdvEXT@8)
+	GL_STUB(CullParameterfvEXT, _gloffset_CullParameterfvEXT, CullParameterfvEXT@8)
+	GL_STUB(HintPGI, _gloffset_HintPGI, HintPGI@8)
+	GL_STUB(FogCoordfEXT, _gloffset_FogCoordfEXT, FogCoordfEXT@4)
+	GL_STUB(FogCoordfvEXT, _gloffset_FogCoordfvEXT, FogCoordfvEXT@4)
+	GL_STUB(FogCoorddEXT, _gloffset_FogCoorddEXT, FogCoorddEXT@8)
+	GL_STUB(FogCoorddvEXT, _gloffset_FogCoorddvEXT, FogCoorddvEXT@4)
+	GL_STUB(FogCoordPointerEXT, _gloffset_FogCoordPointerEXT, FogCoordPointerEXT@12)
+	GL_STUB(GetColorTableEXT, _gloffset_GetColorTableEXT, GetColorTableEXT@16)
+	GL_STUB(GetColorTableParameterivEXT, _gloffset_GetColorTableParameterivEXT, GetColorTableParameterivEXT@12)
+	GL_STUB(GetColorTableParameterfvEXT, _gloffset_GetColorTableParameterfvEXT, GetColorTableParameterfvEXT@12)
+	GL_STUB(TbufferMask3DFX, _gloffset_TbufferMask3DFX, TbufferMask3DFX@4)
+	GL_STUB(CompressedTexImage3DARB, _gloffset_CompressedTexImage3DARB, CompressedTexImage3DARB@36)
+	GL_STUB(CompressedTexImage2DARB, _gloffset_CompressedTexImage2DARB, CompressedTexImage2DARB@32)
+	GL_STUB(CompressedTexImage1DARB, _gloffset_CompressedTexImage1DARB, CompressedTexImage1DARB@28)
+	GL_STUB(CompressedTexSubImage3DARB, _gloffset_CompressedTexSubImage3DARB, CompressedTexSubImage3DARB@44)
+	GL_STUB(CompressedTexSubImage2DARB, _gloffset_CompressedTexSubImage2DARB, CompressedTexSubImage2DARB@36)
+	GL_STUB(CompressedTexSubImage1DARB, _gloffset_CompressedTexSubImage1DARB, CompressedTexSubImage1DARB@28)
+	GL_STUB(GetCompressedTexImageARB, _gloffset_GetCompressedTexImageARB, GetCompressedTexImageARB@12)
+	GL_STUB(SecondaryColor3bEXT, _gloffset_SecondaryColor3bEXT, SecondaryColor3bEXT@12)
+	GL_STUB(SecondaryColor3bvEXT, _gloffset_SecondaryColor3bvEXT, SecondaryColor3bvEXT@4)
+	GL_STUB(SecondaryColor3dEXT, _gloffset_SecondaryColor3dEXT, SecondaryColor3dEXT@24)
+	GL_STUB(SecondaryColor3dvEXT, _gloffset_SecondaryColor3dvEXT, SecondaryColor3dvEXT@4)
+	GL_STUB(SecondaryColor3fEXT, _gloffset_SecondaryColor3fEXT, SecondaryColor3fEXT@12)
+	GL_STUB(SecondaryColor3fvEXT, _gloffset_SecondaryColor3fvEXT, SecondaryColor3fvEXT@4)
+	GL_STUB(SecondaryColor3iEXT, _gloffset_SecondaryColor3iEXT, SecondaryColor3iEXT@12)
+	GL_STUB(SecondaryColor3ivEXT, _gloffset_SecondaryColor3ivEXT, SecondaryColor3ivEXT@4)
+	GL_STUB(SecondaryColor3sEXT, _gloffset_SecondaryColor3sEXT, SecondaryColor3sEXT@12)
+	GL_STUB(SecondaryColor3svEXT, _gloffset_SecondaryColor3svEXT, SecondaryColor3svEXT@4)
+	GL_STUB(SecondaryColor3ubEXT, _gloffset_SecondaryColor3ubEXT, SecondaryColor3ubEXT@12)
+	GL_STUB(SecondaryColor3ubvEXT, _gloffset_SecondaryColor3ubvEXT, SecondaryColor3ubvEXT@4)
+	GL_STUB(SecondaryColor3uiEXT, _gloffset_SecondaryColor3uiEXT, SecondaryColor3uiEXT@12)
+	GL_STUB(SecondaryColor3uivEXT, _gloffset_SecondaryColor3uivEXT, SecondaryColor3uivEXT@4)
+	GL_STUB(SecondaryColor3usEXT, _gloffset_SecondaryColor3usEXT, SecondaryColor3usEXT@12)
+	GL_STUB(SecondaryColor3usvEXT, _gloffset_SecondaryColor3usvEXT, SecondaryColor3usvEXT@4)
+	GL_STUB(SecondaryColorPointerEXT, _gloffset_SecondaryColorPointerEXT, SecondaryColorPointerEXT@16)
+	GL_STUB(AreProgramsResidentNV, _gloffset_AreProgramsResidentNV, AreProgramsResidentNV@12)
+	GL_STUB(BindProgramNV, _gloffset_BindProgramNV, BindProgramNV@8)
+	GL_STUB(DeleteProgramsNV, _gloffset_DeleteProgramsNV, DeleteProgramsNV@8)
+	GL_STUB(ExecuteProgramNV, _gloffset_ExecuteProgramNV, ExecuteProgramNV@12)
+	GL_STUB(GenProgramsNV, _gloffset_GenProgramsNV, GenProgramsNV@8)
+	GL_STUB(GetProgramParameterdvNV, _gloffset_GetProgramParameterdvNV, GetProgramParameterdvNV@16)
+	GL_STUB(GetProgramParameterfvNV, _gloffset_GetProgramParameterfvNV, GetProgramParameterfvNV@16)
+	GL_STUB(GetProgramivNV, _gloffset_GetProgramivNV, GetProgramivNV@12)
+	GL_STUB(GetProgramStringNV, _gloffset_GetProgramStringNV, GetProgramStringNV@12)
+	GL_STUB(GetTrackMatrixivNV, _gloffset_GetTrackMatrixivNV, GetTrackMatrixivNV@16)
+	GL_STUB(GetVertexAttribdvARB, _gloffset_GetVertexAttribdvARB, GetVertexAttribdvARB@12)
+	GL_STUB(GetVertexAttribfvARB, _gloffset_GetVertexAttribfvARB, GetVertexAttribfvARB@12)
+	GL_STUB(GetVertexAttribivARB, _gloffset_GetVertexAttribivARB, GetVertexAttribivARB@12)
+	GL_STUB(GetVertexAttribPointervNV, _gloffset_GetVertexAttribPointervNV, GetVertexAttribPointervNV@12)
+	GL_STUB(IsProgramNV, _gloffset_IsProgramNV, IsProgramNV@4)
+	GL_STUB(LoadProgramNV, _gloffset_LoadProgramNV, LoadProgramNV@16)
+	GL_STUB(ProgramParameter4dNV, _gloffset_ProgramParameter4dNV, ProgramParameter4dNV@40)
+	GL_STUB(ProgramParameter4dvNV, _gloffset_ProgramParameter4dvNV, ProgramParameter4dvNV@12)
+	GL_STUB(ProgramParameter4fNV, _gloffset_ProgramParameter4fNV, ProgramParameter4fNV@24)
+	GL_STUB(ProgramParameter4fvNV, _gloffset_ProgramParameter4fvNV, ProgramParameter4fvNV@12)
+	GL_STUB(ProgramParameters4dvNV, _gloffset_ProgramParameters4dvNV, ProgramParameters4dvNV@16)
+	GL_STUB(ProgramParameters4fvNV, _gloffset_ProgramParameters4fvNV, ProgramParameters4fvNV@16)
+	GL_STUB(RequestResidentProgramsNV, _gloffset_RequestResidentProgramsNV, RequestResidentProgramsNV@8)
+	GL_STUB(TrackMatrixNV, _gloffset_TrackMatrixNV, TrackMatrixNV@16)
+	GL_STUB(VertexAttribPointerNV, _gloffset_VertexAttribPointerNV, VertexAttribPointerNV@20)
+	GL_STUB(VertexAttrib1dARB, _gloffset_VertexAttrib1dARB, VertexAttrib1dARB@12)
+	GL_STUB(VertexAttrib1dvARB, _gloffset_VertexAttrib1dvARB, VertexAttrib1dvARB@8)
+	GL_STUB(VertexAttrib1fARB, _gloffset_VertexAttrib1fARB, VertexAttrib1fARB@8)
+	GL_STUB(VertexAttrib1fvARB, _gloffset_VertexAttrib1fvARB, VertexAttrib1fvARB@8)
+	GL_STUB(VertexAttrib1sARB, _gloffset_VertexAttrib1sARB, VertexAttrib1sARB@8)
+	GL_STUB(VertexAttrib1svARB, _gloffset_VertexAttrib1svARB, VertexAttrib1svARB@8)
+	GL_STUB(VertexAttrib2dARB, _gloffset_VertexAttrib2dARB, VertexAttrib2dARB@20)
+	GL_STUB(VertexAttrib2dvARB, _gloffset_VertexAttrib2dvARB, VertexAttrib2dvARB@8)
+	GL_STUB(VertexAttrib2fARB, _gloffset_VertexAttrib2fARB, VertexAttrib2fARB@12)
+	GL_STUB(VertexAttrib2fvARB, _gloffset_VertexAttrib2fvARB, VertexAttrib2fvARB@8)
+	GL_STUB(VertexAttrib2sARB, _gloffset_VertexAttrib2sARB, VertexAttrib2sARB@12)
+	GL_STUB(VertexAttrib2svARB, _gloffset_VertexAttrib2svARB, VertexAttrib2svARB@8)
+	GL_STUB(VertexAttrib3dARB, _gloffset_VertexAttrib3dARB, VertexAttrib3dARB@28)
+	GL_STUB(VertexAttrib3dvARB, _gloffset_VertexAttrib3dvARB, VertexAttrib3dvARB@8)
+	GL_STUB(VertexAttrib3fARB, _gloffset_VertexAttrib3fARB, VertexAttrib3fARB@16)
+	GL_STUB(VertexAttrib3fvARB, _gloffset_VertexAttrib3fvARB, VertexAttrib3fvARB@8)
+	GL_STUB(VertexAttrib3sARB, _gloffset_VertexAttrib3sARB, VertexAttrib3sARB@16)
+	GL_STUB(VertexAttrib3svARB, _gloffset_VertexAttrib3svARB, VertexAttrib3svARB@8)
+	GL_STUB(VertexAttrib4dARB, _gloffset_VertexAttrib4dARB, VertexAttrib4dARB@36)
+	GL_STUB(VertexAttrib4dvARB, _gloffset_VertexAttrib4dvARB, VertexAttrib4dvARB@8)
+	GL_STUB(VertexAttrib4fARB, _gloffset_VertexAttrib4fARB, VertexAttrib4fARB@20)
+	GL_STUB(VertexAttrib4fvARB, _gloffset_VertexAttrib4fvARB, VertexAttrib4fvARB@8)
+	GL_STUB(VertexAttrib4sARB, _gloffset_VertexAttrib4sARB, VertexAttrib4sARB@20)
+	GL_STUB(VertexAttrib4svARB, _gloffset_VertexAttrib4svARB, VertexAttrib4svARB@8)
+	GL_STUB(VertexAttrib4NubARB, _gloffset_VertexAttrib4NubARB, VertexAttrib4NubARB@20)
+	GL_STUB(VertexAttrib4NubvARB, _gloffset_VertexAttrib4NubvARB, VertexAttrib4NubvARB@8)
+	GL_STUB(VertexAttribs1dvNV, _gloffset_VertexAttribs1dvNV, VertexAttribs1dvNV@12)
+	GL_STUB(VertexAttribs1fvNV, _gloffset_VertexAttribs1fvNV, VertexAttribs1fvNV@12)
+	GL_STUB(VertexAttribs1svNV, _gloffset_VertexAttribs1svNV, VertexAttribs1svNV@12)
+	GL_STUB(VertexAttribs2dvNV, _gloffset_VertexAttribs2dvNV, VertexAttribs2dvNV@12)
+	GL_STUB(VertexAttribs2fvNV, _gloffset_VertexAttribs2fvNV, VertexAttribs2fvNV@12)
+	GL_STUB(VertexAttribs2svNV, _gloffset_VertexAttribs2svNV, VertexAttribs2svNV@12)
+	GL_STUB(VertexAttribs3dvNV, _gloffset_VertexAttribs3dvNV, VertexAttribs3dvNV@12)
+	GL_STUB(VertexAttribs3fvNV, _gloffset_VertexAttribs3fvNV, VertexAttribs3fvNV@12)
+	GL_STUB(VertexAttribs3svNV, _gloffset_VertexAttribs3svNV, VertexAttribs3svNV@12)
+	GL_STUB(VertexAttribs4dvNV, _gloffset_VertexAttribs4dvNV, VertexAttribs4dvNV@12)
+	GL_STUB(VertexAttribs4fvNV, _gloffset_VertexAttribs4fvNV, VertexAttribs4fvNV@12)
+	GL_STUB(VertexAttribs4svNV, _gloffset_VertexAttribs4svNV, VertexAttribs4svNV@12)
+	GL_STUB(VertexAttribs4ubvNV, _gloffset_VertexAttribs4ubvNV, VertexAttribs4ubvNV@12)
+	GL_STUB(PointParameteriNV, _gloffset_PointParameteriNV, PointParameteriNV@8)
+	GL_STUB(PointParameterivNV, _gloffset_PointParameterivNV, PointParameterivNV@8)
+	GL_STUB(MultiDrawArraysEXT, _gloffset_MultiDrawArraysEXT, MultiDrawArraysEXT@16)
+	GL_STUB(MultiDrawElementsEXT, _gloffset_MultiDrawElementsEXT, MultiDrawElementsEXT@20)
+	GL_STUB(ActiveStencilFaceEXT, _gloffset_ActiveStencilFaceEXT, ActiveStencilFaceEXT@4)
+	GL_STUB(DeleteFencesNV, _gloffset_DeleteFencesNV, DeleteFencesNV@8)
+	GL_STUB(GenFencesNV, _gloffset_GenFencesNV, GenFencesNV@8)
+	GL_STUB(IsFenceNV, _gloffset_IsFenceNV, IsFenceNV@4)
+	GL_STUB(TestFenceNV, _gloffset_TestFenceNV, TestFenceNV@4)
+	GL_STUB(GetFenceivNV, _gloffset_GetFenceivNV, GetFenceivNV@12)
+	GL_STUB(FinishFenceNV, _gloffset_FinishFenceNV, FinishFenceNV@4)
+	GL_STUB(SetFenceNV, _gloffset_SetFenceNV, SetFenceNV@8)
+	GL_STUB(VertexAttrib4bvARB, _gloffset_VertexAttrib4bvARB, VertexAttrib4bvARB@8)
+	GL_STUB(VertexAttrib4ivARB, _gloffset_VertexAttrib4ivARB, VertexAttrib4ivARB@8)
+	GL_STUB(VertexAttrib4ubvARB, _gloffset_VertexAttrib4ubvARB, VertexAttrib4ubvARB@8)
+	GL_STUB(VertexAttrib4usvARB, _gloffset_VertexAttrib4usvARB, VertexAttrib4usvARB@8)
+	GL_STUB(VertexAttrib4uivARB, _gloffset_VertexAttrib4uivARB, VertexAttrib4uivARB@8)
+	GL_STUB(VertexAttrib4NbvARB, _gloffset_VertexAttrib4NbvARB, VertexAttrib4NbvARB@8)
+	GL_STUB(VertexAttrib4NsvARB, _gloffset_VertexAttrib4NsvARB, VertexAttrib4NsvARB@8)
+	GL_STUB(VertexAttrib4NivARB, _gloffset_VertexAttrib4NivARB, VertexAttrib4NivARB@8)
+	GL_STUB(VertexAttrib4NusvARB, _gloffset_VertexAttrib4NusvARB, VertexAttrib4NusvARB@8)
+	GL_STUB(VertexAttrib4NuivARB, _gloffset_VertexAttrib4NuivARB, VertexAttrib4NuivARB@8)
+	GL_STUB(VertexAttribPointerARB, _gloffset_VertexAttribPointerARB, VertexAttribPointerARB@24)
+	GL_STUB(EnableVertexAttribArrayARB, _gloffset_EnableVertexAttribArrayARB, EnableVertexAttribArrayARB@4)
+	GL_STUB(DisableVertexAttribArrayARB, _gloffset_DisableVertexAttribArrayARB, DisableVertexAttribArrayARB@4)
+	GL_STUB(ProgramStringARB, _gloffset_ProgramStringARB, ProgramStringARB@16)
+	GL_STUB(ProgramEnvParameter4dARB, _gloffset_ProgramEnvParameter4dARB, ProgramEnvParameter4dARB@40)
+	GL_STUB(ProgramEnvParameter4dvARB, _gloffset_ProgramEnvParameter4dvARB, ProgramEnvParameter4dvARB@12)
+	GL_STUB(ProgramEnvParameter4fARB, _gloffset_ProgramEnvParameter4fARB, ProgramEnvParameter4fARB@24)
+	GL_STUB(ProgramEnvParameter4fvARB, _gloffset_ProgramEnvParameter4fvARB, ProgramEnvParameter4fvARB@12)
+	GL_STUB(ProgramLocalParameter4dARB, _gloffset_ProgramLocalParameter4dARB, ProgramLocalParameter4dARB@40)
+	GL_STUB(ProgramLocalParameter4dvARB, _gloffset_ProgramLocalParameter4dvARB, ProgramLocalParameter4dvARB@12)
+	GL_STUB(ProgramLocalParameter4fARB, _gloffset_ProgramLocalParameter4fARB, ProgramLocalParameter4fARB@24)
+	GL_STUB(ProgramLocalParameter4fvARB, _gloffset_ProgramLocalParameter4fvARB, ProgramLocalParameter4fvARB@12)
+	GL_STUB(GetProgramEnvParameterdvARB, _gloffset_GetProgramEnvParameterdvARB, GetProgramEnvParameterdvARB@12)
+	GL_STUB(GetProgramEnvParameterfvARB, _gloffset_GetProgramEnvParameterfvARB, GetProgramEnvParameterfvARB@12)
+	GL_STUB(GetProgramLocalParameterdvARB, _gloffset_GetProgramLocalParameterdvARB, GetProgramLocalParameterdvARB@12)
+	GL_STUB(GetProgramLocalParameterfvARB, _gloffset_GetProgramLocalParameterfvARB, GetProgramLocalParameterfvARB@12)
+	GL_STUB(GetProgramivARB, _gloffset_GetProgramivARB, GetProgramivARB@12)
+	GL_STUB(GetProgramStringARB, _gloffset_GetProgramStringARB, GetProgramStringARB@12)
+	GL_STUB(ProgramNamedParameter4fNV, _gloffset_ProgramNamedParameter4fNV, ProgramNamedParameter4fNV@28)
+	GL_STUB(ProgramNamedParameter4dNV, _gloffset_ProgramNamedParameter4dNV, ProgramNamedParameter4dNV@44)
+	GL_STUB(ProgramNamedParameter4fvNV, _gloffset_ProgramNamedParameter4fvNV, ProgramNamedParameter4fvNV@16)
+	GL_STUB(ProgramNamedParameter4dvNV, _gloffset_ProgramNamedParameter4dvNV, ProgramNamedParameter4dvNV@16)
+	GL_STUB(GetProgramNamedParameterfvNV, _gloffset_GetProgramNamedParameterfvNV, GetProgramNamedParameterfvNV@16)
+	GL_STUB(GetProgramNamedParameterdvNV, _gloffset_GetProgramNamedParameterdvNV, GetProgramNamedParameterdvNV@16)
+	GL_STUB(BindBufferARB, _gloffset_BindBufferARB, BindBufferARB@8)
+	GL_STUB(BufferDataARB, _gloffset_BufferDataARB, BufferDataARB@16)
+	GL_STUB(BufferSubDataARB, _gloffset_BufferSubDataARB, BufferSubDataARB@16)
+	GL_STUB(DeleteBuffersARB, _gloffset_DeleteBuffersARB, DeleteBuffersARB@8)
+	GL_STUB(GenBuffersARB, _gloffset_GenBuffersARB, GenBuffersARB@8)
+	GL_STUB(GetBufferParameterivARB, _gloffset_GetBufferParameterivARB, GetBufferParameterivARB@12)
+	GL_STUB(GetBufferPointervARB, _gloffset_GetBufferPointervARB, GetBufferPointervARB@12)
+	GL_STUB(GetBufferSubDataARB, _gloffset_GetBufferSubDataARB, GetBufferSubDataARB@16)
+	GL_STUB(IsBufferARB, _gloffset_IsBufferARB, IsBufferARB@4)
+	GL_STUB(MapBufferARB, _gloffset_MapBufferARB, MapBufferARB@8)
+	GL_STUB(UnmapBufferARB, _gloffset_UnmapBufferARB, UnmapBufferARB@4)
+	GL_STUB(DepthBoundsEXT, _gloffset_DepthBoundsEXT, DepthBoundsEXT@16)
+	GL_STUB(GenQueriesARB, _gloffset_GenQueriesARB, GenQueriesARB@8)
+	GL_STUB(DeleteQueriesARB, _gloffset_DeleteQueriesARB, DeleteQueriesARB@8)
+	GL_STUB(IsQueryARB, _gloffset_IsQueryARB, IsQueryARB@4)
+	GL_STUB(BeginQueryARB, _gloffset_BeginQueryARB, BeginQueryARB@8)
+	GL_STUB(EndQueryARB, _gloffset_EndQueryARB, EndQueryARB@4)
+	GL_STUB(GetQueryivARB, _gloffset_GetQueryivARB, GetQueryivARB@12)
+	GL_STUB(GetQueryObjectivARB, _gloffset_GetQueryObjectivARB, GetQueryObjectivARB@12)
+	GL_STUB(GetQueryObjectuivARB, _gloffset_GetQueryObjectuivARB, GetQueryObjectuivARB@12)
+	GL_STUB(MultiModeDrawArraysIBM, _gloffset_MultiModeDrawArraysIBM, MultiModeDrawArraysIBM@20)
+	GL_STUB(MultiModeDrawElementsIBM, _gloffset_MultiModeDrawElementsIBM, MultiModeDrawElementsIBM@24)
+	GL_STUB(BlendEquationSeparateEXT, _gloffset_BlendEquationSeparateEXT, BlendEquationSeparateEXT@8)
+	GL_STUB(DeleteObjectARB, _gloffset_DeleteObjectARB, DeleteObjectARB@4)
+	GL_STUB(GetHandleARB, _gloffset_GetHandleARB, GetHandleARB@4)
+	GL_STUB(DetachObjectARB, _gloffset_DetachObjectARB, DetachObjectARB@8)
+	GL_STUB(CreateShaderObjectARB, _gloffset_CreateShaderObjectARB, CreateShaderObjectARB@4)
+	GL_STUB(ShaderSourceARB, _gloffset_ShaderSourceARB, ShaderSourceARB@16)
+	GL_STUB(CompileShaderARB, _gloffset_CompileShaderARB, CompileShaderARB@4)
+	GL_STUB(CreateProgramObjectARB, _gloffset_CreateProgramObjectARB, CreateProgramObjectARB@0)
+	GL_STUB(AttachObjectARB, _gloffset_AttachObjectARB, AttachObjectARB@8)
+	GL_STUB(LinkProgramARB, _gloffset_LinkProgramARB, LinkProgramARB@4)
+	GL_STUB(UseProgramObjectARB, _gloffset_UseProgramObjectARB, UseProgramObjectARB@4)
+	GL_STUB(ValidateProgramARB, _gloffset_ValidateProgramARB, ValidateProgramARB@4)
+	GL_STUB(Uniform1fARB, _gloffset_Uniform1fARB, Uniform1fARB@8)
+	GL_STUB(Uniform2fARB, _gloffset_Uniform2fARB, Uniform2fARB@12)
+	GL_STUB(Uniform3fARB, _gloffset_Uniform3fARB, Uniform3fARB@16)
+	GL_STUB(Uniform4fARB, _gloffset_Uniform4fARB, Uniform4fARB@20)
+	GL_STUB(Uniform1iARB, _gloffset_Uniform1iARB, Uniform1iARB@8)
+	GL_STUB(Uniform2iARB, _gloffset_Uniform2iARB, Uniform2iARB@12)
+	GL_STUB(Uniform3iARB, _gloffset_Uniform3iARB, Uniform3iARB@16)
+	GL_STUB(Uniform4iARB, _gloffset_Uniform4iARB, Uniform4iARB@20)
+	GL_STUB(Uniform1fvARB, _gloffset_Uniform1fvARB, Uniform1fvARB@12)
+	GL_STUB(Uniform2fvARB, _gloffset_Uniform2fvARB, Uniform2fvARB@12)
+	GL_STUB(Uniform3fvARB, _gloffset_Uniform3fvARB, Uniform3fvARB@12)
+	GL_STUB(Uniform4fvARB, _gloffset_Uniform4fvARB, Uniform4fvARB@12)
+	GL_STUB(Uniform1ivARB, _gloffset_Uniform1ivARB, Uniform1ivARB@12)
+	GL_STUB(Uniform2ivARB, _gloffset_Uniform2ivARB, Uniform2ivARB@12)
+	GL_STUB(Uniform3ivARB, _gloffset_Uniform3ivARB, Uniform3ivARB@12)
+	GL_STUB(Uniform4ivARB, _gloffset_Uniform4ivARB, Uniform4ivARB@12)
+	GL_STUB(UniformMatrix2fvARB, _gloffset_UniformMatrix2fvARB, UniformMatrix2fvARB@16)
+	GL_STUB(UniformMatrix3fvARB, _gloffset_UniformMatrix3fvARB, UniformMatrix3fvARB@16)
+	GL_STUB(UniformMatrix4fvARB, _gloffset_UniformMatrix4fvARB, UniformMatrix4fvARB@16)
+	GL_STUB(GetObjectParameterfvARB, _gloffset_GetObjectParameterfvARB, GetObjectParameterfvARB@12)
+	GL_STUB(GetObjectParameterivARB, _gloffset_GetObjectParameterivARB, GetObjectParameterivARB@12)
+	GL_STUB(GetInfoLogARB, _gloffset_GetInfoLogARB, GetInfoLogARB@16)
+	GL_STUB(GetAttachedObjectsARB, _gloffset_GetAttachedObjectsARB, GetAttachedObjectsARB@16)
+	GL_STUB(GetUniformLocationARB, _gloffset_GetUniformLocationARB, GetUniformLocationARB@8)
+	GL_STUB(GetActiveUniformARB, _gloffset_GetActiveUniformARB, GetActiveUniformARB@28)
+	GL_STUB(GetUniformfvARB, _gloffset_GetUniformfvARB, GetUniformfvARB@12)
+	GL_STUB(GetUniformivARB, _gloffset_GetUniformivARB, GetUniformivARB@12)
+	GL_STUB(GetShaderSourceARB, _gloffset_GetShaderSourceARB, GetShaderSourceARB@16)
+	GL_STUB(BindAttribLocationARB, _gloffset_BindAttribLocationARB, BindAttribLocationARB@12)
+	GL_STUB(GetActiveAttribARB, _gloffset_GetActiveAttribARB, GetActiveAttribARB@28)
+	GL_STUB(GetAttribLocationARB, _gloffset_GetAttribLocationARB, GetAttribLocationARB@8)
+	GL_STUB(GetVertexAttribdvNV, _gloffset_GetVertexAttribdvNV, GetVertexAttribdvNV@12)
+	GL_STUB(GetVertexAttribfvNV, _gloffset_GetVertexAttribfvNV, GetVertexAttribfvNV@12)
+	GL_STUB(GetVertexAttribivNV, _gloffset_GetVertexAttribivNV, GetVertexAttribivNV@12)
+	GL_STUB(VertexAttrib1dNV, _gloffset_VertexAttrib1dNV, VertexAttrib1dNV@12)
+	GL_STUB(VertexAttrib1dvNV, _gloffset_VertexAttrib1dvNV, VertexAttrib1dvNV@8)
+	GL_STUB(VertexAttrib1fNV, _gloffset_VertexAttrib1fNV, VertexAttrib1fNV@8)
+	GL_STUB(VertexAttrib1fvNV, _gloffset_VertexAttrib1fvNV, VertexAttrib1fvNV@8)
+	GL_STUB(VertexAttrib1sNV, _gloffset_VertexAttrib1sNV, VertexAttrib1sNV@8)
+	GL_STUB(VertexAttrib1svNV, _gloffset_VertexAttrib1svNV, VertexAttrib1svNV@8)
+	GL_STUB(VertexAttrib2dNV, _gloffset_VertexAttrib2dNV, VertexAttrib2dNV@20)
+	GL_STUB(VertexAttrib2dvNV, _gloffset_VertexAttrib2dvNV, VertexAttrib2dvNV@8)
+	GL_STUB(VertexAttrib2fNV, _gloffset_VertexAttrib2fNV, VertexAttrib2fNV@12)
+	GL_STUB(VertexAttrib2fvNV, _gloffset_VertexAttrib2fvNV, VertexAttrib2fvNV@8)
+	GL_STUB(VertexAttrib2sNV, _gloffset_VertexAttrib2sNV, VertexAttrib2sNV@12)
+	GL_STUB(VertexAttrib2svNV, _gloffset_VertexAttrib2svNV, VertexAttrib2svNV@8)
+	GL_STUB(VertexAttrib3dNV, _gloffset_VertexAttrib3dNV, VertexAttrib3dNV@28)
+	GL_STUB(VertexAttrib3dvNV, _gloffset_VertexAttrib3dvNV, VertexAttrib3dvNV@8)
+	GL_STUB(VertexAttrib3fNV, _gloffset_VertexAttrib3fNV, VertexAttrib3fNV@16)
+	GL_STUB(VertexAttrib3fvNV, _gloffset_VertexAttrib3fvNV, VertexAttrib3fvNV@8)
+	GL_STUB(VertexAttrib3sNV, _gloffset_VertexAttrib3sNV, VertexAttrib3sNV@16)
+	GL_STUB(VertexAttrib3svNV, _gloffset_VertexAttrib3svNV, VertexAttrib3svNV@8)
+	GL_STUB(VertexAttrib4dNV, _gloffset_VertexAttrib4dNV, VertexAttrib4dNV@36)
+	GL_STUB(VertexAttrib4dvNV, _gloffset_VertexAttrib4dvNV, VertexAttrib4dvNV@8)
+	GL_STUB(VertexAttrib4fNV, _gloffset_VertexAttrib4fNV, VertexAttrib4fNV@20)
+	GL_STUB(VertexAttrib4fvNV, _gloffset_VertexAttrib4fvNV, VertexAttrib4fvNV@8)
+	GL_STUB(VertexAttrib4sNV, _gloffset_VertexAttrib4sNV, VertexAttrib4sNV@20)
+	GL_STUB(VertexAttrib4svNV, _gloffset_VertexAttrib4svNV, VertexAttrib4svNV@8)
+	GL_STUB(VertexAttrib4ubNV, _gloffset_VertexAttrib4ubNV, VertexAttrib4ubNV@20)
+	GL_STUB(VertexAttrib4ubvNV, _gloffset_VertexAttrib4ubvNV, VertexAttrib4ubvNV@8)
+	GL_STUB(GenFragmentShadersATI, _gloffset_GenFragmentShadersATI, GenFragmentShadersATI@4)
+	GL_STUB(BindFragmentShaderATI, _gloffset_BindFragmentShaderATI, BindFragmentShaderATI@4)
+	GL_STUB(DeleteFragmentShaderATI, _gloffset_DeleteFragmentShaderATI, DeleteFragmentShaderATI@4)
+	GL_STUB(BeginFragmentShaderATI, _gloffset_BeginFragmentShaderATI, BeginFragmentShaderATI@0)
+	GL_STUB(EndFragmentShaderATI, _gloffset_EndFragmentShaderATI, EndFragmentShaderATI@0)
+	GL_STUB(PassTexCoordATI, _gloffset_PassTexCoordATI, PassTexCoordATI@12)
+	GL_STUB(SampleMapATI, _gloffset_SampleMapATI, SampleMapATI@12)
+	GL_STUB(ColorFragmentOp1ATI, _gloffset_ColorFragmentOp1ATI, ColorFragmentOp1ATI@28)
+	GL_STUB(ColorFragmentOp2ATI, _gloffset_ColorFragmentOp2ATI, ColorFragmentOp2ATI@40)
+	GL_STUB(ColorFragmentOp3ATI, _gloffset_ColorFragmentOp3ATI, ColorFragmentOp3ATI@52)
+	GL_STUB(AlphaFragmentOp1ATI, _gloffset_AlphaFragmentOp1ATI, AlphaFragmentOp1ATI@24)
+	GL_STUB(AlphaFragmentOp2ATI, _gloffset_AlphaFragmentOp2ATI, AlphaFragmentOp2ATI@36)
+	GL_STUB(AlphaFragmentOp3ATI, _gloffset_AlphaFragmentOp3ATI, AlphaFragmentOp3ATI@48)
+	GL_STUB(SetFragmentShaderConstantATI, _gloffset_SetFragmentShaderConstantATI, SetFragmentShaderConstantATI@8)
+	GL_STUB(IsRenderbufferEXT, _gloffset_IsRenderbufferEXT, IsRenderbufferEXT@4)
+	GL_STUB(BindRenderbufferEXT, _gloffset_BindRenderbufferEXT, BindRenderbufferEXT@8)
+	GL_STUB(DeleteRenderbuffersEXT, _gloffset_DeleteRenderbuffersEXT, DeleteRenderbuffersEXT@8)
+	GL_STUB(GenRenderbuffersEXT, _gloffset_GenRenderbuffersEXT, GenRenderbuffersEXT@8)
+	GL_STUB(RenderbufferStorageEXT, _gloffset_RenderbufferStorageEXT, RenderbufferStorageEXT@16)
+	GL_STUB(GetRenderbufferParameterivEXT, _gloffset_GetRenderbufferParameterivEXT, GetRenderbufferParameterivEXT@12)
+	GL_STUB(IsFramebufferEXT, _gloffset_IsFramebufferEXT, IsFramebufferEXT@4)
+	GL_STUB(BindFramebufferEXT, _gloffset_BindFramebufferEXT, BindFramebufferEXT@8)
+	GL_STUB(DeleteFramebuffersEXT, _gloffset_DeleteFramebuffersEXT, DeleteFramebuffersEXT@8)
+	GL_STUB(GenFramebuffersEXT, _gloffset_GenFramebuffersEXT, GenFramebuffersEXT@8)
+	GL_STUB(CheckFramebufferStatusEXT, _gloffset_CheckFramebufferStatusEXT, CheckFramebufferStatusEXT@4)
+	GL_STUB(FramebufferTexture1DEXT, _gloffset_FramebufferTexture1DEXT, FramebufferTexture1DEXT@20)
+	GL_STUB(FramebufferTexture2DEXT, _gloffset_FramebufferTexture2DEXT, FramebufferTexture2DEXT@20)
+	GL_STUB(FramebufferTexture3DEXT, _gloffset_FramebufferTexture3DEXT, FramebufferTexture3DEXT@24)
+	GL_STUB(FramebufferRenderbufferEXT, _gloffset_FramebufferRenderbufferEXT, FramebufferRenderbufferEXT@16)
+	GL_STUB(GetFramebufferAttachmentParameterivEXT, _gloffset_GetFramebufferAttachmentParameterivEXT, GetFramebufferAttachmentParameterivEXT@16)
+	GL_STUB(GenerateMipmapEXT, _gloffset_GenerateMipmapEXT, GenerateMipmapEXT@4)
+	GL_STUB(StencilFuncSeparate, _gloffset_StencilFuncSeparate, StencilFuncSeparate@16)
+	GL_STUB(StencilOpSeparate, _gloffset_StencilOpSeparate, StencilOpSeparate@16)
+	GL_STUB(StencilMaskSeparate, _gloffset_StencilMaskSeparate, StencilMaskSeparate@8)
+	GL_STUB_ALIAS(ArrayElementEXT, _gloffset_ArrayElement, ArrayElementEXT@4, ArrayElement, ArrayElement@4)
+	GL_STUB_ALIAS(BindTextureEXT, _gloffset_BindTexture, BindTextureEXT@8, BindTexture, BindTexture@8)
+	GL_STUB_ALIAS(DrawArraysEXT, _gloffset_DrawArrays, DrawArraysEXT@12, DrawArrays, DrawArrays@12)
+	GL_STUB_ALIAS(CopyTexImage1DEXT, _gloffset_CopyTexImage1D, CopyTexImage1DEXT@28, CopyTexImage1D, CopyTexImage1D@28)
+	GL_STUB_ALIAS(CopyTexImage2DEXT, _gloffset_CopyTexImage2D, CopyTexImage2DEXT@32, CopyTexImage2D, CopyTexImage2D@32)
+	GL_STUB_ALIAS(CopyTexSubImage1DEXT, _gloffset_CopyTexSubImage1D, CopyTexSubImage1DEXT@24, CopyTexSubImage1D, CopyTexSubImage1D@24)
+	GL_STUB_ALIAS(CopyTexSubImage2DEXT, _gloffset_CopyTexSubImage2D, CopyTexSubImage2DEXT@32, CopyTexSubImage2D, CopyTexSubImage2D@32)
+	GL_STUB_ALIAS(DeleteTexturesEXT, _gloffset_DeleteTextures, DeleteTexturesEXT@8, DeleteTextures, DeleteTextures@8)
+	GL_STUB_ALIAS(GetPointervEXT, _gloffset_GetPointerv, GetPointervEXT@8, GetPointerv, GetPointerv@8)
+	GL_STUB_ALIAS(PrioritizeTexturesEXT, _gloffset_PrioritizeTextures, PrioritizeTexturesEXT@12, PrioritizeTextures, PrioritizeTextures@12)
+	GL_STUB_ALIAS(TexSubImage1DEXT, _gloffset_TexSubImage1D, TexSubImage1DEXT@28, TexSubImage1D, TexSubImage1D@28)
+	GL_STUB_ALIAS(TexSubImage2DEXT, _gloffset_TexSubImage2D, TexSubImage2DEXT@36, TexSubImage2D, TexSubImage2D@36)
+	GL_STUB_ALIAS(BlendColorEXT, _gloffset_BlendColor, BlendColorEXT@16, BlendColor, BlendColor@16)
+	GL_STUB_ALIAS(BlendEquationEXT, _gloffset_BlendEquation, BlendEquationEXT@4, BlendEquation, BlendEquation@4)
+	GL_STUB_ALIAS(DrawRangeElementsEXT, _gloffset_DrawRangeElements, DrawRangeElementsEXT@24, DrawRangeElements, DrawRangeElements@24)
+	GL_STUB_ALIAS(ColorTableSGI, _gloffset_ColorTable, ColorTableSGI@24, ColorTable, ColorTable@24)
+	GL_STUB_ALIAS(ColorTableEXT, _gloffset_ColorTable, ColorTableEXT@24, ColorTable, ColorTable@24)
+	GL_STUB_ALIAS(ColorTableParameterfvSGI, _gloffset_ColorTableParameterfv, ColorTableParameterfvSGI@12, ColorTableParameterfv, ColorTableParameterfv@12)
+	GL_STUB_ALIAS(ColorTableParameterivSGI, _gloffset_ColorTableParameteriv, ColorTableParameterivSGI@12, ColorTableParameteriv, ColorTableParameteriv@12)
+	GL_STUB_ALIAS(CopyColorTableSGI, _gloffset_CopyColorTable, CopyColorTableSGI@20, CopyColorTable, CopyColorTable@20)
+	GL_STUB_ALIAS(ColorSubTableEXT, _gloffset_ColorSubTable, ColorSubTableEXT@24, ColorSubTable, ColorSubTable@24)
+	GL_STUB_ALIAS(CopyColorSubTableEXT, _gloffset_CopyColorSubTable, CopyColorSubTableEXT@20, CopyColorSubTable, CopyColorSubTable@20)
+	GL_STUB_ALIAS(ConvolutionFilter1DEXT, _gloffset_ConvolutionFilter1D, ConvolutionFilter1DEXT@24, ConvolutionFilter1D, ConvolutionFilter1D@24)
+	GL_STUB_ALIAS(ConvolutionFilter2DEXT, _gloffset_ConvolutionFilter2D, ConvolutionFilter2DEXT@28, ConvolutionFilter2D, ConvolutionFilter2D@28)
+	GL_STUB_ALIAS(ConvolutionParameterfEXT, _gloffset_ConvolutionParameterf, ConvolutionParameterfEXT@12, ConvolutionParameterf, ConvolutionParameterf@12)
+	GL_STUB_ALIAS(ConvolutionParameterfvEXT, _gloffset_ConvolutionParameterfv, ConvolutionParameterfvEXT@12, ConvolutionParameterfv, ConvolutionParameterfv@12)
+	GL_STUB_ALIAS(ConvolutionParameteriEXT, _gloffset_ConvolutionParameteri, ConvolutionParameteriEXT@12, ConvolutionParameteri, ConvolutionParameteri@12)
+	GL_STUB_ALIAS(ConvolutionParameterivEXT, _gloffset_ConvolutionParameteriv, ConvolutionParameterivEXT@12, ConvolutionParameteriv, ConvolutionParameteriv@12)
+	GL_STUB_ALIAS(CopyConvolutionFilter1DEXT, _gloffset_CopyConvolutionFilter1D, CopyConvolutionFilter1DEXT@20, CopyConvolutionFilter1D, CopyConvolutionFilter1D@20)
+	GL_STUB_ALIAS(CopyConvolutionFilter2DEXT, _gloffset_CopyConvolutionFilter2D, CopyConvolutionFilter2DEXT@24, CopyConvolutionFilter2D, CopyConvolutionFilter2D@24)
+	GL_STUB_ALIAS(SeparableFilter2DEXT, _gloffset_SeparableFilter2D, SeparableFilter2DEXT@32, SeparableFilter2D, SeparableFilter2D@32)
+	GL_STUB_ALIAS(HistogramEXT, _gloffset_Histogram, HistogramEXT@16, Histogram, Histogram@16)
+	GL_STUB_ALIAS(MinmaxEXT, _gloffset_Minmax, MinmaxEXT@12, Minmax, Minmax@12)
+	GL_STUB_ALIAS(ResetHistogramEXT, _gloffset_ResetHistogram, ResetHistogramEXT@4, ResetHistogram, ResetHistogram@4)
+	GL_STUB_ALIAS(ResetMinmaxEXT, _gloffset_ResetMinmax, ResetMinmaxEXT@4, ResetMinmax, ResetMinmax@4)
+	GL_STUB_ALIAS(TexImage3DEXT, _gloffset_TexImage3D, TexImage3DEXT@40, TexImage3D, TexImage3D@40)
+	GL_STUB_ALIAS(TexSubImage3DEXT, _gloffset_TexSubImage3D, TexSubImage3DEXT@44, TexSubImage3D, TexSubImage3D@44)
+	GL_STUB_ALIAS(CopyTexSubImage3DEXT, _gloffset_CopyTexSubImage3D, CopyTexSubImage3DEXT@36, CopyTexSubImage3D, CopyTexSubImage3D@36)
+	GL_STUB_ALIAS(ActiveTexture, _gloffset_ActiveTextureARB, ActiveTexture@4, ActiveTextureARB, ActiveTextureARB@4)
+	GL_STUB_ALIAS(ClientActiveTexture, _gloffset_ClientActiveTextureARB, ClientActiveTexture@4, ClientActiveTextureARB, ClientActiveTextureARB@4)
+	GL_STUB_ALIAS(MultiTexCoord1d, _gloffset_MultiTexCoord1dARB, MultiTexCoord1d@12, MultiTexCoord1dARB, MultiTexCoord1dARB@12)
+	GL_STUB_ALIAS(MultiTexCoord1dv, _gloffset_MultiTexCoord1dvARB, MultiTexCoord1dv@8, MultiTexCoord1dvARB, MultiTexCoord1dvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1f, _gloffset_MultiTexCoord1fARB, MultiTexCoord1f@8, MultiTexCoord1fARB, MultiTexCoord1fARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1fv, _gloffset_MultiTexCoord1fvARB, MultiTexCoord1fv@8, MultiTexCoord1fvARB, MultiTexCoord1fvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1i, _gloffset_MultiTexCoord1iARB, MultiTexCoord1i@8, MultiTexCoord1iARB, MultiTexCoord1iARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1iv, _gloffset_MultiTexCoord1ivARB, MultiTexCoord1iv@8, MultiTexCoord1ivARB, MultiTexCoord1ivARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1s, _gloffset_MultiTexCoord1sARB, MultiTexCoord1s@8, MultiTexCoord1sARB, MultiTexCoord1sARB@8)
+	GL_STUB_ALIAS(MultiTexCoord1sv, _gloffset_MultiTexCoord1svARB, MultiTexCoord1sv@8, MultiTexCoord1svARB, MultiTexCoord1svARB@8)
+	GL_STUB_ALIAS(MultiTexCoord2d, _gloffset_MultiTexCoord2dARB, MultiTexCoord2d@20, MultiTexCoord2dARB, MultiTexCoord2dARB@20)
+	GL_STUB_ALIAS(MultiTexCoord2dv, _gloffset_MultiTexCoord2dvARB, MultiTexCoord2dv@8, MultiTexCoord2dvARB, MultiTexCoord2dvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord2f, _gloffset_MultiTexCoord2fARB, MultiTexCoord2f@12, MultiTexCoord2fARB, MultiTexCoord2fARB@12)
+	GL_STUB_ALIAS(MultiTexCoord2fv, _gloffset_MultiTexCoord2fvARB, MultiTexCoord2fv@8, MultiTexCoord2fvARB, MultiTexCoord2fvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord2i, _gloffset_MultiTexCoord2iARB, MultiTexCoord2i@12, MultiTexCoord2iARB, MultiTexCoord2iARB@12)
+	GL_STUB_ALIAS(MultiTexCoord2iv, _gloffset_MultiTexCoord2ivARB, MultiTexCoord2iv@8, MultiTexCoord2ivARB, MultiTexCoord2ivARB@8)
+	GL_STUB_ALIAS(MultiTexCoord2s, _gloffset_MultiTexCoord2sARB, MultiTexCoord2s@12, MultiTexCoord2sARB, MultiTexCoord2sARB@12)
+	GL_STUB_ALIAS(MultiTexCoord2sv, _gloffset_MultiTexCoord2svARB, MultiTexCoord2sv@8, MultiTexCoord2svARB, MultiTexCoord2svARB@8)
+	GL_STUB_ALIAS(MultiTexCoord3d, _gloffset_MultiTexCoord3dARB, MultiTexCoord3d@28, MultiTexCoord3dARB, MultiTexCoord3dARB@28)
+	GL_STUB_ALIAS(MultiTexCoord3dv, _gloffset_MultiTexCoord3dvARB, MultiTexCoord3dv@8, MultiTexCoord3dvARB, MultiTexCoord3dvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord3f, _gloffset_MultiTexCoord3fARB, MultiTexCoord3f@16, MultiTexCoord3fARB, MultiTexCoord3fARB@16)
+	GL_STUB_ALIAS(MultiTexCoord3fv, _gloffset_MultiTexCoord3fvARB, MultiTexCoord3fv@8, MultiTexCoord3fvARB, MultiTexCoord3fvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord3i, _gloffset_MultiTexCoord3iARB, MultiTexCoord3i@16, MultiTexCoord3iARB, MultiTexCoord3iARB@16)
+	GL_STUB_ALIAS(MultiTexCoord3iv, _gloffset_MultiTexCoord3ivARB, MultiTexCoord3iv@8, MultiTexCoord3ivARB, MultiTexCoord3ivARB@8)
+	GL_STUB_ALIAS(MultiTexCoord3s, _gloffset_MultiTexCoord3sARB, MultiTexCoord3s@16, MultiTexCoord3sARB, MultiTexCoord3sARB@16)
+	GL_STUB_ALIAS(MultiTexCoord3sv, _gloffset_MultiTexCoord3svARB, MultiTexCoord3sv@8, MultiTexCoord3svARB, MultiTexCoord3svARB@8)
+	GL_STUB_ALIAS(MultiTexCoord4d, _gloffset_MultiTexCoord4dARB, MultiTexCoord4d@36, MultiTexCoord4dARB, MultiTexCoord4dARB@36)
+	GL_STUB_ALIAS(MultiTexCoord4dv, _gloffset_MultiTexCoord4dvARB, MultiTexCoord4dv@8, MultiTexCoord4dvARB, MultiTexCoord4dvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord4f, _gloffset_MultiTexCoord4fARB, MultiTexCoord4f@20, MultiTexCoord4fARB, MultiTexCoord4fARB@20)
+	GL_STUB_ALIAS(MultiTexCoord4fv, _gloffset_MultiTexCoord4fvARB, MultiTexCoord4fv@8, MultiTexCoord4fvARB, MultiTexCoord4fvARB@8)
+	GL_STUB_ALIAS(MultiTexCoord4i, _gloffset_MultiTexCoord4iARB, MultiTexCoord4i@20, MultiTexCoord4iARB, MultiTexCoord4iARB@20)
+	GL_STUB_ALIAS(MultiTexCoord4iv, _gloffset_MultiTexCoord4ivARB, MultiTexCoord4iv@8, MultiTexCoord4ivARB, MultiTexCoord4ivARB@8)
+	GL_STUB_ALIAS(MultiTexCoord4s, _gloffset_MultiTexCoord4sARB, MultiTexCoord4s@20, MultiTexCoord4sARB, MultiTexCoord4sARB@20)
+	GL_STUB_ALIAS(MultiTexCoord4sv, _gloffset_MultiTexCoord4svARB, MultiTexCoord4sv@8, MultiTexCoord4svARB, MultiTexCoord4svARB@8)
+	GL_STUB_ALIAS(LoadTransposeMatrixf, _gloffset_LoadTransposeMatrixfARB, LoadTransposeMatrixf@4, LoadTransposeMatrixfARB, LoadTransposeMatrixfARB@4)
+	GL_STUB_ALIAS(LoadTransposeMatrixd, _gloffset_LoadTransposeMatrixdARB, LoadTransposeMatrixd@4, LoadTransposeMatrixdARB, LoadTransposeMatrixdARB@4)
+	GL_STUB_ALIAS(MultTransposeMatrixf, _gloffset_MultTransposeMatrixfARB, MultTransposeMatrixf@4, MultTransposeMatrixfARB, MultTransposeMatrixfARB@4)
+	GL_STUB_ALIAS(MultTransposeMatrixd, _gloffset_MultTransposeMatrixdARB, MultTransposeMatrixd@4, MultTransposeMatrixdARB, MultTransposeMatrixdARB@4)
+	GL_STUB_ALIAS(SampleCoverage, _gloffset_SampleCoverageARB, SampleCoverage@8, SampleCoverageARB, SampleCoverageARB@8)
+	GL_STUB_ALIAS(DrawBuffersATI, _gloffset_DrawBuffersARB, DrawBuffersATI@8, DrawBuffersARB, DrawBuffersARB@8)
+	GL_STUB_ALIAS(SampleMaskEXT, _gloffset_SampleMaskSGIS, SampleMaskEXT@8, SampleMaskSGIS, SampleMaskSGIS@8)
+	GL_STUB_ALIAS(SamplePatternEXT, _gloffset_SamplePatternSGIS, SamplePatternEXT@4, SamplePatternSGIS, SamplePatternSGIS@4)
+	GL_STUB_ALIAS(PointParameterf, _gloffset_PointParameterfEXT, PointParameterf@8, PointParameterfEXT, PointParameterfEXT@8)
+	GL_STUB_ALIAS(PointParameterfARB, _gloffset_PointParameterfEXT, PointParameterfARB@8, PointParameterfEXT, PointParameterfEXT@8)
+	GL_STUB_ALIAS(PointParameterfSGIS, _gloffset_PointParameterfEXT, PointParameterfSGIS@8, PointParameterfEXT, PointParameterfEXT@8)
+	GL_STUB_ALIAS(PointParameterfv, _gloffset_PointParameterfvEXT, PointParameterfv@8, PointParameterfvEXT, PointParameterfvEXT@8)
+	GL_STUB_ALIAS(PointParameterfvARB, _gloffset_PointParameterfvEXT, PointParameterfvARB@8, PointParameterfvEXT, PointParameterfvEXT@8)
+	GL_STUB_ALIAS(PointParameterfvSGIS, _gloffset_PointParameterfvEXT, PointParameterfvSGIS@8, PointParameterfvEXT, PointParameterfvEXT@8)
+	GL_STUB_ALIAS(WindowPos2d, _gloffset_WindowPos2dMESA, WindowPos2d@16, WindowPos2dMESA, WindowPos2dMESA@16)
+	GL_STUB_ALIAS(WindowPos2dARB, _gloffset_WindowPos2dMESA, WindowPos2dARB@16, WindowPos2dMESA, WindowPos2dMESA@16)
+	GL_STUB_ALIAS(WindowPos2dv, _gloffset_WindowPos2dvMESA, WindowPos2dv@4, WindowPos2dvMESA, WindowPos2dvMESA@4)
+	GL_STUB_ALIAS(WindowPos2dvARB, _gloffset_WindowPos2dvMESA, WindowPos2dvARB@4, WindowPos2dvMESA, WindowPos2dvMESA@4)
+	GL_STUB_ALIAS(WindowPos2f, _gloffset_WindowPos2fMESA, WindowPos2f@8, WindowPos2fMESA, WindowPos2fMESA@8)
+	GL_STUB_ALIAS(WindowPos2fARB, _gloffset_WindowPos2fMESA, WindowPos2fARB@8, WindowPos2fMESA, WindowPos2fMESA@8)
+	GL_STUB_ALIAS(WindowPos2fv, _gloffset_WindowPos2fvMESA, WindowPos2fv@4, WindowPos2fvMESA, WindowPos2fvMESA@4)
+	GL_STUB_ALIAS(WindowPos2fvARB, _gloffset_WindowPos2fvMESA, WindowPos2fvARB@4, WindowPos2fvMESA, WindowPos2fvMESA@4)
+	GL_STUB_ALIAS(WindowPos2i, _gloffset_WindowPos2iMESA, WindowPos2i@8, WindowPos2iMESA, WindowPos2iMESA@8)
+	GL_STUB_ALIAS(WindowPos2iARB, _gloffset_WindowPos2iMESA, WindowPos2iARB@8, WindowPos2iMESA, WindowPos2iMESA@8)
+	GL_STUB_ALIAS(WindowPos2iv, _gloffset_WindowPos2ivMESA, WindowPos2iv@4, WindowPos2ivMESA, WindowPos2ivMESA@4)
+	GL_STUB_ALIAS(WindowPos2ivARB, _gloffset_WindowPos2ivMESA, WindowPos2ivARB@4, WindowPos2ivMESA, WindowPos2ivMESA@4)
+	GL_STUB_ALIAS(WindowPos2s, _gloffset_WindowPos2sMESA, WindowPos2s@8, WindowPos2sMESA, WindowPos2sMESA@8)
+	GL_STUB_ALIAS(WindowPos2sARB, _gloffset_WindowPos2sMESA, WindowPos2sARB@8, WindowPos2sMESA, WindowPos2sMESA@8)
+	GL_STUB_ALIAS(WindowPos2sv, _gloffset_WindowPos2svMESA, WindowPos2sv@4, WindowPos2svMESA, WindowPos2svMESA@4)
+	GL_STUB_ALIAS(WindowPos2svARB, _gloffset_WindowPos2svMESA, WindowPos2svARB@4, WindowPos2svMESA, WindowPos2svMESA@4)
+	GL_STUB_ALIAS(WindowPos3d, _gloffset_WindowPos3dMESA, WindowPos3d@24, WindowPos3dMESA, WindowPos3dMESA@24)
+	GL_STUB_ALIAS(WindowPos3dARB, _gloffset_WindowPos3dMESA, WindowPos3dARB@24, WindowPos3dMESA, WindowPos3dMESA@24)
+	GL_STUB_ALIAS(WindowPos3dv, _gloffset_WindowPos3dvMESA, WindowPos3dv@4, WindowPos3dvMESA, WindowPos3dvMESA@4)
+	GL_STUB_ALIAS(WindowPos3dvARB, _gloffset_WindowPos3dvMESA, WindowPos3dvARB@4, WindowPos3dvMESA, WindowPos3dvMESA@4)
+	GL_STUB_ALIAS(WindowPos3f, _gloffset_WindowPos3fMESA, WindowPos3f@12, WindowPos3fMESA, WindowPos3fMESA@12)
+	GL_STUB_ALIAS(WindowPos3fARB, _gloffset_WindowPos3fMESA, WindowPos3fARB@12, WindowPos3fMESA, WindowPos3fMESA@12)
+	GL_STUB_ALIAS(WindowPos3fv, _gloffset_WindowPos3fvMESA, WindowPos3fv@4, WindowPos3fvMESA, WindowPos3fvMESA@4)
+	GL_STUB_ALIAS(WindowPos3fvARB, _gloffset_WindowPos3fvMESA, WindowPos3fvARB@4, WindowPos3fvMESA, WindowPos3fvMESA@4)
+	GL_STUB_ALIAS(WindowPos3i, _gloffset_WindowPos3iMESA, WindowPos3i@12, WindowPos3iMESA, WindowPos3iMESA@12)
+	GL_STUB_ALIAS(WindowPos3iARB, _gloffset_WindowPos3iMESA, WindowPos3iARB@12, WindowPos3iMESA, WindowPos3iMESA@12)
+	GL_STUB_ALIAS(WindowPos3iv, _gloffset_WindowPos3ivMESA, WindowPos3iv@4, WindowPos3ivMESA, WindowPos3ivMESA@4)
+	GL_STUB_ALIAS(WindowPos3ivARB, _gloffset_WindowPos3ivMESA, WindowPos3ivARB@4, WindowPos3ivMESA, WindowPos3ivMESA@4)
+	GL_STUB_ALIAS(WindowPos3s, _gloffset_WindowPos3sMESA, WindowPos3s@12, WindowPos3sMESA, WindowPos3sMESA@12)
+	GL_STUB_ALIAS(WindowPos3sARB, _gloffset_WindowPos3sMESA, WindowPos3sARB@12, WindowPos3sMESA, WindowPos3sMESA@12)
+	GL_STUB_ALIAS(WindowPos3sv, _gloffset_WindowPos3svMESA, WindowPos3sv@4, WindowPos3svMESA, WindowPos3svMESA@4)
+	GL_STUB_ALIAS(WindowPos3svARB, _gloffset_WindowPos3svMESA, WindowPos3svARB@4, WindowPos3svMESA, WindowPos3svMESA@4)
+	GL_STUB_ALIAS(BlendFuncSeparate, _gloffset_BlendFuncSeparateEXT, BlendFuncSeparate@16, BlendFuncSeparateEXT, BlendFuncSeparateEXT@16)
+	GL_STUB_ALIAS(BlendFuncSeparateINGR, _gloffset_BlendFuncSeparateEXT, BlendFuncSeparateINGR@16, BlendFuncSeparateEXT, BlendFuncSeparateEXT@16)
+	GL_STUB_ALIAS(FogCoordf, _gloffset_FogCoordfEXT, FogCoordf@4, FogCoordfEXT, FogCoordfEXT@4)
+	GL_STUB_ALIAS(FogCoordfv, _gloffset_FogCoordfvEXT, FogCoordfv@4, FogCoordfvEXT, FogCoordfvEXT@4)
+	GL_STUB_ALIAS(FogCoordd, _gloffset_FogCoorddEXT, FogCoordd@8, FogCoorddEXT, FogCoorddEXT@8)
+	GL_STUB_ALIAS(FogCoorddv, _gloffset_FogCoorddvEXT, FogCoorddv@4, FogCoorddvEXT, FogCoorddvEXT@4)
+	GL_STUB_ALIAS(FogCoordPointer, _gloffset_FogCoordPointerEXT, FogCoordPointer@12, FogCoordPointerEXT, FogCoordPointerEXT@12)
+	GL_STUB_ALIAS(CompressedTexImage3D, _gloffset_CompressedTexImage3DARB, CompressedTexImage3D@36, CompressedTexImage3DARB, CompressedTexImage3DARB@36)
+	GL_STUB_ALIAS(CompressedTexImage2D, _gloffset_CompressedTexImage2DARB, CompressedTexImage2D@32, CompressedTexImage2DARB, CompressedTexImage2DARB@32)
+	GL_STUB_ALIAS(CompressedTexImage1D, _gloffset_CompressedTexImage1DARB, CompressedTexImage1D@28, CompressedTexImage1DARB, CompressedTexImage1DARB@28)
+	GL_STUB_ALIAS(CompressedTexSubImage3D, _gloffset_CompressedTexSubImage3DARB, CompressedTexSubImage3D@44, CompressedTexSubImage3DARB, CompressedTexSubImage3DARB@44)
+	GL_STUB_ALIAS(CompressedTexSubImage2D, _gloffset_CompressedTexSubImage2DARB, CompressedTexSubImage2D@36, CompressedTexSubImage2DARB, CompressedTexSubImage2DARB@36)
+	GL_STUB_ALIAS(CompressedTexSubImage1D, _gloffset_CompressedTexSubImage1DARB, CompressedTexSubImage1D@28, CompressedTexSubImage1DARB, CompressedTexSubImage1DARB@28)
+	GL_STUB_ALIAS(GetCompressedTexImage, _gloffset_GetCompressedTexImageARB, GetCompressedTexImage@12, GetCompressedTexImageARB, GetCompressedTexImageARB@12)
+	GL_STUB_ALIAS(SecondaryColor3b, _gloffset_SecondaryColor3bEXT, SecondaryColor3b@12, SecondaryColor3bEXT, SecondaryColor3bEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3bv, _gloffset_SecondaryColor3bvEXT, SecondaryColor3bv@4, SecondaryColor3bvEXT, SecondaryColor3bvEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3d, _gloffset_SecondaryColor3dEXT, SecondaryColor3d@24, SecondaryColor3dEXT, SecondaryColor3dEXT@24)
+	GL_STUB_ALIAS(SecondaryColor3dv, _gloffset_SecondaryColor3dvEXT, SecondaryColor3dv@4, SecondaryColor3dvEXT, SecondaryColor3dvEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3f, _gloffset_SecondaryColor3fEXT, SecondaryColor3f@12, SecondaryColor3fEXT, SecondaryColor3fEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3fv, _gloffset_SecondaryColor3fvEXT, SecondaryColor3fv@4, SecondaryColor3fvEXT, SecondaryColor3fvEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3i, _gloffset_SecondaryColor3iEXT, SecondaryColor3i@12, SecondaryColor3iEXT, SecondaryColor3iEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3iv, _gloffset_SecondaryColor3ivEXT, SecondaryColor3iv@4, SecondaryColor3ivEXT, SecondaryColor3ivEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3s, _gloffset_SecondaryColor3sEXT, SecondaryColor3s@12, SecondaryColor3sEXT, SecondaryColor3sEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3sv, _gloffset_SecondaryColor3svEXT, SecondaryColor3sv@4, SecondaryColor3svEXT, SecondaryColor3svEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3ub, _gloffset_SecondaryColor3ubEXT, SecondaryColor3ub@12, SecondaryColor3ubEXT, SecondaryColor3ubEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3ubv, _gloffset_SecondaryColor3ubvEXT, SecondaryColor3ubv@4, SecondaryColor3ubvEXT, SecondaryColor3ubvEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3ui, _gloffset_SecondaryColor3uiEXT, SecondaryColor3ui@12, SecondaryColor3uiEXT, SecondaryColor3uiEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3uiv, _gloffset_SecondaryColor3uivEXT, SecondaryColor3uiv@4, SecondaryColor3uivEXT, SecondaryColor3uivEXT@4)
+	GL_STUB_ALIAS(SecondaryColor3us, _gloffset_SecondaryColor3usEXT, SecondaryColor3us@12, SecondaryColor3usEXT, SecondaryColor3usEXT@12)
+	GL_STUB_ALIAS(SecondaryColor3usv, _gloffset_SecondaryColor3usvEXT, SecondaryColor3usv@4, SecondaryColor3usvEXT, SecondaryColor3usvEXT@4)
+	GL_STUB_ALIAS(SecondaryColorPointer, _gloffset_SecondaryColorPointerEXT, SecondaryColorPointer@16, SecondaryColorPointerEXT, SecondaryColorPointerEXT@16)
+	GL_STUB_ALIAS(BindProgramARB, _gloffset_BindProgramNV, BindProgramARB@8, BindProgramNV, BindProgramNV@8)
+	GL_STUB_ALIAS(DeleteProgramsARB, _gloffset_DeleteProgramsNV, DeleteProgramsARB@8, DeleteProgramsNV, DeleteProgramsNV@8)
+	GL_STUB_ALIAS(GenProgramsARB, _gloffset_GenProgramsNV, GenProgramsARB@8, GenProgramsNV, GenProgramsNV@8)
+	GL_STUB_ALIAS(GetVertexAttribPointervARB, _gloffset_GetVertexAttribPointervNV, GetVertexAttribPointervARB@12, GetVertexAttribPointervNV, GetVertexAttribPointervNV@12)
+	GL_STUB_ALIAS(IsProgramARB, _gloffset_IsProgramNV, IsProgramARB@4, IsProgramNV, IsProgramNV@4)
+	GL_STUB_ALIAS(PointParameteri, _gloffset_PointParameteriNV, PointParameteri@8, PointParameteriNV, PointParameteriNV@8)
+	GL_STUB_ALIAS(PointParameteriv, _gloffset_PointParameterivNV, PointParameteriv@8, PointParameterivNV, PointParameterivNV@8)
+	GL_STUB_ALIAS(MultiDrawArrays, _gloffset_MultiDrawArraysEXT, MultiDrawArrays@16, MultiDrawArraysEXT, MultiDrawArraysEXT@16)
+	GL_STUB_ALIAS(MultiDrawElements, _gloffset_MultiDrawElementsEXT, MultiDrawElements@20, MultiDrawElementsEXT, MultiDrawElementsEXT@20)
+	GL_STUB_ALIAS(BindBuffer, _gloffset_BindBufferARB, BindBuffer@8, BindBufferARB, BindBufferARB@8)
+	GL_STUB_ALIAS(BufferData, _gloffset_BufferDataARB, BufferData@16, BufferDataARB, BufferDataARB@16)
+	GL_STUB_ALIAS(BufferSubData, _gloffset_BufferSubDataARB, BufferSubData@16, BufferSubDataARB, BufferSubDataARB@16)
+	GL_STUB_ALIAS(DeleteBuffers, _gloffset_DeleteBuffersARB, DeleteBuffers@8, DeleteBuffersARB, DeleteBuffersARB@8)
+	GL_STUB_ALIAS(GenBuffers, _gloffset_GenBuffersARB, GenBuffers@8, GenBuffersARB, GenBuffersARB@8)
+	GL_STUB_ALIAS(GetBufferParameteriv, _gloffset_GetBufferParameterivARB, GetBufferParameteriv@12, GetBufferParameterivARB, GetBufferParameterivARB@12)
+	GL_STUB_ALIAS(GetBufferPointerv, _gloffset_GetBufferPointervARB, GetBufferPointerv@12, GetBufferPointervARB, GetBufferPointervARB@12)
+	GL_STUB_ALIAS(GetBufferSubData, _gloffset_GetBufferSubDataARB, GetBufferSubData@16, GetBufferSubDataARB, GetBufferSubDataARB@16)
+	GL_STUB_ALIAS(IsBuffer, _gloffset_IsBufferARB, IsBuffer@4, IsBufferARB, IsBufferARB@4)
+	GL_STUB_ALIAS(MapBuffer, _gloffset_MapBufferARB, MapBuffer@8, MapBufferARB, MapBufferARB@8)
+	GL_STUB_ALIAS(UnmapBuffer, _gloffset_UnmapBufferARB, UnmapBuffer@4, UnmapBufferARB, UnmapBufferARB@4)
+	GL_STUB_ALIAS(GenQueries, _gloffset_GenQueriesARB, GenQueries@8, GenQueriesARB, GenQueriesARB@8)
+	GL_STUB_ALIAS(DeleteQueries, _gloffset_DeleteQueriesARB, DeleteQueries@8, DeleteQueriesARB, DeleteQueriesARB@8)
+	GL_STUB_ALIAS(IsQuery, _gloffset_IsQueryARB, IsQuery@4, IsQueryARB, IsQueryARB@4)
+	GL_STUB_ALIAS(BeginQuery, _gloffset_BeginQueryARB, BeginQuery@8, BeginQueryARB, BeginQueryARB@8)
+	GL_STUB_ALIAS(EndQuery, _gloffset_EndQueryARB, EndQuery@4, EndQueryARB, EndQueryARB@4)
+	GL_STUB_ALIAS(GetQueryiv, _gloffset_GetQueryivARB, GetQueryiv@12, GetQueryivARB, GetQueryivARB@12)
+	GL_STUB_ALIAS(GetQueryObjectiv, _gloffset_GetQueryObjectivARB, GetQueryObjectiv@12, GetQueryObjectivARB, GetQueryObjectivARB@12)
+	GL_STUB_ALIAS(GetQueryObjectuiv, _gloffset_GetQueryObjectuivARB, GetQueryObjectuiv@12, GetQueryObjectuivARB, GetQueryObjectuivARB@12)
+	GL_STUB_ALIAS(BlendEquationSeparateATI, _gloffset_BlendEquationSeparateEXT, BlendEquationSeparateATI@8, BlendEquationSeparateEXT, BlendEquationSeparateEXT@8)
+
+		GLOBL	GLNAME(gl_dispatch_functions_end)
+		HIDDEN(GLNAME(gl_dispatch_functions_end))
+		ALIGNTEXT16
+GLNAME(gl_dispatch_functions_end):
+
+/* When built with GLX_USE_TLS on Linux, emit an ELF ".note.ABI-tag" note:
+ * a three-word header (name size, data size, note type), the "GNU" vendor
+ * name, and a data area holding a zero word followed by the minimum kernel
+ * version 2.4.20.
+ */
+#if defined(GLX_USE_TLS) && defined(__linux__)
+	.section ".note.ABI-tag", "a"
+	.p2align 2
+	.long	1f - 0f   /* name length */
+	.long	3f - 2f   /* data length */
+	.long	1         /* note type */
+0:	.asciz "GNU"      /* vendor name */
+1:	.p2align 2
+2:	.long	0         /* note data: the ABI tag */
+	.long	2,4,20    /* Minimum kernel version w/TLS */
+3:	.p2align 2        /* pad out section */
+#endif /* GLX_USE_TLS && __linux__ */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/mmx.h b/nx-X11/extras/Mesa/src/mesa/x86/mmx.h
new file mode 100644
index 000000000..b3a096725
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/mmx.h
@@ -0,0 +1,51 @@
+/* $Id: mmx.h,v 1.1.1.1 2004/06/16 09:19:36 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef ASM_MMX_H
+#define ASM_MMX_H
+
+/*
+ * Prototypes for the MMX blending routines generated in mmx_blend.S from the
+ * mmx_blendtmp.h template.
+ *
+ * All five share one signature: n is the pixel count, mask[] is tested by the
+ * implementation to skip work when the mask bytes it examines are zero,
+ * rgba[] is read and overwritten in place with the blended result, and dest[]
+ * is only read.
+ */
+
+/* Linear interpolation between rgba and dest, weighted by the alpha of rgba. */
+extern void _ASMAPI
+_mesa_mmx_blend_transparency( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                              GLubyte rgba[][4], const GLubyte dest[][4] );
+
+/* Per-byte unsigned saturating add of rgba and dest. */
+extern void _ASMAPI
+_mesa_mmx_blend_add( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                     GLubyte rgba[][4], const GLubyte dest[][4] );
+
+/* Per-byte unsigned minimum of rgba and dest. */
+extern void _ASMAPI
+_mesa_mmx_blend_min( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                     GLubyte rgba[][4], const GLubyte dest[][4] );
+
+/* Per-byte unsigned maximum of rgba and dest. */
+extern void _ASMAPI
+_mesa_mmx_blend_max( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                     GLubyte rgba[][4], const GLubyte dest[][4] );
+
+/* Per-byte product of rgba and dest divided by 255, with rounding. */
+extern void _ASMAPI
+_mesa_mmx_blend_modulate( GLcontext *ctx, GLuint n, const GLubyte mask[],
+                          GLubyte rgba[][4], const GLubyte dest[][4] );
+
+#endif /* ASM_MMX_H */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/mmx_blend.S b/nx-X11/extras/Mesa/src/mesa/x86/mmx_blend.S
new file mode 100644
index 000000000..f7326cdbe
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/mmx_blend.S
@@ -0,0 +1,366 @@
+
+/*
+ * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+#ifdef USE_MMX_ASM
+#include "matypes.h"
+
+/* integer multiplication - alpha plus one
+ *
+ * makes the following approximation to the division (Sree)
+ *
+ *   rgb*a/255 ~= (rgb*(a+1)) >> 8
+ *
+ * which is the fastest method that satisfies the following OpenGL criteria
+ *
+ *   0*0 = 0 and 255*255 = 255
+ *
+ * note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
+ *
+ *   PCMPEQW    ( MX1, MX1 )
+ */
+#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
+    PSUBW      ( MX1, MA1 )			/*   a1 + 1  |   a1 + 1  |   a1 + 1  |   a1 + 1  */	;\
+    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\
+													;\
+TWO(PSUBW      ( MX1, MA2 ))			/*   a2 + 1  |   a2 + 1  |   a2 + 1  |   a2 + 1  */	;\
+TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\
+													;\
+    PSRLW      ( CONST(8), MA1 )		/*               t1 >> 8 ~= t1/255               */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*               t2 >> 8 ~= t2/255               */	
+
+
+/* integer multiplication - geometric series
+ *
+ * takes the geometric series approximation to the division
+ *
+ *   t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
+ *
+ * in this case just the first two terms to fit in 16bit arithmetic
+ *
+ *   t/255 ~= (t + (t >> 8)) >> 8
+ *
+ * note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
+ * so the special case a = 255 must be accounted for or roundoff must be used
+ */
+#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
+    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\
+TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\
+													;\
+    MOVQ       ( MA1, MP1 )										;\
+    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
+													;\
+TWO(MOVQ       ( MA2, MP2 ))										;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
+													;\
+    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
+    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
+													;\
+TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
+
+
+/* integer multiplication - geometric series plus rounding
+ *
+ * when using a geometric series division instead of truncating the result 
+ * use roundoff in the approximation (Jim Blinn)
+ *
+ *   t = rgb*a + 0x80
+ *
+ * achieving the exact results
+ *
+ * note that M80 is register with the 0x0080008000800080 constant
+ */
+#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
+    PMULLW     ( MP1, MA1 )			/*                  t1 = p1*a1                   */	;\
+    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\
+													;\
+TWO(PMULLW     ( MP2, MA2 ))			/*                  t2 = p2*a2                   */	;\
+TWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\
+													;\
+    MOVQ       ( MA1, MP1 )										;\
+    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
+													;\
+TWO(MOVQ       ( MA2, MP2 ))										;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
+													;\
+    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
+    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
+													;\
+TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
+
+
+/* linear interpolation - geometric series
+ *
+ * computes q + (p - q)*a/255 on each 16-bit word, approximating the division
+ * with the first two terms of the geometric series
+ *
+ *   t/255 ~= (t + (t >> 8)) >> 8
+ *
+ * the result is left in MA1 (and MA2 where TWO() is enabled); MP1/MP2 and
+ * MQ1/MQ2 are overwritten as scratch
+ */
+#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
+    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
+    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
+													;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
+TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
+TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
+													;\
+    MOVQ       ( MA1, MP1 )										;\
+    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
+													;\
+TWO(MOVQ       ( MA2, MP2 ))										;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
+													;\
+    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
+TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
+													;\
+    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
+TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
+													;\
+    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
+
+
+/* linear interpolation - geometric series with roundoff
+ *
+ * this is a generalization of Blinn's formula to signed arithmetic:
+ * t = (p - q)*a is biased by 0x80, and by a further -0x100 in the words
+ * where p - q is negative, before the geometric series division is applied
+ *
+ * note that M80 is a register with the 0x0080008000800080 constant
+ *
+ * the result is left in MA1 (and MA2 where TWO() is enabled); MP1/MP2 and
+ * MQ1/MQ2 are overwritten as scratch
+ */
+#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
+    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
+    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
+													;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
+TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
+TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
+													;\
+    PSRLW      ( CONST(15), MP1 )		/*                 q1 > p1 ? 1 : 0               */	;\
+TWO(PSRLW      ( CONST(15), MP2 ))		/*                 q2 > p2 ? 1 : 0               */	;\
+													;\
+    PSLLW      ( CONST(8), MP1 )		/*             q1 > p1 ? 0x100 : 0               */	;\
+TWO(PSLLW      ( CONST(8), MP2 ))		/*             q2 > p2 ? 0x100 : 0               */	;\
+													;\
+    PSUBW      ( MP1, MA1 )			/*                  t1 -=? 0x100                 */	;\
+TWO(PSUBW      ( MP2, MA2 ))			/*                  t2 -=? 0x100                 */	;\
+ 													;\
+    PADDW      ( M80, MA1 )			/*                 t1 += 0x80                    */	;\
+TWO(PADDW      ( M80, MA2 ))			/*                 t2 += 0x80                    */	;\
+													;\
+    MOVQ       ( MA1, MP1 )										;\
+    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
+													;\
+TWO(MOVQ       ( MA2, MP2 ))										;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
+													;\
+    PADDW      ( MP1, MA1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
+TWO(PADDW      ( MP2, MA2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
+													;\
+    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
+TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
+													;\
+    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
+
+
+/* linear interpolation - geometric series with correction
+ *
+ * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
+ *
+ *   t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
+ *
+ * note that although it is faster than rounding off, it doesn't always give the exact results
+ *
+ * the result is left in MA1 (and MA2 where TWO() is enabled); MP1/MP2 and
+ * MQ1/MQ2 are overwritten as scratch
+ */
+#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
+    PSUBW      ( MQ1, MP1 )                     /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */	;\
+    PSLLW      ( CONST(8), MQ1 )		/*                    q1 << 8                    */	;\
+    PMULLW     ( MP1, MA1 )			/*              t1 = (p1 - q1)*pa1               */	;\
+													;\
+TWO(PSUBW      ( MQ2, MP2 ))                    /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */	;\
+TWO(PSLLW      ( CONST(8), MQ2 ))		/*                    q2 << 8                    */	;\
+TWO(PMULLW     ( MP2, MA2 ))			/*              t2 = (p2 - q2)*pa2               */	;\
+													;\
+    MOVQ       ( MA1, MP1 )										;\
+    PSRLW      ( CONST(8), MA1 )		/*                    t1 >> 8                    */	;\
+													;\
+TWO(MOVQ       ( MA2, MP2 ))										;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*                    t2 >> 8                    */	;\
+													;\
+    PADDW      ( MA1, MP1 )			/*        t1 + (t1 >> 8) ~= (t1/255) << 8        */	;\
+    PSRLW      ( CONST(7), MA1 )		/*                    t1 >> 15                   */	;\
+													;\
+TWO(PADDW      ( MA2, MP2 ))			/*        t2 + (t2 >> 8) ~= (t2/255) << 8        */	;\
+TWO(PSRLW      ( CONST(7), MA2 ))		/*                    t2 >> 15                   */	;\
+													;\
+    PADDW      ( MP1, MA1 )			/*  t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8  */	;\
+TWO(PADDW      ( MP2, MA2 ))			/*  t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8  */	;\
+													;\
+    PADDW      ( MQ1, MA1 )			/*              (t1/255 + q1) << 8               */	;\
+TWO(PADDW      ( MQ2, MA2 ))			/*              (t2/255 + q2) << 8               */	;\
+													;\
+    PSRLW      ( CONST(8), MA1 )		/*    sa1    |    sb1    |    sg1    |    sr1    */	;\
+TWO(PSRLW      ( CONST(8), MA2 ))		/*    sa2    |    sb2    |    sg2    |    sr2    */
+
+
+/* common blending setup code
+ *
+ * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making
+ *
+ *   PXOR      ( M00, M00 )
+ */
+/* load from rgba into MPP (the "p" operand) and from dest into MQQ (the "q"
+ * operand): a 32-bit MOVD where ONE() is enabled, a 64-bit MOVQ where TWO()
+ * is enabled
+ */
+#define GMB_LOAD(rgba, dest, MPP, MQQ) \
+ONE(MOVD       ( REGIND(rgba), MPP ))		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */	;\
+ONE(MOVD       ( REGIND(dest), MQQ ))		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */	;\
+													;\
+TWO(MOVQ       ( REGIND(rgba), MPP ))		/* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */	;\
+TWO(MOVQ       ( REGIND(dest), MQQ ))		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
+
+#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
+TWO(MOVQ       ( MP1, MP2 ))										;\
+TWO(MOVQ       ( MQ1, MQ2 ))										;\
+													;\
+    PUNPCKLBW  ( M00, MQ1 )			/*    qa1    |    qb1    |    qg1    |    qr1    */	;\
+TWO(PUNPCKHBW  ( M00, MQ2 ))                    /*    qa2    |    qb2    |    qg2    |    qr2    */	;\
+    PUNPCKLBW  ( M00, MP1 )			/*    pa1    |    pb1    |    pg1    |    pr1    */	;\
+TWO(PUNPCKHBW  ( M00, MP2 ))                    /*    pa2    |    pb2    |    pg2    |    pr2    */
+
+#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
+    MOVQ       ( MP1, MA1 )										;\
+TWO(MOVQ       ( MP2, MA2 ))										;\
+													;\
+    PUNPCKHWD  ( MA1, MA1 )			/*    pa1    |    pa1    |           |           */	;\
+TWO(PUNPCKHWD  ( MA2, MA2 ))			/*    pa2    |    pa2    |           |           */	;\
+    PUNPCKHDQ  ( MA1, MA1 )                     /*    pa1    |    pa1    |    pa1    |    pa1    */	;\
+TWO(PUNPCKHDQ  ( MA2, MA2 ))                    /*    pa2    |    pa2    |    pa2    |    pa2    */
+
+#define GMB_PACK( MS1, MS2 ) \
+    PACKUSWB   ( MS2, MS1 )			/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */	;\
+
+#define GMB_STORE(rgba, MSS ) \
+ONE(MOVD       ( MSS, REGIND(rgba) ))		/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */	;\
+TWO(MOVQ       ( MSS, REGIND(rgba) ))		/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
+
+
+    SEG_DATA
+
+ALIGNDATA8
+const_0080:
+    D_LONG 0x00800080, 0x00800080
+
+const_80:
+    D_LONG 0x80808080, 0x80808080
+
+    SEG_TEXT
+
+
+/* Blend transparency function
+ */
+
+#define TAG(x) CONCAT(x,_transparency)
+#define LLTAG(x) LLBL2(x,_transparency)
+
+#define INIT \
+    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */
+
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
+    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )								;\
+    GMB_ALPHA( MM1, MM3, MM4, MM6 )									;\
+    GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 )							;\
+    GMB_PACK( MM3, MM6 )										;\
+    GMB_STORE( rgba, MM3 )
+
+#include "mmx_blendtmp.h"
+
+
+/* Blend add function
+ *
+ * FIXME: Add some loop unrolling here...
+ */
+
+#define TAG(x) CONCAT(x,_add)
+#define LLTAG(x) LLBL2(x,_add)
+
+#define INIT
+
+#define MAIN( rgba, dest ) \
+ONE(MOVD       ( REGIND(rgba), MM1 ))		/*     |     |     |     | qa1 | qb1 | qg1 | qr1 */	;\
+ONE(MOVD       ( REGIND(dest), MM2 ))		/*     |     |     |     | pa1 | pb1 | pg1 | pr1 */	;\
+ONE(PADDUSB    ( MM2, MM1 ))										;\
+ONE(MOVD       ( MM1, REGIND(rgba) ))		/*     |     |     |     | sa1 | sb1 | sg1 | sr1 */	;\
+													;\
+TWO(MOVQ       ( REGIND(rgba), MM1 ))		/* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */	;\
+TWO(PADDUSB    ( REGIND(dest), MM1 ))		/* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */	;\
+TWO(MOVQ       ( MM1, REGIND(rgba) ))
+
+#include "mmx_blendtmp.h"
+
+
+/* Blend min function
+ */
+
+#define TAG(x) CONCAT(x,_min)
+#define LLTAG(x) LLBL2(x,_min)
+
+#define INIT \
+    MOVQ       ( CONTENT(const_80), MM7 )	/* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/
+
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
+    MOVQ       ( MM1, MM3 )										;\
+    MOVQ       ( MM2, MM4 )										;\
+    PXOR       ( MM7, MM3 )			/*              unsigned -> signed               */	;\
+    PXOR       ( MM7, MM4 )			/*              unsigned -> signed               */	;\
+    PCMPGTB    ( MM3, MM4 )			/*                 q > p ? 0xff : 0x00           */	;\
+    PAND       ( MM4, MM1 )			/*                 q > p ? p : 0                 */	;\
+    PANDN      ( MM2, MM4 )			/*                 q > p ? 0 : q                 */	;\
+    POR        ( MM1, MM4 )			/*                 q > p ? p : q                 */	;\
+    GMB_STORE( rgba, MM4 )
+
+#include "mmx_blendtmp.h"
+
+
+/* Blend max function
+ *
+ * per-byte unsigned maximum of rgba (p) and dest (q); both operands are
+ * XORed with 0x80 before PCMPGTB so the comparison orders them as unsigned
+ * values, then the mask selects q where q > p and p elsewhere
+ */
+
+#define TAG(x) CONCAT(x,_max)
+#define LLTAG(x) LLBL2(x,_max)
+
+#define INIT \
+    MOVQ       ( CONTENT(const_80), MM7 )	/* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/
+
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
+    MOVQ       ( MM1, MM3 )										;\
+    MOVQ       ( MM2, MM4 )										;\
+    PXOR       ( MM7, MM3 )			/*              unsigned -> signed               */	;\
+    PXOR       ( MM7, MM4 )			/*              unsigned -> signed               */	;\
+    PCMPGTB    ( MM3, MM4 )			/*                 q > p ? 0xff : 0x00           */	;\
+    PAND       ( MM4, MM2 )			/*                 q > p ? q : 0                 */	;\
+    PANDN      ( MM1, MM4 )			/*                 q > p ? 0 : p                 */	;\
+    POR        ( MM2, MM4 )			/*                 q > p ? q : p                 */	;\
+    GMB_STORE( rgba, MM4 )
+
+#include "mmx_blendtmp.h"
+
+
+/* Blend modulate function
+ */
+
+#define TAG(x) CONCAT(x,_modulate)
+#define LLTAG(x) LLBL2(x,_modulate)
+
+#define INIT \
+    PXOR       ( MM0, MM0 )			/*   0x0000  |   0x0000  |   0x0000  |   0x0000  */	;\
+    MOVQ       ( CONTENT(const_0080), MM7 )	/*   0x0080  |   0x0080  |   0x0080  |   0x0080  */
+
+#define MAIN( rgba, dest ) \
+    GMB_LOAD( rgba, dest, MM1, MM2 )									;\
+    GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 )								;\
+    GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 )								;\
+    GMB_PACK( MM2, MM5 )										;\
+    GMB_STORE( rgba, MM2 )
+
+#include "mmx_blendtmp.h"
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/mmx_blendtmp.h b/nx-X11/extras/Mesa/src/mesa/x86/mmx_blendtmp.h
new file mode 100644
index 000000000..c2fdeb62b
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/mmx_blendtmp.h
@@ -0,0 +1,114 @@
+/*
+ * Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/*
+ * void _mesa_mmx_blend( GLcontext *ctx,
+ *                       GLuint n, 
+ *                       const GLubyte mask[],
+ *                       GLchan rgba[][4], 
+ *                       CONST GLchan dest[][4] )
+ * 
+ */
+/* Template body.  The including file defines TAG/LLTAG (name suffixing),
+ * INIT (one-time register setup) and MAIN (the per-step blend) before
+ * including this header.
+ *
+ * Layout of the routine:
+ *   - runin:     one pixel, only if rgba is not on an 8-byte boundary
+ *   - main loop: two pixels per iteration
+ *   - runout:    one trailing pixel, if any
+ * ONE(x)/TWO(x) are redefined around each MAIN expansion; presumably the
+ * GMB_* macros use them to pick 1-pixel or 2-pixel instruction forms --
+ * confirm against their definitions.
+ */
+ALIGNTEXT16
+GLOBL GLNAME( TAG(_mesa_mmx_blend) )
+HIDDEN( TAG(_mesa_mmx_blend) )
+GLNAME( TAG(_mesa_mmx_blend) ):
+
+    /* Standard frame; ESI, EDI and EBX are saved and restored at exit. */
+    PUSH_L     ( EBP )
+    MOV_L      ( ESP, EBP )
+    PUSH_L     ( ESI )
+    PUSH_L     ( EDI )
+    PUSH_L     ( EBX )
+
+    /* n == 0: leave before INIT, so no MMX register has been touched and
+     * the EMMS at GMB_done can be skipped.
+     */
+    MOV_L      ( REGOFF(12, EBP), ECX )		/* n */
+    CMP_L      ( CONST(0), ECX)
+    JE         ( LLTAG(GMB_return) )
+
+    MOV_L      ( REGOFF(16, EBP), EBX )		/* mask */
+    MOV_L      ( REGOFF(20, EBP), EDI )         /* rgba */
+    MOV_L      ( REGOFF(24, EBP), ESI )         /* dest */
+
+    INIT
+    
+    /* Bit 2 of the rgba address set means it is 4 bytes short of an
+     * 8-byte boundary: consume one pixel first.
+     */
+    TEST_L     ( CONST(4), EDI )		/* align rgba on an 8-byte boundary */
+    JZ         ( LLTAG(GMB_align_end) )
+
+    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */
+    JE         ( LLTAG(GMB_align_continue) )
+
+    /* runin */
+#define ONE(x)	x
+#define TWO(x)  
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+
+LLTAG(GMB_align_continue):
+
+    DEC_L      ( ECX )				/* n -= 1 */
+    INC_L      ( EBX )		                /* mask += 1 */
+    ADD_L      ( CONST(4), EDI )		/* rgba += 1 */
+    ADD_L      ( CONST(4), ESI )		/* dest += 1 */ 
+
+LLTAG(GMB_align_end):
+
+    CMP_L      ( CONST(2), ECX)
+    JB         ( LLTAG(GMB_loop_end) )
+
+ALIGNTEXT16
+LLTAG(GMB_loop_begin):
+
+    /* One 16-bit compare covers two mask bytes; the pair is skipped only
+     * when both are zero.
+     * NOTE(review): if exactly one of the two mask bytes is set, MAIN still
+     * runs for the whole pair; whether the masked-off pixel gets written
+     * depends on GMB_STORE -- confirm this is acceptable to callers.
+     */
+    CMP_W      ( CONST(0), REGIND(EBX) )	/* *mask == 0 && *(mask + 1) == 0 */
+    JE         ( LLTAG(GMB_loop_continue) )
+
+    /* main loop */
+#define ONE(x)
+#define TWO(x)	x
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+
+LLTAG(GMB_loop_continue):
+
+    DEC_L      ( ECX )
+    DEC_L      ( ECX )				/* n -= 2 */
+    ADD_L      ( CONST(2), EBX )		/* mask += 2 */
+    ADD_L      ( CONST(8), EDI )		/* rgba += 2 */
+    ADD_L      ( CONST(8), ESI )		/* dest += 2 */ 
+    CMP_L      ( CONST(2), ECX )
+    JAE        ( LLTAG(GMB_loop_begin) )
+
+LLTAG(GMB_loop_end):
+
+    /* At most one pixel is left here. */
+    CMP_L      ( CONST(1), ECX )
+    JB         ( LLTAG(GMB_done) )
+
+    CMP_B      ( CONST(0), REGIND(EBX) )	/* *mask == 0 */
+    JE         ( LLTAG(GMB_done) )
+
+    /* runout */
+#define ONE(x)	x
+#define TWO(x)
+    MAIN       ( EDI, ESI )
+#undef ONE
+#undef TWO
+
+LLTAG(GMB_done):
+
+    /* Release the MMX state before returning to the caller. */
+    EMMS
+
+LLTAG(GMB_return):
+
+    POP_L      ( EBX )
+    POP_L      ( EDI )
+    POP_L      ( ESI )
+    MOV_L      ( EBP, ESP )
+    POP_L      ( EBP )
+    RET
+
+#undef TAG
+#undef LLTAG
+#undef INIT
+#undef MAIN
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/norm_args.h b/nx-X11/extras/Mesa/src/mesa/x86/norm_args.h
new file mode 100644
index 000000000..cb18fba9b
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/norm_args.h
@@ -0,0 +1,58 @@
+/* $Id: norm_args.h,v 1.1.1.1 2004/06/16 09:19:36 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Normal transform function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes
+ */
+
+#ifndef __NORM_ARGS_H__
+#define __NORM_ARGS_H__
+
+/* Offsets for normal_func arguments
+ *
+ * typedef void (*normal_func)( CONST GLmatrix *mat,
+ *                              GLfloat scale,
+ *                              CONST GLvector4f *in,
+ *                              CONST GLfloat lengths[],
+ *                              GLvector4f *dest );
+ */
+/* Byte offset of each argument, 4 bytes apart in declaration order.
+ * The first one sits at 4 rather than 0; presumably that slot holds the
+ * return address at function entry -- confirm against the calling
+ * convention in use.
+ */
+#define OFFSET_MAT	4
+#define OFFSET_SCALE	8
+#define OFFSET_IN	12
+#define OFFSET_LENGTHS	16
+#define OFFSET_DEST	20
+
+/* ESP-relative operands for each argument.  FRAME_OFFSET must be defined
+ * by the including file as the number of bytes pushed so far.
+ */
+#define ARG_MAT         REGOFF(FRAME_OFFSET+OFFSET_MAT, ESP)
+#define ARG_SCALE       REGOFF(FRAME_OFFSET+OFFSET_SCALE, ESP)
+#define ARG_IN          REGOFF(FRAME_OFFSET+OFFSET_IN, ESP)
+#define ARG_LENGTHS     REGOFF(FRAME_OFFSET+OFFSET_LENGTHS, ESP)
+#define ARG_DEST        REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.S b/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.S
new file mode 100644
index 000000000..6b8036e5b
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.S
@@ -0,0 +1,689 @@
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+ 
+/**
+ * \file read_rgba_span_x86.S
+ * Optimized routines to transfer pixel data from the framebuffer to a
+ * buffer in main memory.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+	.file	"read_rgba_span_x86.S"
+#if !defined(__DJGPP__) && !defined(__MINGW32__) /* this one cries for assyntax.h */
+	/* Byte-select masks used by the BGRA8888_REV routines to swap bytes
+	 * 0 and 2 of every 32-bit pixel:
+	 *   mask     (0xff00ff00 x 4) keeps bytes 1 and 3 in place;
+	 *   mask+16  (0x00ff0000 x 4) isolates byte 2, both before it is
+	 *            shifted down to byte 0 and after byte 0 has been
+	 *            shifted up.
+	 * The table is 32 bytes and 16-byte aligned so that the SSE2 routine
+	 * can load each half with movdqa.
+	 */
+	.section	.rodata
+	.align 16
+	.type	mask, @object
+	.size	mask, 32
+mask:
+	.long	0xff00ff00
+	.long	0xff00ff00
+	.long	0xff00ff00
+	.long	0xff00ff00
+	.long	0x00ff0000
+	.long	0x00ff0000
+	.long	0x00ff0000
+	.long	0x00ff0000
+
+
+/* I implemented these as macros because they appear in quite a few places,
+ * and I've tweaked them a number of times.  I got tired of changing every
+ * place they appear. :)
+ *
+ * Both macros convert the single pixel at (%ebx) and store it at (%ecx),
+ * clobbering %eax.  DO_ONE_PIXEL also advances both pointers by one pixel;
+ * DO_ONE_LAST_PIXEL leaves them untouched.
+ */
+
+#define DO_ONE_PIXEL() \
+	movl	(%ebx), %eax ; \
+	addl	$4, %ebx ; \
+	bswap	%eax          /* ARGB -> BGRA */ ; \
+	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
+	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */ ; \
+	addl	$4, %ecx
+
+/* No trailing line continuation here: a dangling backslash would silently
+ * pull whatever line follows the definition into the macro body.
+ */
+#define DO_ONE_LAST_PIXEL() \
+	movl	(%ebx), %eax ; \
+	bswap	%eax          /* ARGB -> BGRA */ ; \
+	rorl	$8, %eax      /* BGRA -> ABGR */ ; \
+	movl	%eax, (%ecx)  /* ABGR -> R, G, B, A */
+
+
+/**
+ * MMX optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Pixels are converted two at a time with MMX; a
+ * leading pixel (when the source is not 8-byte aligned) and a trailing odd
+ * pixel are converted with integer instructions.
+ *
+ * \warning
+ * Unless USE_INNER_EMMS is defined, this function assumes that the caller
+ * will issue the EMMS instruction at the correct places.
+ */
+
+	/* The mask table above was emitted into .rodata.  Switch back to
+	 * .text so the code is not assembled into a data section, which is
+	 * mapped non-executable on systems that separate code from data.
+	 */
+	.text
+.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
+.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
+	.type	_generic_read_RGBA_span_BGRA8888_REV_MMX, @function
+_generic_read_RGBA_span_BGRA8888_REV_MMX:
+	pushl	%ebx
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	movq	mask, %mm1
+	movq	mask+16, %mm2
+
+	movl	8(%esp), %ebx	/* source pointer */
+	movl	16(%esp), %edx	/* number of pixels to copy */
+	movl	12(%esp), %ecx	/* destination pointer */
+
+	testl	%edx, %edx
+	jle	.L20		/* Bail if there's nothing to do. */
+
+	/* %eax = 1 if the source is 4 bytes short of an 8-byte boundary,
+	 * 0 otherwise.  In the former case convert one pixel up front.
+	 */
+
+	movl	%ebx, %eax
+
+	negl	%eax
+	sarl	$2, %eax
+	andl	$1, %eax
+	je	.L17
+
+	subl	%eax, %edx
+	DO_ONE_PIXEL()
+.L17:
+
+	/* Would it be faster to unroll this loop once and process 4 pixels
+	 * per pass, instead of just two?
+	 */
+
+	/* %eax = number of pixel pairs.  The flags set by shrl (and later by
+	 * subl) are what the jne at .L18 tests.
+	 */
+
+	movl	%edx, %eax
+	shrl	%eax
+	jmp	.L18
+.L19:
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	/* These 9 instructions do what PSHUFB (if there were such an
+	 * instruction) could do in 1. :(
+	 */
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+	subl	$1, %eax
+.L18:
+	jne	.L19
+
+	/* At this point there are either 1 or 0 pixels remaining to be
+	 * converted.  Convert the last pixel, if needed.
+	 */
+
+	testl	$1, %edx
+	je	.L20
+
+	DO_ONE_LAST_PIXEL()
+
+.L20:
+	/* Every path, including the early bail (which has already loaded
+	 * %mm1/%mm2), passes through here, so the MMX state is always
+	 * released when USE_INNER_EMMS is requested.
+	 */
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	popl	%ebx
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
+
+
+/**
+ * SSE optimized version of the BGRA8888_REV to RGBA copy routine.  SSE
+ * instructions are only actually used to read data from the framebuffer.
+ * In practice, the speed-up is pretty small.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Up to three leading pixels are converted to bring
+ * the source to a 16-byte boundary, then four pixels per iteration are
+ * fetched with movaps and converted with MMX, then up to three trailing
+ * pixels are converted.
+ *
+ * \todo
+ * Do some more testing and determine if there's any reason to have this
+ * function in addition to the MMX version.
+ *
+ * \warning
+ * Unless USE_INNER_EMMS is defined, this function assumes that the caller
+ * will issue the EMMS instruction at the correct places.
+ */
+
+	/* Make sure the code is assembled into .text no matter which section
+	 * was active before this point.
+	 */
+	.text
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
+	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE:
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	movq	mask, %mm1
+	movq	mask+16, %mm2
+
+	movl	16(%esp), %ebx	/* source pointer */
+	movl	24(%esp), %edx	/* number of pixels to copy */
+	movl	20(%esp), %ecx	/* destination pointer */
+
+	testl	%edx, %edx
+	jle	.L35		/* Bail if there's nothing to do. */
+
+	/* Carve a 16-byte aligned scratch slot out of the stack; %ebp keeps
+	 * the original stack pointer so it can be restored after the loop.
+	 */
+
+	movl	%esp, %ebp
+	subl	$16, %esp
+	andl	$0xfffffff0, %esp
+
+	/* %esi = number of leading pixels (0..3) needed to reach a 16-byte
+	 * aligned source address, clamped to the total pixel count.
+	 */
+
+	movl	%ebx, %eax
+	movl	%edx, %esi
+
+	negl	%eax
+	andl	$15, %eax
+	sarl	$2, %eax
+	cmpl	%edx, %eax
+	cmovle	%eax, %esi
+
+	subl	%esi, %edx
+
+	testl	$1, %esi
+	je	.L32
+
+	DO_ONE_PIXEL()
+.L32:
+
+	testl	$2, %esi
+	je	.L31
+
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+.L31:
+
+	/* %eax = number of 4-pixel groups.  The flags set by shrl (and later
+	 * by subl) are what the jne at .L33 tests.
+	 */
+
+	movl	%edx, %eax
+	shrl	$2, %eax
+	jmp	.L33
+.L34:
+	movaps	(%ebx), %xmm0
+	addl	$16, %ebx
+
+	/* This would be so much better if we could just move directly from
+	 * an SSE register to an MMX register.  Unfortunately, that
+	 * functionality wasn't introduced until SSE2 with the MOVDQ2Q
+	 * instruction.
+	 */
+
+	movaps	%xmm0, (%esp)
+	movq	(%esp), %mm0
+	movq	8(%esp), %mm5
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+	movq	%mm5, %mm6
+	movq	%mm5, %mm7
+
+	pand	%mm2, %mm3
+	pand	%mm2, %mm6
+
+	psllq	$16, %mm4
+	psllq	$16, %mm7
+
+	psrlq	$16, %mm3
+	psrlq	$16, %mm6
+
+	pand	%mm2, %mm4
+	pand	%mm2, %mm7
+
+	pand	%mm1, %mm0
+	pand	%mm1, %mm5
+
+	por	%mm4, %mm3
+	por	%mm7, %mm6
+
+	por	%mm3, %mm0
+	por	%mm6, %mm5
+
+	movq	%mm0, (%ecx)
+	movq	%mm5, 8(%ecx)
+	addl	$16, %ecx
+
+	subl	$1, %eax
+.L33:
+	jne	.L34
+
+	/* Drop the scratch slot.  The EMMS is deliberately NOT issued here:
+	 * the 2-pixel tail below still uses MMX registers.
+	 */
+	movl	%ebp, %esp
+
+	/* At this point there are either [0, 3] pixels remaining to be
+	 * converted.
+	 */
+
+	testl	$2, %edx
+	je	.L36
+
+	movq	(%ebx), %mm0
+	addl	$8, %ebx
+
+	movq	%mm0, %mm3
+	movq	%mm0, %mm4
+
+	pand	%mm2, %mm3
+	psllq	$16, %mm4
+	psrlq	$16, %mm3
+	pand	%mm2, %mm4
+
+	pand	%mm1, %mm0
+	por	%mm4, %mm3
+	por	%mm3, %mm0
+
+	movq	%mm0, (%ecx)
+	addl	$8, %ecx
+.L36:
+
+	testl	$1, %edx
+	je	.L35
+
+	DO_ONE_LAST_PIXEL()
+.L35:
+	/* Every path, including the early bail (which has already loaded
+	 * %mm1/%mm2), passes through here after its last MMX instruction.
+	 */
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
+
+
+/**
+ * SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Up to three leading pixels are converted to bring
+ * the source to a 16-byte boundary, then four pixels per iteration are
+ * converted in an XMM register, then up to three trailing pixels are
+ * converted.  No MMX registers are used, so no EMMS is required.
+ */
+
+	.text
+.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
+.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
+	.type	_generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
+_generic_read_RGBA_span_BGRA8888_REV_SSE2:
+	pushl	%esi
+	pushl	%ebx
+
+	movdqa	mask, %xmm1
+	movdqa	mask+16, %xmm2
+
+	movl	12(%esp), %ebx	/* source pointer */
+	movl	20(%esp), %edx	/* number of pixels to copy */
+	movl	16(%esp), %ecx	/* destination pointer */
+
+	movl	%ebx, %eax
+	movl	%edx, %esi
+
+	testl	%edx, %edx
+	jle	.L46		/* Bail if there's nothing to do. */
+
+	/* If the source pointer isn't a multiple of 16 we have to process
+	 * a few pixels the "slow" way to get the address aligned for
+	 * the SSE fetch instructions.
+	 */
+
+	negl	%eax
+	andl	$15, %eax
+	sarl	$2, %eax
+
+	/* %esi = min(pixels needed for alignment, total pixels) */
+	cmpl	%edx, %eax
+	cmovbe	%eax, %esi
+	subl	%esi, %edx
+
+	testl	$1, %esi
+	je	.L41
+
+	DO_ONE_PIXEL()
+.L41:
+	testl	$2, %esi
+	je	.L40
+
+	movq	(%ebx), %xmm0
+	addl	$8, %ebx
+
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movq	%xmm0, (%ecx)
+	addl	$8, %ecx
+.L40:
+
+	/* Would it be worth having a specialized version of this loop for
+	 * the case where the destination is 16-byte aligned?  That version
+	 * would be identical except that it could use movdqa instead of
+	 * movdqu.
+	 */
+
+	/* %eax = number of 4-pixel groups.  The flags set by shrl (and later
+	 * by subl) are what the jne at .L42 tests.
+	 */
+
+	movl	%edx, %eax
+	shrl	$2, %eax
+	jmp	.L42
+.L43:
+	movdqa	(%ebx), %xmm0
+	addl	$16, %ebx
+
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movdqu	%xmm0, (%ecx)
+	addl	$16, %ecx
+	subl	$1, %eax
+.L42:
+	jne	.L43
+
+
+	/* There may be up to 3 pixels remaining to be copied.  Take care
+	 * of them now.  We do the 2 pixel case first because the data
+	 * will be aligned.
+	 */
+
+	testl	$2, %edx
+	je	.L47
+
+	movq	(%ebx), %xmm0
+	addl	$8, %ebx	/* step past the pair: a third pixel may follow */
+
+	movdqa	%xmm0, %xmm3
+	movdqa	%xmm0, %xmm4
+	andps	%xmm1, %xmm0
+
+	andps	%xmm2, %xmm3
+	pslldq	$2, %xmm4
+	psrldq	$2, %xmm3
+	andps	%xmm2, %xmm4
+
+	orps	%xmm4, %xmm3
+	orps	%xmm3, %xmm0
+
+	movq	%xmm0, (%ecx)
+	addl	$8, %ecx	/* keep the destination in step with the source */
+.L47:
+
+	testl	$1, %edx
+	je	.L46
+
+	DO_ONE_LAST_PIXEL()
+.L46:
+
+	popl	%ebx
+	popl	%esi
+	ret
+	.size	_generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
+
+
+
+	.section	.rodata
+
+	.align	16
+/* One RGB565 field mask per 16-bit lane: 0xf800, 0x07e0 and 0x001f select
+ * the three colour fields; the fourth lane is zero and is later filled
+ * with the constant alpha.  Applied after the pixel has been broadcast
+ * to all four lanes with pshufw.
+ */
+mask_565:
+	.word	0xf800
+	.word	0x07e0
+	.word	0x001f
+	.word	0x0000
+
+/* Setting SCALE_ADJUST to 5 gives a perfect match with the classic C
+ * implementation in Mesa.  Setting SCALE_ADJUST to 0 is slightly faster but
+ * at a small cost to accuracy.
+ */
+
+#define SCALE_ADJUST	5
+#if SCALE_ADJUST == 5
+prescale:
+	.word	0x0001
+	.word	0x0010
+	.word	0x0200
+	.word	0x0000
+
+scale:
+	.word	0x20e8		/* (0x00ff0000 / 0x000007c0) + 1 */
+	.word	0x40c5		/* (0x00ff0000 / 0x000003f0) + 1 */
+	.word	0x839d		/* (0x00ff0000 / 0x000001f0) + 1 */
+	.word	0x0000
+#elif SCALE_ADJUST == 0
+prescale:
+	.word	0x0001
+	.word	0x0020
+	.word	0x0800
+	.word	0x0000
+
+scale:
+	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
+	.word	0x0104		/* (0x00ff0000 / 0x0000fc00) + 1 */
+	.word	0x0108		/* (0x00ff0000 / 0x0000f800) + 1 */
+	.word	0x0000
+#else
+#error SCALE_ADJUST must either be 5 or 0.
+#endif
+
+
+/* Viewed as four 16-bit lanes this is 0, 0, 0, 0x00ff: OR-ing it into a
+ * converted pixel sets the fourth (alpha) lane to 0xff.
+ */
+alpha:	.long	0x00000000
+	.long	0x00ff0000
+
+/**
+ * MMX optimized version of the RGB565 to RGBA copy routine.
+ *
+ * Stack arguments, in order: source pointer, destination pointer, number
+ * of pixels to copy.  Four pixels are converted per loop iteration; the
+ * remaining 0..3 pixels are handled by a 2-pixel step and a 1-pixel step.
+ * Alpha is always written as 0xff.
+ */
+
+	.text
+	.globl	_generic_read_RGBA_span_RGB565_MMX
+	.hidden	_generic_read_RGBA_span_RGB565_MMX
+	.type	_generic_read_RGBA_span_RGB565_MMX, @function
+
+_generic_read_RGBA_span_RGB565_MMX:
+
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+
+	movl	4(%esp), %eax	/* source pointer */
+	movl	8(%esp), %edx	/* destination pointer */
+	movl	12(%esp), %ecx	/* number of pixels to copy */
+
+	movq	mask_565, %mm5
+	movq	prescale, %mm6
+	movq	scale, %mm7
+
+	/* %ecx = number of 4-pixel groups.  Only a negative count may bail
+	 * out here: a count of 1..3 yields zero groups but must still fall
+	 * through to the tail code below.  js tests the sign flag alone and
+	 * leaves ZF from sarl intact for the jne at .L02.
+	 */
+
+	sarl	$2, %ecx
+	js	.L01		/* Bail early if the count is negative. */
+	jmp	.L02
+
+.L03:
+	/* Fetch 4 RGB565 pixels into %mm4.  Distribute the first and
+	 * second pixels into the four words of %mm0 and %mm2.
+	 */
+
+	movq	(%eax), %mm4
+	addl	$8, %eax
+
+	pshufw	$0x00, %mm4, %mm0
+	pshufw	$0x55, %mm4, %mm2
+
+
+	/* Mask the pixels so that each word of each register contains only
+	 * one color component.
+	 */
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+
+
+	/* Adjust the component values so that they are as small as possible,
+	 * but large enough so that we can multiply them by an unsigned 16-bit
+	 * number and get a value as large as 0x00ff0000.
+	 */
+
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+
+	/* Scale the input component values to be on the range
+	 * [0, 0x00ff0000].  This is the real magic of the whole routine.
+	 */
+
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+
+	/* Always set the alpha value to 0xff.
+	 */
+
+	por	alpha, %mm0
+	por	alpha, %mm2
+
+
+	/* Pack the 16-bit values to 8-bit values and store the converted
+	 * pixel data.
+	 */
+
+	packuswb	%mm2, %mm0
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+
+	/* Same sequence for the third and fourth pixels of the group. */
+
+	pshufw	$0xaa, %mm4, %mm0
+	pshufw	$0xff, %mm4, %mm2
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+	por	alpha, %mm0
+	por	alpha, %mm2
+
+	packuswb	%mm2, %mm0
+
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+	subl	$1, %ecx
+.L02:
+	jne	.L03
+
+
+	/* At this point there can be at most 3 pixels left to process.  If
+	 * there is either 2 or 3 left, process 2.  The original count is
+	 * reloaded because %ecx was consumed as the loop counter.
+	 */
+
+	movl	12(%esp), %ecx
+	testl	$0x02, %ecx
+	je	.L04
+
+	movd	(%eax), %mm4
+	addl	$4, %eax
+
+	pshufw	$0x00, %mm4, %mm0
+	pshufw	$0x55, %mm4, %mm2
+
+	pand	%mm5, %mm0
+	pand	%mm5, %mm2
+	pmullw	%mm6, %mm0
+	pmullw	%mm6, %mm2
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+	psrlw	$SCALE_ADJUST, %mm2
+#endif
+	pmulhuw	%mm7, %mm0
+	pmulhuw	%mm7, %mm2
+
+	por	alpha, %mm0
+	por	alpha, %mm2
+
+	packuswb	%mm2, %mm0
+
+	movq	%mm0, (%edx)
+	addl	$8, %edx
+
+.L04:
+	/* At this point there can be at most 1 pixel left to process.
+	 * Process it if needed.
+	 */
+
+	testl	$0x01, %ecx
+	je	.L01
+
+	movzxw	(%eax), %ecx
+	movd	%ecx, %mm4
+
+	pshufw	$0x00, %mm4, %mm0
+
+	pand	%mm5, %mm0
+	pmullw	%mm6, %mm0
+#if SCALE_ADJUST > 0
+	psrlw	$SCALE_ADJUST, %mm0
+#endif
+	pmulhuw	%mm7, %mm0
+
+	por	alpha, %mm0
+
+	packuswb	%mm0, %mm0
+
+	movd	%mm0, (%edx)
+
+.L01:
+#ifdef USE_INNER_EMMS
+	emms
+#endif
+	ret
+	.size	_generic_read_RGBA_span_RGB565_MMX, .-_generic_read_RGBA_span_RGB565_MMX
+#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.h b/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.h
new file mode 100644
index 000000000..564b1bb0f
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/read_rgba_span_x86.h
@@ -0,0 +1,56 @@
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+ 
+/**
+ * \file read_rgba_span_x86.h
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#ifndef READ_RGBA_SPAN_X86_H
+#define READ_RGBA_SPAN_X86_H
+
+#if defined(USE_SSE_ASM) || defined(USE_MMX_ASM)
+#include "x86/common_x86_asm.h"
+#endif
+
+/* All routines share one signature: convert `count` pixels read from
+ * `src` into 4-byte RGBA pixels written to `dest`.
+ */
+
+#ifdef USE_SSE_ASM
+extern void _generic_read_RGBA_span_BGRA8888_REV_SSE2(
+    const unsigned char *src, unsigned char *dest, unsigned count );
+
+extern void _generic_read_RGBA_span_BGRA8888_REV_SSE(
+    const unsigned char *src, unsigned char *dest, unsigned count );
+#endif
+
+#ifdef USE_MMX_ASM
+extern void _generic_read_RGBA_span_BGRA8888_REV_MMX(
+    const unsigned char *src, unsigned char *dest, unsigned count );
+
+extern void _generic_read_RGBA_span_RGB565_MMX(
+    const unsigned char *src, unsigned char *dest, unsigned count );
+#endif
+
+#endif /* READ_RGBA_SPAN_X86_H */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.c b/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.c
new file mode 100644
index 000000000..0c9ffe25f
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.c
@@ -0,0 +1,997 @@
+#if defined(USE_X86_ASM)
+
+#include "imports.h"
+#include "x86sse.h"
+
+#define DISASSEM 0
+#define X86_TWOB 0x0f
+
+/* Emit bytes to the instruction stream:
+ */
+static void emit_1b( struct x86_function *p, GLbyte b0 )
+{
+   *(GLbyte *)(p->csr++) = b0;
+}
+
+static void emit_1i( struct x86_function *p, GLint i0 )
+{
+   *(GLint *)(p->csr) = i0;
+   p->csr += 4;
+}
+
+static void disassem( struct x86_function *p, const char *fn )
+{
+#if DISASSEM && 0
+   if (fn && fn != p->fn) {
+      _mesa_printf("0x%x: %s\n", p->csr, fn);
+      p->fn = fn;
+   }
+#endif
+}
+
+static void emit_1ub_fn( struct x86_function *p, GLubyte b0, const char *fn )
+{
+   disassem(p, fn);
+   *(p->csr++) = b0;
+}
+
+static void emit_2ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, const char *fn )
+{
+   disassem(p, fn);
+   *(p->csr++) = b0;
+   *(p->csr++) = b1;
+}
+
+static void emit_3ub_fn( struct x86_function *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
+{
+   disassem(p, fn);
+   *(p->csr++) = b0;
+   *(p->csr++) = b1;
+   *(p->csr++) = b2;
+}
+
+#define emit_1ub(p, b0)         emit_1ub_fn(p, b0, __FUNCTION__)
+#define emit_2ub(p, b0, b1)     emit_2ub_fn(p, b0, b1, __FUNCTION__)
+#define emit_3ub(p, b0, b1, b2) emit_3ub_fn(p, b0, b1, b2, __FUNCTION__)
+
+
+
+/* Build a modRM byte + possible displacement.  No treatment of SIB
+ * indexing.  BZZT - no way to encode an absolute address.
+ *
+ * \param p       function being assembled; bytes are appended at p->csr
+ * \param reg     operand for the modRM reg field; must be a plain register
+ * \param regmem  operand for the mod and r/m fields, either a register or
+ *                a register plus optional displacement
+ *
+ * NOTE(review): an r/m field of 5 with mod == mod_INDIRECT does not mean
+ * [EBP] on x86 but "32-bit absolute address follows", so dereferencing
+ * EBP with a zero displacement is not encodable through this path.
+ */
+static void emit_modrm( struct x86_function *p, 
+			struct x86_reg reg, 
+			struct x86_reg regmem )
+{
+   GLubyte val = 0;
+   
+   assert(reg.mod == mod_REG);
+   
+   val |= regmem.mod << 6;     	/* mod field */
+   val |= reg.idx << 3;		/* reg field */
+   val |= regmem.idx;		/* r/m field */
+   
+   emit_1ub_fn(p, val, 0);
+
+   /* Oh-oh we've stumbled into the SIB thing.  An r/m field naming ESP
+    * means "SIB byte follows", but only for memory operands.  With
+    * mod_REG it is simply the ESP register, and emitting the extra byte
+    * would corrupt the instruction stream.
+    */
+   if (regmem.file == file_REG32 &&
+       regmem.idx == reg_SP &&
+       regmem.mod != mod_REG) {
+      emit_1ub_fn(p, 0x24, 0);		/* simplistic! */
+   }
+
+   switch (regmem.mod) {
+   case mod_REG:
+   case mod_INDIRECT:
+      break;
+   case mod_DISP8:
+      emit_1b(p, regmem.disp);
+      break;
+   case mod_DISP32:
+      emit_1i(p, regmem.disp);
+      break;
+   default:
+      assert(0);
+      break;
+   }
+}
+
+
+static void emit_modrm_noreg( struct x86_function *p,
+			      GLuint op,
+			      struct x86_reg regmem )
+{
+   struct x86_reg dummy = x86_make_reg(file_REG32, op);
+   emit_modrm(p, dummy, regmem);
+}
+
+/* Emit an opcode byte followed by its modRM encoding, for instructions
+ * that come in a "register destination" and a "memory destination" form.
+ * The form is picked from dst: a plain register selects op_dst_is_reg,
+ * any memory operand selects op_dst_is_mem (and then src must be a plain
+ * register, since it has to go in the modRM reg field).
+ */
+static void emit_op_modrm( struct x86_function *p,
+			   GLubyte op_dst_is_reg,
+			   GLubyte op_dst_is_mem,
+			   struct x86_reg dst,
+			   struct x86_reg src )
+{
+   if (dst.mod == mod_REG) {
+      /* dst occupies the reg field, src the r/m field. */
+      emit_1ub_fn(p, op_dst_is_reg, 0);
+      emit_modrm(p, dst, src);
+   }
+   else if (dst.mod == mod_INDIRECT ||
+	    dst.mod == mod_DISP32 ||
+	    dst.mod == mod_DISP8) {
+      /* Roles swap: the memory destination occupies the r/m field. */
+      assert(src.mod == mod_REG);
+      emit_1ub_fn(p, op_dst_is_mem, 0);
+      emit_modrm(p, src, dst);
+   }
+   else {
+      assert(0);
+   }
+}
+
+
+
+
+
+
+
+/* Create and manipulate registers and regmem values:
+ */
+struct x86_reg x86_make_reg( GLuint file,
+			     GLuint idx )
+{
+   struct x86_reg reg;
+
+   reg.file = file;
+   reg.idx = idx;
+   reg.mod = mod_REG;
+   reg.disp = 0;
+
+   return reg;
+}
+
+/* Turn a 32-bit register operand into a memory operand at [reg + disp].
+ * If reg is already a memory operand, disp is added to its existing
+ * displacement.  The addressing mode is the smallest that can hold the
+ * resulting displacement: none, 8-bit or 32-bit.
+ */
+struct x86_reg x86_make_disp( struct x86_reg reg,
+			      GLint disp )
+{
+   assert(reg.file == file_REG32);
+
+   /* A plain register carries no displacement yet; an address accumulates. */
+   reg.disp = (reg.mod == mod_REG) ? disp : reg.disp + disp;
+
+   if (reg.disp == 0) {
+      reg.mod = mod_INDIRECT;
+   }
+   else if (reg.disp >= -128 && reg.disp <= 127) {
+      reg.mod = mod_DISP8;
+   }
+   else {
+      reg.mod = mod_DISP32;
+   }
+
+   return reg;
+}
+
+struct x86_reg x86_deref( struct x86_reg reg )
+{
+   return x86_make_disp(reg, 0);
+}
+
+struct x86_reg x86_get_base_reg( struct x86_reg reg )
+{
+   return x86_make_reg( reg.file, reg.idx );
+}
+
+GLubyte *x86_get_label( struct x86_function *p )
+{
+   return p->csr;
+}
+
+
+
+/***********************************************************************
+ * x86 instructions
+ */
+
+
+/* Emit a conditional jump to an already-known label.  The 2-byte short
+ * form is used when the target is within signed 8-bit range of the end of
+ * that instruction; otherwise the 6-byte near form with a 32-bit offset is
+ * emitted.  Offsets are relative to the end of the jump instruction.
+ */
+void x86_jcc( struct x86_function *p,
+	      GLuint cc,
+	      GLubyte *label )
+{
+   GLubyte *here = x86_get_label(p);
+   GLint rel8 = label - (here + 2);
+   GLint rel32 = label - (here + 6);
+
+   if (rel8 < -128 || rel8 > 127) {
+      emit_2ub(p, 0x0f, 0x80 + cc);
+      emit_1i(p, rel32);
+      return;
+   }
+
+   emit_1ub(p, 0x70 + cc);
+   emit_1b(p, (GLbyte) rel8);
+}
+
+/* Always use a 32bit offset for forward jumps:
+ *
+ * Emits the two opcode bytes (0x0f, 0x80 + cc) followed by a zero 32-bit
+ * offset as a placeholder, and returns the address just past the
+ * instruction.  Pass that address to x86_fixup_fwd_jump() once the
+ * target is reached so the placeholder gets patched.
+ */
+GLubyte *x86_jcc_forward( struct x86_function *p,
+			  GLuint cc )
+{
+   emit_2ub(p, 0x0f, 0x80 + cc);
+   emit_1i(p, 0);
+   return x86_get_label(p);
+}
+
+/* Fixup offset from forward jump:
+ *
+ * `fixup` is the address returned by x86_jcc_forward(), i.e. the byte
+ * following the jump.  The 4 bytes immediately before it hold the jump's
+ * offset; they are overwritten with the distance from `fixup` to the
+ * current emit position, making the jump land here.
+ */
+void x86_fixup_fwd_jump( struct x86_function *p,
+			 GLubyte *fixup )
+{
+   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
+}
+
+void x86_push( struct x86_function *p,
+	       struct x86_reg reg )
+{
+   assert(reg.mod == mod_REG);
+   emit_1ub(p, 0x50 + reg.idx);
+   p->stack_offset += 4;
+}
+
+void x86_pop( struct x86_function *p,
+	      struct x86_reg reg )
+{
+   assert(reg.mod == mod_REG);
+   emit_1ub(p, 0x58 + reg.idx);
+   p->stack_offset -= 4;
+}
+
+void x86_inc( struct x86_function *p,
+	      struct x86_reg reg )
+{
+   assert(reg.mod == mod_REG);
+   emit_1ub(p, 0x40 + reg.idx);
+}
+
+void x86_dec( struct x86_function *p,
+	      struct x86_reg reg )
+{
+   assert(reg.mod == mod_REG);
+   emit_1ub(p, 0x48 + reg.idx);
+}
+
+void x86_ret( struct x86_function *p )
+{
+   emit_1ub(p, 0xc3);
+}
+
+void x86_sahf( struct x86_function *p )
+{
+   emit_1ub(p, 0x9e);
+}
+
+void x86_mov( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   emit_op_modrm( p, 0x8b, 0x89, dst, src );
+}
+
+void x86_xor( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   emit_op_modrm( p, 0x33, 0x31, dst, src );
+}
+
+void x86_cmp( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   emit_op_modrm( p, 0x3b, 0x39, dst, src );
+}
+
+void x86_lea( struct x86_function *p,
+	      struct x86_reg dst,
+	      struct x86_reg src )
+{
+   emit_1ub(p, 0x8d);
+   emit_modrm( p, dst, src );
+}
+
+void x86_test( struct x86_function *p,
+	       struct x86_reg dst,
+	       struct x86_reg src )
+{
+   emit_1ub(p, 0x85);
+   emit_modrm( p, dst, src );
+}
+
+
+
+/***********************************************************************
+ * SSE instructions
+ */
+
+
+void sse_movss( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, 0xF3, X86_TWOB);
+   emit_op_modrm( p, 0x10, 0x11, dst, src );
+}
+
+void sse_movaps( struct x86_function *p,
+		 struct x86_reg dst,
+		 struct x86_reg src )
+{
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x28, 0x29, dst, src );
+}
+
+void sse_movups( struct x86_function *p,
+		 struct x86_reg dst,
+		 struct x86_reg src )
+{
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x10, 0x11, dst, src );
+}
+
+void sse_movhps( struct x86_function *p,
+		 struct x86_reg dst,
+		 struct x86_reg src )
+{
+   assert(dst.mod != mod_REG || src.mod != mod_REG);
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
+}
+
+void sse_movlps( struct x86_function *p,
+		 struct x86_reg dst,
+		 struct x86_reg src )
+{
+   assert(dst.mod != mod_REG || src.mod != mod_REG);
+   emit_1ub(p, X86_TWOB);
+   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
+}
+
+void sse_maxps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x5F);
+   emit_modrm( p, dst, src );
+}
+
+void sse_divss( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
+   emit_modrm( p, dst, src );
+}
+
+void sse_minps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x5D);
+   emit_modrm( p, dst, src );
+}
+
+void sse_subps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x5C);
+   emit_modrm( p, dst, src );
+}
+
+void sse_mulps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x59);
+   emit_modrm( p, dst, src );
+}
+
+void sse_addps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x58);
+   emit_modrm( p, dst, src );
+}
+
+void sse_addss( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
+   emit_modrm( p, dst, src );
+}
+
+void sse_andps( struct x86_function *p,
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, X86_TWOB, 0x54);
+   emit_modrm( p, dst, src );
+}
+
+
+void sse_rsqrtss( struct x86_function *p,
+		  struct x86_reg dst,
+		  struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
+   emit_modrm( p, dst, src );
+
+}
+
+void sse_movhlps( struct x86_function *p,
+		  struct x86_reg dst,
+		  struct x86_reg src )
+{
+   assert(dst.mod == mod_REG && src.mod == mod_REG);
+   emit_2ub(p, X86_TWOB, 0x12);
+   emit_modrm( p, dst, src );
+}
+
+void sse_movlhps( struct x86_function *p,
+		  struct x86_reg dst,
+		  struct x86_reg src )
+{
+   assert(dst.mod == mod_REG && src.mod == mod_REG);
+   emit_2ub(p, X86_TWOB, 0x16);
+   emit_modrm( p, dst, src );
+}
+
+
+void sse_cvtps2pi( struct x86_function *p,
+		   struct x86_reg dst,
+		   struct x86_reg src )
+{
+   assert(dst.file == file_MMX && 
+	  (src.file == file_XMM || src.mod != mod_REG));
+
+   p->need_emms = 1;
+
+   emit_2ub(p, X86_TWOB, 0x2d);
+   emit_modrm( p, dst, src );
+}
+
+
+/* Shufps can also be used to implement a reduced swizzle when dest ==
+ * arg0.
+ */
+void sse_shufps( struct x86_function *p,
+		 struct x86_reg dest,
+		 struct x86_reg arg0,
+		 GLubyte shuf) 
+{
+   emit_2ub(p, X86_TWOB, 0xC6);
+   emit_modrm(p, dest, arg0);
+   emit_1ub(p, shuf); 
+}
+
+void sse_cmpps( struct x86_function *p,	/* Emit CMPPS dest, arg0, cc into p. */
+		struct x86_reg dest,
+		struct x86_reg arg0,
+		GLubyte cc) 			/* comparison selector; presumably an enum sse_cc value - confirm with callers */
+{
+   emit_2ub(p, X86_TWOB, 0xC2);	/* X86_TWOB escape byte, then opcode 0xC2 */
+   emit_modrm(p, dest, arg0);		/* operand encoding for dest/arg0 */
+   emit_1ub(p, cc); 			/* selector is emitted last, after the operands */
+}
+
+/***********************************************************************
+ * SSE2 instructions
+ */
+
+/**
+ * Perform a reduced swizzle:
+ */
+void sse2_pshufd( struct x86_function *p,	/* Emit PSHUFD dest, arg0, shuf into p. */
+		  struct x86_reg dest,
+		  struct x86_reg arg0,
+		  GLubyte shuf) 		/* selector byte; the header's SHUF() macro builds these */
+{
+   emit_3ub(p, 0x66, X86_TWOB, 0x70);	/* 0x66 prefix, X86_TWOB escape, opcode 0x70 */
+   emit_modrm(p, dest, arg0);		/* operand encoding for dest/arg0 */
+   emit_1ub(p, shuf); 			/* selector is emitted last, after the operands */
+}
+
+void sse2_cvtps2dq( struct x86_function *p,	/* Emit CVTPS2DQ dst, src into p. */
+		    struct x86_reg dst,
+		    struct x86_reg src )
+{
+   emit_3ub(p, 0x66, X86_TWOB, 0x5B);	/* 0x66 prefix, X86_TWOB escape, opcode 0x5B */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void sse2_packssdw( struct x86_function *p,	/* Emit the 0x66-prefixed (XMM) PACKSSDW dst, src into p. */
+		    struct x86_reg dst,
+		    struct x86_reg src )
+{
+   emit_3ub(p, 0x66, X86_TWOB, 0x6B);	/* same opcode 0x6B as mmx_packssdw, plus the 0x66 prefix */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void sse2_packsswb( struct x86_function *p,	/* Emit the 0x66-prefixed (XMM) PACKSSWB dst, src into p. */
+		    struct x86_reg dst,
+		    struct x86_reg src )
+{
+   emit_3ub(p, 0x66, X86_TWOB, 0x63);	/* 0x66 prefix, X86_TWOB escape, opcode 0x63 */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void sse2_packuswb( struct x86_function *p,	/* Emit the 0x66-prefixed (XMM) PACKUSWB dst, src into p. */
+		    struct x86_reg dst,
+		    struct x86_reg src )
+{
+   emit_3ub(p, 0x66, X86_TWOB, 0x67);	/* same opcode 0x67 as mmx_packuswb, plus the 0x66 prefix */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void sse2_rcpss( struct x86_function *p,	/* Emit RCPSS dst, src into p. NOTE(review): RCPSS looks like an SSE1 instruction despite the sse2_ name - confirm. */
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_3ub(p, 0xF3, X86_TWOB, 0x53);	/* 0xF3 prefix, X86_TWOB escape, opcode 0x53 */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void sse2_movd( struct x86_function *p,	/* Emit the 0x66-prefixed (XMM) MOVD dst, src into p. */
+		struct x86_reg dst,
+		struct x86_reg src )
+{
+   emit_2ub(p, 0x66, X86_TWOB);		/* prefix and escape only; the opcode byte comes from emit_op_modrm */
+   emit_op_modrm( p, 0x6e, 0x7e, dst, src );	/* presumably selects 0x6e or 0x7e by operand direction - helper is outside this view */
+}
+
+
+
+
+/***********************************************************************
+ * x87 instructions
+ */
+void x87_fist( struct x86_function *p, struct x86_reg dst )	/* Emit FIST dst into p. */
+{
+   emit_1ub(p, 0xdb);			/* opcode byte 0xDB */
+   emit_modrm_noreg(p, 2, dst);		/* op extension 2 plus the dst operand */
+}
+
+void x87_fistp( struct x86_function *p, struct x86_reg dst )	/* Emit FISTP dst into p. */
+{
+   emit_1ub(p, 0xdb);			/* opcode byte 0xDB */
+   emit_modrm_noreg(p, 3, dst);		/* op extension 3 plus the dst operand */
+}
+
+void x87_fldz( struct x86_function *p )	/* Emit FLDZ into p. */
+{
+   emit_2ub(p, 0xd9, 0xee);		/* fixed two-byte encoding D9 EE */
+}
+
+
+void x87_fldcw( struct x86_function *p, struct x86_reg arg )	/* Emit FLDCW arg into p. */
+{
+   assert(arg.file == file_REG32);	/* operand must be based on a 32-bit register... */
+   assert(arg.mod != mod_REG);		/* ...and must not be a plain register, i.e. it is a memory reference */
+   emit_1ub(p, 0xd9);			/* opcode byte 0xD9 */
+   emit_modrm_noreg(p, 5, arg);		/* op extension 5 plus the operand */
+}
+
+void x87_fld1( struct x86_function *p )	/* Emit FLD1 into p. */
+{
+   emit_2ub(p, 0xd9, 0xe8);		/* fixed two-byte encoding D9 E8 */
+}
+
+void x87_fldl2e( struct x86_function *p )	/* Emit FLDL2E into p. */
+{
+   emit_2ub(p, 0xd9, 0xea);		/* fixed two-byte encoding D9 EA */
+}
+
+void x87_fldln2( struct x86_function *p )	/* Emit FLDLN2 into p. */
+{
+   emit_2ub(p, 0xd9, 0xed);		/* fixed two-byte encoding D9 ED */
+}
+
+void x87_fwait( struct x86_function *p )	/* Emit FWAIT into p. */
+{
+   emit_1ub(p, 0x9b);			/* single-byte encoding 9B */
+}
+
+void x87_fnclex( struct x86_function *p )	/* Emit FNCLEX into p. */
+{
+   emit_2ub(p, 0xdb, 0xe2);		/* fixed two-byte encoding DB E2 */
+}
+
+void x87_fclex( struct x86_function *p )	/* Emit FCLEX as the pair FWAIT + FNCLEX. */
+{
+   x87_fwait(p);			/* 9B first... */
+   x87_fnclex(p);			/* ...then DB E2 */
+}
+
+
+static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
+			  GLubyte dst0ub0,		/* first opcode byte when dst is st(0) */
+			  GLubyte dst0ub1,		/* second opcode byte base when dst is st(0) */
+			  GLubyte arg0ub0,		/* first opcode byte when arg is st(0) */
+			  GLubyte arg0ub1,		/* second opcode byte base when arg is st(0) */
+			  GLubyte argmem_noreg)		/* op extension for the non-x87 (memory) operand form */
+{
+   assert(dst.file == file_x87);
+   /* Shared emitter for the two-operand x87 arithmetic ops: one operand must be st(0). */
+   if (arg.file == file_x87) {
+      if (dst.idx == 0) 
+	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);	/* st(0) <- st(0) op st(arg.idx) */
+      else if (arg.idx == 0) 
+	 emit_2ub(p, arg0ub0, arg0ub1+dst.idx);	/* st(dst.idx) <- ... op st(0): the index must select dst, arg.idx is 0 here */
+      else
+	 assert(0);				/* neither operand is st(0): not encodable */
+   }
+   else if (dst.idx == 0) {
+      assert(arg.file == file_REG32);		/* comparison; the old '=' assigned 0 and always tripped the assert */
+      emit_1ub(p, 0xd8);
+      emit_modrm_noreg(p, argmem_noreg, arg);
+   }
+   else
+      assert(0);				/* non-x87 operand is only supported with dst == st(0) */
+}
+
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FMUL dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xc8,	/* dst == st(0): D8 C8+i */
+		0xdc, 0xc8,	/* arg == st(0): DC C8+i */
+		1);		/* memory operand: D8 /1 is FMUL m32fp; the previous value 4 is FSUB's extension */
+}
+
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FSUB dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xe0,	/* dst == st(0): D8 E0+i */
+		0xdc, 0xe8,	/* arg == st(0): DC E8+i */
+		4);		/* op extension for the memory-operand form */
+}
+
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FSUBR dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xe8,	/* dst == st(0): D8 E8+i */
+		0xdc, 0xe0,	/* arg == st(0): DC E0+i */
+		5);		/* op extension for the memory-operand form */
+}
+
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FADD dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xc0,	/* dst == st(0): D8 C0+i */
+		0xdc, 0xc0,	/* arg == st(0): DC C0+i */
+		0);		/* op extension for the memory-operand form */
+}
+
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FDIV dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xf0,	/* dst == st(0): D8 F0+i */
+		0xdc, 0xf8,	/* arg == st(0): DC F8+i */
+		6);		/* op extension for the memory-operand form */
+}
+
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )	/* Emit FDIVR dst, arg into p. */
+{
+   x87_arith_op(p, dst, arg, 
+		0xd8, 0xf8,	/* dst == st(0): D8 F8+i */
+		0xdc, 0xf0,	/* arg == st(0): DC F0+i */
+		7);		/* op extension for the memory-operand form */
+}
+
+void x87_fmulp( struct x86_function *p, struct x86_reg dst )	/* Emit FMULP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xc8+dst.idx);	/* DE C8+i */
+}
+
+void x87_fsubp( struct x86_function *p, struct x86_reg dst )	/* Emit FSUBP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xe8+dst.idx);	/* DE E8+i */
+}
+
+void x87_fsubrp( struct x86_function *p, struct x86_reg dst )	/* Emit FSUBRP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xe0+dst.idx);	/* DE E0+i */
+}
+
+void x87_faddp( struct x86_function *p, struct x86_reg dst )	/* Emit FADDP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xc0+dst.idx);	/* DE C0+i */
+}
+
+void x87_fdivp( struct x86_function *p, struct x86_reg dst )	/* Emit FDIVP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xf8+dst.idx);	/* DE F8+i */
+}
+
+void x87_fdivrp( struct x86_function *p, struct x86_reg dst )	/* Emit FDIVRP dst into p. */
+{
+   assert(dst.file == file_x87);	/* x87 stack register only */
+   assert(dst.idx >= 1);		/* st(0) is rejected as destination */
+   emit_2ub(p, 0xde, 0xf0+dst.idx);	/* DE F0+i */
+}
+
+void x87_fucom( struct x86_function *p, struct x86_reg arg )	/* Emit FUCOM arg into p. */
+{
+   assert(arg.file == file_x87);	/* x87 stack register only */
+   emit_2ub(p, 0xdd, 0xe0+arg.idx);	/* DD E0+i */
+}
+
+void x87_fucomp( struct x86_function *p, struct x86_reg arg )	/* Emit FUCOMP arg into p. */
+{
+   assert(arg.file == file_x87);	/* x87 stack register only */
+   emit_2ub(p, 0xdd, 0xe8+arg.idx);	/* DD E8+i */
+}
+
+void x87_fucompp( struct x86_function *p )	/* Emit FUCOMPP into p. */
+{
+   emit_2ub(p, 0xda, 0xe9);		/* fixed two-byte encoding DA E9 */
+}
+
+void x87_fxch( struct x86_function *p, struct x86_reg arg )	/* Emit FXCH arg into p. */
+{
+   assert(arg.file == file_x87);	/* x87 stack register only */
+   emit_2ub(p, 0xd9, 0xc8+arg.idx);	/* D9 C8+i */
+}
+
+void x87_fabs( struct x86_function *p )	/* Emit FABS into p. */
+{
+   emit_2ub(p, 0xd9, 0xe1);		/* fixed two-byte encoding D9 E1 */
+}
+
+void x87_fchs( struct x86_function *p )	/* Emit FCHS into p. */
+{
+   emit_2ub(p, 0xd9, 0xe0);		/* fixed two-byte encoding D9 E0 */
+}
+
+void x87_fcos( struct x86_function *p )	/* Emit FCOS into p. */
+{
+   emit_2ub(p, 0xd9, 0xff);		/* fixed two-byte encoding D9 FF */
+}
+
+
+void x87_fprndint( struct x86_function *p )	/* Emit bytes D9 FC into p (the encoding Intel documents as FRNDINT). */
+{
+   emit_2ub(p, 0xd9, 0xfc);		/* fixed two-byte encoding D9 FC */
+}
+
+void x87_fscale( struct x86_function *p )	/* Emit FSCALE into p. */
+{
+   emit_2ub(p, 0xd9, 0xfd);		/* fixed two-byte encoding D9 FD */
+}
+
+void x87_fsin( struct x86_function *p )	/* Emit FSIN into p. */
+{
+   emit_2ub(p, 0xd9, 0xfe);		/* fixed two-byte encoding D9 FE */
+}
+
+void x87_fsincos( struct x86_function *p )	/* Emit FSINCOS into p. */
+{
+   emit_2ub(p, 0xd9, 0xfb);		/* fixed two-byte encoding D9 FB */
+}
+
+void x87_fsqrt( struct x86_function *p )	/* Emit FSQRT into p. */
+{
+   emit_2ub(p, 0xd9, 0xfa);		/* fixed two-byte encoding D9 FA */
+}
+
+void x87_fxtract( struct x86_function *p )	/* Emit FXTRACT into p. */
+{
+   emit_2ub(p, 0xd9, 0xf4);		/* fixed two-byte encoding D9 F4 */
+}
+
+/* st0 = (2^st0)-1
+ *
+ * Restrictions: -1.0 <= st0 <= 1.0
+ */
+void x87_f2xm1( struct x86_function *p )	/* Emit F2XM1 into p. */
+{
+   emit_2ub(p, 0xd9, 0xf0);		/* fixed two-byte encoding D9 F0 */
+}
+
+/* st1 = st1 * log2(st0);
+ * pop_stack;
+ */
+void x87_fyl2x( struct x86_function *p )	/* Emit FYL2X into p. */
+{
+   emit_2ub(p, 0xd9, 0xf1);		/* fixed two-byte encoding D9 F1 */
+}
+
+/* st1 = st1 * log2(st0 + 1.0);
+ * pop_stack;
+ *
+ * A fast operation, with restrictions: -.29 < st0 < .29 
+ */
+void x87_fyl2xp1( struct x86_function *p )	/* Emit FYL2XP1 into p. */
+{
+   emit_2ub(p, 0xd9, 0xf9);		/* fixed two-byte encoding D9 F9 */
+}
+
+
+void x87_fld( struct x86_function *p, struct x86_reg arg )	/* Emit FLD arg into p. */
+{
+   if (arg.file == file_x87) {
+      emit_2ub(p, 0xd9, 0xc0 + arg.idx);	/* stack-register form: D9 C0+i */
+      return;
+   }
+   emit_1ub(p, 0xd9);				/* any other operand: D9 with op extension 0 */
+   emit_modrm_noreg(p, 0, arg);
+}
+
+void x87_fst( struct x86_function *p, struct x86_reg dst )	/* Emit FST dst into p. */
+{
+   if (dst.file == file_x87) {
+      emit_2ub(p, 0xdd, 0xd0 + dst.idx);	/* stack-register form: DD D0+i */
+      return;
+   }
+   emit_1ub(p, 0xd9);				/* any other operand: D9 with op extension 2 */
+   emit_modrm_noreg(p, 2, dst);
+}
+
+void x87_fstp( struct x86_function *p, struct x86_reg dst )	/* Emit FSTP dst into p. */
+{
+   if (dst.file == file_x87) {
+      emit_2ub(p, 0xdd, 0xd8 + dst.idx);	/* stack-register form: DD D8+i */
+      return;
+   }
+   emit_1ub(p, 0xd9);				/* any other operand: D9 with op extension 3 */
+   emit_modrm_noreg(p, 3, dst);
+}
+
+void x87_fcom( struct x86_function *p, struct x86_reg dst )	/* Emit FCOM dst into p. */
+{
+   if (dst.file == file_x87) {
+      emit_2ub(p, 0xd8, 0xd0 + dst.idx);	/* stack-register form: D8 D0+i */
+      return;
+   }
+   emit_1ub(p, 0xd8);				/* any other operand: D8 with op extension 2 */
+   emit_modrm_noreg(p, 2, dst);
+}
+
+void x87_fcomp( struct x86_function *p, struct x86_reg dst )	/* Emit FCOMP dst into p. */
+{
+   if (dst.file == file_x87) {
+      emit_2ub(p, 0xd8, 0xd8 + dst.idx);	/* stack-register form: D8 D8+i */
+      return;
+   }
+   emit_1ub(p, 0xd8);				/* any other operand: D8 with op extension 3 */
+   emit_modrm_noreg(p, 3, dst);
+}
+
+
+void x87_fnstsw( struct x86_function *p, struct x86_reg dst )	/* Emit FNSTSW dst into p. */
+{
+   const int dst_is_ax = (dst.mod == mod_REG && dst.idx == reg_AX);
+
+   assert(dst.file == file_REG32);
+   if (dst_is_ax) {
+      emit_2ub(p, 0xdf, 0xe0);		/* plain AX register: fixed encoding DF E0 */
+      return;
+   }
+   emit_1ub(p, 0xdd);			/* every other operand: DD with op extension 7 */
+   emit_modrm_noreg(p, 7, dst);
+}
+
+
+
+
+/***********************************************************************
+ * MMX instructions
+ */
+
+void mmx_emms( struct x86_function *p )	/* Emit EMMS into p and clear the pending-EMMS flag. */
+{
+   assert(p->need_emms);		/* only valid after an emitter has set need_emms */
+   emit_2ub(p, 0x0f, 0x77);		/* fixed two-byte encoding 0F 77 */
+   p->need_emms = 0;			/* nothing pending until the next MMX emitter sets it again */
+}
+
+void mmx_packssdw( struct x86_function *p,	/* Emit the MMX form of PACKSSDW dst, src into p. */
+		   struct x86_reg dst,
+		   struct x86_reg src )
+{
+   assert(dst.file == file_MMX && 	/* dst must be an MMX register operand */
+	  (src.file == file_MMX || src.mod != mod_REG));	/* src: MMX register, or any non-register (memory) operand */
+
+   p->need_emms = 1;			/* flag checked and cleared again by mmx_emms() */
+
+   emit_2ub(p, X86_TWOB, 0x6b);	/* X86_TWOB escape byte, then opcode 0x6b */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void mmx_packuswb( struct x86_function *p,	/* Emit the MMX form of PACKUSWB dst, src into p. */
+		   struct x86_reg dst,
+		   struct x86_reg src )
+{
+   assert(dst.file == file_MMX && 	/* dst must be an MMX register operand */
+	  (src.file == file_MMX || src.mod != mod_REG));	/* src: MMX register, or any non-register (memory) operand */
+
+   p->need_emms = 1;			/* flag checked and cleared again by mmx_emms() */
+
+   emit_2ub(p, X86_TWOB, 0x67);	/* X86_TWOB escape byte, then opcode 0x67 */
+   emit_modrm( p, dst, src );		/* operand encoding for the dst/src pair */
+}
+
+void mmx_movd( struct x86_function *p,	/* Emit the MMX form of MOVD dst, src into p. */
+	       struct x86_reg dst,
+	       struct x86_reg src )
+{
+   p->need_emms = 1;			/* flag checked and cleared again by mmx_emms() */
+   emit_1ub(p, X86_TWOB);		/* escape byte only; the opcode byte comes from emit_op_modrm */
+   emit_op_modrm( p, 0x6e, 0x7e, dst, src );	/* presumably selects 0x6e or 0x7e by operand direction - helper is outside this view */
+}
+
+void mmx_movq( struct x86_function *p,	/* Emit the MMX form of MOVQ dst, src into p. */
+	       struct x86_reg dst,
+	       struct x86_reg src )
+{
+   p->need_emms = 1;			/* flag checked and cleared again by mmx_emms() */
+   emit_1ub(p, X86_TWOB);		/* escape byte only; the opcode byte comes from emit_op_modrm */
+   emit_op_modrm( p, 0x6f, 0x7f, dst, src );	/* presumably selects 0x6f or 0x7f by operand direction - helper is outside this view */
+}
+
+
+/***********************************************************************
+ * Helper functions
+ */
+
+
+/* Build an operand that refers to function argument number 'arg':
+ * a displacement off reg_SP of p->stack_offset plus 4 bytes per argument.
+ */
+struct x86_reg x86_fn_arg( struct x86_function *p,
+			   GLuint arg )
+{
+   struct x86_reg stack_ptr = x86_make_reg(file_REG32, reg_SP);
+   return x86_make_disp(stack_ptr, p->stack_offset + arg * 4);	/* ??? (marker kept from the original author) */
+}
+
+
+void x86_init_func( struct x86_function *p )	/* Allocate the code buffer for p and point csr at its start. */
+{
+   p->store = _mesa_exec_malloc(1024);	/* fixed 1024-byte request; the result is not checked here */
+   p->csr = p->store;			/* csr starts equal to store; other fields of *p are left untouched */
+}
+
+void x86_release_func( struct x86_function *p )	/* Release the buffer obtained by x86_init_func(). */
+{
+   _mesa_exec_free(p->store);		/* p->store and p->csr are not reset afterwards */
+}
+
+
+void (*x86_get_func( struct x86_function *p ))(void)	/* Return the start of p's buffer as a callable function pointer. */
+{
+   if (DISASSEM)			/* debug switch defined outside this chunk */
+      _mesa_printf("disassemble %p %p\n", p->store, p->csr);	/* prints the buffer start and current csr */
+   return (void (*)())p->store;		/* no copy is made: the caller runs the buffer in place */
+}
+
+#else
+
+void x86sse_dummy( void )	/* Empty function in the #else branch; presumably keeps the translation unit non-empty when the emitter is compiled out. */
+{
+}
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.h b/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.h
new file mode 100644
index 000000000..611d01e1c
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/rtasm/x86sse.h
@@ -0,0 +1,223 @@
+
+#ifndef _X86SSE_H_
+#define _X86SSE_H_
+
+#if defined(USE_X86_ASM)
+
+#include "glheader.h"
+
+/* It is up to the caller to ensure that instructions issued are
+ * suitable for the host cpu.  There are no checks made in this module
+ * for mmx/sse/sse2 support on the cpu.
+ */
+struct x86_reg {
+   GLuint file:3;		/* register file; compared against enum x86_reg_file values */
+   GLuint idx:3;		/* register index within the file (added to opcode bytes for x87) */
+   GLuint mod:2;		/* mod_REG if this is just a register */
+   GLint  disp:24;		/* only +/- 23bits of offset - should be enough... */
+};
+
+struct x86_function {
+   GLubyte *store;		/* start of the code buffer allocated by x86_init_func() */
+   GLubyte *csr;		/* initialised to store; presumably the current write position */
+   GLuint stack_offset;	/* added to the displacement computed by x86_fn_arg() */
+   GLint need_emms;		/* set by the MMX-register emitters, cleared by mmx_emms() */
+   const char *fn;		/* purpose not established by the code in this header */
+};
+
+enum x86_reg_file {		/* values stored in x86_reg.file */
+   file_REG32,			/* 32-bit general registers (see enum x86_reg_name) */
+   file_MMX,			/* MMX registers */
+   file_XMM,			/* XMM registers */
+   file_x87			/* x87 stack registers */
+};
+
+/* Values for mod field of modr/m byte
+ */
+enum x86_reg_mod {		/* values stored in x86_reg.mod */
+   mod_INDIRECT,		/* 0 */
+   mod_DISP8,			/* 1 */
+   mod_DISP32,			/* 2 */
+   mod_REG			/* 3: operand is just a register */
+};
+
+enum x86_reg_name {		/* index values for file_REG32; order fixes the numeric values 0..7 */
+   reg_AX,			/* 0 */
+   reg_CX,			/* 1 */
+   reg_DX,			/* 2 */
+   reg_BX,			/* 3 */
+   reg_SP,			/* 4: base register used by x86_fn_arg() */
+   reg_BP,			/* 5 */
+   reg_SI,			/* 6 */
+   reg_DI			/* 7 */
+};
+
+
+enum x86_cc {			/* condition codes accepted by x86_jcc() / x86_jcc_forward() */
+   cc_O,			/* overflow */
+   cc_NO,			/* not overflow */
+   cc_NAE,			/* not above or equal / carry */
+   cc_AE,			/* above or equal / not carry */
+   cc_E,			/* equal / zero */
+   cc_NE			/* not equal / not zero */
+};
+
+enum sse_cc {			/* presumably the selector values for sse_cmpps()'s cc byte - confirm with callers */
+   cc_Equal,			/* 0 */
+   cc_LessThan,		/* 1 */
+   cc_LessThanEqual,		/* 2 */
+   cc_Unordered,		/* 3 */
+   cc_NotEqual,		/* 4 */
+   cc_NotLessThan,		/* 5 */
+   cc_NotLessThanEqual,	/* 6 */
+   cc_Ordered			/* 7 */
+};
+
+#define cc_Z  cc_E
+#define cc_NZ cc_NE
+
+/* Begin/end/retrieve function creation:
+ */
+
+
+void x86_init_func( struct x86_function *p );
+void x86_release_func( struct x86_function *p );
+void (*x86_get_func( struct x86_function *p ))( void );
+
+
+
+/* Create and manipulate registers and regmem values:
+ */
+struct x86_reg x86_make_reg( enum x86_reg_file file,
+			     enum x86_reg_name idx );
+
+struct x86_reg x86_make_disp( struct x86_reg reg,
+			      GLint disp );
+
+struct x86_reg x86_deref( struct x86_reg reg );
+
+struct x86_reg x86_get_base_reg( struct x86_reg reg );
+
+
+/* Labels, jumps and fixup:
+ */
+GLubyte *x86_get_label( struct x86_function *p );
+
+void x86_jcc( struct x86_function *p,
+	      enum x86_cc cc,
+	      GLubyte *label );
+
+GLubyte *x86_jcc_forward( struct x86_function *p,
+			  enum x86_cc cc );
+
+void x86_fixup_fwd_jump( struct x86_function *p,
+			 GLubyte *fixup );
+
+
+/* Macro for sse_shufps() and sse2_pshufd():
+ */
+#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))	/* pack four 2-bit lane selectors into one byte */
+#define SHUF_NOOP               SHUF(0,1,2,3)	/* identity selector; was RSW(0,1,2,3), a macro this header never defines */
+#define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)	/* extract the 2-bit selector for lane idx */
+
+void mmx_emms( struct x86_function *p );
+void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+
+void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, GLubyte cc );
+void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, GLubyte shuf );
+
+void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_dec( struct x86_function *p, struct x86_reg reg );
+void x86_inc( struct x86_function *p, struct x86_reg reg );
+void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_pop( struct x86_function *p, struct x86_reg reg );
+void x86_push( struct x86_function *p, struct x86_reg reg );
+void x86_ret( struct x86_function *p );
+void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void x86_sahf( struct x86_function *p );
+
+void x87_f2xm1( struct x86_function *p );
+void x87_fabs( struct x86_function *p );
+void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_faddp( struct x86_function *p, struct x86_reg dst );
+void x87_fchs( struct x86_function *p );
+void x87_fclex( struct x86_function *p );
+void x87_fcom( struct x86_function *p, struct x86_reg dst );
+void x87_fcomp( struct x86_function *p, struct x86_reg dst );
+void x87_fcos( struct x86_function *p );
+void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fdivp( struct x86_function *p, struct x86_reg dst );
+void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
+void x87_fist( struct x86_function *p, struct x86_reg dst );
+void x87_fistp( struct x86_function *p, struct x86_reg dst );
+void x87_fld( struct x86_function *p, struct x86_reg arg );
+void x87_fld1( struct x86_function *p );
+void x87_fldcw( struct x86_function *p, struct x86_reg arg );
+void x87_fldl2e( struct x86_function *p );
+void x87_fldln2( struct x86_function *p );
+void x87_fldz( struct x86_function *p );
+void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fmulp( struct x86_function *p, struct x86_reg dst );
+void x87_fnclex( struct x86_function *p );
+void x87_fprndint( struct x86_function *p );
+void x87_fscale( struct x86_function *p );
+void x87_fsin( struct x86_function *p );
+void x87_fsincos( struct x86_function *p );
+void x87_fsqrt( struct x86_function *p );
+void x87_fst( struct x86_function *p, struct x86_reg dst );
+void x87_fstp( struct x86_function *p, struct x86_reg dst );
+void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fsubp( struct x86_function *p, struct x86_reg dst );
+void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
+void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
+void x87_fxch( struct x86_function *p, struct x86_reg dst );
+void x87_fxtract( struct x86_function *p );
+void x87_fyl2x( struct x86_function *p );
+void x87_fyl2xp1( struct x86_function *p );
+void x87_fwait( struct x86_function *p );
+void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
+void x87_fucompp( struct x86_function *p );
+void x87_fucomp( struct x86_function *p, struct x86_reg arg );
+void x87_fucom( struct x86_function *p, struct x86_reg arg );
+
+
+
+/* Retrieve a reference to one of the function arguments, taking into
+ * account any push/pop activity.  Note - doesn't track explicit
+ * manipulation of ESP by other instructions.
+ */
+struct x86_reg x86_fn_arg( struct x86_function *p, GLuint arg );
+
+#endif
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse.c b/nx-X11/extras/Mesa/src/mesa/x86/sse.c
new file mode 100644
index 000000000..45852c4e1
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse.c
@@ -0,0 +1,123 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.0
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * PentiumIII-SIMD (SSE) optimizations contributed by
+ * Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+
+#include "sse.h"
+#include "common_x86_macros.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+
+
+#ifdef USE_SSE_ASM
+DECLARE_XFORM_GROUP( sse, 2 )
+DECLARE_XFORM_GROUP( sse, 3 )
+
+#if 1
+/* Some functions are not written in SSE-assembly, because the fpu ones are faster */
+extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );
+
+extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
+extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );
+/* XXX this function segfaults, see below */
+extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
+/* XXX this one works, see below */
+extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS );
+#else
+DECLARE_NORM_GROUP( sse )
+#endif
+
+
+extern void _ASMAPI
+_mesa_v16_sse_general_xform( GLfloat *first_vert,
+			     const GLfloat *m,
+			     const GLfloat *src,
+			     GLuint src_stride,
+			     GLuint count );
+
+extern void _ASMAPI
+_mesa_sse_project_vertices( GLfloat *first,
+			    GLfloat *last,
+			    const GLfloat *m,
+			    GLuint stride );
+
+extern void _ASMAPI
+_mesa_sse_project_clipped_vertices( GLfloat *first,
+				    GLfloat *last,
+				    const GLfloat *m,
+				    GLuint stride,
+				    const GLubyte *clipmask );
+#endif
+
+
+void _mesa_init_sse_transform_asm( void )	/* Install the SSE routines into the transform and normal dispatch tables. */
+{
+#ifdef USE_SSE_ASM	/* without USE_SSE_ASM the function body is empty */
+   ASSIGN_XFORM_GROUP( sse, 2 );	/* macro defined outside this chunk; pairs with DECLARE_XFORM_GROUP( sse, 2 ) */
+   ASSIGN_XFORM_GROUP( sse, 3 );	/* likewise for the size-3 group */
+
+#if 1	/* hand-picked subset; the #else branch below is currently disabled */
+   /* TODO: Finish these off.
+    */
+   _mesa_transform_tab[4][MATRIX_GENERAL] =
+      _mesa_sse_transform_points4_general;
+   _mesa_transform_tab[4][MATRIX_3D] =
+      _mesa_sse_transform_points4_3d;
+   /* XXX NOTE: _mesa_sse_transform_points4_identity segfaults with the
+      conformance tests, so use the x86 version.
+   */
+   _mesa_transform_tab[4][MATRIX_IDENTITY] =
+      _mesa_x86_transform_points4_identity;/*_mesa_sse_transform_points4_identity;*/
+
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
+      _mesa_sse_transform_normals_no_rot;
+   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
+      _mesa_sse_transform_rescale_normals;
+   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
+      _mesa_sse_transform_rescale_normals_no_rot;
+#else	/* disabled: would install the complete size-4 and normal groups */
+   ASSIGN_XFORM_GROUP( sse, 4 );
+
+   ASSIGN_NORM_GROUP( sse );
+#endif
+
+#ifdef DEBUG	/* debug builds run the math self-tests right after installing */
+   _math_test_all_transform_functions( "SSE" );
+   _math_test_all_normal_transform_functions( "SSE" );
+#endif
+#endif
+}
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse.h b/nx-X11/extras/Mesa/src/mesa/x86/sse.h
new file mode 100644
index 000000000..447f192a8
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse.h
@@ -0,0 +1,39 @@
+/* $Id: sse.h,v 1.1.1.1 2004/06/16 09:19:36 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * PentiumIII-SIMD (SSE) optimizations contributed by
+ * Andre Werthmann <wertmann@cs.uni-potsdam.de>
+ */
+
+#ifndef __SSE_H__
+#define __SSE_H__
+
+#include "math/m_xform.h"
+
+void _mesa_init_sse_transform_asm( void );
+
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse_normal.S b/nx-X11/extras/Mesa/src/mesa/x86/sse_normal.S
new file mode 100644
index 000000000..5bff8921e
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse_normal.S
@@ -0,0 +1,257 @@
+/* $Id: sse_normal.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache-misses !
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE-functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
+  */
+
+#ifdef USE_SSE_ASM
+#include "matypes.h"
+#include "norm_args.h"
+
+   SEG_TEXT
+
+#define M(i)    REGOFF(i * 4, EDX)
+#define S(i)	REGOFF(i * 4, ESI)
+#define D(i)	REGOFF(i * 4, EDI)
+#define STRIDE  REGOFF(12, ESI)
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
+HIDDEN(_mesa_sse_transform_rescale_normals_no_rot)
+GLNAME(_mesa_sse_transform_rescale_normals_no_rot):
+/* Per vector: dest = ( ux*m0*scale, uy*m5*scale, uz*m10*scale ), m = matrix->inv. */
+#define FRAME_OFFSET 8	/* two registers are pushed below */
+	PUSH_L  ( ESI )					/* saved here, restored before RET */
+	PUSH_L  ( EDI )					/* saved here, restored before RET */
+
+	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
+	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */
+
+	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
+	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */
+
+	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L	( ECX, ECX )
+	JZ( LLBL(K_G3TRNNRR_finish) )			/* count was zero; go to finish */
+
+	MOV_L	( STRIDE, EAX )				/* stride */
+	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest-count */
+
+	IMUL_L( CONST(16), ECX )			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 				/* count += dest ptr */
+
+ALIGNTEXT32
+	MOVSS	( M(0), XMM1 )				/* m0 */
+	MOVSS	( M(5), XMM2 )				/* m5 */
+	UNPCKLPS( XMM2, XMM1 )				/* m5 | m0 */
+	MOVSS	( ARG_SCALE, XMM0 )			/* scale */
+	SHUFPS	( CONST(0x0), XMM0, XMM0 )		/* scale | scale */
+	MULPS	( XMM0, XMM1 )				/* m5*scale | m0*scale */
+	MULSS	( M(10), XMM0 )				/* m10*scale */
+
+ALIGNTEXT32
+LLBL(K_G3TRNNRR_top):
+	MOVLPS	( S(0), XMM2 )				/* uy | ux */
+	MULPS	( XMM1, XMM2 )				/* uy*m5*scale | ux*m0*scale */
+	MOVLPS	( XMM2, D(0) )				/* ->D(1) | D(0) */
+
+	MOVSS	( S(2), XMM2 )				/* uz */
+	MULSS	( XMM0, XMM2 )				/* uz*m10*scale */
+	MOVSS	( XMM2, D(2) )				/* ->D(2) */
+
+LLBL(K_G3TRNNRR_skip):					/* label is not referenced within this routine */
+	ADD_L	( CONST(16), EDI )			/* dest advances 16 bytes per vector */
+	ADD_L	( EAX, ESI )				/* source advances by its stride */
+	CMP_L	( ECX, EDI )				/* ECX holds dest start + count*16 */
+	JNE	( LLBL(K_G3TRNNRR_top) )		/* loop until dest reaches that end */
+
+LLBL(K_G3TRNNRR_finish):
+	POP_L	( EDI )
+	POP_L	( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
+HIDDEN(_mesa_sse_transform_rescale_normals)
+GLNAME(_mesa_sse_transform_rescale_normals):
+/* Per vector: dest = scale * ( ux*m0+uy*m1+uz*m2, ux*m4+uy*m5+uz*m6, ux*m8+uy*m9+uz*m10 ), m = matrix->inv. */
+#define FRAME_OFFSET 8	/* two registers are pushed below */
+	PUSH_L  ( ESI )					/* saved here, restored before RET */
+	PUSH_L  ( EDI )					/* saved here, restored before RET */
+
+	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
+	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */
+
+	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
+	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */
+
+	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L	( ECX, ECX )
+	JZ( LLBL(K_G3TRNR_finish) )			/* count was zero; go to finish */
+
+	MOV_L	( STRIDE, EAX )				/* stride */
+	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest-count */
+
+	IMUL_L( CONST(16), ECX )			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 				/* count += dest ptr */
+
+ALIGNTEXT32
+	MOVSS	( M(0), XMM0 )				/* m0 */
+	MOVSS	( M(4), XMM1 )				/* m4 */
+	UNPCKLPS( XMM1, XMM0 )				/* m4 | m0 */
+
+	MOVSS	( ARG_SCALE, XMM4 )			/* scale */
+	SHUFPS	( CONST(0x0), XMM4, XMM4 )		/* scale | scale */
+
+	MULPS	( XMM4, XMM0 )				/* m4*scale | m0*scale */
+	MOVSS	( M(1), XMM1 )				/* m1 */
+	MOVSS	( M(5), XMM2 )				/* m5 */
+	UNPCKLPS( XMM2, XMM1 )				/* m5 | m1 */
+	MULPS	( XMM4, XMM1 )				/* m5*scale | m1*scale */
+	MOVSS	( M(2), XMM2 )				/* m2 */
+	MOVSS	( M(6), XMM3 )				/* m6 */
+	UNPCKLPS( XMM3, XMM2 )				/* m6 | m2 */
+	MULPS	( XMM4, XMM2 )				/* m6*scale | m2*scale */
+
+	MOVSS	( M(8), XMM6 )				/* m8 */
+	MULSS	( ARG_SCALE, XMM6 )			/* m8*scale */
+	MOVSS	( M(9), XMM7 )				/* m9 */
+	MULSS	( ARG_SCALE, XMM7 )			/* m9*scale */
+
+ALIGNTEXT32
+LLBL(K_G3TRNR_top):
+	MOVSS	( S(0), XMM3 )				/* ux */
+	SHUFPS	( CONST(0x0), XMM3, XMM3 )		/* ux | ux */
+	MULPS	( XMM0, XMM3 )				/* ux*m4 | ux*m0 */
+	MOVSS	( S(1), XMM4 )				/* uy */
+	SHUFPS	( CONST(0x0), XMM4, XMM4 )		/* uy | uy */
+	MULPS	( XMM1, XMM4 )				/* uy*m5 | uy*m1 */
+	MOVSS	( S(2), XMM5 )				/* uz */
+	SHUFPS	( CONST(0x0), XMM5, XMM5 )		/* uz | uz */
+	MULPS	( XMM2, XMM5 )				/* uz*m6 | uz*m2 */
+
+	ADDPS	( XMM4, XMM3 )				/* add the uy terms to the ux terms */
+	ADDPS	( XMM5, XMM3 )				/* add the uz terms */
+	MOVLPS	( XMM3, D(0) )				/* ->D(1) | D(0) */
+
+	MOVSS	( M(10), XMM3 )				/* m10 */
+	MULSS	( ARG_SCALE, XMM3 )			/* m10*scale */
+	MULSS	( S(2), XMM3 )				/* m10*scale*uz */
+	MOVSS	( S(1), XMM4 )				/* uy */
+	MULSS	( XMM7, XMM4 )				/* uy*m9*scale */
+	MOVSS	( S(0), XMM5 )				/* ux */
+	MULSS	( XMM6, XMM5 )				/* ux*m8*scale */
+
+	ADDSS	( XMM4, XMM3 )				/* add the uy term to the uz term */
+	ADDSS	( XMM5, XMM3 )				/* add the ux term */
+	MOVSS	( XMM3, D(2) )				/* ->D(2) */
+
+LLBL(K_G3TRNR_skip):					/* label is not referenced within this routine */
+	ADD_L	( CONST(16), EDI )			/* dest advances 16 bytes per vector */
+	ADD_L	( EAX, ESI )				/* source advances by its stride */
+	CMP_L	( ECX, EDI )				/* ECX holds dest start + count*16 */
+	JNE	( LLBL(K_G3TRNR_top) )			/* loop until dest reaches that end */
+
+LLBL(K_G3TRNR_finish):
+	POP_L	( EDI )
+	POP_L	( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
+HIDDEN(_mesa_sse_transform_normals_no_rot)
+GLNAME(_mesa_sse_transform_normals_no_rot):
+/* Per vector: dest = ( ux*m0, uy*m5, uz*m10 ), m = matrix->inv; no scale factor is applied. */
+#define FRAME_OFFSET 8	/* two registers are pushed below */
+	PUSH_L  ( ESI )					/* saved here, restored before RET */
+	PUSH_L  ( EDI )					/* saved here, restored before RET */
+
+	MOV_L	( ARG_IN, ESI )				/* ptr to source GLvector3f */
+	MOV_L	( ARG_DEST, EDI )			/* ptr to dest GLvector3f */
+
+	MOV_L	( ARG_MAT, EDX )			/* ptr to matrix */
+	MOV_L	( REGOFF(MATRIX_INV, EDX), EDX)		/* matrix->inv */
+
+	MOV_L	( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L	( ECX, ECX )
+	JZ( LLBL(K_G3TNNRR_finish) )			/* count was zero; go to finish */
+
+	MOV_L	( STRIDE, EAX )				/* stride */
+	MOV_L	( ECX, REGOFF(V4F_COUNT, EDI) )		/* set dest-count */
+
+	IMUL_L( CONST(16), ECX )			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )		/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )		/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 				/* count += dest ptr */
+
+ALIGNTEXT32
+	MOVSS( M(0), XMM0 )				/* m0 */
+	MOVSS( M(5), XMM1 )				/* m5 */
+	UNPCKLPS( XMM1, XMM0 )				/* m5 | m0 */
+	MOVSS( M(10), XMM1 )				/* m10 */
+
+ALIGNTEXT32
+LLBL(K_G3TNNRR_top):
+	MOVLPS( S(0), XMM2 )				/* uy | ux */
+	MULPS( XMM0, XMM2 )				/* uy*m5 | ux*m0 */
+	MOVLPS( XMM2, D(0) )				/* ->D(1) | D(0) */
+
+	MOVSS( S(2), XMM2 )				/* uz */
+	MULSS( XMM1, XMM2 )				/* uz*m10 */
+	MOVSS( XMM2, D(2) )				/* ->D(2) */
+
+LLBL(K_G3TNNRR_skip):					/* label is not referenced within this routine */
+	ADD_L	( CONST(16), EDI )			/* dest advances 16 bytes per vector */
+	ADD_L	( EAX, ESI )				/* source advances by its stride */
+	CMP_L	( ECX, EDI )				/* ECX holds dest start + count*16 */
+	JNE	( LLBL(K_G3TNNRR_top) )			/* loop until dest reaches that end */
+
+LLBL(K_G3TNNRR_finish):
+	POP_L	( EDI )
+	POP_L	( ESI )
+	RET
+#undef FRAME_OFFSET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse_xform1.S b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform1.S
new file mode 100644
index 000000000..17a61b410
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform1.S
@@ -0,0 +1,442 @@
+/* $Id: sse_xform1.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache misses
+  * - some more optimizations are possible
+  * - for 40-50% more performance in the SSE functions, the data
+  *   (transform matrix, src_vert, dst_vert) needs to be 16-byte aligned
+  */
+
+#ifdef USE_SSE_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+   SEG_TEXT
+
+#define S(i) 	REGOFF(i * 4, ESI)	/* i-th 4-byte element at the source pointer (ESI) */
+#define D(i) 	REGOFF(i * 4, EDI)	/* i-th 4-byte element at the dest pointer (EDI) */
+#define M(i) 	REGOFF(i * 4, EDX)	/* i-th 4-byte element at the matrix pointer (EDX) */
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_general)
+HIDDEN( _mesa_sse_transform_points1_general )
+GLNAME( _mesa_sse_transform_points1_general ):
+/* Per source vertex (x only): dest = x*(m0..m3) + (m12..m15); dest size 4. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    CMP_L( CONST(0), ECX )			/* count == 0 ? */
+    JE( LLBL(K_GTP1GR_finish) )			/* yes -> return without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    /* Matrix elements are loaded once; MOVAPS faults unless EDX is 16-byte aligned. */
+ALIGNTEXT32
+    MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  | m0  */
+    MOVAPS( M(12), XMM1 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP1GR_top):
+    MOVSS( S(0), XMM2 )				/* ox */
+    SHUFPS( CONST(0x0), XMM2, XMM2 )		/* ox | ox | ox | ox */
+    MULPS( XMM0, XMM2 )				/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+    ADDPS( XMM1, XMM2 )				/* +m15 | +m14 | +m13 | +m12 */
+    MOVUPS( XMM2, D(0) )			/* store all four lanes; MOVUPS tolerates an unaligned dest */
+
+LLBL(K_GTP1GR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
+    ADD_L     ( EAX, ESI )			/* next source vertex */
+    CMP_L     ( ECX, EDI )			/* reached end of dest? */
+    JNE       ( LLBL(K_GTP1GR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP1GR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_identity)
+HIDDEN(_mesa_sse_transform_points1_identity)
+GLNAME( _mesa_sse_transform_points1_identity ):
+/* Copies the x component of each source vertex to dest unchanged; dest size 1. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP1IR_finish) ) 		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    CMP_L( ESI, EDI )				/* source data == dest data ? */
+    JE( LLBL(K_GTP1IR_finish) )			/* yes -> in place, nothing to copy */
+
+
+ALIGNTEXT32
+LLBL(K_GTP1IR_top):
+    MOV_L( S(0), EDX )				/* copy x as a raw 32-bit word via EDX */
+    MOV_L( EDX, D(0) )
+
+LLBL(K_GTP1IR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
+    ADD_L     ( EAX, ESI )			/* next source vertex */
+    CMP_L     ( ECX, EDI )			/* reached end of dest? */
+    JNE       ( LLBL(K_GTP1IR_top) )		/* no -> copy next vertex */
+
+LLBL(K_GTP1IR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
+HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
+GLNAME(_mesa_sse_transform_points1_3d_no_rot):
+/* Per source vertex (x only): dest = ( x*m0 + m12, m13, m14 ); dest size 3. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP13DNRR_finish) ) 		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    /* Loop-invariant matrix elements are loaded once. */
+ALIGNTEXT32
+    MOVSS( M(0), XMM0 )				/* m0 */
+    MOVSS( M(12), XMM1 )			/* m12 */
+    MOVSS( M(13), XMM2 )			/* m13 */
+    MOVSS( M(14), XMM3 )			/* m14 */
+
+ALIGNTEXT32
+LLBL(K_GTP13DNRR_top):
+    MOVSS( S(0), XMM4 )				/* ox */
+    MULSS( XMM0, XMM4 )				/* ox*m0 */
+    ADDSS( XMM1, XMM4 )				/* ox*m0+m12 */
+    MOVSS( XMM4, D(0) )				/* ->D(0) */
+    /* y and z do not depend on the source vertex: constants m13 and m14. */
+    MOVSS( XMM2, D(1) )				/* m13->D(1) */
+    MOVSS( XMM3, D(2) )				/* m14->D(2) */
+
+LLBL(K_GTP13DNRR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
+    ADD_L    ( EAX, ESI )			/* next source vertex */
+    CMP_L    ( ECX, EDI )			/* reached end of dest? */
+    JNE      ( LLBL(K_GTP13DNRR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP13DNRR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
+HIDDEN(_mesa_sse_transform_points1_perspective)
+GLNAME(_mesa_sse_transform_points1_perspective):
+/* Per source vertex (x only): dest = ( x*m0, 0, m14, 0 ); dest size 4. */
+#define FRAME_OFFSET 8
+    PUSH_L   ( ESI )
+    PUSH_L   ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP13PR_finish) )		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    /* Loop-invariant values: a zero register and matrix elements m0, m14. */
+ALIGNTEXT32
+    XORPS( XMM0, XMM0 )				/* 0 | 0 | 0 | 0 */
+    MOVSS( M(0), XMM1 )				/* m0 */
+    MOVSS( M(14), XMM2 )			/* m14 */
+
+ALIGNTEXT32
+LLBL(K_GTP13PR_top):
+    MOVSS( S(0), XMM3 )				/* ox */
+    MULSS( XMM1, XMM3 )				/* ox*m0 */
+    MOVSS( XMM3, D(0) )				/* ox*m0->D(0) */
+    MOVSS( XMM2, D(2) )				/* m14->D(2) */
+
+    MOVSS( XMM0, D(1) )				/* 0->D(1) */
+    MOVSS( XMM0, D(3) )				/* 0->D(3) */
+
+LLBL(K_GTP13PR_skip):
+    ADD_L( CONST(16), EDI )			/* next dest vertex */
+    ADD_L( EAX, ESI )				/* next source vertex */
+    CMP_L( ECX, EDI )				/* reached end of dest? */
+    JNE( LLBL(K_GTP13PR_top) )			/* no -> transform next vertex */
+
+LLBL(K_GTP13PR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_2d)
+HIDDEN(_mesa_sse_transform_points1_2d)
+GLNAME(_mesa_sse_transform_points1_2d):
+/* Per source vertex (x only): dest = ( x*m0 + m12, x*m1 + m13 ); dest size 2. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP13P2DR_finish) ) 		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+    /* MOVLPS fills only the low two lanes; the upper lanes are never stored. */
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )			/* m1  | m0  */
+    MOVLPS( M(12), XMM1 )			/* m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP13P2DR_top):
+    MOVSS( S(0), XMM2 )				/* ox */
+    SHUFPS( CONST(0x0), XMM2, XMM2 )		/* ox | ox | ox | ox */
+    MULPS( XMM0, XMM2 )				/* - | - | ox*m1 | ox*m0 */
+    ADDPS( XMM1, XMM2 )				/* - | - | ox*m1+m13 | ox*m0+m12 */
+    MOVLPS( XMM2, D(0) )			/* ->D(1) | ->D(0) */
+
+LLBL(K_GTP13P2DR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
+    ADD_L    ( EAX, ESI )			/* next source vertex */
+    CMP_L    ( ECX, EDI )			/* reached end of dest? */
+    JNE      ( LLBL(K_GTP13P2DR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP13P2DR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
+HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
+GLNAME(_mesa_sse_transform_points1_2d_no_rot):
+/* Per source vertex (x only): dest = ( x*m0 + m12, m13 ); dest size 2. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	/* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix; base register of M(i) */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP13P2DNRR_finish) ) 	/* count was zero; finish without touching dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* ECX = end of dest data (loop sentinel) */
+	/* Loop-invariant matrix elements are loaded once. */
+ALIGNTEXT32
+	MOVSS( M(0), XMM0 )			/* m0 */
+	MOVSS( M(12), XMM1 )			/* m12 */
+	MOVSS( M(13), XMM2 )			/* m13 */
+
+ALIGNTEXT32
+LLBL(K_GTP13P2DNRR_top):
+	MOVSS( S(0), XMM3 )			/* ox */
+	MULSS( XMM0, XMM3 )			/* ox*m0 */
+	ADDSS( XMM1, XMM3 )			/* ox*m0+m12 */
+	MOVSS( XMM3, D(0) )			/* ->D(0) */
+	MOVSS( XMM2, D(1) )			/* m13->D(1), independent of the source vertex */
+
+LLBL(K_GTP13P2DNRR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex */
+	ADD_L( EAX, ESI )			/* next source vertex */
+	CMP_L( ECX, EDI )			/* reached end of dest? */
+	JNE( LLBL(K_GTP13P2DNRR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP13P2DNRR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points1_3d)
+HIDDEN(_mesa_sse_transform_points1_3d)
+GLNAME(_mesa_sse_transform_points1_3d):
+/* Per source vertex (x only): dest = ( x*m0 + m12, x*m1 + m13, x*m2 + m14 ); dest size 3. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	/* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix; base register of M(i) */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP13P3DR_finish) ) 	/* count was zero; finish without touching dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* ECX = end of dest data (loop sentinel) */
+
+	/* Matrix elements are loaded once; MOVAPS faults unless EDX is 16-byte aligned. */
+ALIGNTEXT32
+	MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  |  m0 */
+	MOVAPS( M(12), XMM1 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP13P3DR_top):
+	MOVSS( S(0), XMM2 )			/* ox */
+	SHUFPS( CONST(0x0), XMM2, XMM2 )	/* ox | ox | ox | ox */
+	MULPS( XMM0, XMM2 )			/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+	ADDPS( XMM1, XMM2 )			/* +m15  | +m14  | +m13  | +m12  */
+	MOVLPS( XMM2, D(0) )			/*   -   |   -   | ->D(1)| ->D(0)*/
+	UNPCKHPS( XMM2, XMM2 )			/* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
+	MOVSS( XMM2, D(2) )			/* low lane (ox*m2+m14)->D(2); the w lane is discarded */
+
+LLBL(K_GTP13P3DR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex */
+	ADD_L( EAX, ESI )			/* next source vertex */
+	CMP_L( ECX, EDI )			/* reached end of dest? */
+	JNE( LLBL(K_GTP13P3DR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP13P3DR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse_xform2.S b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform2.S
new file mode 100644
index 000000000..97c7e5d77
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform2.S
@@ -0,0 +1,461 @@
+/* $Id: sse_xform2.S,v 1.1.1.2 2005/07/31 16:46:40 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache misses
+  * - some more optimizations are possible
+  * - for 40-50% more performance in the SSE functions, the data
+  *   (transform matrix, src_vert, dst_vert) needs to be 16-byte aligned
+  */
+
+#ifdef USE_SSE_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+   SEG_TEXT
+
+#define S(i) 	REGOFF(i * 4, ESI)	/* i-th 4-byte element at the source pointer (ESI) */
+#define D(i) 	REGOFF(i * 4, EDI)	/* i-th 4-byte element at the dest pointer (EDI) */
+#define M(i) 	REGOFF(i * 4, EDX)	/* i-th 4-byte element at the matrix pointer (EDX) */
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_general)
+HIDDEN (_mesa_sse_transform_points2_general)
+GLNAME( _mesa_sse_transform_points2_general ):
+/* Per source vertex (x, y): dest = x*(m0..m3) + y*(m4..m7) + (m12..m15); dest size 4. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX )
+    JZ( LLBL(K_GTP2GR_finish) )			/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+    /* Matrix elements are loaded once; MOVAPS faults unless EDX is 16-byte aligned. */
+ALIGNTEXT32
+    MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  | m0 */
+    MOVAPS( M(4), XMM1 )			/* m7  | m6  | m5  | m4 */
+    MOVAPS( M(12), XMM2 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP2GR_top):
+    MOVSS( S(0), XMM3 )				/* ox */
+    SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox */
+    MULPS( XMM0, XMM3 )				/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+    MOVSS( S(1), XMM4 )				/* oy */
+    SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy */
+    MULPS( XMM1, XMM4 )				/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+    /* NOTE(review): the store below is MOVAPS, so dest data must be 16-byte aligned - confirm callers guarantee it. */
+    ADDPS( XMM4, XMM3 )				/* ox*m[0..3] + oy*m[4..7] */
+    ADDPS( XMM2, XMM3 )				/* + m[12..15] */
+    MOVAPS( XMM3, D(0) )			/* store all four lanes (aligned store) */
+
+LLBL(K_GTP2GR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
+    ADD_L     ( EAX, ESI )			/* next source vertex */
+    CMP_L     ( ECX, EDI )			/* reached end of dest? */
+    JNE       ( LLBL(K_GTP2GR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP2GR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_identity)
+HIDDEN(_mesa_sse_transform_points2_identity)
+GLNAME( _mesa_sse_transform_points2_identity ):
+/* Copies x and y of each source vertex to dest unchanged; dest size 2. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP2IR_finish) )			/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    CMP_L( ESI, EDI )				/* source data == dest data ? */
+    JE( LLBL(K_GTP2IR_finish) )			/* yes -> in place, nothing to copy */
+
+
+ALIGNTEXT32
+LLBL(K_GTP2IR_top):
+    MOV_L     ( S(0), EDX )			/* copy x as a raw 32-bit word via EDX */
+    MOV_L     ( EDX, D(0) )
+    MOV_L     ( S(1), EDX )			/* copy y the same way */
+    MOV_L     ( EDX, D(1) )
+
+LLBL(K_GTP2IR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex */
+    ADD_L     ( EAX, ESI )			/* next source vertex */
+    CMP_L     ( ECX, EDI )			/* reached end of dest? */
+    JNE       ( LLBL(K_GTP2IR_top) )		/* no -> copy next vertex */
+
+LLBL(K_GTP2IR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
+HIDDEN(_mesa_sse_transform_points2_3d_no_rot)
+GLNAME(_mesa_sse_transform_points2_3d_no_rot):
+/* Per source vertex (x, y): dest = ( x*m0 + m12, y*m5 + m13, m14 ); dest size 3. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP23DNRR_finish) ) 		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+
+    /* Loop-invariant matrix elements are loaded once; XMM2 is reused after the unpack. */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )			/* - | - |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )			/* - | - |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )			/* - | - | m5  | m0  */
+    MOVLPS   ( M(12), XMM2 )			/* - | - | m13 | m12 */
+    MOVSS    ( M(14), XMM3 )			/* - | - |  -  | m14 */
+
+ALIGNTEXT32
+LLBL(K_GTP23DNRR_top):
+    MOVLPS   ( S(0), XMM0 )			/* - | - |  oy   | ox */
+    MULPS    ( XMM1, XMM0 )			/* - | - | oy*m5 | ox*m0 */
+    ADDPS    ( XMM2, XMM0 )			/* - | - | +m13  | +m12 */
+    MOVLPS   ( XMM0, D(0) )			/* -> D(1) | -> D(0) */
+
+    MOVSS    ( XMM3, D(2) )			/* m14 -> D(2), independent of the source vertex */
+
+LLBL(K_GTP23DNRR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
+    ADD_L    ( EAX, ESI )			/* next source vertex */
+    CMP_L    ( ECX, EDI )			/* reached end of dest? */
+    JNE      ( LLBL(K_GTP23DNRR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP23DNRR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
+HIDDEN(_mesa_sse_transform_points2_perspective)
+GLNAME(_mesa_sse_transform_points2_perspective):
+/* Per source vertex (x, y): dest = ( x*m0, y*m5, m14, 0 ); dest size 4. */
+#define FRAME_OFFSET 8
+    PUSH_L   ( ESI )
+    PUSH_L   ( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI )	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP23PR_finish) )		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+    /* Loop-invariant values: matrix elements m0, m5, m14 and a zero register. */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )			/* -  | -  |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )			/* -  | -  |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )			/* -  | -  | m5  | m0  */
+    MOVSS    ( M(14), XMM3 )			/* m14 */
+    XORPS    ( XMM0, XMM0 )			/* 0 | 0 | 0 | 0 */
+
+ALIGNTEXT32
+LLBL(K_GTP23PR_top):
+    MOVLPS( S(0), XMM4 )			/* oy | ox */
+    MULPS( XMM1, XMM4 )				/* oy*m5 | ox*m0 */
+    MOVLPS( XMM4, D(0) )			/* ->D(1) | ->D(0) */
+    MOVSS( XMM3, D(2) )				/* m14->D(2) */
+    MOVSS( XMM0, D(3) )				/* 0->D(3) */
+
+LLBL(K_GTP23PR_skip):
+    ADD_L( CONST(16), EDI )			/* next dest vertex */
+    ADD_L( EAX, ESI )				/* next source vertex */
+    CMP_L( ECX, EDI )				/* reached end of dest? */
+    JNE( LLBL(K_GTP23PR_top) )			/* no -> transform next vertex */
+
+LLBL(K_GTP23PR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_2d)
+HIDDEN(_mesa_sse_transform_points2_2d)
+GLNAME(_mesa_sse_transform_points2_2d):
+/* Per source vertex (x, y): dest = ( x*m0 + y*m4 + m12, x*m1 + y*m5 + m13 ); dest size 2. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+    /* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix; base register of M(i) */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP23P2DR_finish) ) 		/* count was zero; finish without touching dest */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+    OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+    MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* ECX = end of dest data (loop sentinel) */
+    /* MOVLPS fills only the low two lanes; the upper lanes are never stored. */
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )			/* m1  | m0 */
+    MOVLPS( M(4), XMM1 )			/* m5  | m4 */
+    MOVLPS( M(12), XMM2 )			/* m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP23P2DR_top):
+    MOVSS( S(0), XMM3 )				/* ox */
+    SHUFPS( CONST(0x0), XMM3, XMM3 )		/* ox | ox | ox | ox (low two lanes used) */
+    MULPS( XMM0, XMM3 )				/* - | - | ox*m1 | ox*m0 */
+
+    MOVSS( S(1), XMM4 )				/* oy */
+    SHUFPS( CONST(0x0), XMM4, XMM4 )		/* oy | oy | oy | oy (low two lanes used) */
+    MULPS( XMM1, XMM4 )				/* - | - | oy*m5 | oy*m4 */
+
+    ADDPS( XMM4, XMM3 )				/* - | - | ox*m1+oy*m5 | ox*m0+oy*m4 */
+    ADDPS( XMM2, XMM3 )				/* - | - | +m13 | +m12 */
+    MOVLPS( XMM3, D(0) )			/* ->D(1) | ->D(0) */
+
+LLBL(K_GTP23P2DR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex */
+    ADD_L    ( EAX, ESI )			/* next source vertex */
+    CMP_L    ( ECX, EDI )			/* reached end of dest? */
+    JNE      ( LLBL(K_GTP23P2DR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP23P2DR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
+HIDDEN(_mesa_sse_transform_points2_2d_no_rot)
+GLNAME(_mesa_sse_transform_points2_2d_no_rot):
+/* Per source vertex (x, y): dest = ( x*m0 + m12, y*m5 + m13 ); dest size 2. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	/* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix; base register of M(i) */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP23P2DNRR_finish) ) 	/* count was zero; finish without touching dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* ECX = end of dest data (loop sentinel) */
+	/* Loop-invariant matrix elements are loaded once; XMM2 is reused after the unpack. */
+ALIGNTEXT32
+	MOVSS    ( M(0), XMM1 )			/* m0 */
+	MOVSS    ( M(5), XMM2 )			/* m5 */
+	UNPCKLPS ( XMM2, XMM1 )			/* m5 | m0 */
+	MOVLPS   ( M(12), XMM2 )		/* m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP23P2DNRR_top):
+	MOVLPS( S(0), XMM0 )			/* oy | ox */
+	MULPS( XMM1, XMM0 )			/* oy*m5 | ox*m0 */
+	ADDPS( XMM2, XMM0 )			/* +m13 | +m12 */
+	MOVLPS( XMM0, D(0) )			/* ->D(1) | ->D(0) */
+
+LLBL(K_GTP23P2DNRR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex */
+	ADD_L( EAX, ESI )			/* next source vertex */
+	CMP_L( ECX, EDI )			/* reached end of dest? */
+	JNE( LLBL(K_GTP23P2DNRR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP23P2DNRR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points2_3d)
+HIDDEN(_mesa_sse_transform_points2_3d)
+GLNAME(_mesa_sse_transform_points2_3d):
+/* Per source vertex (x, y): dest[k] = x*m[k] + y*m[4+k] + m[12+k] for k = 0..2; dest size 3. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	/* The +8 below and FRAME_OFFSET both equal the 8 bytes just pushed. */
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix; base register of M(i) */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP23P3DR_finish) ) 	/* count was zero; finish without touching dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 (bytes per dest vertex) */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* ECX = end of dest data (loop sentinel) */
+	/* Matrix elements are loaded once; MOVAPS faults unless EDX is 16-byte aligned. */
+ALIGNTEXT32
+	MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  | m0  (m3 lane unused) */
+	MOVAPS( M(4), XMM1 )			/* m7  | m6  | m5  | m4  (m7 lane unused) */
+	MOVAPS( M(12), XMM2 )			/* m15 | m14 | m13 | m12 (m15 lane unused) */
+
+ALIGNTEXT32
+LLBL(K_GTP23P3DR_top):
+	MOVSS( S(0), XMM3 )			/* ox */
+	SHUFPS( CONST(0x0), XMM3, XMM3 )	/* ox | ox | ox | ox */
+	MULPS( XMM0, XMM3 )			/* - | ox*m2 | ox*m1 | ox*m0 */
+
+	MOVSS( S(1), XMM4 )			/* oy */
+	SHUFPS( CONST(0x0), XMM4, XMM4 )	/* oy | oy | oy | oy */
+	MULPS( XMM1, XMM4 )			/* - | oy*m6 | oy*m5 | oy*m4 */
+
+	ADDPS( XMM4, XMM3 )			/* ox*m[0..2] + oy*m[4..6] */
+	ADDPS( XMM2, XMM3 )			/* + m[12..14] */
+
+	MOVLPS( XMM3, D(0) )			/* ->D(1) | ->D(0) */
+	UNPCKHPS( XMM3, XMM3 )			/* move lane 2 down to lane 0 */
+	MOVSS( XMM3, D(2) )			/* ->D(2) */
+
+LLBL(K_GTP23P3DR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex */
+	ADD_L( EAX, ESI )			/* next source vertex */
+	CMP_L( ECX, EDI )			/* reached end of dest? */
+	JNE( LLBL(K_GTP23P3DR_top) )		/* no -> transform next vertex */
+
+LLBL(K_GTP23P3DR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse_xform3.S b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform3.S
new file mode 100644
index 000000000..0449d6858
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform3.S
@@ -0,0 +1,507 @@
+/* $Id: sse_xform3.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** TODO:
+  * - insert PREFETCH instructions to avoid cache misses
+  * - some more optimizations are possible...
+  * - for 40-50% more performance in the SSE functions, the
+  *   data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned
+  */
+
+#ifdef USE_SSE_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+   SEG_TEXT
+/* Float i (4 bytes each) at the current source vertex (ESI), dest vertex (EDI) and matrix (EDX). */
+#define S(i) 	REGOFF(i * 4, ESI)
+#define D(i) 	REGOFF(i * 4, EDI)
+#define M(i) 	REGOFF(i * 4, EDX)
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_general)
+HIDDEN(_mesa_sse_transform_points3_general)
+GLNAME( _mesa_sse_transform_points3_general ):
+/* Per vertex: dest = ox*(m0..m3) + oy*(m4..m7) + oz*(m8..m11) + (m12..m15); source w is never read. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+/* "+8" below: presumably the 8 bytes just pushed (same value as FRAME_OFFSET) -- confirm in xform_args.h. */
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    CMP_L     ( CONST(0), ECX )			/* count == 0 ? */
+    JE        ( LLBL(K_GTPGR_finish) )		/* yes -> nothing to do (dest is left unmodified). */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* count += dest ptr; ECX = end of dest data */
+
+/* The matrix stays in XMM0-XMM3 for the whole loop; lanes are listed high | ... | low. */
+ALIGNTEXT32
+    MOVAPS    ( REGOFF(0, EDX), XMM0 )	/* m3  | m2  | m1  | m0 */
+    MOVAPS    ( REGOFF(16, EDX), XMM1 )	/* m7  | m6  | m5  | m4 */
+    MOVAPS    ( REGOFF(32, EDX), XMM2 )	/* m11 | m10 | m9  | m8 */
+    MOVAPS    ( REGOFF(48, EDX), XMM3 )	/* m15 | m14 | m13 | m12 */
+
+
+ALIGNTEXT32
+LLBL(K_GTPGR_top):
+    MOVSS     ( REGOFF(0, ESI), XMM4 )		/*    |    |    | ox */
+    SHUFPS    ( CONST(0x0), XMM4, XMM4 )	/* ox | ox | ox | ox */
+    MOVSS     ( REGOFF(4, ESI), XMM5 )		/*    |    |    | oy */
+    SHUFPS    ( CONST(0x0), XMM5, XMM5 )	/* oy | oy | oy | oy */
+    MOVSS     ( REGOFF(8, ESI), XMM6 )		/*    |    |    | oz */
+    SHUFPS    ( CONST(0x0), XMM6, XMM6 )	/* oz | oz | oz | oz */
+
+    MULPS     ( XMM0, XMM4 )		/* m3*ox  | m2*ox  | m1*ox | m0*ox */
+    MULPS     ( XMM1, XMM5 )		/* m7*oy  | m6*oy  | m5*oy | m4*oy */
+    MULPS     ( XMM2, XMM6 )		/* m11*oz | m10*oz | m9*oz | m8*oz */
+
+    ADDPS     ( XMM5, XMM4 )		/* ox terms + oy terms */
+    ADDPS     ( XMM6, XMM4 )		/* + oz terms */
+    ADDPS     ( XMM3, XMM4 )		/* + (m15 | m14 | m13 | m12) */
+
+    MOVAPS    ( XMM4, REGOFF(0, EDI) )	/* aligned 16-byte store of all four results */
+
+LLBL(K_GTPGR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )			/* next source vertex (source stride) */
+    CMP_L     ( ECX, EDI )			/* reached end of dest data? */
+    JNE       ( LLBL(K_GTPGR_top) )
+
+LLBL(K_GTPGR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_identity)
+HIDDEN(_mesa_sse_transform_points3_identity)
+GLNAME( _mesa_sse_transform_points3_identity ):
+/* Copies x, y, z of every source vertex to dest; the matrix argument is never read. */
+#define FRAME_OFFSET 8
+    PUSH_L    ( ESI )
+    PUSH_L    ( EDI )
+
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTPIR_finish) ) 			/* count was zero; go to finish (dest is left unmodified) */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* count += dest ptr; ECX = end of dest data */
+/* Same data pointer for source and dest: nothing to copy (flags/count/size were already set above). */
+    CMP_L( ESI, EDI )
+    JE( LLBL(K_GTPIR_finish) )
+
+
+ALIGNTEXT32
+LLBL(K_GTPIR_top):
+    MOVLPS    ( S(0), XMM0 )			/* oy | ox */
+    MOVLPS    ( XMM0, D(0) )			/* ->D(1) | ->D(0) */
+    MOVSS     ( S(2), XMM0 )			/* oz */
+    MOVSS     ( XMM0, D(2) )			/* ->D(2) */
+
+LLBL(K_GTPIR_skip):
+    ADD_L     ( CONST(16), EDI )		/* next dest vertex (fixed 16-byte step) */
+    ADD_L     ( EAX, ESI )			/* next source vertex (source stride) */
+    CMP_L     ( ECX, EDI )			/* reached end of dest data? */
+    JNE       ( LLBL(K_GTPIR_top) )
+
+LLBL(K_GTPIR_finish):
+    POP_L     ( EDI )
+    POP_L     ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
+HIDDEN(_mesa_sse_transform_points3_3d_no_rot)
+GLNAME(_mesa_sse_transform_points3_3d_no_rot):
+/* Per vertex: dest.x = ox*m0 + m12, dest.y = oy*m5 + m13, dest.z = oz*m10 + m14; dest.w is not written. */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP3DNRR_finish) ) 		/* count was zero; go to finish (dest is left unmodified) */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* count += dest ptr; ECX = end of dest data */
+
+/* Only m0, m5, m10, m12, m13 and m14 are read from the matrix. */
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )			/* - | - |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )			/* - | - |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )			/* - | - | m5  | m0  */
+    MOVLPS   ( M(12), XMM2 )			/* - | - | m13 | m12 */
+    MOVSS    ( M(10), XMM3 )			/* - | - |  -  | m10 */
+    MOVSS    ( M(14), XMM4 )			/* - | - |  -  | m14 */
+
+ALIGNTEXT32
+LLBL(K_GTP3DNRR_top):
+/* Only the two low lanes of XMM0 carry data here; the upper lanes are never stored. */
+    MOVLPS   ( S(0), XMM0 )			/* - | - |  s1   | s0 */
+    MULPS    ( XMM1, XMM0 )			/* - | - | s1*m5 | s0*m0 */
+    ADDPS    ( XMM2, XMM0 )			/* - | - | +m13  | +m12 */
+    MOVLPS   ( XMM0, D(0) )			/* -> D(1) | -> D(0) */
+
+    MOVSS    ( S(2), XMM0 )			/* sz */
+    MULSS    ( XMM3, XMM0 )			/* sz*m10 */
+    ADDSS    ( XMM4, XMM0 )			/* +m14 */
+    MOVSS    ( XMM0, D(2) )			/* -> D(2) */
+
+LLBL(K_GTP3DNRR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )			/* next source vertex (source stride) */
+    CMP_L    ( ECX, EDI )			/* reached end of dest data? */
+    JNE      ( LLBL(K_GTP3DNRR_top) )
+
+LLBL(K_GTP3DNRR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
+HIDDEN(_mesa_sse_transform_points3_perspective)
+GLNAME(_mesa_sse_transform_points3_perspective):
+/* Per vertex: dest.x = ox*m0 + oz*m8, dest.y = oy*m5 + oz*m9, dest.z = oz*m10 + m14, dest.w = -oz. */
+#define FRAME_OFFSET 8
+    PUSH_L   ( ESI )
+    PUSH_L   ( EDI )
+
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP3PR_finish) )			/* count was zero; go to finish (dest is left unmodified) */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+    OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+    MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* count += dest ptr; ECX = end of dest data */
+
+ALIGNTEXT32
+    MOVSS    ( M(0), XMM1 )			/* -  | -  |  -  | m0  */
+    MOVSS    ( M(5), XMM2 )			/* -  | -  |  -  | m5  */
+    UNPCKLPS ( XMM2, XMM1 )			/* -  | -  | m5  | m0  */
+    MOVLPS   ( M(8), XMM2 )			/* -  | -  | m9  | m8  */
+    MOVSS    ( M(10), XMM3 )			/* m10 */
+    MOVSS    ( M(14), XMM4 )			/* m14 */
+    XORPS    ( XMM6, XMM6 )			/* 0 (kept for the whole loop) */
+
+ALIGNTEXT32
+LLBL(K_GTP3PR_top):
+    MOVLPS   ( S(0), XMM0 )			/* oy | ox */
+    MULPS    ( XMM1, XMM0 )			/* oy*m5 | ox*m0 */
+    MOVSS    ( S(2), XMM5 )			/* oz */
+    SHUFPS   ( CONST(0x0), XMM5, XMM5 )		/* oz | oz */
+    MULPS    ( XMM2, XMM5 )			/* oz*m9 | oz*m8 */
+    ADDPS    ( XMM5, XMM0 )			/* oy*m5+oz*m9 | ox*m0+oz*m8 */
+    MOVLPS   ( XMM0, D(0) )			/* ->D(1) | ->D(0) */
+
+    MOVSS    ( S(2), XMM0 )			/* oz */
+    MULSS    ( XMM3, XMM0 )			/* oz*m10 */
+    ADDSS    ( XMM4, XMM0 )			/* +m14 */
+    MOVSS    ( XMM0, D(2) )			/* ->D(2) */
+
+    MOVSS    ( S(2), XMM0 )			/* oz */
+    MOVSS    ( XMM6, XMM5 )			/* 0 (low lane only) */
+    SUBPS    ( XMM0, XMM5 )			/* 0 - oz in the low lane; other lanes are not stored */
+    MOVSS    ( XMM5, D(3) )			/* ->D(3) */
+
+LLBL(K_GTP3PR_skip):
+    ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+    ADD_L( EAX, ESI )				/* next source vertex (source stride) */
+    CMP_L( ECX, EDI )				/* reached end of dest data? */
+    JNE( LLBL(K_GTP3PR_top) )
+
+LLBL(K_GTP3PR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_2d)
+HIDDEN(_mesa_sse_transform_points3_2d)
+GLNAME(_mesa_sse_transform_points3_2d):
+/* Per vertex: dest.x = ox*m0 + oy*m4 + m12, dest.y = ox*m1 + oy*m5 + m13, dest.z = oz (copied). */
+#define FRAME_OFFSET 8
+    PUSH_L( ESI )
+    PUSH_L( EDI )
+
+    MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+    MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+    MOV_L( ARG_MATRIX, EDX ) 			/* ptr to matrix */
+    MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+    TEST_L( ECX, ECX)
+    JZ( LLBL(K_GTP3P2DR_finish) ) 		/* count was zero; go to finish (dest is left unmodified) */
+
+    MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+    OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+    MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+    MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+    SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+    MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+    MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+    ADD_L( EDI, ECX ) 				/* count += dest ptr; ECX = end of dest data */
+
+ALIGNTEXT32
+    MOVLPS( M(0), XMM0 )			/* m1  | m0 */
+    MOVLPS( M(4), XMM1 )			/* m5  | m4 */
+    MOVLPS( M(12), XMM2 )			/* m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP3P2DR_top):
+    MOVSS    ( S(0), XMM3 )			/* ox */
+    SHUFPS   ( CONST(0x0), XMM3, XMM3 )		/* ox | ox */
+    MULPS    ( XMM0, XMM3 )			/* ox*m1 | ox*m0 */
+    MOVSS    ( S(1), XMM4 )			/* oy */
+    SHUFPS   ( CONST(0x0), XMM4, XMM4 )		/* oy | oy */
+    MULPS    ( XMM1, XMM4 )			/* oy*m5 | oy*m4 */
+
+    ADDPS    ( XMM4, XMM3 )			/* ox*m1+oy*m5 | ox*m0+oy*m4 */
+    ADDPS    ( XMM2, XMM3 )			/* +m13 | +m12 */
+    MOVLPS   ( XMM3, D(0) )			/* ->D(1) | ->D(0) */
+
+    MOVSS    ( S(2), XMM3 )			/* oz, passed through unchanged */
+    MOVSS    ( XMM3, D(2) )			/* ->D(2) */
+
+LLBL(K_GTP3P2DR_skip):
+    ADD_L    ( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+    ADD_L    ( EAX, ESI )			/* next source vertex (source stride) */
+    CMP_L    ( ECX, EDI )			/* reached end of dest data? */
+    JNE      ( LLBL(K_GTP3P2DR_top) )
+
+LLBL(K_GTP3P2DR_finish):
+    POP_L    ( EDI )
+    POP_L    ( ESI )
+    RET
+#undef FRAME_OFFSET
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
+HIDDEN(_mesa_sse_transform_points3_2d_no_rot)
+GLNAME(_mesa_sse_transform_points3_2d_no_rot):
+/* Per vertex: dest.x = ox*m0 + m12, dest.y = oy*m5 + m13, dest.z = oz (copied). */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP3P2DNRR_finish) ) 	/* count was zero; go to finish (dest is left unmodified) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* count += dest ptr; ECX = end of dest data */
+
+ALIGNTEXT32
+	MOVSS    ( M(0), XMM1 )			/* m0 */
+	MOVSS    ( M(5), XMM2 )			/* m5 */
+	UNPCKLPS ( XMM2, XMM1 )			/* m5 | m0 */
+	MOVLPS   ( M(12), XMM2 )		/* m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP3P2DNRR_top):
+	MOVLPS( S(0), XMM0 )			/* oy | ox */
+	MULPS( XMM1, XMM0 )			/* oy*m5 | ox*m0 */
+	ADDPS( XMM2, XMM0 )			/* +m13 | +m12 */
+	MOVLPS( XMM0, D(0) )			/* ->D(1) | ->D(0) */
+
+	MOVSS( S(2), XMM0 )			/* oz, passed through unchanged */
+	MOVSS( XMM0, D(2) )			/* ->D(2) */
+
+LLBL(K_GTP3P2DNRR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+	ADD_L( EAX, ESI )			/* next source vertex (source stride) */
+	CMP_L( ECX, EDI )			/* reached end of dest data? */
+	JNE( LLBL(K_GTP3P2DNRR_top) )
+
+LLBL(K_GTP3P2DNRR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME(_mesa_sse_transform_points3_3d)
+HIDDEN(_mesa_sse_transform_points3_3d)
+GLNAME(_mesa_sse_transform_points3_3d):
+/* Per vertex: dest.xyz = ox*(m0..m2) + oy*(m4..m6) + oz*(m8..m10) + (m12..m14); dest.w is not written. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) 	/* ptr to source GLvector4f */
+	MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) 	/* ptr to dest GLvector4f */
+
+
+	MOV_L( ARG_MATRIX, EDX ) 		/* ptr to matrix */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) 	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP3P3DR_finish) ) 	/* count was zero; go to finish (dest is left unmodified) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) 	/* stride */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) 	/* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) 	/* set dest count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) 	/* set dest size */
+
+	SHL_L( CONST(4), ECX ) 			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI ) 	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI ) 	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX ) 			/* count += dest ptr; ECX = end of dest data */
+
+/* Each load fetches four floats; the top lane's result is computed but never stored. */
+ALIGNTEXT32
+	MOVAPS( M(0), XMM0 )			/* m3  | m2  | m1  | m0 */
+	MOVAPS( M(4), XMM1 )			/* m7  | m6  | m5  | m4 */
+	MOVAPS( M(8), XMM2 )			/* m11 | m10 | m9  | m8 */
+	MOVAPS( M(12), XMM3 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT32
+LLBL(K_GTP3P3DR_top):
+	MOVSS( S(0), XMM4 )			/* ox */
+	SHUFPS( CONST(0x0), XMM4, XMM4 )	/* ox | ox | ox */
+	MULPS( XMM0, XMM4 )			/* ox*m2 | ox*m1 | ox*m0 */
+
+	MOVSS( S(1), XMM5 )			/* oy */
+	SHUFPS( CONST(0x0), XMM5, XMM5 )	/* oy | oy | oy */
+	MULPS( XMM1, XMM5 )			/* oy*m6 | oy*m5 | oy*m4 */
+
+	MOVSS( S(2), XMM6 )			/* oz */
+	SHUFPS( CONST(0x0), XMM6, XMM6 )	/* oz | oz | oz */
+	MULPS( XMM2, XMM6 )			/* oz*m10 | oz*m9 | oz*m8 */
+
+	ADDPS( XMM5, XMM4 )			/* + | + | + */
+	ADDPS( XMM6, XMM4 )			/* + | + | + */
+	ADDPS( XMM3, XMM4 )			/* + | + | + */
+
+	MOVLPS( XMM4, D(0) )			/* => D(1) | => D(0) */
+	UNPCKHPS( XMM4, XMM4 )			/* bring lane 2 down to lane 0 */
+	MOVSS( XMM4, D(2) )			/* => D(2) */
+
+LLBL(K_GTP3P3DR_skip):
+	ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+	ADD_L( EAX, ESI )			/* next source vertex (source stride) */
+	CMP_L( ECX, EDI )			/* reached end of dest data? */
+	JNE( LLBL(K_GTP3P3DR_top) )
+
+LLBL(K_GTP3P3DR_finish):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/sse_xform4.S b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform4.S
new file mode 100644
index 000000000..2ac644ac7
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/sse_xform4.S
@@ -0,0 +1,231 @@
+/* $Id: sse_xform4.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef USE_SSE_ASM
+#include "matypes.h"
+#include "xform_args.h"
+
+	SEG_TEXT
+/* FRAME_OFFSET matches the 8 bytes (ESI + EDI) every routine below pushes; presumably used by the ARG_* macros. */
+#define FRAME_OFFSET	8
+/* Float i (4 bytes each) at the current source vertex (ESI), dest vertex (EDI) and matrix (EDX). */
+#define SRC(i)		REGOFF(i * 4, ESI)
+#define DST(i)		REGOFF(i * 4, EDI)
+#define MAT(i)		REGOFF(i * 4, EDX)
+/* Packs four 2-bit selectors into one immediate (r0 in the top bits, r3 in the low bits); unused in this file. */
+#define SELECT(r0, r1, r2, r3)	CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_sse_transform_points4_general )
+HIDDEN(_mesa_sse_transform_points4_general)
+GLNAME( _mesa_sse_transform_points4_general ):
+/* Per vertex: dest = ox*(m0..m3) + oy*(m4..m7) + oz*(m8..m11) + ow*(m12..m15). */
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )		/* ptr to source GLvector4f */
+	MOV_L( ARG_DEST, EDI )			/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX )		/* ptr to matrix */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* source count */
+
+	TEST_L( ECX, ECX )			/* verify non-zero count */
+	JE( LLBL( sse_general_done ) )		/* zero: dest is left unmodified */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* set dest count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
+
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ptr to first source vertex */
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* ptr to first dest vertex */
+
+	PREFETCHT0( REGIND(ESI) )		/* prefetch hint for the first source vertex */
+
+	MOVAPS( MAT(0), XMM4 )			/* m3  | m2  | m1  | m0  */
+	MOVAPS( MAT(4), XMM5 )			/* m7  | m6  | m5  | m4  */
+	MOVAPS( MAT(8), XMM6 )			/* m11 | m10 | m9  | m8  */
+	MOVAPS( MAT(12), XMM7 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT16
+LLBL( sse_general_loop ):
+/* Unlike the routines that compare EDI with an end pointer, this loop counts ECX down to zero. */
+	MOVSS( SRC(0), XMM0 )			/* ox */
+	SHUFPS( CONST(0x0), XMM0, XMM0 )	/* ox | ox | ox | ox */
+	MULPS( XMM4, XMM0 )			/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+
+	MOVSS( SRC(1), XMM1 )			/* oy */
+	SHUFPS( CONST(0x0), XMM1, XMM1 )	/* oy | oy | oy | oy */
+	MULPS( XMM5, XMM1 )			/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+
+	MOVSS( SRC(2), XMM2 )			/* oz */
+	SHUFPS( CONST(0x0), XMM2, XMM2 )	/* oz | oz | oz | oz */
+	MULPS( XMM6, XMM2 )			/* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
+
+	MOVSS( SRC(3), XMM3 )			/* ow */
+	SHUFPS( CONST(0x0), XMM3, XMM3 )	/* ow | ow | ow | ow */
+	MULPS( XMM7, XMM3 )			/* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
+
+	ADDPS( XMM1, XMM0 )			/* ox*m3+oy*m7 | ... */
+	ADDPS( XMM2, XMM0 )			/* ox*m3+oy*m7+oz*m11 | ... */
+	ADDPS( XMM3, XMM0 )			/* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
+	MOVAPS( XMM0, DST(0) )			/* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
+
+	ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+	ADD_L( EAX, ESI )			/* next source vertex (source stride) */
+
+	DEC_L( ECX )				/* one vertex fewer to go */
+	JNZ( LLBL( sse_general_loop ) )
+
+LLBL( sse_general_done ):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+
+
+
+
+ALIGNTEXT4
+GLOBL GLNAME( _mesa_sse_transform_points4_3d )
+HIDDEN(_mesa_sse_transform_points4_3d)
+GLNAME( _mesa_sse_transform_points4_3d ):
+/* Per vertex: dest.xyz = ox*(m0..m2) + oy*(m4..m6) + oz*(m8..m10) + ow*(m12..m14); dest.w = ow. */
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )		/* ptr to source GLvector4f */
+	MOV_L( ARG_DEST, EDI )			/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX )		/* ptr to matrix */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* source count */
+
+	TEST_L( ECX, ECX)
+	JZ( LLBL(K_GTP43P3DR_finish) )		/* count was zero; go to finish (dest is left unmodified) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* set dest count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */
+/* NOTE(review): size/flags say 3 components although D(3) is written in the loop -- confirm this is intended. */
+	SHL_L( CONST(4), ECX )			/* count *= 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ptr to first source vertex */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* ptr to first dest vertex */
+	ADD_L( EDI, ECX )			/* count += dest ptr; ECX = end of dest data */
+
+	MOVAPS( MAT(0), XMM0 )			/* m3  | m2  | m1  |  m0 */
+	MOVAPS( MAT(4), XMM1 )			/* m7  | m6  | m5  |  m4 */
+	MOVAPS( MAT(8), XMM2 )			/* m11 | m10 | m9  |  m8 */
+	MOVAPS( MAT(12), XMM3 )			/* m15 | m14 | m13 | m12 */
+
+ALIGNTEXT32
+LLBL( K_GTP43P3DR_top ):
+	MOVSS( SRC(0), XMM4 )			/* ox */
+	SHUFPS( CONST(0x0), XMM4, XMM4 )	/* ox | ox | ox | ox */
+	MULPS( XMM0, XMM4 )			/* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
+
+	MOVSS( SRC(1), XMM5 )			/* oy */
+	SHUFPS( CONST(0x0), XMM5, XMM5 )	/* oy | oy | oy | oy */
+	MULPS( XMM1, XMM5 )			/* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
+
+	MOVSS( SRC(2), XMM6 )			/* oz */
+	SHUFPS( CONST(0x0), XMM6, XMM6 )	/* oz | oz | oz | oz */
+	MULPS( XMM2, XMM6 )			/* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
+
+	MOVSS( SRC(3), XMM7 )			/* ow */
+	SHUFPS( CONST(0x0), XMM7, XMM7 )	/* ow | ow | ow | ow */
+	MULPS( XMM3, XMM7 )			/* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
+
+	ADDPS( XMM5, XMM4 )			/* ox*m3+oy*m7 | ... */
+	ADDPS( XMM6, XMM4 )			/* ox*m3+oy*m7+oz*m11 | ... */
+	ADDPS( XMM7, XMM4 )			/* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
+	MOVAPS( XMM4, DST(0) )			/* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
+/* The computed top lane is discarded: D(3) is overwritten with the untransformed source w. */
+	MOVSS( SRC(3), XMM4 )			/* ow */
+	MOVSS( XMM4, DST(3) )			/* ->D(3) */
+
+LLBL( K_GTP43P3DR_skip ):
+	ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+	ADD_L( EAX, ESI )			/* next source vertex (source stride) */
+	CMP_L( ECX, EDI )			/* reached end of dest data? */
+	JNE( LLBL(K_GTP43P3DR_top) )
+
+LLBL( K_GTP43P3DR_finish ):
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_sse_transform_points4_identity )
+HIDDEN(_mesa_sse_transform_points4_identity)
+GLNAME( _mesa_sse_transform_points4_identity ):
+/* Copies all four floats of every source vertex to dest, with no check for source == dest. */
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )		/* ptr to source GLvector4f */
+	MOV_L( ARG_DEST, EDI )			/* ptr to dest GLvector4f */
+
+	MOV_L( ARG_MATRIX, EDX )		/* matrix pointer is loaded but never used below */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* source count */
+
+	TEST_L( ECX, ECX )			/* verify non-zero count */
+	JE( LLBL( sse_identity_done ) )		/* zero: dest is left unmodified */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* stride */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* set dest count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
+
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ptr to first source vertex */
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* ptr to first dest vertex */
+
+ALIGNTEXT16
+LLBL( sse_identity_loop ):
+
+	PREFETCHNTA( REGOFF(32, ESI) )		/* prefetch hint 32 bytes past the current source vertex */
+
+	MOVAPS( REGIND(ESI), XMM0 )		/* aligned load of the whole source vertex */
+	ADD_L( EAX, ESI )			/* next source vertex (source stride) */
+
+	MOVAPS( XMM0, REGIND(EDI) )		/* aligned store of the whole dest vertex */
+	ADD_L( CONST(16), EDI )			/* next dest vertex (fixed 16-byte step) */
+
+	DEC_L( ECX )				/* one vertex fewer to go */
+	JNZ( LLBL( sse_identity_loop ) )
+
+LLBL( sse_identity_done ):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#endif
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86.c b/nx-X11/extras/Mesa/src/mesa/x86/x86.c
new file mode 100644
index 000000000..aff71d224
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86.c
@@ -0,0 +1,96 @@
+/* $Id: x86.c,v 1.1.1.1 2004/06/16 09:19:37 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Intel x86 assembly code by Josh Vanderhoof
+ */
+
+#include "glheader.h"
+#include "context.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+
+#include "x86.h"
+#include "common_x86_macros.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+
+
+#ifdef USE_X86_ASM
+DECLARE_XFORM_GROUP( x86, 2 )
+DECLARE_XFORM_GROUP( x86, 3 )
+DECLARE_XFORM_GROUP( x86, 4 )
+
+/* Clip testers for 4-component vertices; both are stored in entry 4 of their tables below. */
+extern GLvector4f * _ASMAPI
+_mesa_x86_cliptest_points4( GLvector4f *clip_vec,
+			    GLvector4f *proj_vec,
+			    GLubyte clipMask[],
+			    GLubyte *orMask,
+			    GLubyte *andMask );
+
+extern GLvector4f * _ASMAPI
+_mesa_x86_cliptest_points4_np( GLvector4f *clip_vec,
+			       GLvector4f *proj_vec,
+			       GLubyte clipMask[],
+			       GLubyte *orMask,
+			       GLubyte *andMask );
+/* NOTE(review): the two _v16_ routines below are declared but never referenced in this file. */
+extern void _ASMAPI
+_mesa_v16_x86_cliptest_points4( GLfloat *first_vert,
+				GLfloat *last_vert,
+				GLubyte *or_mask,
+				GLubyte *and_mask,
+				GLubyte *clip_mask );
+
+extern void _ASMAPI
+_mesa_v16_x86_general_xform( GLfloat *dest,
+			     const GLfloat *m,
+			     const GLfloat *src,
+			     GLuint src_stride,
+			     GLuint count );
+#endif /* USE_X86_ASM */
+
+/* Installs the x86 assembly transform and clip-test routines; an empty function unless USE_X86_ASM is defined. */
+void _mesa_init_x86_transform_asm( void )
+{
+#ifdef USE_X86_ASM
+   ASSIGN_XFORM_GROUP( x86, 2 );
+   ASSIGN_XFORM_GROUP( x86, 3 );
+   ASSIGN_XFORM_GROUP( x86, 4 );
+   /* Only entry 4 of each clip-test table is replaced here. */
+   _mesa_clip_tab[4] = _mesa_x86_cliptest_points4;
+   _mesa_clip_np_tab[4] = _mesa_x86_cliptest_points4_np;
+
+#ifdef DEBUG
+   _math_test_all_transform_functions( "x86" );   /* presumably checks the routines just installed -- see math/m_debug.h */
+   _math_test_all_cliptest_functions( "x86" );
+#endif /* DEBUG */
+#endif /* USE_X86_ASM */
+}
+
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86.h b/nx-X11/extras/Mesa/src/mesa/x86/x86.h
new file mode 100644
index 000000000..920387707
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86.h
@@ -0,0 +1,36 @@
+/* $Id: x86.h,v 1.1.1.1 2004/06/16 09:19:37 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Intel x86 assembly code by Josh Vanderhoof
+ */
+
+#ifndef __X86_H__
+#define __X86_H__
+/* Installs the x86 assembly transform and clip-test routines; defined in x86.c. */
+extern void _mesa_init_x86_transform_asm( void );
+
+#endif /* __X86_H__ */
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86_cliptest.S b/nx-X11/extras/Mesa/src/mesa/x86/x86_cliptest.S
new file mode 100644
index 000000000..3fa9cb526
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86_cliptest.S
@@ -0,0 +1,403 @@
+/* $Id: x86_cliptest.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+
+#include "matypes.h"
+#include "clip_args.h"
+/* Operand shorthands (assuming REGOFF(off, reg) is the memory operand off(reg); it is defined outside this file). */
+#define SRC0		REGOFF(0, ESI)	/* SRCn: 4-byte slot n of the current source point (ESI) */
+#define SRC1		REGOFF(4, ESI)
+#define SRC2		REGOFF(8, ESI)
+#define SRC3		REGOFF(12, ESI)
+#define DST0		REGOFF(0, EDI)	/* DSTn: 4-byte slot n of the current dest point (EDI) */
+#define DST1		REGOFF(4, EDI)
+#define DST2		REGOFF(8, EDI)
+#define DST3		REGOFF(12, EDI)
+#define MAT0		REGOFF(0, EDX)	/* MATn: not referenced in this file (EDX holds clipmask[] here) */
+#define MAT1		REGOFF(4, EDX)
+#define MAT2		REGOFF(8, EDX)
+#define MAT3		REGOFF(12, EDX)
+
+
+/*
+ * Table for clip test, indexed by the 7-bit code the loops below build in ECX.
+ * Meaning of the index bits:
+ * 	bit6 = SRC3 < 0
+ * 	bit5 = SRC2 < 0
+ * 	bit4 = abs(S(2)) > abs(S(3))
+ * 	bit3 = SRC1 < 0
+ * 	bit2 = abs(S(1)) > abs(S(3))
+ * 	bit1 = SRC0 < 0
+ * 	bit0 = abs(S(0)) > abs(S(3))
+ */
+/* "< 0" is the float's sign bit; abs() comparisons are unsigned compares of the bit pattern with the sign shifted out. */
+	SEG_DATA
+/* 16 rows of 8 bytes = 128 entries, one per index value; the entry is the mask byte stored to clipmask[]. */
+clip_table:
+	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
+	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
+	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
+	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
+	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
+	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
+	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
+	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
+	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
+	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
+	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
+	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
+	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
+	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
+	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
+	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
+
+
+	SEG_TEXT
+
+/*
+ * _mesa_x86_cliptest_points4
+ * Clip-tests each 4-component point; unclipped points are scaled by 1/SRC3.
+ *   AL:  ormask
+ *   AH:  andmask
+ *   EBX: temp0
+ *   ECX: temp1
+ *   EDX: clipmask[]
+ *   ESI: clip[]
+ *   EDI: proj[]
+ *   EBP: temp2
+ */
+/* Returns ARG_DEST in EAX; the bytes at ARG_OR / ARG_AND receive the accumulated masks when count != 0. */
+#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
+#define ELFPIC
+#endif
+/* With ELFPIC the address of clip_table is fetched through the GOT and kept on top of the stack. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_cliptest_points4 )
+HIDDEN(_mesa_x86_cliptest_points4)
+GLNAME( _mesa_x86_cliptest_points4 ):
+/* FRAME_OFFSET = bytes pushed below: 4 registers, plus the clip_table pointer under ELFPIC. */
+#ifdef ELFPIC
+#define FRAME_OFFSET 20
+#else
+#define FRAME_OFFSET 16
+#endif
+	PUSH_L( ESI )			/* preserve the registers used as working storage */
+	PUSH_L( EDI )
+	PUSH_L( EBP )
+	PUSH_L( EBX )
+
+#ifdef ELFPIC
+	/* store pointer to clip_table on stack */
+	CALL( LLBL(ctp4_get_eip) )	/* EBX = address of the following instruction */
+	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )	/* EBX = GOT base */
+	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )	/* EBX = address of clip_table */
+	PUSH_L( EBX )			/* stays at (ESP) until ctp4_finish */
+	JMP( LLBL(ctp4_clip_table_ready) )
+
+LLBL(ctp4_get_eip):
+	/* store eip in ebx */
+	MOV_L( REGIND(ESP), EBX )	/* return address pushed by the CALL above */
+	RET
+
+LLBL(ctp4_clip_table_ready):
+#endif
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_CLIP, EDX )
+	MOV_L( ARG_OR, EBX )		/* EBX = pointer to the ormask byte */
+
+	MOV_L( ARG_AND, EBP )		/* EBP = pointer to the andmask byte */
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
+
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+	MOV_L( EAX, ARG_SOURCE )	/* put stride in ARG_SOURCE */
+
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDX, ECX )
+
+	MOV_L( ECX, ARG_CLIP )		/* put clipmask + count in ARG_CLIP */
+	CMP_L( ECX, EDX )		/* flags survive the MOV_Bs below and feed the JZ */
+
+	MOV_B( REGIND(EBX), AL )	/* AL = incoming ormask */
+	MOV_B( REGIND(EBP), AH )	/* AH = incoming andmask */
+
+	JZ( LLBL(ctp4_finish) )		/* count == 0: nothing to test */
+
+ALIGNTEXT16
+LLBL(ctp4_top):
+
+	FLD1				/* F3 */
+	FDIV_S( SRC3 )		/* GH: don't care about div-by-zero */
+
+	MOV_L( SRC3, EBP )
+	MOV_L( SRC2, EBX )
+
+	XOR_L( ECX, ECX )		/* ECX collects the clip_table index, one bit per ADC */
+	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */
+
+	ADC_L( ECX, ECX )
+	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )
+	MOV_L( SRC1, EBX )
+
+	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )
+	MOV_L( SRC0, EBX )
+
+	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )		/* ECX = complete 7-bit index */
+
+#ifdef ELFPIC
+	MOV_L( REGIND(ESP), EBP )	/* clip_table */
+
+	MOV_B( REGBI(EBP, ECX), CL )
+#else
+	MOV_B( REGOFF(clip_table,ECX), CL )
+#endif
+
+	OR_B( CL, AL )			/* accumulate ormask */
+	AND_B( CL, AH )			/* accumulate andmask */
+
+	TEST_B( CL, CL )
+	MOV_B( CL, REGIND(EDX) )	/* store this point's mask in clipmask[] */
+
+	JZ( LLBL(ctp4_proj) )		/* mask == 0: point is not clipped */
+
+LLBL(ctp4_noproj):
+
+	FSTP( ST(0) )			/* */
+
+	MOV_L( CONST(0), DST0 )		/* clipped point: dest = (0, 0, 0, 1.0f) */
+	MOV_L( CONST(0), DST1 )
+	MOV_L( CONST(0), DST2 )
+	MOV_L( CONST(0x3f800000), DST3 )	/* bit pattern of 1.0f */
+
+	JMP( LLBL(ctp4_next) )
+
+LLBL(ctp4_proj):
+
+	FLD_S( SRC0 )			/* F0 F3 */
+	FMUL2( ST(1), ST0 )
+
+	FLD_S( SRC1 )			/* F1 F0 F3 */
+	FMUL2( ST(2), ST0 )
+
+	FLD_S( SRC2 )			/* F2 F1 F0 F3 */
+	FMUL2( ST(3), ST0 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F3 */
+	FSTP_S( DST0 )		/* F1 F2 F3 */
+	FSTP_S( DST1 )		/* F2 F3 */
+	FSTP_S( DST2 )		/* F3 */
+	FSTP_S( DST3 )		/* */
+
+LLBL(ctp4_next):
+
+	INC_L( EDX )			/* next clipmask byte */
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+
+	ADD_L( ARG_SOURCE, ESI )	/* next source point (stride was parked in ARG_SOURCE) */
+	CMP_L( EDX, ARG_CLIP )		/* reached clipmask + count? */
+
+	JNZ( LLBL(ctp4_top) )
+
+	MOV_L( ARG_OR, ECX )
+	MOV_L( ARG_AND, EDX )
+
+	MOV_B( AL, REGIND(ECX) )	/* write back ormask */
+	MOV_B( AH, REGIND(EDX) )	/* write back andmask */
+
+LLBL(ctp4_finish):
+
+	MOV_L( ARG_DEST, EAX )		/* return value */
+#ifdef ELFPIC
+	POP_L( ESI )			/* discard ptr to clip_table */
+#endif
+	POP_L( EBX )
+	POP_L( EBP )
+	POP_L( EDI )
+	POP_L( ESI )
+
+	RET
+
+
+
+
+/* _mesa_x86_cliptest_points4_np: same clip test as _mesa_x86_cliptest_points4 but without */
+/* any projection: only clipmask[], ormask (AL) and andmask (AH) are produced. */
+/* EDI holds clipmask + count, the stride is parked in ARG_DEST; returns ARG_SOURCE in EAX. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
+HIDDEN(_mesa_x86_cliptest_points4_np)
+GLNAME( _mesa_x86_cliptest_points4_np ):
+/* FRAME_OFFSET = bytes pushed below: 4 registers, plus the clip_table pointer under ELFPIC. */
+#ifdef ELFPIC
+#define FRAME_OFFSET 20
+#else
+#define FRAME_OFFSET 16
+#endif
+	PUSH_L( ESI )			/* preserve the registers used as working storage */
+	PUSH_L( EDI )
+	PUSH_L( EBP )
+	PUSH_L( EBX )
+
+#ifdef ELFPIC
+	/* store pointer to clip_table on stack */
+	CALL( LLBL(ctp4_np_get_eip) )	/* EBX = address of the following instruction */
+	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )	/* EBX = GOT base */
+	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )	/* EBX = address of clip_table */
+	PUSH_L( EBX )			/* stays at (ESP) until ctp4_np_finish */
+	JMP( LLBL(ctp4_np_clip_table_ready) )
+
+LLBL(ctp4_np_get_eip):
+	/* store eip in ebx */
+	MOV_L( REGIND(ESP), EBX )	/* return address pushed by the CALL above */
+	RET
+
+LLBL(ctp4_np_clip_table_ready):
+#endif
+
+	MOV_L( ARG_SOURCE, ESI )
+	/* slot */
+
+	MOV_L( ARG_CLIP, EDX )
+	MOV_L( ARG_OR, EBX )		/* EBX = pointer to the ormask byte */
+
+	MOV_L( ARG_AND, EBP )		/* EBP = pointer to the andmask byte */
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
+
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( EAX, ARG_DEST )   	/* put stride in ARG_DEST */
+	ADD_L( EDX, ECX )
+
+	MOV_L( ECX, EDI )		/* put clipmask + count in EDI */
+	CMP_L( ECX, EDX )		/* flags survive the MOV_Bs below and feed the JZ */
+
+	MOV_B( REGIND(EBX), AL )	/* AL = incoming ormask */
+	MOV_B( REGIND(EBP), AH )	/* AH = incoming andmask */
+
+	JZ( LLBL(ctp4_np_finish) )	/* count == 0: nothing to test */
+
+ALIGNTEXT16
+LLBL(ctp4_np_top):
+
+	MOV_L( SRC3, EBP )
+	MOV_L( SRC2, EBX )
+
+	XOR_L( ECX, ECX )		/* ECX collects the clip_table index, one bit per ADC */
+	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */
+
+	ADC_L( ECX, ECX )
+	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )
+	MOV_L( SRC1, EBX )
+
+	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )
+	MOV_L( SRC0, EBX )
+
+	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */
+
+	ADC_L( ECX, ECX )
+	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */
+
+	ADC_L( ECX, ECX )		/* ECX = complete 7-bit index */
+
+#ifdef ELFPIC
+	MOV_L( REGIND(ESP), EBP )	/* clip_table */
+
+	MOV_B( REGBI(EBP, ECX), CL )
+#else
+	MOV_B( REGOFF(clip_table,ECX), CL )
+#endif
+
+	OR_B( CL, AL )			/* accumulate ormask */
+	AND_B( CL, AH )			/* accumulate andmask */
+
+	TEST_B( CL, CL )		/* NOTE(review): result unused; INC_L/ADD_L/CMP_L overwrite the flags before the JNZ */
+	MOV_B( CL, REGIND(EDX) )	/* store this point's mask in clipmask[] */
+
+	INC_L( EDX )			/* next clipmask byte */
+	/* slot */
+
+	ADD_L( ARG_DEST, ESI )		/* next source point (stride was parked in ARG_DEST) */
+	CMP_L( EDX, EDI )		/* reached clipmask + count? */
+
+	JNZ( LLBL(ctp4_np_top) )
+
+	MOV_L( ARG_OR, ECX )
+	MOV_L( ARG_AND, EDX )
+
+	MOV_B( AL, REGIND(ECX) )	/* write back ormask */
+	MOV_B( AH, REGIND(EDX) )	/* write back andmask */
+
+LLBL(ctp4_np_finish):
+
+	MOV_L( ARG_SOURCE, EAX )	/* return value */
+#ifdef ELFPIC
+	POP_L( ESI )			/* discard ptr to clip_table */
+#endif
+	POP_L( EBX )
+	POP_L( EBP )
+	POP_L( EDI )
+	POP_L( ESI )
+
+	RET
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86_xform2.S b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform2.S
new file mode 100644
index 000000000..61a031488
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform2.S
@@ -0,0 +1,570 @@
+/* $Id: x86_xform2.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+
+#include "matypes.h"
+#include "xform_args.h"
+
+	SEG_TEXT
+/* Raw 32-bit patterns of IEEE-754 single-precision constants (1065353216 == 0x3f800000 == 1.0f). */
+#define FP_ONE		1065353216
+#define FP_ZERO		0
+/* Operand shorthands: 4-byte slot n of the source point (ESI), dest point (EDI) and matrix (EDX). */
+#define SRC0		REGOFF(0, ESI)
+#define SRC1		REGOFF(4, ESI)
+#define SRC2		REGOFF(8, ESI)
+#define SRC3		REGOFF(12, ESI)
+#define DST0		REGOFF(0, EDI)
+#define DST1		REGOFF(4, EDI)
+#define DST2		REGOFF(8, EDI)
+#define DST3		REGOFF(12, EDI)
+#define MAT0		REGOFF(0, EDX)
+#define MAT1		REGOFF(4, EDX)
+#define MAT2		REGOFF(8, EDX)
+#define MAT3		REGOFF(12, EDX)
+#define MAT4		REGOFF(16, EDX)
+#define MAT5		REGOFF(20, EDX)
+#define MAT6		REGOFF(24, EDX)
+#define MAT7		REGOFF(28, EDX)
+#define MAT8		REGOFF(32, EDX)
+#define MAT9		REGOFF(36, EDX)
+#define MAT10		REGOFF(40, EDX)
+#define MAT11		REGOFF(44, EDX)
+#define MAT12		REGOFF(48, EDX)
+#define MAT13		REGOFF(52, EDX)
+#define MAT14		REGOFF(56, EDX)
+#define MAT15		REGOFF(60, EDX)
+/* _mesa_x86_transform_points2_general: for each source point (only SRC0, SRC1 are read) stores */
+/* DSTi = SRC0*MATi + SRC1*MAT(4+i) + MAT(12+i) for i = 0..3; dest size is set to 4. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_general )
+HIDDEN(_mesa_x86_transform_points2_general)
+GLNAME( _mesa_x86_transform_points2_general ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_gr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p2_gr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
+	FMUL_S( MAT3 )
+
+	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT6 )
+	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT7 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FXCH( ST(3) )			/* F4 F6 F5 F7 */
+	FADD_S( MAT12 )
+	FXCH( ST(2) )			/* F5 F6 F4 F7 */
+	FADD_S( MAT13 )
+	FXCH( ST(1) )			/* F6 F5 F4 F7 */
+	FADD_S( MAT14 )
+	FXCH( ST(3) )			/* F7 F5 F4 F6 */
+	FADD_S( MAT15 )
+
+	FXCH( ST(2) )			/* F4 F5 F7 F6 */
+	FSTP_S( DST0 )			/* F5 F7 F6 */
+	FSTP_S( DST1 )			/* F7 F6 */
+	FXCH( ST(1) )			/* F6 F7 */
+	FSTP_S( DST2 )			/* F7 */
+	FSTP_S( DST3 )			/* */
+
+LLBL(x86_p2_gr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_gr_loop) )
+
+LLBL(x86_p2_gr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_perspective: for each source point stores */
+/* DST0 = SRC0*MAT0, DST1 = SRC1*MAT5, DST2 = MAT14 (copied as raw bits), */
+/* DST3 = 0 (FP_ZERO); dest size is set to 4. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
+HIDDEN(_mesa_x86_transform_points2_perspective)
+GLNAME( _mesa_x86_transform_points2_perspective ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_pr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+	MOV_L( MAT14, EBX )		/* loop-invariant: every DST2 gets this value */
+
+ALIGNTEXT16
+LLBL(x86_p2_pr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F1 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F4 F1 */
+	FSTP_S( DST0   )		/* F1 */
+	FSTP_S( DST1   )		/* */
+	MOV_L( EBX, DST2 )
+	MOV_L( CONST(FP_ZERO), DST3 )
+
+LLBL(x86_p2_pr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_pr_loop) )
+
+LLBL(x86_p2_pr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_3d: for each source point (only SRC0, SRC1 are read) stores */
+/* DSTi = SRC0*MATi + SRC1*MAT(4+i) + MAT(12+i) for i = 0..2; */
+/* DST3 is not written and dest size is set to 3. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_3d )
+HIDDEN(_mesa_x86_transform_points2_3d)
+GLNAME( _mesa_x86_transform_points2_3d ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_3dr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p2_3dr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+
+	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT6 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FADD_S( MAT12 )
+	FXCH( ST(1) )			/* F5 F4 F6 */
+	FADD_S( MAT13 )
+	FXCH( ST(2) )			/* F6 F4 F5 */
+	FADD_S( MAT14 )
+
+	FXCH( ST(1) )			/* F4 F6 F5 */
+	FSTP_S( DST0 )			/* F6 F5 */
+	FXCH( ST(1) )			/* F5 F6 */
+	FSTP_S( DST1 )			/* F6 */
+	FSTP_S( DST2 )			/* */
+
+LLBL(x86_p2_3dr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_3dr_loop) )
+
+LLBL(x86_p2_3dr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_3d_no_rot: for each source point stores */
+/* DST0 = SRC0*MAT0 + MAT12, DST1 = SRC1*MAT5 + MAT13, DST2 = MAT14 (copied as raw bits); */
+/* DST3 is not written and dest size is set to 3. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
+HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
+GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_3dnrr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+	MOV_L( MAT14, EBX )		/* loop-invariant: every DST2 gets this value */
+
+ALIGNTEXT16
+LLBL(x86_p2_3dnrr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F1 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F4 F1 */
+	FADD_S( MAT12 )
+	FLD_S( MAT13 )		/* F5 F4 F1 */
+	FXCH( ST(2) )			/* F1 F4 F5 */
+	FADDP( ST0, ST(2) )		/* F4 F5 */
+
+	FSTP_S( DST0 )		/* F5 */
+	FSTP_S( DST1 )		/* */
+	MOV_L( EBX, DST2 )
+
+LLBL(x86_p2_3dnrr_skip):		/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_3dnrr_loop) )
+
+LLBL(x86_p2_3dnrr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_2d: for each source point stores */
+/* DST0 = SRC0*MAT0 + SRC1*MAT4 + MAT12 and DST1 = SRC0*MAT1 + SRC1*MAT5 + MAT13; */
+/* DST2/DST3 are not written and dest size is set to 2. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_2d )
+HIDDEN(_mesa_x86_transform_points2_2d)
+GLNAME( _mesa_x86_transform_points2_2d ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_2dr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p2_2dr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+
+	FLD_S( SRC1 )			/* F0 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F0 F1 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F5 F4 */
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FADD_S( MAT12 )
+	FXCH( ST(1) )			/* F5 F4 */
+	FADD_S( MAT13 )
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FSTP_S( DST0 )		/* F5 */
+	FSTP_S( DST1 )		/* */
+
+LLBL(x86_p2_2dr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_2dr_loop) )
+
+LLBL(x86_p2_2dr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_2d_no_rot: for each source point stores */
+/* DST0 = SRC0*MAT0 + MAT12 and DST1 = SRC1*MAT5 + MAT13; */
+/* DST2/DST3 are not written and dest size is set to 2. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
+GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_2dnrr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p2_2dnrr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F1 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F4 F1 */
+	FADD_S( MAT12 )
+	FLD_S( MAT13 )		/* F5 F4 F1 */
+	FXCH( ST(2) )			/* F1 F4 F5 */
+	FADDP( ST0, ST(2) )		/* F4 F5 */
+
+	FSTP_S( DST0   )		/* F5 */
+	FSTP_S( DST1   )		/* */
+
+LLBL(x86_p2_2dnrr_skip):		/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_2dnrr_loop) )
+
+LLBL(x86_p2_2dnrr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points2_identity: copies SRC0 and SRC1 of each source point to */
+/* DST0 and DST1 as raw 32-bit words; DST2/DST3 are not written and dest size is set to 2. */
+/* The copy loop is skipped when the source and dest start pointers are equal. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points2_identity )
+HIDDEN(_mesa_x86_transform_points2_identity)
+GLNAME( _mesa_x86_transform_points2_identity ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )	/* matrix is never read: EDX is reused as a copy temporary in the loop */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p2_ir_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )	/* dest size = 2 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+	CMP_L( ESI, EDI )
+	JE( LLBL(x86_p2_ir_done) )	/* source start == dest start: nothing to copy */
+
+ALIGNTEXT16
+LLBL(x86_p2_ir_loop):
+
+	MOV_L( SRC0, EBX )
+	MOV_L( SRC1, EDX )
+
+	MOV_L( EBX, DST0 )
+	MOV_L( EDX, DST1 )
+
+LLBL(x86_p2_ir_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p2_ir_loop) )
+
+LLBL(x86_p2_ir_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86_xform3.S b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform3.S
new file mode 100644
index 000000000..04ef3a9ee
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform3.S
@@ -0,0 +1,640 @@
+/* $Id: x86_xform3.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+
+#include "matypes.h"
+#include "xform_args.h"
+
+	SEG_TEXT
+/* Raw 32-bit patterns of IEEE-754 single-precision constants (1065353216 == 0x3f800000 == 1.0f). */
+#define FP_ONE		1065353216
+#define FP_ZERO		0
+/* Operand shorthands: 4-byte slot n of the source point (ESI), dest point (EDI) and matrix (EDX). */
+#define SRC0		REGOFF(0, ESI)
+#define SRC1		REGOFF(4, ESI)
+#define SRC2		REGOFF(8, ESI)
+#define SRC3		REGOFF(12, ESI)
+#define DST0		REGOFF(0, EDI)
+#define DST1		REGOFF(4, EDI)
+#define DST2		REGOFF(8, EDI)
+#define DST3		REGOFF(12, EDI)
+#define MAT0		REGOFF(0, EDX)
+#define MAT1		REGOFF(4, EDX)
+#define MAT2		REGOFF(8, EDX)
+#define MAT3		REGOFF(12, EDX)
+#define MAT4		REGOFF(16, EDX)
+#define MAT5		REGOFF(20, EDX)
+#define MAT6		REGOFF(24, EDX)
+#define MAT7		REGOFF(28, EDX)
+#define MAT8		REGOFF(32, EDX)
+#define MAT9		REGOFF(36, EDX)
+#define MAT10		REGOFF(40, EDX)
+#define MAT11		REGOFF(44, EDX)
+#define MAT12		REGOFF(48, EDX)
+#define MAT13		REGOFF(52, EDX)
+#define MAT14		REGOFF(56, EDX)
+#define MAT15		REGOFF(60, EDX)
+/* _mesa_x86_transform_points3_general: for each source point (SRC0..SRC2 are read) stores */
+/* DSTi = SRC0*MATi + SRC1*MAT(4+i) + SRC2*MAT(8+i) + MAT(12+i) for i = 0..3; dest size is set to 4. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_general )
+HIDDEN(_mesa_x86_transform_points3_general)
+GLNAME( _mesa_x86_transform_points3_general ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_gr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_gr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
+	FMUL_S( MAT3 )
+
+	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT6 )
+	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT7 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT10 )
+	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT11 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FXCH( ST(3) )			/* F4 F6 F5 F7 */
+	FADD_S( MAT12 )
+	FXCH( ST(2) )			/* F5 F6 F4 F7 */
+	FADD_S( MAT13 )
+	FXCH( ST(1) )			/* F6 F5 F4 F7 */
+	FADD_S( MAT14 )
+	FXCH( ST(3) )			/* F7 F5 F4 F6 */
+	FADD_S( MAT15 )
+
+	FXCH( ST(2) )			/* F4 F5 F7 F6 */
+	FSTP_S( DST0 )		/* F5 F7 F6 */
+	FSTP_S( DST1 )		/* F7 F6 */
+	FXCH( ST(1) )			/* F6 F7 */
+	FSTP_S( DST2 )		/* F7 */
+	FSTP_S( DST3 )		/* */
+
+LLBL(x86_p3_gr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_gr_loop) )
+
+LLBL(x86_p3_gr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points3_perspective: for each source point stores */
+/* DST0 = SRC0*MAT0 + SRC2*MAT8, DST1 = SRC1*MAT5 + SRC2*MAT9, */
+/* DST2 = SRC2*MAT10 + MAT14, DST3 = -SRC2 (sign bit flipped); dest size is set to 4. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
+HIDDEN(_mesa_x86_transform_points3_perspective)
+GLNAME( _mesa_x86_transform_points3_perspective ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_pr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_pr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F5 F4 */
+	FMUL_S( MAT5 )
+
+	FLD_S( SRC2 )			/* F0 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F2 F1 F0 F5 F4 */
+	FMUL_S( MAT10 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F5 F4 */
+	FADDP( ST0, ST(4) )		/* F1 F2 F5 F4 */
+	FADDP( ST0, ST(2) )		/* F2 F5 F4 */
+	FLD_S( MAT14 )		/* F6 F2 F5 F4 */
+	FXCH( ST(1) )			/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	MOV_L( SRC2, EBX )		/* raw bits of SRC2 */
+	XOR_L( CONST(-2147483648), EBX )/* change sign */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FSTP_S( DST0 )		/* F5 F6 */
+	FSTP_S( DST1 )		/* F6 */
+	FSTP_S( DST2 )		/* */
+	MOV_L( EBX, DST3 )		/* DST3 = -SRC2 */
+
+LLBL(x86_p3_pr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_pr_loop) )
+
+LLBL(x86_p3_pr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points3_3d: for each source point (SRC0..SRC2 are read) stores */
+/* DSTi = SRC0*MATi + SRC1*MAT(4+i) + SRC2*MAT(8+i) + MAT(12+i) for i = 0..2; */
+/* DST3 is not written and dest size is set to 3. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_3d )
+HIDDEN(_mesa_x86_transform_points3_3d)
+GLNAME( _mesa_x86_transform_points3_3d ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_3dr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_3dr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+
+	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT6 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT10 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FADD_S( MAT12 )
+	FXCH( ST(1) )			/* F5 F4 F6 */
+	FADD_S( MAT13 )
+	FXCH( ST(2) )			/* F6 F4 F5 */
+	FADD_S( MAT14 )
+
+	FXCH( ST(1) )			/* F4 F6 F5 */
+	FSTP_S( DST0   )		/* F6 F5 */
+	FXCH( ST(1) )			/* F5 F6 */
+	FSTP_S( DST1   )		/* F6 */
+	FSTP_S( DST2   )		/* */
+
+LLBL(x86_p3_3dr_skip):			/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_3dr_loop) )
+
+LLBL(x86_p3_3dr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+/* _mesa_x86_transform_points3_3d_no_rot: for each source point stores */
+/* DST0 = SRC0*MAT0 + MAT12, DST1 = SRC1*MAT5 + MAT13, DST2 = SRC2*MAT10 + MAT14; */
+/* DST3 is not written and dest size is set to 3. */
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
+HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
+GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
+/* FRAME_OFFSET = bytes pushed below; presumably consumed by the ARG_* macros (xform_args.h) -- confirm. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+
+	MOV_L( ARG_MATRIX, EDX )
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_3dnrr_done) )	/* count == 0: leave early (dest count/size/flags are left untouched) */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
+
+	SHL_L( CONST(4), ECX )		/* ECX = count * 16 bytes */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source point */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest point */
+	ADD_L( EDI, ECX )		/* ECX = end of dest data (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_3dnrr_loop):
+
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F1 F4 */
+	FMUL_S( MAT5 )
+
+	FLD_S( SRC2 )			/* F2 F1 F4 */
+	FMUL_S( MAT10 )
+
+	FXCH( ST(2) )			/* F4 F1 F2 */
+	FADD_S( MAT12 )
+	FLD_S( MAT13 )		/* F5 F4 F1 F2 */
+	FXCH( ST(2) )			/* F1 F4 F5 F2 */
+	FADDP( ST0, ST(2) )		/* F4 F5 F2 */
+	FLD_S( MAT14 )		/* F6 F4 F5 F2 */
+	FXCH( ST(3) )			/* F2 F4 F5 F6 */
+	FADDP( ST0, ST(3) )		/* F4 F5 F6 */
+
+	FSTP_S( DST0   )		/* F5 F6 */
+	FSTP_S( DST1   )		/* F6 */
+	FSTP_S( DST2   )		/* */
+
+LLBL(x86_p3_3dnrr_skip):		/* not referenced within this function */
+
+	ADD_L( CONST(16), EDI )		/* next dest point (16 bytes each) */
+	ADD_L( EAX, ESI )		/* next source point */
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_3dnrr_loop) )
+
+LLBL(x86_p3_3dnrr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_2d )
+HIDDEN(_mesa_x86_transform_points3_2d)
+GLNAME( _mesa_x86_transform_points3_2d ):
+/* Per point: DST0 = MAT0*SRC0 + MAT4*SRC1 + MAT12, DST1 = MAT1*SRC0 + MAT5*SRC1 + MAT13, DST2 = SRC2. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_2dr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_2dr_loop):
+/* Stack comments list ST0 first; F4 and F5 accumulate the values stored to DST0 and DST1. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+
+	FLD_S( SRC1 )			/* F0 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F0 F1 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F5 F4 */
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FADD_S( MAT12 )
+	FXCH( ST(1) )			/* F5 F4 */
+	FADD_S( MAT13 )
+
+	MOV_L( SRC2, EBX )		/* third component is read as a raw 32-bit word before the stores */
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FSTP_S( DST0   )		/* F5 */
+	FSTP_S( DST1   )		/* */
+	MOV_L( EBX, DST2 )		/* DST2 = SRC2, copied unchanged */
+
+LLBL(x86_p3_2dr_skip):
+/* No jump in this routine targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_2dr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p3_2dr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
+GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
+/* Per point: DST0 = MAT0*SRC0 + MAT12, DST1 = MAT5*SRC1 + MAT13, DST2 = SRC2 (copied unchanged). */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_2dnrr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p3_2dnrr_loop):
+/* Stack comments list ST0 first; F4 and F5 are the values stored to DST0 and DST1. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F1 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F4 F1 */
+	FADD_S( MAT12 )
+	FLD_S( MAT13 )		/* F5 F4 F1 */
+
+	FXCH( ST(2) )			/* F1 F4 F5 */
+	FADDP( ST0, ST(2) )		/* F4 F5 */
+
+	MOV_L( SRC2, EBX )		/* third component is read as a raw 32-bit word before the stores */
+
+	FSTP_S( DST0 )		/* F5 */
+	FSTP_S( DST1 )		/* */
+	MOV_L( EBX, DST2 )		/* DST2 = SRC2, copied unchanged */
+
+LLBL(x86_p3_2dnrr_skip):
+/* No jump in this routine targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_2dnrr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p3_2dnrr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points3_identity )
+HIDDEN(_mesa_x86_transform_points3_identity)
+GLNAME(_mesa_x86_transform_points3_identity ):
+/* Per point: DST0..DST2 = SRC0..SRC2, copied as raw 32-bit words; the matrix contents are never read. */
+#define FRAME_OFFSET 16
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+	PUSH_L( EBP )
+/* FRAME_OFFSET counts the bytes pushed above (4 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* loaded but not dereferenced; EDX is reused as scratch below */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p3_ir_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+	CMP_L( ESI, EDI )
+	JE( LLBL(x86_p3_ir_done) )		/* same data pointer: header is updated, nothing to copy */
+
+ALIGNTEXT16
+LLBL(x86_p3_ir_loop):
+/* The "#if 1" branch copies through integer registers; the x87 load/store variant is compiled out. */
+#if 1
+	MOV_L( SRC0, EBX )
+	MOV_L( SRC1, EBP )
+	MOV_L( SRC2, EDX )
+
+	MOV_L( EBX, DST0 )
+	MOV_L( EBP, DST1 )
+	MOV_L( EDX, DST2 )
+#else
+	FLD_S( SRC0 )
+	FLD_S( SRC1 )
+	FLD_S( SRC2 )
+
+	FSTP_S( DST2 )
+	FSTP_S( DST1 )
+	FSTP_S( DST0 )
+#endif
+
+LLBL(x86_p3_ir_skip):
+/* No jump in this routine targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p3_ir_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p3_ir_done):
+
+	POP_L( EBP )
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/x86_xform4.S b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform4.S
new file mode 100644
index 000000000..79e6b657a
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/x86_xform4.S
@@ -0,0 +1,673 @@
+/* $Id: x86_xform4.S,v 1.1.1.2 2005/07/31 16:46:39 ajax Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
+ * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
+ * in there will break the build on some platforms.
+ */
+
+#include "matypes.h"
+#include "xform_args.h"
+
+	SEG_TEXT
+/* 32-bit patterns: 1065353216 == 0x3F800000 (IEEE-754 single 1.0), 0 == +0.0.  No routine in this file references them. */
+#define FP_ONE		1065353216
+#define FP_ZERO		0
+/* Operand shorthands: SRCn / DSTn are 4-byte element n at ESI / EDI; MATn is 4-byte element n at EDX. */
+#define SRC0		REGOFF(0, ESI)
+#define SRC1		REGOFF(4, ESI)
+#define SRC2		REGOFF(8, ESI)
+#define SRC3		REGOFF(12, ESI)
+#define DST0		REGOFF(0, EDI)
+#define DST1		REGOFF(4, EDI)
+#define DST2		REGOFF(8, EDI)
+#define DST3		REGOFF(12, EDI)
+#define MAT0		REGOFF(0, EDX)
+#define MAT1		REGOFF(4, EDX)
+#define MAT2		REGOFF(8, EDX)
+#define MAT3		REGOFF(12, EDX)
+#define MAT4		REGOFF(16, EDX)
+#define MAT5		REGOFF(20, EDX)
+#define MAT6		REGOFF(24, EDX)
+#define MAT7		REGOFF(28, EDX)
+#define MAT8		REGOFF(32, EDX)
+#define MAT9		REGOFF(36, EDX)
+#define MAT10		REGOFF(40, EDX)
+#define MAT11		REGOFF(44, EDX)
+#define MAT12		REGOFF(48, EDX)
+#define MAT13		REGOFF(52, EDX)
+#define MAT14		REGOFF(56, EDX)
+#define MAT15		REGOFF(60, EDX)
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_general )
+HIDDEN(_mesa_x86_transform_points4_general)
+GLNAME( _mesa_x86_transform_points4_general ):
+/* Per point, for j = 0..3: DSTj = MAT(j)*SRC0 + MAT(4+j)*SRC1 + MAT(8+j)*SRC2 + MAT(12+j)*SRC3. */
+#define FRAME_OFFSET 8
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+/* FRAME_OFFSET counts the bytes pushed above (2 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_gr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_gr_loop):
+/* Stack comments list ST0 first; F4..F7 accumulate DST0..DST3, F0..F3 are products folded into them. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
+	FMUL_S( MAT3 )
+
+	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT6 )
+	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT7 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT10 )
+	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT11 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
+	FMUL_S( MAT12 )
+	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT13 )
+	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT14 )
+	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
+	FMUL_S( MAT15 )
+
+	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
+	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
+
+	FXCH( ST(3) )			/* F4 F6 F5 F7 */
+	FSTP_S( DST0 )		/* F6 F5 F7 */
+	FXCH( ST(1) )			/* F5 F6 F7 */
+	FSTP_S( DST1 )		/* F6 F7 */
+	FSTP_S( DST2 )		/* F7 */
+	FSTP_S( DST3 )		/* */
+
+LLBL(x86_p4_gr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_gr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_gr_done):
+
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
+HIDDEN(_mesa_x86_transform_points4_perspective)
+GLNAME( _mesa_x86_transform_points4_perspective ):
+/* DST0 = MAT0*SRC0 + MAT8*SRC2, DST1 = MAT5*SRC1 + MAT9*SRC2, DST2 = MAT10*SRC2 + MAT14*SRC3, DST3 = -SRC2. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_pr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_pr_loop):
+/* Stack comments list ST0 first; F4, F5, F6 accumulate the values stored to DST0, DST1, DST2. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F5 F4 */
+	FMUL_S( MAT5 )
+
+	FLD_S( SRC2 )			/* F0 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
+	FMUL_S( MAT10 )
+
+	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
+	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
+	FADDP( ST0, ST(2) )		/* F6 F5 F4 */
+
+	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
+	FMUL_S( MAT14 )
+
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	MOV_L( SRC2, EBX )		/* raw 32-bit word of SRC2, read before the stores */
+	XOR_L( CONST(-2147483648), EBX )/* change sign: flips bit 31 (0x80000000) */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FSTP_S( DST0 )		/* F5 F6 */
+	FSTP_S( DST1 )		/* F6 */
+	FSTP_S( DST2 )		/* */
+	MOV_L( EBX, DST3 )		/* DST3 = SRC2 with its sign bit inverted */
+
+LLBL(x86_p4_pr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_pr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_pr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_3d )
+HIDDEN(_mesa_x86_transform_points4_3d)
+GLNAME( _mesa_x86_transform_points4_3d ):
+/* For j = 0..2: DSTj = MAT(j)*SRC0 + MAT(4+j)*SRC1 + MAT(8+j)*SRC2 + MAT(12+j)*SRC3; DST3 = SRC3. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_3dr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_3dr_loop):
+/* Stack comments list ST0 first; F4..F6 accumulate DST0..DST2, F0..F2 are products folded into them. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+	FLD_S( SRC0 )			/* F6 F5 F4 */
+	FMUL_S( MAT2 )
+
+	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT5 )
+	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT6 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT8 )
+	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT9 )
+	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT10 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT12 )
+	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT13 )
+	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT14 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	MOV_L( SRC3, EBX )		/* fourth component is read as a raw 32-bit word before the stores */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FSTP_S( DST0 )		/* F5 F6 */
+	FSTP_S( DST1 )		/* F6 */
+	FSTP_S( DST2 )		/* */
+	MOV_L( EBX, DST3 )		/* DST3 = SRC3, copied unchanged */
+
+LLBL(x86_p4_3dr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_3dr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_3dr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
+HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
+GLNAME(_mesa_x86_transform_points4_3d_no_rot):
+/* DST0 = MAT0*SRC0 + MAT12*SRC3, DST1 = MAT5*SRC1 + MAT13*SRC3, DST2 = MAT10*SRC2 + MAT14*SRC3, DST3 = SRC3. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_3dnrr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_3dnrr_loop):
+/* Stack comments list ST0 first; F4..F6 accumulate DST0..DST2, F0..F2 are the SRC3 products. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F5 F4 */
+	FMUL_S( MAT5 )
+
+	FLD_S( SRC2 )			/* F6 F5 F4 */
+	FMUL_S( MAT10 )
+
+	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
+	FMUL_S( MAT12 )
+	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
+	FMUL_S( MAT13 )
+	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
+	FMUL_S( MAT14 )
+
+	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
+
+	MOV_L( SRC3, EBX )		/* fourth component is read as a raw 32-bit word before the stores */
+
+	FXCH( ST(2) )			/* F4 F5 F6 */
+	FSTP_S( DST0   )		/* F5 F6 */
+	FSTP_S( DST1   )		/* F6 */
+	FSTP_S( DST2   )		/* */
+	MOV_L( EBX, DST3 )		/* DST3 = SRC3, copied unchanged */
+
+LLBL(x86_p4_3dnrr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_3dnrr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_3dnrr_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_2d )
+HIDDEN(_mesa_x86_transform_points4_2d)
+GLNAME( _mesa_x86_transform_points4_2d ):
+/* DST0 = MAT0*SRC0 + MAT4*SRC1 + MAT12*SRC3, DST1 = MAT1*SRC0 + MAT5*SRC1 + MAT13*SRC3, DST2 = SRC2, DST3 = SRC3. */
+#define FRAME_OFFSET 16
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+	PUSH_L( EBP )
+/* FRAME_OFFSET counts the bytes pushed above (4 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_2dr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_2dr_loop):
+/* Stack comments list ST0 first; F4 and F5 accumulate DST0 and DST1, F0 and F1 are products folded in. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+	FLD_S( SRC0 )			/* F5 F4 */
+	FMUL_S( MAT1 )
+
+	FLD_S( SRC1 )			/* F0 F5 F4 */
+	FMUL_S( MAT4 )
+	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT5 )
+
+	FXCH( ST(1) )			/* F0 F1 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F5 F4 */
+
+	FLD_S( SRC3 )			/* F0 F5 F4 */
+	FMUL_S( MAT12 )
+	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT13 )
+
+	FXCH( ST(1) )			/* F0 F1 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F5 F4 */
+
+	MOV_L( SRC2, EBX )		/* third and fourth components are read as raw words before the stores */
+	MOV_L( SRC3, EBP )
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FSTP_S( DST0 )		/* F5 */
+	FSTP_S( DST1 )		/* */
+	MOV_L( EBX, DST2 )		/* DST2 = SRC2, copied unchanged */
+	MOV_L( EBP, DST3 )		/* DST3 = SRC3, copied unchanged */
+
+LLBL(x86_p4_2dr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_2dr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_2dr_done):
+
+	POP_L( EBP )
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
+HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
+GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
+/* DST0 = MAT0*SRC0 + MAT12*SRC3, DST1 = MAT5*SRC1 + MAT13*SRC3, DST2 = SRC2, DST3 = SRC3. */
+#define FRAME_OFFSET 16
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+	PUSH_L( EBP )
+/* FRAME_OFFSET counts the bytes pushed above (4 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* EDX = matrix base used by MATn */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_2dnrr_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+ALIGNTEXT16
+LLBL(x86_p4_2dnrr_loop):
+/* Stack comments list ST0 first; F4 and F5 accumulate DST0 and DST1, F0 and F1 are the SRC3 products. */
+	FLD_S( SRC0 )			/* F4 */
+	FMUL_S( MAT0 )
+
+	FLD_S( SRC1 )			/* F5 F4 */
+	FMUL_S( MAT5 )
+
+	FLD_S( SRC3 )			/* F0 F5 F4 */
+	FMUL_S( MAT12 )
+	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
+	FMUL_S( MAT13 )
+
+	FXCH( ST(1) )			/* F0 F1 F5 F4 */
+	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
+	FADDP( ST0, ST(1) )		/* F5 F4 */
+
+	MOV_L( SRC2, EBX )		/* third and fourth components are read as raw words before the stores */
+	MOV_L( SRC3, EBP )
+
+	FXCH( ST(1) )			/* F4 F5 */
+	FSTP_S( DST0   )		/* F5 */
+	FSTP_S( DST1   )		/* */
+	MOV_L( EBX, DST2 )		/* DST2 = SRC2, copied unchanged */
+	MOV_L( EBP, DST3 )		/* DST3 = SRC3, copied unchanged */
+
+LLBL(x86_p4_2dnrr_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_2dnrr_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_2dnrr_done):
+
+	POP_L( EBP )
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
+#undef FRAME_OFFSET
+
+
+
+
+ALIGNTEXT16
+GLOBL GLNAME( _mesa_x86_transform_points4_identity )
+HIDDEN(_mesa_x86_transform_points4_identity)
+GLNAME( _mesa_x86_transform_points4_identity ):
+/* Per point: DST0..DST3 = SRC0..SRC3, copied as raw 32-bit words; the matrix contents are never read. */
+#define FRAME_OFFSET 12
+	PUSH_L( ESI )
+	PUSH_L( EDI )
+	PUSH_L( EBX )
+/* FRAME_OFFSET counts the bytes pushed above (3 x 4); the ARG_* macros add it to their ESP offsets. */
+	MOV_L( ARG_SOURCE, ESI )
+	MOV_L( ARG_DEST, EDI )
+
+	MOV_L( ARG_MATRIX, EDX )		/* loaded but not dereferenced; EDX is reused as scratch below */
+	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source count */
+
+	TEST_L( ECX, ECX )
+	JZ( LLBL(x86_p4_ir_done) )		/* zero count: return before any write to dest */
+
+	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride, added to ESI per point */
+	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
+
+	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
+	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
+
+	SHL_L( CONST(4), ECX )			/* ECX = count * 16 */
+	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = source data pointer */
+
+	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = dest data pointer */
+	ADD_L( EDI, ECX )			/* ECX = dest end pointer (loop bound) */
+
+	CMP_L( ESI, EDI )
+	JE( LLBL(x86_p4_ir_done) )		/* same data pointer: header is updated, nothing to copy */
+
+ALIGNTEXT16
+LLBL(x86_p4_ir_loop):
+/* Two words at a time through EBX / EDX: components 0 and 1 first, then 2 and 3. */
+	MOV_L( SRC0, EBX )
+	MOV_L( SRC1, EDX )
+
+	MOV_L( EBX, DST0 )
+	MOV_L( EDX, DST1 )
+
+	MOV_L( SRC2, EBX )
+	MOV_L( SRC3, EDX )
+
+	MOV_L( EBX, DST2 )
+	MOV_L( EDX, DST3 )
+
+LLBL(x86_p4_ir_skip):
+/* No jump in this file targets the label above.  Dest advances 16 bytes, source advances by EAX. */
+	ADD_L( CONST(16), EDI )
+	ADD_L( EAX, ESI )
+	CMP_L( ECX, EDI )
+	JNE( LLBL(x86_p4_ir_loop) )		/* repeat until EDI reaches the end pointer */
+
+LLBL(x86_p4_ir_done):
+
+	POP_L( EBX )
+	POP_L( EDI )
+	POP_L( ESI )
+	RET
diff --git a/nx-X11/extras/Mesa/src/mesa/x86/xform_args.h b/nx-X11/extras/Mesa/src/mesa/x86/xform_args.h
new file mode 100644
index 000000000..349e47ac6
--- /dev/null
+++ b/nx-X11/extras/Mesa/src/mesa/x86/xform_args.h
@@ -0,0 +1,52 @@
+/* $Id: xform_args.h,v 1.1.1.1 2004/06/16 09:19:37 anholt Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Transform function interface for assembly code.  Simply define
+ * FRAME_OFFSET to the number of bytes pushed onto the stack before
+ * using the ARG_* argument macros.
+ *
+ * Gareth Hughes
+ */
+
+#ifndef __XFORM_ARGS_H__
+#define __XFORM_ARGS_H__
+
+/* Offsets for transform_func arguments
+ *
+ * typedef void (*transform_func)( GLvector4f *to_vec,
+ *				   const GLfloat m[16],
+ *				   const GLvector4f *from_vec );
+ */
+#define OFFSET_DEST	4
+#define OFFSET_MATRIX	8
+#define OFFSET_SOURCE	12
+/* ESP-relative operands for to_vec / m / from_vec; the including file must #define FRAME_OFFSET first. */
+#define ARG_DEST	REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
+#define ARG_MATRIX 	REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
+#define ARG_SOURCE 	REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
+
+#endif /* __XFORM_ARGS_H__ */
author	Reinhard Tartler <siretart@tauware.de>	2011-10-10 17:43:39 +0200
committer	Reinhard Tartler <siretart@tauware.de>	2011-10-10 17:43:39 +0200
commit	f4092abdf94af6a99aff944d6264bc1284e8bdd4 (patch)
tree	2ac1c9cc16ceb93edb2c4382c088dac5aeafdf0f /nx-X11/extras/Mesa/src/mesa/x86
parent	a840692edc9c6d19cd7c057f68e39c7d95eb767d (diff)
download	nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.tar.gz nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.tar.bz2 nx-libs-f4092abdf94af6a99aff944d6264bc1284e8bdd4.zip