From d105412503ea250e07d3cb008f10f60e6e48bf8a Mon Sep 17 00:00:00 2001
From: marha <marha@users.sourceforge.net>
Date: Fri, 5 Aug 2011 08:12:52 +0200
Subject: mesa pixman git update 5 aug 2011

---
 mesalib/src/glsl/ast_to_hir.cpp                    |   10 +-
 mesalib/src/glsl/glsl_parser.yy                    | 3506 +++++++-------
 mesalib/src/mesa/SConscript                        |    1 +
 mesalib/src/mesa/main/compiler.h                   |   24 +-
 mesalib/src/mesa/main/enable.c                     |   11 +-
 mesalib/src/mesa/main/ff_fragment_shader.cpp       |    3 +-
 mesalib/src/mesa/main/ffvertex_prog.c              | 3360 ++++++-------
 mesalib/src/mesa/main/nvprogram.c                  | 1840 ++++----
 mesalib/src/mesa/main/texcompress_rgtc_tmp.h       |    2 +-
 mesalib/src/mesa/main/texobj.c                     |    6 +-
 mesalib/src/mesa/main/texparam.c                   |  304 +-
 mesalib/src/mesa/main/uniforms.c                   |   46 +-
 mesalib/src/mesa/program/ir_to_mesa.cpp            |    8 +-
 mesalib/src/mesa/program/nvfragparse.c             | 3183 ++++++-------
 mesalib/src/mesa/program/prog_execute.c            | 3790 +++++++--------
 mesalib/src/mesa/program/prog_parameter.c          | 1336 +++---
 mesalib/src/mesa/program/prog_parameter.h          |   30 +-
 mesalib/src/mesa/program/prog_parameter_layout.c   |    2 +-
 mesalib/src/mesa/program/prog_print.c              |    2 +-
 mesalib/src/mesa/program/prog_statevars.c          |    2 +-
 mesalib/src/mesa/program/program.c                 |    8 +-
 mesalib/src/mesa/program/program_parse.y           |   56 +-
 mesalib/src/mesa/program/program_parser.h          |    3 +-
 mesalib/src/mesa/program/sampler.cpp               |    2 +-
 mesalib/src/mesa/sources.mak                       |    3 +-
 .../src/mesa/state_tracker/st_atom_pixeltransfer.c |   22 +-
 mesalib/src/mesa/state_tracker/st_cb_bitmap.c      | 1811 +++----
 mesalib/src/mesa/state_tracker/st_cb_blit.c        |  117 +-
 mesalib/src/mesa/state_tracker/st_cb_drawpixels.c  |   44 +
 mesalib/src/mesa/state_tracker/st_cb_program.c     |  504 +-
 mesalib/src/mesa/state_tracker/st_extensions.c     |   11 +
 mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4961 ++++++++++++++++++++
 mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.h   |   72 +
 mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c   |    6 +-
 mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.h   |  152 +-
 mesalib/src/mesa/state_tracker/st_program.c        |  412 +-
 mesalib/src/mesa/state_tracker/st_program.h        |  627 +--
 mesalib/src/mesa/state_tracker/st_texture.c        |  816 ++--
 mesalib/src/mesa/state_tracker/st_texture.h        |    4 +
 mesalib/src/mesa/vbo/vbo_exec_array.c              |    4 +-
 40 files changed, 16179 insertions(+), 10922 deletions(-)
 create mode 100644 mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
 create mode 100644 mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.h

(limited to 'mesalib/src')

diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index c0524bf0b..7da146119 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -2399,12 +2399,12 @@ ast_declarator_list::hir(exec_list *instructions,
 
    decl_type = this->type->specifier->glsl_type(& type_name, state);
    if (this->declarations.is_empty()) {
-      /* The only valid case where the declaration list can be empty is when
-       * the declaration is setting the default precision of a built-in type
-       * (e.g., 'precision highp vec4;').
-       */
-
       if (decl_type != NULL) {
+	 /* Warn if this empty declaration is not for declaring a structure.
+	  */
+	 if (this->type->specifier->structure == NULL) {
+	    _mesa_glsl_warning(&loc, state, "empty declaration");
+	 }
       } else {
 	    _mesa_glsl_error(& loc, state, "incomplete declaration");
       }
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index 29849c1e9..1851f1e20 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -1,1755 +1,1751 @@
-%{
-/*
- * Copyright © 2008, 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-    
-#include "ast.h"
-#include "glsl_parser_extras.h"
-#include "glsl_types.h"
-
-#define YYLEX_PARAM state->scanner
-
-%}
-
-%pure-parser
-%error-verbose
-
-%locations
-%initial-action {
-   @$.first_line = 1;
-   @$.first_column = 1;
-   @$.last_line = 1;
-   @$.last_column = 1;
-   @$.source = 0;
-}
-
-%lex-param   {void *scanner}
-%parse-param {struct _mesa_glsl_parse_state *state}
-
-%union {
-   int n;
-   float real;
-   char *identifier;
-
-   struct ast_type_qualifier type_qualifier;
-
-   ast_node *node;
-   ast_type_specifier *type_specifier;
-   ast_fully_specified_type *fully_specified_type;
-   ast_function *function;
-   ast_parameter_declarator *parameter_declarator;
-   ast_function_definition *function_definition;
-   ast_compound_statement *compound_statement;
-   ast_expression *expression;
-   ast_declarator_list *declarator_list;
-   ast_struct_specifier *struct_specifier;
-   ast_declaration *declaration;
-
-   struct {
-      ast_node *cond;
-      ast_expression *rest;
-   } for_rest_statement;
-
-   struct {
-      ast_node *then_statement;
-      ast_node *else_statement;
-   } selection_rest_statement;
-}
-
-%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK
-%token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
-%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4
-%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING
-%token NOPERSPECTIVE FLAT SMOOTH
-%token MAT2X2 MAT2X3 MAT2X4
-%token MAT3X2 MAT3X3 MAT3X4
-%token MAT4X2 MAT4X3 MAT4X4
-%token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
-%token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW
-%token SAMPLER2DARRAYSHADOW ISAMPLER1D ISAMPLER2D ISAMPLER3D ISAMPLERCUBE
-%token ISAMPLER1DARRAY ISAMPLER2DARRAY USAMPLER1D USAMPLER2D USAMPLER3D
-%token USAMPLERCUBE USAMPLER1DARRAY USAMPLER2DARRAY
-%token STRUCT VOID_TOK WHILE
-%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
-%type <identifier> any_identifier
-%token <real> FLOATCONSTANT
-%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
-%token <identifier> FIELD_SELECTION
-%token LEFT_OP RIGHT_OP
-%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
-%token AND_OP OR_OP XOR_OP MUL_ASSIGN DIV_ASSIGN ADD_ASSIGN
-%token MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
-%token SUB_ASSIGN
-%token INVARIANT
-%token LOWP MEDIUMP HIGHP SUPERP PRECISION
-
-%token VERSION EXTENSION LINE COLON EOL INTERFACE OUTPUT
-%token PRAGMA_DEBUG_ON PRAGMA_DEBUG_OFF
-%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF
-%token PRAGMA_INVARIANT_ALL
-%token LAYOUT_TOK
-
-   /* Reserved words that are not actually used in the grammar.
-    */
-%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
-%token INLINE_TOK NOINLINE VOLATILE PUBLIC_TOK STATIC EXTERN EXTERNAL
-%token LONG_TOK SHORT_TOK DOUBLE_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK OUPTUT
-%token HVEC2 HVEC3 HVEC4 DVEC2 DVEC3 DVEC4 FVEC2 FVEC3 FVEC4
-%token SAMPLER2DRECT SAMPLER3DRECT SAMPLER2DRECTSHADOW
-%token SIZEOF CAST NAMESPACE USING
-
-%token ERROR_TOK
-
-%token COMMON PARTITION ACTIVE SAMPLERBUFFER FILTER
-%token  IMAGE1D  IMAGE2D  IMAGE3D  IMAGECUBE  IMAGE1DARRAY  IMAGE2DARRAY
-%token IIMAGE1D IIMAGE2D IIMAGE3D IIMAGECUBE IIMAGE1DARRAY IIMAGE2DARRAY
-%token UIMAGE1D UIMAGE2D UIMAGE3D UIMAGECUBE UIMAGE1DARRAY UIMAGE2DARRAY
-%token IMAGE1DSHADOW IMAGE2DSHADOW IMAGEBUFFER IIMAGEBUFFER UIMAGEBUFFER
-%token IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW
-%token ROW_MAJOR
-
-%type <identifier> variable_identifier
-%type <node> statement
-%type <node> statement_list
-%type <node> simple_statement
-%type <n> precision_qualifier
-%type <type_qualifier> type_qualifier
-%type <type_qualifier> storage_qualifier
-%type <type_qualifier> interpolation_qualifier
-%type <type_qualifier> layout_qualifier
-%type <type_qualifier> layout_qualifier_id_list layout_qualifier_id
-%type <type_specifier> type_specifier
-%type <type_specifier> type_specifier_no_prec
-%type <type_specifier> type_specifier_nonarray
-%type <n> basic_type_specifier_nonarray
-%type <fully_specified_type> fully_specified_type
-%type <function> function_prototype
-%type <function> function_header
-%type <function> function_header_with_parameters
-%type <function> function_declarator
-%type <parameter_declarator> parameter_declarator
-%type <parameter_declarator> parameter_declaration
-%type <type_qualifier> parameter_qualifier
-%type <type_qualifier> parameter_type_qualifier
-%type <type_specifier> parameter_type_specifier
-%type <function_definition> function_definition
-%type <compound_statement> compound_statement_no_new_scope
-%type <compound_statement> compound_statement
-%type <node> statement_no_new_scope
-%type <node> expression_statement
-%type <expression> expression
-%type <expression> primary_expression
-%type <expression> assignment_expression
-%type <expression> conditional_expression
-%type <expression> logical_or_expression
-%type <expression> logical_xor_expression
-%type <expression> logical_and_expression
-%type <expression> inclusive_or_expression
-%type <expression> exclusive_or_expression
-%type <expression> and_expression
-%type <expression> equality_expression
-%type <expression> relational_expression
-%type <expression> shift_expression
-%type <expression> additive_expression
-%type <expression> multiplicative_expression
-%type <expression> unary_expression
-%type <expression> constant_expression
-%type <expression> integer_expression
-%type <expression> postfix_expression
-%type <expression> function_call_header_with_parameters
-%type <expression> function_call_header_no_parameters
-%type <expression> function_call_header
-%type <expression> function_call_generic
-%type <expression> function_call_or_method
-%type <expression> function_call
-%type <expression> method_call_generic
-%type <expression> method_call_header_with_parameters
-%type <expression> method_call_header_no_parameters
-%type <expression> method_call_header
-%type <n> assignment_operator
-%type <n> unary_operator
-%type <expression> function_identifier
-%type <node> external_declaration
-%type <declarator_list> init_declarator_list
-%type <declarator_list> single_declaration
-%type <expression> initializer
-%type <node> declaration
-%type <node> declaration_statement
-%type <node> jump_statement
-%type <struct_specifier> struct_specifier
-%type <node> struct_declaration_list
-%type <declarator_list> struct_declaration
-%type <declaration> struct_declarator
-%type <declaration> struct_declarator_list
-%type <node> selection_statement
-%type <selection_rest_statement> selection_rest_statement
-%type <node> iteration_statement
-%type <node> condition
-%type <node> conditionopt
-%type <node> for_init_statement
-%type <for_rest_statement> for_rest_statement
-%%
-
-translation_unit: 
-	version_statement extension_statement_list
-	{
-	   _mesa_glsl_initialize_types(state);
-	}
-	external_declaration_list
-	{
-	   delete state->symbols;
-	   state->symbols = new(ralloc_parent(state)) glsl_symbol_table;
-	   _mesa_glsl_initialize_types(state);
-	}
-	;
-
-version_statement:
-	/* blank - no #version specified: defaults are already set */
-	| VERSION INTCONSTANT EOL
-	{
-	   bool supported = false;
-
-	   switch ($2) {
-	   case 100:
-	      state->es_shader = true;
-	      supported = state->Const.GLSL_100ES;
-	      break;
-	   case 110:
-	      supported = state->Const.GLSL_110;
-	      break;
-	   case 120:
-	      supported = state->Const.GLSL_120;
-	      break;
-	   case 130:
-	      supported = state->Const.GLSL_130;
-	      break;
-	   default:
-	      supported = false;
-	      break;
-	   }
-
-	   state->language_version = $2;
-	   state->version_string =
-	      ralloc_asprintf(state, "GLSL%s %d.%02d",
-			      state->es_shader ? " ES" : "",
-			      state->language_version / 100,
-			      state->language_version % 100);
-
-	   if (!supported) {
-	      _mesa_glsl_error(& @2, state, "%s is not supported. "
-			       "Supported versions are: %s\n",
-			       state->version_string,
-			       state->supported_version_string);
-	   }
-	}
-	;
-
-pragma_statement:
-	PRAGMA_DEBUG_ON EOL
-	| PRAGMA_DEBUG_OFF EOL
-	| PRAGMA_OPTIMIZE_ON EOL
-	| PRAGMA_OPTIMIZE_OFF EOL
-	| PRAGMA_INVARIANT_ALL EOL
-	{
-	   if (state->language_version < 120) {
-	      _mesa_glsl_warning(& @1, state,
-				 "pragma `invariant(all)' not supported in %s",
-				 state->version_string);
-	   } else {
-	      state->all_invariant = true;
-	   }
-	}
-	;
-
-extension_statement_list:
-
-	| extension_statement_list extension_statement
-	;
-
-any_identifier:
-	IDENTIFIER
-	| TYPE_IDENTIFIER
-	| NEW_IDENTIFIER
-	;
-
-extension_statement:
-	EXTENSION any_identifier COLON any_identifier EOL
-	{
-	   if (!_mesa_glsl_process_extension($2, & @2, $4, & @4, state)) {
-	      YYERROR;
-	   }
-	}
-	;
-
-external_declaration_list:
-	external_declaration
-	{
-	   /* FINISHME: The NULL test is required because pragmas are set to
-	    * FINISHME: NULL. (See production rule for external_declaration.)
-	    */
-	   if ($1 != NULL)
-	      state->translation_unit.push_tail(& $1->link);
-	}
-	| external_declaration_list external_declaration
-	{
-	   /* FINISHME: The NULL test is required because pragmas are set to
-	    * FINISHME: NULL. (See production rule for external_declaration.)
-	    */
-	   if ($2 != NULL)
-	      state->translation_unit.push_tail(& $2->link);
-	}
-	;
-
-variable_identifier:
-	IDENTIFIER
-	| NEW_IDENTIFIER
-	;
-
-primary_expression:
-	variable_identifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_identifier, NULL, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.identifier = $1;
-	}
-	| INTCONSTANT
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_int_constant, NULL, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.int_constant = $1;
-	}
-	| UINTCONSTANT
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_uint_constant, NULL, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.uint_constant = $1;
-	}
-	| FLOATCONSTANT
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_float_constant, NULL, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.float_constant = $1;
-	}
-	| BOOLCONSTANT
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_bool_constant, NULL, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.bool_constant = $1;
-	}
-	| '(' expression ')'
-	{
-	   $$ = $2;
-	}
-	;
-
-postfix_expression:
-	primary_expression
-	| postfix_expression '[' integer_expression ']'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_array_index, $1, $3, NULL);
-	   $$->set_location(yylloc);
-	}
-	| function_call
-	{
-	   $$ = $1;
-	}
-	| postfix_expression '.' any_identifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL);
-	   $$->set_location(yylloc);
-	   $$->primary_expression.identifier = $3;
-	}
-	| postfix_expression INC_OP
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_post_inc, $1, NULL, NULL);
-	   $$->set_location(yylloc);
-	}
-	| postfix_expression DEC_OP
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_post_dec, $1, NULL, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-integer_expression:
-	expression
-	;
-
-function_call:
-	function_call_or_method
-	;
-
-function_call_or_method:
-	function_call_generic
-	| postfix_expression '.' method_call_generic
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_field_selection, $1, $3, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-function_call_generic:
-	function_call_header_with_parameters ')'
-	| function_call_header_no_parameters ')'
-	;
-
-function_call_header_no_parameters:
-	function_call_header VOID_TOK
-	| function_call_header
-	;
-
-function_call_header_with_parameters:
-	function_call_header assignment_expression
-	{
-	   $$ = $1;
-	   $$->set_location(yylloc);
-	   $$->expressions.push_tail(& $2->link);
-	}
-	| function_call_header_with_parameters ',' assignment_expression
-	{
-	   $$ = $1;
-	   $$->set_location(yylloc);
-	   $$->expressions.push_tail(& $3->link);
-	}
-	;
-
-	// Grammar Note: Constructors look like functions, but lexical 
-	// analysis recognized most of them as keywords. They are now
-	// recognized through "type_specifier".
-function_call_header:
-	function_identifier '('
-	;
-
-function_identifier:
-	type_specifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_function_expression($1);
-	   $$->set_location(yylloc);
-   	}
-	| variable_identifier
-	{
-	   void *ctx = state;
-	   ast_expression *callee = new(ctx) ast_expression($1);
-	   $$ = new(ctx) ast_function_expression(callee);
-	   $$->set_location(yylloc);
-   	}
-	| FIELD_SELECTION
-	{
-	   void *ctx = state;
-	   ast_expression *callee = new(ctx) ast_expression($1);
-	   $$ = new(ctx) ast_function_expression(callee);
-	   $$->set_location(yylloc);
-   	}
-	;
-
-method_call_generic:
-	method_call_header_with_parameters ')'
-	| method_call_header_no_parameters ')'
-	;
-
-method_call_header_no_parameters:
-	method_call_header VOID_TOK
-	| method_call_header
-	;
-
-method_call_header_with_parameters:
-	method_call_header assignment_expression
-	{
-	   $$ = $1;
-	   $$->set_location(yylloc);
-	   $$->expressions.push_tail(& $2->link);
-	}
-	| method_call_header_with_parameters ',' assignment_expression
-	{
-	   $$ = $1;
-	   $$->set_location(yylloc);
-	   $$->expressions.push_tail(& $3->link);
-	}
-	;
-
-	// Grammar Note: Constructors look like methods, but lexical 
-	// analysis recognized most of them as keywords. They are now
-	// recognized through "type_specifier".
-method_call_header:
-	variable_identifier '('
-	{
-	   void *ctx = state;
-	   ast_expression *callee = new(ctx) ast_expression($1);
-	   $$ = new(ctx) ast_function_expression(callee);
-	   $$->set_location(yylloc);
-   	}
-	;
-
-	// Grammar Note: No traditional style type casts.
-unary_expression:
-	postfix_expression
-	| INC_OP unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_pre_inc, $2, NULL, NULL);
-	   $$->set_location(yylloc);
-	}
-	| DEC_OP unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_pre_dec, $2, NULL, NULL);
-	   $$->set_location(yylloc);
-	}
-	| unary_operator unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression($1, $2, NULL, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-	// Grammar Note: No '*' or '&' unary ops. Pointers are not supported.
-unary_operator:
-	'+'	{ $$ = ast_plus; }
-	| '-'	{ $$ = ast_neg; }
-	| '!'	{ $$ = ast_logic_not; }
-	| '~'	{ $$ = ast_bit_not; }
-	;
-
-multiplicative_expression:
-	unary_expression
-	| multiplicative_expression '*' unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_mul, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| multiplicative_expression '/' unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_div, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| multiplicative_expression '%' unary_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_mod, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-additive_expression:
-	multiplicative_expression
-	| additive_expression '+' multiplicative_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_add, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| additive_expression '-' multiplicative_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_sub, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-shift_expression:
-	additive_expression
-	| shift_expression LEFT_OP additive_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_lshift, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| shift_expression RIGHT_OP additive_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_rshift, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-relational_expression:
-	shift_expression
-	| relational_expression '<' shift_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_less, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| relational_expression '>' shift_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_greater, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| relational_expression LE_OP shift_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_lequal, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| relational_expression GE_OP shift_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_gequal, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-equality_expression:
-	relational_expression
-	| equality_expression EQ_OP relational_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_equal, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	| equality_expression NE_OP relational_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_nequal, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-and_expression:
-	equality_expression
-	| and_expression '&' equality_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_bit_and, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-exclusive_or_expression:
-	and_expression
-	| exclusive_or_expression '^' and_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_bit_xor, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-inclusive_or_expression:
-	exclusive_or_expression
-	| inclusive_or_expression '|' exclusive_or_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_bit_or, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-logical_and_expression:
-	inclusive_or_expression
-	| logical_and_expression AND_OP inclusive_or_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_logic_and, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-logical_xor_expression:
-	logical_and_expression
-	| logical_xor_expression XOR_OP logical_and_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_logic_xor, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-logical_or_expression:
-	logical_xor_expression
-	| logical_or_expression OR_OP logical_xor_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_bin(ast_logic_or, $1, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-conditional_expression:
-	logical_or_expression
-	| logical_or_expression '?' expression ':' assignment_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression(ast_conditional, $1, $3, $5);
-	   $$->set_location(yylloc);
-	}
-	;
-
-assignment_expression:
-	conditional_expression
-	| unary_expression assignment_operator assignment_expression
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression($2, $1, $3, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-assignment_operator:
-	'='		{ $$ = ast_assign; }
-	| MUL_ASSIGN	{ $$ = ast_mul_assign; }
-	| DIV_ASSIGN	{ $$ = ast_div_assign; }
-	| MOD_ASSIGN	{ $$ = ast_mod_assign; }
-	| ADD_ASSIGN	{ $$ = ast_add_assign; }
-	| SUB_ASSIGN	{ $$ = ast_sub_assign; }
-	| LEFT_ASSIGN	{ $$ = ast_ls_assign; }
-	| RIGHT_ASSIGN	{ $$ = ast_rs_assign; }
-	| AND_ASSIGN	{ $$ = ast_and_assign; }
-	| XOR_ASSIGN	{ $$ = ast_xor_assign; }
-	| OR_ASSIGN	{ $$ = ast_or_assign; }
-	;
-
-expression:
-	assignment_expression
-	{
-	   $$ = $1;
-	}
-	| expression ',' assignment_expression
-	{
-	   void *ctx = state;
-	   if ($1->oper != ast_sequence) {
-	      $$ = new(ctx) ast_expression(ast_sequence, NULL, NULL, NULL);
-	      $$->set_location(yylloc);
-	      $$->expressions.push_tail(& $1->link);
-	   } else {
-	      $$ = $1;
-	   }
-
-	   $$->expressions.push_tail(& $3->link);
-	}
-	;
-
-constant_expression:
-	conditional_expression
-	;
-
-declaration:
-	function_prototype ';'
-	{
-	   state->symbols->pop_scope();
-	   $$ = $1;
-	}
-	| init_declarator_list ';'
-	{
-	   $$ = $1;
-	}
-	| PRECISION precision_qualifier type_specifier_no_prec ';'
-	{
-	   $3->precision = $2;
-	   $3->is_precision_statement = true;
-	   $$ = $3;
-	}
-	;
-
-function_prototype:
-	function_declarator ')'
-	;
-
-function_declarator:
-	function_header
-	| function_header_with_parameters
-	;
-
-function_header_with_parameters:
-	function_header parameter_declaration
-	{
-	   $$ = $1;
-	   $$->parameters.push_tail(& $2->link);
-	}
-	| function_header_with_parameters ',' parameter_declaration
-	{
-	   $$ = $1;
-	   $$->parameters.push_tail(& $3->link);
-	}
-	;
-
-function_header:
-	fully_specified_type variable_identifier '('
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_function();
-	   $$->set_location(yylloc);
-	   $$->return_type = $1;
-	   $$->identifier = $2;
-
-	   state->symbols->add_function(new(state) ir_function($2));
-	   state->symbols->push_scope();
-	}
-	;
-
-parameter_declarator:
-	type_specifier any_identifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_parameter_declarator();
-	   $$->set_location(yylloc);
-	   $$->type = new(ctx) ast_fully_specified_type();
-	   $$->type->set_location(yylloc);
-	   $$->type->specifier = $1;
-	   $$->identifier = $2;
-	}
-	| type_specifier any_identifier '[' constant_expression ']'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_parameter_declarator();
-	   $$->set_location(yylloc);
-	   $$->type = new(ctx) ast_fully_specified_type();
-	   $$->type->set_location(yylloc);
-	   $$->type->specifier = $1;
-	   $$->identifier = $2;
-	   $$->is_array = true;
-	   $$->array_size = $4;
-	}
-	;
-
-parameter_declaration:
-	parameter_type_qualifier parameter_qualifier parameter_declarator
-	{
-	   $1.flags.i |= $2.flags.i;
-
-	   $$ = $3;
-	   $$->type->qualifier = $1;
-	}
-	| parameter_qualifier parameter_declarator
-	{
-	   $$ = $2;
-	   $$->type->qualifier = $1;
-	}
-	| parameter_type_qualifier parameter_qualifier parameter_type_specifier
-	{
-	   void *ctx = state;
-	   $1.flags.i |= $2.flags.i;
-
-	   $$ = new(ctx) ast_parameter_declarator();
-	   $$->set_location(yylloc);
-	   $$->type = new(ctx) ast_fully_specified_type();
-	   $$->type->qualifier = $1;
-	   $$->type->specifier = $3;
-	}
-	| parameter_qualifier parameter_type_specifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_parameter_declarator();
-	   $$->set_location(yylloc);
-	   $$->type = new(ctx) ast_fully_specified_type();
-	   $$->type->qualifier = $1;
-	   $$->type->specifier = $2;
-	}
-	;
-
-parameter_qualifier:
-	/* empty */
-	{
-	   memset(& $$, 0, sizeof($$));
-	}
-	| IN_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.in = 1;
-	}
-	| OUT_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.out = 1;
-	}
-	| INOUT_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.in = 1;
-	   $$.flags.q.out = 1;
-	}
-	;
-
-parameter_type_specifier:
-	type_specifier
-	;
-
-init_declarator_list:
-	single_declaration
-	| init_declarator_list ',' any_identifier
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, false, NULL, NULL);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	| init_declarator_list ',' any_identifier '[' ']'
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, NULL);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	| init_declarator_list ',' any_identifier '[' constant_expression ']'
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, NULL);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	| init_declarator_list ',' any_identifier '[' ']' '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, $7);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	| init_declarator_list ',' any_identifier '[' constant_expression ']' '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, $8);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	| init_declarator_list ',' any_identifier '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($3, false, NULL, $5);
-	   decl->set_location(yylloc);
-
-	   $$ = $1;
-	   $$->declarations.push_tail(&decl->link);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
-	}
-	;
-
-	// Grammar Note: No 'enum', or 'typedef'.
-single_declaration:
-	fully_specified_type
-	{
-	   void *ctx = state;
-	   if ($1->specifier->type_specifier != ast_struct) {
-	      _mesa_glsl_error(& @1, state, "empty declaration list\n");
-	      YYERROR;
-	   } else {
-	      $$ = new(ctx) ast_declarator_list($1);
-	      $$->set_location(yylloc);
-	   }
-	}
-	| fully_specified_type any_identifier
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, NULL);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| fully_specified_type any_identifier '[' ']'
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, NULL);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| fully_specified_type any_identifier '[' constant_expression ']'
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, NULL);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| fully_specified_type any_identifier '[' ']' '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, $6);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| fully_specified_type any_identifier '[' constant_expression ']' '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, $7);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| fully_specified_type any_identifier '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, $4);
-
-	   $$ = new(ctx) ast_declarator_list($1);
-	   $$->set_location(yylloc);
-	   $$->declarations.push_tail(&decl->link);
-	}
-	| INVARIANT variable_identifier // Vertex only.
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, NULL);
-
-	   $$ = new(ctx) ast_declarator_list(NULL);
-	   $$->set_location(yylloc);
-	   $$->invariant = true;
-
-	   $$->declarations.push_tail(&decl->link);
-	}
-	;
-
-fully_specified_type:
-	type_specifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_fully_specified_type();
-	   $$->set_location(yylloc);
-	   $$->specifier = $1;
-	}
-	| type_qualifier type_specifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_fully_specified_type();
-	   $$->set_location(yylloc);
-	   $$->qualifier = $1;
-	   $$->specifier = $2;
-	}
-	;
-
-layout_qualifier:
-	LAYOUT_TOK '(' layout_qualifier_id_list ')'
-	{
-	  $$ = $3;
-	}
-	;
-
-layout_qualifier_id_list:
-	layout_qualifier_id
-	| layout_qualifier_id_list ',' layout_qualifier_id
-	{
-	   if (($1.flags.i & $3.flags.i) != 0) {
-	      _mesa_glsl_error(& @3, state,
-			       "duplicate layout qualifiers used\n");
-	      YYERROR;
-	   }
-
-	   $$.flags.i = $1.flags.i | $3.flags.i;
-
-	   if ($1.flags.q.explicit_location)
-	      $$.location = $1.location;
-
-	   if ($3.flags.q.explicit_location)
-	      $$.location = $3.location;
-	}
-	;
-
-layout_qualifier_id:
-	any_identifier
-	{
-	   bool got_one = false;
-
-	   memset(& $$, 0, sizeof($$));
-
-	   /* Layout qualifiers for ARB_fragment_coord_conventions. */
-	   if (!got_one && state->ARB_fragment_coord_conventions_enable) {
-	      if (strcmp($1, "origin_upper_left") == 0) {
-		 got_one = true;
-		 $$.flags.q.origin_upper_left = 1;
-	      } else if (strcmp($1, "pixel_center_integer") == 0) {
-		 got_one = true;
-		 $$.flags.q.pixel_center_integer = 1;
-	      }
-
-	      if (got_one && state->ARB_fragment_coord_conventions_warn) {
-		 _mesa_glsl_warning(& @1, state,
-				    "GL_ARB_fragment_coord_conventions layout "
-				    "identifier `%s' used\n", $1);
-	      }
-	   }
-
-	   /* Layout qualifiers for AMD_conservative_depth. */
-	   if (!got_one && state->AMD_conservative_depth_enable) {
-	      if (strcmp($1, "depth_any") == 0) {
-	         got_one = true;
-	         $$.flags.q.depth_any = 1;
-	      } else if (strcmp($1, "depth_greater") == 0) {
-	         got_one = true;
-	         $$.flags.q.depth_greater = 1;
-	      } else if (strcmp($1, "depth_less") == 0) {
-	         got_one = true;
-	         $$.flags.q.depth_less = 1;
-	      } else if (strcmp($1, "depth_unchanged") == 0) {
-	         got_one = true;
-	         $$.flags.q.depth_unchanged = 1;
-	      }
-	
-	      if (got_one && state->AMD_conservative_depth_warn) {
-	         _mesa_glsl_warning(& @1, state,
-	                            "GL_AMD_conservative_depth "
-	                            "layout qualifier `%s' is used\n", $1);
-	      }
-	   }
-
-	   if (!got_one) {
-	      _mesa_glsl_error(& @1, state, "unrecognized layout identifier "
-			       "`%s'\n", $1);
-	      YYERROR;
-	   }
-	}
-	| any_identifier '=' INTCONSTANT
-	{
-	   bool got_one = false;
-
-	   memset(& $$, 0, sizeof($$));
-
-	   if (state->ARB_explicit_attrib_location_enable) {
-	      /* FINISHME: Handle 'index' once GL_ARB_blend_func_exteneded and
-	       * FINISHME: GLSL 1.30 (or later) are supported.
-	       */
-	      if (strcmp("location", $1) == 0) {
-		 got_one = true;
-
-		 $$.flags.q.explicit_location = 1;
-
-		 if ($3 >= 0) {
-		    $$.location = $3;
-		 } else {
-		    _mesa_glsl_error(& @3, state,
-				     "invalid location %d specified\n", $3);
-		    YYERROR;
-		 }
-	      }
-	   }
-
-	   /* If the identifier didn't match any known layout identifiers,
-	    * emit an error.
-	    */
-	   if (!got_one) {
-	      _mesa_glsl_error(& @1, state, "unrecognized layout identifier "
-			       "`%s'\n", $1);
-	      YYERROR;
-	   } else if (state->ARB_explicit_attrib_location_warn) {
-	      _mesa_glsl_warning(& @1, state,
-				 "GL_ARB_explicit_attrib_location layout "
-				 "identifier `%s' used\n", $1);
-	   }
-	}
-	;
-
-interpolation_qualifier:
-	SMOOTH
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.smooth = 1;
-	}
-	| FLAT
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.flat = 1;
-	}
-	| NOPERSPECTIVE
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.noperspective = 1;
-	}
-	;
-
-parameter_type_qualifier:
-	CONST_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.constant = 1;
-	}
-	;
-
-type_qualifier:
-	storage_qualifier
-	| layout_qualifier
-	| layout_qualifier storage_qualifier
-	{
-	   $$ = $1;
-	   $$.flags.i |= $2.flags.i;
-	}
-	| interpolation_qualifier
-	| interpolation_qualifier storage_qualifier
-	{
-	   $$ = $1;
-	   $$.flags.i |= $2.flags.i;
-	}
-	| INVARIANT storage_qualifier
-	{
-	   $$ = $2;
-	   $$.flags.q.invariant = 1;
-	}
-	| INVARIANT interpolation_qualifier storage_qualifier
-	{
-	   $$ = $2;
-	   $$.flags.i |= $3.flags.i;
-	   $$.flags.q.invariant = 1;
-	}
-	| INVARIANT
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.invariant = 1;
-	}
-	;
-
-storage_qualifier:
-	CONST_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.constant = 1;
-	}
-	| ATTRIBUTE
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.attribute = 1;
-	}
-	| VARYING
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.varying = 1;
-	}
-	| CENTROID VARYING
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.centroid = 1;
-	   $$.flags.q.varying = 1;
-	}
-	| IN_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.in = 1;
-	}
-	| OUT_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.out = 1;
-	}
-	| CENTROID IN_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.centroid = 1; $$.flags.q.in = 1;
-	}
-	| CENTROID OUT_TOK
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.centroid = 1; $$.flags.q.out = 1;
-	}
-	| UNIFORM
-	{
-	   memset(& $$, 0, sizeof($$));
-	   $$.flags.q.uniform = 1;
-	}
-	;
-
-type_specifier:
-	type_specifier_no_prec
-	{
-	   $$ = $1;
-	}
-	| precision_qualifier type_specifier_no_prec
-	{
-	   $$ = $2;
-	   $$->precision = $1;
-	}
-	;
-
-type_specifier_no_prec:
-	type_specifier_nonarray
-	| type_specifier_nonarray '[' ']'
-	{
-	   $$ = $1;
-	   $$->is_array = true;
-	   $$->array_size = NULL;
-	}
-	| type_specifier_nonarray '[' constant_expression ']'
-	{
-	   $$ = $1;
-	   $$->is_array = true;
-	   $$->array_size = $3;
-	}
-	;
-
-type_specifier_nonarray:
-	basic_type_specifier_nonarray
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_type_specifier($1);
-	   $$->set_location(yylloc);
-	}
-	| struct_specifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_type_specifier($1);
-	   $$->set_location(yylloc);
-	}
-	| TYPE_IDENTIFIER
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_type_specifier($1);
-	   $$->set_location(yylloc);
-	}
-	;
-
-basic_type_specifier_nonarray:
-	VOID_TOK		{ $$ = ast_void; }
-	| FLOAT_TOK		{ $$ = ast_float; }
-	| INT_TOK		{ $$ = ast_int; }
-	| UINT_TOK		{ $$ = ast_uint; }
-	| BOOL_TOK		{ $$ = ast_bool; }
-	| VEC2			{ $$ = ast_vec2; }
-	| VEC3			{ $$ = ast_vec3; }
-	| VEC4			{ $$ = ast_vec4; }
-	| BVEC2			{ $$ = ast_bvec2; }
-	| BVEC3			{ $$ = ast_bvec3; }
-	| BVEC4			{ $$ = ast_bvec4; }
-	| IVEC2			{ $$ = ast_ivec2; }
-	| IVEC3			{ $$ = ast_ivec3; }
-	| IVEC4			{ $$ = ast_ivec4; }
-	| UVEC2			{ $$ = ast_uvec2; }
-	| UVEC3			{ $$ = ast_uvec3; }
-	| UVEC4			{ $$ = ast_uvec4; }
-	| MAT2X2		{ $$ = ast_mat2; }
-	| MAT2X3		{ $$ = ast_mat2x3; }
-	| MAT2X4		{ $$ = ast_mat2x4; }
-	| MAT3X2		{ $$ = ast_mat3x2; }
-	| MAT3X3		{ $$ = ast_mat3; }
-	| MAT3X4		{ $$ = ast_mat3x4; }
-	| MAT4X2		{ $$ = ast_mat4x2; }
-	| MAT4X3		{ $$ = ast_mat4x3; }
-	| MAT4X4		{ $$ = ast_mat4; }
-	| SAMPLER1D		{ $$ = ast_sampler1d; }
-	| SAMPLER2D		{ $$ = ast_sampler2d; }
-	| SAMPLER2DRECT		{ $$ = ast_sampler2drect; }
-	| SAMPLER3D		{ $$ = ast_sampler3d; }
-	| SAMPLERCUBE		{ $$ = ast_samplercube; }
-	| SAMPLER1DSHADOW	{ $$ = ast_sampler1dshadow; }
-	| SAMPLER2DSHADOW	{ $$ = ast_sampler2dshadow; }
-	| SAMPLER2DRECTSHADOW	{ $$ = ast_sampler2drectshadow; }
-	| SAMPLERCUBESHADOW	{ $$ = ast_samplercubeshadow; }
-	| SAMPLER1DARRAY	{ $$ = ast_sampler1darray; }
-	| SAMPLER2DARRAY	{ $$ = ast_sampler2darray; }
-	| SAMPLER1DARRAYSHADOW	{ $$ = ast_sampler1darrayshadow; }
-	| SAMPLER2DARRAYSHADOW	{ $$ = ast_sampler2darrayshadow; }
-	| ISAMPLER1D		{ $$ = ast_isampler1d; }
-	| ISAMPLER2D		{ $$ = ast_isampler2d; }
-	| ISAMPLER3D		{ $$ = ast_isampler3d; }
-	| ISAMPLERCUBE		{ $$ = ast_isamplercube; }
-	| ISAMPLER1DARRAY	{ $$ = ast_isampler1darray; }
-	| ISAMPLER2DARRAY	{ $$ = ast_isampler2darray; }
-	| USAMPLER1D		{ $$ = ast_usampler1d; }
-	| USAMPLER2D		{ $$ = ast_usampler2d; }
-	| USAMPLER3D		{ $$ = ast_usampler3d; }
-	| USAMPLERCUBE		{ $$ = ast_usamplercube; }
-	| USAMPLER1DARRAY	{ $$ = ast_usampler1darray; }
-	| USAMPLER2DARRAY	{ $$ = ast_usampler2darray; }
-	;
-
-precision_qualifier:
-	HIGHP	  {
-		     if (!state->es_shader && state->language_version < 130)
-			_mesa_glsl_error(& @1, state,
-				         "precision qualifier forbidden "
-					 "in %s (1.30 or later "
-					 "required)\n",
-					 state->version_string);
-
-		     $$ = ast_precision_high;
-		  }
-	| MEDIUMP {
-		     if (!state->es_shader && state->language_version < 130)
-			_mesa_glsl_error(& @1, state,
-					 "precision qualifier forbidden "
-					 "in %s (1.30 or later "
-					 "required)\n",
-					 state->version_string);
-
-		     $$ = ast_precision_medium;
-		  }
-	| LOWP	  {
-		     if (!state->es_shader && state->language_version < 130)
-			_mesa_glsl_error(& @1, state,
-					 "precision qualifier forbidden "
-					 "in %s (1.30 or later "
-					 "required)\n",
-					 state->version_string);
-
-		     $$ = ast_precision_low;
-		  }
-	;
-
-struct_specifier:
-	STRUCT any_identifier '{' struct_declaration_list '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_struct_specifier($2, $4);
-	   $$->set_location(yylloc);
-	   state->symbols->add_type($2, glsl_type::void_type);
-	}
-	| STRUCT '{' struct_declaration_list '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_struct_specifier(NULL, $3);
-	   $$->set_location(yylloc);
-	}
-	;
-
-struct_declaration_list:
-	struct_declaration
-	{
-	   $$ = (ast_node *) $1;
-	   $1->link.self_link();
-	}
-	| struct_declaration_list struct_declaration
-	{
-	   $$ = (ast_node *) $1;
-	   $$->link.insert_before(& $2->link);
-	}
-	;
-
-struct_declaration:
-	type_specifier struct_declarator_list ';'
-	{
-	   void *ctx = state;
-	   ast_fully_specified_type *type = new(ctx) ast_fully_specified_type();
-	   type->set_location(yylloc);
-
-	   type->specifier = $1;
-	   $$ = new(ctx) ast_declarator_list(type);
-	   $$->set_location(yylloc);
-
-	   $$->declarations.push_degenerate_list_at_head(& $2->link);
-	}
-	;
-
-struct_declarator_list:
-	struct_declarator
-	{
-	   $$ = $1;
-	   $1->link.self_link();
-	}
-	| struct_declarator_list ',' struct_declarator
-	{
-	   $$ = $1;
-	   $$->link.insert_before(& $3->link);
-	}
-	;
-
-struct_declarator:
-	any_identifier
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_declaration($1, false, NULL, NULL);
-	   $$->set_location(yylloc);
-	   state->symbols->add_variable(new(state) ir_variable(NULL, $1, ir_var_auto));
-	}
-	| any_identifier '[' constant_expression ']'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_declaration($1, true, $3, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-initializer:
-	assignment_expression
-	;
-
-declaration_statement:
-	declaration
-	;
-
-	// Grammar Note: labeled statements for SWITCH only; 'goto' is not
-	// supported.
-statement:
-	compound_statement	{ $$ = (ast_node *) $1; }
-	| simple_statement
-	;
-
-simple_statement:
-	declaration_statement
-	| expression_statement
-	| selection_statement
-	| switch_statement		{ $$ = NULL; }
-	| case_label			{ $$ = NULL; }
-	| iteration_statement
-	| jump_statement
-	;
-
-compound_statement:
-	'{' '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_compound_statement(true, NULL);
-	   $$->set_location(yylloc);
-	}
-	| '{'
-	{
-	   state->symbols->push_scope();
-	}
-	statement_list '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_compound_statement(true, $3);
-	   $$->set_location(yylloc);
-	   state->symbols->pop_scope();
-	}
-	;
-
-statement_no_new_scope:
-	compound_statement_no_new_scope { $$ = (ast_node *) $1; }
-	| simple_statement
-	;
-
-compound_statement_no_new_scope:
-	'{' '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_compound_statement(false, NULL);
-	   $$->set_location(yylloc);
-	}
-	| '{' statement_list '}'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_compound_statement(false, $2);
-	   $$->set_location(yylloc);
-	}
-	;
-
-statement_list:
-	statement
-	{
-	   if ($1 == NULL) {
-	      _mesa_glsl_error(& @1, state, "<nil> statement\n");
-	      assert($1 != NULL);
-	   }
-
-	   $$ = $1;
-	   $$->link.self_link();
-	}
-	| statement_list statement
-	{
-	   if ($2 == NULL) {
-	      _mesa_glsl_error(& @2, state, "<nil> statement\n");
-	      assert($2 != NULL);
-	   }
-	   $$ = $1;
-	   $$->link.insert_before(& $2->link);
-	}
-	;
-
-expression_statement:
-	';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_statement(NULL);
-	   $$->set_location(yylloc);
-	}
-	| expression ';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_expression_statement($1);
-	   $$->set_location(yylloc);
-	}
-	;
-
-selection_statement:
-	IF '(' expression ')' selection_rest_statement
-	{
-	   $$ = new(state) ast_selection_statement($3, $5.then_statement,
-						   $5.else_statement);
-	   $$->set_location(yylloc);
-	}
-	;
-
-selection_rest_statement:
-	statement ELSE statement
-	{
-	   $$.then_statement = $1;
-	   $$.else_statement = $3;
-	}
-	| statement
-	{
-	   $$.then_statement = $1;
-	   $$.else_statement = NULL;
-	}
-	;
-
-condition:
-	expression
-	{
-	   $$ = (ast_node *) $1;
-	}
-	| fully_specified_type any_identifier '=' initializer
-	{
-	   void *ctx = state;
-	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, $4);
-	   ast_declarator_list *declarator = new(ctx) ast_declarator_list($1);
-	   decl->set_location(yylloc);
-	   declarator->set_location(yylloc);
-
-	   declarator->declarations.push_tail(&decl->link);
-	   $$ = declarator;
-	}
-	;
-
-switch_statement:
-	SWITCH '(' expression ')' compound_statement
-	;
-
-case_label:
-	CASE expression ':'
-	| DEFAULT ':'
-	;
-
-iteration_statement:
-	WHILE '(' condition ')' statement_no_new_scope
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_while,
-	   					    NULL, $3, NULL, $5);
-	   $$->set_location(yylloc);
-	}
-	| DO statement WHILE '(' expression ')' ';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_do_while,
-						    NULL, $5, NULL, $2);
-	   $$->set_location(yylloc);
-	}
-	| FOR '(' for_init_statement for_rest_statement ')' statement_no_new_scope
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_for,
-						    $3, $4.cond, $4.rest, $6);
-	   $$->set_location(yylloc);
-	}
-	;
-
-for_init_statement:
-	expression_statement
-	| declaration_statement
-	;
-
-conditionopt:
-	condition
-	| /* empty */
-	{
-	   $$ = NULL;
-	}
-	;
-
-for_rest_statement:
-	conditionopt ';'
-	{
-	   $$.cond = $1;
-	   $$.rest = NULL;
-	}
-	| conditionopt ';' expression
-	{
-	   $$.cond = $1;
-	   $$.rest = $3;
-	}
-	;
-
-	// Grammar Note: No 'goto'. Gotos are not supported.
-jump_statement:
-	CONTINUE ';' 
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_continue, NULL);
-	   $$->set_location(yylloc);
-	}
-	| BREAK ';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_break, NULL);
-	   $$->set_location(yylloc);
-	}
-	| RETURN ';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, NULL);
-	   $$->set_location(yylloc);
-	}
-	| RETURN expression ';'
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, $2);
-	   $$->set_location(yylloc);
-	}
-	| DISCARD ';' // Fragment shader only.
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_discard, NULL);
-	   $$->set_location(yylloc);
-	}
-	;
-
-external_declaration:
-	function_definition	{ $$ = $1; }
-	| declaration		{ $$ = $1; }
-	| pragma_statement	{ $$ = NULL; }
-	;
-
-function_definition:
-	function_prototype compound_statement_no_new_scope
-	{
-	   void *ctx = state;
-	   $$ = new(ctx) ast_function_definition();
-	   $$->set_location(yylloc);
-	   $$->prototype = $1;
-	   $$->body = $2;
-
-	   state->symbols->pop_scope();
-	}
-	;
+%{
+/*
+ * Copyright © 2008, 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+    
+#include "ast.h"
+#include "glsl_parser_extras.h"
+#include "glsl_types.h"
+
+#define YYLEX_PARAM state->scanner
+
+%}
+
+%pure-parser
+%error-verbose
+
+%locations
+%initial-action {
+   @$.first_line = 1;
+   @$.first_column = 1;
+   @$.last_line = 1;
+   @$.last_column = 1;
+   @$.source = 0;
+}
+
+%lex-param   {void *scanner}
+%parse-param {struct _mesa_glsl_parse_state *state}
+
+%union {
+   int n;
+   float real;
+   char *identifier;
+
+   struct ast_type_qualifier type_qualifier;
+
+   ast_node *node;
+   ast_type_specifier *type_specifier;
+   ast_fully_specified_type *fully_specified_type;
+   ast_function *function;
+   ast_parameter_declarator *parameter_declarator;
+   ast_function_definition *function_definition;
+   ast_compound_statement *compound_statement;
+   ast_expression *expression;
+   ast_declarator_list *declarator_list;
+   ast_struct_specifier *struct_specifier;
+   ast_declaration *declaration;
+
+   struct {
+      ast_node *cond;
+      ast_expression *rest;
+   } for_rest_statement;
+
+   struct {
+      ast_node *then_statement;
+      ast_node *else_statement;
+   } selection_rest_statement;
+}
+
+%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK
+%token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
+%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4
+%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING
+%token NOPERSPECTIVE FLAT SMOOTH
+%token MAT2X2 MAT2X3 MAT2X4
+%token MAT3X2 MAT3X3 MAT3X4
+%token MAT4X2 MAT4X3 MAT4X4
+%token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
+%token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW
+%token SAMPLER2DARRAYSHADOW ISAMPLER1D ISAMPLER2D ISAMPLER3D ISAMPLERCUBE
+%token ISAMPLER1DARRAY ISAMPLER2DARRAY USAMPLER1D USAMPLER2D USAMPLER3D
+%token USAMPLERCUBE USAMPLER1DARRAY USAMPLER2DARRAY
+%token STRUCT VOID_TOK WHILE
+%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
+%type <identifier> any_identifier
+%token <real> FLOATCONSTANT
+%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
+%token <identifier> FIELD_SELECTION
+%token LEFT_OP RIGHT_OP
+%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
+%token AND_OP OR_OP XOR_OP MUL_ASSIGN DIV_ASSIGN ADD_ASSIGN
+%token MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
+%token SUB_ASSIGN
+%token INVARIANT
+%token LOWP MEDIUMP HIGHP SUPERP PRECISION
+
+%token VERSION EXTENSION LINE COLON EOL INTERFACE OUTPUT
+%token PRAGMA_DEBUG_ON PRAGMA_DEBUG_OFF
+%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF
+%token PRAGMA_INVARIANT_ALL
+%token LAYOUT_TOK
+
+   /* Reserved words.  Most are not used in the grammar; SAMPLER2DRECT and
+    * SAMPLER2DRECTSHADOW are (see basic_type_specifier_nonarray). */
+%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
+%token INLINE_TOK NOINLINE VOLATILE PUBLIC_TOK STATIC EXTERN EXTERNAL
+%token LONG_TOK SHORT_TOK DOUBLE_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK OUPTUT
+%token HVEC2 HVEC3 HVEC4 DVEC2 DVEC3 DVEC4 FVEC2 FVEC3 FVEC4
+%token SAMPLER2DRECT SAMPLER3DRECT SAMPLER2DRECTSHADOW
+%token SIZEOF CAST NAMESPACE USING
+
+%token ERROR_TOK
+
+%token COMMON PARTITION ACTIVE SAMPLERBUFFER FILTER
+%token  IMAGE1D  IMAGE2D  IMAGE3D  IMAGECUBE  IMAGE1DARRAY  IMAGE2DARRAY
+%token IIMAGE1D IIMAGE2D IIMAGE3D IIMAGECUBE IIMAGE1DARRAY IIMAGE2DARRAY
+%token UIMAGE1D UIMAGE2D UIMAGE3D UIMAGECUBE UIMAGE1DARRAY UIMAGE2DARRAY
+%token IMAGE1DSHADOW IMAGE2DSHADOW IMAGEBUFFER IIMAGEBUFFER UIMAGEBUFFER
+%token IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW
+%token ROW_MAJOR
+
+%type <identifier> variable_identifier
+%type <node> statement
+%type <node> statement_list
+%type <node> simple_statement
+%type <n> precision_qualifier
+%type <type_qualifier> type_qualifier
+%type <type_qualifier> storage_qualifier
+%type <type_qualifier> interpolation_qualifier
+%type <type_qualifier> layout_qualifier
+%type <type_qualifier> layout_qualifier_id_list layout_qualifier_id
+%type <type_specifier> type_specifier
+%type <type_specifier> type_specifier_no_prec
+%type <type_specifier> type_specifier_nonarray
+%type <n> basic_type_specifier_nonarray
+%type <fully_specified_type> fully_specified_type
+%type <function> function_prototype
+%type <function> function_header
+%type <function> function_header_with_parameters
+%type <function> function_declarator
+%type <parameter_declarator> parameter_declarator
+%type <parameter_declarator> parameter_declaration
+%type <type_qualifier> parameter_qualifier
+%type <type_qualifier> parameter_type_qualifier
+%type <type_specifier> parameter_type_specifier
+%type <function_definition> function_definition
+%type <compound_statement> compound_statement_no_new_scope
+%type <compound_statement> compound_statement
+%type <node> statement_no_new_scope
+%type <node> expression_statement
+%type <expression> expression
+%type <expression> primary_expression
+%type <expression> assignment_expression
+%type <expression> conditional_expression
+%type <expression> logical_or_expression
+%type <expression> logical_xor_expression
+%type <expression> logical_and_expression
+%type <expression> inclusive_or_expression
+%type <expression> exclusive_or_expression
+%type <expression> and_expression
+%type <expression> equality_expression
+%type <expression> relational_expression
+%type <expression> shift_expression
+%type <expression> additive_expression
+%type <expression> multiplicative_expression
+%type <expression> unary_expression
+%type <expression> constant_expression
+%type <expression> integer_expression
+%type <expression> postfix_expression
+%type <expression> function_call_header_with_parameters
+%type <expression> function_call_header_no_parameters
+%type <expression> function_call_header
+%type <expression> function_call_generic
+%type <expression> function_call_or_method
+%type <expression> function_call
+%type <expression> method_call_generic
+%type <expression> method_call_header_with_parameters
+%type <expression> method_call_header_no_parameters
+%type <expression> method_call_header
+%type <n> assignment_operator
+%type <n> unary_operator
+%type <expression> function_identifier
+%type <node> external_declaration
+%type <declarator_list> init_declarator_list
+%type <declarator_list> single_declaration
+%type <expression> initializer
+%type <node> declaration
+%type <node> declaration_statement
+%type <node> jump_statement
+%type <struct_specifier> struct_specifier
+%type <node> struct_declaration_list
+%type <declarator_list> struct_declaration
+%type <declaration> struct_declarator
+%type <declaration> struct_declarator_list
+%type <node> selection_statement
+%type <selection_rest_statement> selection_rest_statement
+%type <node> iteration_statement
+%type <node> condition
+%type <node> conditionopt
+%type <node> for_init_statement
+%type <for_rest_statement> for_rest_statement
+%%
+
+translation_unit: 
+	version_statement extension_statement_list
+	{
+	   _mesa_glsl_initialize_types(state); /* mid-rule action: runs before external_declaration_list is parsed */
+	}
+	external_declaration_list
+	{
+	   delete state->symbols; /* discard the symbol table that was in use during the parse */
+	   state->symbols = new(ralloc_parent(state)) glsl_symbol_table; /* allocated under state's ralloc parent */
+	   _mesa_glsl_initialize_types(state); /* run type initialization again, now against the fresh table */
+	}
+	;
+
+version_statement:
+	/* blank - no #version specified: defaults are already set */
+	| VERSION INTCONSTANT EOL
+	{
+	   bool supported = false;
+
+	   switch ($2) {
+	   case 100: /* the only version number that marks the shader as ES */
+	      state->es_shader = true;
+	      supported = state->Const.GLSL_100ES;
+	      break;
+	   case 110:
+	      supported = state->Const.GLSL_110;
+	      break;
+	   case 120:
+	      supported = state->Const.GLSL_120;
+	      break;
+	   case 130:
+	      supported = state->Const.GLSL_130;
+	      break;
+	   default: /* any other version number is reported as unsupported below */
+	      supported = false;
+	      break;
+	   }
+
+	   state->language_version = $2; /* recorded even when the version turns out to be unsupported */
+	   state->version_string =
+	      ralloc_asprintf(state, "GLSL%s %d.%02d", /* e.g. 130 gives "GLSL 1.30" for a non-ES shader */
+			      state->es_shader ? " ES" : "",
+			      state->language_version / 100,
+			      state->language_version % 100);
+
+	   if (!supported) { /* the error is reported, but no YYERROR is raised here */
+	      _mesa_glsl_error(& @2, state, "%s is not supported. "
+			       "Supported versions are: %s\n",
+			       state->version_string,
+			       state->supported_version_string);
+	   }
+	}
+	;
+
+pragma_statement:
+	PRAGMA_DEBUG_ON EOL /* the debug/optimize pragmas carry no action: parsed, nothing recorded here */
+	| PRAGMA_DEBUG_OFF EOL
+	| PRAGMA_OPTIMIZE_ON EOL
+	| PRAGMA_OPTIMIZE_OFF EOL
+	| PRAGMA_INVARIANT_ALL EOL
+	{
+	   if (state->language_version < 120) { /* below 1.20: warn only, all_invariant is left untouched */
+	      _mesa_glsl_warning(& @1, state,
+				 "pragma `invariant(all)' not supported in %s",
+				 state->version_string);
+	   } else {
+	      state->all_invariant = true;
+	   }
+	}
+	;
+
+extension_statement_list:
+	/* empty */
+	| extension_statement_list extension_statement
+	;
+
+any_identifier:
+	IDENTIFIER
+	| TYPE_IDENTIFIER
+	| NEW_IDENTIFIER
+	;
+
+extension_statement:
+	EXTENSION any_identifier COLON any_identifier EOL
+	{
+	   if (!_mesa_glsl_process_extension($2, & @2, $4, & @4, state)) { /* both identifiers are passed with their source locations */
+	      YYERROR; /* a false return is turned into a parse error */
+	   }
+	}
+	;
+
+external_declaration_list:
+	external_declaration
+	{
+	   /* FINISHME: The NULL test is required because pragmas are set to
+	    * FINISHME: NULL. (See production rule for external_declaration.)
+	    */
+	   if ($1 != NULL)
+	      state->translation_unit.push_tail(& $1->link);
+	}
+	| external_declaration_list external_declaration
+	{
+	   /* FINISHME: The NULL test is required because pragmas are set to
+	    * FINISHME: NULL. (See production rule for external_declaration.)
+	    */
+	   if ($2 != NULL)
+	      state->translation_unit.push_tail(& $2->link);
+	}
+	;
+
+variable_identifier: /* same alternatives as any_identifier, minus TYPE_IDENTIFIER */
+	IDENTIFIER
+	| NEW_IDENTIFIER
+	;
+
+primary_expression:
+	variable_identifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_identifier, NULL, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.identifier = $1;
+	}
+	| INTCONSTANT
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_int_constant, NULL, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.int_constant = $1;
+	}
+	| UINTCONSTANT
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_uint_constant, NULL, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.uint_constant = $1;
+	}
+	| FLOATCONSTANT
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_float_constant, NULL, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.float_constant = $1;
+	}
+	| BOOLCONSTANT
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_bool_constant, NULL, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.bool_constant = $1;
+	}
+	| '(' expression ')'
+	{
+	   $$ = $2;
+	}
+	;
+
+postfix_expression:
+	primary_expression
+	| postfix_expression '[' integer_expression ']'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_array_index, $1, $3, NULL);
+	   $$->set_location(yylloc);
+	}
+	| function_call
+	{
+	   $$ = $1;
+	}
+	| postfix_expression '.' any_identifier /* the field name may be any of the three identifier tokens */
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL);
+	   $$->set_location(yylloc);
+	   $$->primary_expression.identifier = $3; /* field name is stored here rather than passed to the constructor */
+	}
+	| postfix_expression INC_OP
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_post_inc, $1, NULL, NULL);
+	   $$->set_location(yylloc);
+	}
+	| postfix_expression DEC_OP
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_post_dec, $1, NULL, NULL);
+	   $$->set_location(yylloc);
+	}
+	;
+
+integer_expression:
+	expression
+	;
+
+function_call:
+	function_call_or_method
+	;
+
+function_call_or_method:
+	function_call_generic
+	| postfix_expression '.' method_call_generic
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_field_selection, $1, $3, NULL); /* method call: a field selection built from the object ($1) and the call ($3) */
+	   $$->set_location(yylloc);
+	}
+	;
+
+function_call_generic:
+	function_call_header_with_parameters ')'
+	| function_call_header_no_parameters ')'
+	;
+
+function_call_header_no_parameters:
+	function_call_header VOID_TOK /* "f(void)" is accepted the same way as "f()" */
+	| function_call_header
+	;
+
+function_call_header_with_parameters:
+	function_call_header assignment_expression
+	{
+	   $$ = $1;
+	   $$->set_location(yylloc);
+	   $$->expressions.push_tail(& $2->link);
+	}
+	| function_call_header_with_parameters ',' assignment_expression
+	{
+	   $$ = $1;
+	   $$->set_location(yylloc);
+	   $$->expressions.push_tail(& $3->link);
+	}
+	;
+
+	// Grammar Note: Constructors look like functions, but lexical 
+	// analysis recognized most of them as keywords. They are now
+	// recognized through "type_specifier".
+function_call_header:
+	function_identifier '('
+	;
+
+function_identifier:
+	type_specifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_function_expression($1);
+	   $$->set_location(yylloc);
+   	}
+	| variable_identifier
+	{
+	   void *ctx = state;
+	   ast_expression *callee = new(ctx) ast_expression($1);
+	   $$ = new(ctx) ast_function_expression(callee);
+	   $$->set_location(yylloc);
+   	}
+	| FIELD_SELECTION
+	{
+	   void *ctx = state;
+	   ast_expression *callee = new(ctx) ast_expression($1);
+	   $$ = new(ctx) ast_function_expression(callee);
+	   $$->set_location(yylloc);
+   	}
+	;
+
+method_call_generic:
+	method_call_header_with_parameters ')'
+	| method_call_header_no_parameters ')'
+	;
+
+method_call_header_no_parameters:
+	method_call_header VOID_TOK
+	| method_call_header
+	;
+
+method_call_header_with_parameters:
+	method_call_header assignment_expression
+	{
+	   $$ = $1;
+	   $$->set_location(yylloc);
+	   $$->expressions.push_tail(& $2->link);
+	}
+	| method_call_header_with_parameters ',' assignment_expression
+	{
+	   $$ = $1;
+	   $$->set_location(yylloc);
+	   $$->expressions.push_tail(& $3->link);
+	}
+	;
+
+	// Grammar Note: Unlike function_call_header, a method name is always
+	// a plain identifier (variable_identifier); constructors spelled via
+	// "type_specifier" are not accepted here.
+method_call_header:
+	variable_identifier '('
+	{
+	   void *ctx = state;
+	   ast_expression *callee = new(ctx) ast_expression($1);
+	   $$ = new(ctx) ast_function_expression(callee);
+	   $$->set_location(yylloc);
+   	}
+	;
+
+	// Grammar Note: No traditional style type casts.
+unary_expression:
+	postfix_expression
+	| INC_OP unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_pre_inc, $2, NULL, NULL);
+	   $$->set_location(yylloc);
+	}
+	| DEC_OP unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_pre_dec, $2, NULL, NULL);
+	   $$->set_location(yylloc);
+	}
+	| unary_operator unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression($1, $2, NULL, NULL);
+	   $$->set_location(yylloc);
+	}
+	;
+
+	// Grammar Note: No '*' or '&' unary ops. Pointers are not supported.
+unary_operator:
+	'+'	{ $$ = ast_plus; }
+	| '-'	{ $$ = ast_neg; }
+	| '!'	{ $$ = ast_logic_not; }
+	| '~'	{ $$ = ast_bit_not; }
+	;
+
+multiplicative_expression:
+	unary_expression
+	| multiplicative_expression '*' unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_mul, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| multiplicative_expression '/' unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_div, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| multiplicative_expression '%' unary_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_mod, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+additive_expression:
+	multiplicative_expression
+	| additive_expression '+' multiplicative_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_add, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| additive_expression '-' multiplicative_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_sub, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+shift_expression:
+	additive_expression
+	| shift_expression LEFT_OP additive_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_lshift, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| shift_expression RIGHT_OP additive_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_rshift, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+relational_expression:
+	shift_expression
+	| relational_expression '<' shift_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_less, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| relational_expression '>' shift_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_greater, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| relational_expression LE_OP shift_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_lequal, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| relational_expression GE_OP shift_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_gequal, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+equality_expression:
+	relational_expression
+	| equality_expression EQ_OP relational_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_equal, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	| equality_expression NE_OP relational_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_nequal, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+and_expression:
+	equality_expression
+	| and_expression '&' equality_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_bit_and, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+exclusive_or_expression:
+	and_expression
+	| exclusive_or_expression '^' and_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_bit_xor, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+inclusive_or_expression:
+	exclusive_or_expression
+	| inclusive_or_expression '|' exclusive_or_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_bit_or, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+logical_and_expression:
+	inclusive_or_expression
+	| logical_and_expression AND_OP inclusive_or_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_logic_and, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+logical_xor_expression:
+	logical_and_expression
+	| logical_xor_expression XOR_OP logical_and_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_logic_xor, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+logical_or_expression:
+	logical_xor_expression
+	| logical_or_expression OR_OP logical_xor_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_bin(ast_logic_or, $1, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+conditional_expression:
+	logical_or_expression
+	| logical_or_expression '?' expression ':' assignment_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression(ast_conditional, $1, $3, $5);
+	   $$->set_location(yylloc);
+	}
+	;
+
+assignment_expression:
+	conditional_expression
+	| unary_expression assignment_operator assignment_expression
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression($2, $1, $3, NULL);
+	   $$->set_location(yylloc);
+	}
+	;
+
+assignment_operator:
+	'='		{ $$ = ast_assign; }
+	| MUL_ASSIGN	{ $$ = ast_mul_assign; }
+	| DIV_ASSIGN	{ $$ = ast_div_assign; }
+	| MOD_ASSIGN	{ $$ = ast_mod_assign; }
+	| ADD_ASSIGN	{ $$ = ast_add_assign; }
+	| SUB_ASSIGN	{ $$ = ast_sub_assign; }
+	| LEFT_ASSIGN	{ $$ = ast_ls_assign; }
+	| RIGHT_ASSIGN	{ $$ = ast_rs_assign; }
+	| AND_ASSIGN	{ $$ = ast_and_assign; }
+	| XOR_ASSIGN	{ $$ = ast_xor_assign; }
+	| OR_ASSIGN	{ $$ = ast_or_assign; }
+	;
+
+expression:
+	assignment_expression
+	{
+	   $$ = $1;
+	}
+	| expression ',' assignment_expression
+	{
+	   void *ctx = state;
+	   if ($1->oper != ast_sequence) { /* first comma: wrap $1 in a new sequence node */
+	      $$ = new(ctx) ast_expression(ast_sequence, NULL, NULL, NULL);
+	      $$->set_location(yylloc);
+	      $$->expressions.push_tail(& $1->link);
+	   } else { /* $1 is already a sequence: keep appending to it */
+	      $$ = $1;
+	   }
+
+	   $$->expressions.push_tail(& $3->link);
+	}
+	;
+
+constant_expression:
+	conditional_expression
+	;
+
+declaration:
+	function_prototype ';'
+	{
+	   state->symbols->pop_scope(); /* closes the scope pushed by function_header */
+	   $$ = $1;
+	}
+	| init_declarator_list ';'
+	{
+	   $$ = $1;
+	}
+	| PRECISION precision_qualifier type_specifier_no_prec ';'
+	{
+	   $3->precision = $2;
+	   $3->is_precision_statement = true; /* the type specifier node itself becomes the statement */
+	   $$ = $3;
+	}
+	;
+
+function_prototype:
+	function_declarator ')'
+	;
+
+function_declarator:
+	function_header
+	| function_header_with_parameters
+	;
+
+function_header_with_parameters:
+	function_header parameter_declaration
+	{
+	   $$ = $1;
+	   $$->parameters.push_tail(& $2->link);
+	}
+	| function_header_with_parameters ',' parameter_declaration
+	{
+	   $$ = $1;
+	   $$->parameters.push_tail(& $3->link);
+	}
+	;
+
+function_header:
+	fully_specified_type variable_identifier '('
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_function();
+	   $$->set_location(yylloc);
+	   $$->return_type = $1;
+	   $$->identifier = $2;
+
+	   state->symbols->add_function(new(state) ir_function($2)); /* added before the new scope is pushed */
+	   state->symbols->push_scope(); /* popped by `declaration' (prototype) or `function_definition' */
+	}
+	;
+
+parameter_declarator:
+	type_specifier any_identifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_parameter_declarator();
+	   $$->set_location(yylloc);
+	   $$->type = new(ctx) ast_fully_specified_type();
+	   $$->type->set_location(yylloc);
+	   $$->type->specifier = $1;
+	   $$->identifier = $2;
+	}
+	| type_specifier any_identifier '[' constant_expression ']'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_parameter_declarator();
+	   $$->set_location(yylloc);
+	   $$->type = new(ctx) ast_fully_specified_type();
+	   $$->type->set_location(yylloc);
+	   $$->type->specifier = $1;
+	   $$->identifier = $2;
+	   $$->is_array = true;
+	   $$->array_size = $4;
+	}
+	;
+
+parameter_declaration:
+	parameter_type_qualifier parameter_qualifier parameter_declarator
+	{
+	   $1.flags.i |= $2.flags.i;
+
+	   $$ = $3;
+	   $$->type->qualifier = $1;
+	}
+	| parameter_qualifier parameter_declarator
+	{
+	   $$ = $2;
+	   $$->type->qualifier = $1;
+	}
+	| parameter_type_qualifier parameter_qualifier parameter_type_specifier
+	{
+	   void *ctx = state;
+	   $1.flags.i |= $2.flags.i;
+
+	   $$ = new(ctx) ast_parameter_declarator();
+	   $$->set_location(yylloc);
+	   $$->type = new(ctx) ast_fully_specified_type();
+	   $$->type->qualifier = $1;
+	   $$->type->specifier = $3;
+	}
+	| parameter_qualifier parameter_type_specifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_parameter_declarator();
+	   $$->set_location(yylloc);
+	   $$->type = new(ctx) ast_fully_specified_type();
+	   $$->type->qualifier = $1;
+	   $$->type->specifier = $2;
+	}
+	;
+
+parameter_qualifier:
+	/* empty */
+	{
+	   memset(& $$, 0, sizeof($$));
+	}
+	| IN_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.in = 1;
+	}
+	| OUT_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.out = 1;
+	}
+	| INOUT_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.in = 1;
+	   $$.flags.q.out = 1;
+	}
+	;
+
+parameter_type_specifier:
+	type_specifier
+	;
+
+init_declarator_list:
+	single_declaration
+	| init_declarator_list ',' any_identifier
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, false, NULL, NULL);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	| init_declarator_list ',' any_identifier '[' ']'
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, NULL);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	| init_declarator_list ',' any_identifier '[' constant_expression ']'
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, NULL);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	| init_declarator_list ',' any_identifier '[' ']' '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, true, NULL, $7);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	| init_declarator_list ',' any_identifier '[' constant_expression ']' '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, true, $5, $8);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	| init_declarator_list ',' any_identifier '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($3, false, NULL, $5);
+	   decl->set_location(yylloc);
+
+	   $$ = $1;
+	   $$->declarations.push_tail(&decl->link);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto));
+	}
+	;
+
+	// Grammar Note: No 'enum', or 'typedef'.
+single_declaration:
+	fully_specified_type
+	{
+	   void *ctx = state;
+	   /* Empty declaration list is valid. */
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	}
+	| fully_specified_type any_identifier
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, NULL);
+	   decl->set_location(yylloc); /* as init_declarator_list does for later declarators */
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| fully_specified_type any_identifier '[' ']'
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, NULL);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| fully_specified_type any_identifier '[' constant_expression ']'
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, NULL);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| fully_specified_type any_identifier '[' ']' '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, true, NULL, $6);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| fully_specified_type any_identifier '[' constant_expression ']' '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, true, $4, $7);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| fully_specified_type any_identifier '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, $4);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
+	   $$->declarations.push_tail(&decl->link);
+	}
+	| INVARIANT variable_identifier // Vertex only.
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, NULL);
+	   decl->set_location(yylloc);
+	   $$ = new(ctx) ast_declarator_list(NULL); /* no type: only marks an existing name invariant */
+	   $$->set_location(yylloc);
+	   $$->invariant = true;
+
+	   $$->declarations.push_tail(&decl->link);
+	}
+	;
+
+fully_specified_type:
+	type_specifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_fully_specified_type();
+	   $$->set_location(yylloc);
+	   $$->specifier = $1;
+	}
+	| type_qualifier type_specifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_fully_specified_type();
+	   $$->set_location(yylloc);
+	   $$->qualifier = $1;
+	   $$->specifier = $2;
+	}
+	;
+
+layout_qualifier:
+	LAYOUT_TOK '(' layout_qualifier_id_list ')'
+	{
+	  $$ = $3;
+	}
+	;
+
+layout_qualifier_id_list:
+	layout_qualifier_id
+	| layout_qualifier_id_list ',' layout_qualifier_id
+	{
+	   if (($1.flags.i & $3.flags.i) != 0) {
+	      _mesa_glsl_error(& @3, state,
+			       "duplicate layout qualifiers used\n");
+	      YYERROR;
+	   }
+
+	   $$.flags.i = $1.flags.i | $3.flags.i;
+
+	   if ($1.flags.q.explicit_location)
+	      $$.location = $1.location;
+
+	   if ($3.flags.q.explicit_location)
+	      $$.location = $3.location;
+	}
+	;
+
+layout_qualifier_id:
+	any_identifier
+	{
+	   bool got_one = false;
+
+	   memset(& $$, 0, sizeof($$));
+
+	   /* Layout qualifiers for ARB_fragment_coord_conventions. */
+	   if (!got_one && state->ARB_fragment_coord_conventions_enable) {
+	      if (strcmp($1, "origin_upper_left") == 0) {
+		 got_one = true;
+		 $$.flags.q.origin_upper_left = 1;
+	      } else if (strcmp($1, "pixel_center_integer") == 0) {
+		 got_one = true;
+		 $$.flags.q.pixel_center_integer = 1;
+	      }
+
+	      if (got_one && state->ARB_fragment_coord_conventions_warn) {
+		 _mesa_glsl_warning(& @1, state,
+				    "GL_ARB_fragment_coord_conventions layout "
+				    "identifier `%s' used\n", $1);
+	      }
+	   }
+
+	   /* Layout qualifiers for AMD_conservative_depth. */
+	   if (!got_one && state->AMD_conservative_depth_enable) {
+	      if (strcmp($1, "depth_any") == 0) {
+	         got_one = true;
+	         $$.flags.q.depth_any = 1;
+	      } else if (strcmp($1, "depth_greater") == 0) {
+	         got_one = true;
+	         $$.flags.q.depth_greater = 1;
+	      } else if (strcmp($1, "depth_less") == 0) {
+	         got_one = true;
+	         $$.flags.q.depth_less = 1;
+	      } else if (strcmp($1, "depth_unchanged") == 0) {
+	         got_one = true;
+	         $$.flags.q.depth_unchanged = 1;
+	      }
+	
+	      if (got_one && state->AMD_conservative_depth_warn) {
+	         _mesa_glsl_warning(& @1, state,
+	                            "GL_AMD_conservative_depth "
+	                            "layout qualifier `%s' is used\n", $1);
+	      }
+	   }
+
+	   if (!got_one) {
+	      _mesa_glsl_error(& @1, state, "unrecognized layout identifier "
+			       "`%s'\n", $1);
+	      YYERROR;
+	   }
+	}
+	| any_identifier '=' INTCONSTANT
+	{
+	   bool got_one = false;
+
+	   memset(& $$, 0, sizeof($$));
+
+	   if (state->ARB_explicit_attrib_location_enable) {
+	      /* FINISHME: Handle 'index' once GL_ARB_blend_func_extended and
+	       * FINISHME: GLSL 1.30 (or later) are supported.
+	       */
+	      if (strcmp("location", $1) == 0) {
+		 got_one = true;
+
+		 $$.flags.q.explicit_location = 1;
+
+		 if ($3 >= 0) {
+		    $$.location = $3;
+		 } else {
+		    _mesa_glsl_error(& @3, state,
+				     "invalid location %d specified\n", $3);
+		    YYERROR;
+		 }
+	      }
+	   }
+
+	   /* If the identifier didn't match any known layout identifiers,
+	    * emit an error.
+	    */
+	   if (!got_one) {
+	      _mesa_glsl_error(& @1, state, "unrecognized layout identifier "
+			       "`%s'\n", $1);
+	      YYERROR;
+	   } else if (state->ARB_explicit_attrib_location_warn) {
+	      _mesa_glsl_warning(& @1, state,
+				 "GL_ARB_explicit_attrib_location layout "
+				 "identifier `%s' used\n", $1);
+	   }
+	}
+	;
+
+interpolation_qualifier:
+	SMOOTH
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.smooth = 1;
+	}
+	| FLAT
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.flat = 1;
+	}
+	| NOPERSPECTIVE
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.noperspective = 1;
+	}
+	;
+
+parameter_type_qualifier:
+	CONST_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.constant = 1;
+	}
+	;
+
+type_qualifier:
+	storage_qualifier
+	| layout_qualifier
+	| layout_qualifier storage_qualifier
+	{
+	   $$ = $1;
+	   $$.flags.i |= $2.flags.i;
+	}
+	| interpolation_qualifier
+	| interpolation_qualifier storage_qualifier
+	{
+	   $$ = $1;
+	   $$.flags.i |= $2.flags.i;
+	}
+	| INVARIANT storage_qualifier
+	{
+	   $$ = $2;
+	   $$.flags.q.invariant = 1;
+	}
+	| INVARIANT interpolation_qualifier storage_qualifier
+	{
+	   $$ = $2;
+	   $$.flags.i |= $3.flags.i;
+	   $$.flags.q.invariant = 1;
+	}
+	| INVARIANT
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.invariant = 1;
+	}
+	;
+
+storage_qualifier:
+	CONST_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.constant = 1;
+	}
+	| ATTRIBUTE
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.attribute = 1;
+	}
+	| VARYING
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.varying = 1;
+	}
+	| CENTROID VARYING
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.centroid = 1;
+	   $$.flags.q.varying = 1;
+	}
+	| IN_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.in = 1;
+	}
+	| OUT_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.out = 1;
+	}
+	| CENTROID IN_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.centroid = 1; $$.flags.q.in = 1;
+	}
+	| CENTROID OUT_TOK
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.centroid = 1; $$.flags.q.out = 1;
+	}
+	| UNIFORM
+	{
+	   memset(& $$, 0, sizeof($$));
+	   $$.flags.q.uniform = 1;
+	}
+	;
+
+type_specifier:
+	type_specifier_no_prec
+	{
+	   $$ = $1;
+	}
+	| precision_qualifier type_specifier_no_prec
+	{
+	   $$ = $2;
+	   $$->precision = $1;
+	}
+	;
+
+type_specifier_no_prec:
+	type_specifier_nonarray
+	| type_specifier_nonarray '[' ']'
+	{
+	   $$ = $1;
+	   $$->is_array = true;
+	   $$->array_size = NULL;
+	}
+	| type_specifier_nonarray '[' constant_expression ']'
+	{
+	   $$ = $1;
+	   $$->is_array = true;
+	   $$->array_size = $3;
+	}
+	;
+
+type_specifier_nonarray:
+	basic_type_specifier_nonarray
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_type_specifier($1);
+	   $$->set_location(yylloc);
+	}
+	| struct_specifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_type_specifier($1);
+	   $$->set_location(yylloc);
+	}
+	| TYPE_IDENTIFIER
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_type_specifier($1);
+	   $$->set_location(yylloc);
+	}
+	;
+
+basic_type_specifier_nonarray:
+	VOID_TOK		{ $$ = ast_void; }
+	| FLOAT_TOK		{ $$ = ast_float; }
+	| INT_TOK		{ $$ = ast_int; }
+	| UINT_TOK		{ $$ = ast_uint; }
+	| BOOL_TOK		{ $$ = ast_bool; }
+	| VEC2			{ $$ = ast_vec2; }
+	| VEC3			{ $$ = ast_vec3; }
+	| VEC4			{ $$ = ast_vec4; }
+	| BVEC2			{ $$ = ast_bvec2; }
+	| BVEC3			{ $$ = ast_bvec3; }
+	| BVEC4			{ $$ = ast_bvec4; }
+	| IVEC2			{ $$ = ast_ivec2; }
+	| IVEC3			{ $$ = ast_ivec3; }
+	| IVEC4			{ $$ = ast_ivec4; }
+	| UVEC2			{ $$ = ast_uvec2; }
+	| UVEC3			{ $$ = ast_uvec3; }
+	| UVEC4			{ $$ = ast_uvec4; }
+	| MAT2X2		{ $$ = ast_mat2; }
+	| MAT2X3		{ $$ = ast_mat2x3; }
+	| MAT2X4		{ $$ = ast_mat2x4; }
+	| MAT3X2		{ $$ = ast_mat3x2; }
+	| MAT3X3		{ $$ = ast_mat3; }
+	| MAT3X4		{ $$ = ast_mat3x4; }
+	| MAT4X2		{ $$ = ast_mat4x2; }
+	| MAT4X3		{ $$ = ast_mat4x3; }
+	| MAT4X4		{ $$ = ast_mat4; }
+	| SAMPLER1D		{ $$ = ast_sampler1d; }
+	| SAMPLER2D		{ $$ = ast_sampler2d; }
+	| SAMPLER2DRECT		{ $$ = ast_sampler2drect; }
+	| SAMPLER3D		{ $$ = ast_sampler3d; }
+	| SAMPLERCUBE		{ $$ = ast_samplercube; }
+	| SAMPLER1DSHADOW	{ $$ = ast_sampler1dshadow; }
+	| SAMPLER2DSHADOW	{ $$ = ast_sampler2dshadow; }
+	| SAMPLER2DRECTSHADOW	{ $$ = ast_sampler2drectshadow; }
+	| SAMPLERCUBESHADOW	{ $$ = ast_samplercubeshadow; }
+	| SAMPLER1DARRAY	{ $$ = ast_sampler1darray; }
+	| SAMPLER2DARRAY	{ $$ = ast_sampler2darray; }
+	| SAMPLER1DARRAYSHADOW	{ $$ = ast_sampler1darrayshadow; }
+	| SAMPLER2DARRAYSHADOW	{ $$ = ast_sampler2darrayshadow; }
+	| ISAMPLER1D		{ $$ = ast_isampler1d; }
+	| ISAMPLER2D		{ $$ = ast_isampler2d; }
+	| ISAMPLER3D		{ $$ = ast_isampler3d; }
+	| ISAMPLERCUBE		{ $$ = ast_isamplercube; }
+	| ISAMPLER1DARRAY	{ $$ = ast_isampler1darray; }
+	| ISAMPLER2DARRAY	{ $$ = ast_isampler2darray; }
+	| USAMPLER1D		{ $$ = ast_usampler1d; }
+	| USAMPLER2D		{ $$ = ast_usampler2d; }
+	| USAMPLER3D		{ $$ = ast_usampler3d; }
+	| USAMPLERCUBE		{ $$ = ast_usamplercube; }
+	| USAMPLER1DARRAY	{ $$ = ast_usampler1darray; }
+	| USAMPLER2DARRAY	{ $$ = ast_usampler2darray; }
+	;
+
+precision_qualifier:
+	HIGHP	  {
+		     if (!state->es_shader && state->language_version < 130)
+			_mesa_glsl_error(& @1, state,
+				         "precision qualifier forbidden "
+					 "in %s (1.30 or later "
+					 "required)\n",
+					 state->version_string);
+
+		     $$ = ast_precision_high;
+		  }
+	| MEDIUMP {
+		     if (!state->es_shader && state->language_version < 130)
+			_mesa_glsl_error(& @1, state,
+					 "precision qualifier forbidden "
+					 "in %s (1.30 or later "
+					 "required)\n",
+					 state->version_string);
+
+		     $$ = ast_precision_medium;
+		  }
+	| LOWP	  {
+		     if (!state->es_shader && state->language_version < 130)
+			_mesa_glsl_error(& @1, state,
+					 "precision qualifier forbidden "
+					 "in %s (1.30 or later "
+					 "required)\n",
+					 state->version_string);
+
+		     $$ = ast_precision_low;
+		  }
+	;
+
+struct_specifier:
+	STRUCT any_identifier '{' struct_declaration_list '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_struct_specifier($2, $4);
+	   $$->set_location(yylloc);
+	   state->symbols->add_type($2, glsl_type::void_type);
+	}
+	| STRUCT '{' struct_declaration_list '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_struct_specifier(NULL, $3);
+	   $$->set_location(yylloc);
+	}
+	;
+
+struct_declaration_list:
+	struct_declaration
+	{
+	   $$ = (ast_node *) $1;
+	   $1->link.self_link();
+	}
+	| struct_declaration_list struct_declaration
+	{
+	   $$ = (ast_node *) $1;
+	   $$->link.insert_before(& $2->link);
+	}
+	;
+
+struct_declaration:
+	type_specifier struct_declarator_list ';'
+	{
+	   void *ctx = state;
+	   ast_fully_specified_type *type = new(ctx) ast_fully_specified_type();
+	   type->set_location(yylloc);
+
+	   type->specifier = $1;
+	   $$ = new(ctx) ast_declarator_list(type);
+	   $$->set_location(yylloc);
+
+	   $$->declarations.push_degenerate_list_at_head(& $2->link);
+	}
+	;
+
+struct_declarator_list:
+	struct_declarator
+	{
+	   $$ = $1;
+	   $1->link.self_link();
+	}
+	| struct_declarator_list ',' struct_declarator
+	{
+	   $$ = $1;
+	   $$->link.insert_before(& $3->link);
+	}
+	;
+
+struct_declarator:
+	any_identifier
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_declaration($1, false, NULL, NULL);
+	   $$->set_location(yylloc);
+	   state->symbols->add_variable(new(state) ir_variable(NULL, $1, ir_var_auto));
+	}
+	| any_identifier '[' constant_expression ']'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_declaration($1, true, $3, NULL);
+	   $$->set_location(yylloc);
+	}
+	;
+
+initializer:
+	assignment_expression
+	;
+
+declaration_statement:
+	declaration
+	;
+
+	// Grammar Note: labeled statements for SWITCH only; 'goto' is not
+	// supported.
+statement:
+	compound_statement	{ $$ = (ast_node *) $1; }
+	| simple_statement
+	;
+
+simple_statement:
+	declaration_statement
+	| expression_statement
+	| selection_statement
+	| switch_statement		{ $$ = NULL; } /* no AST node is built; statement_list reports NULL as an error */
+	| case_label			{ $$ = NULL; } /* likewise yields no AST node */
+	| iteration_statement
+	| jump_statement
+	;
+
+compound_statement:
+	'{' '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_compound_statement(true, NULL);
+	   $$->set_location(yylloc);
+	}
+	| '{'
+	{
+	   state->symbols->push_scope(); /* mid-rule action: runs before statement_list is parsed */
+	}
+	statement_list '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_compound_statement(true, $3); /* $3: the mid-rule action counts as $2 */
+	   $$->set_location(yylloc);
+	   state->symbols->pop_scope(); /* balances the push in the mid-rule action */
+	}
+	;
+
+statement_no_new_scope:
+	compound_statement_no_new_scope { $$ = (ast_node *) $1; }
+	| simple_statement
+	;
+
+compound_statement_no_new_scope:
+	'{' '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_compound_statement(false, NULL);
+	   $$->set_location(yylloc);
+	}
+	| '{' statement_list '}'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_compound_statement(false, $2);
+	   $$->set_location(yylloc);
+	}
+	;
+
+statement_list:
+	statement
+	{
+	   if ($1 == NULL) {
+	      _mesa_glsl_error(& @1, state, "<nil> statement\n");
+	      YYERROR; /* switch/case yield NULL; never dereference it below */
+	   }
+
+	   $$ = $1;
+	   $$->link.self_link();
+	}
+	| statement_list statement
+	{
+	   if ($2 == NULL) {
+	      _mesa_glsl_error(& @2, state, "<nil> statement\n");
+	      YYERROR; /* fail the parse instead of linking a NULL node */
+	   }
+	   $$ = $1;
+	   $$->link.insert_before(& $2->link);
+	}
+	;
+
+expression_statement:
+	';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_statement(NULL);
+	   $$->set_location(yylloc);
+	}
+	| expression ';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_expression_statement($1);
+	   $$->set_location(yylloc);
+	}
+	;
+
+selection_statement:
+	IF '(' expression ')' selection_rest_statement
+	{
+	   $$ = new(state) ast_selection_statement($3, $5.then_statement,
+						   $5.else_statement);
+	   $$->set_location(yylloc);
+	}
+	;
+
+selection_rest_statement:
+	statement ELSE statement
+	{
+	   $$.then_statement = $1;
+	   $$.else_statement = $3;
+	}
+	| statement
+	{
+	   $$.then_statement = $1;
+	   $$.else_statement = NULL;
+	}
+	;
+
+condition:
+	expression
+	{
+	   $$ = (ast_node *) $1;
+	}
+	| fully_specified_type any_identifier '=' initializer
+	{
+	   void *ctx = state;
+	   ast_declaration *decl = new(ctx) ast_declaration($2, false, NULL, $4);
+	   ast_declarator_list *declarator = new(ctx) ast_declarator_list($1);
+	   decl->set_location(yylloc);
+	   declarator->set_location(yylloc);
+
+	   declarator->declarations.push_tail(&decl->link);
+	   $$ = declarator;
+	}
+	;
+
+switch_statement:
+	SWITCH '(' expression ')' compound_statement
+	;
+
+case_label:
+	CASE expression ':'
+	| DEFAULT ':'
+	;
+
+iteration_statement:
+	WHILE '(' condition ')' statement_no_new_scope
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_while,
+	   					    NULL, $3, NULL, $5);
+	   $$->set_location(yylloc);
+	}
+	| DO statement WHILE '(' expression ')' ';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_do_while,
+						    NULL, $5, NULL, $2);
+	   $$->set_location(yylloc);
+	}
+	| FOR '(' for_init_statement for_rest_statement ')' statement_no_new_scope
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_for,
+						    $3, $4.cond, $4.rest, $6);
+	   $$->set_location(yylloc);
+	}
+	;
+
+for_init_statement:
+	expression_statement
+	| declaration_statement
+	;
+
+conditionopt:
+	condition
+	| /* empty */
+	{
+	   $$ = NULL;
+	}
+	;
+
+for_rest_statement:
+	conditionopt ';'
+	{
+	   $$.cond = $1;
+	   $$.rest = NULL;
+	}
+	| conditionopt ';' expression
+	{
+	   $$.cond = $1;
+	   $$.rest = $3;
+	}
+	;
+
+	// Grammar Note: No 'goto'. Gotos are not supported.
+jump_statement:
+	CONTINUE ';' 
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_continue, NULL);
+	   $$->set_location(yylloc);
+	}
+	| BREAK ';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_break, NULL);
+	   $$->set_location(yylloc);
+	}
+	| RETURN ';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, NULL);
+	   $$->set_location(yylloc);
+	}
+	| RETURN expression ';'
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, $2);
+	   $$->set_location(yylloc);
+	}
+	| DISCARD ';' // Fragment shader only.
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_discard, NULL);
+	   $$->set_location(yylloc);
+	}
+	;
+
+external_declaration:
+	function_definition	{ $$ = $1; }
+	| declaration		{ $$ = $1; }
+	| pragma_statement	{ $$ = NULL; }
+	;
+
+function_definition:
+	function_prototype compound_statement_no_new_scope
+	{
+	   void *ctx = state;
+	   $$ = new(ctx) ast_function_definition();
+	   $$->set_location(yylloc);
+	   $$->prototype = $1;
+	   $$->body = $2;
+
+	   state->symbols->pop_scope(); /* closes the scope pushed by function_header */
+	}
+	;
diff --git a/mesalib/src/mesa/SConscript b/mesalib/src/mesa/SConscript
index 24e2155c3..cbd166251 100644
--- a/mesalib/src/mesa/SConscript
+++ b/mesalib/src/mesa/SConscript
@@ -264,6 +264,7 @@ statetracker_sources = [
     'state_tracker/st_draw_feedback.c',
     'state_tracker/st_extensions.c',
     'state_tracker/st_format.c',
+    'state_tracker/st_glsl_to_tgsi.cpp',
     'state_tracker/st_gen_mipmap.c',
     'state_tracker/st_manager.c',
     'state_tracker/st_mesa_to_tgsi.c',
diff --git a/mesalib/src/mesa/main/compiler.h b/mesalib/src/mesa/main/compiler.h
index d736fdfc5..ee7d0b2f8 100644
--- a/mesalib/src/mesa/main/compiler.h
+++ b/mesalib/src/mesa/main/compiler.h
@@ -60,29 +60,7 @@ extern "C" {
 /**
  * Get standard integer types
  */
-#if defined(_MSC_VER)
-   typedef __int8             int8_t;
-   typedef unsigned __int8    uint8_t;
-   typedef __int16            int16_t;
-   typedef unsigned __int16   uint16_t;
-   typedef __int32            int32_t;
-   typedef unsigned __int32   uint32_t;
-   typedef __int64            int64_t;
-   typedef unsigned __int64   uint64_t;
-
-#  if defined(_WIN64)
-     typedef __int64            intptr_t;
-     typedef unsigned __int64   uintptr_t;
-#  else
-     typedef __int32            intptr_t;
-     typedef unsigned __int32   uintptr_t;
-#  endif
-
-#  define INT64_C(__val) __val##i64
-#  define UINT64_C(__val) __val##ui64
-#else
-#  include <stdint.h>
-#endif
+#include <stdint.h>
 
 
 /**
diff --git a/mesalib/src/mesa/main/enable.c b/mesalib/src/mesa/main/enable.c
index aac8b9c5e..3ba4df634 100644
--- a/mesalib/src/mesa/main/enable.c
+++ b/mesalib/src/mesa/main/enable.c
@@ -5,7 +5,6 @@
 
 /*
  * Mesa 3-D graphics library
- * Version:  7.0.3
  *
  * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
  *
@@ -560,7 +559,6 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          ctx->Polygon.OffsetLine = state;
          break;
       case GL_POLYGON_OFFSET_FILL:
-         /*case GL_POLYGON_OFFSET_EXT:*/
          if (ctx->Polygon.OffsetFill == state)
             return;
          FLUSH_VERTICES(ctx, _NEW_POLYGON);
@@ -643,9 +641,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, GLboolean state)
          break;
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
       case GL_NORMAL_ARRAY:
       case GL_COLOR_ARRAY:
@@ -1174,7 +1170,6 @@ _mesa_IsEnabled( GLenum cap )
       case GL_POLYGON_OFFSET_LINE:
 	 return ctx->Polygon.OffsetLine;
       case GL_POLYGON_OFFSET_FILL:
-      /*case GL_POLYGON_OFFSET_EXT:*/
 	 return ctx->Polygon.OffsetFill;
       case GL_RESCALE_NORMAL_EXT:
          return ctx->Transform.RescaleNormals;
@@ -1213,9 +1208,7 @@ _mesa_IsEnabled( GLenum cap )
          }
 #endif
 
-      /*
-       * CLIENT STATE!!!
-       */
+      /* client-side state */
       case GL_VERTEX_ARRAY:
          return (ctx->Array.ArrayObj->Vertex.Enabled != 0);
       case GL_NORMAL_ARRAY:
diff --git a/mesalib/src/mesa/main/ff_fragment_shader.cpp b/mesalib/src/mesa/main/ff_fragment_shader.cpp
index dbfa6b57d..7cc172168 100644
--- a/mesalib/src/mesa/main/ff_fragment_shader.cpp
+++ b/mesalib/src/mesa/main/ff_fragment_shader.cpp
@@ -874,7 +874,8 @@ static struct ureg register_const4f( struct texenv_fragment_program *p,
    values[1] = s1;
    values[2] = s2;
    values[3] = s3;
-   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters,
+                                     (gl_constant_value *) values, 4,
                                      &swizzle );
    r = make_ureg(PROGRAM_CONSTANT, idx);
    r.swz = swizzle;
diff --git a/mesalib/src/mesa/main/ffvertex_prog.c b/mesalib/src/mesa/main/ffvertex_prog.c
index 281229e34..2d2485c9e 100644
--- a/mesalib/src/mesa/main/ffvertex_prog.c
+++ b/mesalib/src/mesa/main/ffvertex_prog.c
@@ -1,1680 +1,1680 @@
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file ffvertex_prog.c
- *
- * Create a vertex program to execute the current fixed function T&L pipeline.
- * \author Keith Whitwell
- */
-
-
-#include "main/glheader.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "main/enums.h"
-#include "main/ffvertex_prog.h"
-#include "program/program.h"
-#include "program/prog_cache.h"
-#include "program/prog_instruction.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_statevars.h"
-
-
-/** Max of number of lights and texture coord units */
-#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)
-
-struct state_key {
-   unsigned light_color_material_mask:12;
-   unsigned light_global_enabled:1;
-   unsigned light_local_viewer:1;
-   unsigned light_twoside:1;
-   unsigned material_shininess_is_zero:1;
-   unsigned need_eye_coords:1;
-   unsigned normalize:1;
-   unsigned rescale_normals:1;
-
-   unsigned fog_source_is_depth:1;
-   unsigned separate_specular:1;
-   unsigned point_attenuated:1;
-   unsigned point_array:1;
-   unsigned texture_enabled_global:1;
-   unsigned fragprog_inputs_read:12;
-
-   unsigned varying_vp_inputs;
-
-   struct {
-      unsigned light_enabled:1;
-      unsigned light_eyepos3_is_zero:1;
-      unsigned light_spotcutoff_is_180:1;
-      unsigned light_attenuated:1;
-      unsigned texunit_really_enabled:1;
-      unsigned texmat_enabled:1;
-      unsigned coord_replace:1;
-      unsigned texgen_enabled:4;
-      unsigned texgen_mode0:4;
-      unsigned texgen_mode1:4;
-      unsigned texgen_mode2:4;
-      unsigned texgen_mode3:4;
-   } unit[NUM_UNITS];
-};
-
-
-#define TXG_NONE           0
-#define TXG_OBJ_LINEAR     1
-#define TXG_EYE_LINEAR     2
-#define TXG_SPHERE_MAP     3
-#define TXG_REFLECTION_MAP 4
-#define TXG_NORMAL_MAP     5
-
-static GLuint translate_texgen( GLboolean enabled, GLenum mode )
-{
-   if (!enabled)
-      return TXG_NONE;
-
-   switch (mode) {
-   case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
-   case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
-   case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
-   case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
-   case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
-   default: return TXG_NONE;
-   }
-}
-
-
-
-static GLboolean check_active_shininess( struct gl_context *ctx,
-                                         const struct state_key *key,
-                                         GLuint side )
-{
-   GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
-
-   if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
-       (key->light_color_material_mask & bit))
-      return GL_TRUE;
-
-   if (key->varying_vp_inputs & (bit << 16))
-      return GL_TRUE;
-
-   if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
-      return GL_TRUE;
-
-   return GL_FALSE;
-}
-
-
-static void make_state_key( struct gl_context *ctx, struct state_key *key )
-{
-   const struct gl_fragment_program *fp;
-   GLuint i;
-
-   memset(key, 0, sizeof(struct state_key));
-   fp = ctx->FragmentProgram._Current;
-
-   /* This now relies on texenvprogram.c being active:
-    */
-   assert(fp);
-
-   key->need_eye_coords = ctx->_NeedEyeCoords;
-
-   key->fragprog_inputs_read = fp->Base.InputsRead;
-   key->varying_vp_inputs = ctx->varying_vp_inputs;
-
-   if (ctx->RenderMode == GL_FEEDBACK) {
-      /* make sure the vertprog emits color and tex0 */
-      key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
-   }
-
-   key->separate_specular = (ctx->Light.Model.ColorControl ==
-			     GL_SEPARATE_SPECULAR_COLOR);
-
-   if (ctx->Light.Enabled) {
-      key->light_global_enabled = 1;
-
-      if (ctx->Light.Model.LocalViewer)
-	 key->light_local_viewer = 1;
-
-      if (ctx->Light.Model.TwoSide)
-	 key->light_twoside = 1;
-
-      if (ctx->Light.ColorMaterialEnabled) {
-	 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
-      }
-
-      for (i = 0; i < MAX_LIGHTS; i++) {
-	 struct gl_light *light = &ctx->Light.Light[i];
-
-	 if (light->Enabled) {
-	    key->unit[i].light_enabled = 1;
-
-	    if (light->EyePosition[3] == 0.0)
-	       key->unit[i].light_eyepos3_is_zero = 1;
-
-	    if (light->SpotCutoff == 180.0)
-	       key->unit[i].light_spotcutoff_is_180 = 1;
-
-	    if (light->ConstantAttenuation != 1.0 ||
-		light->LinearAttenuation != 0.0 ||
-		light->QuadraticAttenuation != 0.0)
-	       key->unit[i].light_attenuated = 1;
-	 }
-      }
-
-      if (check_active_shininess(ctx, key, 0)) {
-         key->material_shininess_is_zero = 0;
-      }
-      else if (key->light_twoside &&
-               check_active_shininess(ctx, key, 1)) {
-         key->material_shininess_is_zero = 0;
-      }
-      else {
-         key->material_shininess_is_zero = 1;
-      }
-   }
-
-   if (ctx->Transform.Normalize)
-      key->normalize = 1;
-
-   if (ctx->Transform.RescaleNormals)
-      key->rescale_normals = 1;
-
-   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
-      key->fog_source_is_depth = 1;
-
-   if (ctx->Point._Attenuated)
-      key->point_attenuated = 1;
-
-#if FEATURE_point_size_array
-   if (ctx->Array.ArrayObj->PointSize.Enabled)
-      key->point_array = 1;
-#endif
-
-   if (ctx->Texture._TexGenEnabled ||
-       ctx->Texture._TexMatEnabled ||
-       ctx->Texture._EnabledUnits)
-      key->texture_enabled_global = 1;
-
-   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
-      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
-
-      if (texUnit->_ReallyEnabled)
-	 key->unit[i].texunit_really_enabled = 1;
-
-      if (ctx->Point.PointSprite)
-	 if (ctx->Point.CoordReplace[i])
-	    key->unit[i].coord_replace = 1;
-
-      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
-	 key->unit[i].texmat_enabled = 1;
-
-      if (texUnit->TexGenEnabled) {
-	 key->unit[i].texgen_enabled = 1;
-
-	 key->unit[i].texgen_mode0 =
-	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
-			      texUnit->GenS.Mode );
-	 key->unit[i].texgen_mode1 =
-	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
-			      texUnit->GenT.Mode );
-	 key->unit[i].texgen_mode2 =
-	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
-			      texUnit->GenR.Mode );
-	 key->unit[i].texgen_mode3 =
-	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
-			      texUnit->GenQ.Mode );
-      }
-   }
-}
-
-
-
-/* Very useful debugging tool - produces annotated listing of
- * generated program with line/function references for each
- * instruction back into this file:
- */
-#define DISASSEM 0
-
-
-/* Use uregs to represent registers internally, translate to Mesa's
- * expected formats on emit.
- *
- * NOTE: These are passed by value extensively in this file rather
- * than as usual by pointer reference.  If this disturbs you, try
- * remembering they are just 32bits in size.
- *
- * GCC is smart enough to deal with these dword-sized structures in
- * much the same way as if I had defined them as dwords and was using
- * macros to access and set the fields.  This is much nicer and easier
- * to evolve.
- */
-struct ureg {
-   GLuint file:4;
-   GLint idx:9;      /* relative addressing may be negative */
-                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
-   GLuint negate:1;
-   GLuint swz:12;
-   GLuint pad:6;
-};
-
-
-struct tnl_program {
-   const struct state_key *state;
-   struct gl_vertex_program *program;
-   GLint max_inst;  /** number of instructions allocated for program */
-   GLboolean mvp_with_dp4;
-
-   GLuint temp_in_use;
-   GLuint temp_reserved;
-
-   struct ureg eye_position;
-   struct ureg eye_position_z;
-   struct ureg eye_position_normalized;
-   struct ureg transformed_normal;
-   struct ureg identity;
-
-   GLuint materials;
-   GLuint color_materials;
-};
-
-
-static const struct ureg undef = {
-   PROGRAM_UNDEFINED,
-   0,
-   0,
-   0,
-   0
-};
-
-/* Local shorthand:
- */
-#define X    SWIZZLE_X
-#define Y    SWIZZLE_Y
-#define Z    SWIZZLE_Z
-#define W    SWIZZLE_W
-
-
-/* Construct a ureg:
- */
-static struct ureg make_ureg(GLuint file, GLint idx)
-{
-   struct ureg reg;
-   reg.file = file;
-   reg.idx = idx;
-   reg.negate = 0;
-   reg.swz = SWIZZLE_NOOP;
-   reg.pad = 0;
-   return reg;
-}
-
-
-
-static struct ureg negate( struct ureg reg )
-{
-   reg.negate ^= 1;
-   return reg;
-}
-
-
-static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
-{
-   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
-			   GET_SWZ(reg.swz, y),
-			   GET_SWZ(reg.swz, z),
-			   GET_SWZ(reg.swz, w));
-   return reg;
-}
-
-
-static struct ureg swizzle1( struct ureg reg, int x )
-{
-   return swizzle(reg, x, x, x, x);
-}
-
-
-static struct ureg get_temp( struct tnl_program *p )
-{
-   int bit = _mesa_ffs( ~p->temp_in_use );
-   if (!bit) {
-      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
-      exit(1);
-   }
-
-   if ((GLuint) bit > p->program->Base.NumTemporaries)
-      p->program->Base.NumTemporaries = bit;
-
-   p->temp_in_use |= 1<<(bit-1);
-   return make_ureg(PROGRAM_TEMPORARY, bit-1);
-}
-
-
-static struct ureg reserve_temp( struct tnl_program *p )
-{
-   struct ureg temp = get_temp( p );
-   p->temp_reserved |= 1<<temp.idx;
-   return temp;
-}
-
-
-static void release_temp( struct tnl_program *p, struct ureg reg )
-{
-   if (reg.file == PROGRAM_TEMPORARY) {
-      p->temp_in_use &= ~(1<<reg.idx);
-      p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
-   }
-}
-
-static void release_temps( struct tnl_program *p )
-{
-   p->temp_in_use = p->temp_reserved;
-}
-
-
-static struct ureg register_param5(struct tnl_program *p,
-				   GLint s0,
-				   GLint s1,
-				   GLint s2,
-				   GLint s3,
-                                   GLint s4)
-{
-   gl_state_index tokens[STATE_LENGTH];
-   GLint idx;
-   tokens[0] = s0;
-   tokens[1] = s1;
-   tokens[2] = s2;
-   tokens[3] = s3;
-   tokens[4] = s4;
-   idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
-   return make_ureg(PROGRAM_STATE_VAR, idx);
-}
-
-
-#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
-#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
-#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
-#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
-
-
-
-/**
- * \param input  one of VERT_ATTRIB_x tokens.
- */
-static struct ureg register_input( struct tnl_program *p, GLuint input )
-{
-   assert(input < 32);
-
-   if (p->state->varying_vp_inputs & (1<<input)) {
-      p->program->Base.InputsRead |= (1<<input);
-      return make_ureg(PROGRAM_INPUT, input);
-   }
-   else {
-      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
-   }
-}
-
-
-/**
- * \param input  one of VERT_RESULT_x tokens.
- */
-static struct ureg register_output( struct tnl_program *p, GLuint output )
-{
-   p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
-   return make_ureg(PROGRAM_OUTPUT, output);
-}
-
-
-static struct ureg register_const4f( struct tnl_program *p,
-			      GLfloat s0,
-			      GLfloat s1,
-			      GLfloat s2,
-			      GLfloat s3)
-{
-   GLfloat values[4];
-   GLint idx;
-   GLuint swizzle;
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
-   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
-                                     &swizzle );
-   ASSERT(swizzle == SWIZZLE_NOOP);
-   return make_ureg(PROGRAM_CONSTANT, idx);
-}
-
-#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
-#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
-#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
-#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
-
-static GLboolean is_undef( struct ureg reg )
-{
-   return reg.file == PROGRAM_UNDEFINED;
-}
-
-
-static struct ureg get_identity_param( struct tnl_program *p )
-{
-   if (is_undef(p->identity))
-      p->identity = register_const4f(p, 0,0,0,1);
-
-   return p->identity;
-}
-
-static void register_matrix_param5( struct tnl_program *p,
-				    GLint s0, /* modelview, projection, etc */
-				    GLint s1, /* texture matrix number */
-				    GLint s2, /* first row */
-				    GLint s3, /* last row */
-				    GLint s4, /* inverse, transpose, etc */
-				    struct ureg *matrix )
-{
-   GLint i;
-
-   /* This is a bit sad as the support is there to pull the whole
-    * matrix out in one go:
-    */
-   for (i = 0; i <= s3 - s2; i++)
-      matrix[i] = register_param5( p, s0, s1, i, i, s4 );
-}
-
-
-static void emit_arg( struct prog_src_register *src,
-		      struct ureg reg )
-{
-   src->File = reg.file;
-   src->Index = reg.idx;
-   src->Swizzle = reg.swz;
-   src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
-   src->Abs = 0;
-   src->RelAddr = 0;
-   /* Check that bitfield sizes aren't exceeded */
-   ASSERT(src->Index == reg.idx);
-}
-
-
-static void emit_dst( struct prog_dst_register *dst,
-		      struct ureg reg, GLuint mask )
-{
-   dst->File = reg.file;
-   dst->Index = reg.idx;
-   /* allow zero as a shorthand for xyzw */
-   dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
-   dst->CondMask = COND_TR;  /* always pass cond test */
-   dst->CondSwizzle = SWIZZLE_NOOP;
-   dst->CondSrc = 0;
-   /* Check that bitfield sizes aren't exceeded */
-   ASSERT(dst->Index == reg.idx);
-}
-
-
-static void debug_insn( struct prog_instruction *inst, const char *fn,
-			GLuint line )
-{
-   if (DISASSEM) {
-      static const char *last_fn;
-
-      if (fn != last_fn) {
-	 last_fn = fn;
-	 printf("%s:\n", fn);
-      }
-
-      printf("%d:\t", line);
-      _mesa_print_instruction(inst);
-   }
-}
-
-
-static void emit_op3fn(struct tnl_program *p,
-                       enum prog_opcode op,
-		       struct ureg dest,
-		       GLuint mask,
-		       struct ureg src0,
-		       struct ureg src1,
-		       struct ureg src2,
-		       const char *fn,
-		       GLuint line)
-{
-   GLuint nr;
-   struct prog_instruction *inst;
-
-   assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
-
-   if (p->program->Base.NumInstructions == p->max_inst) {
-      /* need to extend the program's instruction array */
-      struct prog_instruction *newInst;
-
-      /* double the size */
-      p->max_inst *= 2;
-
-      newInst = _mesa_alloc_instructions(p->max_inst);
-      if (!newInst) {
-         _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
-         return;
-      }
-
-      _mesa_copy_instructions(newInst,
-                              p->program->Base.Instructions,
-                              p->program->Base.NumInstructions);
-
-      _mesa_free_instructions(p->program->Base.Instructions,
-                              p->program->Base.NumInstructions);
-
-      p->program->Base.Instructions = newInst;
-   }
-
-   nr = p->program->Base.NumInstructions++;
-
-   inst = &p->program->Base.Instructions[nr];
-   inst->Opcode = (enum prog_opcode) op;
-   inst->Data = 0;
-
-   emit_arg( &inst->SrcReg[0], src0 );
-   emit_arg( &inst->SrcReg[1], src1 );
-   emit_arg( &inst->SrcReg[2], src2 );
-
-   emit_dst( &inst->DstReg, dest, mask );
-
-   debug_insn(inst, fn, line);
-}
-
-
-#define emit_op3(p, op, dst, mask, src0, src1, src2) \
-   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
-
-#define emit_op2(p, op, dst, mask, src0, src1) \
-    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
-
-#define emit_op1(p, op, dst, mask, src0) \
-    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
-
-
-static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
-{
-   if (reg.file == PROGRAM_TEMPORARY &&
-       !(p->temp_reserved & (1<<reg.idx)))
-      return reg;
-   else {
-      struct ureg temp = get_temp(p);
-      emit_op1(p, OPCODE_MOV, temp, 0, reg);
-      return temp;
-   }
-}
-
-
-/* Currently no tracking performed of input/output/register size or
- * active elements.  Could be used to reduce these operations, as
- * could the matrix type.
- */
-static void emit_matrix_transform_vec4( struct tnl_program *p,
-					struct ureg dest,
-					const struct ureg *mat,
-					struct ureg src)
-{
-   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
-   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
-   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
-   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
-}
-
-
-/* This version is much easier to implement if writemasks are not
- * supported natively on the target or (like SSE), the target doesn't
- * have a clean/obvious dotproduct implementation.
- */
-static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
-						  struct ureg dest,
-						  const struct ureg *mat,
-						  struct ureg src)
-{
-   struct ureg tmp;
-
-   if (dest.file != PROGRAM_TEMPORARY)
-      tmp = get_temp(p);
-   else
-      tmp = dest;
-
-   emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
-   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
-   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
-   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
-
-   if (dest.file != PROGRAM_TEMPORARY)
-      release_temp(p, tmp);
-}
-
-
-static void emit_matrix_transform_vec3( struct tnl_program *p,
-					struct ureg dest,
-					const struct ureg *mat,
-					struct ureg src)
-{
-   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
-   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
-   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
-}
-
-
-static void emit_normalize_vec3( struct tnl_program *p,
-				 struct ureg dest,
-				 struct ureg src )
-{
-#if 0
-   /* XXX use this when drivers are ready for NRM3 */
-   emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
-#else
-   struct ureg tmp = get_temp(p);
-   emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
-   emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
-   emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
-   release_temp(p, tmp);
-#endif
-}
-
-
-static void emit_passthrough( struct tnl_program *p,
-			      GLuint input,
-			      GLuint output )
-{
-   struct ureg out = register_output(p, output);
-   emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
-}
-
-
-static struct ureg get_eye_position( struct tnl_program *p )
-{
-   if (is_undef(p->eye_position)) {
-      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
-      struct ureg modelview[4];
-
-      p->eye_position = reserve_temp(p);
-
-      if (p->mvp_with_dp4) {
-	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
-                                 0, modelview );
-
-	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
-      }
-      else {
-	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
-				 STATE_MATRIX_TRANSPOSE, modelview );
-
-	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
-      }
-   }
-
-   return p->eye_position;
-}
-
-
-static struct ureg get_eye_position_z( struct tnl_program *p )
-{
-   if (!is_undef(p->eye_position))
-      return swizzle1(p->eye_position, Z);
-
-   if (is_undef(p->eye_position_z)) {
-      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
-      struct ureg modelview[4];
-
-      p->eye_position_z = reserve_temp(p);
-
-      register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
-                              0, modelview );
-
-      emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
-   }
-
-   return p->eye_position_z;
-}
-
-
-static struct ureg get_eye_position_normalized( struct tnl_program *p )
-{
-   if (is_undef(p->eye_position_normalized)) {
-      struct ureg eye = get_eye_position(p);
-      p->eye_position_normalized = reserve_temp(p);
-      emit_normalize_vec3(p, p->eye_position_normalized, eye);
-   }
-
-   return p->eye_position_normalized;
-}
-
-
-static struct ureg get_transformed_normal( struct tnl_program *p )
-{
-   if (is_undef(p->transformed_normal) &&
-       !p->state->need_eye_coords &&
-       !p->state->normalize &&
-       !(p->state->need_eye_coords == p->state->rescale_normals))
-   {
-      p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
-   }
-   else if (is_undef(p->transformed_normal))
-   {
-      struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
-      struct ureg mvinv[3];
-      struct ureg transformed_normal = reserve_temp(p);
-
-      if (p->state->need_eye_coords) {
-         register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
-                                 STATE_MATRIX_INVTRANS, mvinv );
-
-         /* Transform to eye space:
-          */
-         emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
-         normal = transformed_normal;
-      }
-
-      /* Normalize/Rescale:
-       */
-      if (p->state->normalize) {
-	 emit_normalize_vec3( p, transformed_normal, normal );
-         normal = transformed_normal;
-      }
-      else if (p->state->need_eye_coords == p->state->rescale_normals) {
-         /* This is already adjusted for eye/non-eye rendering:
-          */
-	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
-                                               STATE_NORMAL_SCALE);
-
-	 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
-         normal = transformed_normal;
-      }
-
-      assert(normal.file == PROGRAM_TEMPORARY);
-      p->transformed_normal = normal;
-   }
-
-   return p->transformed_normal;
-}
-
-
-static void build_hpos( struct tnl_program *p )
-{
-   struct ureg pos = register_input( p, VERT_ATTRIB_POS );
-   struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
-   struct ureg mvp[4];
-
-   if (p->mvp_with_dp4) {
-      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
-			      0, mvp );
-      emit_matrix_transform_vec4( p, hpos, mvp, pos );
-   }
-   else {
-      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
-			      STATE_MATRIX_TRANSPOSE, mvp );
-      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
-   }
-}
-
-
-static GLuint material_attrib( GLuint side, GLuint property )
-{
-   return (property - STATE_AMBIENT) * 2 + side;
-}
-
-
-/**
- * Get a bitmask of which material values vary on a per-vertex basis.
- */
-static void set_material_flags( struct tnl_program *p )
-{
-   p->color_materials = 0;
-   p->materials = 0;
-
-   if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
-      p->materials =
-	 p->color_materials = p->state->light_color_material_mask;
-   }
-
-   p->materials |= (p->state->varying_vp_inputs >> 16);
-}
-
-
-static struct ureg get_material( struct tnl_program *p, GLuint side,
-				 GLuint property )
-{
-   GLuint attrib = material_attrib(side, property);
-
-   if (p->color_materials & (1<<attrib))
-      return register_input(p, VERT_ATTRIB_COLOR0);
-   else if (p->materials & (1<<attrib)) {
-      /* Put material values in the GENERIC slots -- they are not used
-       * for anything in fixed function mode.
-       */
-      return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
-   }
-   else
-      return register_param3( p, STATE_MATERIAL, side, property );
-}
-
-#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
-				   MAT_BIT_FRONT_AMBIENT | \
-				   MAT_BIT_FRONT_DIFFUSE) << (side))
-
-
-/**
- * Either return a precalculated constant value or emit code to
- * calculate these values dynamically in the case where material calls
- * are present between begin/end pairs.
- *
- * Probably want to shift this to the program compilation phase - if
- * we always emitted the calculation here, a smart compiler could
- * detect that it was constant (given a certain set of inputs), and
- * lift it out of the main loop.  That way the programs created here
- * would be independent of the vertex_buffer details.
- */
-static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
-{
-   if (p->materials & SCENE_COLOR_BITS(side)) {
-      struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
-      struct ureg material_emission = get_material(p, side, STATE_EMISSION);
-      struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
-      struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
-      struct ureg tmp = make_temp(p, material_diffuse);
-      emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
-	       material_ambient, material_emission);
-      return tmp;
-   }
-   else
-      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
-}
-
-
-static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
-				  GLuint side, GLuint property )
-{
-   GLuint attrib = material_attrib(side, property);
-   if (p->materials & (1<<attrib)) {
-      struct ureg light_value =
-	 register_param3(p, STATE_LIGHT, light, property);
-      struct ureg material_value = get_material(p, side, property);
-      struct ureg tmp = get_temp(p);
-      emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
-      return tmp;
-   }
-   else
-      return register_param4(p, STATE_LIGHTPROD, light, side, property);
-}
-
-
-static struct ureg calculate_light_attenuation( struct tnl_program *p,
-						GLuint i,
-						struct ureg VPpli,
-						struct ureg dist )
-{
-   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
-					     STATE_ATTENUATION);
-   struct ureg att = get_temp(p);
-
-   /* Calculate spot attenuation:
-    */
-   if (!p->state->unit[i].light_spotcutoff_is_180) {
-      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
-						  STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
-      struct ureg spot = get_temp(p);
-      struct ureg slt = get_temp(p);
-
-      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
-      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
-      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
-      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
-
-      release_temp(p, spot);
-      release_temp(p, slt);
-   }
-
-   /* Calculate distance attenuation:
-    */
-   if (p->state->unit[i].light_attenuated) {
-      /* 1/d,d,d,1/d */
-      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
-      /* 1,d,d*d,1/d */
-      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
-      /* 1/dist-atten */
-      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
-
-      if (!p->state->unit[i].light_spotcutoff_is_180) {
-	 /* dist-atten */
-	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
-	 /* spot-atten * dist-atten */
-	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
-      }
-      else {
-	 /* dist-atten */
-	 emit_op1(p, OPCODE_RCP, att, 0, dist);
-      }
-   }
-
-   return att;
-}
-
-
-/**
- * Compute:
- *   lit.y = MAX(0, dots.x)
- *   lit.z = SLT(0, dots.x)
- */
-static void emit_degenerate_lit( struct tnl_program *p,
-                                 struct ureg lit,
-                                 struct ureg dots )
-{
-   struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
-
-   /* Note that lit.x & lit.w will not be examined.  Note also that
-    * dots.xyzw == dots.xxxx.
-    */
-
-   /* MAX lit, id, dots;
-    */
-   emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
-
-   /* result[2] = (in > 0 ? 1 : 0)
-    * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
-    */
-   emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
-}
-
-
-/* Need to add some addtional parameters to allow lighting in object
- * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
- * space lighting.
- */
-static void build_lighting( struct tnl_program *p )
-{
-   const GLboolean twoside = p->state->light_twoside;
-   const GLboolean separate = p->state->separate_specular;
-   GLuint nr_lights = 0, count = 0;
-   struct ureg normal = get_transformed_normal(p);
-   struct ureg lit = get_temp(p);
-   struct ureg dots = get_temp(p);
-   struct ureg _col0 = undef, _col1 = undef;
-   struct ureg _bfc0 = undef, _bfc1 = undef;
-   GLuint i;
-
-   /*
-    * NOTE:
-    * dots.x = dot(normal, VPpli)
-    * dots.y = dot(normal, halfAngle)
-    * dots.z = back.shininess
-    * dots.w = front.shininess
-    */
-
-   for (i = 0; i < MAX_LIGHTS; i++)
-      if (p->state->unit[i].light_enabled)
-	 nr_lights++;
-
-   set_material_flags(p);
-
-   {
-      if (!p->state->material_shininess_is_zero) {
-         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
-         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
-         release_temp(p, shininess);
-      }
-
-      _col0 = make_temp(p, get_scenecolor(p, 0));
-      if (separate)
-	 _col1 = make_temp(p, get_identity_param(p));
-      else
-	 _col1 = _col0;
-   }
-
-   if (twoside) {
-      if (!p->state->material_shininess_is_zero) {
-         /* Note that we negate the back-face specular exponent here.
-          * The negation will be un-done later in the back-face code below.
-          */
-         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
-         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
-                  negate(swizzle1(shininess,X)));
-         release_temp(p, shininess);
-      }
-
-      _bfc0 = make_temp(p, get_scenecolor(p, 1));
-      if (separate)
-	 _bfc1 = make_temp(p, get_identity_param(p));
-      else
-	 _bfc1 = _bfc0;
-   }
-
-   /* If no lights, still need to emit the scenecolor.
-    */
-   {
-      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
-      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
-   }
-
-   if (separate) {
-      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
-      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
-   }
-
-   if (twoside) {
-      struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
-      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
-   }
-
-   if (twoside && separate) {
-      struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
-      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
-   }
-
-   if (nr_lights == 0) {
-      release_temps(p);
-      return;
-   }
-
-   for (i = 0; i < MAX_LIGHTS; i++) {
-      if (p->state->unit[i].light_enabled) {
-	 struct ureg half = undef;
-	 struct ureg att = undef, VPpli = undef;
-
-	 count++;
-
-	 if (p->state->unit[i].light_eyepos3_is_zero) {
-	    /* Can used precomputed constants in this case.
-	     * Attenuation never applies to infinite lights.
-	     */
-	    VPpli = register_param3(p, STATE_INTERNAL,
-				    STATE_LIGHT_POSITION_NORMALIZED, i);
-
-            if (!p->state->material_shininess_is_zero) {
-               if (p->state->light_local_viewer) {
-                  struct ureg eye_hat = get_eye_position_normalized(p);
-                  half = get_temp(p);
-                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
-                  emit_normalize_vec3(p, half, half);
-               }
-               else {
-                  half = register_param3(p, STATE_INTERNAL,
-                                         STATE_LIGHT_HALF_VECTOR, i);
-               }
-            }
-	 }
-	 else {
-	    struct ureg Ppli = register_param3(p, STATE_INTERNAL,
-					       STATE_LIGHT_POSITION, i);
-	    struct ureg V = get_eye_position(p);
-	    struct ureg dist = get_temp(p);
-
-	    VPpli = get_temp(p);
-
-	    /* Calculate VPpli vector
-	     */
-	    emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
-
-	    /* Normalize VPpli.  The dist value also used in
-	     * attenuation below.
-	     */
-	    emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
-	    emit_op1(p, OPCODE_RSQ, dist, 0, dist);
-	    emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
-
-	    /* Calculate attenuation:
-	     */
-	    if (!p->state->unit[i].light_spotcutoff_is_180 ||
-		p->state->unit[i].light_attenuated) {
-	       att = calculate_light_attenuation(p, i, VPpli, dist);
-	    }
-
-	    /* Calculate viewer direction, or use infinite viewer:
-	     */
-            if (!p->state->material_shininess_is_zero) {
-               half = get_temp(p);
-
-               if (p->state->light_local_viewer) {
-                  struct ureg eye_hat = get_eye_position_normalized(p);
-                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
-               }
-               else {
-                  struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
-                  emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
-               }
-
-               emit_normalize_vec3(p, half, half);
-            }
-
-	    release_temp(p, dist);
-	 }
-
-	 /* Calculate dot products:
-	  */
-         if (p->state->material_shininess_is_zero) {
-            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
-         }
-         else {
-            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
-            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
-         }
-
-	 /* Front face lighting:
-	  */
-	 {
-	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
-	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
-	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
-	    struct ureg res0, res1;
-	    GLuint mask0, mask1;
-
-	    if (count == nr_lights) {
-	       if (separate) {
-		  mask0 = WRITEMASK_XYZ;
-		  mask1 = WRITEMASK_XYZ;
-		  res0 = register_output( p, VERT_RESULT_COL0 );
-		  res1 = register_output( p, VERT_RESULT_COL1 );
-	       }
-	       else {
-		  mask0 = 0;
-		  mask1 = WRITEMASK_XYZ;
-		  res0 = _col0;
-		  res1 = register_output( p, VERT_RESULT_COL0 );
-	       }
-	    }
-            else {
-	       mask0 = 0;
-	       mask1 = 0;
-	       res0 = _col0;
-	       res1 = _col1;
-	    }
-
-	    if (!is_undef(att)) {
-               /* light is attenuated by distance */
-               emit_op1(p, OPCODE_LIT, lit, 0, dots);
-               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
-               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
-            }
-            else if (!p->state->material_shininess_is_zero) {
-               /* there's a non-zero specular term */
-               emit_op1(p, OPCODE_LIT, lit, 0, dots);
-               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
-            }
-            else {
-               /* no attenutation, no specular */
-               emit_degenerate_lit(p, lit, dots);
-               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
-            }
-
-	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
-	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
-
-	    release_temp(p, ambient);
-	    release_temp(p, diffuse);
-	    release_temp(p, specular);
-	 }
-
-	 /* Back face lighting:
-	  */
-	 if (twoside) {
-	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
-	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
-	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
-	    struct ureg res0, res1;
-	    GLuint mask0, mask1;
-
-	    if (count == nr_lights) {
-	       if (separate) {
-		  mask0 = WRITEMASK_XYZ;
-		  mask1 = WRITEMASK_XYZ;
-		  res0 = register_output( p, VERT_RESULT_BFC0 );
-		  res1 = register_output( p, VERT_RESULT_BFC1 );
-	       }
-	       else {
-		  mask0 = 0;
-		  mask1 = WRITEMASK_XYZ;
-		  res0 = _bfc0;
-		  res1 = register_output( p, VERT_RESULT_BFC0 );
-	       }
-	    }
-            else {
-	       res0 = _bfc0;
-	       res1 = _bfc1;
-	       mask0 = 0;
-	       mask1 = 0;
-	    }
-
-            /* For the back face we need to negate the X and Y component
-             * dot products.  dots.Z has the negated back-face specular
-             * exponent.  We swizzle that into the W position.  This
-             * negation makes the back-face specular term positive again.
-             */
-            dots = negate(swizzle(dots,X,Y,W,Z));
-
-	    if (!is_undef(att)) {
-               emit_op1(p, OPCODE_LIT, lit, 0, dots);
-	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
-               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
-            }
-            else if (!p->state->material_shininess_is_zero) {
-               emit_op1(p, OPCODE_LIT, lit, 0, dots);
-               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
-            }
-            else {
-               emit_degenerate_lit(p, lit, dots);
-               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
-            }
-
-	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
-	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
-            /* restore dots to its original state for subsequent lights
-             * by negating and swizzling again.
-             */
-            dots = negate(swizzle(dots,X,Y,W,Z));
-
-	    release_temp(p, ambient);
-	    release_temp(p, diffuse);
-	    release_temp(p, specular);
-	 }
-
-	 release_temp(p, half);
-	 release_temp(p, VPpli);
-	 release_temp(p, att);
-      }
-   }
-
-   release_temps( p );
-}
-
-
-static void build_fog( struct tnl_program *p )
-{
-   struct ureg fog = register_output(p, VERT_RESULT_FOGC);
-   struct ureg input;
-
-   if (p->state->fog_source_is_depth) {
-      input = get_eye_position_z(p);
-   }
-   else {
-      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
-   }
-
-   /* result.fog = {abs(f),0,0,1}; */
-   emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
-   emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
-}
-
-
-static void build_reflect_texgen( struct tnl_program *p,
-				  struct ureg dest,
-				  GLuint writemask )
-{
-   struct ureg normal = get_transformed_normal(p);
-   struct ureg eye_hat = get_eye_position_normalized(p);
-   struct ureg tmp = get_temp(p);
-
-   /* n.u */
-   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
-   /* 2n.u */
-   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
-   /* (-2n.u)n + u */
-   emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
-
-   release_temp(p, tmp);
-}
-
-
-static void build_sphere_texgen( struct tnl_program *p,
-				 struct ureg dest,
-				 GLuint writemask )
-{
-   struct ureg normal = get_transformed_normal(p);
-   struct ureg eye_hat = get_eye_position_normalized(p);
-   struct ureg tmp = get_temp(p);
-   struct ureg half = register_scalar_const(p, .5);
-   struct ureg r = get_temp(p);
-   struct ureg inv_m = get_temp(p);
-   struct ureg id = get_identity_param(p);
-
-   /* Could share the above calculations, but it would be
-    * a fairly odd state for someone to set (both sphere and
-    * reflection active for different texture coordinate
-    * components.  Of course - if two texture units enable
-    * reflect and/or sphere, things start to tilt in favour
-    * of seperating this out:
-    */
-
-   /* n.u */
-   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
-   /* 2n.u */
-   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
-   /* (-2n.u)n + u */
-   emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
-   /* r + 0,0,1 */
-   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
-   /* rx^2 + ry^2 + (rz+1)^2 */
-   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
-   /* 2/m */
-   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
-   /* 1/m */
-   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
-   /* r/m + 1/2 */
-   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
-
-   release_temp(p, tmp);
-   release_temp(p, r);
-   release_temp(p, inv_m);
-}
-
-
-static void build_texture_transform( struct tnl_program *p )
-{
-   GLuint i, j;
-
-   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
-
-      if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
-	 continue;
-
-      if (p->state->unit[i].coord_replace)
-  	 continue;
-
-      if (p->state->unit[i].texgen_enabled ||
-	  p->state->unit[i].texmat_enabled) {
-
-	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
-	 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
-	 struct ureg out_texgen = undef;
-
-	 if (p->state->unit[i].texgen_enabled) {
-	    GLuint copy_mask = 0;
-	    GLuint sphere_mask = 0;
-	    GLuint reflect_mask = 0;
-	    GLuint normal_mask = 0;
-	    GLuint modes[4];
-
-	    if (texmat_enabled)
-	       out_texgen = get_temp(p);
-	    else
-	       out_texgen = out;
-
-	    modes[0] = p->state->unit[i].texgen_mode0;
-	    modes[1] = p->state->unit[i].texgen_mode1;
-	    modes[2] = p->state->unit[i].texgen_mode2;
-	    modes[3] = p->state->unit[i].texgen_mode3;
-
-	    for (j = 0; j < 4; j++) {
-	       switch (modes[j]) {
-	       case TXG_OBJ_LINEAR: {
-		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
-		  struct ureg plane =
-		     register_param3(p, STATE_TEXGEN, i,
-				     STATE_TEXGEN_OBJECT_S + j);
-
-		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
-			   obj, plane );
-		  break;
-	       }
-	       case TXG_EYE_LINEAR: {
-		  struct ureg eye = get_eye_position(p);
-		  struct ureg plane =
-		     register_param3(p, STATE_TEXGEN, i,
-				     STATE_TEXGEN_EYE_S + j);
-
-		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
-			   eye, plane );
-		  break;
-	       }
-	       case TXG_SPHERE_MAP:
-		  sphere_mask |= WRITEMASK_X << j;
-		  break;
-	       case TXG_REFLECTION_MAP:
-		  reflect_mask |= WRITEMASK_X << j;
-		  break;
-	       case TXG_NORMAL_MAP:
-		  normal_mask |= WRITEMASK_X << j;
-		  break;
-	       case TXG_NONE:
-		  copy_mask |= WRITEMASK_X << j;
-	       }
-	    }
-
-	    if (sphere_mask) {
-	       build_sphere_texgen(p, out_texgen, sphere_mask);
-	    }
-
-	    if (reflect_mask) {
-	       build_reflect_texgen(p, out_texgen, reflect_mask);
-	    }
-
-	    if (normal_mask) {
-	       struct ureg normal = get_transformed_normal(p);
-	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
-	    }
-
-	    if (copy_mask) {
-	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
-	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
-	    }
-	 }
-
-	 if (texmat_enabled) {
-	    struct ureg texmat[4];
-	    struct ureg in = (!is_undef(out_texgen) ?
-			      out_texgen :
-			      register_input(p, VERT_ATTRIB_TEX0+i));
-	    if (p->mvp_with_dp4) {
-	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
-				       0, texmat );
-	       emit_matrix_transform_vec4( p, out, texmat, in );
-	    }
-	    else {
-	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
-				       STATE_MATRIX_TRANSPOSE, texmat );
-	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
-	    }
-	 }
-
-	 release_temps(p);
-      }
-      else {
-	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
-      }
-   }
-}
-
-
-/**
- * Point size attenuation computation.
- */
-static void build_atten_pointsize( struct tnl_program *p )
-{
-   struct ureg eye = get_eye_position_z(p);
-   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
-   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
-   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
-   struct ureg ut = get_temp(p);
-
-   /* dist = |eyez| */
-   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
-   /* p1 + dist * (p2 + dist * p3); */
-   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
-		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
-   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
-		ut, swizzle1(state_attenuation, X));
-
-   /* 1 / sqrt(factor) */
-   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
-
-#if 0
-   /* out = pointSize / sqrt(factor) */
-   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
-#else
-   /* this is a good place to clamp the point size since there's likely
-    * no hardware registers to clamp point size at rasterization time.
-    */
-   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
-   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
-   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
-#endif
-
-   release_temp(p, ut);
-}
-
-
-/**
- * Pass-though per-vertex point size, from user's point size array.
- */
-static void build_array_pointsize( struct tnl_program *p )
-{
-   struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
-   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
-   emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
-}
-
-
-static void build_tnl_program( struct tnl_program *p )
-{
-   /* Emit the program, starting with modelviewproject:
-    */
-   build_hpos(p);
-
-   /* Lighting calculations:
-    */
-   if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
-      if (p->state->light_global_enabled)
-	 build_lighting(p);
-      else {
-	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
-	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
-
-	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
-	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
-      }
-   }
-
-   if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC)
-      build_fog(p);
-
-   if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
-      build_texture_transform(p);
-
-   if (p->state->point_attenuated)
-      build_atten_pointsize(p);
-   else if (p->state->point_array)
-      build_array_pointsize(p);
-
-   /* Finish up:
-    */
-   emit_op1(p, OPCODE_END, undef, 0, undef);
-
-   /* Disassemble:
-    */
-   if (DISASSEM) {
-      printf ("\n");
-   }
-}
-
-
-static void
-create_new_program( const struct state_key *key,
-                    struct gl_vertex_program *program,
-                    GLboolean mvp_with_dp4,
-                    GLuint max_temps)
-{
-   struct tnl_program p;
-
-   memset(&p, 0, sizeof(p));
-   p.state = key;
-   p.program = program;
-   p.eye_position = undef;
-   p.eye_position_z = undef;
-   p.eye_position_normalized = undef;
-   p.transformed_normal = undef;
-   p.identity = undef;
-   p.temp_in_use = 0;
-   p.mvp_with_dp4 = mvp_with_dp4;
-
-   if (max_temps >= sizeof(int) * 8)
-      p.temp_reserved = 0;
-   else
-      p.temp_reserved = ~((1<<max_temps)-1);
-
-   /* Start by allocating 32 instructions.
-    * If we need more, we'll grow the instruction array as needed.
-    */
-   p.max_inst = 32;
-   p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
-   p.program->Base.String = NULL;
-   p.program->Base.NumInstructions =
-   p.program->Base.NumTemporaries =
-   p.program->Base.NumParameters =
-   p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
-   p.program->Base.Parameters = _mesa_new_parameter_list();
-   p.program->Base.InputsRead = 0;
-   p.program->Base.OutputsWritten = 0;
-
-   build_tnl_program( &p );
-}
-
-
-/**
- * Return a vertex program which implements the current fixed-function
- * transform/lighting/texgen operations.
- * XXX move this into core mesa (main/)
- */
-struct gl_vertex_program *
-_mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
-{
-   struct gl_vertex_program *prog;
-   struct state_key key;
-
-   /* Grab all the relevent state and put it in a single structure:
-    */
-   make_state_key(ctx, &key);
-
-   /* Look for an already-prepared program for this state:
-    */
-   prog = (struct gl_vertex_program *)
-      _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
-
-   if (!prog) {
-      /* OK, we'll have to build a new one */
-      if (0)
-         printf("Build new TNL program\n");
-
-      prog = (struct gl_vertex_program *)
-         ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
-      if (!prog)
-         return NULL;
-
-      create_new_program( &key, prog,
-                          ctx->mvp_with_dp4,
-                          ctx->Const.VertexProgram.MaxTemps );
-
-#if 0
-      if (ctx->Driver.ProgramStringNotify)
-         ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
-                                          &prog->Base );
-#endif
-      _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
-                                 &key, sizeof(key), &prog->Base);
-   }
-
-   return prog;
-}
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file ffvertex_prog.c
+ *
+ * Create a vertex program to execute the current fixed function T&L pipeline.
+ * \author Keith Whitwell
+ */
+
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "main/enums.h"
+#include "main/ffvertex_prog.h"
+#include "program/program.h"
+#include "program/prog_cache.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_statevars.h"
+
+
+/** Max of number of lights and texture coord units */
+#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)
+
+struct state_key {   /* fixed-function state gathered by make_state_key() */
+   unsigned light_color_material_mask:12;   /* ColorMaterialBitmask when color material is on */
+   unsigned light_global_enabled:1;
+   unsigned light_local_viewer:1;
+   unsigned light_twoside:1;
+   unsigned material_shininess_is_zero:1;   /* lighting on, no checked side has active shininess */
+   unsigned need_eye_coords:1;
+   unsigned normalize:1;
+   unsigned rescale_normals:1;
+
+   unsigned fog_source_is_depth:1;
+   unsigned separate_specular:1;
+   unsigned point_attenuated:1;
+   unsigned point_array:1;
+   unsigned texture_enabled_global:1;
+   unsigned fragprog_inputs_read:12;        /* fp->Base.InputsRead, kept to 12 bits */
+
+   unsigned varying_vp_inputs;              /* copy of ctx->varying_vp_inputs */
+
+   struct {
+      unsigned light_enabled:1;
+      unsigned light_eyepos3_is_zero:1;
+      unsigned light_spotcutoff_is_180:1;
+      unsigned light_attenuated:1;
+      unsigned texunit_really_enabled:1;
+      unsigned texmat_enabled:1;
+      unsigned coord_replace:1;
+      unsigned texgen_enabled:4;             /* make_state_key only stores 0 or 1 */
+      unsigned texgen_mode0:4;               /* TXG_* code for GenS */
+      unsigned texgen_mode1:4;               /* TXG_* code for GenT */
+      unsigned texgen_mode2:4;               /* TXG_* code for GenR */
+      unsigned texgen_mode3:4;               /* TXG_* code for GenQ */
+   } unit[NUM_UNITS];                        /* entry i serves light i and texcoord unit i */
+};
+
+
+#define TXG_NONE           0
+#define TXG_OBJ_LINEAR     1
+#define TXG_EYE_LINEAR     2
+#define TXG_SPHERE_MAP     3
+#define TXG_REFLECTION_MAP 4
+#define TXG_NORMAL_MAP     5
+
+static GLuint translate_texgen( GLboolean enabled, GLenum mode )
+{
+   /* Map a GL texgen mode onto the key's TXG_* code.  A disabled
+    * coordinate and an unrecognized mode both yield TXG_NONE.
+    */
+   if (enabled) {
+      if (mode == GL_OBJECT_LINEAR)     return TXG_OBJ_LINEAR;
+      if (mode == GL_EYE_LINEAR)        return TXG_EYE_LINEAR;
+      if (mode == GL_SPHERE_MAP)        return TXG_SPHERE_MAP;
+      if (mode == GL_REFLECTION_MAP_NV) return TXG_REFLECTION_MAP;
+      if (mode == GL_NORMAL_MAP_NV)     return TXG_NORMAL_MAP;
+   }
+   return TXG_NONE;
+}
+
+
+
+static GLboolean check_active_shininess( struct gl_context *ctx,
+                                         const struct state_key *key,
+                                         GLuint side )
+{
+   /* Tell whether the shininess of material side `side` has to be treated
+    * as active.  Three things make it so: COLOR0 varying while the color
+    * material mask covers this attribute, the matching bit in the upper
+    * half of varying_vp_inputs (presumably a per-vertex material input --
+    * confirm), or a non-zero current material value.
+    */
+   const GLuint attrib = MAT_ATTRIB_FRONT_SHININESS + side;
+   const GLuint mask = 1 << attrib;
+
+   return ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
+           (key->light_color_material_mask & mask)) ||
+          (key->varying_vp_inputs & (mask << 16)) ||
+          ctx->Light.Material.Attrib[attrib][0] != 0.0F;
+}
+
+
+static void make_state_key( struct gl_context *ctx, struct state_key *key )
+{  /* Fill *key from the current fixed-function state held in ctx. */
+   const struct gl_fragment_program *fp;
+   GLuint i;
+   /* Zero everything first; NOTE(review): presumably so keys compare bytewise in the cache. */
+   memset(key, 0, sizeof(struct state_key));
+   fp = ctx->FragmentProgram._Current;
+
+   /* This now relies on texenvprogram.c being active:
+    */
+   assert(fp);
+
+   key->need_eye_coords = ctx->_NeedEyeCoords;
+
+   key->fragprog_inputs_read = fp->Base.InputsRead;   /* field is 12 bits wide */
+   key->varying_vp_inputs = ctx->varying_vp_inputs;
+
+   if (ctx->RenderMode == GL_FEEDBACK) {
+      /* make sure the vertprog emits color and tex0 */
+      key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
+   }
+
+   key->separate_specular = (ctx->Light.Model.ColorControl ==
+			     GL_SEPARATE_SPECULAR_COLOR);
+
+   if (ctx->Light.Enabled) {
+      key->light_global_enabled = 1;
+
+      if (ctx->Light.Model.LocalViewer)
+	 key->light_local_viewer = 1;
+
+      if (ctx->Light.Model.TwoSide)
+	 key->light_twoside = 1;
+
+      if (ctx->Light.ColorMaterialEnabled) {
+	 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
+      }
+
+      for (i = 0; i < MAX_LIGHTS; i++) {
+	 struct gl_light *light = &ctx->Light.Light[i];
+
+	 if (light->Enabled) {
+	    key->unit[i].light_enabled = 1;
+
+	    if (light->EyePosition[3] == 0.0)
+	       key->unit[i].light_eyepos3_is_zero = 1;
+
+	    if (light->SpotCutoff == 180.0)
+	       key->unit[i].light_spotcutoff_is_180 = 1;
+
+	    if (light->ConstantAttenuation != 1.0 ||
+		light->LinearAttenuation != 0.0 ||
+		light->QuadraticAttenuation != 0.0)
+	       key->unit[i].light_attenuated = 1;
+	 }
+      }
+      /* Side 1 shininess is only consulted when two-sided lighting is on. */
+      if (check_active_shininess(ctx, key, 0)) {
+         key->material_shininess_is_zero = 0;
+      }
+      else if (key->light_twoside &&
+               check_active_shininess(ctx, key, 1)) {
+         key->material_shininess_is_zero = 0;
+      }
+      else {
+         key->material_shininess_is_zero = 1;
+      }
+   }
+
+   if (ctx->Transform.Normalize)
+      key->normalize = 1;
+
+   if (ctx->Transform.RescaleNormals)
+      key->rescale_normals = 1;
+
+   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
+      key->fog_source_is_depth = 1;
+
+   if (ctx->Point._Attenuated)
+      key->point_attenuated = 1;
+
+#if FEATURE_point_size_array
+   if (ctx->Array.ArrayObj->PointSize.Enabled)
+      key->point_array = 1;
+#endif
+
+   if (ctx->Texture._TexGenEnabled ||
+       ctx->Texture._TexMatEnabled ||
+       ctx->Texture._EnabledUnits)
+      key->texture_enabled_global = 1;
+
+   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
+      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+
+      if (texUnit->_ReallyEnabled)
+	 key->unit[i].texunit_really_enabled = 1;
+
+      if (ctx->Point.PointSprite)
+	 if (ctx->Point.CoordReplace[i])
+	    key->unit[i].coord_replace = 1;
+
+      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
+	 key->unit[i].texmat_enabled = 1;
+
+      if (texUnit->TexGenEnabled) {
+	 key->unit[i].texgen_enabled = 1;
+	 /* One TXG_* code per coordinate; a cleared enable bit gives TXG_NONE. */
+	 key->unit[i].texgen_mode0 =
+	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
+			      texUnit->GenS.Mode );
+	 key->unit[i].texgen_mode1 =
+	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
+			      texUnit->GenT.Mode );
+	 key->unit[i].texgen_mode2 =
+	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
+			      texUnit->GenR.Mode );
+	 key->unit[i].texgen_mode3 =
+	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
+			      texUnit->GenQ.Mode );
+      }
+   }
+}
+
+
+
+/* Very useful debugging tool - produces annotated listing of
+ * generated program with line/function references for each
+ * instruction back into this file:
+ */
+#define DISASSEM 0
+
+
+/* Use uregs to represent registers internally, translate to Mesa's
+ * expected formats on emit.
+ *
+ * NOTE: These are passed by value extensively in this file rather
+ * than as usual by pointer reference.  If this disturbs you, try
+ * remembering they are just 32bits in size.
+ *
+ * GCC is smart enough to deal with these dword-sized structures in
+ * much the same way as if I had defined them as dwords and was using
+ * macros to access and set the fields.  This is much nicer and easier
+ * to evolve.
+ */
+struct ureg {
+   GLuint file:4;    /* register file, a PROGRAM_* value */
+   GLint idx:9;      /* relative addressing may be negative */
+                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
+   GLuint negate:1;  /* non-zero is emitted as NEGATE_XYZW */
+   GLuint swz:12;    /* packed swizzle as built by MAKE_SWIZZLE4 */
+   GLuint pad:6;     /* filler: 4+9+1+12+6 = 32 bits */
+};
+
+
+struct tnl_program {   /* working state while one program is being generated */
+   const struct state_key *state;
+   struct gl_vertex_program *program;   /* program whose Base fields are updated */
+   GLint max_inst;  /** number of instructions allocated for program */
+   GLboolean mvp_with_dp4;
+
+   GLuint temp_in_use;     /* bit n set: temporary n is allocated (see get_temp) */
+   GLuint temp_reserved;   /* bits that release_temp/release_temps never clear */
+
+   struct ureg eye_position;
+   struct ureg eye_position_z;
+   struct ureg eye_position_normalized;
+   struct ureg transformed_normal;
+   struct ureg identity;   /* cached by get_identity_param(); undef until first use */
+
+   GLuint materials;
+   GLuint color_materials;
+};
+
+
+static const struct ureg undef = {   /* sentinel tested by is_undef() */
+   PROGRAM_UNDEFINED,   /* file */
+   0,                   /* idx */
+   0,                   /* negate */
+   0,                   /* swz */
+   0                    /* pad */
+};
+
+/* Local shorthand:
+ */
+#define X    SWIZZLE_X
+#define Y    SWIZZLE_Y
+#define Z    SWIZZLE_Z
+#define W    SWIZZLE_W
+
+
+/* Construct a ureg:
+ */
+static struct ureg make_ureg(GLuint file, GLint idx)
+{
+   struct ureg r;   /* every bitfield is assigned below, none left indeterminate */
+   r.pad = 0;
+   r.swz = SWIZZLE_NOOP;
+   r.negate = 0;
+   r.idx = idx;
+   r.file = file;
+   return r;
+}
+
+
+
+static struct ureg negate( struct ureg reg )
+{
+   reg.negate = !reg.negate;   /* flip the 1-bit flag; applying twice restores it */
+   return reg;
+}
+
+
+static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
+{  /* Compose with reg's existing swizzle: each argument selects one of its current components. */
+   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
+			   GET_SWZ(reg.swz, y),
+			   GET_SWZ(reg.swz, z),
+			   GET_SWZ(reg.swz, w));
+   return reg;
+}
+
+
+static struct ureg swizzle1( struct ureg reg, int x )
+{  /* Replicate reg's (already swizzled) component x into all four channels. */
+   return swizzle(reg, x, x, x, x);
+}
+
+
+static struct ureg get_temp( struct tnl_program *p )
+{  /* Allocate the lowest-numbered free temporary; exits the process if none is left. */
+   int bit = _mesa_ffs( ~p->temp_in_use );   /* used as 1-based index, 0 = none free */
+   if (!bit) {
+      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+
+   if ((GLuint) bit > p->program->Base.NumTemporaries)
+      p->program->Base.NumTemporaries = bit;
+   /* Unsigned shift: bit may reach 32, and 1<<31 on a signed int is undefined. */
+   p->temp_in_use |= 1u<<(bit-1);
+   return make_ureg(PROGRAM_TEMPORARY, bit-1);
+}
+
+
+static struct ureg reserve_temp( struct tnl_program *p )
+{  /* Allocate a temporary and mark it reserved so later releases leave it in use. */
+   struct ureg temp = get_temp( p );
+   p->temp_reserved |= 1u<<temp.idx;   /* 1u: idx 31 must not shift a signed int */
+   return temp;
+}
+
+
+static void release_temp( struct tnl_program *p, struct ureg reg )
+{  /* Free reg's temporary; non-temporary registers and reserved temps are left alone. */
+   if (reg.file == PROGRAM_TEMPORARY) {
+      p->temp_in_use &= ~(1u<<reg.idx);   /* 1u: idx 31 must not shift a signed int */
+      p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
+   }
+}
+
+static void release_temps( struct tnl_program *p )
+{  /* Drop every temporary at once, keeping only the reserved ones marked in use. */
+   p->temp_in_use = p->temp_reserved;
+}
+
+
+static struct ureg register_param5(struct tnl_program *p,
+				   GLint s0,
+				   GLint s1,
+				   GLint s2,
+				   GLint s3,
+                                   GLint s4)
+{
+   /* Register the state reference named by the five tokens with the
+    * program's parameter list and wrap the returned index in a ureg.
+    */
+   gl_state_index state[STATE_LENGTH];
+   state[0] = s0;  state[1] = s1;  state[2] = s2;
+   state[3] = s3;  state[4] = s4;
+
+   return make_ureg(PROGRAM_STATE_VAR,
+                    _mesa_add_state_reference( p->program->Base.Parameters, state ));
+}
+
+
+#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
+#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
+#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
+#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
+
+
+
+/** Return a ureg for a vertex attribute: the input itself if it varies, else its current-value state.
+ * \param input  one of VERT_ATTRIB_x tokens.
+ */
+static struct ureg register_input( struct tnl_program *p, GLuint input )
+{
+   assert(input < 32);
+   /* 1u keeps the shift unsigned: input may be 31, and 1<<31 on an int is undefined. */
+   if (p->state->varying_vp_inputs & (1u<<input)) {
+      p->program->Base.InputsRead |= (1u<<input);
+      return make_ureg(PROGRAM_INPUT, input);
+   }
+   else {
+      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
+   }
+}
+
+
+/** Record a vertex result in OutputsWritten and return a PROGRAM_OUTPUT ureg for it.
+ * \param output  one of VERT_RESULT_x tokens.
+ */
+static struct ureg register_output( struct tnl_program *p, GLuint output )
+{
+   p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
+   return make_ureg(PROGRAM_OUTPUT, output);
+}
+
+/* Add a 4-float unnamed constant to the parameters; returns a PROGRAM_CONSTANT. */
+static struct ureg register_const4f( struct tnl_program *p,
+			      GLfloat s0,
+			      GLfloat s1,
+			      GLfloat s2,
+			      GLfloat s3)
+{
+   gl_constant_value values[4];
+   GLint idx;
+   GLuint swizzle;
+   values[0].f = s0;
+   values[1].f = s1;
+   values[2].f = s2;
+   values[3].f = s3;
+   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
+                                     &swizzle );
+   ASSERT(swizzle == SWIZZLE_NOOP);
+   return make_ureg(PROGRAM_CONSTANT, idx);
+}
+
+#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)  /* {s0,0,0,1} */
+#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)  /* s0 in all four */
+#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)  /* {s0,s1,0,1} */
+#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)  /* {s0,s1,s2,1} */
+
+static GLboolean is_undef( struct ureg reg )
+{
+   return reg.file == PROGRAM_UNDEFINED;   /* nonzero when reg has no file yet */
+}
+
+/* Lazily create the {0,0,0,1} constant and cache it in p->identity. */
+static struct ureg get_identity_param( struct tnl_program *p )
+{
+   if (is_undef(p->identity))
+      p->identity = register_const4f(p, 0,0,0,1);
+
+   return p->identity;
+}
+
+static void register_matrix_param5( struct tnl_program *p,
+				    GLint s0, /* modelview, projection, etc */
+				    GLint s1, /* texture matrix number */
+				    GLint s2, /* first row */
+				    GLint s3, /* last row */
+				    GLint s4, /* inverse, transpose, etc */
+				    struct ureg *matrix )
+{
+   GLint i;
+   /* Register rows s2..s3 into matrix[0 .. s3-s2], one state reference per
+    * row (sad, as the support is there to pull the whole matrix out in one
+    * go).  The row token is s2 + i so a non-zero first row is honoured.
+    */
+   for (i = 0; i <= s3 - s2; i++)
+      matrix[i] = register_param5( p, s0, s1, s2 + i, s2 + i, s4 );
+}
+
+/* Fill a source register from a ureg; negate applies to all four channels. */
+static void emit_arg( struct prog_src_register *src,
+		      struct ureg reg )
+{
+   src->File = reg.file;
+   src->Index = reg.idx;
+   src->Swizzle = reg.swz;
+   src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
+   src->Abs = 0;
+   src->RelAddr = 0;
+   /* Check that bitfield sizes aren't exceeded */
+   ASSERT(src->Index == reg.idx);
+}
+
+/* Fill a destination register from a ureg and writemask (0 means xyzw). */
+static void emit_dst( struct prog_dst_register *dst,
+		      struct ureg reg, GLuint mask )
+{
+   dst->File = reg.file;
+   dst->Index = reg.idx;
+   /* allow zero as a shorthand for xyzw */
+   dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
+   dst->CondMask = COND_TR;  /* always pass cond test */
+   dst->CondSwizzle = SWIZZLE_NOOP;
+   dst->CondSrc = 0;
+   /* Check that bitfield sizes aren't exceeded */
+   ASSERT(dst->Index == reg.idx);
+}
+
+/* Print one emitted instruction, tagged with its call site, when DISASSEM. */
+static void debug_insn( struct prog_instruction *inst, const char *fn,
+			GLuint line )
+{
+   if (DISASSEM) {
+      static const char *last_fn;
+
+      if (fn != last_fn) {
+	 last_fn = fn;
+	 printf("%s:\n", fn);
+      }
+
+      printf("%u:\t", line);   /* line is a GLuint, so %u rather than %d */
+      _mesa_print_instruction(inst);
+   }
+}
+
+
+/**
+ * Append one instruction to the program, growing the instruction array
+ * when it is full.  On allocation failure GL_OUT_OF_MEMORY is recorded and
+ * nothing is emitted.  \param fn, line  call site, for debug_insn() only.
+ */
+static void emit_op3fn(struct tnl_program *p, enum prog_opcode op,
+                       struct ureg dest, GLuint mask,
+                       struct ureg src0, struct ureg src1, struct ureg src2,
+                       const char *fn, GLuint line)
+{
+   GLuint nr;
+   struct prog_instruction *inst;
+
+   assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
+
+   if (p->program->Base.NumInstructions == p->max_inst) {
+      /* Need to extend the program's instruction array: double the size.
+       * max_inst is only updated once the allocation has succeeded;
+       * bumping it first would let later calls index past the end of the
+       * old, smaller array after an out-of-memory failure.
+       */
+      struct prog_instruction *newInst =
+         _mesa_alloc_instructions(p->max_inst * 2);
+
+      if (!newInst) {
+         _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
+         return;
+      }
+
+      _mesa_copy_instructions(newInst,
+                              p->program->Base.Instructions,
+                              p->program->Base.NumInstructions);
+      _mesa_free_instructions(p->program->Base.Instructions,
+                              p->program->Base.NumInstructions);
+
+      p->program->Base.Instructions = newInst;
+      p->max_inst *= 2;
+   }
+
+   nr = p->program->Base.NumInstructions++;
+
+   inst = &p->program->Base.Instructions[nr];
+   inst->Opcode = (enum prog_opcode) op;
+   inst->Data = 0;
+
+   emit_arg( &inst->SrcReg[0], src0 );
+   emit_arg( &inst->SrcReg[1], src1 );
+   emit_arg( &inst->SrcReg[2], src2 );
+   emit_dst( &inst->DstReg, dest, mask );
+
+   debug_insn(inst, fn, line);
+}
+
+/* emit_opN: emit an N-source instruction; unused sources are passed as undef. */
+#define emit_op3(p, op, dst, mask, src0, src1, src2) \
+   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
+
+#define emit_op2(p, op, dst, mask, src0, src1) \
+    emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
+
+#define emit_op1(p, op, dst, mask, src0) \
+    emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
+
+/* Return reg if it is an unreserved temporary, else a new temp holding a copy. */
+static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
+{
+   if (reg.file == PROGRAM_TEMPORARY &&
+       !(p->temp_reserved & (1<<reg.idx)))
+      return reg;
+   else {
+      struct ureg temp = get_temp(p);
+      emit_op1(p, OPCODE_MOV, temp, 0, reg);
+      return temp;
+   }
+}
+
+
+/* dest.x/y/z/w = DP4 of src with mat[0]/[1]/[2]/[3].  No tracking is done
+ * of input/output/register size or active elements; that, like the matrix
+ * type, could be used to reduce these operations.
+ */
+static void emit_matrix_transform_vec4( struct tnl_program *p,
+					struct ureg dest,
+					const struct ureg *mat,
+					struct ureg src)
+{
+   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
+   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
+   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
+   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
+}
+
+
+/* This version is much easier to implement if writemasks are not
+ * supported natively on the target or (like SSE), the target doesn't
+ * have a clean/obvious dotproduct implementation.
+ */
+static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
+						  struct ureg dest,
+						  const struct ureg *mat,
+						  struct ureg src)
+{
+   struct ureg tmp;
+   /* Accumulate in a scratch temp when dest is not itself a temporary. */
+   if (dest.file != PROGRAM_TEMPORARY)
+      tmp = get_temp(p);
+   else
+      tmp = dest;
+
+   emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
+   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
+   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
+   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
+
+   if (dest.file != PROGRAM_TEMPORARY)
+      release_temp(p, tmp);
+}
+
+/* dest.x/y/z = DP3 of src with mat[0]/[1]/[2]; dest.w is not written. */
+static void emit_matrix_transform_vec3( struct tnl_program *p,
+					struct ureg dest,
+					const struct ureg *mat,
+					struct ureg src)
+{
+   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
+   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
+   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
+}
+
+/* dest = src * rsq(dot3(src,src)); mask 0 means xyzw, so dest.w is scaled too. */
+static void emit_normalize_vec3( struct tnl_program *p,
+				 struct ureg dest,
+				 struct ureg src )
+{
+#if 0
+   /* XXX use this when drivers are ready for NRM3 */
+   emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
+#else
+   struct ureg tmp = get_temp(p);
+   emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
+   emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
+   emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
+   release_temp(p, tmp);
+#endif
+}
+
+/* Copy vertex attribute 'input' to vertex result 'output' with a single MOV. */
+static void emit_passthrough( struct tnl_program *p,
+			      GLuint input,
+			      GLuint output )
+{
+   struct ureg out = register_output(p, output);
+   emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
+}
+
+/* Return the modelview-transformed POS, computed once into a reserved temp. */
+static struct ureg get_eye_position( struct tnl_program *p )
+{
+   if (is_undef(p->eye_position)) {
+      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
+      struct ureg modelview[4];
+
+      p->eye_position = reserve_temp(p);
+
+      if (p->mvp_with_dp4) {
+	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
+                                 0, modelview );
+
+	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
+      }
+      else {
+	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
+				 STATE_MATRIX_TRANSPOSE, modelview );
+
+	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
+      }
+   }
+
+   return p->eye_position;
+}
+
+/* Return eye-space Z: eye_position.z if already computed, else a single DP4. */
+static struct ureg get_eye_position_z( struct tnl_program *p )
+{
+   if (!is_undef(p->eye_position))
+      return swizzle1(p->eye_position, Z);
+
+   if (is_undef(p->eye_position_z)) {
+      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
+      struct ureg modelview[4];
+
+      p->eye_position_z = reserve_temp(p);
+
+      register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
+                              0, modelview );
+
+      emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
+   }
+
+   return p->eye_position_z;
+}
+
+/* Return the normalized eye position, computed once into a reserved temp. */
+static struct ureg get_eye_position_normalized( struct tnl_program *p )
+{
+   if (is_undef(p->eye_position_normalized)) {
+      struct ureg eye = get_eye_position(p);
+      p->eye_position_normalized = reserve_temp(p);
+      emit_normalize_vec3(p, p->eye_position_normalized, eye);
+   }
+
+   return p->eye_position_normalized;
+}
+
+/* Return the normal, transformed/normalized/rescaled as the state asks; cached. */
+static struct ureg get_transformed_normal( struct tnl_program *p )
+{
+   if (is_undef(p->transformed_normal) &&
+       !p->state->need_eye_coords &&
+       !p->state->normalize &&
+       !(p->state->need_eye_coords == p->state->rescale_normals))
+   {
+      p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
+   }
+   else if (is_undef(p->transformed_normal))
+   {
+      struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
+      struct ureg mvinv[3];
+      struct ureg transformed_normal = reserve_temp(p);
+
+      if (p->state->need_eye_coords) {
+         register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
+                                 STATE_MATRIX_INVTRANS, mvinv );
+
+         /* Transform to eye space:
+          */
+         emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
+         normal = transformed_normal;
+      }
+
+      /* Normalize/Rescale:
+       */
+      if (p->state->normalize) {
+	 emit_normalize_vec3( p, transformed_normal, normal );
+         normal = transformed_normal;
+      }
+      else if (p->state->need_eye_coords == p->state->rescale_normals) {
+         /* This is already adjusted for eye/non-eye rendering:
+          */
+	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
+                                               STATE_NORMAL_SCALE);
+
+	 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
+         normal = transformed_normal;
+      }
+
+      assert(normal.file == PROGRAM_TEMPORARY);
+      p->transformed_normal = normal;
+   }
+
+   return p->transformed_normal;
+}
+
+/* Emit HPOS = MVP * POS, with DP4s or the MUL/MAD form per p->mvp_with_dp4. */
+static void build_hpos( struct tnl_program *p )
+{
+   struct ureg pos = register_input( p, VERT_ATTRIB_POS );
+   struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
+   struct ureg mvp[4];
+
+   if (p->mvp_with_dp4) {
+      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
+			      0, mvp );
+      emit_matrix_transform_vec4( p, hpos, mvp, pos );
+   }
+   else {
+      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
+			      STATE_MATRIX_TRANSPOSE, mvp );
+      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
+   }
+}
+
+/* Map (side, property) to a material index: two per property from STATE_AMBIENT. */
+static GLuint material_attrib( GLuint side, GLuint property )
+{
+   return (property - STATE_AMBIENT) * 2 + side;
+}
+
+
+/**
+ * Get a bitmask of which material values vary on a per-vertex basis.
+ */
+static void set_material_flags( struct tnl_program *p )
+{
+   p->color_materials = 0;
+   p->materials = 0;
+
+   if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
+      p->materials =
+	 p->color_materials = p->state->light_color_material_mask;
+   }
+
+   p->materials |= (p->state->varying_vp_inputs >> 16);  /* presumably the GENERIC attrib bits - confirm */
+}
+
+/* Return a material value: COLOR0 input, a GENERIC input, or STATE_MATERIAL. */
+static struct ureg get_material( struct tnl_program *p, GLuint side,
+				 GLuint property )
+{
+   GLuint attrib = material_attrib(side, property);
+
+   if (p->color_materials & (1<<attrib))
+      return register_input(p, VERT_ATTRIB_COLOR0);
+   else if (p->materials & (1<<attrib)) {
+      /* Put material values in the GENERIC slots -- they are not used
+       * for anything in fixed function mode.
+       */
+      return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
+   }
+   else
+      return register_param3( p, STATE_MATERIAL, side, property );
+}
+
+#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
+				   MAT_BIT_FRONT_AMBIENT | \
+				   MAT_BIT_FRONT_DIFFUSE) << (side))  /* callers pass side 0 or 1 */
+
+
+/**
+ * Either return a precalculated constant value or emit code to
+ * calculate these values dynamically in the case where material calls
+ * are present between begin/end pairs.
+ *
+ * Probably want to shift this to the program compilation phase - if
+ * we always emitted the calculation here, a smart compiler could
+ * detect that it was constant (given a certain set of inputs), and
+ * lift it out of the main loop.  That way the programs created here
+ * would be independent of the vertex_buffer details.
+ */
+static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
+{
+   if (p->materials & SCENE_COLOR_BITS(side)) {
+      struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
+      struct ureg material_emission = get_material(p, side, STATE_EMISSION);
+      struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
+      struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
+      struct ureg tmp = make_temp(p, material_diffuse);   /* .w stays diffuse's; MAD below writes xyz only */
+      emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
+	       material_ambient, material_emission);
+      return tmp;
+   }
+   else
+      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
+}
+
+/* Return light*material: a MUL into a temp if it varies, else STATE_LIGHTPROD. */
+static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
+				  GLuint side, GLuint property )
+{
+   GLuint attrib = material_attrib(side, property);
+   if (p->materials & (1<<attrib)) {
+      struct ureg light_value =
+	 register_param3(p, STATE_LIGHT, light, property);
+      struct ureg material_value = get_material(p, side, property);
+      struct ureg tmp = get_temp(p);
+      emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
+      return tmp;
+   }
+   else
+      return register_param4(p, STATE_LIGHTPROD, light, side, property);
+}
+
+/* Emit spot and/or distance attenuation for light i into a temp; may overwrite dist. */
+static struct ureg calculate_light_attenuation( struct tnl_program *p,
+						GLuint i,
+						struct ureg VPpli,
+						struct ureg dist )
+{
+   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
+					     STATE_ATTENUATION);
+   struct ureg att = get_temp(p);
+
+   /* Calculate spot attenuation:
+    */
+   if (!p->state->unit[i].light_spotcutoff_is_180) {
+      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
+						  STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
+      struct ureg spot = get_temp(p);
+      struct ureg slt = get_temp(p);
+
+      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
+      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
+      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
+      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
+
+      release_temp(p, spot);
+      release_temp(p, slt);
+   }
+
+   /* Calculate distance attenuation:
+    */
+   if (p->state->unit[i].light_attenuated) {
+      /* 1/d,d,d,1/d */
+      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
+      /* 1,d,d*d,1/d */
+      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
+      /* 1/dist-atten */
+      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
+
+      if (!p->state->unit[i].light_spotcutoff_is_180) {
+	 /* dist-atten */
+	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
+	 /* spot-atten * dist-atten */
+	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
+      }
+      else {
+	 /* dist-atten */
+	 emit_op1(p, OPCODE_RCP, att, 0, dist);
+      }
+   }
+
+   return att;
+}
+
+
+/**
+ * Cheaper stand-in for LIT built from MAX and SLT.  Compute:
+ *   lit.y = MAX(0, dots.x)
+ *   lit.z = SLT(0, dots.x)
+ */
+static void emit_degenerate_lit( struct tnl_program *p,
+                                 struct ureg lit,
+                                 struct ureg dots )
+{
+   struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
+
+   /* Note that lit.x & lit.w will not be examined.  Note also that
+    * dots.xyzw == dots.xxxx.
+    */
+
+   /* MAX lit, id, dots;
+    */
+   emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
+
+   /* result[2] = (in > 0 ? 1 : 0)
+    * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
+    */
+   emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
+}
+
+/* Emit lighting into COL0, plus COL1/BFC0/BFC1 as separate/twoside require.
+ * Need to add some additional parameters to allow lighting in object
+ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
+ * space lighting.
+ */
+static void build_lighting( struct tnl_program *p )
+{
+   const GLboolean twoside = p->state->light_twoside;
+   const GLboolean separate = p->state->separate_specular;
+   GLuint nr_lights = 0, count = 0;
+   struct ureg normal = get_transformed_normal(p);
+   struct ureg lit = get_temp(p);
+   struct ureg dots = get_temp(p);
+   struct ureg _col0 = undef, _col1 = undef;
+   struct ureg _bfc0 = undef, _bfc1 = undef;
+   GLuint i;
+
+   /*
+    * NOTE:
+    * dots.x = dot(normal, VPpli)
+    * dots.y = dot(normal, halfAngle)
+    * dots.z = back.shininess
+    * dots.w = front.shininess
+    */
+
+   for (i = 0; i < MAX_LIGHTS; i++)
+      if (p->state->unit[i].light_enabled)
+	 nr_lights++;
+
+   set_material_flags(p);
+
+   {
+      if (!p->state->material_shininess_is_zero) {
+         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
+         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
+         release_temp(p, shininess);
+      }
+
+      _col0 = make_temp(p, get_scenecolor(p, 0));
+      if (separate)
+	 _col1 = make_temp(p, get_identity_param(p));
+      else
+	 _col1 = _col0;
+   }
+
+   if (twoside) {
+      if (!p->state->material_shininess_is_zero) {
+         /* Note that we negate the back-face specular exponent here.
+          * The negation will be un-done later in the back-face code below.
+          */
+         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
+         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
+                  negate(swizzle1(shininess,X)));
+         release_temp(p, shininess);
+      }
+
+      _bfc0 = make_temp(p, get_scenecolor(p, 1));
+      if (separate)
+	 _bfc1 = make_temp(p, get_identity_param(p));
+      else
+	 _bfc1 = _bfc0;
+   }
+
+   /* If no lights, still need to emit the scenecolor.
+    */
+   {
+      struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
+      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
+   }
+
+   if (separate) {
+      struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
+      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
+   }
+
+   if (twoside) {
+      struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
+      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
+   }
+
+   if (twoside && separate) {
+      struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
+      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
+   }
+
+   if (nr_lights == 0) {
+      release_temps(p);
+      return;
+   }
+
+   for (i = 0; i < MAX_LIGHTS; i++) {
+      if (p->state->unit[i].light_enabled) {
+	 struct ureg half = undef;
+	 struct ureg att = undef, VPpli = undef;
+
+	 count++;
+
+	 if (p->state->unit[i].light_eyepos3_is_zero) {
+	    /* Can use precomputed constants in this case.
+	     * Attenuation never applies to infinite lights.
+	     */
+	    VPpli = register_param3(p, STATE_INTERNAL,
+				    STATE_LIGHT_POSITION_NORMALIZED, i);
+
+            if (!p->state->material_shininess_is_zero) {
+               if (p->state->light_local_viewer) {
+                  struct ureg eye_hat = get_eye_position_normalized(p);
+                  half = get_temp(p);
+                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                  emit_normalize_vec3(p, half, half);
+               }
+               else {
+                  half = register_param3(p, STATE_INTERNAL,
+                                         STATE_LIGHT_HALF_VECTOR, i);
+               }
+            }
+	 }
+	 else {
+	    struct ureg Ppli = register_param3(p, STATE_INTERNAL,
+					       STATE_LIGHT_POSITION, i);
+	    struct ureg V = get_eye_position(p);
+	    struct ureg dist = get_temp(p);
+
+	    VPpli = get_temp(p);
+
+	    /* Calculate VPpli vector
+	     */
+	    emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
+
+	    /* Normalize VPpli.  The dist value also used in
+	     * attenuation below.
+	     */
+	    emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
+	    emit_op1(p, OPCODE_RSQ, dist, 0, dist);
+	    emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
+
+	    /* Calculate attenuation:
+	     */
+	    if (!p->state->unit[i].light_spotcutoff_is_180 ||
+		p->state->unit[i].light_attenuated) {
+	       att = calculate_light_attenuation(p, i, VPpli, dist);
+	    }
+
+	    /* Calculate viewer direction, or use infinite viewer:
+	     */
+            if (!p->state->material_shininess_is_zero) {
+               half = get_temp(p);
+
+               if (p->state->light_local_viewer) {
+                  struct ureg eye_hat = get_eye_position_normalized(p);
+                  emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+               }
+               else {
+                  struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
+                  emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
+               }
+
+               emit_normalize_vec3(p, half, half);
+            }
+
+	    release_temp(p, dist);
+	 }
+
+	 /* Calculate dot products:
+	  */
+         if (p->state->material_shininess_is_zero) {
+            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
+         }
+         else {
+            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
+            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
+         }
+
+	 /* Front face lighting:
+	  */
+	 {
+	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
+	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
+	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
+	    struct ureg res0, res1;
+	    GLuint mask0, mask1;
+
+	    if (count == nr_lights) {
+	       if (separate) {
+		  mask0 = WRITEMASK_XYZ;
+		  mask1 = WRITEMASK_XYZ;
+		  res0 = register_output( p, VERT_RESULT_COL0 );
+		  res1 = register_output( p, VERT_RESULT_COL1 );
+	       }
+	       else {
+		  mask0 = 0;
+		  mask1 = WRITEMASK_XYZ;
+		  res0 = _col0;
+		  res1 = register_output( p, VERT_RESULT_COL0 );
+	       }
+	    }
+            else {
+	       mask0 = 0;
+	       mask1 = 0;
+	       res0 = _col0;
+	       res1 = _col1;
+	    }
+
+	    if (!is_undef(att)) {
+               /* light is attenuated by distance */
+               emit_op1(p, OPCODE_LIT, lit, 0, dots);
+               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
+               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
+            }
+            else if (!p->state->material_shininess_is_zero) {
+               /* there's a non-zero specular term */
+               emit_op1(p, OPCODE_LIT, lit, 0, dots);
+               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
+            }
+            else {
+               /* no attenuation, no specular */
+               emit_degenerate_lit(p, lit, dots);
+               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
+            }
+
+	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
+	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
+
+	    release_temp(p, ambient);
+	    release_temp(p, diffuse);
+	    release_temp(p, specular);
+	 }
+
+	 /* Back face lighting:
+	  */
+	 if (twoside) {
+	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
+	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
+	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
+	    struct ureg res0, res1;
+	    GLuint mask0, mask1;
+
+	    if (count == nr_lights) {
+	       if (separate) {
+		  mask0 = WRITEMASK_XYZ;
+		  mask1 = WRITEMASK_XYZ;
+		  res0 = register_output( p, VERT_RESULT_BFC0 );
+		  res1 = register_output( p, VERT_RESULT_BFC1 );
+	       }
+	       else {
+		  mask0 = 0;
+		  mask1 = WRITEMASK_XYZ;
+		  res0 = _bfc0;
+		  res1 = register_output( p, VERT_RESULT_BFC0 );
+	       }
+	    }
+            else {
+	       res0 = _bfc0;
+	       res1 = _bfc1;
+	       mask0 = 0;
+	       mask1 = 0;
+	    }
+
+            /* For the back face we need to negate the X and Y component
+             * dot products.  dots.Z has the negated back-face specular
+             * exponent.  We swizzle that into the W position.  This
+             * negation makes the back-face specular term positive again.
+             */
+            dots = negate(swizzle(dots,X,Y,W,Z));
+
+	    if (!is_undef(att)) {
+               emit_op1(p, OPCODE_LIT, lit, 0, dots);
+	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
+               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
+            }
+            else if (!p->state->material_shininess_is_zero) {
+               emit_op1(p, OPCODE_LIT, lit, 0, dots);
+               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
+            }
+            else {
+               emit_degenerate_lit(p, lit, dots);
+               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
+            }
+
+	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
+	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
+            /* restore dots to its original state for subsequent lights
+             * by negating and swizzling again.
+             */
+            dots = negate(swizzle(dots,X,Y,W,Z));
+
+	    release_temp(p, ambient);
+	    release_temp(p, diffuse);
+	    release_temp(p, specular);
+	 }
+
+	 release_temp(p, half);
+	 release_temp(p, VPpli);
+	 release_temp(p, att);
+      }
+   }
+
+   release_temps( p );
+}
+
+/* Emit FOGC from eye-space Z or from the FOG attribute, per fog_source_is_depth. */
+static void build_fog( struct tnl_program *p )
+{
+   struct ureg fog = register_output(p, VERT_RESULT_FOGC);
+   struct ureg input;
+
+   if (p->state->fog_source_is_depth) {
+      input = get_eye_position_z(p);
+   }
+   else {
+      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
+   }
+
+   /* result.fog = {abs(f),0,0,1}; */
+   emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
+   emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
+}
+
+/* dest (under writemask) = u - 2(n.u)n, n = normal, u = normalized eye pos. */
+static void build_reflect_texgen( struct tnl_program *p,
+				  struct ureg dest,
+				  GLuint writemask )
+{
+   struct ureg normal = get_transformed_normal(p);
+   struct ureg eye_hat = get_eye_position_normalized(p);
+   struct ureg tmp = get_temp(p);
+
+   /* n.u */
+   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
+   /* 2n.u */
+   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
+   /* (-2n.u)n + u */
+   emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
+
+   release_temp(p, tmp);
+}
+
+/* dest (under writemask) = r/m + 1/2, with r the reflection vector as below. */
+static void build_sphere_texgen( struct tnl_program *p,
+				 struct ureg dest,
+				 GLuint writemask )
+{
+   struct ureg normal = get_transformed_normal(p);
+   struct ureg eye_hat = get_eye_position_normalized(p);
+   struct ureg tmp = get_temp(p);
+   struct ureg half = register_scalar_const(p, .5);
+   struct ureg r = get_temp(p);
+   struct ureg inv_m = get_temp(p);
+   struct ureg id = get_identity_param(p);
+
+   /* Could share the above calculations, but it would be
+    * a fairly odd state for someone to set (both sphere and
+    * reflection active for different texture coordinate
+    * components.  Of course - if two texture units enable
+    * reflect and/or sphere, things start to tilt in favour
+    * of separating this out:
+    */
+
+   /* n.u */
+   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
+   /* 2n.u */
+   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
+   /* (-2n.u)n + u */
+   emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
+   /* r + 0,0,1 */
+   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
+   /* rx^2 + ry^2 + (rz+1)^2 */
+   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
+   /* 2/m */
+   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
+   /* 1/m */
+   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
+   /* r/m + 1/2 */
+   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
+
+   release_temp(p, tmp);
+   release_temp(p, r);
+   release_temp(p, inv_m);
+}
+
+/* Per texcoord unit read by the fragment program: texgen, texmat or passthrough. */
+static void build_texture_transform( struct tnl_program *p )
+{
+   GLuint i, j;
+
+   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
+
+      if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
+	 continue;
+
+      if (p->state->unit[i].coord_replace)
+  	 continue;
+
+      if (p->state->unit[i].texgen_enabled ||
+	  p->state->unit[i].texmat_enabled) {
+
+	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
+	 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
+	 struct ureg out_texgen = undef;
+
+	 if (p->state->unit[i].texgen_enabled) {
+	    GLuint copy_mask = 0;
+	    GLuint sphere_mask = 0;
+	    GLuint reflect_mask = 0;
+	    GLuint normal_mask = 0;
+	    GLuint modes[4];
+
+	    if (texmat_enabled)
+	       out_texgen = get_temp(p);
+	    else
+	       out_texgen = out;
+
+	    modes[0] = p->state->unit[i].texgen_mode0;
+	    modes[1] = p->state->unit[i].texgen_mode1;
+	    modes[2] = p->state->unit[i].texgen_mode2;
+	    modes[3] = p->state->unit[i].texgen_mode3;
+
+	    for (j = 0; j < 4; j++) {
+	       switch (modes[j]) {
+	       case TXG_OBJ_LINEAR: {
+		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
+		  struct ureg plane =
+		     register_param3(p, STATE_TEXGEN, i,
+				     STATE_TEXGEN_OBJECT_S + j);
+
+		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
+			   obj, plane );
+		  break;
+	       }
+	       case TXG_EYE_LINEAR: {
+		  struct ureg eye = get_eye_position(p);
+		  struct ureg plane =
+		     register_param3(p, STATE_TEXGEN, i,
+				     STATE_TEXGEN_EYE_S + j);
+
+		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
+			   eye, plane );
+		  break;
+	       }
+	       case TXG_SPHERE_MAP:
+		  sphere_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_REFLECTION_MAP:
+		  reflect_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_NORMAL_MAP:
+		  normal_mask |= WRITEMASK_X << j;
+		  break;
+	       case TXG_NONE:
+		  copy_mask |= WRITEMASK_X << j;
+	       }
+	    }
+
+	    if (sphere_mask) {
+	       build_sphere_texgen(p, out_texgen, sphere_mask);
+	    }
+
+	    if (reflect_mask) {
+	       build_reflect_texgen(p, out_texgen, reflect_mask);
+	    }
+
+	    if (normal_mask) {
+	       struct ureg normal = get_transformed_normal(p);
+	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
+	    }
+
+	    if (copy_mask) {
+	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
+	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
+	    }
+	 }
+
+	 if (texmat_enabled) {
+	    struct ureg texmat[4];
+	    struct ureg in = (!is_undef(out_texgen) ?
+			      out_texgen :
+			      register_input(p, VERT_ATTRIB_TEX0+i));
+	    if (p->mvp_with_dp4) {
+	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
+				       0, texmat );
+	       emit_matrix_transform_vec4( p, out, texmat, in );
+	    }
+	    else {
+	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
+				       STATE_MATRIX_TRANSPOSE, texmat );
+	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
+	    }
+	 }
+
+	 release_temps(p);
+      }
+      else {
+	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
+      }
+   }
+}
+
+/**
+ * Point size attenuation: PSIZ.x = size.x / sqrt(a + b*d + c*d*d), d = |eye z|,
+ * clamped to [size.y, size.z]; a,b,c = STATE_POINT_ATTENUATION.xyz.
+ */
+static void build_atten_pointsize( struct tnl_program *p )
+{
+   struct ureg eye = get_eye_position_z(p);
+   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
+   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
+   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
+   struct ureg ut = get_temp(p);
+
+   /* dist = |eyez| */
+   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
+   /* p1 + dist * (p2 + dist * p3); */
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
+		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
+   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
+		ut, swizzle1(state_attenuation, X));
+
+   /* 1 / sqrt(factor) */
+   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
+
+#if 0
+   /* out = pointSize / sqrt(factor) */
+   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
+#else
+   /* this is a good place to clamp the point size since there's likely
+    * no hardware registers to clamp point size at rasterization time.
+    */
+   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
+   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
+   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
+#endif
+
+   release_temp(p, ut);
+}
+
+
+/**
+ * Pass-through per-vertex point size, from user's point size array.
+ */
+static void build_array_pointsize( struct tnl_program *p )
+{
+   struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
+   struct ureg out = register_output(p, VERT_RESULT_PSIZ);
+   emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
+}
+
+
+static void build_tnl_program( struct tnl_program *p )
+{
+   /* Emit the program, starting with the modelview-projection transform:
+    */
+   build_hpos(p);
+
+   /* Colors, only if fragprog_inputs_read includes COL0/COL1: lighting
+    * when globally enabled, otherwise pass the vertex colors through. */
+   if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
+      if (p->state->light_global_enabled)
+	 build_lighting(p);
+      else {
+	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
+	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
+
+	 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
+	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
+      }
+   }
+
+   if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC)
+      build_fog(p);
+
+   if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
+      build_texture_transform(p);
+
+   if (p->state->point_attenuated)
+      build_atten_pointsize(p);
+   else if (p->state->point_array)
+      build_array_pointsize(p);
+
+   /* Finish up: terminate the program with an END instruction.
+    */
+   emit_op1(p, OPCODE_END, undef, 0, undef);
+
+   /* Disassemble (this branch currently only prints a newline):
+    */
+   if (DISASSEM) {
+      printf ("\n");
+   }
+}
+
+
+static void
+create_new_program( const struct state_key *key,
+                    struct gl_vertex_program *program,
+                    GLboolean mvp_with_dp4,
+                    GLuint max_temps)
+{
+   struct tnl_program p;
+   /* Reset builder state, then emit fixed-function code into 'program'. */
+   memset(&p, 0, sizeof(p));
+   p.state = key;
+   p.program = program;
+   p.eye_position = undef;
+   p.eye_position_z = undef;
+   p.eye_position_normalized = undef;
+   p.transformed_normal = undef;
+   p.identity = undef;
+   p.temp_in_use = 0;
+   p.mvp_with_dp4 = mvp_with_dp4;
+   /* Set a temp_reserved bit for every temp index >= max_temps. */
+   if (max_temps >= sizeof(int) * 8)
+      p.temp_reserved = 0;
+   else
+      p.temp_reserved = ~((1u<<max_temps)-1); /* 1u: int 1<<31 is undefined */
+
+   /* Start by allocating 32 instructions.
+    * If we need more, we'll grow the instruction array as needed.
+    */
+   p.max_inst = 32;
+   p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
+   p.program->Base.String = NULL;
+   p.program->Base.NumInstructions =
+   p.program->Base.NumTemporaries =
+   p.program->Base.NumParameters =
+   p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
+   p.program->Base.Parameters = _mesa_new_parameter_list();
+   p.program->Base.InputsRead = 0;
+   p.program->Base.OutputsWritten = 0;
+
+   build_tnl_program( &p );
+}
+
+
+/**
+ * Return a vertex program which implements the current fixed-function
+ * transform/lighting/texgen operations, or NULL if none can be allocated.
+ * XXX move this into core mesa (main/)
+ */
+struct gl_vertex_program *
+_mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
+{
+   struct gl_vertex_program *prog;
+   struct state_key key;
+
+   /* Grab all the relevant state and put it in a single structure:
+    */
+   make_state_key(ctx, &key);
+
+   /* Look for an already-prepared program for this state:
+    */
+   prog = (struct gl_vertex_program *)
+      _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
+
+   if (!prog) {
+      /* Cache miss: build a new one and insert it into the cache */
+      if (0)
+         printf("Build new TNL program\n");
+
+      prog = (struct gl_vertex_program *)
+         ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
+      if (!prog)
+         return NULL;
+
+      create_new_program( &key, prog,
+                          ctx->mvp_with_dp4,
+                          ctx->Const.VertexProgram.MaxTemps );
+
+#if 0
+      if (ctx->Driver.ProgramStringNotify)
+         ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
+                                          &prog->Base );
+#endif
+      _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
+                                 &key, sizeof(key), &prog->Base);
+   }
+
+   return prog;
+}
diff --git a/mesalib/src/mesa/main/nvprogram.c b/mesalib/src/mesa/main/nvprogram.c
index 6b20fdae6..7ff7645b7 100644
--- a/mesalib/src/mesa/main/nvprogram.c
+++ b/mesalib/src/mesa/main/nvprogram.c
@@ -1,920 +1,920 @@
-/*
- * Mesa 3-D graphics library
- * Version:  6.5.2
- *
- * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file nvprogram.c
- * NVIDIA vertex/fragment program state management functions.
- * \author Brian Paul
- */
-
-/*
- * Regarding GL_NV_fragment/vertex_program, GL_NV_vertex_program1_1, etc:
- *
- * Portions of this software may use or implement intellectual
- * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
- * any and all warranties with respect to such intellectual property,
- * including any use thereof or modifications thereto.
- */
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/hash.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "main/mtypes.h"
-#include "main/nvprogram.h"
-#include "program/arbprogparse.h"
-#include "program/nvfragparse.h"
-#include "program/nvvertparse.h"
-#include "program/program.h"
-#include "program/prog_instruction.h"
-#include "program/prog_parameter.h"
-
-
-
-/**
- * Execute a vertex state program.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_ExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params)
-{
-   struct gl_vertex_program *vprog;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target != GL_VERTEX_STATE_PROGRAM_NV) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glExecuteProgramNV");
-      return;
-   }
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-
-   vprog = (struct gl_vertex_program *) _mesa_lookup_program(ctx, id);
-
-   if (!vprog || vprog->Base.Target != GL_VERTEX_STATE_PROGRAM_NV) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glExecuteProgramNV");
-      return;
-   }
-   
-   _mesa_problem(ctx, "glExecuteProgramNV() not supported");
-}
-
-
-/**
- * Determine if a set of programs is resident in hardware.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-GLboolean GLAPIENTRY
-_mesa_AreProgramsResidentNV(GLsizei n, const GLuint *ids,
-                            GLboolean *residences)
-{
-   GLint i, j;
-   GLboolean allResident = GL_TRUE;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
-
-   if (n < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV(n)");
-      return GL_FALSE;
-   }
-
-   for (i = 0; i < n; i++) {
-      const struct gl_program *prog;
-      if (ids[i] == 0) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV");
-         return GL_FALSE;
-      }
-      prog = _mesa_lookup_program(ctx, ids[i]);
-      if (!prog) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV");
-         return GL_FALSE;
-      }
-      if (prog->Resident) {
-	 if (!allResident)
-	    residences[i] = GL_TRUE;
-      }
-      else {
-         if (allResident) {
-	    allResident = GL_FALSE;
-	    for (j = 0; j < i; j++)
-	       residences[j] = GL_TRUE;
-	 }
-	 residences[i] = GL_FALSE;
-      }
-   }
-
-   return allResident;
-}
-
-
-/**
- * Request that a set of programs be resident in hardware.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_RequestResidentProgramsNV(GLsizei n, const GLuint *ids)
-{
-   GLint i;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (n < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(n)");
-      return;
-   }
-
-   /* just error checking for now */
-   for (i = 0; i < n; i++) {
-      struct gl_program *prog;
-
-      if (ids[i] == 0) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(id)");
-         return;
-      }
-
-      prog = _mesa_lookup_program(ctx, ids[i]);
-      if (!prog) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(id)");
-         return;
-      }
-
-      /* XXX this is really a hardware thing we should hook out */
-      prog->Resident = GL_TRUE;
-   }
-}
-
-
-/**
- * Get a program parameter register.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetProgramParameterfvNV(GLenum target, GLuint index,
-                              GLenum pname, GLfloat *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target == GL_VERTEX_PROGRAM_NV) {
-      if (pname == GL_PROGRAM_PARAMETER_NV) {
-         if (index < MAX_NV_VERTEX_PROGRAM_PARAMS) {
-            COPY_4V(params, ctx->VertexProgram.Parameters[index]);
-         }
-         else {
-            _mesa_error(ctx, GL_INVALID_VALUE,
-                        "glGetProgramParameterfvNV(index)");
-            return;
-         }
-      }
-      else {
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterfvNV(pname)");
-         return;
-      }
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterfvNV(target)");
-      return;
-   }
-}
-
-
-/**
- * Get a program parameter register.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetProgramParameterdvNV(GLenum target, GLuint index,
-                              GLenum pname, GLdouble *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target == GL_VERTEX_PROGRAM_NV) {
-      if (pname == GL_PROGRAM_PARAMETER_NV) {
-         if (index < MAX_NV_VERTEX_PROGRAM_PARAMS) {
-            COPY_4V(params, ctx->VertexProgram.Parameters[index]);
-         }
-         else {
-            _mesa_error(ctx, GL_INVALID_VALUE,
-                        "glGetProgramParameterdvNV(index)");
-            return;
-         }
-      }
-      else {
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterdvNV(pname)");
-         return;
-      }
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterdvNV(target)");
-      return;
-   }
-}
-
-
-/**
- * Get a program attribute.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetProgramivNV(GLuint id, GLenum pname, GLint *params)
-{
-   struct gl_program *prog;
-   GET_CURRENT_CONTEXT(ctx);
-
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   prog = _mesa_lookup_program(ctx, id);
-   if (!prog) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramivNV");
-      return;
-   }
-
-   switch (pname) {
-      case GL_PROGRAM_TARGET_NV:
-         *params = prog->Target;
-         return;
-      case GL_PROGRAM_LENGTH_NV:
-         *params = prog->String ?(GLint) strlen((char *) prog->String) : 0;
-         return;
-      case GL_PROGRAM_RESIDENT_NV:
-         *params = prog->Resident;
-         return;
-      default:
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramivNV(pname)");
-         return;
-   }
-}
-
-
-/**
- * Get the program source code.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetProgramStringNV(GLuint id, GLenum pname, GLubyte *program)
-{
-   struct gl_program *prog;
-   GET_CURRENT_CONTEXT(ctx);
-
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (pname != GL_PROGRAM_STRING_NV) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramStringNV(pname)");
-      return;
-   }
-
-   prog = _mesa_lookup_program(ctx, id);
-   if (!prog) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramStringNV");
-      return;
-   }
-
-   if (prog->String) {
-      memcpy(program, prog->String, strlen((char *) prog->String));
-   }
-   else {
-      program[0] = 0;
-   }
-}
-
-
-/**
- * Get matrix tracking information.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetTrackMatrixivNV(GLenum target, GLuint address,
-                         GLenum pname, GLint *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target == GL_VERTEX_PROGRAM_NV
-       && ctx->Extensions.NV_vertex_program) {
-      GLuint i;
-
-      if ((address & 0x3) || address >= MAX_NV_VERTEX_PROGRAM_PARAMS) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glGetTrackMatrixivNV(address)");
-         return;
-      }
-
-      i = address / 4;
-
-      switch (pname) {
-         case GL_TRACK_MATRIX_NV:
-            params[0] = (GLint) ctx->VertexProgram.TrackMatrix[i];
-            return;
-         case GL_TRACK_MATRIX_TRANSFORM_NV:
-            params[0] = (GLint) ctx->VertexProgram.TrackMatrixTransform[i];
-            return;
-         default:
-            _mesa_error(ctx, GL_INVALID_ENUM, "glGetTrackMatrixivNV");
-            return;
-      }
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTrackMatrixivNV");
-      return;
-   }
-}
-
-
-/**
- * Get a vertex (or vertex array) attribute.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetVertexAttribdvNV(GLuint index, GLenum pname, GLdouble *params)
-{
-   const struct gl_client_array *array;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribdvNV(index)");
-      return;
-   }
-
-   array = &ctx->Array.ArrayObj->VertexAttrib[index];
-
-   switch (pname) {
-      case GL_ATTRIB_ARRAY_SIZE_NV:
-         params[0] = array->Size;
-         break;
-      case GL_ATTRIB_ARRAY_STRIDE_NV:
-         params[0] = array->Stride;
-         break;
-      case GL_ATTRIB_ARRAY_TYPE_NV:
-         params[0] = array->Type;
-         break;
-      case GL_CURRENT_ATTRIB_NV:
-         if (index == 0) {
-            _mesa_error(ctx, GL_INVALID_OPERATION,
-                        "glGetVertexAttribdvNV(index == 0)");
-            return;
-         }
-	 FLUSH_CURRENT(ctx, 0);
-         COPY_4V(params, ctx->Current.Attrib[index]);
-         break;
-      default:
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribdvNV");
-         return;
-   }
-}
-
-/**
- * Get a vertex (or vertex array) attribute.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetVertexAttribfvNV(GLuint index, GLenum pname, GLfloat *params)
-{
-   const struct gl_client_array *array;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribdvNV(index)");
-      return;
-   }
-
-   array = &ctx->Array.ArrayObj->VertexAttrib[index];
-
-   switch (pname) {
-      case GL_ATTRIB_ARRAY_SIZE_NV:
-         params[0] = (GLfloat) array->Size;
-         break;
-      case GL_ATTRIB_ARRAY_STRIDE_NV:
-         params[0] = (GLfloat) array->Stride;
-         break;
-      case GL_ATTRIB_ARRAY_TYPE_NV:
-         params[0] = (GLfloat) array->Type;
-         break;
-      case GL_CURRENT_ATTRIB_NV:
-         if (index == 0) {
-            _mesa_error(ctx, GL_INVALID_OPERATION,
-                        "glGetVertexAttribfvNV(index == 0)");
-            return;
-         }
-	 FLUSH_CURRENT(ctx, 0);
-         COPY_4V(params, ctx->Current.Attrib[index]);
-         break;
-      default:
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribdvNV");
-         return;
-   }
-}
-
-/**
- * Get a vertex (or vertex array) attribute.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetVertexAttribivNV(GLuint index, GLenum pname, GLint *params)
-{
-   const struct gl_client_array *array;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribdvNV(index)");
-      return;
-   }
-
-   array = &ctx->Array.ArrayObj->VertexAttrib[index];
-
-   switch (pname) {
-      case GL_ATTRIB_ARRAY_SIZE_NV:
-         params[0] = array->Size;
-         break;
-      case GL_ATTRIB_ARRAY_STRIDE_NV:
-         params[0] = array->Stride;
-         break;
-      case GL_ATTRIB_ARRAY_TYPE_NV:
-         params[0] = array->Type;
-         break;
-      case GL_CURRENT_ATTRIB_NV:
-         if (index == 0) {
-            _mesa_error(ctx, GL_INVALID_OPERATION,
-                        "glGetVertexAttribivNV(index == 0)");
-            return;
-         }
-	 FLUSH_CURRENT(ctx, 0);
-         params[0] = (GLint) ctx->Current.Attrib[index][0];
-         params[1] = (GLint) ctx->Current.Attrib[index][1];
-         params[2] = (GLint) ctx->Current.Attrib[index][2];
-         params[3] = (GLint) ctx->Current.Attrib[index][3];
-         break;
-      case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB:
-         params[0] = array->BufferObj->Name;
-         break;
-      default:
-         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribdvNV");
-         return;
-   }
-}
-
-
-/**
- * Get a vertex array attribute pointer.
- * \note Not compiled into display lists.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_GetVertexAttribPointervNV(GLuint index, GLenum pname, GLvoid **pointer)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribPointerNV(index)");
-      return;
-   }
-
-   if (pname != GL_ATTRIB_ARRAY_POINTER_NV) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribPointerNV(pname)");
-      return;
-   }
-
-   *pointer = (GLvoid *) ctx->Array.ArrayObj->VertexAttrib[index].Ptr;
-}
-
-void
-_mesa_emit_nv_temp_initialization(struct gl_context *ctx,
-				  struct gl_program *program)
-{
-   struct prog_instruction *inst;
-   GLuint i;
-   struct gl_shader_compiler_options* options =
-         &ctx->ShaderCompilerOptions[_mesa_program_target_to_index(program->Target)];
-
-   if (!options->EmitNVTempInitialization)
-      return;
-
-   /* We'll swizzle up a zero temporary so we can use it for the
-    * ARL.
-    */
-   if (program->NumTemporaries == 0)
-      program->NumTemporaries = 1;
-
-   _mesa_insert_instructions(program, 0, program->NumTemporaries + 1);
-
-   for (i = 0; i < program->NumTemporaries; i++) {
-      struct prog_instruction *inst = &program->Instructions[i];
-
-      inst->Opcode = OPCODE_SWZ;
-      inst->DstReg.File = PROGRAM_TEMPORARY;
-      inst->DstReg.Index = i;
-      inst->DstReg.WriteMask = WRITEMASK_XYZW;
-      inst->SrcReg[0].File = PROGRAM_TEMPORARY;
-      inst->SrcReg[0].Index = 0;
-      inst->SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO,
-					      SWIZZLE_ZERO,
-					      SWIZZLE_ZERO,
-					      SWIZZLE_ZERO);
-   }
-
-   inst = &program->Instructions[i];
-   inst->Opcode = OPCODE_ARL;
-   inst->DstReg.File = PROGRAM_ADDRESS;
-   inst->DstReg.Index = 0;
-   inst->DstReg.WriteMask = WRITEMASK_XYZW;
-   inst->SrcReg[0].File = PROGRAM_TEMPORARY;
-   inst->SrcReg[0].Index = 0;
-   inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
-   if (program->NumAddressRegs == 0)
-      program->NumAddressRegs = 1;
-}
-
-void
-_mesa_setup_nv_temporary_count(struct gl_context *ctx, struct gl_program *program)
-{
-   GLuint i;
-
-   program->NumTemporaries = 0;
-   for (i = 0; i < program->NumInstructions; i++) {
-      struct prog_instruction *inst = &program->Instructions[i];
-
-      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
-	 program->NumTemporaries = MAX2(program->NumTemporaries,
-					inst->DstReg.Index + 1);
-      }
-      if (inst->SrcReg[0].File == PROGRAM_TEMPORARY) {
-	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
-					inst->SrcReg[0].Index + 1);
-      }
-      if (inst->SrcReg[1].File == PROGRAM_TEMPORARY) {
-	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
-					inst->SrcReg[1].Index + 1);
-      }
-      if (inst->SrcReg[2].File == PROGRAM_TEMPORARY) {
-	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
-					inst->SrcReg[2].Index + 1);
-      }
-   }
-}
-
-/**
- * Load/parse/compile a program.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_LoadProgramNV(GLenum target, GLuint id, GLsizei len,
-                    const GLubyte *program)
-{
-   struct gl_program *prog;
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (!ctx->Extensions.NV_vertex_program
-       && !ctx->Extensions.NV_fragment_program) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV()");
-      return;
-   }
-
-   if (id == 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glLoadProgramNV(id)");
-      return;
-   }
-
-   if (len < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glLoadProgramNV(len)");
-      return;
-   }
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-
-   prog = _mesa_lookup_program(ctx, id);
-
-   if (prog && prog->Target != 0 && prog->Target != target) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(target)");
-      return;
-   }
-
-   if ((target == GL_VERTEX_PROGRAM_NV ||
-        target == GL_VERTEX_STATE_PROGRAM_NV)
-       && ctx->Extensions.NV_vertex_program) {
-      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
-      if (!vprog || prog == &_mesa_DummyProgram) {
-         vprog = (struct gl_vertex_program *)
-            ctx->Driver.NewProgram(ctx, target, id);
-         if (!vprog) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
-            return;
-         }
-         _mesa_HashInsert(ctx->Shared->Programs, id, vprog);
-      }
-
-      if (ctx->Extensions.ARB_vertex_program
-	  && (strncmp((char *) program, "!!ARB", 5) == 0)) {
-	 _mesa_parse_arb_vertex_program(ctx, target, program, len, vprog);
-      } else {
-	 _mesa_parse_nv_vertex_program(ctx, target, program, len, vprog);
-      }
-   }
-   else if (target == GL_FRAGMENT_PROGRAM_NV
-            && ctx->Extensions.NV_fragment_program) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      if (!fprog || prog == &_mesa_DummyProgram) {
-         fprog = (struct gl_fragment_program *)
-            ctx->Driver.NewProgram(ctx, target, id);
-         if (!fprog) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
-            return;
-         }
-         _mesa_HashInsert(ctx->Shared->Programs, id, fprog);
-      }
-      _mesa_parse_nv_fragment_program(ctx, target, program, len, fprog);
-   }
-   else if (target == GL_FRAGMENT_PROGRAM_ARB
-            && ctx->Extensions.ARB_fragment_program) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      if (!fprog || prog == &_mesa_DummyProgram) {
-         fprog = (struct gl_fragment_program *)
-            ctx->Driver.NewProgram(ctx, target, id);
-         if (!fprog) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
-            return;
-         }
-         _mesa_HashInsert(ctx->Shared->Programs, id, fprog);
-      }
-      _mesa_parse_arb_fragment_program(ctx, target, program, len, fprog);
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glLoadProgramNV(target)");
-   }
-}
-
-
-
-/**
- * Set a sequence of program parameter registers.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_ProgramParameters4dvNV(GLenum target, GLuint index,
-                             GLsizei num, const GLdouble *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
-      GLint i;
-      if (index + num > MAX_NV_VERTEX_PROGRAM_PARAMS) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramParameters4dvNV");
-         return;
-      }
-      for (i = 0; i < num; i++) {
-         ctx->VertexProgram.Parameters[index + i][0] = (GLfloat) params[0];
-         ctx->VertexProgram.Parameters[index + i][1] = (GLfloat) params[1];
-         ctx->VertexProgram.Parameters[index + i][2] = (GLfloat) params[2];
-         ctx->VertexProgram.Parameters[index + i][3] = (GLfloat) params[3];
-         params += 4;
-      };
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameters4dvNV");
-      return;
-   }
-}
-
-
-/**
- * Set a sequence of program parameter registers.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_ProgramParameters4fvNV(GLenum target, GLuint index,
-                             GLsizei num, const GLfloat *params)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
-      GLint i;
-      if (index + num > MAX_NV_VERTEX_PROGRAM_PARAMS) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramParameters4fvNV");
-         return;
-      }
-      for (i = 0; i < num; i++) {
-         COPY_4V(ctx->VertexProgram.Parameters[index + i], params);
-         params += 4;
-      }
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameters4fvNV");
-      return;
-   }
-}
-
-
-
-/**
- * Setup tracking of matrices into program parameter registers.
- * \note Called from the GL API dispatcher.
- */
-void GLAPIENTRY
-_mesa_TrackMatrixNV(GLenum target, GLuint address,
-                    GLenum matrix, GLenum transform)
-{
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
-
-   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
-      if (address & 0x3) {
-         /* addr must be multiple of four */
-         _mesa_error(ctx, GL_INVALID_VALUE, "glTrackMatrixNV(address)");
-         return;
-      }
-
-      switch (matrix) {
-         case GL_NONE:
-         case GL_MODELVIEW:
-         case GL_PROJECTION:
-         case GL_TEXTURE:
-         case GL_COLOR:
-         case GL_MODELVIEW_PROJECTION_NV:
-         case GL_MATRIX0_NV:
-         case GL_MATRIX1_NV:
-         case GL_MATRIX2_NV:
-         case GL_MATRIX3_NV:
-         case GL_MATRIX4_NV:
-         case GL_MATRIX5_NV:
-         case GL_MATRIX6_NV:
-         case GL_MATRIX7_NV:
-            /* OK, fallthrough */
-            break;
-         default:
-            _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(matrix)");
-            return;
-      }
-
-      switch (transform) {
-         case GL_IDENTITY_NV:
-         case GL_INVERSE_NV:
-         case GL_TRANSPOSE_NV:
-         case GL_INVERSE_TRANSPOSE_NV:
-            /* OK, fallthrough */
-            break;
-         default:
-            _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(transform)");
-            return;
-      }
-
-      ctx->VertexProgram.TrackMatrix[address / 4] = matrix;
-      ctx->VertexProgram.TrackMatrixTransform[address / 4] = transform;
-   }
-   else {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(target)");
-      return;
-   }
-}
-
-
-void GLAPIENTRY
-_mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
-                                GLfloat x, GLfloat y, GLfloat z, GLfloat w)
-{
-   struct gl_program *prog;
-   struct gl_fragment_program *fragProg;
-   GLfloat *v;
-
-   GET_CURRENT_CONTEXT(ctx);
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
-
-   prog = _mesa_lookup_program(ctx, id);
-   if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramNamedParameterNV");
-      return;
-   }
-
-   if (len <= 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glProgramNamedParameterNV(len)");
-      return;
-   }
-
-   fragProg = (struct gl_fragment_program *) prog;
-   v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
-                                    (char *) name);
-   if (v) {
-      v[0] = x;
-      v[1] = y;
-      v[2] = z;
-      v[3] = w;
-      return;
-   }
-
-   _mesa_error(ctx, GL_INVALID_VALUE, "glProgramNamedParameterNV(name)");
-}
-
-
-void GLAPIENTRY
-_mesa_ProgramNamedParameter4fvNV(GLuint id, GLsizei len, const GLubyte *name,
-                                 const float v[])
-{
-   _mesa_ProgramNamedParameter4fNV(id, len, name, v[0], v[1], v[2], v[3]);
-}
-
-
-void GLAPIENTRY
-_mesa_ProgramNamedParameter4dNV(GLuint id, GLsizei len, const GLubyte *name,
-                                GLdouble x, GLdouble y, GLdouble z, GLdouble w)
-{
-   _mesa_ProgramNamedParameter4fNV(id, len, name, (GLfloat)x, (GLfloat)y, 
-                                   (GLfloat)z, (GLfloat)w);
-}
-
-
-void GLAPIENTRY
-_mesa_ProgramNamedParameter4dvNV(GLuint id, GLsizei len, const GLubyte *name,
-                                 const double v[])
-{
-   _mesa_ProgramNamedParameter4fNV(id, len, name,
-                                   (GLfloat)v[0], (GLfloat)v[1],
-                                   (GLfloat)v[2], (GLfloat)v[3]);
-}
-
-
-void GLAPIENTRY
-_mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
-                                   GLfloat *params)
-{
-   struct gl_program *prog;
-   struct gl_fragment_program *fragProg;
-   const GLfloat *v;
-
-   GET_CURRENT_CONTEXT(ctx);
-
-   ASSERT_OUTSIDE_BEGIN_END(ctx);
-
-   prog = _mesa_lookup_program(ctx, id);
-   if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramNamedParameterNV");
-      return;
-   }
-
-   if (len <= 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramNamedParameterNV");
-      return;
-   }
-
-   fragProg = (struct gl_fragment_program *) prog;
-   v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
-                                    len, (char *) name);
-   if (v) {
-      params[0] = v[0];
-      params[1] = v[1];
-      params[2] = v[2];
-      params[3] = v[3];
-      return;
-   }
-
-   _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramNamedParameterNV");
-}
-
-
-void GLAPIENTRY
-_mesa_GetProgramNamedParameterdvNV(GLuint id, GLsizei len, const GLubyte *name,
-                                   GLdouble *params)
-{
-   GLfloat floatParams[4];
-   _mesa_GetProgramNamedParameterfvNV(id, len, name, floatParams);
-   COPY_4V(params, floatParams);
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.2
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file nvprogram.c
+ * NVIDIA vertex/fragment program state management functions.
+ * \author Brian Paul
+ */
+
+/*
+ * Regarding GL_NV_fragment/vertex_program, GL_NV_vertex_program1_1, etc:
+ *
+ * Portions of this software may use or implement intellectual
+ * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
+ * any and all warranties with respect to such intellectual property,
+ * including any use thereof or modifications thereto.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/hash.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/nvprogram.h"
+#include "program/arbprogparse.h"
+#include "program/nvfragparse.h"
+#include "program/nvvertparse.h"
+#include "program/program.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+
+
+
+/**
+ * Execute a vertex state program (only the arguments are validated here).
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_ExecuteProgramNV(GLenum target, GLuint id, const GLfloat *params)
+{
+   struct gl_vertex_program *vprog;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target != GL_VERTEX_STATE_PROGRAM_NV) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glExecuteProgramNV");
+      return;
+   }
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+
+   vprog = (struct gl_vertex_program *) _mesa_lookup_program(ctx, id);
+
+   if (!vprog || vprog->Base.Target != GL_VERTEX_STATE_PROGRAM_NV) { /* unknown id or wrong kind of program */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glExecuteProgramNV");
+      return;
+   }
+   /* Execution itself is not implemented; params is never read. */
+   _mesa_problem(ctx, "glExecuteProgramNV() not supported");
+}
+
+
+/**
+ * Determine if a set of programs is resident in hardware.
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+GLboolean GLAPIENTRY
+_mesa_AreProgramsResidentNV(GLsizei n, const GLuint *ids,
+                            GLboolean *residences)
+{
+   GLint i, j;
+   GLboolean allResident = GL_TRUE;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
+
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV(n)");
+      return GL_FALSE;
+   }
+
+   for (i = 0; i < n; i++) {
+      const struct gl_program *prog;
+      if (ids[i] == 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV");
+         return GL_FALSE; /* residences may already be partly written */
+      }
+      prog = _mesa_lookup_program(ctx, ids[i]);
+      if (!prog) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glAreProgramsResidentNV");
+         return GL_FALSE;
+      }
+      if (prog->Resident) {
+	 if (!allResident) /* residences[] is only written once a non-resident program was seen */
+	    residences[i] = GL_TRUE;
+      }
+      else {
+         if (allResident) {
+	    allResident = GL_FALSE;
+	    for (j = 0; j < i; j++) /* first miss: back-fill the entries skipped so far */
+	       residences[j] = GL_TRUE;
+	 }
+	 residences[i] = GL_FALSE;
+      }
+   }
+
+   return allResident; /* GL_TRUE means residences[] was left untouched */
+}
+
+
+/**
+ * Request that a set of programs be resident in hardware.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_RequestResidentProgramsNV(GLsizei n, const GLuint *ids)
+{
+   GLint i;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(n)");
+      return;
+   }
+
+   /* Validate each id and flag it Resident; ids before a bad one stay flagged. */
+   for (i = 0; i < n; i++) {
+      struct gl_program *prog;
+
+      if (ids[i] == 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(id)");
+         return;
+      }
+
+      prog = _mesa_lookup_program(ctx, ids[i]);
+      if (!prog) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glRequestResidentProgramsNV(id)");
+         return;
+      }
+
+      /* XXX this is really a hardware thing we should hook out */
+      prog->Resident = GL_TRUE;
+   }
+}
+
+
+/**
+ * Copy one vertex program parameter register into \p params (as floats).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetProgramParameterfvNV(GLenum target, GLuint index,
+                              GLenum pname, GLfloat *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_NV) { /* the only target accepted */
+      if (pname == GL_PROGRAM_PARAMETER_NV) { /* the only pname accepted */
+         if (index < MAX_NV_VERTEX_PROGRAM_PARAMS) {
+            COPY_4V(params, ctx->VertexProgram.Parameters[index]);
+         }
+         else {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "glGetProgramParameterfvNV(index)");
+            return;
+         }
+      }
+      else {
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterfvNV(pname)");
+         return;
+      }
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterfvNV(target)");
+      return;
+   }
+}
+
+
+/**
+ * Copy one vertex program parameter register into \p params (as doubles).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetProgramParameterdvNV(GLenum target, GLuint index,
+                              GLenum pname, GLdouble *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_NV) { /* the only target accepted */
+      if (pname == GL_PROGRAM_PARAMETER_NV) { /* the only pname accepted */
+         if (index < MAX_NV_VERTEX_PROGRAM_PARAMS) {
+            COPY_4V(params, ctx->VertexProgram.Parameters[index]);
+         }
+         else {
+            _mesa_error(ctx, GL_INVALID_VALUE,
+                        "glGetProgramParameterdvNV(index)");
+            return;
+         }
+      }
+      else {
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterdvNV(pname)");
+         return;
+      }
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramParameterdvNV(target)");
+      return;
+   }
+}
+
+
+/**
+ * Query a program's target, source string length or resident flag.
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetProgramivNV(GLuint id, GLenum pname, GLint *params)
+{
+   struct gl_program *prog;
+   GET_CURRENT_CONTEXT(ctx);
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   prog = _mesa_lookup_program(ctx, id);
+   if (!prog) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramivNV");
+      return;
+   }
+
+   switch (pname) {
+      case GL_PROGRAM_TARGET_NV:
+         *params = prog->Target;
+         return;
+      case GL_PROGRAM_LENGTH_NV: /* 0 when the program has no source string */
+         *params = prog->String ?(GLint) strlen((char *) prog->String) : 0;
+         return;
+      case GL_PROGRAM_RESIDENT_NV:
+         *params = prog->Resident;
+         return;
+      default:
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramivNV(pname)");
+         return;
+   }
+}
+
+
+/**
+ * Get the program source code.
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetProgramStringNV(GLuint id, GLenum pname, GLubyte *program)
+{
+   struct gl_program *prog;
+   GET_CURRENT_CONTEXT(ctx);
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (pname != GL_PROGRAM_STRING_NV) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetProgramStringNV(pname)");
+      return;
+   }
+
+   prog = _mesa_lookup_program(ctx, id);
+   if (!prog) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramStringNV");
+      return;
+   }
+
+   if (prog->String) { /* copies the characters only: no terminating NUL is written */
+      memcpy(program, prog->String, strlen((char *) prog->String));
+   }
+   else { /* no source stored: hand back an empty string */
+      program[0] = 0;
+   }
+}
+
+
+/**
+ * Get matrix tracking information.
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetTrackMatrixivNV(GLenum target, GLuint address,
+                         GLenum pname, GLint *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_NV
+       && ctx->Extensions.NV_vertex_program) {
+      GLuint i;
+      /* address must be a multiple of four and inside the parameter range */
+      if ((address & 0x3) || address >= MAX_NV_VERTEX_PROGRAM_PARAMS) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glGetTrackMatrixivNV(address)");
+         return;
+      }
+
+      i = address / 4; /* tracking state is stored per group of four registers */
+
+      switch (pname) {
+         case GL_TRACK_MATRIX_NV:
+            params[0] = (GLint) ctx->VertexProgram.TrackMatrix[i];
+            return;
+         case GL_TRACK_MATRIX_TRANSFORM_NV:
+            params[0] = (GLint) ctx->VertexProgram.TrackMatrixTransform[i];
+            return;
+         default:
+            _mesa_error(ctx, GL_INVALID_ENUM, "glGetTrackMatrixivNV");
+            return;
+      }
+   }
+   else { /* wrong target, or NV_vertex_program not enabled */
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTrackMatrixivNV");
+      return;
+   }
+}
+
+
+/**
+ * Get a vertex (or vertex array) attribute, returned as double(s).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetVertexAttribdvNV(GLuint index, GLenum pname, GLdouble *params)
+{
+   const struct gl_client_array *array;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribdvNV(index)");
+      return;
+   }
+
+   array = &ctx->Array.ArrayObj->VertexAttrib[index];
+
+   switch (pname) {
+      case GL_ATTRIB_ARRAY_SIZE_NV:
+         params[0] = array->Size;
+         break;
+      case GL_ATTRIB_ARRAY_STRIDE_NV:
+         params[0] = array->Stride;
+         break;
+      case GL_ATTRIB_ARRAY_TYPE_NV:
+         params[0] = array->Type;
+         break;
+      case GL_CURRENT_ATTRIB_NV:
+         if (index == 0) { /* the current value of attribute 0 cannot be queried */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glGetVertexAttribdvNV(index == 0)");
+            return;
+         }
+	 FLUSH_CURRENT(ctx, 0);
+         COPY_4V(params, ctx->Current.Attrib[index]);
+         break;
+      default:
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribdvNV");
+         return;
+   }
+}
+
+/**
+ * Get a vertex (or vertex array) attribute, returned as float(s).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetVertexAttribfvNV(GLuint index, GLenum pname, GLfloat *params)
+{
+   const struct gl_client_array *array;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribfvNV(index)");
+      return;
+   }
+
+   array = &ctx->Array.ArrayObj->VertexAttrib[index];
+
+   switch (pname) {
+      case GL_ATTRIB_ARRAY_SIZE_NV:
+         params[0] = (GLfloat) array->Size;
+         break;
+      case GL_ATTRIB_ARRAY_STRIDE_NV:
+         params[0] = (GLfloat) array->Stride;
+         break;
+      case GL_ATTRIB_ARRAY_TYPE_NV:
+         params[0] = (GLfloat) array->Type;
+         break;
+      case GL_CURRENT_ATTRIB_NV:
+         if (index == 0) { /* the current value of attribute 0 cannot be queried */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glGetVertexAttribfvNV(index == 0)");
+            return;
+         }
+         FLUSH_CURRENT(ctx, 0);
+         COPY_4V(params, ctx->Current.Attrib[index]);
+         break;
+      default:
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribfvNV");
+         return;
+   }
+}
+
+/**
+ * Get a vertex (or vertex array) attribute, returned as integer(s).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetVertexAttribivNV(GLuint index, GLenum pname, GLint *params)
+{
+   const struct gl_client_array *array;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribivNV(index)");
+      return;
+   }
+
+   array = &ctx->Array.ArrayObj->VertexAttrib[index];
+
+   switch (pname) {
+      case GL_ATTRIB_ARRAY_SIZE_NV:
+         params[0] = array->Size;
+         break;
+      case GL_ATTRIB_ARRAY_STRIDE_NV:
+         params[0] = array->Stride;
+         break;
+      case GL_ATTRIB_ARRAY_TYPE_NV:
+         params[0] = array->Type;
+         break;
+      case GL_CURRENT_ATTRIB_NV:
+         if (index == 0) { /* the current value of attribute 0 cannot be queried */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glGetVertexAttribivNV(index == 0)");
+            return;
+         }
+         FLUSH_CURRENT(ctx, 0);
+         params[0] = (GLint) ctx->Current.Attrib[index][0]; /* casts convert each component to GLint */
+         params[1] = (GLint) ctx->Current.Attrib[index][1];
+         params[2] = (GLint) ctx->Current.Attrib[index][2];
+         params[3] = (GLint) ctx->Current.Attrib[index][3];
+         break;
+      case GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB:
+         params[0] = array->BufferObj->Name;
+         break;
+      default:
+         _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribivNV");
+         return;
+   }
+}
+
+
+/**
+ * Get a vertex array attribute pointer (written to \p *pointer).
+ * \note Not compiled into display lists.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_GetVertexAttribPointervNV(GLuint index, GLenum pname, GLvoid **pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (index >= MAX_NV_VERTEX_PROGRAM_INPUTS) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetVertexAttribPointerNV(index)");
+      return;
+   }
+
+   if (pname != GL_ATTRIB_ARRAY_POINTER_NV) { /* the only pname accepted */
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetVertexAttribPointerNV(pname)");
+      return;
+   }
+   /* Ptr of the attribute array in the current array object. */
+   *pointer = (GLvoid *) ctx->Array.ArrayObj->VertexAttrib[index].Ptr;
+}
+
+/**
+ * Prepend code that zeroes every temporary and loads address register 0
+ * from temporary 0, if the target's compiler options ask for it.
+ */
+void
+_mesa_emit_nv_temp_initialization(struct gl_context *ctx,
+				  struct gl_program *program)
+{
+   struct prog_instruction *inst;
+   GLuint i;
+   struct gl_shader_compiler_options* options =
+         &ctx->ShaderCompilerOptions[_mesa_program_target_to_index(program->Target)];
+
+   if (!options->EmitNVTempInitialization)
+      return;
+
+   /* We'll swizzle up a zero temporary so we can use it for the ARL. */
+   if (program->NumTemporaries == 0)
+      program->NumTemporaries = 1;
+
+   /* Make room at the front: one SWZ per temporary, then a single ARL. */
+   _mesa_insert_instructions(program, 0, program->NumTemporaries + 1);
+
+   for (i = 0; i < program->NumTemporaries; i++) {
+      inst = &program->Instructions[i];
+      inst->Opcode = OPCODE_SWZ;
+      inst->DstReg.File = PROGRAM_TEMPORARY;
+      inst->DstReg.Index = i;
+      inst->DstReg.WriteMask = WRITEMASK_XYZW;
+      inst->SrcReg[0].File = PROGRAM_TEMPORARY;
+      inst->SrcReg[0].Index = 0;
+      inst->SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO,
+					      SWIZZLE_ZERO, SWIZZLE_ZERO);
+   }
+
+   inst = &program->Instructions[i];
+   inst->Opcode = OPCODE_ARL;
+   inst->DstReg.File = PROGRAM_ADDRESS;
+   inst->DstReg.Index = 0;
+   inst->DstReg.WriteMask = WRITEMASK_XYZW;
+   inst->SrcReg[0].File = PROGRAM_TEMPORARY;
+   inst->SrcReg[0].Index = 0;
+   inst->SrcReg[0].Swizzle = SWIZZLE_XXXX;
+
+   if (program->NumAddressRegs == 0)
+      program->NumAddressRegs = 1;
+}
+
+void
+_mesa_setup_nv_temporary_count(struct gl_context *ctx, struct gl_program *program)
+{
+   GLuint i;
+   /* Recompute NumTemporaries as (highest temporary index referenced) + 1; ctx is unused. */
+   program->NumTemporaries = 0;
+   for (i = 0; i < program->NumInstructions; i++) {
+      struct prog_instruction *inst = &program->Instructions[i];
+      /* Only DstReg and SrcReg[0..2] of each instruction are examined. */
+      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+	 program->NumTemporaries = MAX2(program->NumTemporaries,
+					inst->DstReg.Index + 1);
+      }
+      if (inst->SrcReg[0].File == PROGRAM_TEMPORARY) {
+	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
+					inst->SrcReg[0].Index + 1);
+      }
+      if (inst->SrcReg[1].File == PROGRAM_TEMPORARY) {
+	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
+					inst->SrcReg[1].Index + 1);
+      }
+      if (inst->SrcReg[2].File == PROGRAM_TEMPORARY) {
+	 program->NumTemporaries = MAX2((GLint)program->NumTemporaries,
+					inst->SrcReg[2].Index + 1);
+      }
+   }
+}
+
+/**
+ * Load/parse/compile a program of \p len bytes into program object \p id.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_LoadProgramNV(GLenum target, GLuint id, GLsizei len,
+                    const GLubyte *program)
+{
+   struct gl_program *prog;
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (!ctx->Extensions.NV_vertex_program
+       && !ctx->Extensions.NV_fragment_program) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV()");
+      return;
+   }
+
+   if (id == 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glLoadProgramNV(id)");
+      return;
+   }
+
+   if (len < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glLoadProgramNV(len)");
+      return;
+   }
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+
+   prog = _mesa_lookup_program(ctx, id);
+   /* An existing program may only be reloaded for the target it already has. */
+   if (prog && prog->Target != 0 && prog->Target != target) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(target)");
+      return;
+   }
+
+   if ((target == GL_VERTEX_PROGRAM_NV ||
+        target == GL_VERTEX_STATE_PROGRAM_NV)
+       && ctx->Extensions.NV_vertex_program) {
+      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+      if (!vprog || prog == &_mesa_DummyProgram) { /* no real object yet: create one */
+         vprog = (struct gl_vertex_program *)
+            ctx->Driver.NewProgram(ctx, target, id);
+         if (!vprog) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
+            return;
+         }
+         _mesa_HashInsert(ctx->Shared->Programs, id, vprog);
+      }
+      /* len >= 5 keeps strncmp from reading past a shorter buffer. */
+      if (ctx->Extensions.ARB_vertex_program
+	  && len >= 5 && (strncmp((char *) program, "!!ARB", 5) == 0)) {
+	 _mesa_parse_arb_vertex_program(ctx, target, program, len, vprog);
+      } else {
+	 _mesa_parse_nv_vertex_program(ctx, target, program, len, vprog);
+      }
+   }
+   else if (target == GL_FRAGMENT_PROGRAM_NV
+            && ctx->Extensions.NV_fragment_program) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      if (!fprog || prog == &_mesa_DummyProgram) { /* no real object yet: create one */
+         fprog = (struct gl_fragment_program *)
+            ctx->Driver.NewProgram(ctx, target, id);
+         if (!fprog) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
+            return;
+         }
+         _mesa_HashInsert(ctx->Shared->Programs, id, fprog);
+      }
+      _mesa_parse_nv_fragment_program(ctx, target, program, len, fprog);
+   }
+   else if (target == GL_FRAGMENT_PROGRAM_ARB
+            && ctx->Extensions.ARB_fragment_program) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      if (!fprog || prog == &_mesa_DummyProgram) { /* no real object yet: create one */
+         fprog = (struct gl_fragment_program *)
+            ctx->Driver.NewProgram(ctx, target, id);
+         if (!fprog) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
+            return;
+         }
+         _mesa_HashInsert(ctx->Shared->Programs, id, fprog);
+      }
+      _mesa_parse_arb_fragment_program(ctx, target, program, len, fprog);
+   }
+   else { /* unknown target, or its extension is not enabled */
+      _mesa_error(ctx, GL_INVALID_ENUM, "glLoadProgramNV(target)");
+   }
+}
+
+
+
+/**
+ * Set \p num consecutive program parameter registers starting at \p index.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_ProgramParameters4dvNV(GLenum target, GLuint index,
+                             GLsizei num, const GLdouble *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
+      GLint i;
+      if (num < 0 || index > MAX_NV_VERTEX_PROGRAM_PARAMS ||
+          (GLuint) num > MAX_NV_VERTEX_PROGRAM_PARAMS - index) { /* no GLuint wrap */
+         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramParameters4dvNV");
+         return;
+      }
+      for (i = 0; i < num; i++) { /* four doubles per register, narrowed to float */
+         ctx->VertexProgram.Parameters[index + i][0] = (GLfloat) params[0];
+         ctx->VertexProgram.Parameters[index + i][1] = (GLfloat) params[1];
+         ctx->VertexProgram.Parameters[index + i][2] = (GLfloat) params[2];
+         ctx->VertexProgram.Parameters[index + i][3] = (GLfloat) params[3];
+         params += 4;
+      }
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameters4dvNV");
+   }
+}
+
+
+/**
+ * Set \p num consecutive program parameter registers starting at \p index.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_ProgramParameters4fvNV(GLenum target, GLuint index,
+                             GLsizei num, const GLfloat *params)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
+      GLint i;
+      if (num < 0 || index > MAX_NV_VERTEX_PROGRAM_PARAMS ||
+          (GLuint) num > MAX_NV_VERTEX_PROGRAM_PARAMS - index) { /* no GLuint wrap */
+         _mesa_error(ctx, GL_INVALID_VALUE, "glProgramParameters4fvNV");
+         return;
+      }
+      for (i = 0; i < num; i++) { /* four floats per register */
+         COPY_4V(ctx->VertexProgram.Parameters[index + i], params);
+         params += 4;
+      }
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameters4fvNV");
+   }
+}
+
+
+
+/**
+ * Setup tracking of matrices into program parameter registers.
+ * \note Called from the GL API dispatcher.
+ */
+void GLAPIENTRY
+_mesa_TrackMatrixNV(GLenum target, GLuint address,
+                    GLenum matrix, GLenum transform)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+
+   if (target == GL_VERTEX_PROGRAM_NV && ctx->Extensions.NV_vertex_program) {
+      if ((address & 0x3) || address >= MAX_NV_VERTEX_PROGRAM_PARAMS) {
+         /* addr must be a multiple of four and inside the parameter range */
+         _mesa_error(ctx, GL_INVALID_VALUE, "glTrackMatrixNV(address)");
+         return;
+      }
+
+      switch (matrix) {
+         case GL_NONE:
+         case GL_MODELVIEW:
+         case GL_PROJECTION:
+         case GL_TEXTURE:
+         case GL_COLOR:
+         case GL_MODELVIEW_PROJECTION_NV:
+         case GL_MATRIX0_NV:
+         case GL_MATRIX1_NV:
+         case GL_MATRIX2_NV:
+         case GL_MATRIX3_NV:
+         case GL_MATRIX4_NV:
+         case GL_MATRIX5_NV:
+         case GL_MATRIX6_NV:
+         case GL_MATRIX7_NV:
+            /* OK: accepted matrix name */
+            break;
+         default:
+            _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(matrix)");
+            return;
+      }
+
+      switch (transform) {
+         case GL_IDENTITY_NV:
+         case GL_INVERSE_NV:
+         case GL_TRANSPOSE_NV:
+         case GL_INVERSE_TRANSPOSE_NV:
+            /* OK: accepted transform name */
+            break;
+         default:
+            _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(transform)");
+            return;
+      }
+      /* Tracking state is stored per group of four parameter registers. */
+      ctx->VertexProgram.TrackMatrix[address / 4] = matrix;
+      ctx->VertexProgram.TrackMatrixTransform[address / 4] = transform;
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTrackMatrixNV(target)");
+      return;
+   }
+}
+
+
+void GLAPIENTRY
+_mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
+                                GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+   struct gl_program *prog;
+   struct gl_fragment_program *fragProg;
+   gl_constant_value *v;
+
+   GET_CURRENT_CONTEXT(ctx);
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
+
+   prog = _mesa_lookup_program(ctx, id);
+   if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) { /* named parameters exist only on NV fragment programs */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glProgramNamedParameterNV");
+      return;
+   }
+
+   if (len <= 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glProgramNamedParameterNV(len)");
+      return;
+   }
+
+   fragProg = (struct gl_fragment_program *) prog;
+   v = _mesa_lookup_parameter_value(fragProg->Base.Parameters, len,
+                                    (char *) name);
+   if (v) { /* found: store the four components in place */
+      v[0].f = x;
+      v[1].f = y;
+      v[2].f = z;
+      v[3].f = w;
+      return;
+   }
+   /* No parameter with that name in the program's parameter list. */
+   _mesa_error(ctx, GL_INVALID_VALUE, "glProgramNamedParameterNV(name)");
+}
+
+
+void GLAPIENTRY
+_mesa_ProgramNamedParameter4fvNV(GLuint id, GLsizei len, const GLubyte *name,
+                                 const float v[])
+{  /* Array form: forwards v[0..3] to the scalar float entry point. */
+   _mesa_ProgramNamedParameter4fNV(id, len, name, v[0], v[1], v[2], v[3]);
+}
+
+
+void GLAPIENTRY
+_mesa_ProgramNamedParameter4dNV(GLuint id, GLsizei len, const GLubyte *name,
+                                GLdouble x, GLdouble y, GLdouble z, GLdouble w)
+{  /* Double form: narrows each component to GLfloat and forwards. */
+   _mesa_ProgramNamedParameter4fNV(id, len, name, (GLfloat)x, (GLfloat)y, 
+                                   (GLfloat)z, (GLfloat)w);
+}
+
+
+void GLAPIENTRY
+_mesa_ProgramNamedParameter4dvNV(GLuint id, GLsizei len, const GLubyte *name,
+                                 const double v[])
+{  /* Double array form: narrows v[0..3] to GLfloat and forwards. */
+   _mesa_ProgramNamedParameter4fNV(id, len, name,
+                                   (GLfloat)v[0], (GLfloat)v[1],
+                                   (GLfloat)v[2], (GLfloat)v[3]);
+}
+
+
+void GLAPIENTRY
+_mesa_GetProgramNamedParameterfvNV(GLuint id, GLsizei len, const GLubyte *name,
+                                   GLfloat *params)
+{
+   struct gl_program *prog;
+   struct gl_fragment_program *fragProg;
+   const gl_constant_value *v;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   ASSERT_OUTSIDE_BEGIN_END(ctx);
+
+   prog = _mesa_lookup_program(ctx, id);
+   if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) { /* named parameters exist only on NV fragment programs */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetProgramNamedParameterNV");
+      return;
+   }
+
+   if (len <= 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramNamedParameterNV");
+      return;
+   }
+
+   fragProg = (struct gl_fragment_program *) prog;
+   v = _mesa_lookup_parameter_value(fragProg->Base.Parameters,
+                                    len, (char *) name);
+   if (v) { /* found: return the float view of the four components */
+      params[0] = v[0].f;
+      params[1] = v[1].f;
+      params[2] = v[2].f;
+      params[3] = v[3].f;
+      return;
+   }
+   /* Every error path above and below leaves params unwritten. */
+   _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramNamedParameterNV");
+}
+
+
+void GLAPIENTRY
+_mesa_GetProgramNamedParameterdvNV(GLuint id, GLsizei len, const GLubyte *name,
+                                   GLdouble *params)
+{  /* Double form of the fv query; zero-init because fv writes nothing on error. */
+   GLfloat floatParams[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+   _mesa_GetProgramNamedParameterfvNV(id, len, name, floatParams);
+   COPY_4V(params, floatParams);
+}
diff --git a/mesalib/src/mesa/main/texcompress_rgtc_tmp.h b/mesalib/src/mesa/main/texcompress_rgtc_tmp.h
index c8bf082a1..48bbd374e 100644
--- a/mesalib/src/mesa/main/texcompress_rgtc_tmp.h
+++ b/mesalib/src/mesa/main/texcompress_rgtc_tmp.h
@@ -181,7 +181,7 @@ static void TAG(encode_rgtc_chan)(TYPE *blkaddr, TYPE srccolors[4][4],
       fprintf(stderr, "%d ", alphaenc1[i]);
    }
    fprintf(stderr, "cutVals ");
-   for (i = 0; i < 8; i++) {
+   for (i = 0; i < 7; i++) {
       fprintf(stderr, "%d ", acutValues[i]);
    }
    fprintf(stderr, "srcVals ");
diff --git a/mesalib/src/mesa/main/texobj.c b/mesalib/src/mesa/main/texobj.c
index 3021716a0..078a43ab1 100644
--- a/mesalib/src/mesa/main/texobj.c
+++ b/mesalib/src/mesa/main/texobj.c
@@ -842,7 +842,7 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
       struct gl_texture_object *texObj;
       GLuint name = first + i;
       GLenum target = 0;
-      texObj = (*ctx->Driver.NewTextureObject)( ctx, name, target);
+      texObj = ctx->Driver.NewTextureObject(ctx, name, target);
       if (!texObj) {
          _glthread_UNLOCK_MUTEX(ctx->Shared->Mutex);
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
@@ -1066,7 +1066,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
       }
       else {
          /* if this is a new texture id, allocate a texture object now */
-         newTexObj = (*ctx->Driver.NewTextureObject)(ctx, texName, target);
+         newTexObj = ctx->Driver.NewTextureObject(ctx, texName, target);
          if (!newTexObj) {
             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBindTexture");
             return;
@@ -1108,7 +1108,7 @@ _mesa_BindTexture( GLenum target, GLuint texName )
 
    /* Pass BindTexture call to device driver */
    if (ctx->Driver.BindTexture)
-      (*ctx->Driver.BindTexture)( ctx, target, newTexObj );
+      ctx->Driver.BindTexture(ctx, target, newTexObj);
 }
 
 
diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c
index 134f15346..bbbb306b2 100644
--- a/mesalib/src/mesa/main/texparam.c
+++ b/mesalib/src/mesa/main/texparam.c
@@ -994,28 +994,21 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
             *params = 0;
          break;
       case GL_TEXTURE_DEPTH_SIZE_ARB:
-         if (ctx->Extensions.ARB_depth_texture)
-            *params = _mesa_get_format_bits(texFormat, pname);
-         else
+         if (!ctx->Extensions.ARB_depth_texture)
             goto invalid_pname;
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_STENCIL_SIZE_EXT:
-         if (ctx->Extensions.EXT_packed_depth_stencil ||
-             ctx->Extensions.ARB_framebuffer_object) {
-            *params = _mesa_get_format_bits(texFormat, pname);
-         }
-         else {
+         if (!ctx->Extensions.EXT_packed_depth_stencil &&
+             !ctx->Extensions.ARB_framebuffer_object)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, pname);
          break;
       case GL_TEXTURE_SHARED_SIZE:
-         if (ctx->VersionMajor >= 3 ||
-             ctx->Extensions.EXT_texture_shared_exponent) {
-            *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
-         }
-         else {
+         if (ctx->VersionMajor < 3 &&
+             !ctx->Extensions.EXT_texture_shared_exponent)
             goto invalid_pname;
-         }
+         *params = texFormat == MESA_FORMAT_RGB9_E5_FLOAT ? 5 : 0;
          break;
 
       /* GL_ARB_texture_compression */
@@ -1036,67 +1029,46 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
 
       /* GL_ARB_texture_float */
       case GL_TEXTURE_RED_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_RED_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_GREEN_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_GREEN_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_BLUE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_BLUE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_ALPHA_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_ALPHA_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_LUMINANCE_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_LUMINANCE_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_INTENSITY_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_INTENSITY_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
       case GL_TEXTURE_DEPTH_TYPE_ARB:
-         if (ctx->Extensions.ARB_texture_float) {
-            *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
-               _mesa_get_format_datatype(texFormat) : GL_NONE;
-         }
-         else {
+         if (!ctx->Extensions.ARB_texture_float)
             goto invalid_pname;
-         }
+         *params = _mesa_get_format_bits(texFormat, GL_TEXTURE_DEPTH_SIZE) ?
+            _mesa_get_format_datatype(texFormat) : GL_NONE;
          break;
 
       default:
@@ -1118,7 +1090,6 @@ void GLAPIENTRY
 _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
@@ -1144,17 +1115,15 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = ENUM_TO_FLOAT(obj->Sampler.WrapR);
          break;
       case GL_TEXTURE_BORDER_COLOR:
-         if(ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
+         if (ctx->NewState & (_NEW_BUFFERS | _NEW_FRAG_CLAMP))
             _mesa_update_state_locked(ctx);
-         if(ctx->Color._ClampFragmentColor)
-         {
+         if (ctx->Color._ClampFragmentColor) {
             params[0] = CLAMP(obj->Sampler.BorderColor.f[0], 0.0F, 1.0F);
             params[1] = CLAMP(obj->Sampler.BorderColor.f[1], 0.0F, 1.0F);
             params[2] = CLAMP(obj->Sampler.BorderColor.f[2], 0.0F, 1.0F);
             params[3] = CLAMP(obj->Sampler.BorderColor.f[3], 0.0F, 1.0F);
          }
-         else
-         {
+         else {
             params[0] = obj->Sampler.BorderColor.f[0];
             params[1] = obj->Sampler.BorderColor.f[1];
             params[2] = obj->Sampler.BorderColor.f[2];
@@ -1162,14 +1131,8 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          }
          break;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = ENUM_TO_FLOAT(resident);
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1.0F;
          break;
       case GL_TEXTURE_PRIORITY:
          *params = obj->Priority;
@@ -1187,49 +1150,37 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
          *params = (GLfloat) obj->MaxLevel;
          break;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = obj->Sampler.MaxAnisotropy;
-         }
-	 else
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = obj->Sampler.CompareFailValue;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = obj->Sampler.CompareFailValue;
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLfloat) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLfloat) obj->Sampler.CompareFunc;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLfloat) obj->Sampler.DepthMode;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = obj->Sampler.LodBias;
-         }
-	 else 
-	    error = GL_TRUE;
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1244,46 +1195,40 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = (GLfloat) obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
+         if (!ctx->Extensions.EXT_texture_swizzle) {
+            goto invalid_pname;
+         }
+         else {
             GLuint comp;
             for (comp = 0; comp < 4; comp++) {
                params[comp] = (GLfloat) obj->Swizzle[comp];
             }
          }
-         else {
-            error = GL_TRUE;
-         }
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLfloat) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLfloat) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-	 error = GL_TRUE;
-	 break;
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)",
-		  pname);
+   /* no error if we get here */
+   _mesa_unlock_texture(ctx, obj);
+   return;
 
+invalid_pname:
    _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
 }
 
 
@@ -1291,13 +1236,12 @@ void GLAPIENTRY
 _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
 {
    struct gl_texture_object *obj;
-   GLboolean error = GL_FALSE;
    GET_CURRENT_CONTEXT(ctx);
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
-    obj = get_texobj(ctx, target, GL_TRUE);
-    if (!obj)
-       return;
+   obj = get_texobj(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
 
    _mesa_lock_texture(ctx, obj);
    switch (pname) {
@@ -1330,14 +1274,8 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          }
          break;;
       case GL_TEXTURE_RESIDENT:
-         {
-            GLboolean resident;
-            if (ctx->Driver.IsTextureResident)
-               resident = ctx->Driver.IsTextureResident(ctx, obj);
-            else
-               resident = GL_TRUE;
-            *params = (GLint) resident;
-         }
+         *params = ctx->Driver.IsTextureResident ?
+            ctx->Driver.IsTextureResident(ctx, obj) : 1;
          break;;
       case GL_TEXTURE_PRIORITY:
          *params = FLOAT_TO_INT(obj->Priority);
@@ -1355,55 +1293,37 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
          *params = obj->MaxLevel;
          break;;
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-         if (ctx->Extensions.EXT_texture_filter_anisotropic) {
-            *params = (GLint) obj->Sampler.MaxAnisotropy;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_filter_anisotropic)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.MaxAnisotropy;
          break;
       case GL_TEXTURE_COMPARE_FAIL_VALUE_ARB:
-         if (ctx->Extensions.ARB_shadow_ambient) {
-            *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow_ambient)
+            goto invalid_pname;
+         *params = (GLint) FLOAT_TO_INT(obj->Sampler.CompareFailValue);
          break;
       case GL_GENERATE_MIPMAP_SGIS:
 	 *params = (GLint) obj->GenerateMipmap;
          break;
       case GL_TEXTURE_COMPARE_MODE_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareMode;
          break;
       case GL_TEXTURE_COMPARE_FUNC_ARB:
-         if (ctx->Extensions.ARB_shadow) {
-            *params = (GLint) obj->Sampler.CompareFunc;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_shadow)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CompareFunc;
          break;
       case GL_DEPTH_TEXTURE_MODE_ARB:
-         if (ctx->Extensions.ARB_depth_texture) {
-            *params = (GLint) obj->Sampler.DepthMode;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.ARB_depth_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.DepthMode;
          break;
       case GL_TEXTURE_LOD_BIAS:
-         if (ctx->Extensions.EXT_texture_lod_bias) {
-            *params = (GLint) obj->Sampler.LodBias;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_lod_bias)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.LodBias;
          break;
 #if FEATURE_OES_draw_texture
       case GL_TEXTURE_CROP_RECT_OES:
@@ -1417,42 +1337,34 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
       case GL_TEXTURE_SWIZZLE_G_EXT:
       case GL_TEXTURE_SWIZZLE_B_EXT:
       case GL_TEXTURE_SWIZZLE_A_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            GLuint comp = pname - GL_TEXTURE_SWIZZLE_R_EXT;
-            *params = obj->Swizzle[comp];
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         *params = obj->Swizzle[pname - GL_TEXTURE_SWIZZLE_R_EXT];
          break;
 
       case GL_TEXTURE_SWIZZLE_RGBA_EXT:
-         if (ctx->Extensions.EXT_texture_swizzle) {
-            COPY_4V(params, obj->Swizzle);
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.EXT_texture_swizzle)
+            goto invalid_pname;
+         COPY_4V(params, obj->Swizzle);
          break;
 
       case GL_TEXTURE_CUBE_MAP_SEAMLESS:
-         if (ctx->Extensions.AMD_seamless_cubemap_per_texture) {
-            *params = (GLint) obj->Sampler.CubeMapSeamless;
-         }
-         else {
-            error = GL_TRUE;
-         }
+         if (!ctx->Extensions.AMD_seamless_cubemap_per_texture)
+            goto invalid_pname;
+         *params = (GLint) obj->Sampler.CubeMapSeamless;
          break;
 
       default:
-         ; /* silence warnings */
+         goto invalid_pname;
    }
 
-   if (error)
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)",
-		  pname);
+   /* no error if we get here */
+   _mesa_unlock_texture(ctx, obj);
+   return;
 
+invalid_pname:
    _mesa_unlock_texture(ctx, obj);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
 }
 
 
@@ -1465,6 +1377,8 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
@@ -1485,6 +1399,8 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)
    ASSERT_OUTSIDE_BEGIN_END(ctx);
 
    texObj = get_texobj(ctx, target, GL_TRUE);
+   if (!texObj)
+      return;
    
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
diff --git a/mesalib/src/mesa/main/uniforms.c b/mesalib/src/mesa/main/uniforms.c
index dd069a3a4..1329af4cd 100644
--- a/mesalib/src/mesa/main/uniforms.c
+++ b/mesalib/src/mesa/main/uniforms.c
@@ -429,7 +429,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = prog->Parameters->ParameterValues[base][j];
+                  params[k++] = prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -442,7 +442,7 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
                   params[k++] = (GLdouble)
-                     prog->Parameters->ParameterValues[base][j];
+                     prog->Parameters->ParameterValues[base][j].f;
                }
             }
          }
@@ -454,8 +454,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLint)
-                     prog->Parameters->ParameterValues[base][j];
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].i;
                }
             }
          }
@@ -467,8 +468,9 @@ get_uniform(struct gl_context *ctx, GLuint program, GLint location,
             for (i = 0; i < rows; i++) {
                const int base = paramPos + offset + i;
                for (j = 0; j < cols; j++ ) {
-                  params[k++] = (GLuint)
-                     prog->Parameters->ParameterValues[base][j];
+                  params[k++] = ctx->Const.GLSLVersion <= 120 ? 
+                     (GLuint) prog->Parameters->ParameterValues[base][j].f : 
+                     prog->Parameters->ParameterValues[base][j].u;
                }
             }
          }
@@ -670,7 +672,7 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
       /* loop over number of samplers to change */
       for (i = 0; i < count; i++) {
          GLuint sampler = (GLuint)
-            program->Parameters->ParameterValues[index + offset + i][0];
+            program->Parameters->ParameterValues[index+offset + i][0].f;
          GLuint texUnit = ((GLuint *) values)[i];
 
          /* check that the sampler (tex unit index) is legal */
@@ -735,42 +737,52 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
 
       /* loop over number of array elements */
       for (k = 0; k < count; k++) {
-         GLfloat *uniformVal;
+         gl_constant_value *uniformVal;
 
          if (offset + k >= slots) {
             /* Extra array data is ignored */
             break;
          }
 
-         /* uniformVal (the destination) is always float[4] */
+         /* uniformVal (the destination) is always gl_constant_value[4] */
          uniformVal = program->Parameters->ParameterValues[index + offset + k];
 
          if (basicType == GL_INT) {
-            /* convert user's ints to floats */
             const GLint *iValues = ((const GLint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat) iValues[i];
+               else
+                  uniformVal[i].i = iValues[i];
             }
          }
          else if (basicType == GL_UNSIGNED_INT) {
-            /* convert user's uints to floats */
             const GLuint *iValues = ((const GLuint *) values) + k * elems;
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = (GLfloat) iValues[i];
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = (GLfloat)(GLuint) iValues[i];
+               else
+                  uniformVal[i].u = iValues[i];
             }
          }
          else {
             const GLfloat *fValues = ((const GLfloat *) values) + k * elems;
             assert(basicType == GL_FLOAT);
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = fValues[i];
+               uniformVal[i].f = fValues[i];
             }
          }
 
-         /* if the uniform is bool-valued, convert to 1.0 or 0.0 */
+         /* bool uniform: store 1/0, or 1.0f/0.0f if GLSLVersion <= 120 */
          if (isUniformBool) {
             for (i = 0; i < elems; i++) {
-               uniformVal[i] = uniformVal[i] ? 1.0f : 0.0f;
+               if (basicType == GL_FLOAT)
+                  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
+               else
+                  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
+               
+               if (ctx->Const.GLSLVersion <= 120)
+                  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
             }
          }
       }
@@ -936,7 +948,7 @@ set_program_uniform_matrix(struct gl_context *ctx, struct gl_program *program,
             /* Ignore writes beyond the end of (the used part of) an array */
             return;
          }
-         v = program->Parameters->ParameterValues[index + offset];
+         v = (GLfloat *) program->Parameters->ParameterValues[index + offset];
          for (row = 0; row < rows; row++) {
             if (transpose) {
                v[row] = values[src + row * cols + col];
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 382cda0c7..debadb9a3 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -585,7 +585,7 @@ ir_to_mesa_visitor::src_reg_for_float(float val)
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
 
    src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-					  &val, 1, &src.swizzle);
+					  (const gl_constant_value *)&val, 1, &src.swizzle);
 
    return src;
 }
@@ -1795,7 +1795,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
 	 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
 	 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						values,
+						(gl_constant_value *) values,
 						ir->type->vector_elements,
 						&src.swizzle);
 	 emit(ir, OPCODE_MOV, mat_column, src);
@@ -1833,7 +1833,7 @@ ir_to_mesa_visitor::visit(ir_constant *ir)
 
    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
-						   values,
+						   (gl_constant_value *) values,
 						   ir->type->vector_elements,
 						   &this->result.swizzle);
 }
@@ -2533,7 +2533,7 @@ add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
 	  */
 	 if (file == PROGRAM_SAMPLER) {
 	    for (unsigned int j = 0; j < size / 4; j++)
-	       prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
+	       prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
 	 }
 
 	 /* The location chosen in the Parameters list here (returned
diff --git a/mesalib/src/mesa/program/nvfragparse.c b/mesalib/src/mesa/program/nvfragparse.c
index 04538e071..ce72c610d 100644
--- a/mesalib/src/mesa/program/nvfragparse.c
+++ b/mesalib/src/mesa/program/nvfragparse.c
@@ -1,1588 +1,1595 @@
-/*
- * Mesa 3-D graphics library
- * Version:  6.5
- *
- * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file nvfragparse.c
- * NVIDIA fragment program parser.
- * \author Brian Paul
- */
-
-/*
- * Regarding GL_NV_fragment_program:
- *
- * Portions of this software may use or implement intellectual
- * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
- * any and all warranties with respect to such intellectual property,
- * including any use thereof or modifications thereto.
- */
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "program.h"
-#include "prog_parameter.h"
-#include "prog_print.h"
-#include "prog_instruction.h"
-#include "nvfragparse.h"
-
-
-#define INPUT_1V     1
-#define INPUT_2V     2
-#define INPUT_3V     3
-#define INPUT_1S     4
-#define INPUT_2S     5
-#define INPUT_CC     6
-#define INPUT_1V_T   7  /* one source vector, plus textureId */
-#define INPUT_3V_T   8  /* one source vector, plus textureId */
-#define INPUT_NONE   9
-#define INPUT_1V_S  10  /* a string and a vector register */
-#define OUTPUT_V    20
-#define OUTPUT_S    21
-#define OUTPUT_NONE 22
-
-/* IRIX defines some of these */
-#undef _R
-#undef _H
-#undef _X
-#undef _C
-#undef _S
-
-/* Optional suffixes */
-#define _R  FLOAT32  /* float */
-#define _H  FLOAT16  /* half-float */
-#define _X  FIXED12  /* fixed */
-#define _C  0x08     /* set cond codes */
-#define _S  0x10     /* saturate, clamp result to [0,1] */
-
-struct instruction_pattern {
-   const char *name;
-   enum prog_opcode opcode;
-   GLuint inputs;
-   GLuint outputs;
-   GLuint suffixes;
-};
-
-static const struct instruction_pattern Instructions[] = {
-   { "ADD", OPCODE_ADD, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "COS", OPCODE_COS, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "DDX", OPCODE_DDX, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
-   { "DDY", OPCODE_DDY, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
-   { "DP3", OPCODE_DP3, INPUT_2V, OUTPUT_S, _R | _H | _X | _C | _S },
-   { "DP4", OPCODE_DP4, INPUT_2V, OUTPUT_S, _R | _H | _X | _C | _S },
-   { "DST", OPCODE_DP4, INPUT_2V, OUTPUT_V, _R | _H |      _C | _S },
-   { "EX2", OPCODE_DP4, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "FLR", OPCODE_FLR, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "FRC", OPCODE_FRC, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "KIL", OPCODE_KIL_NV, INPUT_CC, OUTPUT_NONE, 0                },
-   { "LG2", OPCODE_LG2, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "LIT", OPCODE_LIT, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
-   { "LRP", OPCODE_LRP, INPUT_3V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "MAD", OPCODE_MAD, INPUT_3V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "MAX", OPCODE_MAX, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "MIN", OPCODE_MIN, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "MOV", OPCODE_MOV, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "MUL", OPCODE_MUL, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "PK2H",  OPCODE_PK2H,  INPUT_1V, OUTPUT_S, 0                  },
-   { "PK2US", OPCODE_PK2US, INPUT_1V, OUTPUT_S, 0                  },
-   { "PK4B",  OPCODE_PK4B,  INPUT_1V, OUTPUT_S, 0                  },
-   { "PK4UB", OPCODE_PK4UB, INPUT_1V, OUTPUT_S, 0                  },
-   { "POW", OPCODE_POW, INPUT_2S, OUTPUT_S, _R | _H |      _C | _S },
-   { "RCP", OPCODE_RCP, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "RFL", OPCODE_RFL, INPUT_2V, OUTPUT_V, _R | _H |      _C | _S },
-   { "RSQ", OPCODE_RSQ, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "SEQ", OPCODE_SEQ, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SFL", OPCODE_SFL, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SGE", OPCODE_SGE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SGT", OPCODE_SGT, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SIN", OPCODE_SIN, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
-   { "SLE", OPCODE_SLE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SLT", OPCODE_SLT, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SNE", OPCODE_SNE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "STR", OPCODE_STR, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "SUB", OPCODE_SUB, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
-   { "TEX", OPCODE_TEX, INPUT_1V_T, OUTPUT_V,              _C | _S },
-   { "TXD", OPCODE_TXD, INPUT_3V_T, OUTPUT_V,              _C | _S },
-   { "TXP", OPCODE_TXP_NV, INPUT_1V_T, OUTPUT_V,           _C | _S },
-   { "UP2H",  OPCODE_UP2H,  INPUT_1S, OUTPUT_V,            _C | _S },
-   { "UP2US", OPCODE_UP2US, INPUT_1S, OUTPUT_V,            _C | _S },
-   { "UP4B",  OPCODE_UP4B,  INPUT_1S, OUTPUT_V,            _C | _S },
-   { "UP4UB", OPCODE_UP4UB, INPUT_1S, OUTPUT_V,            _C | _S },
-   { "X2D", OPCODE_X2D, INPUT_3V, OUTPUT_V, _R | _H |      _C | _S },
-   { "PRINT", OPCODE_PRINT, INPUT_1V_S, OUTPUT_NONE, 0               },
-   { NULL, (enum prog_opcode) -1, 0, 0, 0 }
-};
-
-
-/*
- * Information needed or computed during parsing.
- * Remember, we can't modify the target program object until we've
- * _successfully_ parsed the program text.
- */
-struct parse_state {
-   struct gl_context *ctx;
-   const GLubyte *start;              /* start of program string */
-   const GLubyte *pos;                /* current position */
-   const GLubyte *curLine;
-   struct gl_fragment_program *program;  /* current program */
-
-   struct gl_program_parameter_list *parameters;
-
-   GLuint numInst;                    /* number of instructions parsed */
-   GLuint inputsRead;                 /* bitmask of input registers used */
-   GLuint outputsWritten;             /* bitmask of 1 << FRAG_OUTPUT_* bits */
-   GLuint texturesUsed[MAX_TEXTURE_IMAGE_UNITS];
-};
-
-
-
-/*
- * Called whenever we find an error during parsing.
- */
-static void
-record_error(struct parse_state *parseState, const char *msg, int lineNo)
-{
-#ifdef DEBUG
-   GLint line, column;
-   const GLubyte *lineStr;
-   lineStr = _mesa_find_line_column(parseState->start,
-                                    parseState->pos, &line, &column);
-   _mesa_debug(parseState->ctx,
-               "nvfragparse.c(%d): line %d, column %d:%s (%s)\n",
-               lineNo, line, column, (char *) lineStr, msg);
-   free((void *) lineStr);
-#else
-   (void) lineNo;
-#endif
-
-   /* Check that no error was already recorded.  Only record the first one. */
-   if (parseState->ctx->Program.ErrorString[0] == 0) {
-      _mesa_set_program_error(parseState->ctx,
-                              parseState->pos - parseState->start,
-                              msg);
-   }
-}
-
-
-#define RETURN_ERROR							\
-do {									\
-   record_error(parseState, "Unexpected end of input.", __LINE__);	\
-   return GL_FALSE;							\
-} while(0)
-
-#define RETURN_ERROR1(msg)						\
-do {									\
-   record_error(parseState, msg, __LINE__);				\
-   return GL_FALSE;							\
-} while(0)
-
-#define RETURN_ERROR2(msg1, msg2)					\
-do {									\
-   char err[1000];							\
-   sprintf(err, "%s %s", msg1, msg2);				\
-   record_error(parseState, err, __LINE__);				\
-   return GL_FALSE;							\
-} while(0)
-
-
-
-
-/*
- * Search a list of instruction structures for a match.
- */
-static struct instruction_pattern
-MatchInstruction(const GLubyte *token)
-{
-   const struct instruction_pattern *inst;
-   struct instruction_pattern result;
-
-   result.name = NULL;
-   result.opcode = MAX_OPCODE; /* i.e. invalid instruction */
-   result.inputs = 0;
-   result.outputs = 0;
-   result.suffixes = 0;
-
-   for (inst = Instructions; inst->name; inst++) {
-      if (strncmp((const char *) token, inst->name, 3) == 0) {
-         /* matched! */
-         int i = 3;
-         result = *inst;
-         result.suffixes = 0;
-         /* look at suffix */
-         if (token[i] == 'R') {
-            result.suffixes |= _R;
-            i++;
-         }
-         else if (token[i] == 'H') {
-            result.suffixes |= _H;
-            i++;
-         }
-         else if (token[i] == 'X') {
-            result.suffixes |= _X;
-            i++;
-         }
-         if (token[i] == 'C') {
-            result.suffixes |= _C;
-            i++;
-         }
-         if (token[i] == '_' && token[i+1] == 'S' &&
-             token[i+2] == 'A' && token[i+3] == 'T') {
-            result.suffixes |= _S;
-         }
-         return result;
-      }
-   }
-
-   return result;
-}
-
-
-
-
-/**********************************************************************/
-
-
-static GLboolean IsLetter(GLubyte b)
-{
-   return (b >= 'a' && b <= 'z') ||
-          (b >= 'A' && b <= 'Z') ||
-          (b == '_') ||
-          (b == '$');
-}
-
-
-static GLboolean IsDigit(GLubyte b)
-{
-   return b >= '0' && b <= '9';
-}
-
-
-static GLboolean IsWhitespace(GLubyte b)
-{
-   return b == ' ' || b == '\t' || b == '\n' || b == '\r';
-}
-
-
-/**
- * Starting at 'str' find the next token.  A token can be an integer,
- * an identifier or punctuation symbol.
- * \return <= 0 we found an error, else, return number of characters parsed.
- */
-static GLint
-GetToken(struct parse_state *parseState, GLubyte *token)
-{
-   const GLubyte *str = parseState->pos;
-   GLint i = 0, j = 0;
-
-   token[0] = 0;
-
-   /* skip whitespace and comments */
-   while (str[i] && (IsWhitespace(str[i]) || str[i] == '#')) {
-      if (str[i] == '#') {
-         /* skip comment */
-         while (str[i] && (str[i] != '\n' && str[i] != '\r')) {
-            i++;
-         }
-         if (str[i] == '\n' || str[i] == '\r')
-            parseState->curLine = str + i + 1;
-      }
-      else {
-         /* skip whitespace */
-         if (str[i] == '\n' || str[i] == '\r')
-            parseState->curLine = str + i + 1;
-         i++;
-      }
-   }
-
-   if (str[i] == 0)
-      return -i;
-
-   /* try matching an integer */
-   while (str[i] && IsDigit(str[i])) {
-      token[j++] = str[i++];
-   }
-   if (j > 0 || !str[i]) {
-      token[j] = 0;
-      return i;
-   }
-
-   /* try matching an identifier */
-   if (IsLetter(str[i])) {
-      while (str[i] && (IsLetter(str[i]) || IsDigit(str[i]))) {
-         token[j++] = str[i++];
-      }
-      token[j] = 0;
-      return i;
-   }
-
-   /* punctuation character */
-   if (str[i]) {
-      token[0] = str[i++];
-      token[1] = 0;
-      return i;
-   }
-
-   /* end of input */
-   token[0] = 0;
-   return i;
-}
-
-
-/**
- * Get next token from input stream and increment stream pointer past token.
- */
-static GLboolean
-Parse_Token(struct parse_state *parseState, GLubyte *token)
-{
-   GLint i;
-   i = GetToken(parseState, token);
-   if (i <= 0) {
-      parseState->pos += (-i);
-      return GL_FALSE;
-   }
-   parseState->pos += i;
-   return GL_TRUE;
-}
-
-
-/**
- * Get next token from input stream but don't increment stream pointer.
- */
-static GLboolean
-Peek_Token(struct parse_state *parseState, GLubyte *token)
-{
-   GLint i, len;
-   i = GetToken(parseState, token);
-   if (i <= 0) {
-      parseState->pos += (-i);
-      return GL_FALSE;
-   }
-   len = (GLint) strlen((const char *) token);
-   parseState->pos += (i - len);
-   return GL_TRUE;
-}
-
-
-/**********************************************************************/
-
-static const char *InputRegisters[MAX_NV_FRAGMENT_PROGRAM_INPUTS + 1] = {
-   "WPOS", "COL0", "COL1", "FOGC",
-   "TEX0", "TEX1", "TEX2", "TEX3", "TEX4", "TEX5", "TEX6", "TEX7", NULL
-};
-
-
-
-/**********************************************************************/
-
-/**
- * Try to match 'pattern' as the next token after any whitespace/comments.
- */
-static GLboolean
-Parse_String(struct parse_state *parseState, const char *pattern)
-{
-   const GLubyte *m;
-   GLint i;
-
-   /* skip whitespace and comments */
-   while (IsWhitespace(*parseState->pos) || *parseState->pos == '#') {
-      if (*parseState->pos == '#') {
-         while (*parseState->pos && (*parseState->pos != '\n' && *parseState->pos != '\r')) {
-            parseState->pos += 1;
-         }
-         if (*parseState->pos == '\n' || *parseState->pos == '\r')
-            parseState->curLine = parseState->pos + 1;
-      }
-      else {
-         /* skip whitespace */
-         if (*parseState->pos == '\n' || *parseState->pos == '\r')
-            parseState->curLine = parseState->pos + 1;
-         parseState->pos += 1;
-      }
-   }
-
-   /* Try to match the pattern */
-   m = parseState->pos;
-   for (i = 0; pattern[i]; i++) {
-      if (*m != (GLubyte) pattern[i])
-         return GL_FALSE;
-      m += 1;
-   }
-   parseState->pos = m;
-
-   return GL_TRUE; /* success */
-}
-
-
-static GLboolean
-Parse_Identifier(struct parse_state *parseState, GLubyte *ident)
-{
-   if (!Parse_Token(parseState, ident))
-      RETURN_ERROR;
-   if (IsLetter(ident[0]))
-      return GL_TRUE;
-   else
-      RETURN_ERROR1("Expected an identfier");
-}
-
-
-/**
- * Parse a floating point constant, or a defined symbol name.
- * [+/-]N[.N[eN]]
- * Output:  number[0 .. 3] will get the value.
- */
-static GLboolean
-Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
-{
-   char *end = NULL;
-
-   *number = (GLfloat) _mesa_strtof((const char *) parseState->pos, &end);
-
-   if (end && end > (char *) parseState->pos) {
-      /* got a number */
-      parseState->pos = (GLubyte *) end;
-      number[1] = *number;
-      number[2] = *number;
-      number[3] = *number;
-      return GL_TRUE;
-   }
-   else {
-      /* should be an identifier */
-      GLubyte ident[100];
-      const GLfloat *constant;
-      if (!Parse_Identifier(parseState, ident))
-         RETURN_ERROR1("Expected an identifier");
-      constant = _mesa_lookup_parameter_value(parseState->parameters,
-                                              -1, (const char *) ident);
-      /* XXX Check that it's a constant and not a parameter */
-      if (!constant) {
-         RETURN_ERROR1("Undefined symbol");
-      }
-      else {
-         COPY_4V(number, constant);
-         return GL_TRUE;
-      }
-   }
-}
-
-
-
-/**
- * Parse a vector constant, one of:
- *   { float }
- *   { float, float }
- *   { float, float, float }
- *   { float, float, float, float }
- */
-static GLboolean
-Parse_VectorConstant(struct parse_state *parseState, GLfloat *vec)
-{
-   /* "{" was already consumed */
-
-   ASSIGN_4V(vec, 0.0, 0.0, 0.0, 1.0);
-
-   if (!Parse_ScalarConstant(parseState, vec+0))  /* X */
-      return GL_FALSE;
-
-   if (Parse_String(parseState, "}")) {
-      return GL_TRUE;
-   }
-
-   if (!Parse_String(parseState, ","))
-      RETURN_ERROR1("Expected comma in vector constant");
-
-   if (!Parse_ScalarConstant(parseState, vec+1))  /* Y */
-      return GL_FALSE;
-
-   if (Parse_String(parseState, "}")) {
-      return GL_TRUE;
-   }
-
-   if (!Parse_String(parseState, ","))
-      RETURN_ERROR1("Expected comma in vector constant");
-
-   if (!Parse_ScalarConstant(parseState, vec+2))  /* Z */
-      return GL_FALSE;
-
-   if (Parse_String(parseState, "}")) {
-      return GL_TRUE;
-   }
-
-   if (!Parse_String(parseState, ","))
-      RETURN_ERROR1("Expected comma in vector constant");
-
-   if (!Parse_ScalarConstant(parseState, vec+3))  /* W */
-      return GL_FALSE;
-
-   if (!Parse_String(parseState, "}"))
-      RETURN_ERROR1("Expected closing brace in vector constant");
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse <number>, <varname> or {a, b, c, d}.
- * Return number of values in the vector or scalar, or zero if parse error.
- */
-static GLuint
-Parse_VectorOrScalarConstant(struct parse_state *parseState, GLfloat *vec)
-{
-   if (Parse_String(parseState, "{")) {
-      return Parse_VectorConstant(parseState, vec);
-   }
-   else {
-      GLboolean b = Parse_ScalarConstant(parseState, vec);
-      if (b) {
-         vec[1] = vec[2] = vec[3] = vec[0];
-      }
-      return b;
-   }
-}
-
-
-/**
- * Parse a texture image source:
- *    [TEX0 | TEX1 | .. | TEX15] , [1D | 2D | 3D | CUBE | RECT]
- */
-static GLboolean
-Parse_TextureImageId(struct parse_state *parseState,
-                     GLubyte *texUnit, GLubyte *texTargetBit)
-{
-   GLubyte imageSrc[100];
-   GLint unit;
-
-   if (!Parse_Token(parseState, imageSrc))
-      RETURN_ERROR;
-   
-   if (imageSrc[0] != 'T' ||
-       imageSrc[1] != 'E' ||
-       imageSrc[2] != 'X') {
-      RETURN_ERROR1("Expected TEX# source");
-   }
-   unit = atoi((const char *) imageSrc + 3);
-   if ((unit < 0 || unit > MAX_TEXTURE_IMAGE_UNITS) ||
-       (unit == 0 && (imageSrc[3] != '0' || imageSrc[4] != 0))) {
-      RETURN_ERROR1("Invalied TEX# source index");
-   }
-   *texUnit = unit;
-
-   if (!Parse_String(parseState, ","))
-      RETURN_ERROR1("Expected ,");
-
-   if (Parse_String(parseState, "1D")) {
-      *texTargetBit = TEXTURE_1D_BIT;
-   }
-   else if (Parse_String(parseState, "2D")) {
-      *texTargetBit = TEXTURE_2D_BIT;
-   }
-   else if (Parse_String(parseState, "3D")) {
-      *texTargetBit = TEXTURE_3D_BIT;
-   }
-   else if (Parse_String(parseState, "CUBE")) {
-      *texTargetBit = TEXTURE_CUBE_BIT;
-   }
-   else if (Parse_String(parseState, "RECT")) {
-      *texTargetBit = TEXTURE_RECT_BIT;
-   }
-   else {
-      RETURN_ERROR1("Invalid texture target token");
-   }
-
-   /* update record of referenced texture units */
-   parseState->texturesUsed[*texUnit] |= *texTargetBit;
-   if (_mesa_bitcount(parseState->texturesUsed[*texUnit]) > 1) {
-      RETURN_ERROR1("Only one texture target can be used per texture unit.");
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse a scalar suffix like .x, .y, .z or .w or parse a swizzle suffix
- * like .wxyz, .xxyy, etc and return the swizzle indexes.
- */
-static GLboolean
-Parse_SwizzleSuffix(const GLubyte *token, GLuint swizzle[4])
-{
-   if (token[1] == 0) {
-      /* single letter swizzle (scalar) */
-      if (token[0] == 'x')
-         ASSIGN_4V(swizzle, 0, 0, 0, 0);
-      else if (token[0] == 'y')
-         ASSIGN_4V(swizzle, 1, 1, 1, 1);
-      else if (token[0] == 'z')
-         ASSIGN_4V(swizzle, 2, 2, 2, 2);
-      else if (token[0] == 'w')
-         ASSIGN_4V(swizzle, 3, 3, 3, 3);
-      else
-         return GL_FALSE;
-   }
-   else {
-      /* 4-component swizzle (vector) */
-      GLint k;
-      for (k = 0; k < 4 && token[k]; k++) {
-         if (token[k] == 'x')
-            swizzle[k] = 0;
-         else if (token[k] == 'y')
-            swizzle[k] = 1;
-         else if (token[k] == 'z')
-            swizzle[k] = 2;
-         else if (token[k] == 'w')
-            swizzle[k] = 3;
-         else
-            return GL_FALSE;
-      }
-      if (k != 4)
-         return GL_FALSE;
-   }
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_CondCodeMask(struct parse_state *parseState,
-                   struct prog_dst_register *dstReg)
-{
-   if (Parse_String(parseState, "EQ"))
-      dstReg->CondMask = COND_EQ;
-   else if (Parse_String(parseState, "GE"))
-      dstReg->CondMask = COND_GE;
-   else if (Parse_String(parseState, "GT"))
-      dstReg->CondMask = COND_GT;
-   else if (Parse_String(parseState, "LE"))
-      dstReg->CondMask = COND_LE;
-   else if (Parse_String(parseState, "LT"))
-      dstReg->CondMask = COND_LT;
-   else if (Parse_String(parseState, "NE"))
-      dstReg->CondMask = COND_NE;
-   else if (Parse_String(parseState, "TR"))
-      dstReg->CondMask = COND_TR;
-   else if (Parse_String(parseState, "FL"))
-      dstReg->CondMask = COND_FL;
-   else
-      RETURN_ERROR1("Invalid condition code mask");
-
-   /* look for optional .xyzw swizzle */
-   if (Parse_String(parseState, ".")) {
-      GLubyte token[100];
-      GLuint swz[4];
-
-      if (!Parse_Token(parseState, token))  /* get xyzw suffix */
-         RETURN_ERROR;
-
-      if (!Parse_SwizzleSuffix(token, swz))
-         RETURN_ERROR1("Invalid swizzle suffix");
-
-      dstReg->CondSwizzle = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse a temporary register: Rnn or Hnn
- */
-static GLboolean
-Parse_TempReg(struct parse_state *parseState, GLint *tempRegNum)
-{
-   GLubyte token[100];
-
-   /* Should be 'R##' or 'H##' */
-   if (!Parse_Token(parseState, token))
-      RETURN_ERROR;
-   if (token[0] != 'R' && token[0] != 'H')
-      RETURN_ERROR1("Expected R## or H##");
-
-   if (IsDigit(token[1])) {
-      GLint reg = atoi((const char *) (token + 1));
-      if (token[0] == 'H')
-         reg += 32;
-      if (reg >= MAX_NV_FRAGMENT_PROGRAM_TEMPS)
-         RETURN_ERROR1("Invalid temporary register name");
-      *tempRegNum = reg;
-   }
-   else {
-      RETURN_ERROR1("Invalid temporary register name");
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse a write-only dummy register: RC or HC.
- */
-static GLboolean
-Parse_DummyReg(struct parse_state *parseState, GLint *regNum)
-{
-   if (Parse_String(parseState, "RC")) {
-       *regNum = 0;
-   }
-   else if (Parse_String(parseState, "HC")) {
-       *regNum = 1;
-   }
-   else {
-      RETURN_ERROR1("Invalid write-only register name");
-   }
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse a program local parameter register "p[##]"
- */
-static GLboolean
-Parse_ProgramParamReg(struct parse_state *parseState, GLint *regNum)
-{
-   GLubyte token[100];
-
-   if (!Parse_String(parseState, "p["))
-      RETURN_ERROR1("Expected p[");
-
-   if (!Parse_Token(parseState, token))
-      RETURN_ERROR;
-
-   if (IsDigit(token[0])) {
-      /* a numbered program parameter register */
-      GLint reg = atoi((const char *) token);
-      if (reg >= MAX_NV_FRAGMENT_PROGRAM_PARAMS)
-         RETURN_ERROR1("Invalid constant program number");
-      *regNum = reg;
-   }
-   else {
-      RETURN_ERROR;
-   }
-
-   if (!Parse_String(parseState, "]"))
-      RETURN_ERROR1("Expected ]");
-
-   return GL_TRUE;
-}
-
-
-/**
- * Parse f[name]  - fragment input register
- */
-static GLboolean
-Parse_FragReg(struct parse_state *parseState, GLint *tempRegNum)
-{
-   GLubyte token[100];
-   GLint j;
-
-   /* Match 'f[' */
-   if (!Parse_String(parseState, "f["))
-      RETURN_ERROR1("Expected f[");
-
-   /* get <name> and look for match */
-   if (!Parse_Token(parseState, token)) {
-      RETURN_ERROR;
-   }
-   for (j = 0; InputRegisters[j]; j++) {
-      if (strcmp((const char *) token, InputRegisters[j]) == 0) {
-         *tempRegNum = j;
-         parseState->inputsRead |= (1 << j);
-         break;
-      }
-   }
-   if (!InputRegisters[j]) {
-      /* unknown input register label */
-      RETURN_ERROR2("Invalid register name", token);
-   }
-
-   /* Match '[' */
-   if (!Parse_String(parseState, "]"))
-      RETURN_ERROR1("Expected ]");
-
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_OutputReg(struct parse_state *parseState, GLint *outputRegNum)
-{
-   GLubyte token[100];
-
-   /* Match "o[" */
-   if (!Parse_String(parseState, "o["))
-      RETURN_ERROR1("Expected o[");
-
-   /* Get output reg name */
-   if (!Parse_Token(parseState, token))
-      RETURN_ERROR;
-
-   /* try to match an output register name */
-   if (strcmp((char *) token, "COLR") == 0 ||
-       strcmp((char *) token, "COLH") == 0) {
-      /* note that we don't distinguish between COLR and COLH */
-      *outputRegNum = FRAG_RESULT_COLOR;
-      parseState->outputsWritten |= (1 << FRAG_RESULT_COLOR);
-   }
-   else if (strcmp((char *) token, "DEPR") == 0) {
-      *outputRegNum = FRAG_RESULT_DEPTH;
-      parseState->outputsWritten |= (1 << FRAG_RESULT_DEPTH);
-   }
-   else {
-      RETURN_ERROR1("Invalid output register name");
-   }
-
-   /* Match ']' */
-   if (!Parse_String(parseState, "]"))
-      RETURN_ERROR1("Expected ]");
-
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_MaskedDstReg(struct parse_state *parseState,
-                   struct prog_dst_register *dstReg)
-{
-   GLubyte token[100];
-   GLint idx;
-
-   /* Dst reg can be R<n>, H<n>, o[n], RC or HC */
-   if (!Peek_Token(parseState, token))
-      RETURN_ERROR;
-
-   if (strcmp((const char *) token, "RC") == 0 ||
-       strcmp((const char *) token, "HC") == 0) {
-      /* a write-only register */
-      dstReg->File = PROGRAM_WRITE_ONLY;
-      if (!Parse_DummyReg(parseState, &idx))
-         RETURN_ERROR;
-      dstReg->Index = idx;
-   }
-   else if (token[0] == 'R' || token[0] == 'H') {
-      /* a temporary register */
-      dstReg->File = PROGRAM_TEMPORARY;
-      if (!Parse_TempReg(parseState, &idx))
-         RETURN_ERROR;
-      dstReg->Index = idx;
-   }
-   else if (token[0] == 'o') {
-      /* an output register */
-      dstReg->File = PROGRAM_OUTPUT;
-      if (!Parse_OutputReg(parseState, &idx))
-         RETURN_ERROR;
-      dstReg->Index = idx;
-   }
-   else {
-      RETURN_ERROR1("Invalid destination register name");
-   }
-
-   /* Parse optional write mask */
-   if (Parse_String(parseState, ".")) {
-      /* got a mask */
-      GLint k = 0;
-
-      if (!Parse_Token(parseState, token))  /* get xyzw writemask */
-         RETURN_ERROR;
-
-      dstReg->WriteMask = 0;
-
-      if (token[k] == 'x') {
-         dstReg->WriteMask |= WRITEMASK_X;
-         k++;
-      }
-      if (token[k] == 'y') {
-         dstReg->WriteMask |= WRITEMASK_Y;
-         k++;
-      }
-      if (token[k] == 'z') {
-         dstReg->WriteMask |= WRITEMASK_Z;
-         k++;
-      }
-      if (token[k] == 'w') {
-         dstReg->WriteMask |= WRITEMASK_W;
-         k++;
-      }
-      if (k == 0) {
-         RETURN_ERROR1("Invalid writemask character");
-      }
-
-   }
-   else {
-      dstReg->WriteMask = WRITEMASK_XYZW;
-   }
-
-   /* optional condition code mask */
-   if (Parse_String(parseState, "(")) {
-      /* ("EQ" | "GE" | "GT" | "LE" | "LT" | "NE" | "TR" | "FL".x|y|z|w) */
-      /* ("EQ" | "GE" | "GT" | "LE" | "LT" | "NE" | "TR" | "FL".[xyzw]) */
-      if (!Parse_CondCodeMask(parseState, dstReg))
-         RETURN_ERROR;
-
-      if (!Parse_String(parseState, ")"))  /* consume ")" */
-         RETURN_ERROR1("Expected )");
-
-      return GL_TRUE;
-   }
-   else {
-      /* no cond code mask */
-      dstReg->CondMask = COND_TR;
-      dstReg->CondSwizzle = SWIZZLE_NOOP;
-      return GL_TRUE;
-   }
-}
-
-
-/**
- * Parse a vector source (register, constant, etc):
- *   <vectorSrc>    ::= <absVectorSrc>
- *                    | <baseVectorSrc>
- *   <absVectorSrc> ::= <negate> "|" <baseVectorSrc> "|"
- */
-static GLboolean
-Parse_VectorSrc(struct parse_state *parseState,
-                struct prog_src_register *srcReg)
-{
-   GLfloat sign = 1.0F;
-   GLubyte token[100];
-   GLint idx;
-   GLuint negateBase, negateAbs;
-
-   /*
-    * First, take care of +/- and absolute value stuff.
-    */
-   if (Parse_String(parseState, "-"))
-      sign = -1.0F;
-   else if (Parse_String(parseState, "+"))
-      sign = +1.0F;
-
-   if (Parse_String(parseState, "|")) {
-      srcReg->Abs = GL_TRUE;
-      negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
-
-      if (Parse_String(parseState, "-"))
-         negateBase = NEGATE_XYZW;
-      else if (Parse_String(parseState, "+"))
-         negateBase = NEGATE_NONE;
-      else
-         negateBase = NEGATE_NONE;
-   }
-   else {
-      srcReg->Abs = GL_FALSE;
-      negateAbs = NEGATE_NONE;
-      negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
-   }
-
-   srcReg->Negate = srcReg->Abs ? negateAbs : negateBase;
-
-   /* This should be the real src vector/register name */
-   if (!Peek_Token(parseState, token))
-      RETURN_ERROR;
-
-   /* Src reg can be Rn, Hn, f[n], p[n], a named parameter, a scalar
-    * literal or vector literal.
-    */
-   if (token[0] == 'R' || token[0] == 'H') {
-      srcReg->File = PROGRAM_TEMPORARY;
-      if (!Parse_TempReg(parseState, &idx))
-         RETURN_ERROR;
-      srcReg->Index = idx;
-   }
-   else if (token[0] == 'f') {
-      /* XXX this might be an identifier! */
-      srcReg->File = PROGRAM_INPUT;
-      if (!Parse_FragReg(parseState, &idx))
-         RETURN_ERROR;
-      srcReg->Index = idx;
-   }
-   else if (token[0] == 'p') {
-      /* XXX this might be an identifier! */
-      srcReg->File = PROGRAM_LOCAL_PARAM;
-      if (!Parse_ProgramParamReg(parseState, &idx))
-         RETURN_ERROR;
-      srcReg->Index = idx;
-   }
-   else if (IsLetter(token[0])){
-      GLubyte ident[100];
-      GLint paramIndex;
-      if (!Parse_Identifier(parseState, ident))
-         RETURN_ERROR;
-      paramIndex = _mesa_lookup_parameter_index(parseState->parameters,
-                                                -1, (const char *) ident);
-      if (paramIndex < 0) {
-         RETURN_ERROR2("Undefined constant or parameter: ", ident);
-      }
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      srcReg->Index = paramIndex;      
-   }
-   else if (IsDigit(token[0]) || token[0] == '-' || token[0] == '+' || token[0] == '.'){
-      /* literal scalar constant */
-      GLfloat values[4];
-      GLuint paramIndex;
-      if (!Parse_ScalarConstant(parseState, values))
-         RETURN_ERROR;
-      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      srcReg->Index = paramIndex;
-   }
-   else if (token[0] == '{'){
-      /* literal vector constant */
-      GLfloat values[4];
-      GLuint paramIndex;
-      (void) Parse_String(parseState, "{");
-      if (!Parse_VectorConstant(parseState, values))
-         RETURN_ERROR;
-      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      srcReg->Index = paramIndex;      
-   }
-   else {
-      RETURN_ERROR2("Invalid source register name", token);
-   }
-
-   /* init swizzle fields */
-   srcReg->Swizzle = SWIZZLE_NOOP;
-
-   /* Look for optional swizzle suffix */
-   if (Parse_String(parseState, ".")) {
-      GLuint swz[4];
-
-      if (!Parse_Token(parseState, token))
-         RETURN_ERROR;
-
-      if (!Parse_SwizzleSuffix(token, swz))
-         RETURN_ERROR1("Invalid swizzle suffix");
-
-      srcReg->Swizzle = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
-   }
-
-   /* Finish absolute value */
-   if (srcReg->Abs && !Parse_String(parseState, "|")) {
-      RETURN_ERROR1("Expected |");
-   }
-
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_ScalarSrcReg(struct parse_state *parseState,
-                   struct prog_src_register *srcReg)
-{
-   GLubyte token[100];
-   GLfloat sign = 1.0F;
-   GLboolean needSuffix = GL_TRUE;
-   GLint idx;
-   GLuint negateBase, negateAbs;
-
-   /*
-    * First, take care of +/- and absolute value stuff.
-    */
-   if (Parse_String(parseState, "-"))
-      sign = -1.0F;
-   else if (Parse_String(parseState, "+"))
-      sign = +1.0F;
-
-   if (Parse_String(parseState, "|")) {
-      srcReg->Abs = GL_TRUE;
-      negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
-
-      if (Parse_String(parseState, "-"))
-         negateBase = NEGATE_XYZW;
-      else if (Parse_String(parseState, "+"))
-         negateBase = NEGATE_NONE;
-      else
-         negateBase = NEGATE_NONE;
-   }
-   else {
-      srcReg->Abs = GL_FALSE;
-      negateAbs = NEGATE_NONE;
-      negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
-   }
-
-   srcReg->Negate = srcReg->Abs ? negateAbs : negateBase;
-
-   if (!Peek_Token(parseState, token))
-      RETURN_ERROR;
-
-   /* Src reg can be R<n>, H<n> or a named fragment attrib */
-   if (token[0] == 'R' || token[0] == 'H') {
-      srcReg->File = PROGRAM_TEMPORARY;
-      if (!Parse_TempReg(parseState, &idx))
-         RETURN_ERROR;
-      srcReg->Index = idx;
-   }
-   else if (token[0] == 'f') {
-      srcReg->File = PROGRAM_INPUT;
-      if (!Parse_FragReg(parseState, &idx))
-         RETURN_ERROR;
-      srcReg->Index = idx;
-   }
-   else if (token[0] == '{') {
-      /* vector literal */
-      GLfloat values[4];
-      GLuint paramIndex;
-      (void) Parse_String(parseState, "{");
-      if (!Parse_VectorConstant(parseState, values))
-         RETURN_ERROR;
-      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      srcReg->Index = paramIndex;      
-   }
-   else if (IsLetter(token[0])){
-      /* named param/constant */
-      GLubyte ident[100];
-      GLint paramIndex;
-      if (!Parse_Identifier(parseState, ident))
-         RETURN_ERROR;
-      paramIndex = _mesa_lookup_parameter_index(parseState->parameters,
-                                                -1, (const char *) ident);
-      if (paramIndex < 0) {
-         RETURN_ERROR2("Undefined constant or parameter: ", ident);
-      }
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      srcReg->Index = paramIndex;      
-   }
-   else if (IsDigit(token[0])) {
-      /* scalar literal */
-      GLfloat values[4];
-      GLuint paramIndex;
-      if (!Parse_ScalarConstant(parseState, values))
-         RETURN_ERROR;
-      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
-                                              values, 4, NULL);
-      srcReg->Index = paramIndex;      
-      srcReg->File = PROGRAM_NAMED_PARAM;
-      needSuffix = GL_FALSE;
-   }
-   else {
-      RETURN_ERROR2("Invalid scalar source argument", token);
-   }
-
-   srcReg->Swizzle = 0;
-   if (needSuffix) {
-      /* parse .[xyzw] suffix */
-      if (!Parse_String(parseState, "."))
-         RETURN_ERROR1("Expected .");
-
-      if (!Parse_Token(parseState, token))
-         RETURN_ERROR;
-
-      if (token[0] == 'x' && token[1] == 0) {
-         srcReg->Swizzle = 0;
-      }
-      else if (token[0] == 'y' && token[1] == 0) {
-         srcReg->Swizzle = 1;
-      }
-      else if (token[0] == 'z' && token[1] == 0) {
-         srcReg->Swizzle = 2;
-      }
-      else if (token[0] == 'w' && token[1] == 0) {
-         srcReg->Swizzle = 3;
-      }
-      else {
-         RETURN_ERROR1("Invalid scalar source suffix");
-      }
-   }
-
-   /* Finish absolute value */
-   if (srcReg->Abs && !Parse_String(parseState, "|")) {
-      RETURN_ERROR1("Expected |");
-   }
-
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_PrintInstruction(struct parse_state *parseState,
-                       struct prog_instruction *inst)
-{
-   const GLubyte *str;
-   GLubyte *msg;
-   GLuint len;
-   GLint idx;
-
-   /* The first argument is a literal string 'just like this' */
-   if (!Parse_String(parseState, "'"))
-      RETURN_ERROR1("Expected '");
-
-   str = parseState->pos;
-   for (len = 0; str[len] != '\''; len++) /* find closing quote */
-      ;
-   parseState->pos += len + 1;
-   msg = (GLubyte*) malloc(len + 1);
-
-   memcpy(msg, str, len);
-   msg[len] = 0;
-   inst->Data = msg;
-
-   if (Parse_String(parseState, ",")) {
-      /* got an optional register to print */
-      GLubyte token[100];
-      GetToken(parseState, token);
-      if (token[0] == 'o') {
-         /* dst reg */
-         if (!Parse_OutputReg(parseState, &idx))
-            RETURN_ERROR;
-	 inst->SrcReg[0].Index = idx;
-         inst->SrcReg[0].File = PROGRAM_OUTPUT;
-      }
-      else {
-         /* src reg */
-         if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-            RETURN_ERROR;
-      }
-   }
-   else {
-      inst->SrcReg[0].File = PROGRAM_UNDEFINED;
-   }
-
-   inst->SrcReg[0].Swizzle = SWIZZLE_NOOP;
-   inst->SrcReg[0].Abs = GL_FALSE;
-   inst->SrcReg[0].Negate = NEGATE_NONE;
-
-   return GL_TRUE;
-}
-
-
-static GLboolean
-Parse_InstructionSequence(struct parse_state *parseState,
-                          struct prog_instruction program[])
-{
-   while (1) {
-      struct prog_instruction *inst = program + parseState->numInst;
-      struct instruction_pattern instMatch;
-      GLubyte token[100];
-
-      /* Initialize the instruction */
-      _mesa_init_instructions(inst, 1);
-
-      /* special instructions */
-      if (Parse_String(parseState, "DEFINE")) {
-         GLubyte id[100];
-         GLfloat value[7];  /* yes, 7 to be safe */
-         if (!Parse_Identifier(parseState, id))
-            RETURN_ERROR;
-         /* XXX make sure id is not a reserved identifer, like R9 */
-         if (!Parse_String(parseState, "="))
-            RETURN_ERROR1("Expected =");
-         if (!Parse_VectorOrScalarConstant(parseState, value))
-            RETURN_ERROR;
-         if (!Parse_String(parseState, ";"))
-            RETURN_ERROR1("Expected ;");
-         if (_mesa_lookup_parameter_index(parseState->parameters,
-                                          -1, (const char *) id) >= 0) {
-            RETURN_ERROR2(id, "already defined");
-         }
-         _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
-      }
-      else if (Parse_String(parseState, "DECLARE")) {
-         GLubyte id[100];
-         GLfloat value[7] = {0, 0, 0, 0, 0, 0, 0};  /* yes, to be safe */
-         if (!Parse_Identifier(parseState, id))
-            RETURN_ERROR;
-         /* XXX make sure id is not a reserved identifer, like R9 */
-         if (Parse_String(parseState, "=")) {
-            if (!Parse_VectorOrScalarConstant(parseState, value))
-               RETURN_ERROR;
-         }
-         if (!Parse_String(parseState, ";"))
-            RETURN_ERROR1("Expected ;");
-         if (_mesa_lookup_parameter_index(parseState->parameters,
-                                          -1, (const char *) id) >= 0) {
-            RETURN_ERROR2(id, "already declared");
-         }
-         _mesa_add_named_parameter(parseState->parameters,
-                                   (const char *) id, value);
-      }
-      else if (Parse_String(parseState, "END")) {
-         inst->Opcode = OPCODE_END;
-         parseState->numInst++;
-         if (Parse_Token(parseState, token)) {
-            RETURN_ERROR1("Code after END opcode.");
-         }
-         break;
-      }
-      else {
-         /* general/arithmetic instruction */
-
-         /* get token */
-         if (!Parse_Token(parseState, token)) {
-            RETURN_ERROR1("Missing END instruction.");
-         }
-
-         /* try to find matching instuction */
-         instMatch = MatchInstruction(token);
-         if (instMatch.opcode >= MAX_OPCODE) {
-            /* bad instruction name */
-            RETURN_ERROR2("Unexpected token: ", token);
-         }
-
-         inst->Opcode = instMatch.opcode;
-         inst->Precision = instMatch.suffixes & (_R | _H | _X);
-         inst->SaturateMode = (instMatch.suffixes & (_S))
-            ? SATURATE_ZERO_ONE : SATURATE_OFF;
-         inst->CondUpdate = (instMatch.suffixes & (_C)) ? GL_TRUE : GL_FALSE;
-
-         /*
-          * parse the input and output operands
-          */
-         if (instMatch.outputs == OUTPUT_S || instMatch.outputs == OUTPUT_V) {
-            if (!Parse_MaskedDstReg(parseState, &inst->DstReg))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-         }
-         else if (instMatch.outputs == OUTPUT_NONE) {
-            if (instMatch.opcode == OPCODE_KIL_NV) {
-               /* This is a little weird, the cond code info is in
-                * the dest register.
-                */
-               if (!Parse_CondCodeMask(parseState, &inst->DstReg))
-                  RETURN_ERROR;
-            }
-            else {
-               ASSERT(instMatch.opcode == OPCODE_PRINT);
-            }
-         }
-
-         if (instMatch.inputs == INPUT_1V) {
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-         }
-         else if (instMatch.inputs == INPUT_2V) {
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
-               RETURN_ERROR;
-         }
-         else if (instMatch.inputs == INPUT_3V) {
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[2]))
-               RETURN_ERROR;
-         }
-         else if (instMatch.inputs == INPUT_1S) {
-            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-         }
-         else if (instMatch.inputs == INPUT_2S) {
-            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[1]))
-               RETURN_ERROR;
-         }
-         else if (instMatch.inputs == INPUT_CC) {
-            /* XXX to-do */
-         }
-         else if (instMatch.inputs == INPUT_1V_T) {
-	    GLubyte unit, idx;
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_TextureImageId(parseState, &unit, &idx))
-               RETURN_ERROR;
-	    inst->TexSrcUnit = unit;
-	    inst->TexSrcTarget = idx;
-         }
-         else if (instMatch.inputs == INPUT_3V_T) {
-	    GLubyte unit, idx;
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_VectorSrc(parseState, &inst->SrcReg[2]))
-               RETURN_ERROR;
-            if (!Parse_String(parseState, ","))
-               RETURN_ERROR1("Expected ,");
-            if (!Parse_TextureImageId(parseState, &unit, &idx))
-               RETURN_ERROR;
-	    inst->TexSrcUnit = unit;
-	    inst->TexSrcTarget = idx;
-         }
-         else if (instMatch.inputs == INPUT_1V_S) {
-            if (!Parse_PrintInstruction(parseState, inst))
-               RETURN_ERROR;
-         }
-
-         /* end of statement semicolon */
-         if (!Parse_String(parseState, ";"))
-            RETURN_ERROR1("Expected ;");
-
-         parseState->numInst++;
-
-         if (parseState->numInst >= MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS)
-            RETURN_ERROR1("Program too long");
-      }
-   }
-   return GL_TRUE;
-}
-
-
-
-/**
- * Parse/compile the 'str' returning the compiled 'program'.
- * ctx->Program.ErrorPos will be -1 if successful.  Otherwise, ErrorPos
- * indicates the position of the error in 'str'.
- */
-void
-_mesa_parse_nv_fragment_program(struct gl_context *ctx, GLenum dstTarget,
-                                const GLubyte *str, GLsizei len,
-                                struct gl_fragment_program *program)
-{
-   struct parse_state parseState;
-   struct prog_instruction instBuffer[MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS];
-   struct prog_instruction *newInst;
-   GLenum target;
-   GLubyte *programString;
-
-   /* Make a null-terminated copy of the program string */
-   programString = (GLubyte *) MALLOC(len + 1);
-   if (!programString) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
-      return;
-   }
-   memcpy(programString, str, len);
-   programString[len] = 0;
-
-   /* Get ready to parse */
-   memset(&parseState, 0, sizeof(struct parse_state));
-   parseState.ctx = ctx;
-   parseState.start = programString;
-   parseState.program = program;
-   parseState.numInst = 0;
-   parseState.curLine = programString;
-   parseState.parameters = _mesa_new_parameter_list();
-
-   /* Reset error state */
-   _mesa_set_program_error(ctx, -1, NULL);
-
-   /* check the program header */
-   if (strncmp((const char *) programString, "!!FP1.0", 7) == 0) {
-      target = GL_FRAGMENT_PROGRAM_NV;
-      parseState.pos = programString + 7;
-   }
-   else if (strncmp((const char *) programString, "!!FCP1.0", 8) == 0) {
-      /* fragment / register combiner program - not supported */
-      _mesa_set_program_error(ctx, 0, "Invalid fragment program header");
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(bad header)");
-      return;
-   }
-   else {
-      /* invalid header */
-      _mesa_set_program_error(ctx, 0, "Invalid fragment program header");
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(bad header)");
-      return;
-   }
-
-   /* make sure target and header match */
-   if (target != dstTarget) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glLoadProgramNV(target mismatch 0x%x != 0x%x)",
-                  target, dstTarget);
-      return;
-   }
-
-   if (Parse_InstructionSequence(&parseState, instBuffer)) {
-      GLuint u;
-      /* successful parse! */
-
-      if (parseState.outputsWritten == 0) {
-         /* must write at least one output! */
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "Invalid fragment program - no outputs written.");
-         return;
-      }
-
-      /* copy the compiled instructions */
-      assert(parseState.numInst <= MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS);
-      newInst = _mesa_alloc_instructions(parseState.numInst);
-      if (!newInst) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
-         return;  /* out of memory */
-      }
-      _mesa_copy_instructions(newInst, instBuffer, parseState.numInst);
-
-      /* install the program */
-      program->Base.Target = target;
-      if (program->Base.String) {
-         FREE(program->Base.String);
-      }
-      program->Base.String = programString;
-      program->Base.Format = GL_PROGRAM_FORMAT_ASCII_ARB;
-      if (program->Base.Instructions) {
-         free(program->Base.Instructions);
-      }
-      program->Base.Instructions = newInst;
-      program->Base.NumInstructions = parseState.numInst;
-      program->Base.InputsRead = parseState.inputsRead;
-      program->Base.OutputsWritten = parseState.outputsWritten;
-      for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++)
-         program->Base.TexturesUsed[u] = parseState.texturesUsed[u];
-
-      /* save program parameters */
-      program->Base.Parameters = parseState.parameters;
-
-      /* allocate registers for declared program parameters */
-#if 00
-      _mesa_assign_program_registers(&(program->SymbolTable));
-#endif
-
-#ifdef DEBUG_foo
-      printf("--- glLoadProgramNV(%d) result ---\n", program->Base.Id);
-      _mesa_fprint_program_opt(stdout, &program->Base, PROG_PRINT_NV, 0);
-      printf("----------------------------------\n");
-#endif
-   }
-   else {
-      /* Error! */
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV");
-      /* NOTE: _mesa_set_program_error would have been called already */
-   }
-}
-
-
-const char *
-_mesa_nv_fragment_input_register_name(GLuint i)
-{
-   ASSERT(i < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
-   return InputRegisters[i];
-}
-
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file nvfragparse.c
+ * NVIDIA fragment program parser.
+ * \author Brian Paul
+ */
+
+/*
+ * Regarding GL_NV_fragment_program:
+ *
+ * Portions of this software may use or implement intellectual
+ * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
+ * any and all warranties with respect to such intellectual property,
+ * including any use thereof or modifications thereto.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_parameter.h"
+#include "prog_print.h"
+#include "prog_instruction.h"
+#include "nvfragparse.h"
+
+
+#define INPUT_1V     1  /* one vector source */
+#define INPUT_2V     2  /* two vector sources */
+#define INPUT_3V     3  /* three vector sources */
+#define INPUT_1S     4  /* one scalar source */
+#define INPUT_2S     5  /* two scalar sources */
+#define INPUT_CC     6
+#define INPUT_1V_T   7  /* one source vector, plus textureId */
+#define INPUT_3V_T   8  /* three source vectors, plus textureId */
+#define INPUT_NONE   9
+#define INPUT_1V_S  10  /* a string and a vector register */
+#define OUTPUT_V    20
+#define OUTPUT_S    21
+#define OUTPUT_NONE 22
+
+/* IRIX defines some of these */
+#undef _R
+#undef _H
+#undef _X
+#undef _C
+#undef _S
+
+/* Optional suffixes */
+#define _R  FLOAT32  /* float */
+#define _H  FLOAT16  /* half-float */
+#define _X  FIXED12  /* fixed */
+#define _C  0x08     /* set cond codes */
+#define _S  0x10     /* saturate, clamp result to [0,1] */
+
+struct instruction_pattern {
+   const char *name;          /* instruction name as written in the program */
+   enum prog_opcode opcode;
+   GLuint inputs;             /* one of the INPUT_* codes */
+   GLuint outputs;            /* one of the OUTPUT_* codes */
+   GLuint suffixes;           /* bitmask of _R, _H, _X, _C, _S */
+};
+
+static const struct instruction_pattern Instructions[] = {  /* ends with a NULL name */
+   { "ADD", OPCODE_ADD, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "COS", OPCODE_COS, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "DDX", OPCODE_DDX, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
+   { "DDY", OPCODE_DDY, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
+   { "DP3", OPCODE_DP3, INPUT_2V, OUTPUT_S, _R | _H | _X | _C | _S },
+   { "DP4", OPCODE_DP4, INPUT_2V, OUTPUT_S, _R | _H | _X | _C | _S },
+   { "DST", OPCODE_DST, INPUT_2V, OUTPUT_V, _R | _H |      _C | _S },
+   { "EX2", OPCODE_EX2, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "FLR", OPCODE_FLR, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "FRC", OPCODE_FRC, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "KIL", OPCODE_KIL_NV, INPUT_CC, OUTPUT_NONE, 0                },
+   { "LG2", OPCODE_LG2, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "LIT", OPCODE_LIT, INPUT_1V, OUTPUT_V, _R | _H |      _C | _S },
+   { "LRP", OPCODE_LRP, INPUT_3V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "MAD", OPCODE_MAD, INPUT_3V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "MAX", OPCODE_MAX, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "MIN", OPCODE_MIN, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "MOV", OPCODE_MOV, INPUT_1V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "MUL", OPCODE_MUL, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "PK2H",  OPCODE_PK2H,  INPUT_1V, OUTPUT_S, 0                  },
+   { "PK2US", OPCODE_PK2US, INPUT_1V, OUTPUT_S, 0                  },
+   { "PK4B",  OPCODE_PK4B,  INPUT_1V, OUTPUT_S, 0                  },
+   { "PK4UB", OPCODE_PK4UB, INPUT_1V, OUTPUT_S, 0                  },
+   { "POW", OPCODE_POW, INPUT_2S, OUTPUT_S, _R | _H |      _C | _S },
+   { "RCP", OPCODE_RCP, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "RFL", OPCODE_RFL, INPUT_2V, OUTPUT_V, _R | _H |      _C | _S },
+   { "RSQ", OPCODE_RSQ, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "SEQ", OPCODE_SEQ, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SFL", OPCODE_SFL, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SGE", OPCODE_SGE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SGT", OPCODE_SGT, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SIN", OPCODE_SIN, INPUT_1S, OUTPUT_S, _R | _H |      _C | _S },
+   { "SLE", OPCODE_SLE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SLT", OPCODE_SLT, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SNE", OPCODE_SNE, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "STR", OPCODE_STR, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "SUB", OPCODE_SUB, INPUT_2V, OUTPUT_V, _R | _H | _X | _C | _S },
+   { "TEX", OPCODE_TEX, INPUT_1V_T, OUTPUT_V,              _C | _S },
+   { "TXD", OPCODE_TXD, INPUT_3V_T, OUTPUT_V,              _C | _S },
+   { "TXP", OPCODE_TXP_NV, INPUT_1V_T, OUTPUT_V,           _C | _S },
+   { "UP2H",  OPCODE_UP2H,  INPUT_1S, OUTPUT_V,            _C | _S },
+   { "UP2US", OPCODE_UP2US, INPUT_1S, OUTPUT_V,            _C | _S },
+   { "UP4B",  OPCODE_UP4B,  INPUT_1S, OUTPUT_V,            _C | _S },
+   { "UP4UB", OPCODE_UP4UB, INPUT_1S, OUTPUT_V,            _C | _S },
+   { "X2D", OPCODE_X2D, INPUT_3V, OUTPUT_V, _R | _H |      _C | _S },
+   { "PRINT", OPCODE_PRINT, INPUT_1V_S, OUTPUT_NONE, 0               },
+   { NULL, (enum prog_opcode) -1, 0, 0, 0 }
+};
+
+
+/*
+ * Information needed or computed during parsing.
+ * Remember, we can't modify the target program object until we've
+ * _successfully_ parsed the program text.
+ */
+struct parse_state {
+   struct gl_context *ctx;
+   const GLubyte *start;              /* start of program string */
+   const GLubyte *pos;                /* current position */
+   const GLubyte *curLine;            /* char after the last newline skipped */
+   struct gl_fragment_program *program;  /* current program */
+
+   struct gl_program_parameter_list *parameters;  /* named symbols looked up while parsing */
+
+   GLuint numInst;                    /* number of instructions parsed */
+   GLuint inputsRead;                 /* bitmask of input registers used */
+   GLuint outputsWritten;             /* bitmask of 1 << FRAG_RESULT_* bits */
+   GLuint texturesUsed[MAX_TEXTURE_IMAGE_UNITS];  /* per-unit TEXTURE_*_BIT mask */
+};
+
+
+
+/*
+ * Called whenever we find an error during parsing; lineNo is the caller's __LINE__.
+ */
+static void
+record_error(struct parse_state *parseState, const char *msg, int lineNo)
+{
+#ifdef DEBUG
+   GLint line, column;
+   const GLubyte *lineStr;
+   lineStr = _mesa_find_line_column(parseState->start,
+                                    parseState->pos, &line, &column);
+   _mesa_debug(parseState->ctx,
+               "nvfragparse.c(%d): line %d, column %d:%s (%s)\n",
+               lineNo, line, column, (char *) lineStr, msg);
+   free((void *) lineStr);
+#else
+   (void) lineNo;   /* only used by the debug message above */
+#endif
+
+   /* Check that no error was already recorded.  Only record the first one. */
+   if (parseState->ctx->Program.ErrorString[0] == 0) {
+      _mesa_set_program_error(parseState->ctx,
+                              parseState->pos - parseState->start,  /* byte offset */
+                              msg);
+   }
+}
+
+
+#define RETURN_ERROR							\
+do {									\
+   record_error(parseState, "Unexpected end of input.", __LINE__);	\
+   return GL_FALSE;							\
+} while(0)
+
+#define RETURN_ERROR1(msg)						\
+do {									\
+   record_error(parseState, msg, __LINE__);				\
+   return GL_FALSE;							\
+} while(0)
+
+#define RETURN_ERROR2(msg1, msg2)					\
+do {									\
+   char err[1000] = "";  /* zero-filled: stays terminated if truncated */	\
+   _mesa_snprintf(err, sizeof(err) - 1, "%s %s", msg1, msg2);		\
+   record_error(parseState, err, __LINE__);				\
+   return GL_FALSE;							\
+} while(0)
+
+
+
+
+/*
+ * Find the Instructions entry whose full name starts 'token', then decode suffixes.
+ */
+static struct instruction_pattern
+MatchInstruction(const GLubyte *token)
+{
+   const struct instruction_pattern *inst;
+   struct instruction_pattern result;
+
+   result.name = NULL;
+   result.opcode = MAX_OPCODE; /* i.e. invalid instruction */
+   result.inputs = 0;
+   result.outputs = 0;
+   result.suffixes = 0;
+
+   for (inst = Instructions; inst->name; inst++) {
+      /* Compare the whole name: a 3-char compare made PK2US, PK4UB, ... unreachable */
+      if (strncmp((const char *) token, inst->name, strlen(inst->name)) == 0) {
+         int i = (int) strlen(inst->name);  /* suffixes follow the name */
+         result = *inst;
+         result.suffixes = 0;   /* report only the suffixes present in token */
+         /* look at suffix */
+         if (token[i] == 'R') {
+            result.suffixes |= _R;
+            i++;
+         }
+         else if (token[i] == 'H') {
+            result.suffixes |= _H;
+            i++;
+         }
+         else if (token[i] == 'X') {
+            result.suffixes |= _X;
+            i++;
+         }
+         if (token[i] == 'C') {
+            result.suffixes |= _C;
+            i++;
+         }
+         if (token[i] == '_' && token[i+1] == 'S' &&
+             token[i+2] == 'A' && token[i+3] == 'T') {
+            result.suffixes |= _S;
+         }
+         return result;
+      }
+   }
+
+   return result;   /* opcode is still MAX_OPCODE: no match */
+}
+
+
+
+
+/**********************************************************************/
+
+
+static GLboolean IsLetter(GLubyte b)   /* true for ASCII letters, '_' and '$' */
+{
+   return (b >= 'a' && b <= 'z') ||
+          (b >= 'A' && b <= 'Z') ||
+          (b == '_') ||
+          (b == '$');
+}
+
+
+static GLboolean IsDigit(GLubyte b)   /* true for '0' .. '9' */
+{
+   return b >= '0' && b <= '9';
+}
+
+
+static GLboolean IsWhitespace(GLubyte b)   /* space, tab, LF or CR */
+{
+   return b == ' ' || b == '\t' || b == '\n' || b == '\r';
+}
+
+
+/**
+ * Starting at parseState->pos find the next token.  A token can be an
+ * integer, an identifier or a single punctuation character.
+ * \return chars consumed (> 0), or -(chars skipped) <= 0 at end of input.
+ */
+static GLint
+GetToken(struct parse_state *parseState, GLubyte *token)
+{
+   const GLubyte *str = parseState->pos;
+   GLint i = 0, j = 0;   /* i indexes the input, j indexes token[] */
+
+   token[0] = 0;
+
+   /* skip whitespace and comments */
+   while (str[i] && (IsWhitespace(str[i]) || str[i] == '#')) {
+      if (str[i] == '#') {
+         /* skip comment */
+         while (str[i] && (str[i] != '\n' && str[i] != '\r')) {
+            i++;
+         }
+         if (str[i] == '\n' || str[i] == '\r')
+            parseState->curLine = str + i + 1;
+      }
+      else {
+         /* skip whitespace */
+         if (str[i] == '\n' || str[i] == '\r')
+            parseState->curLine = str + i + 1;
+         i++;
+      }
+   }
+
+   if (str[i] == 0)
+      return -i;   /* end of input: report how much was skipped, negated */
+
+   /* try matching an integer */
+   while (str[i] && IsDigit(str[i])) {
+      token[j++] = str[i++];   /* NOTE(review): j is unbounded; callers in view pass 100 bytes */
+   }
+   if (j > 0 || !str[i]) {
+      token[j] = 0;
+      return i;
+   }
+
+   /* try matching an identifier */
+   if (IsLetter(str[i])) {
+      while (str[i] && (IsLetter(str[i]) || IsDigit(str[i]))) {
+         token[j++] = str[i++];   /* NOTE(review): same unbounded write as above */
+      }
+      token[j] = 0;
+      return i;
+   }
+
+   /* punctuation character */
+   if (str[i]) {
+      token[0] = str[i++];
+      token[1] = 0;
+      return i;
+   }
+
+   /* end of input */
+   token[0] = 0;
+   return i;
+}
+
+
+/**
+ * Get next token from input stream and increment stream pointer past token.
+ */
+static GLboolean
+Parse_Token(struct parse_state *parseState, GLubyte *token)
+{
+   GLint i;
+   i = GetToken(parseState, token);
+   if (i <= 0) {
+      parseState->pos += (-i);   /* no token: still skip the whitespace/comments */
+      return GL_FALSE;
+   }
+   parseState->pos += i;
+   return GL_TRUE;
+}
+
+
+/**
+ * Get next token; the stream pointer is left at the start of that token.
+ */
+static GLboolean
+Peek_Token(struct parse_state *parseState, GLubyte *token)
+{
+   GLint i, len;
+   i = GetToken(parseState, token);
+   if (i <= 0) {
+      parseState->pos += (-i);   /* no token: skip the whitespace/comments */
+      return GL_FALSE;
+   }
+   len = (GLint) strlen((const char *) token);
+   parseState->pos += (i - len);   /* advance over leading whitespace only */
+   return GL_TRUE;
+}
+
+
+/**********************************************************************/
+
+static const char *InputRegisters[MAX_NV_FRAGMENT_PROGRAM_INPUTS + 1] = {
+   "WPOS", "COL0", "COL1", "FOGC",
+   "TEX0", "TEX1", "TEX2", "TEX3", "TEX4", "TEX5", "TEX6", "TEX7", NULL
+};
+
+
+
+/**********************************************************************/
+
+/**
+ * Skip whitespace/comments, then match 'pattern' literally at the stream position.
+ */
+static GLboolean
+Parse_String(struct parse_state *parseState, const char *pattern)
+{
+   const GLubyte *m;
+   GLint i;
+
+   /* skip whitespace and comments */
+   while (IsWhitespace(*parseState->pos) || *parseState->pos == '#') {
+      if (*parseState->pos == '#') {
+         while (*parseState->pos && (*parseState->pos != '\n' && *parseState->pos != '\r')) {
+            parseState->pos += 1;
+         }
+         if (*parseState->pos == '\n' || *parseState->pos == '\r')
+            parseState->curLine = parseState->pos + 1;
+      }
+      else {
+         /* skip whitespace */
+         if (*parseState->pos == '\n' || *parseState->pos == '\r')
+            parseState->curLine = parseState->pos + 1;
+         parseState->pos += 1;
+      }
+   }
+
+   /* Try to match the pattern (only pattern's chars are compared; no token-boundary check) */
+   m = parseState->pos;
+   for (i = 0; pattern[i]; i++) {
+      if (*m != (GLubyte) pattern[i])
+         return GL_FALSE;   /* skipped whitespace stays consumed, pattern chars do not */
+      m += 1;
+   }
+   parseState->pos = m;
+
+   return GL_TRUE; /* success */
+}
+
+
+static GLboolean
+Parse_Identifier(struct parse_state *parseState, GLubyte *ident)
+{
+   if (!Parse_Token(parseState, ident))   /* consumes the next token into ident */
+      RETURN_ERROR;
+   if (IsLetter(ident[0]))   /* identifiers start with a letter, '_' or '$' */
+      return GL_TRUE;
+   else
+      RETURN_ERROR1("Expected an identifier");
+}
+
+
+/**
+ * Parse a floating point constant ([+/-]N[.N[eN]]) or a defined symbol name.
+ * Output:  number[0 .. 3] -- a literal is replicated to all four slots, a
+ * symbol's four values are copied -- so 'number' must hold four floats.
+ */
+static GLboolean
+Parse_ScalarConstant(struct parse_state *parseState, GLfloat *number)
+{
+   char *end = NULL;
+
+   *number = (GLfloat) _mesa_strtof((const char *) parseState->pos, &end);
+
+   if (end && end > (char *) parseState->pos) {
+      /* got a number */
+      parseState->pos = (GLubyte *) end;
+      number[1] = *number;
+      number[2] = *number;
+      number[3] = *number;
+      return GL_TRUE;
+   }
+   else {
+      /* should be an identifier */
+      GLubyte ident[100];
+      const GLfloat *constant;
+      if (!Parse_Identifier(parseState, ident))
+         RETURN_ERROR1("Expected an identifier");
+      constant = (GLfloat *)_mesa_lookup_parameter_value(parseState->parameters,
+                                                         -1, 
+                                                         (const char *) ident);
+      /* XXX Check that it's a constant and not a parameter */
+      if (!constant) {
+         RETURN_ERROR1("Undefined symbol");
+      }
+      else {
+         COPY_4V(number, constant);
+         return GL_TRUE;
+      }
+   }
+}
+
+
+
+/**
+ * Parse a vector constant, one of:
+ *   { float }
+ *   { float, float }
+ *   { float, float, float }
+ *   { float, float, float, float }
+ * The opening "{" must already have been consumed.
+ * Output: vec[0..3]; components not given keep the defaults (0, 0, 0, 1).
+ * Each scalar is parsed into a scratch array because Parse_ScalarConstant()
+ * writes four floats, which would otherwise clobber later components and
+ * run past the end of 'vec'.
+ */
+static GLboolean
+Parse_VectorConstant(struct parse_state *parseState, GLfloat *vec)
+{
+   GLfloat scalar[4];   /* scratch; only scalar[0] is kept per component */
+
+   /* "{" was already consumed */
+   ASSIGN_4V(vec, 0.0, 0.0, 0.0, 1.0);
+
+   if (!Parse_ScalarConstant(parseState, scalar))  /* X */
+      return GL_FALSE;
+   vec[0] = scalar[0];
+   if (Parse_String(parseState, "}"))
+      return GL_TRUE;
+   if (!Parse_String(parseState, ","))
+      RETURN_ERROR1("Expected comma in vector constant");
+
+   if (!Parse_ScalarConstant(parseState, scalar))  /* Y */
+      return GL_FALSE;
+   vec[1] = scalar[0];
+   if (Parse_String(parseState, "}"))
+      return GL_TRUE;
+   if (!Parse_String(parseState, ","))
+      RETURN_ERROR1("Expected comma in vector constant");
+
+   if (!Parse_ScalarConstant(parseState, scalar))  /* Z */
+      return GL_FALSE;
+   vec[2] = scalar[0];
+   if (Parse_String(parseState, "}"))
+      return GL_TRUE;
+   if (!Parse_String(parseState, ","))
+      RETURN_ERROR1("Expected comma in vector constant");
+
+   if (!Parse_ScalarConstant(parseState, scalar))  /* W */
+      return GL_FALSE;
+   vec[3] = scalar[0];
+   if (!Parse_String(parseState, "}"))
+      RETURN_ERROR1("Expected closing brace in vector constant");
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse <number>, <varname> or {a, b, c, d} into vec[0..3].
+ * Return the GLboolean parse result (nonzero on success, zero on error).
+ */
+static GLuint
+Parse_VectorOrScalarConstant(struct parse_state *parseState, GLfloat *vec)
+{
+   if (Parse_String(parseState, "{")) {
+      return Parse_VectorConstant(parseState, vec);
+   }
+   else {
+      GLboolean b = Parse_ScalarConstant(parseState, vec);
+      if (b) {
+         vec[1] = vec[2] = vec[3] = vec[0];   /* replicate x to all components */
+      }
+      return b;
+   }
+}
+
+
+/**
+ * Parse a texture image source: TEX<n> , [1D | 2D | 3D | CUBE | RECT]
+ * with n < MAX_TEXTURE_IMAGE_UNITS.  Outputs the unit and its TEXTURE_*_BIT.
+ */
+static GLboolean
+Parse_TextureImageId(struct parse_state *parseState,
+                     GLubyte *texUnit, GLubyte *texTargetBit)
+{
+   GLubyte imageSrc[100];
+   GLint unit;
+
+   if (!Parse_Token(parseState, imageSrc))
+      RETURN_ERROR;
+
+   if (imageSrc[0] != 'T' ||
+       imageSrc[1] != 'E' ||
+       imageSrc[2] != 'X') {
+      RETURN_ERROR1("Expected TEX# source");
+   }
+   unit = atoi((const char *) imageSrc + 3);
+   /* unit indexes texturesUsed[], so MAX_TEXTURE_IMAGE_UNITS itself is out of range */
+   if ((unit < 0 || unit >= MAX_TEXTURE_IMAGE_UNITS) ||
+       (unit == 0 && (imageSrc[3] != '0' || imageSrc[4] != 0))) {
+      RETURN_ERROR1("Invalid TEX# source index");
+   }
+   *texUnit = unit;
+   if (!Parse_String(parseState, ","))
+      RETURN_ERROR1("Expected ,");
+
+   if (Parse_String(parseState, "1D")) {
+      *texTargetBit = TEXTURE_1D_BIT;
+   }
+   else if (Parse_String(parseState, "2D")) {
+      *texTargetBit = TEXTURE_2D_BIT;
+   }
+   else if (Parse_String(parseState, "3D")) {
+      *texTargetBit = TEXTURE_3D_BIT;
+   }
+   else if (Parse_String(parseState, "CUBE")) {
+      *texTargetBit = TEXTURE_CUBE_BIT;
+   }
+   else if (Parse_String(parseState, "RECT")) {
+      *texTargetBit = TEXTURE_RECT_BIT;
+   }
+   else {
+      RETURN_ERROR1("Invalid texture target token");
+   }
+
+   /* update record of referenced texture units */
+   parseState->texturesUsed[*texUnit] |= *texTargetBit;
+   if (_mesa_bitcount(parseState->texturesUsed[*texUnit]) > 1) {
+      RETURN_ERROR1("Only one texture target can be used per texture unit.");
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse a scalar suffix like .x, .y, .z or .w or parse a swizzle suffix
+ * like .wxyz, .xxyy, etc and return the swizzle indexes (exactly 1 or 4 letters).
+ */
+static GLboolean
+Parse_SwizzleSuffix(const GLubyte *token, GLuint swizzle[4])
+{
+   if (token[1] == 0) {
+      /* single letter swizzle (scalar) */
+      if (token[0] == 'x')
+         ASSIGN_4V(swizzle, 0, 0, 0, 0);
+      else if (token[0] == 'y')
+         ASSIGN_4V(swizzle, 1, 1, 1, 1);
+      else if (token[0] == 'z')
+         ASSIGN_4V(swizzle, 2, 2, 2, 2);
+      else if (token[0] == 'w')
+         ASSIGN_4V(swizzle, 3, 3, 3, 3);
+      else
+         return GL_FALSE;
+   }
+   else {
+      /* 4-component swizzle (vector) */
+      GLint k;
+      for (k = 0; k < 4 && token[k]; k++) {
+         if (token[k] == 'x')
+            swizzle[k] = 0;
+         else if (token[k] == 'y')
+            swizzle[k] = 1;
+         else if (token[k] == 'z')
+            swizzle[k] = 2;
+         else if (token[k] == 'w')
+            swizzle[k] = 3;
+         else
+            return GL_FALSE;
+      }
+      if (k != 4 || token[4] != 0)   /* too short, or trailing characters */
+         return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+
+static GLboolean   /* parses a condition code plus an optional .swizzle into dstReg */
+Parse_CondCodeMask(struct parse_state *parseState,
+                   struct prog_dst_register *dstReg)
+{
+   if (Parse_String(parseState, "EQ"))
+      dstReg->CondMask = COND_EQ;
+   else if (Parse_String(parseState, "GE"))
+      dstReg->CondMask = COND_GE;
+   else if (Parse_String(parseState, "GT"))
+      dstReg->CondMask = COND_GT;
+   else if (Parse_String(parseState, "LE"))
+      dstReg->CondMask = COND_LE;
+   else if (Parse_String(parseState, "LT"))
+      dstReg->CondMask = COND_LT;
+   else if (Parse_String(parseState, "NE"))
+      dstReg->CondMask = COND_NE;
+   else if (Parse_String(parseState, "TR"))
+      dstReg->CondMask = COND_TR;
+   else if (Parse_String(parseState, "FL"))
+      dstReg->CondMask = COND_FL;
+   else
+      RETURN_ERROR1("Invalid condition code mask");
+
+   /* look for optional .xyzw swizzle; CondSwizzle is left untouched without one */
+   if (Parse_String(parseState, ".")) {
+      GLubyte token[100];
+      GLuint swz[4];
+
+      if (!Parse_Token(parseState, token))  /* get xyzw suffix */
+         RETURN_ERROR;
+
+      if (!Parse_SwizzleSuffix(token, swz))
+         RETURN_ERROR1("Invalid swizzle suffix");
+
+      dstReg->CondSwizzle = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse a temporary register: Rnn or Hnn.  Writes the index to *tempRegNum.
+ */
+static GLboolean
+Parse_TempReg(struct parse_state *parseState, GLint *tempRegNum)
+{
+   GLubyte token[100];
+
+   /* Should be 'R##' or 'H##' */
+   if (!Parse_Token(parseState, token))
+      RETURN_ERROR;
+   if (token[0] != 'R' && token[0] != 'H')
+      RETURN_ERROR1("Expected R## or H##");
+
+   if (IsDigit(token[1])) {
+      GLint reg = atoi((const char *) (token + 1));
+      if (token[0] == 'H')
+         reg += 32;   /* Hn is stored at index n + 32 */
+      if (reg >= MAX_NV_FRAGMENT_PROGRAM_TEMPS)   /* NOTE(review): R32+ may alias an H index - confirm */
+         RETURN_ERROR1("Invalid temporary register name");
+      *tempRegNum = reg;
+   }
+   else {
+      RETURN_ERROR1("Invalid temporary register name");
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse a write-only dummy register: RC (index 0) or HC (index 1).
+ */
+static GLboolean
+Parse_DummyReg(struct parse_state *parseState, GLint *regNum)
+{
+   if (Parse_String(parseState, "RC")) {
+       *regNum = 0;
+   }
+   else if (Parse_String(parseState, "HC")) {
+       *regNum = 1;
+   }
+   else {
+      RETURN_ERROR1("Invalid write-only register name");
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse a program local parameter register "p[##]"; writes ## to *regNum.
+ */
+static GLboolean
+Parse_ProgramParamReg(struct parse_state *parseState, GLint *regNum)
+{
+   GLubyte token[100];
+
+   if (!Parse_String(parseState, "p["))
+      RETURN_ERROR1("Expected p[");
+
+   if (!Parse_Token(parseState, token))
+      RETURN_ERROR;
+
+   if (IsDigit(token[0])) {
+      /* a numbered program parameter register */
+      GLint reg = atoi((const char *) token);
+      if (reg >= MAX_NV_FRAGMENT_PROGRAM_PARAMS)
+         RETURN_ERROR1("Invalid constant program number");
+      *regNum = reg;
+   }
+   else {
+      RETURN_ERROR;   /* non-numeric index; reported as "Unexpected end of input." */
+   }
+
+   if (!Parse_String(parseState, "]"))
+      RETURN_ERROR1("Expected ]");
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Parse f[name]  - fragment input register; writes its index to *tempRegNum.
+ */
+static GLboolean
+Parse_FragReg(struct parse_state *parseState, GLint *tempRegNum)
+{
+   GLubyte token[100];
+   GLint j;
+
+   /* Match 'f[' */
+   if (!Parse_String(parseState, "f["))
+      RETURN_ERROR1("Expected f[");
+
+   /* get <name> and look for match */
+   if (!Parse_Token(parseState, token)) {
+      RETURN_ERROR;
+   }
+   for (j = 0; InputRegisters[j]; j++) {
+      if (strcmp((const char *) token, InputRegisters[j]) == 0) {
+         *tempRegNum = j;
+         parseState->inputsRead |= (1 << j);   /* record the input as used */
+         break;
+      }
+   }
+   if (!InputRegisters[j]) {
+      /* unknown input register label */
+      RETURN_ERROR2("Invalid register name", token);
+   }
+
+   /* Match ']' */
+   if (!Parse_String(parseState, "]"))
+      RETURN_ERROR1("Expected ]");
+
+   return GL_TRUE;
+}
+
+
+static GLboolean   /* parses o[COLR], o[COLH] or o[DEPR]; writes FRAG_RESULT_* to *outputRegNum */
+Parse_OutputReg(struct parse_state *parseState, GLint *outputRegNum)
+{
+   GLubyte token[100];
+
+   /* Match "o[" */
+   if (!Parse_String(parseState, "o["))
+      RETURN_ERROR1("Expected o[");
+
+   /* Get output reg name */
+   if (!Parse_Token(parseState, token))
+      RETURN_ERROR;
+
+   /* try to match an output register name */
+   if (strcmp((char *) token, "COLR") == 0 ||
+       strcmp((char *) token, "COLH") == 0) {
+      /* note that we don't distinguish between COLR and COLH */
+      *outputRegNum = FRAG_RESULT_COLOR;
+      parseState->outputsWritten |= (1 << FRAG_RESULT_COLOR);
+   }
+   else if (strcmp((char *) token, "DEPR") == 0) {
+      *outputRegNum = FRAG_RESULT_DEPTH;
+      parseState->outputsWritten |= (1 << FRAG_RESULT_DEPTH);
+   }
+   else {
+      RETURN_ERROR1("Invalid output register name");
+   }
+
+   /* Match ']' */
+   if (!Parse_String(parseState, "]"))
+      RETURN_ERROR1("Expected ]");
+
+   return GL_TRUE;
+}
+
+
+static GLboolean
+Parse_MaskedDstReg(struct parse_state *parseState,
+                   struct prog_dst_register *dstReg)
+{
+   GLubyte token[100];
+   GLint idx;
+
+   /* Parse <dst>[.mask][(cc)].  Dst can be R<n>, H<n>, o[name], RC or HC */
+   if (!Peek_Token(parseState, token))
+      RETURN_ERROR;
+
+   if (strcmp((const char *) token, "RC") == 0 ||
+       strcmp((const char *) token, "HC") == 0) {
+      /* a write-only register */
+      dstReg->File = PROGRAM_WRITE_ONLY;
+      if (!Parse_DummyReg(parseState, &idx))
+         RETURN_ERROR;
+      dstReg->Index = idx;
+   }
+   else if (token[0] == 'R' || token[0] == 'H') {
+      /* a temporary register */
+      dstReg->File = PROGRAM_TEMPORARY;
+      if (!Parse_TempReg(parseState, &idx))
+         RETURN_ERROR;
+      dstReg->Index = idx;
+   }
+   else if (token[0] == 'o') {
+      /* an output register */
+      dstReg->File = PROGRAM_OUTPUT;
+      if (!Parse_OutputReg(parseState, &idx))
+         RETURN_ERROR;
+      dstReg->Index = idx;
+   }
+   else {
+      RETURN_ERROR1("Invalid destination register name");
+   }
+
+   /* Parse optional write mask */
+   if (Parse_String(parseState, ".")) {
+      /* got a mask: must be a non-empty, in-order subset of "xyzw" */
+      GLint k = 0;
+
+      if (!Parse_Token(parseState, token))  /* get xyzw writemask */
+         RETURN_ERROR;
+
+      dstReg->WriteMask = 0;
+
+      if (token[k] == 'x') {
+         dstReg->WriteMask |= WRITEMASK_X;
+         k++;
+      }
+      if (token[k] == 'y') {
+         dstReg->WriteMask |= WRITEMASK_Y;
+         k++;
+      }
+      if (token[k] == 'z') {
+         dstReg->WriteMask |= WRITEMASK_Z;
+         k++;
+      }
+      if (token[k] == 'w') {
+         dstReg->WriteMask |= WRITEMASK_W;
+         k++;
+      }
+      if (k == 0 || token[k] != 0) {   /* empty, or leftovers like ".xq"/".yx" */
+         RETURN_ERROR1("Invalid writemask character");
+      }
+
+   }
+   else {
+      dstReg->WriteMask = WRITEMASK_XYZW;
+   }
+
+   /* optional condition code mask */
+   if (Parse_String(parseState, "(")) {
+      /* ("EQ" | "GE" | "GT" | "LE" | "LT" | "NE" | "TR" | "FL".x|y|z|w) */
+      /* ("EQ" | "GE" | "GT" | "LE" | "LT" | "NE" | "TR" | "FL".[xyzw]) */
+      if (!Parse_CondCodeMask(parseState, dstReg))
+         RETURN_ERROR;
+
+      if (!Parse_String(parseState, ")"))  /* consume ")" */
+         RETURN_ERROR1("Expected )");
+
+      return GL_TRUE;
+   }
+   else {
+      /* no cond code mask */
+      dstReg->CondMask = COND_TR;
+      dstReg->CondSwizzle = SWIZZLE_NOOP;
+      return GL_TRUE;
+   }
+}
+
+
+/**
+ * Parse a vector source (register, constant, etc):
+ *   <vectorSrc>    ::= <absVectorSrc>
+ *                    | <baseVectorSrc>
+ *   <absVectorSrc> ::= <negate> "|" <baseVectorSrc> "|"
+ */
+static GLboolean
+Parse_VectorSrc(struct parse_state *parseState,
+                struct prog_src_register *srcReg)
+{
+   GLfloat sign = 1.0F;
+   GLubyte token[100];
+   GLint idx;
+   GLuint negateBase, negateAbs;
+
+   /*
+    * Leading +/- and "|".  Inside |..| only the outer sign ends up in Negate.
+    */
+   if (Parse_String(parseState, "-"))
+      sign = -1.0F;
+   else if (Parse_String(parseState, "+"))
+      sign = +1.0F;
+
+   if (Parse_String(parseState, "|")) {
+      srcReg->Abs = GL_TRUE;
+      negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
+
+      if (Parse_String(parseState, "-"))
+         negateBase = NEGATE_XYZW;
+      else if (Parse_String(parseState, "+"))
+         negateBase = NEGATE_NONE;
+      else
+         negateBase = NEGATE_NONE;
+   }
+   else {
+      srcReg->Abs = GL_FALSE;
+      negateAbs = NEGATE_NONE;
+      negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
+   }
+
+   srcReg->Negate = srcReg->Abs ? negateAbs : negateBase;
+
+   /* This should be the real src vector/register name */
+   if (!Peek_Token(parseState, token))
+      RETURN_ERROR;
+
+   /* Src reg can be Rn, Hn, f[n], p[n], a named parameter, a scalar
+    * literal or vector literal.
+    */
+   if (token[0] == 'R' || token[0] == 'H') {
+      srcReg->File = PROGRAM_TEMPORARY;
+      if (!Parse_TempReg(parseState, &idx))
+         RETURN_ERROR;
+      srcReg->Index = idx;
+   }
+   else if (token[0] == 'f') {
+      /* XXX any identifier starting with 'f' also lands here! */
+      srcReg->File = PROGRAM_INPUT;
+      if (!Parse_FragReg(parseState, &idx))
+         RETURN_ERROR;
+      srcReg->Index = idx;
+   }
+   else if (token[0] == 'p') {
+      /* XXX any identifier starting with 'p' also lands here! */
+      srcReg->File = PROGRAM_LOCAL_PARAM;
+      if (!Parse_ProgramParamReg(parseState, &idx))
+         RETURN_ERROR;
+      srcReg->Index = idx;
+   }
+   else if (IsLetter(token[0])){
+      GLubyte ident[100];
+      GLint paramIndex;
+      if (!Parse_Identifier(parseState, ident))
+         RETURN_ERROR;
+      paramIndex = _mesa_lookup_parameter_index(parseState->parameters,
+                                                -1, (const char *) ident);
+      if (paramIndex < 0) {
+         RETURN_ERROR2("Undefined constant or parameter: ", ident);
+      }
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      srcReg->Index = paramIndex;      
+   }
+   else if (IsDigit(token[0]) || token[0] == '-' || token[0] == '+' || token[0] == '.'){
+      /* literal scalar constant */
+      GLfloat values[4];
+      GLuint paramIndex;
+      if (!Parse_ScalarConstant(parseState, values))
+         RETURN_ERROR;
+      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
+                                              (gl_constant_value *) values,
+                                              4, NULL);
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      srcReg->Index = paramIndex;
+   }
+   else if (token[0] == '{'){
+      /* literal vector constant */
+      GLfloat values[4];
+      GLuint paramIndex;
+      (void) Parse_String(parseState, "{");
+      if (!Parse_VectorConstant(parseState, values))
+         RETURN_ERROR;
+      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
+                                              (gl_constant_value *) values,
+                                              4, NULL);
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      srcReg->Index = paramIndex;      
+   }
+   else {
+      RETURN_ERROR2("Invalid source register name", token);
+   }
+
+   /* default to the no-op swizzle; a suffix below overrides it */
+   srcReg->Swizzle = SWIZZLE_NOOP;
+
+   /* Look for optional swizzle suffix */
+   if (Parse_String(parseState, ".")) {
+      GLuint swz[4];
+
+      if (!Parse_Token(parseState, token))
+         RETURN_ERROR;
+
+      if (!Parse_SwizzleSuffix(token, swz))
+         RETURN_ERROR1("Invalid swizzle suffix");
+
+      srcReg->Swizzle = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+   }
+
+   /* Finish absolute value */
+   if (srcReg->Abs && !Parse_String(parseState, "|")) {
+      RETURN_ERROR1("Expected |");
+   }
+
+   return GL_TRUE;
+}
+
+
+static GLboolean
+Parse_ScalarSrcReg(struct parse_state *parseState,
+                   struct prog_src_register *srcReg)
+{
+   GLubyte token[100];
+   GLfloat sign = 1.0F;
+   GLboolean needSuffix = GL_TRUE;
+   GLint idx;
+   GLuint negateBase, negateAbs;
+
+   /*
+    * First, take care of +/- and absolute value stuff.
+    */
+   if (Parse_String(parseState, "-"))
+      sign = -1.0F;
+   else if (Parse_String(parseState, "+"))
+      sign = +1.0F;
+
+   if (Parse_String(parseState, "|")) {
+      srcReg->Abs = GL_TRUE;
+      negateAbs = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
+
+      if (Parse_String(parseState, "-"))
+         negateBase = NEGATE_XYZW;
+      else if (Parse_String(parseState, "+"))
+         negateBase = NEGATE_NONE;
+      else
+         negateBase = NEGATE_NONE;
+   }
+   else {
+      srcReg->Abs = GL_FALSE;
+      negateAbs = NEGATE_NONE;
+      negateBase = (sign < 0.0F) ? NEGATE_XYZW : NEGATE_NONE;
+   }
+
+   srcReg->Negate = srcReg->Abs ? negateAbs : negateBase;
+
+   if (!Peek_Token(parseState, token))
+      RETURN_ERROR;
+
+   /* Src can be R<n>, H<n>, f[name], {vector}, a named param or a number */
+   if (token[0] == 'R' || token[0] == 'H') {
+      srcReg->File = PROGRAM_TEMPORARY;
+      if (!Parse_TempReg(parseState, &idx))
+         RETURN_ERROR;
+      srcReg->Index = idx;
+   }
+   else if (token[0] == 'f') {
+      srcReg->File = PROGRAM_INPUT;
+      if (!Parse_FragReg(parseState, &idx))
+         RETURN_ERROR;
+      srcReg->Index = idx;
+   }
+   else if (token[0] == '{') {
+      /* vector literal */
+      GLfloat values[4];
+      GLuint paramIndex;
+      (void) Parse_String(parseState, "{");
+      if (!Parse_VectorConstant(parseState, values))
+         RETURN_ERROR;
+      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
+                                              (gl_constant_value *) values,
+                                              4, NULL);
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      srcReg->Index = paramIndex;      
+   }
+   else if (IsLetter(token[0])){
+      /* named param/constant */
+      GLubyte ident[100];
+      GLint paramIndex;
+      if (!Parse_Identifier(parseState, ident))
+         RETURN_ERROR;
+      paramIndex = _mesa_lookup_parameter_index(parseState->parameters,
+                                                -1, (const char *) ident);
+      if (paramIndex < 0) {
+         RETURN_ERROR2("Undefined constant or parameter: ", ident);
+      }
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      srcReg->Index = paramIndex;      
+   }
+   else if (IsDigit(token[0])) {
+      /* scalar literal: the only form that takes no .[xyzw] suffix */
+      GLfloat values[4];
+      GLuint paramIndex;
+      if (!Parse_ScalarConstant(parseState, values))
+         RETURN_ERROR;
+      paramIndex = _mesa_add_unnamed_constant(parseState->parameters,
+                                              (gl_constant_value *) values,
+                                              4, NULL);
+      srcReg->Index = paramIndex;      
+      srcReg->File = PROGRAM_NAMED_PARAM;
+      needSuffix = GL_FALSE;
+   }
+   else {
+      RETURN_ERROR2("Invalid scalar source argument", token);
+   }
+
+   srcReg->Swizzle = 0;
+   if (needSuffix) {
+      /* parse the mandatory single-component .[xyzw] suffix */
+      if (!Parse_String(parseState, "."))
+         RETURN_ERROR1("Expected .");
+
+      if (!Parse_Token(parseState, token))
+         RETURN_ERROR;
+
+      if (token[0] == 'x' && token[1] == 0) {
+         srcReg->Swizzle = 0;
+      }
+      else if (token[0] == 'y' && token[1] == 0) {
+         srcReg->Swizzle = 1;
+      }
+      else if (token[0] == 'z' && token[1] == 0) {
+         srcReg->Swizzle = 2;
+      }
+      else if (token[0] == 'w' && token[1] == 0) {
+         srcReg->Swizzle = 3;
+      }
+      else {
+         RETURN_ERROR1("Invalid scalar source suffix");
+      }
+   }
+
+   /* Finish absolute value */
+   if (srcReg->Abs && !Parse_String(parseState, "|")) {
+      RETURN_ERROR1("Expected |");
+   }
+
+   return GL_TRUE;
+}
+
+
+static GLboolean
+Parse_PrintInstruction(struct parse_state *parseState,
+                       struct prog_instruction *inst)
+{
+   const GLubyte *str;
+   GLubyte *msg;
+   GLuint len;
+   GLint idx;
+
+   /* PRINT 'message' [, reg]: the first argument is a quoted literal string */
+   if (!Parse_String(parseState, "'"))
+      RETURN_ERROR1("Expected '");
+
+   str = parseState->pos;
+   len = (GLuint) strcspn((const char *) str, "'");  /* stops at the NUL too */
+   if (str[len] != '\'')   /* unterminated string: don't run off the buffer */
+      RETURN_ERROR1("Expected '");
+   parseState->pos += len + 1;
+   msg = (GLubyte *) malloc(len + 1);
+   if (!msg)
+      RETURN_ERROR1("Out of memory");
+   memcpy(msg, str, len);
+   msg[len] = 0;
+   inst->Data = msg;
+
+   if (Parse_String(parseState, ",")) {
+      /* got an optional register to print */
+      GLubyte token[100];
+      GetToken(parseState, token);
+      if (token[0] == 'o') {   /* dst reg */
+         if (!Parse_OutputReg(parseState, &idx))
+            RETURN_ERROR;
+         inst->SrcReg[0].Index = idx;
+         inst->SrcReg[0].File = PROGRAM_OUTPUT;
+      }
+      else {                   /* src reg */
+         if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+            RETURN_ERROR;
+      }
+   }
+   else {
+      inst->SrcReg[0].File = PROGRAM_UNDEFINED;
+   }
+
+   inst->SrcReg[0].Swizzle = SWIZZLE_NOOP;
+   inst->SrcReg[0].Abs = GL_FALSE;
+   inst->SrcReg[0].Negate = NEGATE_NONE;
+
+   return GL_TRUE;
+}
+
+
+static GLboolean
+Parse_InstructionSequence(struct parse_state *parseState,
+                          struct prog_instruction program[])
+{
+   while (1) {
+      struct prog_instruction *inst = program + parseState->numInst;
+      struct instruction_pattern instMatch;
+      GLubyte token[100];
+
+      /* Initialize the instruction */
+      _mesa_init_instructions(inst, 1);
+
+      /* special instructions: DEFINE/DECLARE only add a named parameter */
+      if (Parse_String(parseState, "DEFINE")) {
+         GLubyte id[100];
+         GLfloat value[7];  /* yes, 7 to be safe */
+         if (!Parse_Identifier(parseState, id))
+            RETURN_ERROR;
+         /* XXX make sure id is not a reserved identifier, like R9 */
+         if (!Parse_String(parseState, "="))
+            RETURN_ERROR1("Expected =");
+         if (!Parse_VectorOrScalarConstant(parseState, value))
+            RETURN_ERROR;
+         if (!Parse_String(parseState, ";"))
+            RETURN_ERROR1("Expected ;");
+         if (_mesa_lookup_parameter_index(parseState->parameters,
+                                          -1, (const char *) id) >= 0) {
+            RETURN_ERROR2(id, "already defined");
+         }
+         _mesa_add_named_parameter(parseState->parameters,
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
+      }
+      else if (Parse_String(parseState, "DECLARE")) {
+         GLubyte id[100];
+         GLfloat value[7] = {0, 0, 0, 0, 0, 0, 0};  /* yes, to be safe */
+         if (!Parse_Identifier(parseState, id))
+            RETURN_ERROR;
+         /* XXX make sure id is not a reserved identifier, like R9 */
+         if (Parse_String(parseState, "=")) {
+            if (!Parse_VectorOrScalarConstant(parseState, value))
+               RETURN_ERROR;
+         }
+         if (!Parse_String(parseState, ";"))
+            RETURN_ERROR1("Expected ;");
+         if (_mesa_lookup_parameter_index(parseState->parameters,
+                                          -1, (const char *) id) >= 0) {
+            RETURN_ERROR2(id, "already declared");
+         }
+         _mesa_add_named_parameter(parseState->parameters,
+                                   (const char *) id,
+                                   (gl_constant_value *) value);
+      }
+      else if (Parse_String(parseState, "END")) {
+         inst->Opcode = OPCODE_END;
+         parseState->numInst++;
+         if (Parse_Token(parseState, token)) {
+            RETURN_ERROR1("Code after END opcode.");
+         }
+         break;
+      }
+      else {
+         /* general/arithmetic instruction */
+
+         /* get token */
+         if (!Parse_Token(parseState, token)) {
+            RETURN_ERROR1("Missing END instruction.");
+         }
+
+         /* try to find matching instruction */
+         instMatch = MatchInstruction(token);
+         if (instMatch.opcode >= MAX_OPCODE) {
+            /* bad instruction name */
+            RETURN_ERROR2("Unexpected token: ", token);
+         }
+
+         inst->Opcode = instMatch.opcode;
+         inst->Precision = instMatch.suffixes & (_R | _H | _X);
+         inst->SaturateMode = (instMatch.suffixes & (_S))
+            ? SATURATE_ZERO_ONE : SATURATE_OFF;
+         inst->CondUpdate = (instMatch.suffixes & (_C)) ? GL_TRUE : GL_FALSE;
+
+         /*
+          * parse the input and output operands
+          */
+         if (instMatch.outputs == OUTPUT_S || instMatch.outputs == OUTPUT_V) {
+            if (!Parse_MaskedDstReg(parseState, &inst->DstReg))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+         }
+         else if (instMatch.outputs == OUTPUT_NONE) {
+            if (instMatch.opcode == OPCODE_KIL_NV) {
+               /* This is a little weird, the cond code info is in
+                * the dest register.
+                */
+               if (!Parse_CondCodeMask(parseState, &inst->DstReg))
+                  RETURN_ERROR;
+            }
+            else {
+               ASSERT(instMatch.opcode == OPCODE_PRINT);
+            }
+         }
+
+         if (instMatch.inputs == INPUT_1V) {
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+         }
+         else if (instMatch.inputs == INPUT_2V) {
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
+               RETURN_ERROR;
+         }
+         else if (instMatch.inputs == INPUT_3V) {
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[2]))
+               RETURN_ERROR;
+         }
+         else if (instMatch.inputs == INPUT_1S) {
+            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+         }
+         else if (instMatch.inputs == INPUT_2S) {
+            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_ScalarSrcReg(parseState, &inst->SrcReg[1]))
+               RETURN_ERROR;
+         }
+         else if (instMatch.inputs == INPUT_CC) {
+            /* XXX to-do */
+         }
+         else if (instMatch.inputs == INPUT_1V_T) {
+	    GLubyte unit, idx;
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_TextureImageId(parseState, &unit, &idx))
+               RETURN_ERROR;
+	    inst->TexSrcUnit = unit;
+	    inst->TexSrcTarget = idx;
+         }
+         else if (instMatch.inputs == INPUT_3V_T) {
+	    GLubyte unit, idx;
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[0]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[1]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_VectorSrc(parseState, &inst->SrcReg[2]))
+               RETURN_ERROR;
+            if (!Parse_String(parseState, ","))
+               RETURN_ERROR1("Expected ,");
+            if (!Parse_TextureImageId(parseState, &unit, &idx))
+               RETURN_ERROR;
+	    inst->TexSrcUnit = unit;
+	    inst->TexSrcTarget = idx;
+         }
+         else if (instMatch.inputs == INPUT_1V_S) {
+            if (!Parse_PrintInstruction(parseState, inst))
+               RETURN_ERROR;
+         }
+
+         /* end of statement semicolon */
+         if (!Parse_String(parseState, ";"))
+            RETURN_ERROR1("Expected ;");
+
+         parseState->numInst++;
+
+         if (parseState->numInst >= MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS)
+            RETURN_ERROR1("Program too long");
+      }
+   }
+   return GL_TRUE;
+}
+
+
+
+/**
+ * Parse/compile the 'str' returning the compiled 'program'.
+ * ctx->Program.ErrorPos will be -1 if successful.  Otherwise, ErrorPos
+ * indicates the position of the error in 'str'.
+ */
+void
+_mesa_parse_nv_fragment_program(struct gl_context *ctx, GLenum dstTarget,
+                                const GLubyte *str, GLsizei len,
+                                struct gl_fragment_program *program)
+{
+   struct parse_state parseState;
+   struct prog_instruction instBuffer[MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS];
+   struct prog_instruction *newInst;
+   GLenum target;
+   GLubyte *programString;
+
+   /* Make a null-terminated copy of the program string */
+   programString = (GLubyte *) MALLOC(len + 1);
+   if (!programString) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
+      return;
+   }
+   memcpy(programString, str, len);
+   programString[len] = 0;
+
+   /* Get ready to parse */
+   memset(&parseState, 0, sizeof(struct parse_state));
+   parseState.ctx = ctx;
+   parseState.start = programString;
+   parseState.program = program;
+   parseState.numInst = 0;
+   parseState.curLine = programString;
+   parseState.parameters = _mesa_new_parameter_list();
+
+   /* Reset error state */
+   _mesa_set_program_error(ctx, -1, NULL);
+
+   /* check the program header */
+   if (strncmp((const char *) programString, "!!FP1.0", 7) == 0) {
+      target = GL_FRAGMENT_PROGRAM_NV;
+      parseState.pos = programString + 7;
+   }
+   else {
+      /* invalid header; "!!FCP1.0" (fragment / register combiner program)
+       * is not supported and is rejected the same way */
+      _mesa_set_program_error(ctx, 0, "Invalid fragment program header");
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV(bad header)");
+      goto fail;
+   }
+
+   /* make sure target and header match */
+   if (target != dstTarget) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glLoadProgramNV(target mismatch 0x%x != 0x%x)",
+                  target, dstTarget);
+      goto fail;
+   }
+
+   if (Parse_InstructionSequence(&parseState, instBuffer)) {
+      GLuint u;
+      /* successful parse! */
+
+      if (parseState.outputsWritten == 0) {
+         /* must write at least one output! */
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "Invalid fragment program - no outputs written.");
+         goto fail;
+      }
+
+      /* copy the compiled instructions */
+      assert(parseState.numInst <= MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS);
+      newInst = _mesa_alloc_instructions(parseState.numInst);
+      if (!newInst) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glLoadProgramNV");
+         goto fail;  /* out of memory */
+      }
+      _mesa_copy_instructions(newInst, instBuffer, parseState.numInst);
+
+      /* install the program */
+      program->Base.Target = target;
+      if (program->Base.String) {
+         FREE(program->Base.String);
+      }
+      program->Base.String = programString;
+      program->Base.Format = GL_PROGRAM_FORMAT_ASCII_ARB;
+      if (program->Base.Instructions) {
+         free(program->Base.Instructions);
+      }
+      program->Base.Instructions = newInst;
+      program->Base.NumInstructions = parseState.numInst;
+      program->Base.InputsRead = parseState.inputsRead;
+      program->Base.OutputsWritten = parseState.outputsWritten;
+      for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++)
+         program->Base.TexturesUsed[u] = parseState.texturesUsed[u];
+
+      /* save program parameters */
+      program->Base.Parameters = parseState.parameters;
+
+      /* allocate registers for declared program parameters */
+#if 00
+      _mesa_assign_program_registers(&(program->SymbolTable));
+#endif
+
+#ifdef DEBUG_foo
+      printf("--- glLoadProgramNV(%d) result ---\n", program->Base.Id);
+      _mesa_fprint_program_opt(stdout, &program->Base, PROG_PRINT_NV, 0);
+      printf("----------------------------------\n");
+#endif
+      return;  /* programString and the parameter list now belong to program */
+   }
+
+   /* Error!  NOTE: _mesa_set_program_error would have been called already */
+   _mesa_error(ctx, GL_INVALID_OPERATION, "glLoadProgramNV");
+
+fail:
+   /* nothing was installed: release the string copy and the parameter list */
+   FREE(programString);
+   if (parseState.parameters)
+      _mesa_free_parameter_list(parseState.parameters);
+}
+
+
+const char *
+_mesa_nv_fragment_input_register_name(GLuint i)
+{
+   ASSERT(i < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
+   return InputRegisters[i];   /* same table Parse_FragReg matches names in */
+}
+
diff --git a/mesalib/src/mesa/program/prog_execute.c b/mesalib/src/mesa/program/prog_execute.c
index 28a3e1e20..dbfd1b918 100644
--- a/mesalib/src/mesa/program/prog_execute.c
+++ b/mesalib/src/mesa/program/prog_execute.c
@@ -1,1895 +1,1895 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.3
- *
- * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file prog_execute.c
- * Software interpreter for vertex/fragment programs.
- * \author Brian Paul
- */
-
-/*
- * NOTE: we do everything in single-precision floating point; we don't
- * currently observe the single/half/fixed-precision qualifiers.
- *
- */
-
-
-#include "main/glheader.h"
-#include "main/colormac.h"
-#include "main/macros.h"
-#include "prog_execute.h"
-#include "prog_instruction.h"
-#include "prog_parameter.h"
-#include "prog_print.h"
-#include "prog_noise.h"
-
-
-/* debug predicate */
-#define DEBUG_PROG 0
-
-
-/**
- * Set x to positive or negative infinity.
- */
-#if defined(USE_IEEE) || defined(_WIN32)
-#define SET_POS_INFINITY(x)                  \
-   do {                                      \
-         fi_type fi;                         \
-         fi.i = 0x7F800000;                  \
-         x = fi.f;                           \
-   } while (0)
-#define SET_NEG_INFINITY(x)                  \
-   do {                                      \
-         fi_type fi;                         \
-         fi.i = 0xFF800000;                  \
-         x = fi.f;                           \
-   } while (0)
-#elif defined(VMS)
-#define SET_POS_INFINITY(x)  x = __MAXFLOAT
-#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
-#else
-#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
-#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
-#endif
-
-#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
-
-
-static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
-
-
-
-/**
- * Return TRUE for +0 and other positive values, FALSE otherwise.
- * Used for RCC opcode.
- */
-static INLINE GLboolean
-positive(float x)
-{
-   fi_type fi;
-   fi.f = x;
-   if (fi.i & 0x80000000)
-      return GL_FALSE;
-   return GL_TRUE;
-}
-
-
-
-/**
- * Return a pointer to the 4-element float vector specified by the given
- * source register.
- */
-static INLINE const GLfloat *
-get_src_register_pointer(const struct prog_src_register *source,
-                         const struct gl_program_machine *machine)
-{
-   const struct gl_program *prog = machine->CurProgram;
-   GLint reg = source->Index;
-
-   if (source->RelAddr) {
-      /* add address register value to src index/offset */
-      reg += machine->AddressReg[0][0];
-      if (reg < 0) {
-         return ZeroVec;
-      }
-   }
-
-   switch (source->File) {
-   case PROGRAM_TEMPORARY:
-      if (reg >= MAX_PROGRAM_TEMPS)
-         return ZeroVec;
-      return machine->Temporaries[reg];
-
-   case PROGRAM_INPUT:
-      if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
-         if (reg >= VERT_ATTRIB_MAX)
-            return ZeroVec;
-         return machine->VertAttribs[reg];
-      }
-      else {
-         if (reg >= FRAG_ATTRIB_MAX)
-            return ZeroVec;
-         return machine->Attribs[reg][machine->CurElement];
-      }
-
-   case PROGRAM_OUTPUT:
-      if (reg >= MAX_PROGRAM_OUTPUTS)
-         return ZeroVec;
-      return machine->Outputs[reg];
-
-   case PROGRAM_LOCAL_PARAM:
-      if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
-         return ZeroVec;
-      return machine->CurProgram->LocalParams[reg];
-
-   case PROGRAM_ENV_PARAM:
-      if (reg >= MAX_PROGRAM_ENV_PARAMS)
-         return ZeroVec;
-      return machine->EnvParams[reg];
-
-   case PROGRAM_STATE_VAR:
-      /* Fallthrough */
-   case PROGRAM_CONSTANT:
-      /* Fallthrough */
-   case PROGRAM_UNIFORM:
-      /* Fallthrough */
-   case PROGRAM_NAMED_PARAM:
-      if (reg >= (GLint) prog->Parameters->NumParameters)
-         return ZeroVec;
-      return prog->Parameters->ParameterValues[reg];
-
-   case PROGRAM_SYSTEM_VALUE:
-      assert(reg < Elements(machine->SystemValues));
-      return machine->SystemValues[reg];
-
-   default:
-      _mesa_problem(NULL,
-         "Invalid src register file %d in get_src_register_pointer()",
-         source->File);
-      return NULL;
-   }
-}
-
-
-/**
- * Return a pointer to the 4-element float vector specified by the given
- * destination register.
- */
-static INLINE GLfloat *
-get_dst_register_pointer(const struct prog_dst_register *dest,
-                         struct gl_program_machine *machine)
-{
-   static GLfloat dummyReg[4];
-   GLint reg = dest->Index;
-
-   if (dest->RelAddr) {
-      /* add address register value to src index/offset */
-      reg += machine->AddressReg[0][0];
-      if (reg < 0) {
-         return dummyReg;
-      }
-   }
-
-   switch (dest->File) {
-   case PROGRAM_TEMPORARY:
-      if (reg >= MAX_PROGRAM_TEMPS)
-         return dummyReg;
-      return machine->Temporaries[reg];
-
-   case PROGRAM_OUTPUT:
-      if (reg >= MAX_PROGRAM_OUTPUTS)
-         return dummyReg;
-      return machine->Outputs[reg];
-
-   case PROGRAM_WRITE_ONLY:
-      return dummyReg;
-
-   default:
-      _mesa_problem(NULL,
-         "Invalid dest register file %d in get_dst_register_pointer()",
-         dest->File);
-      return NULL;
-   }
-}
-
-
-
-/**
- * Fetch a 4-element float vector from the given source register.
- * Apply swizzling and negating as needed.
- */
-static void
-fetch_vector4(const struct prog_src_register *source,
-              const struct gl_program_machine *machine, GLfloat result[4])
-{
-   const GLfloat *src = get_src_register_pointer(source, machine);
-   ASSERT(src);
-
-   if (source->Swizzle == SWIZZLE_NOOP) {
-      /* no swizzling */
-      COPY_4V(result, src);
-   }
-   else {
-      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
-      result[0] = src[GET_SWZ(source->Swizzle, 0)];
-      result[1] = src[GET_SWZ(source->Swizzle, 1)];
-      result[2] = src[GET_SWZ(source->Swizzle, 2)];
-      result[3] = src[GET_SWZ(source->Swizzle, 3)];
-   }
-
-   if (source->Abs) {
-      result[0] = FABSF(result[0]);
-      result[1] = FABSF(result[1]);
-      result[2] = FABSF(result[2]);
-      result[3] = FABSF(result[3]);
-   }
-   if (source->Negate) {
-      ASSERT(source->Negate == NEGATE_XYZW);
-      result[0] = -result[0];
-      result[1] = -result[1];
-      result[2] = -result[2];
-      result[3] = -result[3];
-   }
-
-#ifdef NAN_CHECK
-   assert(!IS_INF_OR_NAN(result[0]));
-   assert(!IS_INF_OR_NAN(result[0]));
-   assert(!IS_INF_OR_NAN(result[0]));
-   assert(!IS_INF_OR_NAN(result[0]));
-#endif
-}
-
-
-/**
- * Fetch a 4-element uint vector from the given source register.
- * Apply swizzling but not negation/abs.
- */
-static void
-fetch_vector4ui(const struct prog_src_register *source,
-                const struct gl_program_machine *machine, GLuint result[4])
-{
-   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
-   ASSERT(src);
-
-   if (source->Swizzle == SWIZZLE_NOOP) {
-      /* no swizzling */
-      COPY_4V(result, src);
-   }
-   else {
-      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
-      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
-      result[0] = src[GET_SWZ(source->Swizzle, 0)];
-      result[1] = src[GET_SWZ(source->Swizzle, 1)];
-      result[2] = src[GET_SWZ(source->Swizzle, 2)];
-      result[3] = src[GET_SWZ(source->Swizzle, 3)];
-   }
-
-   /* Note: no Negate or Abs here */
-}
-
-
-
-/**
- * Fetch the derivative with respect to X or Y for the given register.
- * XXX this currently only works for fragment program input attribs.
- */
-static void
-fetch_vector4_deriv(struct gl_context * ctx,
-                    const struct prog_src_register *source,
-                    const struct gl_program_machine *machine,
-                    char xOrY, GLfloat result[4])
-{
-   if (source->File == PROGRAM_INPUT &&
-       source->Index < (GLint) machine->NumDeriv) {
-      const GLint col = machine->CurElement;
-      const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
-      const GLfloat invQ = 1.0f / w;
-      GLfloat deriv[4];
-
-      if (xOrY == 'X') {
-         deriv[0] = machine->DerivX[source->Index][0] * invQ;
-         deriv[1] = machine->DerivX[source->Index][1] * invQ;
-         deriv[2] = machine->DerivX[source->Index][2] * invQ;
-         deriv[3] = machine->DerivX[source->Index][3] * invQ;
-      }
-      else {
-         deriv[0] = machine->DerivY[source->Index][0] * invQ;
-         deriv[1] = machine->DerivY[source->Index][1] * invQ;
-         deriv[2] = machine->DerivY[source->Index][2] * invQ;
-         deriv[3] = machine->DerivY[source->Index][3] * invQ;
-      }
-
-      result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
-      result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
-      result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
-      result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
-      
-      if (source->Abs) {
-         result[0] = FABSF(result[0]);
-         result[1] = FABSF(result[1]);
-         result[2] = FABSF(result[2]);
-         result[3] = FABSF(result[3]);
-      }
-      if (source->Negate) {
-         ASSERT(source->Negate == NEGATE_XYZW);
-         result[0] = -result[0];
-         result[1] = -result[1];
-         result[2] = -result[2];
-         result[3] = -result[3];
-      }
-   }
-   else {
-      ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
-   }
-}
-
-
-/**
- * As above, but only return result[0] element.
- */
-static void
-fetch_vector1(const struct prog_src_register *source,
-              const struct gl_program_machine *machine, GLfloat result[4])
-{
-   const GLfloat *src = get_src_register_pointer(source, machine);
-   ASSERT(src);
-
-   result[0] = src[GET_SWZ(source->Swizzle, 0)];
-
-   if (source->Abs) {
-      result[0] = FABSF(result[0]);
-   }
-   if (source->Negate) {
-      result[0] = -result[0];
-   }
-}
-
-
-static GLuint
-fetch_vector1ui(const struct prog_src_register *source,
-                const struct gl_program_machine *machine)
-{
-   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
-   return src[GET_SWZ(source->Swizzle, 0)];
-}
-
-
-/**
- * Fetch texel from texture.  Use partial derivatives when possible.
- */
-static INLINE void
-fetch_texel(struct gl_context *ctx,
-            const struct gl_program_machine *machine,
-            const struct prog_instruction *inst,
-            const GLfloat texcoord[4], GLfloat lodBias,
-            GLfloat color[4])
-{
-   const GLuint unit = machine->Samplers[inst->TexSrcUnit];
-
-   /* Note: we only have the right derivatives for fragment input attribs.
-    */
-   if (machine->NumDeriv > 0 &&
-       inst->SrcReg[0].File == PROGRAM_INPUT &&
-       inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
-      /* simple texture fetch for which we should have derivatives */
-      GLuint attr = inst->SrcReg[0].Index;
-      machine->FetchTexelDeriv(ctx, texcoord,
-                               machine->DerivX[attr],
-                               machine->DerivY[attr],
-                               lodBias, unit, color);
-   }
-   else {
-      machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
-   }
-}
-
-
-/**
- * Test value against zero and return GT, LT, EQ or UN if NaN.
- */
-static INLINE GLuint
-generate_cc(float value)
-{
-   if (value != value)
-      return COND_UN;           /* NaN */
-   if (value > 0.0F)
-      return COND_GT;
-   if (value < 0.0F)
-      return COND_LT;
-   return COND_EQ;
-}
-
-
-/**
- * Test if the ccMaskRule is satisfied by the given condition code.
- * Used to mask destination writes according to the current condition code.
- */
-static INLINE GLboolean
-test_cc(GLuint condCode, GLuint ccMaskRule)
-{
-   switch (ccMaskRule) {
-   case COND_EQ: return (condCode == COND_EQ);
-   case COND_NE: return (condCode != COND_EQ);
-   case COND_LT: return (condCode == COND_LT);
-   case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
-   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
-   case COND_GT: return (condCode == COND_GT);
-   case COND_TR: return GL_TRUE;
-   case COND_FL: return GL_FALSE;
-   default:      return GL_TRUE;
-   }
-}
-
-
-/**
- * Evaluate the 4 condition codes against a predicate and return GL_TRUE
- * or GL_FALSE to indicate result.
- */
-static INLINE GLboolean
-eval_condition(const struct gl_program_machine *machine,
-               const struct prog_instruction *inst)
-{
-   const GLuint swizzle = inst->DstReg.CondSwizzle;
-   const GLuint condMask = inst->DstReg.CondMask;
-   if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
-       test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
-       test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
-       test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
-      return GL_TRUE;
-   }
-   else {
-      return GL_FALSE;
-   }
-}
-
-
-
-/**
- * Store 4 floats into a register.  Observe the instructions saturate and
- * set-condition-code flags.
- */
-static void
-store_vector4(const struct prog_instruction *inst,
-              struct gl_program_machine *machine, const GLfloat value[4])
-{
-   const struct prog_dst_register *dstReg = &(inst->DstReg);
-   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
-   GLuint writeMask = dstReg->WriteMask;
-   GLfloat clampedValue[4];
-   GLfloat *dst = get_dst_register_pointer(dstReg, machine);
-
-#if 0
-   if (value[0] > 1.0e10 ||
-       IS_INF_OR_NAN(value[0]) ||
-       IS_INF_OR_NAN(value[1]) ||
-       IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
-      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
-#endif
-
-   if (clamp) {
-      clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
-      clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
-      clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
-      clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
-      value = clampedValue;
-   }
-
-   if (dstReg->CondMask != COND_TR) {
-      /* condition codes may turn off some writes */
-      if (writeMask & WRITEMASK_X) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_X;
-      }
-      if (writeMask & WRITEMASK_Y) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_Y;
-      }
-      if (writeMask & WRITEMASK_Z) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_Z;
-      }
-      if (writeMask & WRITEMASK_W) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_W;
-      }
-   }
-
-#ifdef NAN_CHECK
-   assert(!IS_INF_OR_NAN(value[0]));
-   assert(!IS_INF_OR_NAN(value[0]));
-   assert(!IS_INF_OR_NAN(value[0]));
-   assert(!IS_INF_OR_NAN(value[0]));
-#endif
-
-   if (writeMask & WRITEMASK_X)
-      dst[0] = value[0];
-   if (writeMask & WRITEMASK_Y)
-      dst[1] = value[1];
-   if (writeMask & WRITEMASK_Z)
-      dst[2] = value[2];
-   if (writeMask & WRITEMASK_W)
-      dst[3] = value[3];
-
-   if (inst->CondUpdate) {
-      if (writeMask & WRITEMASK_X)
-         machine->CondCodes[0] = generate_cc(value[0]);
-      if (writeMask & WRITEMASK_Y)
-         machine->CondCodes[1] = generate_cc(value[1]);
-      if (writeMask & WRITEMASK_Z)
-         machine->CondCodes[2] = generate_cc(value[2]);
-      if (writeMask & WRITEMASK_W)
-         machine->CondCodes[3] = generate_cc(value[3]);
-#if DEBUG_PROG
-      printf("CondCodes=(%s,%s,%s,%s) for:\n",
-             _mesa_condcode_string(machine->CondCodes[0]),
-             _mesa_condcode_string(machine->CondCodes[1]),
-             _mesa_condcode_string(machine->CondCodes[2]),
-             _mesa_condcode_string(machine->CondCodes[3]));
-#endif
-   }
-}
-
-
-/**
- * Store 4 uints into a register.  Observe the set-condition-code flags.
- */
-static void
-store_vector4ui(const struct prog_instruction *inst,
-                struct gl_program_machine *machine, const GLuint value[4])
-{
-   const struct prog_dst_register *dstReg = &(inst->DstReg);
-   GLuint writeMask = dstReg->WriteMask;
-   GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
-
-   if (dstReg->CondMask != COND_TR) {
-      /* condition codes may turn off some writes */
-      if (writeMask & WRITEMASK_X) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_X;
-      }
-      if (writeMask & WRITEMASK_Y) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_Y;
-      }
-      if (writeMask & WRITEMASK_Z) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_Z;
-      }
-      if (writeMask & WRITEMASK_W) {
-         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
-                      dstReg->CondMask))
-            writeMask &= ~WRITEMASK_W;
-      }
-   }
-
-   if (writeMask & WRITEMASK_X)
-      dst[0] = value[0];
-   if (writeMask & WRITEMASK_Y)
-      dst[1] = value[1];
-   if (writeMask & WRITEMASK_Z)
-      dst[2] = value[2];
-   if (writeMask & WRITEMASK_W)
-      dst[3] = value[3];
-
-   if (inst->CondUpdate) {
-      if (writeMask & WRITEMASK_X)
-         machine->CondCodes[0] = generate_cc((float)value[0]);
-      if (writeMask & WRITEMASK_Y)
-         machine->CondCodes[1] = generate_cc((float)value[1]);
-      if (writeMask & WRITEMASK_Z)
-         machine->CondCodes[2] = generate_cc((float)value[2]);
-      if (writeMask & WRITEMASK_W)
-         machine->CondCodes[3] = generate_cc((float)value[3]);
-#if DEBUG_PROG
-      printf("CondCodes=(%s,%s,%s,%s) for:\n",
-             _mesa_condcode_string(machine->CondCodes[0]),
-             _mesa_condcode_string(machine->CondCodes[1]),
-             _mesa_condcode_string(machine->CondCodes[2]),
-             _mesa_condcode_string(machine->CondCodes[3]));
-#endif
-   }
-}
-
-
-
-/**
- * Execute the given vertex/fragment program.
- *
- * \param ctx  rendering context
- * \param program  the program to execute
- * \param machine  machine state (must be initialized)
- * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
- */
-GLboolean
-_mesa_execute_program(struct gl_context * ctx,
-                      const struct gl_program *program,
-                      struct gl_program_machine *machine)
-{
-   const GLuint numInst = program->NumInstructions;
-   const GLuint maxExec = 10000;
-   GLuint pc, numExec = 0;
-
-   machine->CurProgram = program;
-
-   if (DEBUG_PROG) {
-      printf("execute program %u --------------------\n", program->Id);
-   }
-
-   if (program->Target == GL_VERTEX_PROGRAM_ARB) {
-      machine->EnvParams = ctx->VertexProgram.Parameters;
-   }
-   else {
-      machine->EnvParams = ctx->FragmentProgram.Parameters;
-   }
-
-   for (pc = 0; pc < numInst; pc++) {
-      const struct prog_instruction *inst = program->Instructions + pc;
-
-      if (DEBUG_PROG) {
-         _mesa_print_instruction(inst);
-      }
-
-      switch (inst->Opcode) {
-      case OPCODE_ABS:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] = FABSF(a[0]);
-            result[1] = FABSF(a[1]);
-            result[2] = FABSF(a[2]);
-            result[3] = FABSF(a[3]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_ADD:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] + b[0];
-            result[1] = a[1] + b[1];
-            result[2] = a[2] + b[2];
-            result[3] = a[3] + b[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_AND:     /* bitwise AND */
-         {
-            GLuint a[4], b[4], result[4];
-            fetch_vector4ui(&inst->SrcReg[0], machine, a);
-            fetch_vector4ui(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] & b[0];
-            result[1] = a[1] & b[1];
-            result[2] = a[2] & b[2];
-            result[3] = a[3] & b[3];
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_ARL:
-         {
-            GLfloat t[4];
-            fetch_vector4(&inst->SrcReg[0], machine, t);
-            machine->AddressReg[0][0] = IFLOOR(t[0]);
-            if (DEBUG_PROG) {
-               printf("ARL %d\n", machine->AddressReg[0][0]);
-            }
-         }
-         break;
-      case OPCODE_BGNLOOP:
-         /* no-op */
-         ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                == OPCODE_ENDLOOP);
-         break;
-      case OPCODE_ENDLOOP:
-         /* subtract 1 here since pc is incremented by for(pc) loop */
-         ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                == OPCODE_BGNLOOP);
-         pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
-         break;
-      case OPCODE_BGNSUB:      /* begin subroutine */
-         break;
-      case OPCODE_ENDSUB:      /* end subroutine */
-         break;
-      case OPCODE_BRA:         /* branch (conditional) */
-         if (eval_condition(machine, inst)) {
-            /* take branch */
-            /* Subtract 1 here since we'll do pc++ below */
-            pc = inst->BranchTarget - 1;
-         }
-         break;
-      case OPCODE_BRK:         /* break out of loop (conditional) */
-         ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                == OPCODE_ENDLOOP);
-         if (eval_condition(machine, inst)) {
-            /* break out of loop */
-            /* pc++ at end of for-loop will put us after the ENDLOOP inst */
-            pc = inst->BranchTarget;
-         }
-         break;
-      case OPCODE_CONT:        /* continue loop (conditional) */
-         ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                == OPCODE_ENDLOOP);
-         if (eval_condition(machine, inst)) {
-            /* continue at ENDLOOP */
-            /* Subtract 1 here since we'll do pc++ at end of for-loop */
-            pc = inst->BranchTarget - 1;
-         }
-         break;
-      case OPCODE_CAL:         /* Call subroutine (conditional) */
-         if (eval_condition(machine, inst)) {
-            /* call the subroutine */
-            if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
-               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
-            }
-            machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
-            /* Subtract 1 here since we'll do pc++ at end of for-loop */
-            pc = inst->BranchTarget - 1;
-         }
-         break;
-      case OPCODE_CMP:
-         {
-            GLfloat a[4], b[4], c[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            fetch_vector4(&inst->SrcReg[2], machine, c);
-            result[0] = a[0] < 0.0F ? b[0] : c[0];
-            result[1] = a[1] < 0.0F ? b[1] : c[1];
-            result[2] = a[2] < 0.0F ? b[2] : c[2];
-            result[3] = a[3] < 0.0F ? b[3] : c[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3],
-                      c[0], c[1], c[2], c[3]);
-            }
-         }
-         break;
-      case OPCODE_COS:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) cos(a[0]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_DDX:         /* Partial derivative with respect to X */
-         {
-            GLfloat result[4];
-            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
-                                'X', result);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_DDY:         /* Partial derivative with respect to Y */
-         {
-            GLfloat result[4];
-            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
-                                'Y', result);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_DP2:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("DP2 %g = (%g %g) . (%g %g)\n",
-                      result[0], a[0], a[1], b[0], b[1]);
-            }
-         }
-         break;
-      case OPCODE_DP2A:
-         {
-            GLfloat a[4], b[4], c, result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            fetch_vector1(&inst->SrcReg[1], machine, &c);
-            result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
-                      result[0], a[0], a[1], b[0], b[1], c);
-            }
-         }
-         break;
-      case OPCODE_DP3:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
-                      result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
-            }
-         }
-         break;
-      case OPCODE_DP4:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
-                      result[0], a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_DPH:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_DST:         /* Distance vector */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = 1.0F;
-            result[1] = a[1] * b[1];
-            result[2] = a[2];
-            result[3] = b[3];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_EXP:
-         {
-            GLfloat t[4], q[4], floor_t0;
-            fetch_vector1(&inst->SrcReg[0], machine, t);
-            floor_t0 = FLOORF(t[0]);
-            if (floor_t0 > FLT_MAX_EXP) {
-               SET_POS_INFINITY(q[0]);
-               SET_POS_INFINITY(q[2]);
-            }
-            else if (floor_t0 < FLT_MIN_EXP) {
-               q[0] = 0.0F;
-               q[2] = 0.0F;
-            }
-            else {
-               q[0] = LDEXPF(1.0, (int) floor_t0);
-               /* Note: GL_NV_vertex_program expects 
-                * result.z = result.x * APPX(result.y)
-                * We do what the ARB extension says.
-                */
-               q[2] = (GLfloat) pow(2.0, t[0]);
-            }
-            q[1] = t[0] - floor_t0;
-            q[3] = 1.0F;
-            store_vector4( inst, machine, q );
-         }
-         break;
-      case OPCODE_EX2:         /* Exponential base 2 */
-         {
-            GLfloat a[4], result[4], val;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            val = (GLfloat) pow(2.0, a[0]);
-            /*
-            if (IS_INF_OR_NAN(val))
-               val = 1.0e10;
-            */
-            result[0] = result[1] = result[2] = result[3] = val;
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_FLR:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] = FLOORF(a[0]);
-            result[1] = FLOORF(a[1]);
-            result[2] = FLOORF(a[2]);
-            result[3] = FLOORF(a[3]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_FRC:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] = a[0] - FLOORF(a[0]);
-            result[1] = a[1] - FLOORF(a[1]);
-            result[2] = a[2] - FLOORF(a[2]);
-            result[3] = a[3] - FLOORF(a[3]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_IF:
-         {
-            GLboolean cond;
-            ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                   == OPCODE_ELSE ||
-                   program->Instructions[inst->BranchTarget].Opcode
-                   == OPCODE_ENDIF);
-            /* eval condition */
-            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
-               GLfloat a[4];
-               fetch_vector1(&inst->SrcReg[0], machine, a);
-               cond = (a[0] != 0.0);
-            }
-            else {
-               cond = eval_condition(machine, inst);
-            }
-            if (DEBUG_PROG) {
-               printf("IF: %d\n", cond);
-            }
-            /* do if/else */
-            if (cond) {
-               /* do if-clause (just continue execution) */
-            }
-            else {
-               /* go to the instruction after ELSE or ENDIF */
-               assert(inst->BranchTarget >= 0);
-               pc = inst->BranchTarget;
-            }
-         }
-         break;
-      case OPCODE_ELSE:
-         /* goto ENDIF */
-         ASSERT(program->Instructions[inst->BranchTarget].Opcode
-                == OPCODE_ENDIF);
-         assert(inst->BranchTarget >= 0);
-         pc = inst->BranchTarget;
-         break;
-      case OPCODE_ENDIF:
-         /* nothing */
-         break;
-      case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
-         if (eval_condition(machine, inst)) {
-            return GL_FALSE;
-         }
-         break;
-      case OPCODE_KIL:         /* ARB_f_p only */
-         {
-            GLfloat a[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            if (DEBUG_PROG) {
-               printf("KIL if (%g %g %g %g) <= 0.0\n",
-                      a[0], a[1], a[2], a[3]);
-            }
-
-            if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
-               return GL_FALSE;
-            }
-         }
-         break;
-      case OPCODE_LG2:         /* log base 2 */
-         {
-            GLfloat a[4], result[4], val;
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-	    /* The fast LOG2 macro doesn't meet the precision requirements.
-	     */
-            if (a[0] == 0.0F) {
-               val = -FLT_MAX;
-            }
-            else {
-               val = (float)(log(a[0]) * 1.442695F);
-            }
-            result[0] = result[1] = result[2] = result[3] = val;
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_LIT:
-         {
-            const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            a[0] = MAX2(a[0], 0.0F);
-            a[1] = MAX2(a[1], 0.0F);
-            /* XXX ARB version clamps a[3], NV version doesn't */
-            a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
-            result[0] = 1.0F;
-            result[1] = a[0];
-            /* XXX we could probably just use pow() here */
-            if (a[0] > 0.0F) {
-               if (a[1] == 0.0 && a[3] == 0.0)
-                  result[2] = 1.0F;
-               else
-                  result[2] = (GLfloat) pow(a[1], a[3]);
-            }
-            else {
-               result[2] = 0.0F;
-            }
-            result[3] = 1.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3]);
-            }
-         }
-         break;
-      case OPCODE_LOG:
-         {
-            GLfloat t[4], q[4], abs_t0;
-            fetch_vector1(&inst->SrcReg[0], machine, t);
-            abs_t0 = FABSF(t[0]);
-            if (abs_t0 != 0.0F) {
-               /* Since we really can't handle infinite values on VMS
-                * like other OSes we'll use __MAXFLOAT to represent
-                * infinity.  This may need some tweaking.
-                */
-#ifdef VMS
-               if (abs_t0 == __MAXFLOAT)
-#else
-               if (IS_INF_OR_NAN(abs_t0))
-#endif
-               {
-                  SET_POS_INFINITY(q[0]);
-                  q[1] = 1.0F;
-                  SET_POS_INFINITY(q[2]);
-               }
-               else {
-                  int exponent;
-                  GLfloat mantissa = FREXPF(t[0], &exponent);
-                  q[0] = (GLfloat) (exponent - 1);
-                  q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
-
-		  /* The fast LOG2 macro doesn't meet the precision
-		   * requirements.
-		   */
-                  q[2] = (float)(log(t[0]) * 1.442695F);
-               }
-            }
-            else {
-               SET_NEG_INFINITY(q[0]);
-               q[1] = 1.0F;
-               SET_NEG_INFINITY(q[2]);
-            }
-            q[3] = 1.0;
-            store_vector4(inst, machine, q);
-         }
-         break;
-      case OPCODE_LRP:
-         {
-            GLfloat a[4], b[4], c[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            fetch_vector4(&inst->SrcReg[2], machine, c);
-            result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
-            result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
-            result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
-            result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("LRP (%g %g %g %g) = (%g %g %g %g), "
-                      "(%g %g %g %g), (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
-            }
-         }
-         break;
-      case OPCODE_MAD:
-         {
-            GLfloat a[4], b[4], c[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            fetch_vector4(&inst->SrcReg[2], machine, c);
-            result[0] = a[0] * b[0] + c[0];
-            result[1] = a[1] * b[1] + c[1];
-            result[2] = a[2] * b[2] + c[2];
-            result[3] = a[3] * b[3] + c[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
-                      "(%g %g %g %g) + (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
-            }
-         }
-         break;
-      case OPCODE_MAX:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = MAX2(a[0], b[0]);
-            result[1] = MAX2(a[1], b[1]);
-            result[2] = MAX2(a[2], b[2]);
-            result[3] = MAX2(a[3], b[3]);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_MIN:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = MIN2(a[0], b[0]);
-            result[1] = MIN2(a[1], b[1]);
-            result[2] = MIN2(a[2], b[2]);
-            result[3] = MIN2(a[3], b[3]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_MOV:
-         {
-            GLfloat result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, result);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("MOV (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3]);
-            }
-         }
-         break;
-      case OPCODE_MUL:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] * b[0];
-            result[1] = a[1] * b[1];
-            result[2] = a[2] * b[2];
-            result[3] = a[3] * b[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_NOISE1:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] =
-               result[1] =
-               result[2] =
-               result[3] = _mesa_noise1(a[0]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_NOISE2:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] =
-               result[1] =
-               result[2] = result[3] = _mesa_noise2(a[0], a[1]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_NOISE3:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] =
-               result[1] =
-               result[2] =
-               result[3] = _mesa_noise3(a[0], a[1], a[2]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_NOISE4:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] =
-               result[1] =
-               result[2] =
-               result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_NOP:
-         break;
-      case OPCODE_NOT:         /* bitwise NOT */
-         {
-            GLuint a[4], result[4];
-            fetch_vector4ui(&inst->SrcReg[0], machine, a);
-            result[0] = ~a[0];
-            result[1] = ~a[1];
-            result[2] = ~a[2];
-            result[3] = ~a[3];
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_NRM3:        /* 3-component normalization */
-         {
-            GLfloat a[4], result[4];
-            GLfloat tmp;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
-            if (tmp != 0.0F)
-               tmp = INV_SQRTF(tmp);
-            result[0] = tmp * a[0];
-            result[1] = tmp * a[1];
-            result[2] = tmp * a[2];
-            result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_NRM4:        /* 4-component normalization */
-         {
-            GLfloat a[4], result[4];
-            GLfloat tmp;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
-            if (tmp != 0.0F)
-               tmp = INV_SQRTF(tmp);
-            result[0] = tmp * a[0];
-            result[1] = tmp * a[1];
-            result[2] = tmp * a[2];
-            result[3] = tmp * a[3];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_OR:          /* bitwise OR */
-         {
-            GLuint a[4], b[4], result[4];
-            fetch_vector4ui(&inst->SrcReg[0], machine, a);
-            fetch_vector4ui(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] | b[0];
-            result[1] = a[1] | b[1];
-            result[2] = a[2] | b[2];
-            result[3] = a[3] | b[3];
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
-         {
-            GLfloat a[4];
-            GLuint result[4];
-            GLhalfNV hx, hy;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            hx = _mesa_float_to_half(a[0]);
-            hy = _mesa_float_to_half(a[1]);
-            result[0] =
-            result[1] =
-            result[2] =
-            result[3] = hx | (hy << 16);
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
-         {
-            GLfloat a[4];
-            GLuint result[4], usx, usy;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            a[0] = CLAMP(a[0], 0.0F, 1.0F);
-            a[1] = CLAMP(a[1], 0.0F, 1.0F);
-            usx = IROUND(a[0] * 65535.0F);
-            usy = IROUND(a[1] * 65535.0F);
-            result[0] =
-            result[1] =
-            result[2] =
-            result[3] = usx | (usy << 16);
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
-         {
-            GLfloat a[4];
-            GLuint result[4], ubx, uby, ubz, ubw;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
-            a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
-            a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
-            a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
-            ubx = IROUND(127.0F * a[0] + 128.0F);
-            uby = IROUND(127.0F * a[1] + 128.0F);
-            ubz = IROUND(127.0F * a[2] + 128.0F);
-            ubw = IROUND(127.0F * a[3] + 128.0F);
-            result[0] =
-            result[1] =
-            result[2] =
-            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
-         {
-            GLfloat a[4];
-            GLuint result[4], ubx, uby, ubz, ubw;
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            a[0] = CLAMP(a[0], 0.0F, 1.0F);
-            a[1] = CLAMP(a[1], 0.0F, 1.0F);
-            a[2] = CLAMP(a[2], 0.0F, 1.0F);
-            a[3] = CLAMP(a[3], 0.0F, 1.0F);
-            ubx = IROUND(255.0F * a[0]);
-            uby = IROUND(255.0F * a[1]);
-            ubz = IROUND(255.0F * a[2]);
-            ubw = IROUND(255.0F * a[3]);
-            result[0] =
-            result[1] =
-            result[2] =
-            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_POW:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            fetch_vector1(&inst->SrcReg[1], machine, b);
-            result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) pow(a[0], b[0]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_RCC:  /* clamped riciprocal */
-         {
-            const float largest = 1.884467e+19, smallest = 5.42101e-20;
-            GLfloat a[4], r, result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            if (DEBUG_PROG) {
-               if (a[0] == 0)
-                  printf("RCC(0)\n");
-               else if (IS_INF_OR_NAN(a[0]))
-                  printf("RCC(inf)\n");
-            }
-            if (a[0] == 1.0F) {
-               r = 1.0F;
-            }
-            else {
-               r = 1.0F / a[0];
-            }
-            if (positive(r)) {
-               if (r > largest) {
-                  r = largest;
-               }
-               else if (r < smallest) {
-                  r = smallest;
-               }
-            }
-            else {
-               if (r < -largest) {
-                  r = -largest;
-               }
-               else if (r > -smallest) {
-                  r = -smallest;
-               }
-            }
-            result[0] = result[1] = result[2] = result[3] = r;
-            store_vector4(inst, machine, result);
-         }
-         break;
-
-      case OPCODE_RCP:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            if (DEBUG_PROG) {
-               if (a[0] == 0)
-                  printf("RCP(0)\n");
-               else if (IS_INF_OR_NAN(a[0]))
-                  printf("RCP(inf)\n");
-            }
-            result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_RET:         /* return from subroutine (conditional) */
-         if (eval_condition(machine, inst)) {
-            if (machine->StackDepth == 0) {
-               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
-            }
-            /* subtract one because of pc++ in the for loop */
-            pc = machine->CallStack[--machine->StackDepth] - 1;
-         }
-         break;
-      case OPCODE_RFL:         /* reflection vector */
-         {
-            GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
-            fetch_vector4(&inst->SrcReg[0], machine, axis);
-            fetch_vector4(&inst->SrcReg[1], machine, dir);
-            tmpW = DOT3(axis, axis);
-            tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
-            result[0] = tmpX * axis[0] - dir[0];
-            result[1] = tmpX * axis[1] - dir[1];
-            result[2] = tmpX * axis[2] - dir[2];
-            /* result[3] is never written! XXX enforce in parser! */
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_RSQ:         /* 1 / sqrt() */
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            a[0] = FABSF(a[0]);
-            result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
-            }
-         }
-         break;
-      case OPCODE_SCS:         /* sine and cos */
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = (GLfloat) cos(a[0]);
-            result[1] = (GLfloat) sin(a[0]);
-            result[2] = 0.0;    /* undefined! */
-            result[3] = 0.0;    /* undefined! */
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_SEQ:         /* set on equal */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SFL:         /* set false, operands ignored */
-         {
-            static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_SGE:         /* set on greater or equal */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SGT:         /* set on greater */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SIN:
-         {
-            GLfloat a[4], result[4];
-            fetch_vector1(&inst->SrcReg[0], machine, a);
-            result[0] = result[1] = result[2] = result[3]
-               = (GLfloat) sin(a[0]);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_SLE:         /* set on less or equal */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SLT:         /* set on less */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SNE:         /* set on not equal */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
-            result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
-            result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
-            result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3],
-                      b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
-            result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
-            result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
-            result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_STR:         /* set true, operands ignored */
-         {
-            static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_SUB:
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] - b[0];
-            result[1] = a[1] - b[1];
-            result[2] = a[2] - b[2];
-            result[3] = a[3] - b[3];
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
-            }
-         }
-         break;
-      case OPCODE_SWZ:         /* extended swizzle */
-         {
-            const struct prog_src_register *source = &inst->SrcReg[0];
-            const GLfloat *src = get_src_register_pointer(source, machine);
-            GLfloat result[4];
-            GLuint i;
-            for (i = 0; i < 4; i++) {
-               const GLuint swz = GET_SWZ(source->Swizzle, i);
-               if (swz == SWIZZLE_ZERO)
-                  result[i] = 0.0;
-               else if (swz == SWIZZLE_ONE)
-                  result[i] = 1.0;
-               else {
-                  ASSERT(swz >= 0);
-                  ASSERT(swz <= 3);
-                  result[i] = src[swz];
-               }
-               if (source->Negate & (1 << i))
-                  result[i] = -result[i];
-            }
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_TEX:         /* Both ARB and NV frag prog */
-         /* Simple texel lookup */
-         {
-            GLfloat texcoord[4], color[4];
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-
-            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
-
-            if (DEBUG_PROG) {
-               printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
-                      color[0], color[1], color[2], color[3],
-                      inst->TexSrcUnit,
-                      texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
-            }
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TXB:         /* GL_ARB_fragment_program only */
-         /* Texel lookup with LOD bias */
-         {
-            GLfloat texcoord[4], color[4], lodBias;
-
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-
-            /* texcoord[3] is the bias to add to lambda */
-            lodBias = texcoord[3];
-
-            fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
-
-            if (DEBUG_PROG) {
-               printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
-                      "  bias %g\n",
-                      color[0], color[1], color[2], color[3],
-                      inst->TexSrcUnit,
-                      texcoord[0],
-                      texcoord[1],
-                      texcoord[2],
-                      texcoord[3],
-                      lodBias);
-            }
-
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TXD:         /* GL_NV_fragment_program only */
-         /* Texture lookup w/ partial derivatives for LOD */
-         {
-            GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-            fetch_vector4(&inst->SrcReg[1], machine, dtdx);
-            fetch_vector4(&inst->SrcReg[2], machine, dtdy);
-            machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
-                                     0.0, /* lodBias */
-                                     inst->TexSrcUnit, color);
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TXL:
-         /* Texel lookup with explicit LOD */
-         {
-            GLfloat texcoord[4], color[4], lod;
-
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-
-            /* texcoord[3] is the LOD */
-            lod = texcoord[3];
-
-	    machine->FetchTexelLod(ctx, texcoord, lod,
-				   machine->Samplers[inst->TexSrcUnit], color);
-
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TXP:         /* GL_ARB_fragment_program only */
-         /* Texture lookup w/ projective divide */
-         {
-            GLfloat texcoord[4], color[4];
-
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-            /* Not so sure about this test - if texcoord[3] is
-             * zero, we'd probably be fine except for an ASSERT in
-             * IROUND_POS() which gets triggered by the inf values created.
-             */
-            if (texcoord[3] != 0.0) {
-               texcoord[0] /= texcoord[3];
-               texcoord[1] /= texcoord[3];
-               texcoord[2] /= texcoord[3];
-            }
-
-            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
-
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
-         /* Texture lookup w/ projective divide, as above, but do not
-          * do the divide by w if sampling from a cube map.
-          */
-         {
-            GLfloat texcoord[4], color[4];
-
-            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
-            if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
-                texcoord[3] != 0.0) {
-               texcoord[0] /= texcoord[3];
-               texcoord[1] /= texcoord[3];
-               texcoord[2] /= texcoord[3];
-            }
-
-            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
-
-            store_vector4(inst, machine, color);
-         }
-         break;
-      case OPCODE_TRUNC:       /* truncate toward zero */
-         {
-            GLfloat a[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            result[0] = (GLfloat) (GLint) a[0];
-            result[1] = (GLfloat) (GLint) a[1];
-            result[2] = (GLfloat) (GLint) a[2];
-            result[3] = (GLfloat) (GLint) a[3];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_UP2H:        /* unpack two 16-bit floats */
-         {
-            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
-            GLfloat result[4];
-            GLushort hx, hy;
-            hx = raw & 0xffff;
-            hy = raw >> 16;
-            result[0] = result[2] = _mesa_half_to_float(hx);
-            result[1] = result[3] = _mesa_half_to_float(hy);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_UP2US:       /* unpack two GLushorts */
-         {
-            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
-            GLfloat result[4];
-            GLushort usx, usy;
-            usx = raw & 0xffff;
-            usy = raw >> 16;
-            result[0] = result[2] = usx * (1.0f / 65535.0f);
-            result[1] = result[3] = usy * (1.0f / 65535.0f);
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_UP4B:        /* unpack four GLbytes */
-         {
-            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
-            GLfloat result[4];
-            result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
-            result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
-            result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
-            result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_UP4UB:       /* unpack four GLubytes */
-         {
-            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
-            GLfloat result[4];
-            result[0] = ((raw >> 0) & 0xff) / 255.0F;
-            result[1] = ((raw >> 8) & 0xff) / 255.0F;
-            result[2] = ((raw >> 16) & 0xff) / 255.0F;
-            result[3] = ((raw >> 24) & 0xff) / 255.0F;
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_XOR:         /* bitwise XOR */
-         {
-            GLuint a[4], b[4], result[4];
-            fetch_vector4ui(&inst->SrcReg[0], machine, a);
-            fetch_vector4ui(&inst->SrcReg[1], machine, b);
-            result[0] = a[0] ^ b[0];
-            result[1] = a[1] ^ b[1];
-            result[2] = a[2] ^ b[2];
-            result[3] = a[3] ^ b[3];
-            store_vector4ui(inst, machine, result);
-         }
-         break;
-      case OPCODE_XPD:         /* cross product */
-         {
-            GLfloat a[4], b[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            result[0] = a[1] * b[2] - a[2] * b[1];
-            result[1] = a[2] * b[0] - a[0] * b[2];
-            result[2] = a[0] * b[1] - a[1] * b[0];
-            result[3] = 1.0;
-            store_vector4(inst, machine, result);
-            if (DEBUG_PROG) {
-               printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
-                      result[0], result[1], result[2], result[3],
-                      a[0], a[1], a[2], b[0], b[1], b[2]);
-            }
-         }
-         break;
-      case OPCODE_X2D:         /* 2-D matrix transform */
-         {
-            GLfloat a[4], b[4], c[4], result[4];
-            fetch_vector4(&inst->SrcReg[0], machine, a);
-            fetch_vector4(&inst->SrcReg[1], machine, b);
-            fetch_vector4(&inst->SrcReg[2], machine, c);
-            result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
-            result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
-            result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
-            result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
-            store_vector4(inst, machine, result);
-         }
-         break;
-      case OPCODE_PRINT:
-         {
-            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
-               GLfloat a[4];
-               fetch_vector4(&inst->SrcReg[0], machine, a);
-               printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
-                            a[0], a[1], a[2], a[3]);
-            }
-            else {
-               printf("%s\n", (const char *) inst->Data);
-            }
-         }
-         break;
-      case OPCODE_END:
-         return GL_TRUE;
-      default:
-         _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
-                       inst->Opcode);
-         return GL_TRUE;        /* return value doesn't matter */
-      }
-
-      numExec++;
-      if (numExec > maxExec) {
-	 static GLboolean reported = GL_FALSE;
-	 if (!reported) {
-	    _mesa_problem(ctx, "Infinite loop detected in fragment program");
-	    reported = GL_TRUE;
-	 }
-         return GL_TRUE;
-      }
-
-   } /* for pc */
-
-   return GL_TRUE;
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.3
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file prog_execute.c
+ * Software interpreter for vertex/fragment programs.
+ * \author Brian Paul
+ */
+
+/*
+ * NOTE: we do everything in single-precision floating point; we don't
+ * currently observe the single/half/fixed-precision qualifiers.
+ *
+ */
+
+
+#include "main/glheader.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+#include "prog_execute.h"
+#include "prog_instruction.h"
+#include "prog_parameter.h"
+#include "prog_print.h"
+#include "prog_noise.h"
+
+
+/* Debug switch: set nonzero to enable the "if (DEBUG_PROG) printf(...)" per-opcode traces in this file. */
+#define DEBUG_PROG 0
+
+
+/**
+ * Set x (an assignable float) to positive or negative infinity; the method depends on the platform.
+ */
+#if defined(USE_IEEE) || defined(_WIN32)   /* store the IEEE-754 single-precision infinity bit patterns */
+#define SET_POS_INFINITY(x)                  \
+   do {                                      \
+         fi_type fi;                         \
+         fi.i = 0x7F800000;                  \
+         x = fi.f;                           \
+   } while (0)
+#define SET_NEG_INFINITY(x)                  \
+   do {                                      \
+         fi_type fi;                         \
+         fi.i = 0xFF800000;                  \
+         x = fi.f;                           \
+   } while (0)
+#elif defined(VMS)   /* uses +/-__MAXFLOAT (presumably the largest finite float, not a true inf; TODO confirm) */
+#define SET_POS_INFINITY(x)  x = __MAXFLOAT
+#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
+#else   /* fallback: HUGE_VAL cast to GLfloat; note these two branches expand to a bare assignment */
+#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
+#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
+#endif /* USE_IEEE || _WIN32, VMS, other */
+/* Store 'bits' into x through fi_type's integer member; x must be addressable, 'bits' is not parenthesized. */
+#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
+
+/* All-zero vector returned by get_src_register_pointer() when a register index is out of range. */
+static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+
+
+
+/**
+ * Return GL_TRUE when bit 31 of x's integer view is clear (+0 and positive
+ * values), GL_FALSE when it is set.  Used by the RCC opcode to pick a clamp range.
+ */
+static INLINE GLboolean
+positive(float x)
+{
+   fi_type fi;
+   fi.f = x;
+   if (fi.i & 0x80000000)   /* sign bit set (assuming IEEE-754 single layout, so -0 counts as negative) */
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+
+
+/**
+ * Return a pointer to the 4-element float vector specified by the given
+ * source register.
+ *
+ * Reads past the end of most register files do not fail: they resolve to
+ * ZeroVec, so the caller fetches (0, 0, 0, 0).  PROGRAM_SYSTEM_VALUE is
+ * the exception and is only guarded by an assert.
+ *
+ * \param source   source register descriptor (file, index, RelAddr flag)
+ * \param machine  machine state that holds the register storage
+ * \return the register's storage, ZeroVec as described above, or NULL
+ *         (after reporting a problem) for an unrecognized register file.
+ */
+static INLINE const GLfloat *
+get_src_register_pointer(const struct prog_src_register *source,
+                         const struct gl_program_machine *machine)
+{
+   const struct gl_program *program = machine->CurProgram;
+   GLint index = source->Index;
+
+   /* Relative addressing: offset the index by the address register. */
+   if (source->RelAddr) {
+      index += machine->AddressReg[0][0];
+      if (index < 0)
+         return ZeroVec;
+   }
+
+   switch (source->File) {
+   case PROGRAM_TEMPORARY:
+      return (index < MAX_PROGRAM_TEMPS)
+         ? machine->Temporaries[index] : ZeroVec;
+
+   case PROGRAM_INPUT:
+      /* vertex and fragment programs keep inputs in different arrays */
+      if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+         return (index < VERT_ATTRIB_MAX)
+            ? machine->VertAttribs[index] : ZeroVec;
+      }
+      return (index < FRAG_ATTRIB_MAX)
+         ? machine->Attribs[index][machine->CurElement] : ZeroVec;
+
+   case PROGRAM_OUTPUT:
+      return (index < MAX_PROGRAM_OUTPUTS)
+         ? machine->Outputs[index] : ZeroVec;
+
+   case PROGRAM_LOCAL_PARAM:
+      return (index < MAX_PROGRAM_LOCAL_PARAMS)
+         ? program->LocalParams[index] : ZeroVec;
+
+   case PROGRAM_ENV_PARAM:
+      return (index < MAX_PROGRAM_ENV_PARAMS)
+         ? machine->EnvParams[index] : ZeroVec;
+
+   /* These four files all live in the program's parameter list. */
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+   case PROGRAM_NAMED_PARAM:
+      return (index < (GLint) program->Parameters->NumParameters)
+         ? (GLfloat *) program->Parameters->ParameterValues[index]
+         : ZeroVec;
+
+   case PROGRAM_SYSTEM_VALUE:
+      /* no soft fallback here: out-of-range is only caught by assert */
+      assert(index < Elements(machine->SystemValues));
+      return machine->SystemValues[index];
+
+   default:
+      _mesa_problem(NULL,
+         "Invalid src register file %d in get_src_register_pointer()",
+         source->File);
+      return NULL;
+   }
+}
+
+
+/**
+ * Return a pointer to the 4-element float vector specified by the given
+ * destination register.
+ * Unusable targets (negative relative index, index past the end of the
+ * file, PROGRAM_WRITE_ONLY) map to a static scratch vector; an unknown
+ * register file is reported and yields NULL.
+ */
+static INLINE GLfloat *
+get_dst_register_pointer(const struct prog_dst_register *dest,
+                         struct gl_program_machine *machine)
+{
+   static GLfloat scratch[4];
+   GLint index = dest->Index;
+
+   if (dest->RelAddr) {
+      /* add address register value to dst index/offset */
+      index += machine->AddressReg[0][0];
+      if (index < 0)
+         return scratch;
+   }
+
+   switch (dest->File) {
+   case PROGRAM_TEMPORARY:
+      return (index < MAX_PROGRAM_TEMPS)
+         ? machine->Temporaries[index] : scratch;
+
+   case PROGRAM_OUTPUT:
+      return (index < MAX_PROGRAM_OUTPUTS)
+         ? machine->Outputs[index] : scratch;
+
+   case PROGRAM_WRITE_ONLY:
+      return scratch;
+
+   default:
+      _mesa_problem(NULL,
+         "Invalid dest register file %d in get_dst_register_pointer()",
+         dest->File);
+      return NULL;
+   }
+}
+
+
+
+/**
+ * Fetch a 4-element float vector from the given source register into
+ * result[], applying the swizzle first, then Abs, then Negate.
+ */
+static void
+fetch_vector4(const struct prog_src_register *source,
+              const struct gl_program_machine *machine, GLfloat result[4])
+{
+   const GLfloat *src = get_src_register_pointer(source, machine);
+   ASSERT(src);
+
+   if (source->Swizzle == SWIZZLE_NOOP) {
+      COPY_4V(result, src);   /* no swizzling */
+   }
+   else {
+      ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
+      ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
+      ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
+      ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
+      result[0] = src[GET_SWZ(source->Swizzle, 0)];
+      result[1] = src[GET_SWZ(source->Swizzle, 1)];
+      result[2] = src[GET_SWZ(source->Swizzle, 2)];
+      result[3] = src[GET_SWZ(source->Swizzle, 3)];
+   }
+
+   if (source->Abs) {
+      result[0] = FABSF(result[0]);
+      result[1] = FABSF(result[1]);
+      result[2] = FABSF(result[2]);
+      result[3] = FABSF(result[3]);
+   }
+   if (source->Negate) {
+      ASSERT(source->Negate == NEGATE_XYZW);
+      result[0] = -result[0];
+      result[1] = -result[1];
+      result[2] = -result[2];
+      result[3] = -result[3];
+   }
+
+#ifdef NAN_CHECK
+   /* debug aid: each of the four fetched components must be finite */
+   assert(!IS_INF_OR_NAN(result[0]));
+   assert(!IS_INF_OR_NAN(result[1]));
+   assert(!IS_INF_OR_NAN(result[2]));
+   assert(!IS_INF_OR_NAN(result[3]));
+#endif
+}
+
+
+/**
+ * Fetch a 4-element uint vector from the given source register.
+ * Only the swizzle is honoured; the register's Abs and Negate flags are
+ * not applied.
+ */
+static void
+fetch_vector4ui(const struct prog_src_register *source,
+                const struct gl_program_machine *machine, GLuint result[4])
+{
+   const GLuint *raw = (GLuint *) get_src_register_pointer(source, machine);
+   GLuint chan;
+
+   ASSERT(raw);
+
+   if (source->Swizzle == SWIZZLE_NOOP) {
+      /* identity swizzle: straight copy */
+      COPY_4V(result, raw);
+      return;
+   }
+
+   /* gather each output channel from its selected source component */
+   for (chan = 0; chan < 4; chan++) {
+      const GLuint sel = GET_SWZ(source->Swizzle, chan);
+
+      ASSERT(sel <= 3);
+      result[chan] = raw[sel];
+   }
+}
+
+
+
+/**
+ * Fetch the derivative with respect to X or Y for the given register.
+ * XXX this currently only works for fragment program input attribs.
+ *
+ * \param ctx      rendering context (not referenced by this function)
+ * \param source   register whose derivative is wanted
+ * \param machine  machine state providing DerivX/DerivY and NumDeriv
+ * \param xOrY     'X' selects DerivX; any other value selects DerivY
+ * \param result   receives the derivative, or zeros when none is available
+ */
+static void
+fetch_vector4_deriv(struct gl_context * ctx,
+                    const struct prog_src_register *source,
+                    const struct gl_program_machine *machine,
+                    char xOrY, GLfloat result[4])
+{
+   const GLfloat *table;
+   GLfloat w, invQ, deriv[4];
+   GLuint i;
+
+   /* derivatives exist only for the first NumDeriv input registers */
+   if (source->File != PROGRAM_INPUT ||
+       source->Index >= (GLint) machine->NumDeriv) {
+      ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
+      return;
+   }
+
+   /* scale by 1/w, w taken from FRAG_ATTRIB_WPOS of the current element */
+   w = machine->Attribs[FRAG_ATTRIB_WPOS][machine->CurElement][3];
+   invQ = 1.0f / w;
+
+   /* pick the X or Y derivative row for this register */
+   table = (xOrY == 'X') ? machine->DerivX[source->Index]
+                         : machine->DerivY[source->Index];
+   for (i = 0; i < 4; i++) {
+      deriv[i] = table[i] * invQ;
+   }
+
+   /* swizzle, then the Abs and Negate modifiers */
+   for (i = 0; i < 4; i++) {
+      result[i] = deriv[GET_SWZ(source->Swizzle, i)];
+   }
+
+   if (source->Abs) {
+      for (i = 0; i < 4; i++)
+         result[i] = FABSF(result[i]);
+   }
+   if (source->Negate) {
+      ASSERT(source->Negate == NEGATE_XYZW);
+      for (i = 0; i < 4; i++)
+         result[i] = -result[i];
+   }
+}
+
+
+/**
+ * Scalar variant of fetch_vector4(): only result[0] is written, from the
+ * component in swizzle slot 0, with Abs and then Negate applied.
+ */
+static void
+fetch_vector1(const struct prog_src_register *source,
+              const struct gl_program_machine *machine, GLfloat result[4])
+{
+   const GLfloat *reg = get_src_register_pointer(source, machine);
+   GLfloat scalar;
+
+   ASSERT(reg);
+   scalar = reg[GET_SWZ(source->Swizzle, 0)];
+   if (source->Abs)
+      scalar = FABSF(scalar);
+   if (source->Negate)
+      scalar = -scalar;
+   result[0] = scalar;
+}
+
+/** Fetch the component in swizzle slot 0 as a raw uint (no Abs/Negate). */
+static GLuint
+fetch_vector1ui(const struct prog_src_register *source,
+                const struct gl_program_machine *machine)
+{
+   const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
+   ASSERT(src);
+   return src[GET_SWZ(source->Swizzle, 0)];
+}
+
+/**
+ * Fetch texel from texture.  The derivative-based fetch is used only when
+ * the coordinate register is the fragment texcoord input for TexSrcUnit.
+ */
+static INLINE void
+fetch_texel(struct gl_context *ctx,
+            const struct gl_program_machine *machine,
+            const struct prog_instruction *inst,
+            const GLfloat texcoord[4], GLfloat lodBias,
+            GLfloat color[4])
+{
+   const GLuint unit = machine->Samplers[inst->TexSrcUnit];
+   const struct prog_src_register *coordReg = &inst->SrcReg[0];
+   const GLboolean haveDeriv =
+      machine->NumDeriv > 0 &&
+      coordReg->File == PROGRAM_INPUT &&
+      coordReg->Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit;
+
+   if (!haveDeriv) {
+      machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
+      return;
+   }
+
+   machine->FetchTexelDeriv(ctx, texcoord,
+                            machine->DerivX[coordReg->Index],
+                            machine->DerivY[coordReg->Index],
+                            lodBias, unit, color);
+}
+
+
+/**
+ * Classify a value against zero and return the matching condition code:
+ * COND_GT, COND_LT or COND_EQ for ordinary values, COND_UN for NaN.
+ * The codes feed machine->CondCodes[] when an instruction sets CondUpdate.
+ */
+static INLINE GLuint
+generate_cc(float value)
+{
+   /* NaN is the only value that compares unequal to itself */
+   return (value != value) ? COND_UN
+        : (value > 0.0F)   ? COND_GT
+        : (value < 0.0F)   ? COND_LT
+        :                    COND_EQ;
+}
+
+
+/**
+ * Return whether condition code condCode satisfies the rule ccMaskRule.
+ */
+static INLINE GLboolean
+test_cc(GLuint condCode, GLuint ccMaskRule)
+{
+   const GLboolean eq = (condCode == COND_EQ);
+   const GLboolean lt = (condCode == COND_LT);
+   const GLboolean gt = (condCode == COND_GT);
+
+   if (ccMaskRule == COND_EQ) return eq;
+   if (ccMaskRule == COND_NE) return !eq;
+   if (ccMaskRule == COND_LT) return lt;
+   if (ccMaskRule == COND_GE) return gt || eq;
+   if (ccMaskRule == COND_LE) return lt || eq;
+   if (ccMaskRule == COND_GT) return gt;
+   /* COND_FL never passes; COND_TR and unrecognized rules always pass */
+   return (ccMaskRule == COND_FL) ? GL_FALSE : GL_TRUE;
+}
+
+
+/**
+ * Evaluate the instruction's condition: GL_TRUE if any of the four
+ * condition codes selected by DstReg.CondSwizzle passes DstReg.CondMask,
+ * GL_FALSE if none does.
+ */
+static INLINE GLboolean
+eval_condition(const struct gl_program_machine *machine,
+               const struct prog_instruction *inst)
+{
+   const GLuint rule = inst->DstReg.CondMask;
+   const GLuint select = inst->DstReg.CondSwizzle;
+   GLuint slot;
+
+   /* any single passing code is enough */
+   for (slot = 0; slot < 4; slot++) {
+      if (test_cc(machine->CondCodes[GET_SWZ(select, slot)], rule))
+         return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+/**
+ * Store 4 floats into a register.  Observe the instructions saturate and
+ * set-condition-code flags.  A component is written only if WriteMask
+ * enables it and, unless CondMask is COND_TR, its swizzled condition code
+ * passes CondMask; condition codes are updated for written components only.
+ */
+static void
+store_vector4(const struct prog_instruction *inst,
+              struct gl_program_machine *machine, const GLfloat value[4])
+{
+   const struct prog_dst_register *dstReg = &(inst->DstReg);
+   const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
+   GLuint writeMask = dstReg->WriteMask;
+   GLfloat clampedValue[4];
+   GLfloat *dst = get_dst_register_pointer(dstReg, machine);
+
+#if 0
+   if (value[0] > 1.0e10 ||
+       IS_INF_OR_NAN(value[0]) ||
+       IS_INF_OR_NAN(value[1]) ||
+       IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
+      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
+#endif
+
+   if (clamp) {
+      clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
+      clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
+      clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
+      clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
+      value = clampedValue;
+   }
+
+   if (dstReg->CondMask != COND_TR) {
+      /* condition codes may turn off some writes */
+      if (writeMask & WRITEMASK_X) {
+         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
+                      dstReg->CondMask))
+            writeMask &= ~WRITEMASK_X;
+      }
+      if (writeMask & WRITEMASK_Y) {
+         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
+                      dstReg->CondMask))
+            writeMask &= ~WRITEMASK_Y;
+      }
+      if (writeMask & WRITEMASK_Z) {
+         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
+                      dstReg->CondMask))
+            writeMask &= ~WRITEMASK_Z;
+      }
+      if (writeMask & WRITEMASK_W) {
+         if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
+                      dstReg->CondMask))
+            writeMask &= ~WRITEMASK_W;
+      }
+   }
+
+#ifdef NAN_CHECK
+   assert(!IS_INF_OR_NAN(value[0]));
+   assert(!IS_INF_OR_NAN(value[1]));
+   assert(!IS_INF_OR_NAN(value[2]));
+   assert(!IS_INF_OR_NAN(value[3]));
+#endif
+
+   if (writeMask & WRITEMASK_X)
+      dst[0] = value[0];
+   if (writeMask & WRITEMASK_Y)
+      dst[1] = value[1];
+   if (writeMask & WRITEMASK_Z)
+      dst[2] = value[2];
+   if (writeMask & WRITEMASK_W)
+      dst[3] = value[3];
+
+   if (inst->CondUpdate) {
+      if (writeMask & WRITEMASK_X)
+         machine->CondCodes[0] = generate_cc(value[0]);
+      if (writeMask & WRITEMASK_Y)
+         machine->CondCodes[1] = generate_cc(value[1]);
+      if (writeMask & WRITEMASK_Z)
+         machine->CondCodes[2] = generate_cc(value[2]);
+      if (writeMask & WRITEMASK_W)
+         machine->CondCodes[3] = generate_cc(value[3]);
+#if DEBUG_PROG
+      printf("CondCodes=(%s,%s,%s,%s) for:\n",
+             _mesa_condcode_string(machine->CondCodes[0]),
+             _mesa_condcode_string(machine->CondCodes[1]),
+             _mesa_condcode_string(machine->CondCodes[2]),
+             _mesa_condcode_string(machine->CondCodes[3]));
+#endif
+   }
+}
+
+
+/**
+ * Store 4 uints into a register.  Observe the set-condition-code flags.
+ *
+ * The effective write mask is settled from the current condition codes
+ * before anything is written, so updating the codes afterwards cannot
+ * influence which components this instruction stores.
+ *
+ * \param inst     instruction supplying DstReg and the CondUpdate flag
+ * \param machine  machine state holding the registers and CondCodes
+ * \param value    the four raw values to store
+ */
+static void
+store_vector4ui(const struct prog_instruction *inst,
+                struct gl_program_machine *machine, const GLuint value[4])
+{
+   /* write-mask bit of each component, in x, y, z, w order */
+   static const GLuint chanBit[4] = {
+      WRITEMASK_X, WRITEMASK_Y, WRITEMASK_Z, WRITEMASK_W
+   };
+   const struct prog_dst_register *dstReg = &(inst->DstReg);
+   const GLuint rule = dstReg->CondMask;
+   const GLuint select = dstReg->CondSwizzle;
+   GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
+   GLuint enabled = dstReg->WriteMask;
+   GLuint chan;
+
+   if (rule != COND_TR) {
+      /* condition codes may turn off some writes */
+      for (chan = 0; chan < 4; chan++) {
+         if (!(enabled & chanBit[chan])) {
+            continue;
+         }
+
+         if (!test_cc(machine->CondCodes[GET_SWZ(select, chan)], rule)) {
+            enabled &= ~chanBit[chan];
+         }
+      }
+   }
+
+   /* store the surviving components */
+   for (chan = 0; chan < 4; chan++) {
+      if (enabled & chanBit[chan]) {
+         dst[chan] = value[chan];
+      }
+   }
+
+   if (inst->CondUpdate) {
+      /* refresh the condition code of every component just written */
+      for (chan = 0; chan < 4; chan++) {
+         if (enabled & chanBit[chan]) {
+            machine->CondCodes[chan] = generate_cc((float) value[chan]);
+         }
+      }
+#if DEBUG_PROG
+      printf("CondCodes=(%s,%s,%s,%s) for:\n",
+             _mesa_condcode_string(machine->CondCodes[0]),
+             _mesa_condcode_string(machine->CondCodes[1]),
+             _mesa_condcode_string(machine->CondCodes[2]),
+             _mesa_condcode_string(machine->CondCodes[3]));
+#endif
+   }
+}
+
+
+
+/**
+ * Execute the given vertex/fragment program.
+ *
+ * \param ctx  rendering context
+ * \param program  the program to execute
+ * \param machine  machine state (must be initialized)
+ * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
+ */
+GLboolean
+_mesa_execute_program(struct gl_context * ctx,
+                      const struct gl_program *program,
+                      struct gl_program_machine *machine)
+{
+   const GLuint numInst = program->NumInstructions;
+   const GLuint maxExec = 10000;
+   GLuint pc, numExec = 0;
+
+   machine->CurProgram = program;
+
+   if (DEBUG_PROG) {
+      printf("execute program %u --------------------\n", program->Id);
+   }
+
+   if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+      machine->EnvParams = ctx->VertexProgram.Parameters;
+   }
+   else {
+      machine->EnvParams = ctx->FragmentProgram.Parameters;
+   }
+
+   for (pc = 0; pc < numInst; pc++) {
+      const struct prog_instruction *inst = program->Instructions + pc;
+
+      if (DEBUG_PROG) {
+         _mesa_print_instruction(inst);
+      }
+
+      switch (inst->Opcode) {
+      case OPCODE_ABS:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] = FABSF(a[0]);
+            result[1] = FABSF(a[1]);
+            result[2] = FABSF(a[2]);
+            result[3] = FABSF(a[3]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_ADD:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] + b[0];
+            result[1] = a[1] + b[1];
+            result[2] = a[2] + b[2];
+            result[3] = a[3] + b[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_AND:     /* bitwise AND */
+         {
+            GLuint a[4], b[4], result[4];
+            fetch_vector4ui(&inst->SrcReg[0], machine, a);
+            fetch_vector4ui(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] & b[0];
+            result[1] = a[1] & b[1];
+            result[2] = a[2] & b[2];
+            result[3] = a[3] & b[3];
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_ARL:
+         {
+            GLfloat t[4];
+            fetch_vector4(&inst->SrcReg[0], machine, t);
+            machine->AddressReg[0][0] = IFLOOR(t[0]);
+            if (DEBUG_PROG) {
+               printf("ARL %d\n", machine->AddressReg[0][0]);
+            }
+         }
+         break;
+      case OPCODE_BGNLOOP:
+         /* no-op */
+         ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                == OPCODE_ENDLOOP);
+         break;
+      case OPCODE_ENDLOOP:
+         /* subtract 1 here since pc is incremented by for(pc) loop */
+         ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                == OPCODE_BGNLOOP);
+         pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
+         break;
+      case OPCODE_BGNSUB:      /* begin subroutine */
+         break;
+      case OPCODE_ENDSUB:      /* end subroutine */
+         break;
+      case OPCODE_BRA:         /* branch (conditional) */
+         if (eval_condition(machine, inst)) {
+            /* take branch */
+            /* Subtract 1 here since we'll do pc++ below */
+            pc = inst->BranchTarget - 1;
+         }
+         break;
+      case OPCODE_BRK:         /* break out of loop (conditional) */
+         ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                == OPCODE_ENDLOOP);
+         if (eval_condition(machine, inst)) {
+            /* break out of loop */
+            /* pc++ at end of for-loop will put us after the ENDLOOP inst */
+            pc = inst->BranchTarget;
+         }
+         break;
+      case OPCODE_CONT:        /* continue loop (conditional) */
+         ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                == OPCODE_ENDLOOP);
+         if (eval_condition(machine, inst)) {
+            /* continue at ENDLOOP */
+            /* Subtract 1 here since we'll do pc++ at end of for-loop */
+            pc = inst->BranchTarget - 1;
+         }
+         break;
+      case OPCODE_CAL:         /* Call subroutine (conditional) */
+         if (eval_condition(machine, inst)) {
+            /* call the subroutine */
+            if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
+               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
+            }
+            machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
+            /* Subtract 1 here since we'll do pc++ at end of for-loop */
+            pc = inst->BranchTarget - 1;
+         }
+         break;
+      case OPCODE_CMP:
+         {
+            GLfloat a[4], b[4], c[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            fetch_vector4(&inst->SrcReg[2], machine, c);
+            result[0] = a[0] < 0.0F ? b[0] : c[0];
+            result[1] = a[1] < 0.0F ? b[1] : c[1];
+            result[2] = a[2] < 0.0F ? b[2] : c[2];
+            result[3] = a[3] < 0.0F ? b[3] : c[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3],
+                      c[0], c[1], c[2], c[3]);
+            }
+         }
+         break;
+      case OPCODE_COS:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            result[0] = result[1] = result[2] = result[3]
+               = (GLfloat) cos(a[0]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_DDX:         /* Partial derivative with respect to X */
+         {
+            GLfloat result[4];
+            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                'X', result);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_DDY:         /* Partial derivative with respect to Y */
+         {
+            GLfloat result[4];
+            fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
+                                'Y', result);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_DP2:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("DP2 %g = (%g %g) . (%g %g)\n",
+                      result[0], a[0], a[1], b[0], b[1]);
+            }
+         }
+         break;
+      case OPCODE_DP2A:
+         {
+            GLfloat a[4], b[4], c, result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            fetch_vector1(&inst->SrcReg[1], machine, &c);
+            result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
+                      result[0], a[0], a[1], b[0], b[1], c);
+            }
+         }
+         break;
+      case OPCODE_DP3:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
+                      result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
+            }
+         }
+         break;
+      case OPCODE_DP4:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
+                      result[0], a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_DPH:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_DST:         /* Distance vector */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = 1.0F;
+            result[1] = a[1] * b[1];
+            result[2] = a[2];
+            result[3] = b[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_EXP:
+         {
+            GLfloat t[4], q[4], floor_t0;
+            fetch_vector1(&inst->SrcReg[0], machine, t);
+            floor_t0 = FLOORF(t[0]);
+            if (floor_t0 > FLT_MAX_EXP) {
+               SET_POS_INFINITY(q[0]);
+               SET_POS_INFINITY(q[2]);
+            }
+            else if (floor_t0 < FLT_MIN_EXP) {
+               q[0] = 0.0F;
+               q[2] = 0.0F;
+            }
+            else {
+               q[0] = LDEXPF(1.0, (int) floor_t0);
+               /* Note: GL_NV_vertex_program expects 
+                * result.z = result.x * APPX(result.y)
+                * We do what the ARB extension says.
+                */
+               q[2] = (GLfloat) pow(2.0, t[0]);
+            }
+            q[1] = t[0] - floor_t0;
+            q[3] = 1.0F;
+            store_vector4( inst, machine, q );
+         }
+         break;
+      case OPCODE_EX2:         /* Exponential base 2 */
+         {
+            GLfloat a[4], result[4], val;
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            val = (GLfloat) pow(2.0, a[0]);
+            /*
+            if (IS_INF_OR_NAN(val))
+               val = 1.0e10;
+            */
+            result[0] = result[1] = result[2] = result[3] = val;
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_FLR:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] = FLOORF(a[0]);
+            result[1] = FLOORF(a[1]);
+            result[2] = FLOORF(a[2]);
+            result[3] = FLOORF(a[3]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_FRC:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] = a[0] - FLOORF(a[0]);
+            result[1] = a[1] - FLOORF(a[1]);
+            result[2] = a[2] - FLOORF(a[2]);
+            result[3] = a[3] - FLOORF(a[3]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_IF:
+         {
+            GLboolean cond;
+            ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                   == OPCODE_ELSE ||
+                   program->Instructions[inst->BranchTarget].Opcode
+                   == OPCODE_ENDIF);
+            /* eval condition */
+            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
+               GLfloat a[4];
+               fetch_vector1(&inst->SrcReg[0], machine, a);
+               cond = (a[0] != 0.0);
+            }
+            else {
+               cond = eval_condition(machine, inst);
+            }
+            if (DEBUG_PROG) {
+               printf("IF: %d\n", cond);
+            }
+            /* do if/else */
+            if (cond) {
+               /* do if-clause (just continue execution) */
+            }
+            else {
+               /* go to the instruction after ELSE or ENDIF */
+               assert(inst->BranchTarget >= 0);
+               pc = inst->BranchTarget;
+            }
+         }
+         break;
+      case OPCODE_ELSE:
+         /* goto ENDIF */
+         ASSERT(program->Instructions[inst->BranchTarget].Opcode
+                == OPCODE_ENDIF);
+         assert(inst->BranchTarget >= 0);
+         pc = inst->BranchTarget;
+         break;
+      case OPCODE_ENDIF:
+         /* nothing */
+         break;
+      case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
+         if (eval_condition(machine, inst)) {
+            return GL_FALSE;
+         }
+         break;
+      case OPCODE_KIL:         /* ARB_f_p only */
+         {
+            GLfloat a[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            if (DEBUG_PROG) {
+               printf("KIL if (%g %g %g %g) <= 0.0\n",
+                      a[0], a[1], a[2], a[3]);
+            }
+
+            if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
+               return GL_FALSE;
+            }
+         }
+         break;
+      case OPCODE_LG2:         /* log base 2 */
+         {
+            GLfloat a[4], result[4], val;
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+	    /* The fast LOG2 macro doesn't meet the precision requirements.
+	     */
+            if (a[0] == 0.0F) {
+               val = -FLT_MAX;
+            }
+            else {
+               val = (float)(log(a[0]) * 1.442695F);
+            }
+            result[0] = result[1] = result[2] = result[3] = val;
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_LIT:
+         {
+            const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            a[0] = MAX2(a[0], 0.0F);
+            a[1] = MAX2(a[1], 0.0F);
+            /* XXX ARB version clamps a[3], NV version doesn't */
+            a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
+            result[0] = 1.0F;
+            result[1] = a[0];
+            /* XXX we could probably just use pow() here */
+            if (a[0] > 0.0F) {
+               if (a[1] == 0.0 && a[3] == 0.0)
+                  result[2] = 1.0F;
+               else
+                  result[2] = (GLfloat) pow(a[1], a[3]);
+            }
+            else {
+               result[2] = 0.0F;
+            }
+            result[3] = 1.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3]);
+            }
+         }
+         break;
+      case OPCODE_LOG:
+         {
+            GLfloat t[4], q[4], abs_t0;
+            fetch_vector1(&inst->SrcReg[0], machine, t);
+            abs_t0 = FABSF(t[0]);
+            if (abs_t0 != 0.0F) {
+               /* Since we really can't handle infinite values on VMS
+                * like other OSes we'll use __MAXFLOAT to represent
+                * infinity.  This may need some tweaking.
+                */
+#ifdef VMS
+               if (abs_t0 == __MAXFLOAT)
+#else
+               if (IS_INF_OR_NAN(abs_t0))
+#endif
+               {
+                  SET_POS_INFINITY(q[0]);
+                  q[1] = 1.0F;
+                  SET_POS_INFINITY(q[2]);
+               }
+               else {
+                  int exponent;
+                  GLfloat mantissa = FREXPF(t[0], &exponent);
+                  q[0] = (GLfloat) (exponent - 1);
+                  q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
+
+		  /* The fast LOG2 macro doesn't meet the precision
+		   * requirements.
+		   */
+                  q[2] = (float)(log(t[0]) * 1.442695F);
+               }
+            }
+            else {
+               SET_NEG_INFINITY(q[0]);
+               q[1] = 1.0F;
+               SET_NEG_INFINITY(q[2]);
+            }
+            q[3] = 1.0;
+            store_vector4(inst, machine, q);
+         }
+         break;
+      case OPCODE_LRP:
+         {
+            GLfloat a[4], b[4], c[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            fetch_vector4(&inst->SrcReg[2], machine, c);
+            result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
+            result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
+            result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
+            result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("LRP (%g %g %g %g) = (%g %g %g %g), "
+                      "(%g %g %g %g), (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
+            }
+         }
+         break;
+      case OPCODE_MAD:
+         {
+            GLfloat a[4], b[4], c[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            fetch_vector4(&inst->SrcReg[2], machine, c);
+            result[0] = a[0] * b[0] + c[0];
+            result[1] = a[1] * b[1] + c[1];
+            result[2] = a[2] * b[2] + c[2];
+            result[3] = a[3] * b[3] + c[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
+                      "(%g %g %g %g) + (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
+            }
+         }
+         break;
+      case OPCODE_MAX:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = MAX2(a[0], b[0]);
+            result[1] = MAX2(a[1], b[1]);
+            result[2] = MAX2(a[2], b[2]);
+            result[3] = MAX2(a[3], b[3]);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_MIN:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = MIN2(a[0], b[0]);
+            result[1] = MIN2(a[1], b[1]);
+            result[2] = MIN2(a[2], b[2]);
+            result[3] = MIN2(a[3], b[3]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_MOV:
+         {
+            GLfloat result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, result);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("MOV (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3]);
+            }
+         }
+         break;
+      case OPCODE_MUL:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] * b[0];
+            result[1] = a[1] * b[1];
+            result[2] = a[2] * b[2];
+            result[3] = a[3] * b[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_NOISE1:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            result[0] =
+               result[1] =
+               result[2] =
+               result[3] = _mesa_noise1(a[0]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NOISE2:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] =
+               result[1] =
+               result[2] = result[3] = _mesa_noise2(a[0], a[1]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NOISE3:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] =
+               result[1] =
+               result[2] =
+               result[3] = _mesa_noise3(a[0], a[1], a[2]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NOISE4:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] =
+               result[1] =
+               result[2] =
+               result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NOP:
+         break;
+      case OPCODE_NOT:         /* bitwise NOT */
+         {
+            GLuint a[4], result[4];
+            fetch_vector4ui(&inst->SrcReg[0], machine, a);
+            result[0] = ~a[0];
+            result[1] = ~a[1];
+            result[2] = ~a[2];
+            result[3] = ~a[3];
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_NRM3:        /* 3-component normalization */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
+            if (tmp != 0.0F)
+               tmp = INV_SQRTF(tmp);
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_NRM4:        /* 4-component normalization */
+         {
+            GLfloat a[4], result[4];
+            GLfloat tmp;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
+            if (tmp != 0.0F)
+               tmp = INV_SQRTF(tmp);
+            result[0] = tmp * a[0];
+            result[1] = tmp * a[1];
+            result[2] = tmp * a[2];
+            result[3] = tmp * a[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_OR:          /* bitwise OR */
+         {
+            GLuint a[4], b[4], result[4];
+            fetch_vector4ui(&inst->SrcReg[0], machine, a);
+            fetch_vector4ui(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] | b[0];
+            result[1] = a[1] | b[1];
+            result[2] = a[2] | b[2];
+            result[3] = a[3] | b[3];
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
+         {
+            GLfloat a[4];
+            GLuint result[4];
+            GLhalfNV hx, hy;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            hx = _mesa_float_to_half(a[0]);
+            hy = _mesa_float_to_half(a[1]);
+            result[0] =
+            result[1] =
+            result[2] =
+            result[3] = hx | (hy << 16);
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
+         {
+            GLfloat a[4];
+            GLuint result[4], usx, usy;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            a[0] = CLAMP(a[0], 0.0F, 1.0F);
+            a[1] = CLAMP(a[1], 0.0F, 1.0F);
+            usx = IROUND(a[0] * 65535.0F);
+            usy = IROUND(a[1] * 65535.0F);
+            result[0] =
+            result[1] =
+            result[2] =
+            result[3] = usx | (usy << 16);
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
+         {
+            GLfloat a[4];
+            GLuint result[4], ubx, uby, ubz, ubw;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
+            a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
+            a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
+            a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
+            ubx = IROUND(127.0F * a[0] + 128.0F);
+            uby = IROUND(127.0F * a[1] + 128.0F);
+            ubz = IROUND(127.0F * a[2] + 128.0F);
+            ubw = IROUND(127.0F * a[3] + 128.0F);
+            result[0] =
+            result[1] =
+            result[2] =
+            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
+         {
+            GLfloat a[4];
+            GLuint result[4], ubx, uby, ubz, ubw;
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            a[0] = CLAMP(a[0], 0.0F, 1.0F);
+            a[1] = CLAMP(a[1], 0.0F, 1.0F);
+            a[2] = CLAMP(a[2], 0.0F, 1.0F);
+            a[3] = CLAMP(a[3], 0.0F, 1.0F);
+            ubx = IROUND(255.0F * a[0]);
+            uby = IROUND(255.0F * a[1]);
+            ubz = IROUND(255.0F * a[2]);
+            ubw = IROUND(255.0F * a[3]);
+            result[0] =
+            result[1] =
+            result[2] =
+            result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_POW:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            fetch_vector1(&inst->SrcReg[1], machine, b);
+            result[0] = result[1] = result[2] = result[3]
+               = (GLfloat) pow(a[0], b[0]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_RCC:  /* clamped reciprocal */
+         {
+            const float largest = 1.884467e+19, smallest = 5.42101e-20;
+            GLfloat a[4], r, result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            if (DEBUG_PROG) {
+               if (a[0] == 0)
+                  printf("RCC(0)\n");
+               else if (IS_INF_OR_NAN(a[0]))
+                  printf("RCC(inf)\n");
+            }
+            if (a[0] == 1.0F) {
+               r = 1.0F;
+            }
+            else {
+               r = 1.0F / a[0];
+            }
+            if (positive(r)) {
+               if (r > largest) {
+                  r = largest;
+               }
+               else if (r < smallest) {
+                  r = smallest;
+               }
+            }
+            else {
+               if (r < -largest) {
+                  r = -largest;
+               }
+               else if (r > -smallest) {
+                  r = -smallest;
+               }
+            }
+            result[0] = result[1] = result[2] = result[3] = r;
+            store_vector4(inst, machine, result);
+         }
+         break;
+
+      case OPCODE_RCP:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            if (DEBUG_PROG) {
+               if (a[0] == 0)
+                  printf("RCP(0)\n");
+               else if (IS_INF_OR_NAN(a[0]))
+                  printf("RCP(inf)\n");
+            }
+            result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_RET:         /* return from subroutine (conditional) */
+         if (eval_condition(machine, inst)) {
+            if (machine->StackDepth == 0) {
+               return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
+            }
+            /* subtract one because of pc++ in the for loop */
+            pc = machine->CallStack[--machine->StackDepth] - 1;
+         }
+         break;
+      case OPCODE_RFL:         /* reflection vector */
+         {
+            GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
+            fetch_vector4(&inst->SrcReg[0], machine, axis);
+            fetch_vector4(&inst->SrcReg[1], machine, dir);
+            tmpW = DOT3(axis, axis);
+            tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
+            result[0] = tmpX * axis[0] - dir[0];
+            result[1] = tmpX * axis[1] - dir[1];
+            result[2] = tmpX * axis[2] - dir[2];
+            /* result[3] is never written! XXX enforce in parser! */
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_RSQ:         /* 1 / sqrt() */
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            a[0] = FABSF(a[0]);
+            result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
+            }
+         }
+         break;
+      case OPCODE_SCS:         /* sine and cos */
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            result[0] = (GLfloat) cos(a[0]);
+            result[1] = (GLfloat) sin(a[0]);
+            result[2] = 0.0;    /* undefined! */
+            result[3] = 0.0;    /* undefined! */
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_SEQ:         /* set on equal */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SFL:         /* set false, operands ignored */
+         {
+            static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_SGE:         /* set on greater or equal */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SGT:         /* set on greater */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SIN:
+         {
+            GLfloat a[4], result[4];
+            fetch_vector1(&inst->SrcReg[0], machine, a);
+            result[0] = result[1] = result[2] = result[3]
+               = (GLfloat) sin(a[0]);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_SLE:         /* set on less or equal */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SLT:         /* set on less */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SNE:         /* set on not equal */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
+            result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
+            result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
+            result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3],
+                      b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
+            result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
+            result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
+            result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_STR:         /* set true, operands ignored */
+         {
+            static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_SUB:
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] - b[0];
+            result[1] = a[1] - b[1];
+            result[2] = a[2] - b[2];
+            result[3] = a[3] - b[3];
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
+            }
+         }
+         break;
+      case OPCODE_SWZ:         /* extended swizzle */
+         {
+            const struct prog_src_register *source = &inst->SrcReg[0];
+            const GLfloat *src = get_src_register_pointer(source, machine);
+            GLfloat result[4];
+            GLuint i;
+            for (i = 0; i < 4; i++) {
+               const GLuint swz = GET_SWZ(source->Swizzle, i);
+               if (swz == SWIZZLE_ZERO)
+                  result[i] = 0.0;
+               else if (swz == SWIZZLE_ONE)
+                  result[i] = 1.0;
+               else {
+                  ASSERT(swz >= 0);
+                  ASSERT(swz <= 3);
+                  result[i] = src[swz];
+               }
+               if (source->Negate & (1 << i))
+                  result[i] = -result[i];
+            }
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_TEX:         /* Both ARB and NV frag prog */
+         /* Simple texel lookup */
+         {
+            GLfloat texcoord[4], color[4];
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+            if (DEBUG_PROG) {
+               printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
+                      color[0], color[1], color[2], color[3],
+                      inst->TexSrcUnit,
+                      texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
+            }
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TXB:         /* GL_ARB_fragment_program only */
+         /* Texel lookup with LOD bias */
+         {
+            GLfloat texcoord[4], color[4], lodBias;
+
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+            /* texcoord[3] is the bias to add to lambda */
+            lodBias = texcoord[3];
+
+            fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
+
+            if (DEBUG_PROG) {
+               printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
+                      "  bias %g\n",
+                      color[0], color[1], color[2], color[3],
+                      inst->TexSrcUnit,
+                      texcoord[0],
+                      texcoord[1],
+                      texcoord[2],
+                      texcoord[3],
+                      lodBias);
+            }
+
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TXD:         /* GL_NV_fragment_program only */
+         /* Texture lookup w/ partial derivatives for LOD */
+         {
+            GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+            fetch_vector4(&inst->SrcReg[1], machine, dtdx);
+            fetch_vector4(&inst->SrcReg[2], machine, dtdy);
+            machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
+                                     0.0, /* lodBias */
+                                     inst->TexSrcUnit, color);
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TXL:
+         /* Texel lookup with explicit LOD */
+         {
+            GLfloat texcoord[4], color[4], lod;
+
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+
+            /* texcoord[3] is the LOD */
+            lod = texcoord[3];
+
+	    machine->FetchTexelLod(ctx, texcoord, lod,
+				   machine->Samplers[inst->TexSrcUnit], color);
+
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TXP:         /* GL_ARB_fragment_program only */
+         /* Texture lookup w/ projective divide */
+         {
+            GLfloat texcoord[4], color[4];
+
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+            /* Not so sure about this test - if texcoord[3] is
+             * zero, we'd probably be fine except for an ASSERT in
+             * IROUND_POS() which gets triggered by the inf values created.
+             */
+            if (texcoord[3] != 0.0) {
+               texcoord[0] /= texcoord[3];
+               texcoord[1] /= texcoord[3];
+               texcoord[2] /= texcoord[3];
+            }
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
+         /* Texture lookup w/ projective divide, as above, but do not
+          * do the divide by w if sampling from a cube map.
+          */
+         {
+            GLfloat texcoord[4], color[4];
+
+            fetch_vector4(&inst->SrcReg[0], machine, texcoord);
+            if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
+                texcoord[3] != 0.0) {
+               texcoord[0] /= texcoord[3];
+               texcoord[1] /= texcoord[3];
+               texcoord[2] /= texcoord[3];
+            }
+
+            fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
+
+            store_vector4(inst, machine, color);
+         }
+         break;
+      case OPCODE_TRUNC:       /* truncate toward zero */
+         {
+            GLfloat a[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            result[0] = (GLfloat) (GLint) a[0];
+            result[1] = (GLfloat) (GLint) a[1];
+            result[2] = (GLfloat) (GLint) a[2];
+            result[3] = (GLfloat) (GLint) a[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_UP2H:        /* unpack two 16-bit floats */
+         {
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            GLushort hx, hy;
+            hx = raw & 0xffff;
+            hy = raw >> 16;
+            result[0] = result[2] = _mesa_half_to_float(hx);
+            result[1] = result[3] = _mesa_half_to_float(hy);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_UP2US:       /* unpack two GLushorts */
+         {
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            GLushort usx, usy;
+            usx = raw & 0xffff;
+            usy = raw >> 16;
+            result[0] = result[2] = usx * (1.0f / 65535.0f);
+            result[1] = result[3] = usy * (1.0f / 65535.0f);
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_UP4B:        /* unpack four GLbytes */
+         {
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
+            result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
+            result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
+            result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_UP4UB:       /* unpack four GLubytes */
+         {
+            const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
+            GLfloat result[4];
+            result[0] = ((raw >> 0) & 0xff) / 255.0F;
+            result[1] = ((raw >> 8) & 0xff) / 255.0F;
+            result[2] = ((raw >> 16) & 0xff) / 255.0F;
+            result[3] = ((raw >> 24) & 0xff) / 255.0F;
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_XOR:         /* bitwise XOR */
+         {
+            GLuint a[4], b[4], result[4];
+            fetch_vector4ui(&inst->SrcReg[0], machine, a);
+            fetch_vector4ui(&inst->SrcReg[1], machine, b);
+            result[0] = a[0] ^ b[0];
+            result[1] = a[1] ^ b[1];
+            result[2] = a[2] ^ b[2];
+            result[3] = a[3] ^ b[3];
+            store_vector4ui(inst, machine, result);
+         }
+         break;
+      case OPCODE_XPD:         /* cross product */
+         {
+            GLfloat a[4], b[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            result[0] = a[1] * b[2] - a[2] * b[1];
+            result[1] = a[2] * b[0] - a[0] * b[2];
+            result[2] = a[0] * b[1] - a[1] * b[0];
+            result[3] = 1.0;
+            store_vector4(inst, machine, result);
+            if (DEBUG_PROG) {
+               printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
+                      result[0], result[1], result[2], result[3],
+                      a[0], a[1], a[2], b[0], b[1], b[2]);
+            }
+         }
+         break;
+      case OPCODE_X2D:         /* 2-D matrix transform */
+         {
+            GLfloat a[4], b[4], c[4], result[4];
+            fetch_vector4(&inst->SrcReg[0], machine, a);
+            fetch_vector4(&inst->SrcReg[1], machine, b);
+            fetch_vector4(&inst->SrcReg[2], machine, c);
+            result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
+            result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
+            result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
+            result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
+            store_vector4(inst, machine, result);
+         }
+         break;
+      case OPCODE_PRINT:
+         {
+            if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
+               GLfloat a[4];
+               fetch_vector4(&inst->SrcReg[0], machine, a);
+               printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
+                            a[0], a[1], a[2], a[3]);
+            }
+            else {
+               printf("%s\n", (const char *) inst->Data);
+            }
+         }
+         break;
+      case OPCODE_END:
+         return GL_TRUE;
+      default:
+         _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
+                       inst->Opcode);
+         return GL_TRUE;        /* return value doesn't matter */
+      }
+
+      numExec++;
+      if (numExec > maxExec) {
+	 static GLboolean reported = GL_FALSE;
+	 if (!reported) {
+	    _mesa_problem(ctx, "Infinite loop detected in fragment program");
+	    reported = GL_TRUE;
+	 }
+         return GL_TRUE;
+      }
+
+   } /* for pc */
+
+   return GL_TRUE;
+}
diff --git a/mesalib/src/mesa/program/prog_parameter.c b/mesalib/src/mesa/program/prog_parameter.c
index 157e31b56..49b3ffbdd 100644
--- a/mesalib/src/mesa/program/prog_parameter.c
+++ b/mesalib/src/mesa/program/prog_parameter.c
@@ -1,656 +1,680 @@
-/*
- * Mesa 3-D graphics library
- * Version:  7.3
- *
- * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file prog_parameter.c
- * Program parameter lists and functions.
- * \author Brian Paul
- */
-
-
-#include "main/glheader.h"
-#include "main/imports.h"
-#include "main/macros.h"
-#include "prog_instruction.h"
-#include "prog_parameter.h"
-#include "prog_statevars.h"
-
-
-struct gl_program_parameter_list *
-_mesa_new_parameter_list(void)
-{
-   return CALLOC_STRUCT(gl_program_parameter_list);
-}
-
-
-struct gl_program_parameter_list *
-_mesa_new_parameter_list_sized(unsigned size)
-{
-   struct gl_program_parameter_list *p = _mesa_new_parameter_list();
-
-   if ((p != NULL) && (size != 0)) {
-      p->Size = size;
-
-      /* alloc arrays */
-      p->Parameters = (struct gl_program_parameter *)
-	 calloc(1, size * sizeof(struct gl_program_parameter));
-
-      p->ParameterValues = (GLfloat (*)[4])
-         _mesa_align_malloc(size * 4 *sizeof(GLfloat), 16);
-
-
-      if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
-	 free(p->Parameters);
-	 _mesa_align_free(p->ParameterValues);
-	 free(p);
-	 p = NULL;
-      }
-   }
-
-   return p;
-}
-
-
-/**
- * Free a parameter list and all its parameters
- */
-void
-_mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
-{
-   GLuint i;
-   for (i = 0; i < paramList->NumParameters; i++) {
-      if (paramList->Parameters[i].Name)
-	 free((void *) paramList->Parameters[i].Name);
-   }
-   free(paramList->Parameters);
-   if (paramList->ParameterValues)
-      _mesa_align_free(paramList->ParameterValues);
-   free(paramList);
-}
-
-
-/**
- * Add a new parameter to a parameter list.
- * Note that parameter values are usually 4-element GLfloat vectors.
- * When size > 4 we'll allocate a sequential block of parameters to
- * store all the values (in blocks of 4).
- *
- * \param paramList  the list to add the parameter to
- * \param type  type of parameter, such as 
- * \param name  the parameter name, will be duplicated/copied!
- * \param size  number of elements in 'values' vector (1..4, or more)
- * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
- * \param values  initial parameter value, up to 4 GLfloats, or NULL
- * \param state  state indexes, or NULL
- * \return  index of new parameter in the list, or -1 if error (out of mem)
- */
-GLint
-_mesa_add_parameter(struct gl_program_parameter_list *paramList,
-                    gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
-                    const gl_state_index state[STATE_LENGTH],
-                    GLbitfield flags)
-{
-   const GLuint oldNum = paramList->NumParameters;
-   const GLuint sz4 = (size + 3) / 4; /* no. of new param slots needed */
-
-   assert(size > 0);
-
-   if (oldNum + sz4 > paramList->Size) {
-      /* Need to grow the parameter list array (alloc some extra) */
-      paramList->Size = paramList->Size + 4 * sz4;
-
-      /* realloc arrays */
-      paramList->Parameters = (struct gl_program_parameter *)
-	 _mesa_realloc(paramList->Parameters,
-		       oldNum * sizeof(struct gl_program_parameter),
-		       paramList->Size * sizeof(struct gl_program_parameter));
-
-      paramList->ParameterValues = (GLfloat (*)[4])
-         _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
-                             oldNum * 4 * sizeof(GLfloat),      /* old size */
-                             paramList->Size * 4 *sizeof(GLfloat), /* new sz */
-                             16);
-   }
-
-   if (!paramList->Parameters ||
-       !paramList->ParameterValues) {
-      /* out of memory */
-      paramList->NumParameters = 0;
-      paramList->Size = 0;
-      return -1;
-   }
-   else {
-      GLuint i;
-
-      paramList->NumParameters = oldNum + sz4;
-
-      memset(&paramList->Parameters[oldNum], 0,
-             sz4 * sizeof(struct gl_program_parameter));
-
-      for (i = 0; i < sz4; i++) {
-         struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
-         p->Name = name ? _mesa_strdup(name) : NULL;
-         p->Type = type;
-         p->Size = size;
-         p->DataType = datatype;
-         p->Flags = flags;
-         if (values) {
-            COPY_4V(paramList->ParameterValues[oldNum + i], values);
-            values += 4;
-            p->Initialized = GL_TRUE;
-         }
-         else {
-            /* silence valgrind */
-            ASSIGN_4V(paramList->ParameterValues[oldNum + i], 0, 0, 0, 0);
-         }
-         size -= 4;
-      }
-
-      if (state) {
-         for (i = 0; i < STATE_LENGTH; i++)
-            paramList->Parameters[oldNum].StateIndexes[i] = state[i];
-      }
-
-      return (GLint) oldNum;
-   }
-}
-
-
-/**
- * Add a new named program parameter (Ex: NV_fragment_program DEFINE statement)
- * \return index of the new entry in the parameter list
- */
-GLint
-_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4])
-{
-   return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
-                              4, GL_NONE, values, NULL, 0x0);
-                              
-}
-
-
-/**
- * Add a new named constant to the parameter list.
- * This will be used when the program contains something like this:
- *    PARAM myVals = { 0, 1, 2, 3 };
- *
- * \param paramList  the parameter list
- * \param name  the name for the constant
- * \param values  four float values
- * \return index/position of the new parameter in the parameter list
- */
-GLint
-_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
-                         GLuint size)
-{
-   /* first check if this is a duplicate constant */
-   GLint pos;
-   for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
-      const GLfloat *pvals = paramList->ParameterValues[pos];
-      if (pvals[0] == values[0] &&
-          pvals[1] == values[1] &&
-          pvals[2] == values[2] &&
-          pvals[3] == values[3] &&
-          strcmp(paramList->Parameters[pos].Name, name) == 0) {
-         /* Same name and value is already in the param list - reuse it */
-         return pos;
-      }
-   }
-   /* not found, add new parameter */
-   return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name,
-                              size, GL_NONE, values, NULL, 0x0);
-}
-
-
-/**
- * Add a new unnamed constant to the parameter list.  This will be used
- * when a fragment/vertex program contains something like this:
- *    MOV r, { 0, 1, 2, 3 };
- * If swizzleOut is non-null we'll search the parameter list for an
- * existing instance of the constant which matches with a swizzle.
- *
- * \param paramList  the parameter list
- * \param values  four float values
- * \param swizzleOut  returns swizzle mask for accessing the constant
- * \return index/position of the new parameter in the parameter list.
- */
-GLint
-_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
-                           GLuint *swizzleOut)
-{
-   GLint pos;
-   ASSERT(size >= 1);
-   ASSERT(size <= 4);
-
-   if (swizzleOut &&
-       _mesa_lookup_parameter_constant(paramList, values,
-                                       size, &pos, swizzleOut)) {
-      return pos;
-   }
-
-   /* Look for empty space in an already unnamed constant parameter
-    * to add this constant.  This will only work for single-element
-    * constants because we rely on smearing (i.e. .yyyy or .zzzz).
-    */
-   if (size == 1 && swizzleOut) {
-      for (pos = 0; pos < (GLint) paramList->NumParameters; pos++) {
-         struct gl_program_parameter *p = paramList->Parameters + pos;
-         if (p->Type == PROGRAM_CONSTANT && p->Size + size <= 4) {
-            /* ok, found room */
-            GLfloat *pVal = paramList->ParameterValues[pos];
-            GLuint swz = p->Size; /* 1, 2 or 3 for Y, Z, W */
-            pVal[p->Size] = values[0];
-            p->Size++;
-            *swizzleOut = MAKE_SWIZZLE4(swz, swz, swz, swz);
-            return pos;
-         }
-      }
-   }
-
-   /* add a new parameter to store this constant */
-   pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
-                             size, GL_NONE, values, NULL, 0x0);
-   if (pos >= 0 && swizzleOut) {
-      if (size == 1)
-         *swizzleOut = SWIZZLE_XXXX;
-      else
-         *swizzleOut = SWIZZLE_NOOP;
-   }
-   return pos;
-}
-
-/**
- * Add parameter representing a varying variable.
- */
-GLint
-_mesa_add_varying(struct gl_program_parameter_list *paramList,
-                  const char *name, GLuint size, GLenum datatype,
-                  GLbitfield flags)
-{
-   GLint i = _mesa_lookup_parameter_index(paramList, -1, name);
-   if (i >= 0 && paramList->Parameters[i].Type == PROGRAM_VARYING) {
-      /* already in list */
-      return i;
-   }
-   else {
-      /*assert(size == 4);*/
-      i = _mesa_add_parameter(paramList, PROGRAM_VARYING, name,
-                              size, datatype, NULL, NULL, flags);
-      return i;
-   }
-}
-
-
-/**
- * Add parameter representing a vertex program attribute.
- * \param size  size of attribute (in floats), may be -1 if unknown
- * \param attrib  the attribute index, or -1 if unknown
- */
-GLint
-_mesa_add_attribute(struct gl_program_parameter_list *paramList,
-                    const char *name, GLint size, GLenum datatype, GLint attrib)
-{
-   GLint i = _mesa_lookup_parameter_index(paramList, -1, name);
-   if (i >= 0) {
-      /* replace */
-      if (attrib < 0)
-         attrib = i;
-      paramList->Parameters[i].StateIndexes[0] = attrib;
-   }
-   else {
-      /* add */
-      gl_state_index state[STATE_LENGTH];
-      state[0] = (gl_state_index) attrib;
-      if (size < 0)
-         size = 4;
-      i = _mesa_add_parameter(paramList, PROGRAM_INPUT, name,
-                              size, datatype, NULL, state, 0x0);
-   }
-   return i;
-}
-
-
-
-#if 0 /* not used yet */
-/**
- * Returns the number of 4-component registers needed to store a piece
- * of GL state.  For matrices this may be as many as 4 registers,
- * everything else needs
- * just 1 register.
- */
-static GLuint
-sizeof_state_reference(const GLint *stateTokens)
-{
-   if (stateTokens[0] == STATE_MATRIX) {
-      GLuint rows = stateTokens[4] - stateTokens[3] + 1;
-      assert(rows >= 1);
-      assert(rows <= 4);
-      return rows;
-   }
-   else {
-      return 1;
-   }
-}
-#endif
-
-
-/**
- * Add a new state reference to the parameter list.
- * This will be used when the program contains something like this:
- *    PARAM ambient = state.material.front.ambient;
- *
- * \param paramList  the parameter list
- * \param stateTokens  an array of 5 (STATE_LENGTH) state tokens
- * \return index of the new parameter.
- */
-GLint
-_mesa_add_state_reference(struct gl_program_parameter_list *paramList,
-                          const gl_state_index stateTokens[STATE_LENGTH])
-{
-   const GLuint size = 4; /* XXX fix */
-   char *name;
-   GLint index;
-
-   /* Check if the state reference is already in the list */
-   for (index = 0; index < (GLint) paramList->NumParameters; index++) {
-      if (!memcmp(paramList->Parameters[index].StateIndexes,
-		  stateTokens, STATE_LENGTH * sizeof(gl_state_index))) {
-	 return index;
-      }
-   }
-
-   name = _mesa_program_state_string(stateTokens);
-   index = _mesa_add_parameter(paramList, PROGRAM_STATE_VAR, name,
-                               size, GL_NONE,
-                               NULL, (gl_state_index *) stateTokens, 0x0);
-   paramList->StateFlags |= _mesa_program_state_flags(stateTokens);
-
-   /* free name string here since we duplicated it in add_parameter() */
-   free(name);
-
-   return index;
-}
-
-
-/**
- * Lookup a parameter value by name in the given parameter list.
- * \return pointer to the float[4] values.
- */
-GLfloat *
-_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
-                             GLsizei nameLen, const char *name)
-{
-   GLint i = _mesa_lookup_parameter_index(paramList, nameLen, name);
-   if (i < 0)
-      return NULL;
-   else
-      return paramList->ParameterValues[i];
-}
-
-
-/**
- * Given a program parameter name, find its position in the list of parameters.
- * \param paramList  the parameter list to search
- * \param nameLen  length of name (in chars).
- *                 If length is negative, assume that name is null-terminated.
- * \param name  the name to search for
- * \return index of parameter in the list.
- */
-GLint
-_mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
-                             GLsizei nameLen, const char *name)
-{
-   GLint i;
-
-   if (!paramList)
-      return -1;
-
-   if (nameLen == -1) {
-      /* name is null-terminated */
-      for (i = 0; i < (GLint) paramList->NumParameters; i++) {
-         if (paramList->Parameters[i].Name &&
-	     strcmp(paramList->Parameters[i].Name, name) == 0)
-            return i;
-      }
-   }
-   else {
-      /* name is not null-terminated, use nameLen */
-      for (i = 0; i < (GLint) paramList->NumParameters; i++) {
-         if (paramList->Parameters[i].Name &&
-	     strncmp(paramList->Parameters[i].Name, name, nameLen) == 0
-             && strlen(paramList->Parameters[i].Name) == (size_t)nameLen)
-            return i;
-      }
-   }
-   return -1;
-}
-
-
-/**
- * Look for a float vector in the given parameter list.  The float vector
- * may be of length 1, 2, 3 or 4.  If swizzleOut is non-null, we'll try
- * swizzling to find a match.
- * \param list  the parameter list to search
- * \param v  the float vector to search for
- * \param vSize  number of element in v
- * \param posOut  returns the position of the constant, if found
- * \param swizzleOut  returns a swizzle mask describing location of the
- *                    vector elements if found.
- * \return GL_TRUE if found, GL_FALSE if not found
- */
-GLboolean
-_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
-                                GLint *posOut, GLuint *swizzleOut)
-{
-   GLuint i;
-
-   assert(vSize >= 1);
-   assert(vSize <= 4);
-
-   if (!list) {
-      *posOut = -1;
-      return GL_FALSE;
-   }
-
-   for (i = 0; i < list->NumParameters; i++) {
-      if (list->Parameters[i].Type == PROGRAM_CONSTANT) {
-         if (!swizzleOut) {
-            /* swizzle not allowed */
-            GLuint j, match = 0;
-            for (j = 0; j < vSize; j++) {
-               if (v[j] == list->ParameterValues[i][j])
-                  match++;
-            }
-            if (match == vSize) {
-               *posOut = i;
-               return GL_TRUE;
-            }
-         }
-         else {
-            /* try matching w/ swizzle */
-             if (vSize == 1) {
-                /* look for v[0] anywhere within float[4] value */
-                GLuint j;
-                for (j = 0; j < list->Parameters[i].Size; j++) {
-                   if (list->ParameterValues[i][j] == v[0]) {
-                      /* found it */
-                      *posOut = i;
-                      *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
-                      return GL_TRUE;
-                   }
-                }
-             }
-             else if (vSize <= list->Parameters[i].Size) {
-                /* see if we can match this constant (with a swizzle) */
-                GLuint swz[4];
-                GLuint match = 0, j, k;
-                for (j = 0; j < vSize; j++) {
-                   if (v[j] == list->ParameterValues[i][j]) {
-                      swz[j] = j;
-                      match++;
-                   }
-                   else {
-                      for (k = 0; k < list->Parameters[i].Size; k++) {
-                         if (v[j] == list->ParameterValues[i][k]) {
-                            swz[j] = k;
-                            match++;
-                            break;
-                         }
-                      }
-                   }
-                }
-                /* smear last value to remaining positions */
-                for (; j < 4; j++)
-                   swz[j] = swz[j-1];
-
-                if (match == vSize) {
-                   *posOut = i;
-                   *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
-                   return GL_TRUE;
-                }
-             }
-         }
-      }
-   }
-
-   *posOut = -1;
-   return GL_FALSE;
-}
-
-
-struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
-{
-   struct gl_program_parameter_list *clone;
-   GLuint i;
-
-   clone = _mesa_new_parameter_list();
-   if (!clone)
-      return NULL;
-
-   /** Not too efficient, but correct */
-   for (i = 0; i < list->NumParameters; i++) {
-      struct gl_program_parameter *p = list->Parameters + i;
-      struct gl_program_parameter *pCopy;
-      GLuint size = MIN2(p->Size, 4);
-      GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
-                                    list->ParameterValues[i], NULL, 0x0);
-      ASSERT(j >= 0);
-      pCopy = clone->Parameters + j;
-      pCopy->Flags = p->Flags;
-      /* copy state indexes */
-      if (p->Type == PROGRAM_STATE_VAR) {
-         GLint k;
-         for (k = 0; k < STATE_LENGTH; k++) {
-            pCopy->StateIndexes[k] = p->StateIndexes[k];
-         }
-      }
-      else {
-         clone->Parameters[j].Size = p->Size;
-      }
-      
-   }
-
-   clone->StateFlags = list->StateFlags;
-
-   return clone;
-}
-
-
-/**
- * Return a new parameter list which is listA + listB.
- */
-struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
-                              const struct gl_program_parameter_list *listB)
-{
-   struct gl_program_parameter_list *list;
-
-   if (listA) {
-      list = _mesa_clone_parameter_list(listA);
-      if (list && listB) {
-         GLuint i;
-         for (i = 0; i < listB->NumParameters; i++) {
-            struct gl_program_parameter *param = listB->Parameters + i;
-            _mesa_add_parameter(list, param->Type, param->Name, param->Size,
-                                param->DataType,
-                                listB->ParameterValues[i],
-                                param->StateIndexes,
-                                param->Flags);
-         }
-      }
-   }
-   else if (listB) {
-      list = _mesa_clone_parameter_list(listB);
-   }
-   else {
-      list = NULL;
-   }
-   return list;
-}
-
-
-
-/**
- * Find longest name of all uniform parameters in list.
- */
-GLuint
-_mesa_longest_parameter_name(const struct gl_program_parameter_list *list,
-                             gl_register_file type)
-{
-   GLuint i, maxLen = 0;
-   if (!list)
-      return 0;
-   for (i = 0; i < list->NumParameters; i++) {
-      if (list->Parameters[i].Type == type) {
-         GLuint len = strlen(list->Parameters[i].Name);
-         if (len > maxLen)
-            maxLen = len;
-      }
-   }
-   return maxLen;
-}
-
-
-/**
- * Count the number of parameters in the last that match the given type.
- */
-GLuint
-_mesa_num_parameters_of_type(const struct gl_program_parameter_list *list,
-                             gl_register_file type)
-{
-   GLuint i, count = 0;
-   if (list) {
-      for (i = 0; i < list->NumParameters; i++) {
-         if (list->Parameters[i].Type == type)
-            count++;
-      }
-   }
-   return count;
-}
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.3
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file prog_parameter.c
+ * Program parameter lists and functions.
+ * \author Brian Paul
+ */
+
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "prog_instruction.h"
+#include "prog_parameter.h"
+#include "prog_statevars.h"
+
+/** Allocate a new parameter list; presumably zeroed (see CALLOC_STRUCT). */
+struct gl_program_parameter_list *
+_mesa_new_parameter_list(void)
+{
+   return CALLOC_STRUCT(gl_program_parameter_list);
+}
+
+/**
+ * Create a parameter list with storage for 'size' entries pre-allocated.
+ * \return the new list, or NULL if any allocation failed
+ */
+struct gl_program_parameter_list *
+_mesa_new_parameter_list_sized(unsigned size)
+{
+   struct gl_program_parameter_list *p = _mesa_new_parameter_list();
+
+   if ((p != NULL) && (size != 0)) {
+      p->Size = size;
+      /* calloc(count, elemSize) lets the C library check the product */
+      p->Parameters = (struct gl_program_parameter *)
+         calloc(size, sizeof(struct gl_program_parameter));
+      p->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_malloc(size * 4 *sizeof(gl_constant_value), 16);
+
+      if ((p->Parameters == NULL) || (p->ParameterValues == NULL)) {
+         free(p->Parameters);
+         if (p->ParameterValues) /* guarded as in _mesa_free_parameter_list() */
+            _mesa_align_free(p->ParameterValues);
+         free(p);
+         p = NULL;
+      }
+   }
+   return p;
+}
+
+
+/**
+ * Free a parameter list and all its parameters; a NULL list is ignored.
+ */
+void
+_mesa_free_parameter_list(struct gl_program_parameter_list *paramList)
+{
+   GLuint i;
+   if (!paramList)
+      return;
+   for (i = 0; i < paramList->NumParameters; i++)
+      free((void *) paramList->Parameters[i].Name); /* free(NULL) is a no-op */
+   free(paramList->Parameters);
+   if (paramList->ParameterValues)
+      _mesa_align_free(paramList->ParameterValues);
+   free(paramList);
+}
+
+
+/**
+ * Add a new parameter to a parameter list.
+ * Parameter values are stored as 4-element gl_constant_value vectors.
+ * When size > 4 a sequential block of slots is allocated to hold all the
+ * values (in blocks of 4).  Unused components of a slot are zeroed.
+ *
+ * \param paramList  the list to add the parameter to
+ * \param type  register file of the parameter, such as PROGRAM_CONSTANT
+ * \param name  the parameter name, will be duplicated/copied!
+ * \param size  number of elements in 'values' vector (1..4, or more)
+ * \param datatype  GL_FLOAT, GL_FLOAT_VECx, GL_INT, GL_INT_VECx or GL_NONE.
+ * \param values  'size' initial gl_constant_values, or NULL
+ * \param state  state indexes (stored in the first slot only), or NULL
+ * \param flags  flag bits stored in every slot's Flags field
+ * \return  index of new parameter in the list, or -1 if error (out of mem)
+ */
+GLint
+_mesa_add_parameter(struct gl_program_parameter_list *paramList,
+                    gl_register_file type, const char *name,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
+                    const gl_state_index state[STATE_LENGTH],
+                    GLbitfield flags)
+{
+   const GLuint oldNum = paramList->NumParameters;
+   const GLuint sz4 = (size + 3) / 4; /* no. of new param slots needed */
+
+   assert(size > 0);
+
+   if (oldNum + sz4 > paramList->Size) {
+      /* Need to grow the parameter list array (alloc some extra) */
+      paramList->Size = paramList->Size + 4 * sz4;
+      /* realloc arrays */
+      paramList->Parameters = (struct gl_program_parameter *)
+         _mesa_realloc(paramList->Parameters,
+                       oldNum * sizeof(struct gl_program_parameter),
+                       paramList->Size * sizeof(struct gl_program_parameter));
+
+      paramList->ParameterValues = (gl_constant_value (*)[4])
+         _mesa_align_realloc(paramList->ParameterValues,         /* old buf */
+                             oldNum * 4 * sizeof(gl_constant_value),/* old sz */
+                             paramList->Size*4*sizeof(gl_constant_value),/*new*/
+                             16);
+   }
+
+   if (!paramList->Parameters ||
+       !paramList->ParameterValues) {
+      /* out of memory */
+      paramList->NumParameters = 0;
+      paramList->Size = 0;
+      return -1;
+   }
+   else {
+      GLuint i, j;
+
+      paramList->NumParameters = oldNum + sz4;
+
+      memset(&paramList->Parameters[oldNum], 0,
+             sz4 * sizeof(struct gl_program_parameter));
+
+      for (i = 0; i < sz4; i++) {
+         struct gl_program_parameter *p = paramList->Parameters + oldNum + i;
+         p->Name = name ? _mesa_strdup(name) : NULL;
+         p->Type = type;
+         p->Size = size;   /* elements remaining from this slot onwards */
+         p->DataType = datatype;
+         p->Flags = flags;
+         p->Initialized = values ? GL_TRUE : GL_FALSE;
+         /* Copy only components that exist in 'values' (never read past
+          * its 'size' elements); zero the rest, which silences valgrind.
+          */
+         for (j = 0; j < 4; j++) {
+            if (values && j < size)
+               paramList->ParameterValues[oldNum + i][j] = values[4 * i + j];
+            else
+               paramList->ParameterValues[oldNum + i][j].f = 0;
+         }
+         size -= 4;  /* may wrap on the last slot; not read afterwards */
+      }
+
+      if (state) {
+         for (i = 0; i < STATE_LENGTH; i++)
+            paramList->Parameters[oldNum].StateIndexes[i] = state[i];
+      }
+
+      return (GLint) oldNum;
+   }
+}
+
+
+/**
+ * Add a new named program parameter (Ex: NV_fragment_program DEFINE statement)
+ * \return index of the new entry in the list, or -1 if out of memory
+ */
+GLint
+_mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
+                          const char *name, const gl_constant_value values[4])
+{
+   const GLuint numComponents = 4; /* 'values' always holds a full vec4 */
+   return _mesa_add_parameter(paramList, PROGRAM_NAMED_PARAM, name,
+                              numComponents, GL_NONE, values, NULL, 0x0);
+}
+
+
+/**
+ * Add a new named constant to the parameter list.
+ * This will be used when the program contains something like this:
+ *    PARAM myVals = { 0, 1, 2, 3 };
+ *
+ * \param paramList  the parameter list
+ * \param name  the name for the constant
+ * \param values  four constant values (all four are compared bitwise)
+ * \param size  number of components, passed on to _mesa_add_parameter()
+ * \return index of the matching or new parameter, or -1 if out of memory
+ */
+GLint
+_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
+                         const char *name, const gl_constant_value values[4],
+                         GLuint size)
+{
+   /* first check if this is a duplicate constant */
+   GLint pos;
+   for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
+      const gl_constant_value *pvals = paramList->ParameterValues[pos];
+      const char *pname = paramList->Parameters[pos].Name; /* may be NULL */
+      if (pvals[0].u == values[0].u && pvals[1].u == values[1].u &&
+          pvals[2].u == values[2].u && pvals[3].u == values[3].u &&
+          pname != NULL && strcmp(pname, name) == 0) {
+         /* Same name and value is already in the param list - reuse it */
+         return pos;
+      }
+   }
+   /* not found, add new parameter */
+   return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name,
+                              size, GL_NONE, values, NULL, 0x0);
+}
+
+
+/**
+ * Add a new unnamed, typed constant to the parameter list.  This will be
+ * used when a fragment/vertex program contains something like this:
+ *    MOV r, { 0, 1, 2, 3 };
+ * When swizzleOut is non-null, an existing constant that matches under
+ * some swizzle is reused, and a single-component constant may be packed
+ * into a free component of an existing PROGRAM_CONSTANT slot.
+ *
+ * \param paramList  the parameter list
+ * \param values  the constant's components
+ * \param size  number of components in 'values' (1..4)
+ * \param datatype  data type recorded when a new parameter is created
+ * \param swizzleOut  returns swizzle mask for accessing the constant
+ * \return index of the constant in the list, or -1 if out of memory
+ */
+GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut)
+{
+   GLint pos;
+   GLuint comp;
+   ASSERT(size >= 1);
+   ASSERT(size <= 4);
+
+   if (swizzleOut) {
+      /* an equal constant (possibly swizzled) may already be stored */
+      if (_mesa_lookup_parameter_constant(paramList, values,
+                                          size, &pos, swizzleOut))
+         return pos;
+
+      /* A scalar can go into spare room of an existing constant slot; this
+       * relies on smearing (i.e. .yyyy or .zzzz), hence scalars only.
+       */
+      if (size == 1) {
+         const GLint count = (GLint) paramList->NumParameters;
+         for (pos = 0; pos < count; pos++) {
+            struct gl_program_parameter *slot = &paramList->Parameters[pos];
+            if (slot->Type != PROGRAM_CONSTANT || slot->Size + size > 4)
+               continue;
+            /* found room: the old Size is the first free component */
+            comp = slot->Size;
+            paramList->ParameterValues[pos][comp] = values[0];
+            slot->Size = comp + 1;
+            *swizzleOut = MAKE_SWIZZLE4(comp, comp, comp, comp);
+            return pos;
+         }
+      }
+   }
+
+   /* no reuse possible: store the constant in a new parameter */
+   pos = _mesa_add_parameter(paramList, PROGRAM_CONSTANT, NULL,
+                             size, datatype, values, NULL, 0x0);
+   if (pos >= 0 && swizzleOut)
+      *swizzleOut = (size == 1) ? SWIZZLE_XXXX : SWIZZLE_NOOP;
+   return pos;
+}
+
+/**
+ * Add a new unnamed constant to the parameter list.  This will be used
+ * when a fragment/vertex program contains something like this:
+ *    MOV r, { 0, 1, 2, 3 };
+ * Same as _mesa_add_typed_unnamed_constant() with datatype GL_NONE.
+ * \param paramList  the parameter list
+ * \param values  the constant's components
+ * \param size  number of components in 'values'
+ * \param swizzleOut  returns swizzle mask for accessing the constant
+ * \return index/position of the constant in the parameter list.
+ * \sa _mesa_add_typed_unnamed_constant
+ */
+GLint
+_mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLuint *swizzleOut)
+{
+   const GLenum untyped = GL_NONE;
+   return _mesa_add_typed_unnamed_constant(paramList, values, size, untyped,
+                                           swizzleOut);
+}
+
+/**
+ * Add parameter representing a varying variable.
+ * A PROGRAM_VARYING entry with the same name is reused if present.
+ * \return index of the existing or new entry (-1 if out of memory)
+ */
+GLint
+_mesa_add_varying(struct gl_program_parameter_list *paramList,
+                  const char *name, GLuint size, GLenum datatype,
+                  GLbitfield flags)
+{
+   const GLint found = _mesa_lookup_parameter_index(paramList, -1, name);
+
+   /* already in list */
+   if (found >= 0 && paramList->Parameters[found].Type == PROGRAM_VARYING)
+      return found;
+
+   /* not present as a varying: append it without initial values/state */
+   return _mesa_add_parameter(paramList, PROGRAM_VARYING, name,
+                              size, datatype, NULL, NULL, flags);
+}
+
+
+/**
+ * Add parameter representing a vertex program attribute.
+ * \param size  size of attribute (in floats), may be -1 if unknown
+ * \param attrib  the attribute index, or -1 if unknown
+ * \return index of the existing (updated) or newly added parameter
+ */
+GLint
+_mesa_add_attribute(struct gl_program_parameter_list *paramList,
+                    const char *name, GLint size, GLenum datatype, GLint attrib)
+{
+   GLint i = _mesa_lookup_parameter_index(paramList, -1, name);
+   if (i >= 0) {
+      /* replace: an unknown attrib falls back to the parameter's own index */
+      if (attrib < 0)
+         attrib = i;
+      paramList->Parameters[i].StateIndexes[0] = attrib;
+   }
+   else {
+      /* add: the callee receives all STATE_LENGTH tokens, so zero-fill the
+       * ones after [0] rather than passing indeterminate values */
+      gl_state_index state[STATE_LENGTH] = { (gl_state_index) 0 };
+      state[0] = (gl_state_index) attrib;
+      i = _mesa_add_parameter(paramList, PROGRAM_INPUT, name,
+                              size < 0 ? 4 : size, datatype, NULL, state, 0x0);
+   }
+   return i;
+}
+
+
+
+#if 0 /* not used yet */
+/**
+ * Returns the number of 4-component registers needed to store a piece
+ * of GL state.  A STATE_MATRIX reference needs one register per row in
+ * the requested range (stateTokens[3]..stateTokens[4], asserted to be
+ * 1 to 4 rows); everything else needs just 1 register.
+ */
+static GLuint
+sizeof_state_reference(const GLint *stateTokens)
+{
+   if (stateTokens[0] == STATE_MATRIX) {
+      GLuint rows = stateTokens[4] - stateTokens[3] + 1;
+      assert(rows >= 1);
+      assert(rows <= 4);
+      return rows;
+   }
+   else {
+      return 1;
+   }
+}
+#endif
+
+
+/**
+ * Add a new state reference to the parameter list.
+ * This will be used when the program contains something like this:
+ *    PARAM ambient = state.material.front.ambient;
+ * A parameter whose state tokens already match is reused, not duplicated.
+ *
+ * \param paramList  the parameter list
+ * \param stateTokens  an array of 5 (STATE_LENGTH) state tokens
+ * \return index of the matching or newly added parameter.
+ */
+GLint
+_mesa_add_state_reference(struct gl_program_parameter_list *paramList,
+                          const gl_state_index stateTokens[STATE_LENGTH])
+{
+   const size_t tokenBytes = STATE_LENGTH * sizeof(gl_state_index);
+   const GLint count = (GLint) paramList->NumParameters;
+   char *label;
+   GLint pos;
+
+   /* Reuse an entry whose StateIndexes are identical, token for token */
+   for (pos = 0; pos < count; pos++) {
+      if (memcmp(paramList->Parameters[pos].StateIndexes,
+                 stateTokens, tokenBytes) == 0)
+         return pos;
+   }
+
+   /* Not present: append it, named by _mesa_program_state_string() */
+   label = _mesa_program_state_string(stateTokens);
+   pos = _mesa_add_parameter(paramList, PROGRAM_STATE_VAR, label,
+                             4 /* XXX fix */, GL_NONE,
+                             NULL, (gl_state_index *) stateTokens, 0x0);
+   paramList->StateFlags |= _mesa_program_state_flags(stateTokens);
+   free(label);   /* add_parameter() duplicated the string */
+
+   return pos;
+}
+
+
+/**
+ * Lookup a parameter value by name in the given parameter list.
+ * \param nameLen, name  passed through to _mesa_lookup_parameter_index()
+ * \return pointer to the parameter's four values, or NULL if not found.
+ */
+gl_constant_value *
+_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
+                             GLsizei nameLen, const char *name)
+{
+   const GLint pos = _mesa_lookup_parameter_index(paramList, nameLen, name);
+
+   /* a negative index means the name is not in the list */
+   return (pos >= 0) ? paramList->ParameterValues[pos] : NULL;
+}
+
+
+/**
+ * Given a program parameter name, find its position in the list of parameters.
+ * \param paramList  the parameter list to search (may be NULL)
+ * \param nameLen  length of name (in chars).
+ *                 If length is negative, assume that name is null-terminated.
+ * \param name  the name to search for
+ * \return index of parameter in the list, or -1 if there is no match.
+ */
+GLint
+_mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
+                             GLsizei nameLen, const char *name)
+{
+   GLint i;
+
+   if (!paramList)
+      return -1;
+
+   if (nameLen < 0) {
+      /* name is null-terminated (any negative length, as documented) */
+      for (i = 0; i < (GLint) paramList->NumParameters; i++) {
+         if (paramList->Parameters[i].Name &&
+             strcmp(paramList->Parameters[i].Name, name) == 0)
+            return i;
+      }
+   }
+   else {
+      /* name is not null-terminated, use nameLen */
+      for (i = 0; i < (GLint) paramList->NumParameters; i++) {
+         if (paramList->Parameters[i].Name &&
+             strncmp(paramList->Parameters[i].Name, name, nameLen) == 0
+             && strlen(paramList->Parameters[i].Name) == (size_t)nameLen)
+            return i;
+      }
+   }
+   return -1;
+}
+
+
+/**
+ * Look for a constant vector in the given parameter list.  The vector
+ * may be of length 1, 2, 3 or 4.  If swizzleOut is non-null, we'll try
+ * swizzling to find a match.  Components are compared by bit pattern (.u).
+ * \param list  the parameter list to search
+ * \param v  the vector to search for
+ * \param vSize  number of element in v
+ * \param posOut  returns the position of the constant, or -1 if not found
+ * \param swizzleOut  returns a swizzle mask describing location of the
+ *                    vector elements if found.
+ * \return GL_TRUE if found, GL_FALSE if not found
+ */
+GLboolean
+_mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
+                                const gl_constant_value v[], GLuint vSize,
+                                GLint *posOut, GLuint *swizzleOut)
+{
+   GLuint i;
+
+   assert(vSize >= 1);
+   assert(vSize <= 4);
+
+   if (!list) {
+      *posOut = -1;
+      return GL_FALSE;
+   }
+
+   for (i = 0; i < list->NumParameters; i++) {
+      if (list->Parameters[i].Type == PROGRAM_CONSTANT) {
+         if (!swizzleOut) {
+            /* swizzle not allowed */
+            GLuint j, match = 0;
+            for (j = 0; j < vSize; j++) {
+               if (v[j].u == list->ParameterValues[i][j].u)
+                  match++;
+            }
+            if (match == vSize) {
+               *posOut = i;
+               return GL_TRUE;
+            }
+         }
+         else {
+            /* try matching w/ swizzle */
+             if (vSize == 1) {
+                /* look for v[0] anywhere within float[4] value */
+                GLuint j;
+                for (j = 0; j < list->Parameters[i].Size; j++) {
+                   if (list->ParameterValues[i][j].u == v[0].u) {
+                      /* found it */
+                      *posOut = i;
+                      *swizzleOut = MAKE_SWIZZLE4(j, j, j, j);
+                      return GL_TRUE;
+                   }
+                }
+             }
+             else if (vSize <= list->Parameters[i].Size) {
+                /* see if we can match this constant (with a swizzle) */
+                GLuint swz[4] = { 0, 0, 0, 0 };
+                GLuint match = 0, j, k;
+                for (j = 0; j < vSize; j++) {
+                   if (v[j].u == list->ParameterValues[i][j].u) {
+                      swz[j] = j;
+                      match++;
+                   }
+                   else {
+                      for (k = 0; k < list->Parameters[i].Size; k++) {
+                         if (v[j].u == list->ParameterValues[i][k].u) {
+                            swz[j] = k;
+                            match++;
+                            break;
+                         }
+                      }
+                   }
+                }
+                /* smear last value; swz[] is zeroed so a miss reads 0 */
+                for (; j < 4; j++)
+                   swz[j] = swz[j-1];
+
+                if (match == vSize) {
+                   *posOut = i;
+                   *swizzleOut = MAKE_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]);
+                   return GL_TRUE;
+                }
+             }
+         }
+      }
+   }
+
+   *posOut = -1;
+   return GL_FALSE;
+}
+
+
+/** Clone a parameter list; NULL if the clone or any entry can't be added. */
+struct gl_program_parameter_list *
+_mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
+{
+   struct gl_program_parameter_list *clone;
+   GLuint i, k;
+
+   clone = _mesa_new_parameter_list();
+   if (!clone)
+      return NULL;
+
+   /** Not too efficient, but correct */
+   for (i = 0; i < list->NumParameters; i++) {
+      struct gl_program_parameter *p = list->Parameters + i;
+      struct gl_program_parameter *pCopy;
+      GLuint size = MIN2(p->Size, 4);
+      GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
+                                    list->ParameterValues[i], NULL, 0x0);
+      if (j < 0) {
+         /* add failed: never index Parameters[] with a negative slot */
+         _mesa_free_parameter_list(clone);
+         return NULL;
+      }
+      pCopy = clone->Parameters + j;
+      pCopy->Flags = p->Flags;
+      /* state vars get their tokens copied; others their original Size */
+      if (p->Type == PROGRAM_STATE_VAR) {
+         for (k = 0; k < STATE_LENGTH; k++)
+            pCopy->StateIndexes[k] = p->StateIndexes[k];
+      }
+      else {
+         pCopy->Size = p->Size;
+      }
+   }
+   clone->StateFlags = list->StateFlags;
+   return clone;
+}
+
+
+/**
+ * Return a new parameter list which is listA + listB.
+ * Either input may be NULL; with both NULL the result is NULL.
+ * The result is a separate clone; neither input list is modified.
+ */
+struct gl_program_parameter_list *
+_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
+                              const struct gl_program_parameter_list *listB)
+{
+   struct gl_program_parameter_list *merged;
+   GLuint n;
+
+   /* With no listA the result is just a copy of listB (or nothing). */
+   if (!listA)
+      return listB ? _mesa_clone_parameter_list(listB) : NULL;
+
+   /* Start from a copy of listA ... */
+   merged = _mesa_clone_parameter_list(listA);
+   if (!merged || !listB)
+      return merged;
+
+   /* ... then append each listB entry, keeping its state and flags. */
+   for (n = 0; n < listB->NumParameters; n++) {
+      const struct gl_program_parameter *src = listB->Parameters + n;
+      _mesa_add_parameter(merged, src->Type, src->Name, src->Size,
+                          src->DataType, listB->ParameterValues[n],
+                          src->StateIndexes, src->Flags);
+   }
+
+   return merged;
+}
+
+
+
+/**
+ * Find longest name of parameters of the given type; NULL names are skipped.
+ */
+GLuint
+_mesa_longest_parameter_name(const struct gl_program_parameter_list *list,
+                             gl_register_file type)
+{
+   GLuint i, maxLen = 0;
+   if (!list)
+      return 0;
+   for (i = 0; i < list->NumParameters; i++) {
+      if (list->Parameters[i].Type == type && list->Parameters[i].Name) {
+         GLuint len = strlen(list->Parameters[i].Name);
+         if (len > maxLen)
+            maxLen = len;
+      }
+   }
+   return maxLen;
+}
+
+
+/**
+ * Count the number of parameters in the list that match the given type.
+ */
+GLuint
+_mesa_num_parameters_of_type(const struct gl_program_parameter_list *list,
+                             gl_register_file type)
+{
+   GLuint idx, matches = 0;
+   /* a NULL list simply has no parameters of any type */
+   if (!list)
+      return 0;
+
+   for (idx = 0; idx < list->NumParameters; idx++)
+      matches += (list->Parameters[idx].Type == type) ? 1 : 0;
+   return matches;
+}
diff --git a/mesalib/src/mesa/program/prog_parameter.h b/mesalib/src/mesa/program/prog_parameter.h
index 10cbbe57a..f858cf0fa 100644
--- a/mesalib/src/mesa/program/prog_parameter.h
+++ b/mesalib/src/mesa/program/prog_parameter.h
@@ -46,7 +46,15 @@
 #define PROG_PARAM_BIT_CYL_WRAP  0x10  /**< XXX gallium debug */
 /*@}*/
 
-
+/**
+ * One parameter value component: viewable as float, boolean, int or uint.
+ */
+typedef union gl_constant_value {
+	GLfloat f;
+	GLboolean b;
+	GLint i;
+	GLuint u;
+} gl_constant_value;
 
 /**
  * Program parameter.
@@ -81,7 +89,7 @@ struct gl_program_parameter_list
    GLuint Size;           /**< allocated size of Parameters, ParameterValues */
    GLuint NumParameters;  /**< number of parameters in arrays */
    struct gl_program_parameter *Parameters; /**< Array [Size] */
-   GLfloat (*ParameterValues)[4];        /**< Array [Size] of GLfloat[4] */
+   gl_constant_value (*ParameterValues)[4]; /**< Array [Size] of constant[4] */
    GLbitfield StateFlags; /**< _NEW_* flags indicating which state changes
                                might invalidate ParameterValues[] */
 };
@@ -112,22 +120,28 @@ _mesa_num_parameters(const struct gl_program_parameter_list *list)
 extern GLint
 _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     gl_register_file type, const char *name,
-                    GLuint size, GLenum datatype, const GLfloat *values,
+                    GLuint size, GLenum datatype,
+                    const gl_constant_value *values,
                     const gl_state_index state[STATE_LENGTH],
                     GLbitfield flags);
 
 extern GLint
 _mesa_add_named_parameter(struct gl_program_parameter_list *paramList,
-                          const char *name, const GLfloat values[4]);
+                          const char *name, const gl_constant_value values[4]);
 
 extern GLint
 _mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const GLfloat values[4],
+                         const char *name, const gl_constant_value values[4],
                          GLuint size);
 
+extern GLint
+_mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
+                           const gl_constant_value values[4], GLuint size,
+                           GLenum datatype, GLuint *swizzleOut);
+
 extern GLint
 _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
-                           const GLfloat values[4], GLuint size,
+                           const gl_constant_value values[4], GLuint size,
                            GLuint *swizzleOut);
 
 extern GLint
@@ -143,7 +157,7 @@ extern GLint
 _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
                           const gl_state_index stateTokens[STATE_LENGTH]);
 
-extern GLfloat *
+extern gl_constant_value *
 _mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name);
 
@@ -153,7 +167,7 @@ _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
 
 extern GLboolean
 _mesa_lookup_parameter_constant(const struct gl_program_parameter_list *list,
-                                const GLfloat v[], GLuint vSize,
+                                const gl_constant_value v[], GLuint vSize,
                                 GLint *posOut, GLuint *swizzleOut);
 
 extern GLuint
diff --git a/mesalib/src/mesa/program/prog_parameter_layout.c b/mesalib/src/mesa/program/prog_parameter_layout.c
index 90a977108..28fca3b92 100644
--- a/mesalib/src/mesa/program/prog_parameter_layout.c
+++ b/mesalib/src/mesa/program/prog_parameter_layout.c
@@ -182,7 +182,7 @@ _mesa_layout_parameters(struct asm_parser_state *state)
 
 	 switch (p->Type) {
 	 case PROGRAM_CONSTANT: {
-	    const float *const v =
+	    const gl_constant_value *const v =
 	       state->prog->Parameters->ParameterValues[idx];
 
 	    inst->Base.SrcReg[i].Index =
diff --git a/mesalib/src/mesa/program/prog_print.c b/mesalib/src/mesa/program/prog_print.c
index 7c3b4909e..70412b1fa 100644
--- a/mesalib/src/mesa/program/prog_print.c
+++ b/mesalib/src/mesa/program/prog_print.c
@@ -985,7 +985,7 @@ _mesa_fprint_parameter_list(FILE *f,
    fprintf(f, "dirty state flags: 0x%x\n", list->StateFlags);
    for (i = 0; i < list->NumParameters; i++){
       struct gl_program_parameter *param = list->Parameters + i;
-      const GLfloat *v = list->ParameterValues[i];
+      const GLfloat *v = (GLfloat *) list->ParameterValues[i];
       fprintf(f, "param[%d] sz=%d %s %s = {%.3g, %.3g, %.3g, %.3g}",
 	      i, param->Size,
 	      _mesa_register_file_name(list->Parameters[i].Type),
diff --git a/mesalib/src/mesa/program/prog_statevars.c b/mesalib/src/mesa/program/prog_statevars.c
index 16f9690e8..6aa2409e8 100644
--- a/mesalib/src/mesa/program/prog_statevars.c
+++ b/mesalib/src/mesa/program/prog_statevars.c
@@ -1111,7 +1111,7 @@ _mesa_load_state_parameters(struct gl_context *ctx,
       if (paramList->Parameters[i].Type == PROGRAM_STATE_VAR) {
          _mesa_fetch_state(ctx,
 			   paramList->Parameters[i].StateIndexes,
-                           paramList->ParameterValues[i]);
+                           &paramList->ParameterValues[i][0].f);
       }
    }
 }
diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c
index adca094ee..ecff2344a 100644
--- a/mesalib/src/mesa/program/program.c
+++ b/mesalib/src/mesa/program/program.c
@@ -388,8 +388,9 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
    if (prog->String)
       free(prog->String);
 
-   _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
-
+   if (prog->Instructions) {
+      _mesa_free_instructions(prog->Instructions, prog->NumInstructions);
+   }
    if (prog->Parameters) {
       _mesa_free_parameter_list(prog->Parameters);
    }
@@ -1031,7 +1032,8 @@ _mesa_postprocess_program(struct gl_context *ctx, struct gl_program *prog)
    GLuint i;
    GLuint whiteSwizzle;
    GLint whiteIndex = _mesa_add_unnamed_constant(prog->Parameters,
-                                                 white, 4, &whiteSwizzle);
+                                                 (gl_constant_value *) white,
+                                                 4, &whiteSwizzle);
 
    (void) whiteIndex;
 
diff --git a/mesalib/src/mesa/program/program_parse.y b/mesalib/src/mesa/program/program_parse.y
index dbf5abaa6..dec35038b 100644
--- a/mesalib/src/mesa/program/program_parse.y
+++ b/mesalib/src/mesa/program/program_parse.y
@@ -1854,64 +1854,64 @@ paramConstUse: paramConstScalarUse | paramConstVector;
 paramConstScalarDecl: signedFloatConstant
 	{
 	   $$.count = 4;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	;
 
 paramConstScalarUse: REAL
 	{
 	   $$.count = 1;
-	   $$.data[0] = $1;
-	   $$.data[1] = $1;
-	   $$.data[2] = $1;
-	   $$.data[3] = $1;
+	   $$.data[0].f = $1;
+	   $$.data[1].f = $1;
+	   $$.data[2].f = $1;
+	   $$.data[3].f = $1;
 	}
 	| INTEGER
 	{
 	   $$.count = 1;
-	   $$.data[0] = (float) $1;
-	   $$.data[1] = (float) $1;
-	   $$.data[2] = (float) $1;
-	   $$.data[3] = (float) $1;
+	   $$.data[0].f = (float) $1;
+	   $$.data[1].f = (float) $1;
+	   $$.data[2].f = (float) $1;
+	   $$.data[3].f = (float) $1;
 	}
 	;
 
 paramConstVector: '{' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = 0.0f;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = 0.0f;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = 0.0f;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = 0.0f;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = 1.0f;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = 1.0f;
 	}
 	| '{' signedFloatConstant ',' signedFloatConstant ','
               signedFloatConstant ',' signedFloatConstant '}'
 	{
 	   $$.count = 4;
-	   $$.data[0] = $2;
-	   $$.data[1] = $4;
-	   $$.data[2] = $6;
-	   $$.data[3] = $8;
+	   $$.data[0].f = $2;
+	   $$.data[1].f = $4;
+	   $$.data[2].f = $6;
+	   $$.data[3].f = $8;
 	}
 	;
 
diff --git a/mesalib/src/mesa/program/program_parser.h b/mesalib/src/mesa/program/program_parser.h
index 8e5aaee95..5637598f3 100644
--- a/mesalib/src/mesa/program/program_parser.h
+++ b/mesalib/src/mesa/program/program_parser.h
@@ -23,6 +23,7 @@
 #pragma once
 
 #include "main/config.h"
+#include "program/prog_parameter.h"
 
 struct gl_context;
 
@@ -96,7 +97,7 @@ struct asm_symbol {
 
 struct asm_vector {
    unsigned count;
-   float    data[4];
+   gl_constant_value data[4];
 };
 
 
diff --git a/mesalib/src/mesa/program/sampler.cpp b/mesalib/src/mesa/program/sampler.cpp
index 1457d1199..e8d34c670 100644
--- a/mesalib/src/mesa/program/sampler.cpp
+++ b/mesalib/src/mesa/program/sampler.cpp
@@ -132,6 +132,6 @@ _mesa_get_sampler_uniform_value(class ir_dereference *sampler,
 
    index += getname.offset;
 
-   return prog->Parameters->ParameterValues[index][0];
+   return prog->Parameters->ParameterValues[index][0].f;
 }
 }
diff --git a/mesalib/src/mesa/sources.mak b/mesalib/src/mesa/sources.mak
index 4b2ec08bb..ed008f881 100644
--- a/mesalib/src/mesa/sources.mak
+++ b/mesalib/src/mesa/sources.mak
@@ -336,7 +336,8 @@ MESA_GALLIUM_SOURCES = \
 
 MESA_GALLIUM_CXX_SOURCES = \
 	$(MAIN_CXX_SOURCES) \
-	$(SHADER_CXX_SOURCES)
+	$(SHADER_CXX_SOURCES) \
+	state_tracker/st_glsl_to_tgsi.cpp
 
 # All the core C sources, for dependency checking
 ALL_SOURCES = \
diff --git a/mesalib/src/mesa/state_tracker/st_atom_pixeltransfer.c b/mesalib/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 1f833d282..12b5bc5ba 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -84,26 +84,6 @@ make_state_key(struct gl_context *ctx,  struct state_key *key)
 }
 
 
-static struct pipe_resource *
-create_color_map_texture(struct gl_context *ctx)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt;
-   enum pipe_format format;
-   const uint texSize = 256; /* simple, and usually perfect */
-
-   /* find an RGBA texture format */
-   format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
-                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
-
-   /* create texture for color map/table */
-   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
-                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
-   return pt;
-}
-
-
 /**
  * Update the pixelmap texture with the contents of the R/G/B/A pixel maps.
  */
@@ -219,7 +199,7 @@ get_pixel_transfer_program(struct gl_context *ctx, const struct state_key *key)
 
       /* create the colormap/texture now if not already done */
       if (!st->pixel_xfer.pixelmap_texture) {
-         st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
          st->pixel_xfer.pixelmap_sampler_view =
             st_create_texture_sampler_view(st->pipe,
                                            st->pixel_xfer.pixelmap_texture);
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
index f49c03b46..beb5e7cab 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bitmap.c
@@ -1,894 +1,917 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Brian Paul
-  */
-
-#include "main/imports.h"
-#include "main/image.h"
-#include "main/bufferobj.h"
-#include "main/macros.h"
-#include "main/mfeatures.h"
-#include "main/pbo.h"
-#include "program/program.h"
-#include "program/prog_print.h"
-
-#include "st_context.h"
-#include "st_atom.h"
-#include "st_atom_constbuf.h"
-#include "st_program.h"
-#include "st_cb_bitmap.h"
-#include "st_texture.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_inlines.h"
-#include "util/u_draw_quad.h"
-#include "util/u_simple_shaders.h"
-#include "program/prog_instruction.h"
-#include "cso_cache/cso_context.h"
-
-
-#if FEATURE_drawpix
-
-/**
- * glBitmaps are drawn as textured quads.  The user's bitmap pattern
- * is stored in a texture image.  An alpha8 texture format is used.
- * The fragment shader samples a bit (texel) from the texture, then
- * discards the fragment if the bit is off.
- *
- * Note that we actually store the inverse image of the bitmap to
- * simplify the fragment program.  An "on" bit gets stored as texel=0x0
- * and an "off" bit is stored as texel=0xff.  Then we kill the
- * fragment if the negated texel value is less than zero.
- */
-
-
-/**
- * The bitmap cache attempts to accumulate multiple glBitmap calls in a
- * buffer which is then rendered en mass upon a flush, state change, etc.
- * A wide, short buffer is used to target the common case of a series
- * of glBitmap calls being used to draw text.
- */
-static GLboolean UseBitmapCache = GL_TRUE;
-
-
-#define BITMAP_CACHE_WIDTH  512
-#define BITMAP_CACHE_HEIGHT 32
-
-struct bitmap_cache
-{
-   /** Window pos to render the cached image */
-   GLint xpos, ypos;
-   /** Bounds of region used in window coords */
-   GLint xmin, ymin, xmax, ymax;
-
-   GLfloat color[4];
-
-   /** Bitmap's Z position */
-   GLfloat zpos;
-
-   struct pipe_resource *texture;
-   struct pipe_transfer *trans;
-
-   GLboolean empty;
-
-   /** An I8 texture image: */
-   ubyte *buffer;
-};
-
-
-/** Epsilon for Z comparisons */
-#define Z_EPSILON 1e-06
-
-
-/**
- * Make fragment program for glBitmap:
- *   Sample the texture and kill the fragment if the bit is 0.
- * This program will be combined with the user's fragment program.
- */
-static struct st_fragment_program *
-make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
-{
-   struct st_context *st = st_context(ctx);
-   struct st_fragment_program *stfp;
-   struct gl_program *p;
-   GLuint ic = 0;
-
-   p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
-   if (!p)
-      return NULL;
-
-   p->NumInstructions = 3;
-
-   p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
-   if (!p->Instructions) {
-      ctx->Driver.DeleteProgram(ctx, p);
-      return NULL;
-   }
-   _mesa_init_instructions(p->Instructions, p->NumInstructions);
-
-   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
-   p->Instructions[ic].Opcode = OPCODE_TEX;
-   p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
-   p->Instructions[ic].DstReg.Index = 0;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
-   p->Instructions[ic].TexSrcUnit = samplerIndex;
-   p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
-   ic++;
-
-   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
-   p->Instructions[ic].Opcode = OPCODE_KIL;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
-
-   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
-      p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
-
-   p->Instructions[ic].SrcReg[0].Index = 0;
-   p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
-   ic++;
-
-   /* END; */
-   p->Instructions[ic++].Opcode = OPCODE_END;
-
-   assert(ic == p->NumInstructions);
-
-   p->InputsRead = FRAG_BIT_TEX0;
-   p->OutputsWritten = 0x0;
-   p->SamplersUsed = (1 << samplerIndex);
-
-   stfp = (struct st_fragment_program *) p;
-   stfp->Base.UsesKill = GL_TRUE;
-
-   return stfp;
-}
-
-
-static int
-find_free_bit(uint bitfield)
-{
-   int i;
-   for (i = 0; i < 32; i++) {
-      if ((bitfield & (1 << i)) == 0) {
-         return i;
-      }
-   }
-   return -1;
-}
-
-
-/**
- * Combine basic bitmap fragment program with the user-defined program.
- * \param st  current context
- * \param fpIn  the incoming fragment program
- * \param fpOut  the new fragment program which does fragment culling
- * \param bitmap_sampler  sampler number for the bitmap texture
- */
-void
-st_make_bitmap_fragment_program(struct st_context *st,
-                                struct gl_fragment_program *fpIn,
-                                struct gl_fragment_program **fpOut,
-                                GLuint *bitmap_sampler)
-{
-   struct st_fragment_program *bitmap_prog;
-   struct gl_program *newProg;
-   uint sampler;
-
-   /*
-    * Generate new program which is the user-defined program prefixed
-    * with the bitmap sampler/kill instructions.
-    */
-   sampler = find_free_bit(fpIn->Base.SamplersUsed);
-   bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
-
-   newProg = _mesa_combine_programs(st->ctx,
-                                    &bitmap_prog->Base.Base,
-                                    &fpIn->Base);
-   /* done with this after combining */
-   st_reference_fragprog(st, &bitmap_prog, NULL);
-
-#if 0
-   {
-      printf("Combined bitmap program:\n");
-      _mesa_print_program(newProg);
-      printf("InputsRead: 0x%x\n", newProg->InputsRead);
-      printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
-      _mesa_print_parameter_list(newProg->Parameters);
-   }
-#endif
-
-   /* return results */
-   *fpOut = (struct gl_fragment_program *) newProg;
-   *bitmap_sampler = sampler;
-}
-
-
-/**
- * Copy user-provide bitmap bits into texture buffer, expanding
- * bits into texels.
- * "On" bits will set texels to 0x0.
- * "Off" bits will not modify texels.
- * Note that the image is actually going to be upside down in
- * the texture.  We deal with that with texcoords.
- */
-static void
-unpack_bitmap(struct st_context *st,
-              GLint px, GLint py, GLsizei width, GLsizei height,
-              const struct gl_pixelstore_attrib *unpack,
-              const GLubyte *bitmap,
-              ubyte *destBuffer, uint destStride)
-{
-   destBuffer += py * destStride + px;
-
-   _mesa_expand_bitmap(width, height, unpack, bitmap,
-                       destBuffer, destStride, 0x0);
-}
-
-
-/**
- * Create a texture which represents a bitmap image.
- */
-static struct pipe_resource *
-make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
-                    const struct gl_pixelstore_attrib *unpack,
-                    const GLubyte *bitmap)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_transfer *transfer;
-   ubyte *dest;
-   struct pipe_resource *pt;
-
-   /* PBO source... */
-   bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
-   if (!bitmap) {
-      return NULL;
-   }
-
-   /**
-    * Create texture to hold bitmap pattern.
-    */
-   pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
-                          0, width, height, 1, 1,
-                          PIPE_BIND_SAMPLER_VIEW);
-   if (!pt) {
-      _mesa_unmap_pbo_source(ctx, unpack);
-      return NULL;
-   }
-
-   transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
-                                PIPE_TRANSFER_WRITE,
-                                0, 0, width, height);
-
-   dest = pipe_transfer_map(pipe, transfer);
-
-   /* Put image into texture transfer */
-   memset(dest, 0xff, height * transfer->stride);
-   unpack_bitmap(st, 0, 0, width, height, unpack, bitmap,
-                 dest, transfer->stride);
-
-   _mesa_unmap_pbo_source(ctx, unpack);
-
-   /* Release transfer */
-   pipe_transfer_unmap(pipe, transfer);
-   pipe->transfer_destroy(pipe, transfer);
-
-   return pt;
-}
-
-static GLuint
-setup_bitmap_vertex_data(struct st_context *st, bool normalized,
-                         int x, int y, int width, int height,
-                         float z, const float color[4])
-{
-   struct pipe_context *pipe = st->pipe;
-   const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-   const GLfloat fb_width = (GLfloat)fb->Width;
-   const GLfloat fb_height = (GLfloat)fb->Height;
-   const GLfloat x0 = (GLfloat)x;
-   const GLfloat x1 = (GLfloat)(x + width);
-   const GLfloat y0 = (GLfloat)y;
-   const GLfloat y1 = (GLfloat)(y + height);
-   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
-   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
-   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
-   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
-   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
-   const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
-   GLuint i;
-
-   if(!normalized)
-   {
-      sRight = width;
-      tBot = height;
-   }
-
-   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
-    * no_flush) updates to buffers where we know there is no conflict
-    * with previous data.  Currently using max_slots > 1 will cause
-    * synchronous rendering if the driver flushes its command buffers
-    * between one bitmap and the next.  Our flush hook below isn't
-    * sufficient to catch this as the driver doesn't tell us when it
-    * flushes its own command buffers.  Until this gets fixed, pay the
-    * price of allocating a new buffer for each bitmap cache-flush to
-    * avoid synchronous rendering.
-    */
-   if (st->bitmap.vbuf_slot >= max_slots) {
-      pipe_resource_reference(&st->bitmap.vbuf, NULL);
-      st->bitmap.vbuf_slot = 0;
-   }
-
-   if (!st->bitmap.vbuf) {
-      st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 
-                                           PIPE_BIND_VERTEX_BUFFER,
-                                           PIPE_USAGE_STREAM,
-                                           max_slots *
-                                           sizeof(st->bitmap.vertices));
-   }
-
-   /* Positions are in clip coords since we need to do clipping in case
-    * the bitmap quad goes beyond the window bounds.
-    */
-   st->bitmap.vertices[0][0][0] = clip_x0;
-   st->bitmap.vertices[0][0][1] = clip_y0;
-   st->bitmap.vertices[0][2][0] = sLeft;
-   st->bitmap.vertices[0][2][1] = tTop;
-
-   st->bitmap.vertices[1][0][0] = clip_x1;
-   st->bitmap.vertices[1][0][1] = clip_y0;
-   st->bitmap.vertices[1][2][0] = sRight;
-   st->bitmap.vertices[1][2][1] = tTop;
-   
-   st->bitmap.vertices[2][0][0] = clip_x1;
-   st->bitmap.vertices[2][0][1] = clip_y1;
-   st->bitmap.vertices[2][2][0] = sRight;
-   st->bitmap.vertices[2][2][1] = tBot;
-   
-   st->bitmap.vertices[3][0][0] = clip_x0;
-   st->bitmap.vertices[3][0][1] = clip_y1;
-   st->bitmap.vertices[3][2][0] = sLeft;
-   st->bitmap.vertices[3][2][1] = tBot;
-   
-   /* same for all verts: */
-   for (i = 0; i < 4; i++) {
-      st->bitmap.vertices[i][0][2] = z;
-      st->bitmap.vertices[i][0][3] = 1.0;
-      st->bitmap.vertices[i][1][0] = color[0];
-      st->bitmap.vertices[i][1][1] = color[1];
-      st->bitmap.vertices[i][1][2] = color[2];
-      st->bitmap.vertices[i][1][3] = color[3];
-      st->bitmap.vertices[i][2][2] = 0.0; /*R*/
-      st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
-   }
-
-   /* put vertex data into vbuf */
-   pipe_buffer_write_nooverlap(st->pipe,
-                               st->bitmap.vbuf,
-                               st->bitmap.vbuf_slot
-                               * sizeof(st->bitmap.vertices),
-                               sizeof st->bitmap.vertices,
-                               st->bitmap.vertices);
-
-   return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
-}
-
-
-
-/**
- * Render a glBitmap by drawing a textured quad
- */
-static void
-draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
-                 GLsizei width, GLsizei height,
-                 struct pipe_sampler_view *sv,
-                 const GLfloat *color)
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_context *pipe = st->pipe;
-   struct cso_context *cso = st->cso_context;
-   struct st_fp_variant *fpv;
-   struct st_fp_variant_key key;
-   GLuint maxSize;
-   GLuint offset;
-
-   memset(&key, 0, sizeof(key));
-   key.st = st;
-   key.bitmap = GL_TRUE;
-
-   fpv = st_get_fp_variant(st, st->fp, &key);
-
-   /* As an optimization, Mesa's fragment programs will sometimes get the
-    * primary color from a statevar/constant rather than a varying variable.
-    * when that's the case, we need to ensure that we use the 'color'
-    * parameter and not the current attribute color (which may have changed
-    * through glRasterPos and state validation.
-    * So, we force the proper color here.  Not elegant, but it works.
-    */
-   {
-      GLfloat colorSave[4];
-      COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
-      COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
-      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
-      COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
-   }
-
-
-   /* limit checks */
-   /* XXX if the bitmap is larger than the max texture size, break
-    * it up into chunks.
-    */
-   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
-                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
-   assert(width <= (GLsizei)maxSize);
-   assert(height <= (GLsizei)maxSize);
-
-   cso_save_rasterizer(cso);
-   cso_save_samplers(cso);
-   cso_save_fragment_sampler_views(cso);
-   cso_save_viewport(cso);
-   cso_save_fragment_shader(cso);
-   cso_save_vertex_shader(cso);
-   cso_save_vertex_elements(cso);
-   cso_save_vertex_buffers(cso);
-
-   /* rasterizer state: just scissor */
-   st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
-   cso_set_rasterizer(cso, &st->bitmap.rasterizer);
-
-   /* fragment shader state: TEX lookup program */
-   cso_set_fragment_shader_handle(cso, fpv->driver_shader);
-
-   /* vertex shader state: position + texcoord pass-through */
-   cso_set_vertex_shader_handle(cso, st->bitmap.vs);
-
-   /* user samplers, plus our bitmap sampler */
-   {
-      struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
-      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
-      uint i;
-      for (i = 0; i < st->state.num_samplers; i++) {
-         samplers[i] = &st->state.samplers[i];
-      }
-      samplers[fpv->bitmap_sampler] =
-         &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
-      cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
-   }
-
-   /* user textures, plus the bitmap texture */
-   {
-      struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
-      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
-      memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
-      sampler_views[fpv->bitmap_sampler] = sv;
-      cso_set_fragment_sampler_views(cso, num, sampler_views);
-   }
-
-   /* viewport state: viewport matching window dims */
-   {
-      const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
-      const GLfloat width = (GLfloat)fb->Width;
-      const GLfloat height = (GLfloat)fb->Height;
-      struct pipe_viewport_state vp;
-      vp.scale[0] =  0.5f * width;
-      vp.scale[1] = height * (invert ? -0.5f : 0.5f);
-      vp.scale[2] = 0.5f;
-      vp.scale[3] = 1.0f;
-      vp.translate[0] = 0.5f * width;
-      vp.translate[1] = 0.5f * height;
-      vp.translate[2] = 0.5f;
-      vp.translate[3] = 0.0f;
-      cso_set_viewport(cso, &vp);
-   }
-
-   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
-
-   /* convert Z from [0,1] to [-1,-1] to match viewport Z scale/bias */
-   z = z * 2.0 - 1.0;
-
-   /* draw textured quad */
-   offset = setup_bitmap_vertex_data(st,
-                                     sv->texture->target != PIPE_TEXTURE_RECT,
-                                     x, y, width, height, z, color);
-
-   util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset,
-                           PIPE_PRIM_TRIANGLE_FAN,
-                           4,  /* verts */
-                           3); /* attribs/vert */
-
-
-   /* restore state */
-   cso_restore_rasterizer(cso);
-   cso_restore_samplers(cso);
-   cso_restore_fragment_sampler_views(cso);
-   cso_restore_viewport(cso);
-   cso_restore_fragment_shader(cso);
-   cso_restore_vertex_shader(cso);
-   cso_restore_vertex_elements(cso);
-   cso_restore_vertex_buffers(cso);
-}
-
-
-static void
-reset_cache(struct st_context *st)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct bitmap_cache *cache = st->bitmap.cache;
-
-   /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
-   cache->empty = GL_TRUE;
-
-   cache->xmin = 1000000;
-   cache->xmax = -1000000;
-   cache->ymin = 1000000;
-   cache->ymax = -1000000;
-
-   if (cache->trans) {
-      pipe->transfer_destroy(pipe, cache->trans);
-      cache->trans = NULL;
-   }
-
-   assert(!cache->texture);
-
-   /* allocate a new texture */
-   cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
-                                      st->bitmap.tex_format, 0,
-                                      BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
-                                      1, 1,
-				      PIPE_BIND_SAMPLER_VIEW);
-}
-
-
-/** Print bitmap image to stdout (debug) */
-static void
-print_cache(const struct bitmap_cache *cache)
-{
-   int i, j, k;
-
-   for (i = 0; i < BITMAP_CACHE_HEIGHT; i++) {
-      k = BITMAP_CACHE_WIDTH * (BITMAP_CACHE_HEIGHT - i - 1);
-      for (j = 0; j < BITMAP_CACHE_WIDTH; j++) {
-         if (cache->buffer[k])
-            printf("X");
-         else
-            printf(" ");
-         k++;
-      }
-      printf("\n");
-   }
-}
-
-
-/**
- * Create gallium pipe_transfer object for the bitmap cache.
- */
-static void
-create_cache_trans(struct st_context *st)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct bitmap_cache *cache = st->bitmap.cache;
-
-   if (cache->trans)
-      return;
-
-   /* Map the texture transfer.
-    * Subsequent glBitmap calls will write into the texture image.
-    */
-   cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
-                                    PIPE_TRANSFER_WRITE, 0, 0,
-                                    BITMAP_CACHE_WIDTH,
-                                    BITMAP_CACHE_HEIGHT);
-   cache->buffer = pipe_transfer_map(pipe, cache->trans);
-
-   /* init image to all 0xff */
-   memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
-}
-
-
-/**
- * If there's anything in the bitmap cache, draw/flush it now.
- */
-void
-st_flush_bitmap_cache(struct st_context *st)
-{
-   if (!st->bitmap.cache->empty) {
-      struct bitmap_cache *cache = st->bitmap.cache;
-
-      if (st->ctx->DrawBuffer) {
-         struct pipe_context *pipe = st->pipe;
-         struct pipe_sampler_view *sv;
-
-         assert(cache->xmin <= cache->xmax);
- 
-/*         printf("flush size %d x %d  at %d, %d\n",
-                cache->xmax - cache->xmin,
-                cache->ymax - cache->ymin,
-                cache->xpos, cache->ypos);
-*/
-
-         /* The texture transfer has been mapped until now.
-          * So unmap and release the texture transfer before drawing.
-          */
-         if (cache->trans) {
-            if (0)
-               print_cache(cache);
-            pipe_transfer_unmap(pipe, cache->trans);
-            cache->buffer = NULL;
-
-            pipe->transfer_destroy(pipe, cache->trans);
-            cache->trans = NULL;
-         }
-
-         sv = st_create_texture_sampler_view(st->pipe, cache->texture);
-         if (sv) {
-            draw_bitmap_quad(st->ctx,
-                             cache->xpos,
-                             cache->ypos,
-                             cache->zpos,
-                             BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
-                             sv,
-                             cache->color);
-
-            pipe_sampler_view_reference(&sv, NULL);
-         }
-      }
-
-      /* release/free the texture */
-      pipe_resource_reference(&cache->texture, NULL);
-
-      reset_cache(st);
-   }
-}
-
-
-/**
- * Flush bitmap cache and release vertex buffer.
- */
-void
-st_flush_bitmap( struct st_context *st )
-{
-   st_flush_bitmap_cache(st);
-
-   /* Release vertex buffer to avoid synchronous rendering if we were
-    * to map it in the next frame.
-    */
-   pipe_resource_reference(&st->bitmap.vbuf, NULL);
-   st->bitmap.vbuf_slot = 0;
-}
-
-
-/**
- * Try to accumulate this glBitmap call in the bitmap cache.
- * \return  GL_TRUE for success, GL_FALSE if bitmap is too large, etc.
- */
-static GLboolean
-accum_bitmap(struct st_context *st,
-             GLint x, GLint y, GLsizei width, GLsizei height,
-             const struct gl_pixelstore_attrib *unpack,
-             const GLubyte *bitmap )
-{
-   struct bitmap_cache *cache = st->bitmap.cache;
-   int px = -999, py = -999;
-   const GLfloat z = st->ctx->Current.RasterPos[2];
-
-   if (width > BITMAP_CACHE_WIDTH ||
-       height > BITMAP_CACHE_HEIGHT)
-      return GL_FALSE; /* too big to cache */
-
-   if (!cache->empty) {
-      px = x - cache->xpos;  /* pos in buffer */
-      py = y - cache->ypos;
-      if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
-          py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
-          !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
-          ((fabs(z - cache->zpos) > Z_EPSILON))) {
-         /* This bitmap would extend beyond cache bounds, or the bitmap
-          * color is changing
-          * so flush and continue.
-          */
-         st_flush_bitmap_cache(st);
-      }
-   }
-
-   if (cache->empty) {
-      /* Initialize.  Center bitmap vertically in the buffer. */
-      px = 0;
-      py = (BITMAP_CACHE_HEIGHT - height) / 2;
-      cache->xpos = x;
-      cache->ypos = y - py;
-      cache->zpos = z;
-      cache->empty = GL_FALSE;
-      COPY_4FV(cache->color, st->ctx->Current.RasterColor);
-   }
-
-   assert(px != -999);
-   assert(py != -999);
-
-   if (x < cache->xmin)
-      cache->xmin = x;
-   if (y < cache->ymin)
-      cache->ymin = y;
-   if (x + width > cache->xmax)
-      cache->xmax = x + width;
-   if (y + height > cache->ymax)
-      cache->ymax = y + height;
-
-   /* create the transfer if needed */
-   create_cache_trans(st);
-
-   unpack_bitmap(st, px, py, width, height, unpack, bitmap,
-                 cache->buffer, BITMAP_CACHE_WIDTH);
-
-   return GL_TRUE; /* accumulated */
-}
-
-
-
-/**
- * Called via ctx->Driver.Bitmap()
- */
-static void
-st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
-          GLsizei width, GLsizei height,
-          const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
-{
-   struct st_context *st = st_context(ctx);
-   struct pipe_resource *pt;
-
-   if (width == 0 || height == 0)
-      return;
-
-   st_validate_state(st);
-
-   if (!st->bitmap.vs) {
-      /* create pass-through vertex shader now */
-      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
-                                      TGSI_SEMANTIC_COLOR,
-                                      TGSI_SEMANTIC_GENERIC };
-      const uint semantic_indexes[] = { 0, 0, 0 };
-      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
-                                                          semantic_names,
-                                                          semantic_indexes);
-   }
-
-   if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
-      return;
-
-   pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
-   if (pt) {
-      struct pipe_sampler_view *sv =
-         st_create_texture_sampler_view(st->pipe, pt);
-
-      assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
-
-      if (sv) {
-         draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
-                          width, height, sv,
-                          st->ctx->Current.RasterColor);
-
-         pipe_sampler_view_reference(&sv, NULL);
-      }
-
-      /* release/free the texture */
-      pipe_resource_reference(&pt, NULL);
-   }
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap_functions(struct dd_function_table *functions)
-{
-   functions->Bitmap = st_Bitmap;
-}
-
-
-/** Per-context init */
-void
-st_init_bitmap(struct st_context *st)
-{
-   struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_screen *screen = pipe->screen;
-
-   /* init sampler state once */
-   memset(sampler, 0, sizeof(*sampler));
-   sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
-   sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
-   sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
-   sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   st->bitmap.samplers[1] = *sampler;
-   st->bitmap.samplers[1].normalized_coords = 1;
-
-   /* init baseline rasterizer state once */
-   memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
-   st->bitmap.rasterizer.gl_rasterization_rules = 1;
-
-   /* find a usable texture format */
-   if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
-                                   PIPE_TEXTURE_2D, 0,
-                                   PIPE_BIND_SAMPLER_VIEW)) {
-      st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
-   }
-   else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
-                                        PIPE_TEXTURE_2D, 0,
-                                        PIPE_BIND_SAMPLER_VIEW)) {
-      st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
-   }
-   else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
-                                        PIPE_TEXTURE_2D, 0,
-                                        PIPE_BIND_SAMPLER_VIEW)) {
-      st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
-   }
-   else {
-      /* XXX support more formats */
-      assert(0);
-   }
-
-   /* alloc bitmap cache object */
-   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
-
-   reset_cache(st);
-}
-
-
-/** Per-context tear-down */
-void
-st_destroy_bitmap(struct st_context *st)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct bitmap_cache *cache = st->bitmap.cache;
-
-   if (st->bitmap.vs) {
-      cso_delete_vertex_shader(st->cso_context, st->bitmap.vs);
-      st->bitmap.vs = NULL;
-   }
-
-   if (st->bitmap.vbuf) {
-      pipe_resource_reference(&st->bitmap.vbuf, NULL);
-      st->bitmap.vbuf = NULL;
-   }
-
-   if (cache) {
-      if (cache->trans) {
-         pipe_transfer_unmap(pipe, cache->trans);
-         pipe->transfer_destroy(pipe, cache->trans);
-      }
-      pipe_resource_reference(&st->bitmap.cache->texture, NULL);
-      free(st->bitmap.cache);
-      st->bitmap.cache = NULL;
-   }
-}
-
-#endif /* FEATURE_drawpix */
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Brian Paul
+  */
+
+#include "main/imports.h"
+#include "main/image.h"
+#include "main/bufferobj.h"
+#include "main/macros.h"
+#include "main/mfeatures.h"
+#include "main/pbo.h"
+#include "program/program.h"
+#include "program/prog_print.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_atom_constbuf.h"
+#include "st_program.h"
+#include "st_cb_bitmap.h"
+#include "st_texture.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_inlines.h"
+#include "util/u_draw_quad.h"
+#include "util/u_simple_shaders.h"
+#include "program/prog_instruction.h"
+#include "cso_cache/cso_context.h"
+
+
+#if FEATURE_drawpix
+
+/**
+ * glBitmaps are drawn as textured quads.  The user's bitmap pattern
+ * is stored in a texture image.  An 8-bit (I8, A8 or L8) format is used.
+ * The fragment shader samples a bit (texel) from the texture, then
+ * discards the fragment if the bit is off.
+ *
+ * Note that we actually store the inverse image of the bitmap to
+ * simplify the fragment program.  An "on" bit gets stored as texel=0x0
+ * and an "off" bit is stored as texel=0xff.  Then we kill the
+ * fragment if the negated texel value is less than zero.
+ */
+
+
+/**
+ * The bitmap cache attempts to accumulate multiple glBitmap calls in a
+ * buffer which is then rendered en masse upon a flush, state change, etc.
+ * A wide, short buffer is used to target the common case of a series
+ * of glBitmap calls being used to draw text.
+ */
+static GLboolean UseBitmapCache = GL_TRUE;
+
+
+#define BITMAP_CACHE_WIDTH  512
+#define BITMAP_CACHE_HEIGHT 32
+
+struct bitmap_cache
+{
+   /** Window pos to render the cached image */
+   GLint xpos, ypos;
+   /** Bounds of region used in window coords */
+   GLint xmin, ymin, xmax, ymax;
+
+   GLfloat color[4];
+
+   /** Bitmap's Z position */
+   GLfloat zpos;
+
+   struct pipe_resource *texture;
+   struct pipe_transfer *trans;
+
+   GLboolean empty;
+
+   /** An I8 texture image: */
+   ubyte *buffer;
+};
+
+
+/** Epsilon for Z comparisons */
+#define Z_EPSILON 1e-06
+
+
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ */
+static struct st_fragment_program *
+make_bitmap_fragment_program(struct gl_context *ctx, GLuint samplerIndex)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_fragment_program *stfp;
+   struct gl_program *p;
+   GLuint ic = 0;
+
+   p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+   if (!p)
+      return NULL;
+
+   p->NumInstructions = 3;
+
+   p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
+   if (!p->Instructions) {
+      ctx->Driver.DeleteProgram(ctx, p);
+      return NULL;
+   }
+   _mesa_init_instructions(p->Instructions, p->NumInstructions);
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   p->Instructions[ic].Opcode = OPCODE_TEX;
+   p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY;
+   p->Instructions[ic].DstReg.Index = 0;
+   p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+   p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+   p->Instructions[ic].TexSrcUnit = samplerIndex;
+   p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+   ic++;
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0xff -> discard */
+   p->Instructions[ic].Opcode = OPCODE_KIL;
+   p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY;
+
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      p->Instructions[ic].SrcReg[0].Swizzle = SWIZZLE_XXXX;
+
+   p->Instructions[ic].SrcReg[0].Index = 0;
+   p->Instructions[ic].SrcReg[0].Negate = NEGATE_XYZW;
+   ic++;
+
+   /* END; */
+   p->Instructions[ic++].Opcode = OPCODE_END;
+
+   assert(ic == p->NumInstructions);
+
+   p->InputsRead = FRAG_BIT_TEX0;
+   p->OutputsWritten = 0x0;
+   p->SamplersUsed = (1 << samplerIndex);
+
+   stfp = (struct st_fragment_program *) p;
+   stfp->Base.UsesKill = GL_TRUE;
+
+   return stfp;
+}
+
+
+static struct gl_program *
+make_bitmap_fragment_program_glsl(struct st_context *st,
+                                  struct st_fragment_program *orig,
+                                  GLuint samplerIndex)
+{
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+   
+   get_bitmap_visitor(fp, orig->glsl_to_tgsi, samplerIndex);
+   return &fp->Base.Base;
+}
+
+
+static int
+find_free_bit(uint bitfield)
+{
+   int i;
+   for (i = 0; i < 32; i++) {
+      if ((bitfield & (1 << i)) == 0) {
+         return i;
+      }
+   }
+   return -1;
+}
+
+
+/**
+ * Combine basic bitmap fragment program with the user-defined program.
+ * \param st  current context
+ * \param fpIn  the incoming fragment program
+ * \param fpOut  the new fragment program which does fragment culling
+ * \param bitmap_sampler  sampler number for the bitmap texture
+ */
+void
+st_make_bitmap_fragment_program(struct st_context *st,
+                                struct gl_fragment_program *fpIn,
+                                struct gl_fragment_program **fpOut,
+                                GLuint *bitmap_sampler)
+{
+   struct st_fragment_program *bitmap_prog;
+   struct st_fragment_program *stfpIn = (struct st_fragment_program *) fpIn;
+   struct gl_program *newProg;
+   uint sampler;
+
+   /*
+    * Generate new program which is the user-defined program prefixed
+    * with the bitmap sampler/kill instructions.
+    */
+   sampler = find_free_bit(fpIn->Base.SamplersUsed);
+   
+   if (stfpIn->glsl_to_tgsi)
+      newProg = make_bitmap_fragment_program_glsl(st, stfpIn, sampler);
+   else {
+      bitmap_prog = make_bitmap_fragment_program(st->ctx, sampler);
+
+      newProg = _mesa_combine_programs(st->ctx,
+                                       &bitmap_prog->Base.Base,
+                                       &fpIn->Base);
+      /* done with this after combining */
+      st_reference_fragprog(st, &bitmap_prog, NULL);
+   }
+
+#if 0
+   {
+      printf("Combined bitmap program:\n");
+      _mesa_print_program(newProg);
+      printf("InputsRead: 0x%x\n", newProg->InputsRead);
+      printf("OutputsWritten: 0x%x\n", newProg->OutputsWritten);
+      _mesa_print_parameter_list(newProg->Parameters);
+   }
+#endif
+
+   /* return results */
+   *fpOut = (struct gl_fragment_program *) newProg;
+   *bitmap_sampler = sampler;
+}
+
+
+/**
+ * Copy user-provided bitmap bits into texture buffer, expanding
+ * bits into texels.
+ * "On" bits will set texels to 0x0.
+ * "Off" bits will not modify texels.
+ * Note that the image is actually going to be upside down in
+ * the texture.  We deal with that with texcoords.
+ */
+static void
+unpack_bitmap(struct st_context *st,
+              GLint px, GLint py, GLsizei width, GLsizei height,
+              const struct gl_pixelstore_attrib *unpack,
+              const GLubyte *bitmap,
+              ubyte *destBuffer, uint destStride)
+{
+   destBuffer += py * destStride + px;
+
+   _mesa_expand_bitmap(width, height, unpack, bitmap,
+                       destBuffer, destStride, 0x0);
+}
+
+
+/**
+ * Create a texture which represents a bitmap image.
+ */
+static struct pipe_resource *
+make_bitmap_texture(struct gl_context *ctx, GLsizei width, GLsizei height,
+                    const struct gl_pixelstore_attrib *unpack,
+                    const GLubyte *bitmap)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_transfer *transfer;
+   ubyte *dest;
+   struct pipe_resource *pt;
+
+   /* PBO source... */
+   bitmap = _mesa_map_pbo_source(ctx, unpack, bitmap);
+   if (!bitmap) {
+      return NULL;
+   }
+
+   /**
+    * Create texture to hold bitmap pattern.
+    */
+   pt = st_texture_create(st, st->internal_target, st->bitmap.tex_format,
+                          0, width, height, 1, 1,
+                          PIPE_BIND_SAMPLER_VIEW);
+   if (!pt) {
+      _mesa_unmap_pbo_source(ctx, unpack);
+      return NULL;
+   }
+
+   transfer = pipe_get_transfer(st->pipe, pt, 0, 0,
+                                PIPE_TRANSFER_WRITE,
+                                0, 0, width, height);
+
+   dest = pipe_transfer_map(pipe, transfer);
+
+   /* Put image into texture transfer */
+   memset(dest, 0xff, height * transfer->stride);
+   unpack_bitmap(st, 0, 0, width, height, unpack, bitmap,
+                 dest, transfer->stride);
+
+   _mesa_unmap_pbo_source(ctx, unpack);
+
+   /* Release transfer */
+   pipe_transfer_unmap(pipe, transfer);
+   pipe->transfer_destroy(pipe, transfer);
+
+   return pt;
+}
+
+static GLuint
+setup_bitmap_vertex_data(struct st_context *st, bool normalized,
+                         int x, int y, int width, int height,
+                         float z, const float color[4])
+{
+   struct pipe_context *pipe = st->pipe;
+   const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+   const GLfloat fb_width = (GLfloat)fb->Width;
+   const GLfloat fb_height = (GLfloat)fb->Height;
+   const GLfloat x0 = (GLfloat)x;
+   const GLfloat x1 = (GLfloat)(x + width);
+   const GLfloat y0 = (GLfloat)y;
+   const GLfloat y1 = (GLfloat)(y + height);
+   GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0;
+   GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop;
+   const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0);
+   const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0);
+   const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0);
+   const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0);
+   const GLuint max_slots = 1; /* 4096 / sizeof(st->bitmap.vertices); */
+   GLuint i;
+
+   if(!normalized)
+   {
+      sRight = (GLfloat) width;
+      tBot = (GLfloat) height;
+   }
+
+   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
+    * no_flush) updates to buffers where we know there is no conflict
+    * with previous data.  Currently using max_slots > 1 will cause
+    * synchronous rendering if the driver flushes its command buffers
+    * between one bitmap and the next.  Our flush hook below isn't
+    * sufficient to catch this as the driver doesn't tell us when it
+    * flushes its own command buffers.  Until this gets fixed, pay the
+    * price of allocating a new buffer for each bitmap cache-flush to
+    * avoid synchronous rendering.
+    */
+   if (st->bitmap.vbuf_slot >= max_slots) {
+      pipe_resource_reference(&st->bitmap.vbuf, NULL);
+      st->bitmap.vbuf_slot = 0;
+   }
+
+   if (!st->bitmap.vbuf) {
+      st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 
+                                           PIPE_BIND_VERTEX_BUFFER,
+                                           PIPE_USAGE_STREAM,
+                                           max_slots *
+                                           sizeof(st->bitmap.vertices));
+   }
+
+   /* Positions are in clip coords since we need to do clipping in case
+    * the bitmap quad goes beyond the window bounds.
+    */
+   st->bitmap.vertices[0][0][0] = clip_x0;
+   st->bitmap.vertices[0][0][1] = clip_y0;
+   st->bitmap.vertices[0][2][0] = sLeft;
+   st->bitmap.vertices[0][2][1] = tTop;
+
+   st->bitmap.vertices[1][0][0] = clip_x1;
+   st->bitmap.vertices[1][0][1] = clip_y0;
+   st->bitmap.vertices[1][2][0] = sRight;
+   st->bitmap.vertices[1][2][1] = tTop;
+   
+   st->bitmap.vertices[2][0][0] = clip_x1;
+   st->bitmap.vertices[2][0][1] = clip_y1;
+   st->bitmap.vertices[2][2][0] = sRight;
+   st->bitmap.vertices[2][2][1] = tBot;
+   
+   st->bitmap.vertices[3][0][0] = clip_x0;
+   st->bitmap.vertices[3][0][1] = clip_y1;
+   st->bitmap.vertices[3][2][0] = sLeft;
+   st->bitmap.vertices[3][2][1] = tBot;
+   
+   /* same for all verts: */
+   for (i = 0; i < 4; i++) {
+      st->bitmap.vertices[i][0][2] = z;
+      st->bitmap.vertices[i][0][3] = 1.0f;
+      st->bitmap.vertices[i][1][0] = color[0];
+      st->bitmap.vertices[i][1][1] = color[1];
+      st->bitmap.vertices[i][1][2] = color[2];
+      st->bitmap.vertices[i][1][3] = color[3];
+      st->bitmap.vertices[i][2][2] = 0.0; /*R*/
+      st->bitmap.vertices[i][2][3] = 1.0; /*Q*/
+   }
+
+   /* put vertex data into vbuf */
+   pipe_buffer_write_nooverlap(st->pipe,
+                               st->bitmap.vbuf,
+                               st->bitmap.vbuf_slot
+                               * sizeof(st->bitmap.vertices),
+                               sizeof st->bitmap.vertices,
+                               st->bitmap.vertices);
+
+   return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices;
+}
+
+
+
+/**
+ * Render a glBitmap by drawing a textured quad.  x/y/width/height are in
+ * framebuffer pixels, z is in [0,1]; every CSO state set here is saved/restored. */
+static void
+draw_bitmap_quad(struct gl_context *ctx, GLint x, GLint y, GLfloat z,
+                 GLsizei width, GLsizei height,
+                 struct pipe_sampler_view *sv,
+                 const GLfloat *color)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct cso_context *cso = st->cso_context;
+   struct st_fp_variant *fpv;
+   struct st_fp_variant_key key;
+   GLuint maxSize;
+   GLuint offset;   /* byte offset of this quad's vertices inside st->bitmap.vbuf */
+
+   memset(&key, 0, sizeof(key));
+   key.st = st;
+   key.bitmap = GL_TRUE;
+
+   fpv = st_get_fp_variant(st, st->fp, &key);
+
+   /* As an optimization, Mesa's fragment programs will sometimes get the
+    * primary color from a statevar/constant rather than a varying variable.
+    * When that's the case, we need to ensure that we use the 'color'
+    * parameter and not the current attribute color (which may have changed
+    * through glRasterPos and state validation).
+    * So, we force the proper color here.  Not elegant, but it works.
+    */
+   {
+      GLfloat colorSave[4];
+      COPY_4V(colorSave, ctx->Current.Attrib[VERT_ATTRIB_COLOR0]);
+      COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], color);
+      st_upload_constants(st, fpv->parameters, PIPE_SHADER_FRAGMENT);
+      COPY_4V(ctx->Current.Attrib[VERT_ATTRIB_COLOR0], colorSave);
+   }
+
+
+   /* limit checks (debug-build asserts only; nothing is clamped or split) */
+   /* XXX if the bitmap is larger than the max texture size, break
+    * it up into chunks.
+    */
+   maxSize = 1 << (pipe->screen->get_param(pipe->screen,
+                                    PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1);
+   assert(width <= (GLsizei)maxSize);
+   assert(height <= (GLsizei)maxSize);
+
+   cso_save_rasterizer(cso);
+   cso_save_samplers(cso);
+   cso_save_fragment_sampler_views(cso);
+   cso_save_viewport(cso);
+   cso_save_fragment_shader(cso);
+   cso_save_vertex_shader(cso);
+   cso_save_vertex_elements(cso);
+   cso_save_vertex_buffers(cso);
+
+   /* rasterizer state: just scissor */
+   st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled;
+   cso_set_rasterizer(cso, &st->bitmap.rasterizer);
+
+   /* fragment shader state: TEX lookup program */
+   cso_set_fragment_shader_handle(cso, fpv->driver_shader);
+
+   /* vertex shader state: position + texcoord pass-through */
+   cso_set_vertex_shader_handle(cso, st->bitmap.vs);
+
+   /* user samplers, plus our bitmap sampler (bitmap.samplers[1] unless the target is RECT) */
+   {
+      struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
+      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_samplers);
+      uint i;
+      for (i = 0; i < st->state.num_samplers; i++) {
+         samplers[i] = &st->state.samplers[i];
+      }
+      samplers[fpv->bitmap_sampler] =
+         &st->bitmap.samplers[sv->texture->target != PIPE_TEXTURE_RECT];
+      cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers);
+   }
+
+   /* user textures, plus the bitmap texture */
+   {
+      struct pipe_sampler_view *sampler_views[PIPE_MAX_SAMPLERS];
+      uint num = MAX2(fpv->bitmap_sampler + 1, st->state.num_textures);
+      memcpy(sampler_views, st->state.sampler_views, sizeof(sampler_views));
+      sampler_views[fpv->bitmap_sampler] = sv;
+      cso_set_fragment_sampler_views(cso, num, sampler_views);
+   }
+
+   /* viewport state: viewport matching window dims */
+   {
+      const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
+      const GLfloat width = (GLfloat)fb->Width;   /* NB: shadows the 'width' parameter in this scope */
+      const GLfloat height = (GLfloat)fb->Height; /* NB: shadows the 'height' parameter in this scope */
+      struct pipe_viewport_state vp;
+      vp.scale[0] =  0.5f * width;
+      vp.scale[1] = height * (invert ? -0.5f : 0.5f);
+      vp.scale[2] = 0.5f;
+      vp.scale[3] = 1.0f;
+      vp.translate[0] = 0.5f * width;
+      vp.translate[1] = 0.5f * height;
+      vp.translate[2] = 0.5f;
+      vp.translate[3] = 0.0f;
+      cso_set_viewport(cso, &vp);
+   }
+
+   cso_set_vertex_elements(cso, 3, st->velems_util_draw);
+
+   /* convert Z from [0,1] to [-1,1] to match viewport Z scale/bias */
+   z = z * 2.0f - 1.0f;
+
+   /* draw textured quad; texcoords are normalized unless the texture target is RECT */
+   offset = setup_bitmap_vertex_data(st,
+                                     sv->texture->target != PIPE_TEXTURE_RECT,
+                                     x, y, width, height, z, color);
+
+   util_draw_vertex_buffer(pipe, st->cso_context, st->bitmap.vbuf, offset,
+                           PIPE_PRIM_TRIANGLE_FAN,
+                           4,  /* verts */
+                           3); /* attribs/vert */
+
+
+   /* restore every state object saved above */
+   cso_restore_rasterizer(cso);
+   cso_restore_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
+   cso_restore_viewport(cso);
+   cso_restore_fragment_shader(cso);
+   cso_restore_vertex_shader(cso);
+   cso_restore_vertex_elements(cso);
+   cso_restore_vertex_buffers(cso);
+}
+
+
+static void
+reset_cache(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct bitmap_cache *cache = st->bitmap.cache;
+   /* Mark the cache empty, drop any leftover transfer and allocate a fresh cache texture. */
+   /*memset(cache->buffer, 0xff, sizeof(cache->buffer));*/
+   cache->empty = GL_TRUE;
+   /* start with an inverted (min > max) box so the first accumulated bitmap sets the bounds */
+   cache->xmin = 1000000;
+   cache->xmax = -1000000;
+   cache->ymin = 1000000;
+   cache->ymax = -1000000;
+
+   if (cache->trans) {
+      pipe->transfer_destroy(pipe, cache->trans); /* NOTE(review): destroyed without an unmap here - confirm callers unmap first */
+      cache->trans = NULL;
+   }
+
+   assert(!cache->texture); /* callers must have released the previous texture */
+
+   /* allocate a new texture */
+   cache->texture = st_texture_create(st, PIPE_TEXTURE_2D,
+                                      st->bitmap.tex_format, 0,
+                                      BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
+                                      1, 1,
+				      PIPE_BIND_SAMPLER_VIEW);
+}
+
+
+/**
+ * Debug aid: dump the cache image to stdout, highest buffer row first.
+ * Non-zero texels are printed as 'X', zero texels as a blank.
+ */
+static void
+print_cache(const struct bitmap_cache *cache)
+{
+   int row;
+
+   for (row = BITMAP_CACHE_HEIGHT - 1; row >= 0; row--) {
+      const int first = BITMAP_CACHE_WIDTH * row;
+      int col;
+      for (col = 0; col < BITMAP_CACHE_WIDTH; col++) {
+         printf(cache->buffer[first + col] ? "X" : " ");
+      }
+      printf("\n");
+   }
+}
+
+
+/**
+ * Create and map the gallium pipe_transfer for the bitmap cache texture
+ * (no-op if one already exists) and fill the mapped image with 0xff. */
+static void
+create_cache_trans(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct bitmap_cache *cache = st->bitmap.cache;
+
+   if (cache->trans)
+      return;
+
+   /* Map the texture transfer.
+    * Subsequent glBitmap calls will write into the texture image.
+    */
+   cache->trans = pipe_get_transfer(st->pipe, cache->texture, 0, 0,
+                                    PIPE_TRANSFER_WRITE, 0, 0,
+                                    BITMAP_CACHE_WIDTH,
+                                    BITMAP_CACHE_HEIGHT);
+   cache->buffer = pipe_transfer_map(pipe, cache->trans);
+   /* NOTE(review): neither the transfer nor the mapping is checked for NULL before use */
+   /* init image to all 0xff */
+   memset(cache->buffer, 0xff, cache->trans->stride * BITMAP_CACHE_HEIGHT);
+}
+
+
+/**
+ * If there's anything in the bitmap cache, draw/flush it now (the draw is
+ * skipped when there is no draw buffer); the cache is then reset either way. */
+void
+st_flush_bitmap_cache(struct st_context *st)
+{
+   if (!st->bitmap.cache->empty) {
+      struct bitmap_cache *cache = st->bitmap.cache;
+
+      if (st->ctx->DrawBuffer) {
+         struct pipe_context *pipe = st->pipe;
+         struct pipe_sampler_view *sv;
+
+         assert(cache->xmin <= cache->xmax);
+
+/*         printf("flush size %d x %d  at %d, %d\n",
+                cache->xmax - cache->xmin,
+                cache->ymax - cache->ymin,
+                cache->xpos, cache->ypos);
+*/
+
+         /* The texture transfer has been mapped until now.
+          * So unmap and release the texture transfer before drawing.
+          */
+         if (cache->trans) {
+            if (0) /* flip to 1 to dump the cache image while debugging */
+               print_cache(cache);
+            pipe_transfer_unmap(pipe, cache->trans);
+            cache->buffer = NULL;
+
+            pipe->transfer_destroy(pipe, cache->trans);
+            cache->trans = NULL;
+         }
+
+         sv = st_create_texture_sampler_view(st->pipe, cache->texture);
+         if (sv) {
+            draw_bitmap_quad(st->ctx,
+                             cache->xpos,
+                             cache->ypos,
+                             cache->zpos,
+                             BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT,
+                             sv,
+                             cache->color);
+
+            pipe_sampler_view_reference(&sv, NULL);
+         }
+      }
+
+      /* release/free the texture */
+      pipe_resource_reference(&cache->texture, NULL);
+      /* reset_cache() marks the cache empty again and allocates a new texture */
+      reset_cache(st);
+   }
+}
+
+
+/**
+ * Flush bitmap cache, then release the vertex buffer and rewind its slot counter.
+ */
+void
+st_flush_bitmap( struct st_context *st )
+{
+   st_flush_bitmap_cache(st); /* must come first: the flush may still draw from the vertex buffer */
+
+   /* Release vertex buffer to avoid synchronous rendering if we were
+    * to map it in the next frame.
+    */
+   pipe_resource_reference(&st->bitmap.vbuf, NULL);
+   st->bitmap.vbuf_slot = 0;
+}
+
+
+/**
+ * Try to accumulate this glBitmap call in the bitmap cache.
+ * \return  GL_TRUE for success, GL_FALSE if the bitmap is wider/taller than the cache.
+ */
+static GLboolean
+accum_bitmap(struct st_context *st,
+             GLint x, GLint y, GLsizei width, GLsizei height,
+             const struct gl_pixelstore_attrib *unpack,
+             const GLubyte *bitmap )
+{
+   struct bitmap_cache *cache = st->bitmap.cache;
+   int px = -999, py = -999; /* -999 = "not computed yet" sentinel, asserted below */
+   const GLfloat z = st->ctx->Current.RasterPos[2];
+
+   if (width > BITMAP_CACHE_WIDTH ||
+       height > BITMAP_CACHE_HEIGHT)
+      return GL_FALSE; /* too big to cache */
+
+   if (!cache->empty) {
+      px = x - cache->xpos;  /* pos in buffer */
+      py = y - cache->ypos;
+      if (px < 0 || px + width > BITMAP_CACHE_WIDTH ||
+          py < 0 || py + height > BITMAP_CACHE_HEIGHT ||
+          !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color) ||
+          ((fabs(z - cache->zpos) > Z_EPSILON))) {
+         /* This bitmap would extend beyond cache bounds, or the bitmap
+          * color or raster-position Z is changing,
+          * so flush and continue.
+          */
+         st_flush_bitmap_cache(st);
+      }
+   }
+   /* Either the cache was empty on entry or the flush above just emptied it. */
+   if (cache->empty) {
+      /* Initialize.  Center bitmap vertically in the buffer. */
+      px = 0;
+      py = (BITMAP_CACHE_HEIGHT - height) / 2;
+      cache->xpos = x;
+      cache->ypos = y - py;
+      cache->zpos = z;
+      cache->empty = GL_FALSE;
+      COPY_4FV(cache->color, st->ctx->Current.RasterColor);
+   }
+
+   assert(px != -999);
+   assert(py != -999);
+   /* grow the bounding box (window coords) of everything accumulated so far */
+   if (x < cache->xmin)
+      cache->xmin = x;
+   if (y < cache->ymin)
+      cache->ymin = y;
+   if (x + width > cache->xmax)
+      cache->xmax = x + width;
+   if (y + height > cache->ymax)
+      cache->ymax = y + height;
+
+   /* create the transfer if needed */
+   create_cache_trans(st);
+   /* NOTE(review): BITMAP_CACHE_WIDTH is presumably the dest row stride; the cache image is cleared using trans->stride - confirm they agree */
+   unpack_bitmap(st, px, py, width, height, unpack, bitmap,
+                 cache->buffer, BITMAP_CACHE_WIDTH);
+
+   return GL_TRUE; /* accumulated */
+}
+
+
+
+/**
+ * Called via ctx->Driver.Bitmap(): cache the bitmap if possible, else draw it right away.
+ */
+static void
+st_Bitmap(struct gl_context *ctx, GLint x, GLint y,
+          GLsizei width, GLsizei height,
+          const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap )
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_resource *pt;
+
+   if (width == 0 || height == 0)
+      return; /* nothing to draw */
+
+   st_validate_state(st);
+
+   if (!st->bitmap.vs) {
+      /* create pass-through vertex shader now */
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_COLOR,
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indexes[] = { 0, 0, 0 };
+      st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3,
+                                                          semantic_names,
+                                                          semantic_indexes);
+   }
+   /* fast path: the bitmap was accumulated into the cache and is drawn at the next flush */
+   if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap))
+      return;
+   /* slow path: one texture and one quad for this bitmap alone */
+   pt = make_bitmap_texture(ctx, width, height, unpack, bitmap);
+   if (pt) {
+      struct pipe_sampler_view *sv =
+         st_create_texture_sampler_view(st->pipe, pt);
+
+      assert(pt->target == PIPE_TEXTURE_2D || pt->target == PIPE_TEXTURE_RECT);
+
+      if (sv) {
+         draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2],
+                          width, height, sv,
+                          st->ctx->Current.RasterColor);
+
+         pipe_sampler_view_reference(&sv, NULL);
+      }
+
+      /* release/free the texture */
+      pipe_resource_reference(&pt, NULL);
+   }
+}
+
+
+/** Plug st_Bitmap into the device-driver function table as the Bitmap hook. */
+void
+st_init_bitmap_functions(struct dd_function_table *functions)
+{
+   functions->Bitmap = st_Bitmap;
+}
+
+
+/** Per-context init: sampler/rasterizer templates, bitmap texture format and the bitmap cache. */
+void
+st_init_bitmap(struct st_context *st)
+{
+   struct pipe_sampler_state *sampler = &st->bitmap.samplers[0];
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   /* init sampler state once */
+   memset(sampler, 0, sizeof(*sampler));
+   sampler->wrap_s = PIPE_TEX_WRAP_CLAMP;
+   sampler->wrap_t = PIPE_TEX_WRAP_CLAMP;
+   sampler->wrap_r = PIPE_TEX_WRAP_CLAMP;
+   sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   st->bitmap.samplers[1] = *sampler; /* samplers[1] = copy of samplers[0] ... */
+   st->bitmap.samplers[1].normalized_coords = 1; /* ... but with normalized coords */
+
+   /* init baseline rasterizer state once */
+   memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer));
+   st->bitmap.rasterizer.gl_rasterization_rules = 1;
+
+   /* find a usable texture format; tried in order I8, A8, L8 */
+   if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
+                                   PIPE_TEXTURE_2D, 0,
+                                   PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM;
+   }
+   else if (screen->is_format_supported(screen, PIPE_FORMAT_A8_UNORM,
+                                        PIPE_TEXTURE_2D, 0,
+                                        PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_A8_UNORM;
+   }
+   else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
+                                        PIPE_TEXTURE_2D, 0,
+                                        PIPE_BIND_SAMPLER_VIEW)) {
+      st->bitmap.tex_format = PIPE_FORMAT_L8_UNORM;
+   }
+   else {
+      /* XXX support more formats */
+      assert(0);
+   }
+
+   /* alloc bitmap cache object */
+   st->bitmap.cache = ST_CALLOC_STRUCT(bitmap_cache);
+   /* NOTE(review): the allocation is not checked before reset_cache() dereferences it */
+   reset_cache(st);
+}
+
+
+/** Per-context tear-down: frees the bitmap vertex shader, vertex buffer and cache. */
+void
+st_destroy_bitmap(struct st_context *st)
+{
+   struct bitmap_cache *const bc = st->bitmap.cache;
+   struct pipe_context *const ctx_pipe = st->pipe;
+
+   if (st->bitmap.vs != NULL) {
+      cso_delete_vertex_shader(st->cso_context, st->bitmap.vs);
+      st->bitmap.vs = NULL;
+   }
+
+   if (st->bitmap.vbuf != NULL) {
+      pipe_resource_reference(&st->bitmap.vbuf, NULL);
+      st->bitmap.vbuf = NULL;
+   }
+
+   if (bc == NULL)
+      return;
+   if (bc->trans != NULL) {
+      pipe_transfer_unmap(ctx_pipe, bc->trans);
+      ctx_pipe->transfer_destroy(ctx_pipe, bc->trans);
+   }
+   pipe_resource_reference(&bc->texture, NULL);
+   free(bc);
+   st->bitmap.cache = NULL;
+}
+
+#endif /* FEATURE_drawpix */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_blit.c b/mesalib/src/mesa/state_tracker/st_cb_blit.c
index 416be194d..626db1243 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_blit.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_blit.c
@@ -61,6 +61,82 @@ st_destroy_blit(struct st_context *st)
 
 #if FEATURE_EXT_framebuffer_blit
 
+static void
+st_BlitFramebuffer_resolve(struct gl_context *ctx,
+                           GLbitfield mask,
+                           struct pipe_resolve_info *info)
+{
+   const GLbitfield depthStencil = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
+   /* Resolve the read framebuffer into the draw framebuffer for every buffer named in 'mask'. */
+   struct st_context *st = st_context(ctx);
+   /* 'info' arrives with the src/dst rectangles set; mask, resources, level and layers are set per resolve. */
+   struct st_renderbuffer *srcRb, *dstRb;
+
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      srcRb = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+      dstRb = st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]);
+
+      info->mask = PIPE_MASK_RGBA;
+
+      info->src.res = srcRb->texture;
+      info->src.layer = srcRb->surface->u.tex.first_layer;
+      info->dst.res = dstRb->texture;
+      info->dst.level = dstRb->surface->u.tex.level;
+      info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+      st->pipe->resource_resolve(st->pipe, info);
+   }
+
+   if (mask & depthStencil) {
+      struct gl_renderbuffer_attachment *srcDepth, *srcStencil;
+      struct gl_renderbuffer_attachment *dstDepth, *dstStencil;
+      boolean combined; /* true only if BOTH src and dst use a combined depth/stencil buffer */
+
+      srcDepth = &ctx->ReadBuffer->Attachment[BUFFER_DEPTH];
+      dstDepth = &ctx->DrawBuffer->Attachment[BUFFER_DEPTH];
+      srcStencil = &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
+      dstStencil = &ctx->DrawBuffer->Attachment[BUFFER_STENCIL];
+
+      combined =
+         st_is_depth_stencil_combined(srcDepth, srcStencil) &&
+         st_is_depth_stencil_combined(dstDepth, dstStencil);
+
+      if ((mask & GL_DEPTH_BUFFER_BIT) || combined) {
+         /* resolve depth and, if combined and requested, stencil as well */
+         srcRb = st_renderbuffer(srcDepth->Renderbuffer);
+         dstRb = st_renderbuffer(dstDepth->Renderbuffer);
+
+         info->mask = (mask & GL_DEPTH_BUFFER_BIT) ? PIPE_MASK_Z : 0;
+         if (combined && (mask & GL_STENCIL_BUFFER_BIT))
+            info->mask |= PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+
+      if (!combined && (mask & GL_STENCIL_BUFFER_BIT)) {
+         /* resolve separate stencil buffer; combined stencil was already resolved above, don't do it twice */
+         srcRb = st_renderbuffer(srcStencil->Renderbuffer);
+         dstRb = st_renderbuffer(dstStencil->Renderbuffer);
+
+         info->mask = PIPE_MASK_S;
+
+         info->src.res = srcRb->texture;
+         info->src.layer = srcRb->surface->u.tex.first_layer;
+         info->dst.res = dstRb->texture;
+         info->dst.level = dstRb->surface->u.tex.level;
+         info->dst.layer = dstRb->surface->u.tex.first_layer;
+
+         st->pipe->resource_resolve(st->pipe, info);
+      }
+   }
+}
+
 static void
 st_BlitFramebuffer(struct gl_context *ctx,
                    GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -95,6 +171,42 @@ st_BlitFramebuffer(struct gl_context *ctx,
       srcY1 = readFB->Height - srcY1;
    }
 
+   /* Disable conditional rendering. */
+   if (st->render_condition) {
+      st->pipe->render_condition(st->pipe, NULL, 0);
+   }
+
+   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+      struct pipe_resolve_info info;
+
+      if (dstX0 < dstX1) {
+         info.dst.x0 = dstX0;
+         info.dst.x1 = dstX1;
+         info.src.x0 = srcX0;
+         info.src.x1 = srcX1;
+      } else {
+         info.dst.x0 = dstX1;
+         info.dst.x1 = dstX0;
+         info.src.x0 = srcX1;
+         info.src.x1 = srcX0;
+      }
+      if (dstY0 < dstY1) {
+         info.dst.y0 = dstY0;
+         info.dst.y1 = dstY1;
+         info.src.y0 = srcY0;
+         info.src.y1 = srcY1;
+      } else {
+         info.dst.y0 = dstY1;
+         info.dst.y1 = dstY0;
+         info.src.y0 = srcY1;
+         info.src.y1 = srcY0;
+      }
+
+      st_BlitFramebuffer_resolve(ctx, mask, &info); /* filter doesn't apply */
+
+      goto done;
+   }
+
    if (srcY0 > srcY1 && dstY0 > dstY1) {
       /* Both src and dst are upside down.  Swap Y to make it
        * right-side up to increase odds of using a fast path.
@@ -109,11 +221,6 @@ st_BlitFramebuffer(struct gl_context *ctx,
       dstY1 = tmp;
    }
 
-   /* Disable conditional rendering. */
-   if (st->render_condition) {
-      st->pipe->render_condition(st->pipe, NULL, 0);
-   }
-
    if (mask & GL_COLOR_BUFFER_BIT) {
       struct gl_renderbuffer_attachment *srcAtt =
          &readFB->Attachment[readFB->_ColorReadBufferIndex];
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
index 1d908c031..390c51869 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -94,6 +94,46 @@ is_passthrough_program(const struct gl_fragment_program *prog)
 }
 
 
+/**
+ * Returns a new fragment program which implements the current pixel transfer
+ * ops (scale/bias, pixel maps) on top of 'orig'; NULL if allocation fails. */
+static struct gl_fragment_program *
+get_glsl_pixel_transfer_program(struct st_context *st,
+                                struct st_fragment_program *orig)
+{
+   int pixelMaps = 0, scaleAndBias = 0;
+   struct gl_context *ctx = st->ctx;
+   struct st_fragment_program *fp = (struct st_fragment_program *)
+      ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+
+   if (!fp)
+      return NULL;
+   /* scale/bias is needed as soon as any channel differs from the identity (scale 1, bias 0) */
+   if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 ||
+       ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 ||
+       ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 ||
+       ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) {
+      scaleAndBias = 1;
+   }
+
+   pixelMaps = ctx->Pixel.MapColorFlag;
+
+   if (pixelMaps) {
+      /* create the colormap/texture now if not already done */
+      if (!st->pixel_xfer.pixelmap_texture) {
+         st->pixel_xfer.pixelmap_texture = st_create_color_map_texture(ctx);
+         st->pixel_xfer.pixelmap_sampler_view =
+            st_create_texture_sampler_view(st->pipe,
+                                           st->pixel_xfer.pixelmap_texture);
+      }
+   }
+   /* presumably fills 'fp' from orig's GLSL-to-TGSI visitor plus the selected transfer ops - confirm in st_glsl_to_tgsi */
+   get_pixel_transfer_visitor(fp, orig->glsl_to_tgsi,
+                              scaleAndBias, pixelMaps);
+
+   return &fp->Base;
+}
+
 
 /**
  * Make fragment shader for glDraw/CopyPixels.  This shader is made
@@ -107,11 +147,15 @@ st_make_drawpix_fragment_program(struct st_context *st,
                                  struct gl_fragment_program **fpOut)
 {
    struct gl_program *newProg;
+   struct st_fragment_program *stfp = (struct st_fragment_program *) fpIn;
 
    if (is_passthrough_program(fpIn)) {
       newProg = (struct gl_program *) _mesa_clone_fragment_program(st->ctx,
                                              &st->pixel_xfer.program->Base);
    }
+   else if (stfp->glsl_to_tgsi != NULL) {
+      newProg = (struct gl_program *) get_glsl_pixel_transfer_program(st, stfp);
+   }
    else {
 #if 0
       /* debug */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_program.c b/mesalib/src/mesa/state_tracker/st_cb_program.c
index 5096528a3..2abb4d8f0 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_program.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_program.c
@@ -1,245 +1,259 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "main/shaderapi.h"
-#include "program/prog_instruction.h"
-#include "program/program.h"
-
-#include "cso_cache/cso_context.h"
-#include "draw/draw_context.h"
-
-#include "st_context.h"
-#include "st_program.h"
-#include "st_mesa_to_tgsi.h"
-#include "st_cb_program.h"
-
-
-
-/**
- * Called via ctx->Driver.BindProgram() to bind an ARB vertex or
- * fragment program.
- */
-static void
-st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog)
-{
-   struct st_context *st = st_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: 
-      st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
-      break;
-   case MESA_GEOMETRY_PROGRAM:
-      st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
-      break;
-   }
-}
-
-
-/**
- * Called via ctx->Driver.UseProgram() to bind a linked GLSL program
- * (vertex shader + fragment shader).
- */
-static void
-st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
-{
-   struct st_context *st = st_context(ctx);
-
-   st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
-   st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
-   st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
-}
-
-
-/**
- * Called via ctx->Driver.NewProgram() to allocate a new vertex or
- * fragment program.
- */
-static struct gl_program *
-st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
-{
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: {
-      struct st_vertex_program *prog = ST_CALLOC_STRUCT(st_vertex_program);
-      return _mesa_init_vertex_program(ctx, &prog->Base, target, id);
-   }
-
-   case GL_FRAGMENT_PROGRAM_ARB:
-   case GL_FRAGMENT_PROGRAM_NV: {
-      struct st_fragment_program *prog = ST_CALLOC_STRUCT(st_fragment_program);
-      return _mesa_init_fragment_program(ctx, &prog->Base, target, id);
-   }
-
-   case MESA_GEOMETRY_PROGRAM: {
-      struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program);
-      return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
-   }
-
-   default:
-      assert(0);
-      return NULL;
-   }
-}
-
-
-/**
- * Called via ctx->Driver.DeleteProgram()
- */
-static void
-st_delete_program(struct gl_context *ctx, struct gl_program *prog)
-{
-   struct st_context *st = st_context(ctx);
-
-   switch( prog->Target ) {
-   case GL_VERTEX_PROGRAM_ARB:
-      {
-         struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
-         st_release_vp_variants( st, stvp );
-      }
-      break;
-   case MESA_GEOMETRY_PROGRAM:
-      {
-         struct st_geometry_program *stgp =
-            (struct st_geometry_program *) prog;
-
-         st_release_gp_variants(st, stgp);
-
-         if (stgp->tgsi.tokens) {
-            st_free_tokens((void *) stgp->tgsi.tokens);
-            stgp->tgsi.tokens = NULL;
-         }
-      }
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      {
-         struct st_fragment_program *stfp =
-            (struct st_fragment_program *) prog;
-
-         st_release_fp_variants(st, stfp);
-         
-         if (stfp->tgsi.tokens) {
-            st_free_tokens(stfp->tgsi.tokens);
-            stfp->tgsi.tokens = NULL;
-         }
-      }
-      break;
-   default:
-      assert(0); /* problem */
-   }
-
-   /* delete base class */
-   _mesa_delete_program( ctx, prog );
-}
-
-
-/**
- * Called via ctx->Driver.IsProgramNative()
- */
-static GLboolean
-st_is_program_native(struct gl_context *ctx,
-                     GLenum target, 
-                     struct gl_program *prog)
-{
-   return GL_TRUE;
-}
-
-
-/**
- * Called via ctx->Driver.ProgramStringNotify()
- * Called when the program's text/code is changed.  We have to free
- * all shader variants and corresponding gallium shaders when this happens.
- */
-static GLboolean
-st_program_string_notify( struct gl_context *ctx,
-                                           GLenum target,
-                                           struct gl_program *prog )
-{
-   struct st_context *st = st_context(ctx);
-
-   if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
-
-      st_release_fp_variants(st, stfp);
-
-      if (stfp->tgsi.tokens) {
-         st_free_tokens(stfp->tgsi.tokens);
-         stfp->tgsi.tokens = NULL;
-      }
-
-      if (st->fp == stfp)
-	 st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
-   }
-   else if (target == MESA_GEOMETRY_PROGRAM) {
-      struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
-
-      st_release_gp_variants(st, stgp);
-
-      if (stgp->tgsi.tokens) {
-         st_free_tokens((void *) stgp->tgsi.tokens);
-         stgp->tgsi.tokens = NULL;
-      }
-
-      if (st->gp == stgp)
-	 st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
-   }
-   else if (target == GL_VERTEX_PROGRAM_ARB) {
-      struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
-
-      st_release_vp_variants( st, stvp );
-
-      if (st->vp == stvp)
-	 st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
-   }
-
-   /* XXX check if program is legal, within limits */
-   return GL_TRUE;
-}
-
-
-/**
- * Plug in the program and shader-related device driver functions.
- */
-void
-st_init_program_functions(struct dd_function_table *functions)
-{
-   functions->BindProgram = st_bind_program;
-   functions->UseProgram = st_use_program;
-   functions->NewProgram = st_new_program;
-   functions->DeleteProgram = st_delete_program;
-   functions->IsProgramNative = st_is_program_native;
-   functions->ProgramStringNotify = st_program_string_notify;
-}
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/shaderapi.h"
+#include "program/prog_instruction.h"
+#include "program/program.h"
+
+#include "cso_cache/cso_context.h"
+#include "draw/draw_context.h"
+
+#include "st_context.h"
+#include "st_program.h"
+#include "st_mesa_to_tgsi.h"
+#include "st_cb_program.h"
+#include "st_glsl_to_tgsi.h"
+
+
+
+/**
+ * Called via ctx->Driver.BindProgram() to bind a vertex, fragment
+ * or geometry program.
+ */
+static void
+st_bind_program(struct gl_context *ctx, GLenum target, struct gl_program *prog)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: 
+      st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
+      break;
+   case MESA_GEOMETRY_PROGRAM:
+      st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
+      break;
+   }
+}
+
+
+/**
+ * Called via ctx->Driver.UseProgram() to bind a linked GLSL program
+ * (vertex shader + fragment shader).
+ */
+static void
+st_use_program(struct gl_context *ctx, struct gl_shader_program *shProg)
+{
+   struct st_context *st = st_context(ctx);
+
+   st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
+   st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
+   st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
+}
+
+
+/**
+ * Called via ctx->Driver.NewProgram() to allocate a new vertex,
+ * fragment or geometry program.
+ */
+static struct gl_program *
+st_new_program(struct gl_context *ctx, GLenum target, GLuint id)
+{
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: {
+      struct st_vertex_program *prog = ST_CALLOC_STRUCT(st_vertex_program);
+      return _mesa_init_vertex_program(ctx, &prog->Base, target, id);
+   }
+
+   case GL_FRAGMENT_PROGRAM_ARB:
+   case GL_FRAGMENT_PROGRAM_NV: {
+      struct st_fragment_program *prog = ST_CALLOC_STRUCT(st_fragment_program);
+      return _mesa_init_fragment_program(ctx, &prog->Base, target, id);
+   }
+
+   case MESA_GEOMETRY_PROGRAM: {
+      struct st_geometry_program *prog = ST_CALLOC_STRUCT(st_geometry_program);
+      return _mesa_init_geometry_program(ctx, &prog->Base, target, id);
+   }
+
+   default:
+      assert(0);
+      return NULL;
+   }
+}
+
+
+/**
+ * Called via ctx->Driver.DeleteProgram()
+ */
+static void
+st_delete_program(struct gl_context *ctx, struct gl_program *prog)
+{
+   struct st_context *st = st_context(ctx);
+
+   switch( prog->Target ) {
+   case GL_VERTEX_PROGRAM_ARB:
+      {
+         struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
+         st_release_vp_variants( st, stvp );
+         
+         if (stvp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stvp->glsl_to_tgsi);
+      }
+      break;
+   case MESA_GEOMETRY_PROGRAM:
+      {
+         struct st_geometry_program *stgp =
+            (struct st_geometry_program *) prog;
+
+         st_release_gp_variants(st, stgp);
+         
+         if (stgp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stgp->glsl_to_tgsi);
+
+         if (stgp->tgsi.tokens) {
+            st_free_tokens((void *) stgp->tgsi.tokens);
+            stgp->tgsi.tokens = NULL;
+         }
+      }
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      {
+         struct st_fragment_program *stfp =
+            (struct st_fragment_program *) prog;
+
+         st_release_fp_variants(st, stfp);
+         
+         if (stfp->glsl_to_tgsi)
+            free_glsl_to_tgsi_visitor(stfp->glsl_to_tgsi);
+         
+         if (stfp->tgsi.tokens) {
+            st_free_tokens(stfp->tgsi.tokens);
+            stfp->tgsi.tokens = NULL;
+         }
+      }
+      break;
+   default:
+      assert(0); /* problem */
+   }
+
+   /* delete base class */
+   _mesa_delete_program( ctx, prog );
+}
+
+
+/**
+ * Called via ctx->Driver.IsProgramNative()
+ */
+static GLboolean
+st_is_program_native(struct gl_context *ctx,
+                     GLenum target, 
+                     struct gl_program *prog)
+{
+   return GL_TRUE;
+}
+
+
+/**
+ * Called via ctx->Driver.ProgramStringNotify()
+ * Called when the program's text/code is changed.  We have to free
+ * all shader variants and corresponding gallium shaders when this happens.
+ */
+static GLboolean
+st_program_string_notify( struct gl_context *ctx,
+                                           GLenum target,
+                                           struct gl_program *prog )
+{
+   struct st_context *st = st_context(ctx);
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
+
+      st_release_fp_variants(st, stfp);
+
+      if (stfp->tgsi.tokens) {
+         st_free_tokens(stfp->tgsi.tokens);
+         stfp->tgsi.tokens = NULL;
+      }
+
+      if (st->fp == stfp)
+	 st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM;
+   }
+   else if (target == MESA_GEOMETRY_PROGRAM) {
+      struct st_geometry_program *stgp = (struct st_geometry_program *) prog;
+
+      st_release_gp_variants(st, stgp);
+
+      if (stgp->tgsi.tokens) {
+         st_free_tokens((void *) stgp->tgsi.tokens);
+         stgp->tgsi.tokens = NULL;
+      }
+
+      if (st->gp == stgp)
+	 st->dirty.st |= ST_NEW_GEOMETRY_PROGRAM;
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct st_vertex_program *stvp = (struct st_vertex_program *) prog;
+
+      st_release_vp_variants( st, stvp );
+
+      if (st->vp == stvp)
+	 st->dirty.st |= ST_NEW_VERTEX_PROGRAM;
+   }
+
+   /* XXX check if program is legal, within limits */
+   return GL_TRUE;
+}
+
+
+/**
+ * Plug in the program and shader-related device driver functions.
+ */
+void
+st_init_program_functions(struct dd_function_table *functions)
+{
+   functions->BindProgram = st_bind_program;
+   functions->UseProgram = st_use_program;
+   functions->NewProgram = st_new_program;
+   functions->DeleteProgram = st_delete_program;
+   functions->IsProgramNative = st_is_program_native;
+   functions->ProgramStringNotify = st_program_string_notify;
+   
+   functions->NewShader = st_new_shader;
+   functions->NewShaderProgram = st_new_shader_program;
+   functions->LinkShader = st_link_shader;
+}
diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c
index b5f6d356e..8e9009340 100644
--- a/mesalib/src/mesa/state_tracker/st_extensions.c
+++ b/mesalib/src/mesa/state_tracker/st_extensions.c
@@ -228,6 +228,7 @@ void st_init_extensions(struct st_context *st)
 {
    struct pipe_screen *screen = st->pipe->screen;
    struct gl_context *ctx = st->ctx;
+   int i;
 
    /*
     * Extensions that are supported by all Gallium drivers:
@@ -605,6 +606,16 @@ void st_init_extensions(struct st_context *st)
       ctx->Extensions.EXT_packed_float = GL_TRUE;
    }
 
+   /* Maximum sample count. */
+   for (i = 16; i > 0; --i) {
+      if (screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
+                                      PIPE_TEXTURE_2D, i,
+                                      PIPE_BIND_RENDER_TARGET)) {
+         ctx->Const.MaxSamples = i;
+         break;
+      }
+   }
+
    if (screen->get_param(screen, PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE)) {
       ctx->Extensions.ARB_seamless_cube_map = GL_TRUE;
       ctx->Extensions.AMD_seamless_cubemap_per_texture = GL_TRUE;
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
new file mode 100644
index 000000000..460bafb38
--- /dev/null
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -0,0 +1,4961 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file st_glsl_to_tgsi.cpp
+ *
+ * Translate GLSL IR to TGSI.
+ */
+
+#include <stdio.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_print_visitor.h"
+#include "ir_expression_flattening.h"
+#include "glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "../glsl/program.h"
+#include "ir_optimization.h"
+#include "ast.h"
+
+extern "C" {
+#include "main/mtypes.h"
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+#include "main/uniforms.h"
+#include "program/hash_table.h"
+#include "program/prog_instruction.h"
+#include "program/prog_optimize.h"
+#include "program/prog_print.h"
+#include "program/program.h"
+#include "program/prog_uniform.h"
+#include "program/prog_parameter.h"
+#include "program/sampler.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_state.h"
+#include "util/u_math.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "st_context.h"
+#include "st_program.h"
+#include "st_glsl_to_tgsi.h"
+#include "st_mesa_to_tgsi.h"
+}
+
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
+#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
+                           (1 << PROGRAM_ENV_PARAM) |    \
+                           (1 << PROGRAM_STATE_VAR) |    \
+                           (1 << PROGRAM_NAMED_PARAM) |  \
+                           (1 << PROGRAM_CONSTANT) |     \
+                           (1 << PROGRAM_UNIFORM))
+
+#define MAX_TEMPS         4096
+
+class st_src_reg;
+class st_dst_reg;
+
+static int swizzle_for_size(int size);
+
+/**
+ * This class is the counterpart of the TGSI ureg_src struct.
+ */
+class st_src_reg {
+public:
+   st_src_reg(gl_register_file file, int index, const glsl_type *type)
+   {
+      this->file = file;
+      this->index = index;
+      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+         this->swizzle = swizzle_for_size(type->vector_elements);
+      else
+         this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg(gl_register_file file, int index, int type)
+   {
+      this->type = type;
+      this->file = file;
+      this->index = index;
+      this->swizzle = SWIZZLE_XYZW;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   st_src_reg()
+   {
+      this->type = GLSL_TYPE_ERROR;
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->swizzle = 0;
+      this->negate = 0;
+      this->reladdr = NULL;
+   }
+
+   explicit st_src_reg(st_dst_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
+   int negate; /**< NEGATE_XYZW mask from mesa */
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+class st_dst_reg {
+public:
+   st_dst_reg(gl_register_file file, int writemask, int type)
+   {
+      this->file = file;
+      this->index = 0;
+      this->writemask = writemask;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+      this->type = type;
+   }
+
+   st_dst_reg()
+   {
+      this->type = GLSL_TYPE_ERROR;
+      this->file = PROGRAM_UNDEFINED;
+      this->index = 0;
+      this->writemask = 0;
+      this->cond_mask = COND_TR;
+      this->reladdr = NULL;
+   }
+
+   explicit st_dst_reg(st_src_reg reg);
+
+   gl_register_file file; /**< PROGRAM_* from Mesa */
+   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
+   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+   GLuint cond_mask:4;
+   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+   /** Register index should be offset by the integer in this reg. */
+   st_src_reg *reladdr;
+};
+
+st_src_reg::st_src_reg(st_dst_reg reg)
+{
+   this->type = reg.type;
+   this->file = reg.file;
+   this->index = reg.index;
+   this->swizzle = SWIZZLE_XYZW;
+   this->negate = 0;
+   this->reladdr = reg.reladdr;
+}
+
+st_dst_reg::st_dst_reg(st_src_reg reg)
+{
+   this->type = reg.type;
+   this->file = reg.file;
+   this->index = reg.index;
+   this->writemask = WRITEMASK_XYZW;
+   this->cond_mask = COND_TR;
+   this->reladdr = reg.reladdr;
+}
+
+class glsl_to_tgsi_instruction : public exec_node {
+public:
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   unsigned op;
+   st_dst_reg dst;
+   st_src_reg src[3];
+   /** Pointer to the ir source this tree came from for debugging */
+   ir_instruction *ir;
+   GLboolean cond_update;
+   bool saturate;
+   int sampler; /**< sampler index */
+   int tex_target; /**< One of TEXTURE_*_INDEX */
+   GLboolean tex_shadow;
+   int dead_mask; /**< Used in dead code elimination */
+
+   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
+};
+
+class variable_storage : public exec_node {
+public:
+   variable_storage(ir_variable *var, gl_register_file file, int index)
+      : file(file), index(index), var(var)
+   {
+      /* empty */
+   }
+
+   gl_register_file file;
+   int index;
+   ir_variable *var; /* variable that maps to this, if any */
+};
+
+class immediate_storage : public exec_node {
+public:
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+   
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
+class function_entry : public exec_node {
+public:
+   ir_function_signature *sig;
+
+   /**
+    * identifier of this function signature used by the program.
+    *
+    * At the point that TGSI instructions for function calls are
+    * generated, we don't know the address of the first instruction of
+    * the function body.  So we make the BranchTarget that is called a
+    * small integer and rewrite them during set_branchtargets().
+    */
+   int sig_id;
+
+   /**
+    * Pointer to first instruction of the function body.
+    *
+    * Set during function body emits after main() is processed.
+    */
+   glsl_to_tgsi_instruction *bgn_inst;
+
+   /**
+    * Index of the first instruction of the function body in actual TGSI.
+    *
+    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
+    */
+   int inst;
+
+   /** Storage for the return value. */
+   st_src_reg return_reg;
+};
+
+class glsl_to_tgsi_visitor : public ir_visitor {
+public:
+   glsl_to_tgsi_visitor();
+   ~glsl_to_tgsi_visitor();
+
+   function_entry *current_function;
+
+   struct gl_context *ctx;
+   struct gl_program *prog;
+   struct gl_shader_program *shader_program;
+   struct gl_shader_compiler_options *options;
+
+   int next_temp;
+
+   int num_address_regs;
+   int samplers_used;
+   bool indirect_addr_temps;
+   bool indirect_addr_consts;
+   
+   int glsl_version;
+
+   variable_storage *find_variable_storage(ir_variable *var);
+
+   int add_constant(gl_register_file file, gl_constant_value values[4],
+                    int size, int datatype, GLuint *swizzle_out);
+
+   function_entry *get_function_signature(ir_function_signature *sig);
+
+   st_src_reg get_temp(const glsl_type *type);
+   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
+
+   st_src_reg st_src_reg_for_float(float val);
+   st_src_reg st_src_reg_for_int(int val);
+   st_src_reg st_src_reg_for_type(int type, int val);
+
+   /**
+    * \name Visit methods
+    *
+    * As typical for the visitor pattern, there must be one \c visit method for
+    * each concrete subclass of \c ir_instruction.  Virtual base classes within
+    * the hierarchy should not have \c visit methods.
+    */
+   /*@{*/
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_dereference_variable  *);
+   virtual void visit(ir_dereference_array *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_if *);
+   /*@}*/
+
+   st_src_reg result;
+
+   /** List of variable_storage */
+   exec_list variables;
+
+   /** List of immediate_storage */
+   exec_list immediates;
+   int num_immediates;
+
+   /** List of function_entry */
+   exec_list function_signatures;
+   int next_signature_id;
+
+   /** List of glsl_to_tgsi_instruction */
+   exec_list instructions;
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst,
+        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+   
+   unsigned get_opcode(ir_instruction *ir, unsigned op,
+                    st_dst_reg dst,
+                    st_src_reg src0, st_src_reg src1);
+
+   /**
+    * Emit the correct dot-product instruction for the type of arguments
+    */
+   void emit_dp(ir_instruction *ir,
+                st_dst_reg dst,
+                st_src_reg src0,
+                st_src_reg src1,
+                unsigned elements);
+
+   void emit_scalar(ir_instruction *ir, unsigned op,
+        	    st_dst_reg dst, st_src_reg src0);
+
+   void emit_scalar(ir_instruction *ir, unsigned op,
+        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+
+   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
+
+   void emit_scs(ir_instruction *ir, unsigned op,
+        	 st_dst_reg dst, const st_src_reg &src);
+
+   GLboolean try_emit_mad(ir_expression *ir,
+        		  int mul_operand);
+   GLboolean try_emit_sat(ir_expression *ir);
+
+   void emit_swz(ir_expression *ir);
+
+   bool process_move_condition(ir_rvalue *ir);
+
+   void remove_output_reads(gl_register_file type);
+   void simplify_cmp(void);
+
+   void rename_temp_register(int index, int new_index);
+   int get_first_temp_read(int index);
+   int get_first_temp_write(int index);
+   int get_last_temp_read(int index);
+   int get_last_temp_write(int index);
+
+   void copy_propagate(void);
+   void eliminate_dead_code(void);
+   int eliminate_dead_code_advanced(void);
+   void merge_registers(void);
+   void renumber_registers(void);
+
+   void *mem_ctx;
+};
+
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
+
+static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
+
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+
+static void
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
+   va_end(args);
+
+   prog->LinkStatus = GL_FALSE;
+}
+
+static int
+swizzle_for_size(int size)
+{
+   int size_swizzles[4] = {
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+   };
+
+   assert((size >= 1) && (size <= 4));
+   return size_swizzles[size - 1];
+}
+
+static bool
+is_tex_instruction(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex;
+}
+
+static unsigned
+num_inst_dst_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->num_dst;
+}
+
+static unsigned
+num_inst_src_regs(unsigned opcode)
+{
+   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
+   return info->is_tex ? info->num_src - 1 : info->num_src;
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1, st_src_reg src2)
+{
+   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
+   int num_reladdr = 0, i;
+   
+   op = get_opcode(ir, op, dst, src0, src1);
+
+   /* If we have to do relative addressing, we want to load the ARL
+    * reg directly for one of the regs, and preload the other reladdr
+    * sources into temps.
+    */
+   num_reladdr += dst.reladdr != NULL;
+   num_reladdr += src0.reladdr != NULL;
+   num_reladdr += src1.reladdr != NULL;
+   num_reladdr += src2.reladdr != NULL;
+
+   reladdr_to_temp(ir, &src2, &num_reladdr);
+   reladdr_to_temp(ir, &src1, &num_reladdr);
+   reladdr_to_temp(ir, &src0, &num_reladdr);
+
+   if (dst.reladdr) {
+      emit_arl(ir, address_reg, *dst.reladdr);
+      num_reladdr--;
+   }
+   assert(num_reladdr == 0);
+
+   inst->op = op;
+   inst->dst = dst;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+   inst->ir = ir;
+   inst->dead_mask = 0;
+
+   inst->function = NULL;
+   
+   if (op == TGSI_OPCODE_ARL)
+      this->num_address_regs = 1;
+   
+   /* Update indirect addressing status used by TGSI */
+   if (dst.reladdr) {
+      switch(dst.file) {
+      case PROGRAM_TEMPORARY:
+         this->indirect_addr_temps = true;
+         break;
+      case PROGRAM_LOCAL_PARAM:
+      case PROGRAM_ENV_PARAM:
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         this->indirect_addr_consts = true;
+         break;
+      case PROGRAM_IMMEDIATE:
+         assert(!"immediates should not have indirect addressing");
+         break;
+      default:
+         break;
+      }
+   }
+   else {
+      for (i=0; i<3; i++) {
+         if(inst->src[i].reladdr) {
+            switch(inst->src[i].file) {
+            case PROGRAM_TEMPORARY:
+               this->indirect_addr_temps = true;
+               break;
+            case PROGRAM_LOCAL_PARAM:
+            case PROGRAM_ENV_PARAM:
+            case PROGRAM_STATE_VAR:
+            case PROGRAM_NAMED_PARAM:
+            case PROGRAM_CONSTANT:
+            case PROGRAM_UNIFORM:
+               this->indirect_addr_consts = true;
+               break;
+            case PROGRAM_IMMEDIATE:
+               assert(!"immediates should not have indirect addressing");
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   this->instructions.push_tail(inst);
+   
+   return inst;
+}
+
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+{
+   return emit(ir, op, dst, src0, src1, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst, st_src_reg src0)
+{
+   assert(dst.writemask != 0);
+   return emit(ir, op, dst, src0, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
+{
+   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
+}
+
+/**
+ * Determines whether to use an integer, unsigned integer, or float opcode
+ * based on the operands and input opcode, then returns the chosen opcode.
+ * 
+ * TODO: type checking for remaining TGSI opcodes
+ */
+unsigned
+glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst,
+        		 st_src_reg src0, st_src_reg src1)
+{
+   int type = GLSL_TYPE_FLOAT;
+   
+   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+      type = GLSL_TYPE_FLOAT;
+   else if (glsl_version >= 130)
+      type = src0.type;
+
+#define case4(c, f, i, u) \
+   case TGSI_OPCODE_##c: \
+      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
+      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
+      else op = TGSI_OPCODE_##f; \
+      break;
+#define case3(f, i, u)  case4(f, f, i, u)
+#define case2fi(f, i)   case4(f, f, i, i)
+#define case2iu(i, u)   case4(i, LAST, i, u)
+   
+   switch(op) {
+      case2fi(ADD, UADD);
+      case2fi(MUL, UMUL);
+      case2fi(MAD, UMAD);
+      case3(DIV, IDIV, UDIV);
+      case3(MAX, IMAX, UMAX);
+      case3(MIN, IMIN, UMIN);
+      case2iu(MOD, UMOD);
+      
+      case2fi(SEQ, USEQ);
+      case2fi(SNE, USNE);
+      case3(SGE, ISGE, USGE);
+      case3(SLT, ISLT, USLT);
+      
+      case2iu(SHL, SHL);
+      case2iu(ISHR, USHR);
+      case2iu(NOT, NOT);
+      case2iu(AND, AND);
+      case2iu(OR, OR);
+      case2iu(XOR, XOR);
+      
+      default: break;
+   }
+   
+   assert(op != TGSI_OPCODE_LAST);
+   return op;
+}
+
+void
+glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
+        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+        		    unsigned elements)
+{
+   static const unsigned dot_opcodes[] = {
+      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
+   };
+
+   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+/**
+ * Emits TGSI scalar opcodes to produce unique answers across channels.
+ *
+ * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
+ * channel determines the result across all channels.  So to do a vec4
+ * of this operation, we want to emit a scalar per source channel used
+ * to produce dest channels.
+ */
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst,
+        			st_src_reg orig_src0, st_src_reg orig_src1)
+{
+   int i, j;
+   int done_mask = ~dst.writemask;
+
+   /* TGSI RCP is a scalar operation splatting results to all channels,
+    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
+    * dst channels.
+    */
+   for (i = 0; i < 4; i++) {
+      GLuint this_mask = (1 << i);
+      glsl_to_tgsi_instruction *inst;
+      st_src_reg src0 = orig_src0;
+      st_src_reg src1 = orig_src1;
+
+      if (done_mask & this_mask)
+         continue;
+
+      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
+      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
+      for (j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz &&
+             GET_SWZ(src1.swizzle, j) == src1_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
+        			  src1_swiz, src1_swiz);
+
+      inst = emit(ir, op, dst, src0, src1);
+      inst->dst.writemask = this_mask;
+      done_mask |= this_mask;
+   }
+}
+
+void
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg undef = undef_src;
+
+   undef.swizzle = SWIZZLE_XXXX;
+
+   emit_scalar(ir, op, dst, src0, undef);
+}
+
+void
+glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
+        		        st_dst_reg dst, st_src_reg src0)
+{
+   st_src_reg tmp = get_temp(glsl_type::float_type);
+
+   if (src0.type == GLSL_TYPE_INT)
+      emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
+   else if (src0.type == GLSL_TYPE_UINT)
+      emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
+   else
+      tmp = src0;
+   
+   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
+}
+
+/**
+ * Emit a TGSI_OPCODE_SCS instruction
+ *
+ * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
+ * Instead of splatting its result across all four components of the 
+ * destination, it writes one value to the \c x component and another value to 
+ * the \c y component.
+ *
+ * \param ir        IR instruction being processed
+ * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 
+ *                  on which value is desired.
+ * \param dst       Destination register
+ * \param src       Source register
+ */
+void
+glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
+        		     st_dst_reg dst,
+        		     const st_src_reg &src)
+{
+   /* Vertex programs cannot use the SCS opcode.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
+      emit_scalar(ir, op, dst, src);
+      return;
+   }
+
+   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
+   const unsigned scs_mask = (1U << component);
+   int done_mask = ~dst.writemask;
+   st_src_reg tmp;
+
+   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
+
+   /* If there are components in the destination that differ from the component
+    * that will be written by the SCS instruction, we'll need a temporary.
+    */
+   if (scs_mask != unsigned(dst.writemask)) {
+      tmp = get_temp(glsl_type::vec4_type);
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      unsigned this_mask = (1U << i);
+      st_src_reg src0 = src;
+
+      if ((done_mask & this_mask) != 0)
+         continue;
+
+      /* The source swizzle specified which component of the source generates
+       * sine / cosine for the current component in the destination.  The SCS
+       * instruction requires that this value be swizzle to the X component.
+       * Replace the current swizzle with a swizzle that puts the source in
+       * the X component.
+       */
+      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
+
+      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
+        			   src0_swiz, src0_swiz);
+      for (unsigned j = i + 1; j < 4; j++) {
+         /* If there is another enabled component in the destination that is
+          * derived from the same inputs, generate its value on this pass as
+          * well.
+          */
+         if (!(done_mask & (1 << j)) &&
+             GET_SWZ(src0.swizzle, j) == src0_swiz) {
+            this_mask |= (1 << j);
+         }
+      }
+
+      if (this_mask != scs_mask) {
+         glsl_to_tgsi_instruction *inst;
+         st_dst_reg tmp_dst = st_dst_reg(tmp);
+
+         /* Emit the SCS instruction.
+          */
+         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
+         inst->dst.writemask = scs_mask;
+
+         /* Move the result of the SCS instruction to the desired location in
+          * the destination.
+          */
+         tmp.swizzle = MAKE_SWIZZLE4(component, component,
+        			     component, component);
+         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
+         inst->dst.writemask = this_mask;
+      } else {
+         /* Emit the SCS instruction to write directly to the destination.
+          */
+         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
+         inst->dst.writemask = scs_mask;
+      }
+
+      done_mask |= this_mask;
+   }
+}
+
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+        		     gl_constant_value values[4], int size, int datatype,
+        		     GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) { /* constants are delegated to the program's parameter list */
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   } else {
+      int index = 0; /* position of the entry currently examined in this->immediates */
+      immediate_storage *entry;
+      assert(file == PROGRAM_IMMEDIATE); /* the only other register file handled here */
+
+      /* Search immediate storage to see if we already have an identical
+       * immediate that we can use instead of adding a duplicate entry.
+       */
+      foreach_iter(exec_list_iterator, iter, this->immediates) {
+         entry = (immediate_storage *)iter.get();
+         
+         if (entry->size == size &&
+             entry->type == datatype &&
+             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
+             return index; /* reuse: same size, same type, bitwise-identical values */
+         }
+         index++;
+      }
+      
+      /* Add this immediate to the list. */
+      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      this->immediates.push_tail(entry);
+      this->num_immediates++;
+      return index; /* index now equals the new tail position; swizzle_out is never written on this path */
+   }
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); /* index -1 is a placeholder, replaced below */
+   union gl_constant_value uval;
+
+   uval.f = val;
+   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); /* register a one-component float immediate */
+
+   return src;
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); /* index -1 is a placeholder, replaced below */
+   union gl_constant_value uval;
+   
+   assert(glsl_version >= 130); /* callers must only request integer immediates for GLSL 1.30 or later */
+
+   uval.i = val;
+   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); /* register a one-component int immediate */
+
+   return src;
+}
+
+struct st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
+{
+   if (glsl_version >= 130) /* 1.30+: float type gets a float immediate, every other type an int immediate */
+      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
+                                       st_src_reg_for_int(val);
+   else /* older versions: always a float immediate (val is converted int -> float) */
+      return st_src_reg_for_float(val);
+}
+
+static int
+type_size(const struct glsl_type *type)
+{
+   unsigned int i;
+   int size; /* running total for struct members */
+
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (type->is_matrix()) {
+         return type->matrix_columns; /* one slot per matrix column */
+      } else {
+         /* Regardless of size of vector, it gets a vec4. This is bad
+          * packing for things like floats, but otherwise arrays become a
+          * mess.  Hopefully a later pass over the code can pack scalars
+          * down if appropriate.
+          */
+         return 1;
+      }
+   case GLSL_TYPE_ARRAY:
+      assert(type->length > 0); /* unsized arrays are not expected here */
+      return type_size(type->fields.array) * type->length; /* element size times element count */
+   case GLSL_TYPE_STRUCT:
+      size = 0;
+      for (i = 0; i < type->length; i++) { /* sum of the sizes of all members */
+         size += type_size(type->fields.structure[i].type);
+      }
+      return size;
+   case GLSL_TYPE_SAMPLER:
+      /* Samplers take up one slot in UNIFORMS[], but they're baked in
+       * at link time.
+       */
+      return 1;
+   default:
+      assert(0); /* unhandled base type */
+      return 0; /* value returned when asserts are compiled out */
+   }
+}
+
+/**
+ * In the initial pass of codegen, we assign temporary numbers to
+ * intermediate results.  (not SSA -- variable assignments will reuse
+ * storage).  Reserves type_size(type) registers starting at next_temp.
+ */
+st_src_reg
+glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
+{
+   st_src_reg src;
+   int swizzle[4];
+   int i;
+
+   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT; /* before GLSL 1.30 every temp is typed float */
+   src.file = PROGRAM_TEMPORARY;
+   src.index = next_temp; /* first register of the newly reserved range */
+   src.reladdr = NULL;
+   next_temp += type_size(type);
+
+   if (type->is_array() || type->is_record()) {
+      src.swizzle = SWIZZLE_NOOP; /* aggregates get no component selection */
+   } else {
+      for (i = 0; i < type->vector_elements; i++) /* identity swizzle for the components that exist */
+         swizzle[i] = i;
+      for (; i < 4; i++) /* remaining channels replicate the last real component */
+         swizzle[i] = type->vector_elements - 1;
+      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
+        			  swizzle[2], swizzle[3]);
+   }
+   src.negate = 0;
+
+   return src;
+}
+
+variable_storage *
+glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
+{
+   
+   variable_storage *entry;
+
+   foreach_iter(exec_list_iterator, iter, this->variables) { /* linear scan of every recorded storage entry */
+      entry = (variable_storage *)iter.get();
+
+      if (entry->var == var) /* match by pointer identity, not by name */
+         return entry;
+   }
+
+   return NULL; /* no storage has been recorded for var */
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_variable *ir)
+{
+   if (strcmp(ir->name, "gl_FragCoord") == 0) { /* copy the gl_FragCoord layout settings onto the program */
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; /* unchecked cast: assumes prog is a fragment program here */
+
+      fp->OriginUpperLeft = ir->origin_upper_left;
+      fp->PixelCenterInteger = ir->pixel_center_integer;
+
+   } else if (strcmp(ir->name, "gl_FragDepth") == 0) { /* translate the IR depth layout into the program's enum */
+      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; /* unchecked cast, as above */
+      switch (ir->depth_layout) {
+      case ir_depth_layout_none:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
+         break;
+      case ir_depth_layout_any:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
+         break;
+      case ir_depth_layout_greater:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
+         break;
+      case ir_depth_layout_less:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
+         break;
+      case ir_depth_layout_unchanged:
+         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
+         break;
+      default:
+         assert(0); /* unknown depth layout value */
+         break;
+      }
+   }
+
+   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { /* built-in uniform: backed by state slots */
+      unsigned int i;
+      const ir_state_slot *const slots = ir->state_slots;
+      assert(ir->state_slots != NULL);
+
+      /* Check if this statevar's setup in the STATE file exactly
+       * matches how we'll want to reference it as a
+       * struct/array/whatever.  If not, then we need to move it into
+       * temporary storage and hope that it'll get copy-propagated
+       * out.
+       */
+      for (i = 0; i < ir->num_state_slots; i++) { /* stops at the first slot with a non-identity swizzle */
+         if (slots[i].swizzle != SWIZZLE_XYZW) {
+            break;
+         }
+      }
+
+      struct variable_storage *storage;
+      st_dst_reg dst;
+      if (i == ir->num_state_slots) { /* every slot is XYZW: reference the state registers directly */
+         /* We'll set the index later. */
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
+         this->variables.push_tail(storage);
+
+         dst = undef_dst; /* no copy is needed on this path */
+      } else {
+         /* The variable_storage constructor allocates slots based on the size
+          * of the type.  However, this had better match the number of state
+          * elements that we're going to copy into the new temporary.
+          */
+         assert((int) ir->num_state_slots == type_size(ir->type));
+
+         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
+        					 this->next_temp);
+         this->variables.push_tail(storage);
+         this->next_temp += type_size(ir->type);
+
+         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
+               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+      }
+
+
+      for (unsigned int i = 0; i < ir->num_state_slots; i++) { /* note: this i shadows the one declared above */
+         int index = _mesa_add_state_reference(this->prog->Parameters,
+        				       (gl_state_index *)slots[i].tokens);
+
+         if (storage->file == PROGRAM_STATE_VAR) {
+            if (storage->index == -1) {
+               storage->index = index; /* the first slot fixes the base index */
+            } else {
+               assert(index == storage->index + (int)i); /* later slots must be consecutive with the first */
+            }
+         } else {
+            st_src_reg src(PROGRAM_STATE_VAR, index,
+                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+            src.swizzle = slots[i].swizzle; /* apply the slot's swizzle while copying into the temporary */
+            emit(ir, TGSI_OPCODE_MOV, dst, src);
+            /* even a float takes up a whole vec4 reg in a struct/array. */
+            dst.index++;
+         }
+      }
+
+      if (storage->file == PROGRAM_TEMPORARY &&
+          dst.index != storage->index + (int) ir->num_state_slots) { /* fewer/more registers copied than there are slots */
+         fail_link(this->shader_program,
+        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
+        	   ir->name, dst.index - storage->index,
+        	   type_size(ir->type));
+      }
+   }
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop *ir)
+{
+   ir_dereference_variable *counter = NULL; /* dereference of the loop counter, if the loop has one */
+
+   if (ir->counter != NULL)
+      counter = new(ir) ir_dereference_variable(ir->counter);
+
+   if (ir->from != NULL) { /* initial value: emit "counter = from" ahead of the loop */
+      assert(ir->counter != NULL);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+      a->accept(this);
+      delete a; /* the assignment node only existed to drive code generation */
+   }
+
+   emit(NULL, TGSI_OPCODE_BGNLOOP);
+
+   if (ir->to) { /* bound: emit "if (counter <cmp> to) break;" at the top of the body */
+      ir_expression *e =
+         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+        		       counter, ir->to);
+      ir_if *if_stmt =  new(ir) ir_if(e);
+
+      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+      if_stmt->then_instructions.push_tail(brk);
+
+      if_stmt->accept(this);
+
+      delete if_stmt; /* throw-away nodes, as above */
+      delete e;
+      delete brk;
+   }
+
+   visit_exec_list(&ir->body_instructions, this); /* the loop body proper */
+
+   if (ir->increment) { /* step: emit "counter = counter + increment" at the end of the body */
+      ir_expression *e =
+         new(ir) ir_expression(ir_binop_add, counter->type,
+        		       counter, ir->increment);
+
+      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+      a->accept(this);
+      delete a;
+      delete e;
+   }
+
+   emit(NULL, TGSI_OPCODE_ENDLOOP);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
+{
+   switch (ir->mode) { /* each jump kind maps to one TGSI opcode; no default case */
+   case ir_loop_jump::jump_break:
+      emit(NULL, TGSI_OPCODE_BRK); /* break -> BRK */
+      break;
+   case ir_loop_jump::jump_continue:
+      emit(NULL, TGSI_OPCODE_CONT); /* continue -> CONT */
+      break;
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
+{
+   assert(0); /* never expected: visit(ir_function) iterates the body of main() itself */
+   (void)ir; /* silence the unused-parameter warning */
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_function *ir)
+{
+   /* Ignore function bodies other than main() -- we shouldn't see calls to
+    * them since they should all be inlined before we get to glsl_to_tgsi.
+    */
+   if (strcmp(ir->name, "main") == 0) {
+      const ir_function_signature *sig;
+      exec_list empty; /* empty parameter list used to look up the signature of main */
+
+      sig = ir->matching_signature(&empty);
+
+      assert(sig);
+
+      foreach_iter(exec_list_iterator, iter, sig->body) { /* visit each instruction of main() in order */
+         ir_instruction *ir = (ir_instruction *)iter.get(); /* note: shadows the function parameter ir */
+
+         ir->accept(this);
+      }
+   }
+}
+
+GLboolean
+glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
+{
+   int nonmul_operand = 1 - mul_operand; /* the operand of ir that is not the candidate multiply */
+   st_src_reg a, b, c; /* MAD operands: result = a * b + c */
+   st_dst_reg result_dst;
+
+   ir_expression *expr = ir->operands[mul_operand]->as_expression();
+   if (!expr || expr->operation != ir_binop_mul) /* only applies when that operand is itself a multiply */
+      return false;
+
+   expr->operands[0]->accept(this);
+   a = this->result;
+   expr->operands[1]->accept(this);
+   b = this->result;
+   ir->operands[nonmul_operand]->accept(this);
+   c = this->result;
+
+   this->result = get_temp(ir->type);
+   result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1; /* write only the components the result type has */
+   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
+
+   return true; /* this->result now holds the MAD result */
+}
+
+GLboolean
+glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
+{
+   /* Saturates were only introduced to vertex programs in
+    * NV_vertex_program3, so don't give them to drivers in the VP.
+    */
+   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
+      return false;
+
+   ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); /* non-NULL result is the value to be saturated */
+   if (!sat_src)
+      return false;
+
+   sat_src->accept(this);
+   st_src_reg src = this->result;
+
+   this->result = get_temp(ir->type);
+   st_dst_reg result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1; /* write only the components the result type has */
+   glsl_to_tgsi_instruction *inst;
+   inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+   inst->saturate = true; /* the saturation is carried by a flag on the MOV */
+
+   return true;
+}
+
+void
+glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
+        			    st_src_reg *reg, int *num_reladdr)
+{
+   if (!reg->reladdr) /* nothing to do for directly addressed registers */
+      return;
+
+   emit_arl(ir, address_reg, *reg->reladdr); /* load this operand's relative address */
+
+   if (*num_reladdr != 1) { /* unless the counter is exactly 1, copy the operand to a temp and use the temp instead */
+      st_src_reg temp = get_temp(glsl_type::vec4_type);
+
+      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
+      *reg = temp;
+   }
+
+   (*num_reladdr)--; /* one fewer relatively addressed operand left for the caller */
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_expression *ir)
+{
+   unsigned int operand;
+   st_src_reg op[Elements(ir->operands)]; /* evaluated operands, in operand order */
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
+   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
+    */
+   if (ir->operation == ir_binop_add) {
+      if (try_emit_mad(ir, 1))
+         return;
+      if (try_emit_mad(ir, 0))
+         return;
+   }
+   if (try_emit_sat(ir))
+      return;
+
+   if (ir->operation == ir_quadop_vector)
+      assert(!"ir_quadop_vector should have been lowered");
+
+   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+      this->result.file = PROGRAM_UNDEFINED; /* sentinel: detects an operand visit that produced nothing */
+      ir->operands[operand]->accept(this);
+      if (this->result.file == PROGRAM_UNDEFINED) {
+         ir_print_visitor v;
+         printf("Failed to get tree for expression operand:\n");
+         ir->operands[operand]->accept(&v);
+         exit(1);
+      }
+      op[operand] = this->result;
+
+      /* Matrix expression operands should have been broken down to vector
+       * operations already.
+       */
+      assert(!ir->operands[operand]->type->is_matrix());
+   }
+
+   int vector_elements = ir->operands[0]->type->vector_elements; /* widest of the first two operands */
+   if (ir->operands[1]) {
+      vector_elements = MAX2(vector_elements,
+        		     ir->operands[1]->type->vector_elements);
+   }
+
+   this->result.file = PROGRAM_UNDEFINED;
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(ir->type);
+   /* convenience for the emit functions below. */
+   result_dst = st_dst_reg(result_src);
+   /* Limit writes to the channels that will be used by result_src later.
+    * This does limit this temp's use as a temporary for multi-instruction
+    * sequences.
+    */
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+   switch (ir->operation) {
+   case ir_unop_logic_not:
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      break;
+   case ir_unop_neg:
+      assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
+      if (result_dst.type == GLSL_TYPE_INT)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else {
+         op[0].negate = ~op[0].negate; /* float negate is free: flip the source modifier, emit nothing */
+         result_src = op[0];
+      }
+      break;
+   case ir_unop_abs:
+      assert(result_dst.type == GLSL_TYPE_FLOAT);
+      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+      break;
+   case ir_unop_sign:
+      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
+      break;
+   case ir_unop_rcp:
+      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
+      break;
+
+   case ir_unop_exp2:
+      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
+      break;
+   case ir_unop_exp:
+   case ir_unop_log:
+      assert(!"not reached: should be handled by ir_explog_to_explog2");
+      break;
+   case ir_unop_log2:
+      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
+      break;
+   case ir_unop_sin:
+      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos:
+      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+   case ir_unop_sin_reduced:
+      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
+      break;
+   case ir_unop_cos_reduced:
+      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
+      break;
+
+   case ir_unop_dFdx:
+      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
+      break;
+   case ir_unop_dFdy:
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
+      break;
+
+   case ir_unop_noise: {
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
+      break;
+   }
+
+   case ir_binop_add:
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_sub:
+      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_mul:
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_div:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      else
+         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_mod:
+      if (result_dst.type == GLSL_TYPE_FLOAT)
+         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      else
+         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_less:
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_greater:
+      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_lequal:
+      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_gequal:
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_equal:
+      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_nequal:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_all_equal:
+      /* "==" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
+      }
+      break;
+   case ir_binop_any_nequal:
+      /* "!=" operator producing a scalar boolean. */
+      if (ir->operands[0]->type->is_vector() ||
+          ir->operands[1]->type->is_vector()) {
+         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
+               glsl_type::vec4_type);
+         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+         emit_dp(ir, result_dst, temp, temp, vector_elements);
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      } else {
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      }
+      break;
+
+   case ir_unop_any:
+      assert(ir->operands[0]->type->is_vector());
+      emit_dp(ir, result_dst, op[0], op[0],
+              ir->operands[0]->type->vector_elements);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_xor:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_logic_or:
+      /* This could be a saturated add and skip the SNE. */
+      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+      break;
+
+   case ir_binop_logic_and:
+      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
+      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      break;
+
+   case ir_binop_dot:
+      assert(ir->operands[0]->type->is_vector());
+      assert(ir->operands[0]->type == ir->operands[1]->type);
+      emit_dp(ir, result_dst, op[0], op[1],
+              ir->operands[0]->type->vector_elements);
+      break;
+
+   case ir_unop_sqrt:
+      /* sqrt(x) = x * rsq(x). */
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
+      /* For incoming channels <= 0, set the result to 0. */
+      op[0].negate = ~op[0].negate;
+      emit(ir, TGSI_OPCODE_CMP, result_dst,
+        		  op[0], result_src, st_src_reg_for_float(0.0));
+      break;
+   case ir_unop_rsq:
+      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
+      break;
+   case ir_unop_i2f:
+   case ir_unop_b2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
+         break;
+      } /* fallthrough: before 1.30 ints and bools are already floats, so this is a no-op */
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+      /* Converting between signed and unsigned integers is a no-op. */
+   case ir_unop_b2i:
+      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+      result_src = op[0];
+      break;
+   case ir_unop_f2i:
+      if (glsl_version >= 130)
+         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_f2b:
+   case ir_unop_i2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
+            st_src_reg_for_type(result_dst.type, 0));
+      break;
+   case ir_unop_trunc:
+      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
+      break;
+   case ir_unop_ceil:
+      op[0].negate = ~op[0].negate; /* ceil(x) is computed as -floor(-x) */
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      result_src.negate = ~result_src.negate;
+      break;
+   case ir_unop_floor:
+      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
+      break;
+   case ir_unop_fract:
+      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
+      break;
+
+   case ir_binop_min:
+      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_max:
+      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
+      break;
+   case ir_binop_pow:
+      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
+      break;
+
+   case ir_unop_bit_not: /* from here on: pre-1.30 shaders fall through to the "unsupported" assert */
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+         break;
+      } /* fallthrough */
+   case ir_unop_u2f:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
+         break;
+      } /* fallthrough */
+   case ir_binop_lshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]); /* binary op: both operands are required */
+         break;
+      } /* fallthrough */
+   case ir_binop_rshift:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
+         break;
+      } /* fallthrough */
+   case ir_binop_bit_and:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+         break;
+      } /* fallthrough */
+   case ir_binop_bit_xor:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+         break;
+      } /* fallthrough */
+   case ir_binop_bit_or:
+      if (glsl_version >= 130) {
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+         break;
+      } /* fallthrough */
+   case ir_unop_round_even:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+
+   case ir_quadop_vector:
+      /* This operation should have already been handled.
+       */
+      assert(!"Should not get here.");
+      break;
+   }
+
+   this->result = result_src; /* normally the temp; neg and no-op conversions return op[0] instead */
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
+{
+   st_src_reg src;
+   int i;
+   int swizzle[4]; /* composed swizzle, one entry per output channel */
+
+   /* Note that this is only swizzles in expressions, not those on the left
+    * hand side of an assignment, which do write masking.  See ir_assignment
+    * for that.
+    */
+
+   ir->val->accept(this);
+   src = this->result;
+   assert(src.file != PROGRAM_UNDEFINED);
+
+   for (i = 0; i < 4; i++) {
+      if (i < ir->type->vector_elements) {
+         switch (i) { /* compose: look the IR mask up in the operand's existing swizzle */
+         case 0:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
+            break;
+         case 1:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
+            break;
+         case 2:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
+            break;
+         case 3:
+            swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
+            break;
+         }
+      } else {
+         /* If the type is smaller than a vec4, replicate the last
+          * channel out.
+          */
+         swizzle[i] = swizzle[ir->type->vector_elements - 1];
+      }
+   }
+
+   src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+   this->result = src; /* no instruction is emitted; only the swizzle of the operand changes */
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
+{
+   variable_storage *entry = find_variable_storage(ir->var); /* NULL if no storage is recorded yet */
+   ir_variable *var = ir->var;
+
+   if (!entry) { /* first sighting: create storage according to the variable's mode */
+      switch (var->mode) {
+      case ir_var_uniform:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
+        				       var->location);
+         this->variables.push_tail(entry);
+         break;
+      case ir_var_in:
+      case ir_var_inout:
+         /* The linker assigns locations for varyings and attributes,
+          * including deprecated builtins (like gl_Color), user-assign
+          * generic attributes (glBindVertexLocation), and
+          * user-defined varyings.
+          *
+          * FINISHME: We would hit this path for function arguments.  Fix!
+          */
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_INPUT,
+                                               var->location); /* note: not pushed onto this->variables */
+         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
+             var->location >= VERT_ATTRIB_GENERIC0) { /* generic vertex attribute: record it on the program */
+            _mesa_add_attribute(this->prog->Attributes,
+                                var->name,
+                                _mesa_sizeof_glsl_type(var->type->gl_type),
+                                var->type->gl_type,
+                                var->location - VERT_ATTRIB_GENERIC0);
+         }
+         break;
+      case ir_var_out:
+         assert(var->location != -1);
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_OUTPUT,
+                                               var->location); /* note: not pushed onto this->variables */
+         break;
+      case ir_var_system_value:
+         entry = new(mem_ctx) variable_storage(var,
+                                               PROGRAM_SYSTEM_VALUE,
+                                               var->location); /* note: not pushed onto this->variables */
+         break;
+      case ir_var_auto:
+      case ir_var_temporary:
+         entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
+        				       this->next_temp);
+         this->variables.push_tail(entry);
+
+         next_temp += type_size(var->type); /* reserve the registers the variable occupies */
+         break;
+      }
+
+      if (!entry) { /* a mode not handled above */
+         printf("Failed to make storage for %s\n", var->name);
+         exit(1);
+      }
+   }
+
+   this->result = st_src_reg(entry->file, entry->index, var->type);
+   if (glsl_version <= 120) /* GLSL 1.20 and lower: every value is typed float */
+      this->result.type = GLSL_TYPE_FLOAT;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
+{
+   ir_constant *index; /* non-NULL when the subscript is a compile-time constant */
+   st_src_reg src;
+   int element_size = type_size(ir->type); /* registers occupied by one element */
+
+   index = ir->array_index->constant_expression_value();
+
+   ir->array->accept(this);
+   src = this->result;
+
+   if (index) {
+      src.index += index->value.i[0] * element_size; /* constant subscript: fold it into the register index */
+   } else {
+      /* Variable index array dereference.  It eats the "vec4" of the
+       * base of the array and an index that offsets the TGSI register
+       * index.
+       */
+      ir->array_index->accept(this);
+
+      st_src_reg index_reg;
+
+      if (element_size == 1) {
+         index_reg = this->result; /* the subscript already is the register offset */
+      } else {
+         index_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
+              this->result, st_src_reg_for_float(element_size)); /* scale the subscript by the element size */
+      }
+
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL) {
+         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+              index_reg, *src.reladdr);
+
+         index_reg = accum_reg;
+      }
+
+      src.reladdr = ralloc(mem_ctx, st_src_reg); /* the offset register is stored out-of-line in mem_ctx */
+      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      src.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      src.swizzle = SWIZZLE_NOOP;
+
+   this->result = src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
+{
+   unsigned int i;
+   const glsl_type *struct_type = ir->record->type;
+   int offset = 0; /* registers occupied by the members that precede the requested field */
+
+   ir->record->accept(this); /* leaves the base of the record in this->result */
+
+   for (i = 0; i < struct_type->length; i++) {
+      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) /* fields are matched by name */
+         break;
+      offset += type_size(struct_type->fields.structure[i].type);
+   }
+
+   /* If the type is smaller than a vec4, replicate the last channel out. */
+   if (ir->type->is_scalar() || ir->type->is_vector())
+      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+   else
+      this->result.swizzle = SWIZZLE_NOOP;
+
+   this->result.index += offset; /* if no field matched, offset covers the whole struct (not checked here) */
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.  Returns the visited dereference as an st_dst_reg.
+ */
+static st_dst_reg
+get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
+{
+   /* The LHS must be a dereference.  If the LHS is a variable indexed array
+    * access of a vector, it must be separated into a series of conditional
+    * moves before reaching this point (see ir_vec_index_to_cond_assign).
+    */
+   assert(ir->as_dereference());
+   ir_dereference_array *deref_array = ir->as_dereference_array();
+   if (deref_array) {
+      assert(!deref_array->array->type->is_vector()); /* subscripting a vector on the LHS is not handled here */
+   }
+
+   /* Use the rvalue deref handler for the most part.  We'll ignore
+    * swizzles in it and write swizzles using writemask, though.
+    */
+   ir->accept(v);
+   return st_dst_reg(v->result);
+}
+
+/**
+ * Process the condition of a conditional assignment
+ *
+ * Examines the condition of a conditional assignment to generate the optimal
+ * first operand of a \c CMP instruction.  If the condition is a relational
+ * operator with 0 (e.g., \c ir_binop_less), the value being compared is used
+ * as the source for the \c CMP; otherwise the whole condition is evaluated
+ * and its boolean result is used.  The operand is left in \c this->result.
+ * \return true if the caller must swap the other two \c CMP operands.
+ */
+bool
+glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
+{
+   ir_rvalue *src_ir = ir; /* the expression that actually gets visited */
+   bool negate = true;
+   bool switch_order = false;
+
+   ir_expression *const expr = ir->as_expression();
+   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
+      bool zero_on_left = false;
+
+      if (expr->operands[0]->is_zero()) {
+         src_ir = expr->operands[1];
+         zero_on_left = true;
+      } else if (expr->operands[1]->is_zero()) {
+         src_ir = expr->operands[0];
+         zero_on_left = false;
+      }
+
+      /*      a is -  0  +            -  0  +
+       * (a <  0)  T  F  F  ( a < 0)  T  F  F
+       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
+       * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (a >  0)  F  F  T  (-a < 0)  F  F  T
+       * (0 >  a)  T  F  F  ( a < 0)  T  F  F
+       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
+       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
+       *
+       * Note that exchanging the order of 0 and 'a' in the comparison simply
+       * means that the value of 'a' should be negated.
+       */
+      if (src_ir != ir) { /* one of the two operands was zero */
+         switch (expr->operation) {
+         case ir_binop_less:
+            switch_order = false;
+            negate = zero_on_left;
+            break;
+
+         case ir_binop_greater:
+            switch_order = false;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_lequal:
+            switch_order = true;
+            negate = !zero_on_left;
+            break;
+
+         case ir_binop_gequal:
+            switch_order = true;
+            negate = zero_on_left;
+            break;
+
+         default:
+            /* This isn't the right kind of comparison after all, so make sure
+             * the whole condition is visited.
+             */
+            src_ir = ir;
+            break;
+         }
+      }
+   }
+
+   src_ir->accept(this);
+
+   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
+    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
+    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
+    * computing the condition.
+    */
+   if (negate)
+      this->result.negate = ~this->result.negate;
+
+   return switch_order;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_assignment *ir)
+{
+   st_dst_reg l; /* destination: storage of the assignment's LHS */
+   st_src_reg r; /* source: result of visiting the RHS */
+   int i;
+
+   ir->rhs->accept(this);
+   r = this->result;
+
+   l = get_assignment_lhs(ir->lhs, this);
+
+   /* FINISHME: This should really set to the correct maximal writemask for each
+    * FINISHME: component written (in the loops below).  This case can only
+    * FINISHME: occur for matrices, arrays, and structures.
+    */
+   if (ir->write_mask == 0) {
+      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+      l.writemask = WRITEMASK_XYZW;
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
+      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
+       * FINISHME: W component of fragment shader output zero, work correctly.
+       */
+      l.writemask = WRITEMASK_XYZW;
+   } else {
+      int swizzles[4];
+      int first_enabled_chan = 0;
+      int rhs_chan = 0;
+
+      l.writemask = ir->write_mask;
+
+      /* Find the RHS channel that feeds the first written LHS channel. */
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i)) {
+            first_enabled_chan = GET_SWZ(r.swizzle, i);
+            break;
+         }
+      }
+
+      /* Swizzle a small RHS vector into the channels being written.
+       *
+       * glsl ir treats write_mask as dictating how many channels are
+       * present on the RHS while TGSI treats write_mask as just
+       * showing which channels of the vec4 RHS get written.
+       */
+      for (int i = 0; i < 4; i++) {
+         if (l.writemask & (1 << i))
+            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
+         else
+            swizzles[i] = first_enabled_chan; /* channel is not written */
+      }
+      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
+        			swizzles[2], swizzles[3]);
+   }
+
+   assert(l.file != PROGRAM_UNDEFINED);
+   assert(r.file != PROGRAM_UNDEFINED);
+
+   if (ir->condition) {
+      const bool switch_order = this->process_move_condition(ir->condition);
+      st_src_reg condition = this->result;
+
+      for (i = 0; i < type_size(ir->lhs->type); i++) { /* one CMP per register */
+         st_src_reg l_src = st_src_reg(l); /* current LHS value, kept if not taken */
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+
+         if (switch_order) {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
+         } else {
+            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
+         }
+
+         l.index++;
+         r.index++;
+      }
+   } else if (ir->rhs->as_expression() &&
+              this->instructions.get_tail() &&
+              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+              type_size(ir->lhs->type) == 1) {
+      /* To avoid emitting an extra MOV when assigning an expression to a
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
+       */
+      glsl_to_tgsi_instruction *inst;
+      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+   } else {
+      for (i = 0; i < type_size(ir->lhs->type); i++) { /* one MOV per register */
+         emit(ir, TGSI_OPCODE_MOV, l, r);
+         l.index++;
+         r.index++;
+      }
+   }
+}
+
+
+void
+glsl_to_tgsi_visitor::visit(ir_constant *ir)
+{
+   st_src_reg src;
+   GLfloat stack_vals[4] = { 0 };
+   gl_constant_value *values = (gl_constant_value *) stack_vals;
+   GLenum gl_type = GL_NONE;
+   unsigned int i;
+   static int in_array = 0; /* array nesting depth; NOTE(review): static, shared by all visitors */
+   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
+
+   /* Unfortunately, 4 floats is all we can get into
+    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
+    * aggregate constant and move each constant value into it.  If we
+    * get lucky, copy propagation will eliminate the extra moves.
+    */
+   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+
+      foreach_iter(exec_list_iterator, iter, ir->components) {
+         ir_constant *field_value = (ir_constant *)iter.get();
+         int size = type_size(field_value->type);
+
+         assert(size > 0);
+
+         field_value->accept(this); /* recurse; the field's register is in this->result */
+         src = this->result;
+
+         for (i = 0; i < (unsigned int)size; i++) {
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      return;
+   }
+
+   if (ir->type->is_array()) {
+      st_src_reg temp_base = get_temp(ir->type);
+      st_dst_reg temp = st_dst_reg(temp_base);
+      int size = type_size(ir->type->fields.array);
+
+      assert(size > 0);
+      in_array++; /* nested visits below now select PROGRAM_CONSTANT */
+
+      for (i = 0; i < ir->type->length; i++) {
+         ir->array_elements[i]->accept(this);
+         src = this->result;
+         for (int j = 0; j < size; j++) {
+            emit(ir, TGSI_OPCODE_MOV, temp, src);
+
+            src.index++;
+            temp.index++;
+         }
+      }
+      this->result = temp_base;
+      in_array--;
+      return;
+   }
+
+   if (ir->type->is_matrix()) {
+      st_src_reg mat = get_temp(ir->type);
+      st_dst_reg mat_column = st_dst_reg(mat);
+
+      for (i = 0; i < ir->type->matrix_columns; i++) { /* one constant + MOV per column */
+         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
+
+         src = st_src_reg(file, -1, ir->type->base_type);
+         src.index = add_constant(file,
+                                  values,
+                                  ir->type->vector_elements,
+                                  GL_FLOAT,
+                                  &src.swizzle);
+         emit(ir, TGSI_OPCODE_MOV, mat_column, src);
+
+         mat_column.index++;
+      }
+
+      this->result = mat;
+      return;
+   }
+
+   switch (ir->type->base_type) { /* scalar/vector: copy the components into values[] */
+   case GLSL_TYPE_FLOAT:
+      gl_type = GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i].f = ir->value.f[i];
+      }
+      break;
+   case GLSL_TYPE_UINT:
+      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (glsl_version >= 130)
+            values[i].u = ir->value.u[i];
+         else
+            values[i].f = ir->value.u[i]; /* below 1.30 the value is stored as a float */
+      }
+      break;
+   case GLSL_TYPE_INT:
+      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (glsl_version >= 130)
+            values[i].i = ir->value.i[i];
+         else
+            values[i].f = ir->value.i[i];
+      }
+      break;
+   case GLSL_TYPE_BOOL:
+      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         if (glsl_version >= 130)
+            values[i].b = ir->value.b[i];
+         else
+            values[i].f = ir->value.b[i];
+      }
+      break;
+   default:
+      assert(!"Non-float/uint/int/bool constant");
+   }
+
+   this->result = st_src_reg(file, -1, ir->type);
+   this->result.index = add_constant(file,
+                                     values,
+                                     ir->type->vector_elements,
+                                     gl_type,
+                                     &this->result.swizzle);
+}
+
+/* Look up the function_entry for \p sig, creating and registering it (with
+ * parameter and return-value storage) on first use. */
+function_entry *
+glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
+{
+   /* Reuse the entry created by an earlier lookup of this signature. */
+   foreach_iter(exec_list_iterator, it, this->function_signatures) {
+      function_entry *const known = (function_entry *)it.get();
+      if (known->sig == sig)
+         return known;
+   }
+
+   /* First time this signature is seen: build a fresh entry for it. */
+   function_entry *const fresh = ralloc(mem_ctx, function_entry);
+   fresh->sig = sig;
+   fresh->sig_id = this->next_signature_id++;
+   fresh->bgn_inst = NULL;
+
+   /* Reserve consecutive temporaries for every formal parameter. */
+   foreach_iter(exec_list_iterator, it, sig->parameters) {
+      ir_variable *const formal = (ir_variable *)it.get();
+
+      /* A parameter must not have been given storage before. */
+      variable_storage *slot = find_variable_storage(formal);
+      assert(!slot);
+
+      slot = new(mem_ctx) variable_storage(formal, PROGRAM_TEMPORARY,
+                                           this->next_temp);
+      this->variables.push_tail(slot);
+      this->next_temp += type_size(formal->type);
+   }
+
+   /* Only non-void functions need a register for the returned value. */
+   if (sig->return_type->is_void())
+      fresh->return_reg = undef_src;
+   else
+      fresh->return_reg = get_temp(sig->return_type);
+
+   this->function_signatures.push_tail(fresh);
+   return fresh;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_call *ir)
+{
+   ir_function_signature *const callee = ir->get_callee();
+   function_entry *const entry = get_function_signature(callee);
+
+   /* Copy each 'in' / 'inout' actual into its formal's storage; the formal
+    * list is walked in lock-step with the actual arguments. */
+   exec_list_iterator formal_iter = callee->parameters.iterator();
+   foreach_iter(exec_list_iterator, actual_iter, *ir) {
+      ir_rvalue *const actual = (ir_rvalue *)actual_iter.get();
+      ir_variable *const formal = (ir_variable *)formal_iter.get();
+      const bool is_input = formal->mode == ir_var_in ||
+                            formal->mode == ir_var_inout;
+
+      if (is_input) {
+         variable_storage *const slot = find_variable_storage(formal);
+         assert(slot);
+
+         actual->accept(this);
+         st_src_reg from = this->result;
+
+         st_dst_reg to;
+         to.file = slot->file;
+         to.index = slot->index;
+         to.reladdr = NULL;
+         to.writemask = WRITEMASK_XYZW;
+         to.cond_mask = COND_TR;
+
+         /* One MOV per register occupied by the parameter's type. */
+         for (int reg = 0; reg < type_size(formal->type); reg++) {
+            emit(ir, TGSI_OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+      formal_iter.next();
+   }
+   assert(!formal_iter.has_next());
+
+   /* Emit the call and record which function entry it targets. */
+   glsl_to_tgsi_instruction *const call_inst = emit(ir, TGSI_OPCODE_CAL);
+   call_inst->function = entry;
+
+   /* Copy each 'out' / 'inout' formal back to its actual argument. */
+   formal_iter = callee->parameters.iterator();
+   foreach_iter(exec_list_iterator, actual_iter, *ir) {
+      ir_rvalue *const actual = (ir_rvalue *)actual_iter.get();
+      ir_variable *const formal = (ir_variable *)formal_iter.get();
+      const bool is_output = formal->mode == ir_var_out ||
+                             formal->mode == ir_var_inout;
+
+      if (is_output) {
+         variable_storage *const slot = find_variable_storage(formal);
+         assert(slot);
+
+         st_src_reg from;
+         from.file = slot->file;
+         from.index = slot->index;
+         from.reladdr = NULL;
+         from.swizzle = SWIZZLE_NOOP;
+         from.negate = 0;
+
+         actual->accept(this);
+         st_dst_reg to = st_dst_reg(this->result);
+
+         for (int reg = 0; reg < type_size(formal->type); reg++) {
+            emit(ir, TGSI_OPCODE_MOV, to, from);
+            to.index++;
+            from.index++;
+         }
+      }
+      formal_iter.next();
+   }
+   assert(!formal_iter.has_next());
+
+   /* The call's result is the callee's return register. */
+   this->result = entry->return_reg;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_texture *ir)
+{
+   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_dst_reg result_dst, coord_dst;
+   glsl_to_tgsi_instruction *inst = NULL;
+   unsigned opcode = TGSI_OPCODE_NOP;
+
+   ir->coordinate->accept(this);
+
+   /* Put our coords in a temp.  We'll need to modify them for shadow,
+    * projection, or LOD, so the only case we'd use it as is is if
+    * we're doing plain old texturing.  The optimization passes on
+    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+    */
+   coord = get_temp(glsl_type::vec4_type);
+   coord_dst = st_dst_reg(coord);
+   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+
+   if (ir->projector) {
+      ir->projector->accept(this);
+      projector = this->result;
+   }
+
+   /* Storage for our result.  Ideally for an assignment we'd be using
+    * the actual storage for the result here, instead.
+    */
+   result_src = get_temp(glsl_type::vec4_type);
+   result_dst = st_dst_reg(result_src);
+
+   switch (ir->op) { /* pick the opcode and evaluate its extra operands */
+   case ir_tex:
+      opcode = TGSI_OPCODE_TEX;
+      break;
+   case ir_txb:
+      opcode = TGSI_OPCODE_TXB;
+      ir->lod_info.bias->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txl:
+      opcode = TGSI_OPCODE_TXL;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txd:
+      opcode = TGSI_OPCODE_TXD;
+      ir->lod_info.grad.dPdx->accept(this);
+      dx = this->result;
+      ir->lod_info.grad.dPdy->accept(this);
+      dy = this->result;
+      break;
+   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+   }
+
+   if (ir->projector) {
+      if (opcode == TGSI_OPCODE_TEX) {
+         /* Slot the projector in as the last component of the coord. */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
+         coord_dst.writemask = WRITEMASK_XYZW;
+         opcode = TGSI_OPCODE_TXP;
+      } else {
+         st_src_reg coord_w = coord;
+         coord_w.swizzle = SWIZZLE_WWWW;
+
+         /* For the other TEX opcodes there's no projective version
+          * since the last slot is taken up by LOD info.  Do the
+          * projective divide now.
+          */
+         coord_dst.writemask = WRITEMASK_W;
+         emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
+
+         /* In the case where we have to project the coordinates "by hand,"
+          * the shadow comparator value must also be projected.
+          */
+         st_src_reg tmp_src = coord;
+         if (ir->shadow_comparitor) {
+            /* Slot the shadow value in as the second to last component of the
+             * coord.
+             */
+            ir->shadow_comparitor->accept(this);
+
+            tmp_src = get_temp(glsl_type::vec4_type);
+            st_dst_reg tmp_dst = st_dst_reg(tmp_src);
+
+            tmp_dst.writemask = WRITEMASK_Z;
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
+
+            tmp_dst.writemask = WRITEMASK_XY;
+            emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
+         }
+
+         coord_dst.writemask = WRITEMASK_XYZ; /* multiply xyz by the reciprocal held in w */
+         emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
+
+         coord_dst.writemask = WRITEMASK_XYZW;
+         coord.swizzle = SWIZZLE_XYZW;
+      }
+   }
+
+   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
+    * comparator was put in the correct place (and projected) by the code,
+    * above, that handles by-hand projection.
+    */
+   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
+      /* Slot the shadow value in as the second to last component of the
+       * coord.
+       */
+      ir->shadow_comparitor->accept(this);
+      coord_dst.writemask = WRITEMASK_Z;
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
+      coord_dst.writemask = WRITEMASK_W;
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
+      coord_dst.writemask = WRITEMASK_XYZW;
+   }
+
+   if (opcode == TGSI_OPCODE_TXD) /* only TXD takes the two gradient operands */
+      inst = emit(ir, opcode, result_dst, coord, dx, dy);
+   else
+      inst = emit(ir, opcode, result_dst, coord);
+
+   if (ir->shadow_comparitor)
+      inst->tex_shadow = GL_TRUE;
+
+   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
+        					   this->shader_program,
+        					   this->prog);
+
+   const glsl_type *sampler_type = ir->sampler->type;
+
+   switch (sampler_type->sampler_dimensionality) { /* map sampler type to texture target */
+   case GLSL_SAMPLER_DIM_1D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+      inst->tex_target = (sampler_type->sampler_array)
+         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+      inst->tex_target = TEXTURE_3D_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_CUBE:
+      inst->tex_target = TEXTURE_CUBE_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_RECT:
+      inst->tex_target = TEXTURE_RECT_INDEX;
+      break;
+   case GLSL_SAMPLER_DIM_BUF:
+      assert(!"FINISHME: Implement ARB_texture_buffer_object");
+      break;
+   default:
+      assert(!"Should not get here.");
+   }
+
+   this->result = result_src;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_return *ir)
+{
+   ir_rvalue *const value = ir->get_value();
+
+   if (value != NULL) {
+      /* Returning a value requires a current function entry. */
+      assert(current_function);
+
+      value->accept(this);
+      st_src_reg from = this->result;
+      st_dst_reg to = st_dst_reg(current_function->return_reg);
+
+      /* Copy the value register by register into the return storage. */
+      const glsl_type *const ret_type = current_function->sig->return_type;
+      for (int reg = 0; reg < type_size(ret_type); reg++) {
+         emit(ir, TGSI_OPCODE_MOV, to, from);
+         to.index++;
+         from.index++;
+      }
+   }
+   emit(ir, TGSI_OPCODE_RET);
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_discard *ir)
+{
+   if (ir->condition == NULL) {
+      /* Unconditional discard. */
+      emit(ir, TGSI_OPCODE_KILP);
+   } else {
+      /* Conditional discard: KIL is given the condition with its sign flipped. */
+      ir->condition->accept(this);
+      this->result.negate = ~this->result.negate;
+      emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
+   }
+   /* Flag the fragment program as containing a kill instruction. */
+   ((struct gl_fragment_program *)this->prog)->UsesKill = GL_TRUE;
+}
+
+void
+glsl_to_tgsi_visitor::visit(ir_if *ir)
+{
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
+   glsl_to_tgsi_instruction *prev_inst;
+   /* Remember the current tail to detect whether the condition emits code. */
+   prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+   ir->condition->accept(this);
+   assert(this->result.file != PROGRAM_UNDEFINED);
+
+   if (this->options->EmitCondCodes) {
+      cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+
+      /* See if we actually generated any instruction for generating
+       * the condition.  If not, then cook up a move to a temp so we
+       * have something to set cond_update on.
+       */
+      if (cond_inst == prev_inst) {
+         st_src_reg temp = get_temp(glsl_type::bool_type);
+         cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
+      }
+      cond_inst->cond_update = GL_TRUE;
+
+      if_inst = emit(ir->condition, TGSI_OPCODE_IF);
+      if_inst->dst.cond_mask = COND_NE;
+   } else {
+      emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
+   }
+
+   /* emit() has already appended the IF to this->instructions, so it must
+    * not be pushed onto the list a second time here. */
+   visit_exec_list(&ir->then_instructions, this);
+
+   if (!ir->else_instructions.is_empty()) {
+      emit(ir->condition, TGSI_OPCODE_ELSE);
+      visit_exec_list(&ir->else_instructions, this);
+   }
+
+   emit(ir->condition, TGSI_OPCODE_ENDIF);
+}
+
+glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
+{
+   /* Parent context for this visitor's ralloc allocations. */
+   mem_ctx = ralloc_context(NULL);
+
+   /* Nothing has been visited yet: no result and no current function. */
+   result.file = PROGRAM_UNDEFINED;
+   current_function = NULL;
+   next_temp = next_signature_id = 1;
+   num_immediates = num_address_regs = 0;
+   indirect_addr_temps = indirect_addr_consts = false;
+}
+
+glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
+{
+   ralloc_free(mem_ctx); /* frees the context created in the constructor */
+}
+
+extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
+{
+   delete v; /* C-linkage entry point for destroying a visitor */
+}
+
+
+/**
+ * Record which texture samplers the generated instructions reference,
+ * filling in v->samplers_used and the sampler-related fields of \p prog.
+ */
+static void
+count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
+{
+   v->samplers_used = 0;
+
+   foreach_iter(exec_list_iterator, it, v->instructions) {
+      glsl_to_tgsi_instruction *const tex = (glsl_to_tgsi_instruction *)it.get();
+
+      /* Only texture instructions reference a sampler. */
+      if (!is_tex_instruction(tex->op))
+         continue;
+
+      const int sampler_bit = 1 << tex->sampler;
+      v->samplers_used |= sampler_bit;
+      prog->SamplerTargets[tex->sampler] = (gl_texture_index)tex->tex_target;
+      if (tex->tex_shadow)
+         prog->ShadowSamplers |= sampler_bit;
+   }
+
+   prog->SamplersUsed = v->samplers_used;
+   _mesa_update_shader_textures_used(prog);
+}
+
+
+/**
+ * Check that the translated program stays within the resource limits of
+ * the context for its target: the number of texture samplers used and the
+ * number of parameters.  Every exceeded limit is recorded as a linker error.
+ *
+ * \param shader_program  program that receives any linker error
+ * \param prog            visitor whose samplers_used mask is checked
+ * \param proginfo        program whose target and parameter list are checked
+ *
+ * XXX more checks are needed...
+ */
+static void
+check_resources(const struct gl_context *ctx,
+                struct gl_shader_program *shader_program,
+                glsl_to_tgsi_visitor *prog,
+                struct gl_program *proginfo)
+{
+   /* Number of bits set in the visitor's samplers_used mask. */
+   const unsigned int num_samplers = _mesa_bitcount(prog->samplers_used);
+
+   if (proginfo->Target == GL_VERTEX_PROGRAM_ARB) {
+      /* Vertex shader limits. */
+      if (num_samplers > ctx->Const.MaxVertexTextureImageUnits)
+         fail_link(shader_program, "Too many vertex shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many vertex shader constants");
+   } else if (proginfo->Target == MESA_GEOMETRY_PROGRAM) {
+      /* Geometry shader limits. */
+      if (num_samplers > ctx->Const.MaxGeometryTextureImageUnits)
+         fail_link(shader_program, "Too many geometry shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters >
+          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4)
+         fail_link(shader_program, "Too many geometry shader constants");
+   } else if (proginfo->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      /* Fragment shader limits. */
+      if (num_samplers > ctx->Const.MaxTextureImageUnits)
+         fail_link(shader_program, "Too many fragment shader texture samplers");
+
+      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS)
+         fail_link(shader_program, "Too many fragment shader constants");
+   } else {
+      _mesa_problem(ctx, "unexpected program type in check_resources()");
+   }
+}
+
+
+
+struct uniform_sort {
+   struct gl_uniform *u; /* the uniform to be added */
+   int pos;              /* its parameter index for the shader being processed */
+};
+
+/* qsort() comparator ordering uniform_sort entries by increasing \c pos.
+ *
+ * The shader_program->Uniforms list is almost sorted in increasing
+ * uniform->{Frag,Vert}Pos locations, but not quite when there are uniforms
+ * shared between targets; parameters must be added in increasing order.
+ */
+static int
+sort_uniforms(const void *a, const void *b)
+{
+   const int lhs = ((const struct uniform_sort *)a)->pos;
+   const int rhs = ((const struct uniform_sort *)b)->pos;
+   return (lhs > rhs) - (lhs < rhs); /* cannot overflow, unlike lhs - rhs */
+}
+
+/* Add the uniforms to the parameters.  The linker chose locations
+ * in our parameters lists (which weren't created yet), which the
+ * uniforms code will use to poke values into our parameters list
+ * when uniforms are updated.
+ */
+static void
+add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
+        			struct gl_shader *shader,
+        			struct gl_program *prog)
+{
+   unsigned int i;
+   unsigned int next_sampler = 0, num_uniforms = 0;
+   struct uniform_sort *sorted_uniforms;
+
+   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
+        			  shader_program->Uniforms->NumUniforms);
+
+   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
+      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
+      int parameter_index = -1; /* stays -1 for any other shader type */
+
+      switch (shader->Type) {
+      case GL_VERTEX_SHADER:
+         parameter_index = uniform->VertPos;
+         break;
+      case GL_FRAGMENT_SHADER:
+         parameter_index = uniform->FragPos;
+         break;
+      case GL_GEOMETRY_SHADER:
+         parameter_index = uniform->GeomPos;
+         break;
+      }
+
+      /* Only add uniforms used in our target. */
+      if (parameter_index != -1) {
+         sorted_uniforms[num_uniforms].pos = parameter_index;
+         sorted_uniforms[num_uniforms].u = uniform;
+         num_uniforms++;
+      }
+   }
+
+   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
+         sort_uniforms);
+
+   for (i = 0; i < num_uniforms; i++) { /* add in increasing position order */
+      struct gl_uniform *uniform = sorted_uniforms[i].u;
+      int parameter_index = sorted_uniforms[i].pos;
+      const glsl_type *type = uniform->Type;
+      unsigned int size;
+
+      if (type->is_vector() ||
+          type->is_scalar()) {
+         size = type->vector_elements;
+      } else {
+         size = type_size(type) * 4;
+      }
+
+      gl_register_file file;
+      if (type->is_sampler() ||
+          (type->is_array() && type->fields.array->is_sampler())) {
+         file = PROGRAM_SAMPLER;
+      } else {
+         file = PROGRAM_UNIFORM;
+      }
+
+      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
+        					 uniform->Name);
+
+      if (index < 0) { /* not in the parameter list yet */
+         index = _mesa_add_parameter(prog->Parameters, file,
+        			     uniform->Name, size, type->gl_type,
+        			     NULL, NULL, 0x0);
+
+         /* Sampler uniform values are stored in prog->SamplerUnits,
+          * and the entry in that array is selected by this index we
+          * store in ParameterValues[].
+          */
+         if (file == PROGRAM_SAMPLER) {
+            for (unsigned int j = 0; j < size / 4; j++)
+               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
+         }
+
+         /* The location chosen in the Parameters list here (returned
+          * from _mesa_add_parameter) has to match what the linker chose.
+          */
+         if (index != parameter_index) {
+            fail_link(shader_program, "Allocation of uniform `%s' to target "
+        	      "failed (%d vs %d)\n",
+        	      uniform->Name, index, parameter_index);
+         }
+      }
+   }
+
+   ralloc_free(sorted_uniforms);
+}
+
+static void
+set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
+        		struct gl_shader_program *shader_program,
+        		const char *name, const glsl_type *type,
+        		ir_constant *val)
+{
+   if (type->is_record()) { /* structures: recurse once per field as "name.field" */
+      ir_constant *field_constant;
+
+      field_constant = (ir_constant *)val->components.get_head();
+
+      for (unsigned int i = 0; i < type->length; i++) {
+         const glsl_type *field_type = type->fields.structure[i].type;
+         const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
+        				    type->fields.structure[i].name);
+         set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
+        			 field_type, field_constant);
+         field_constant = (ir_constant *)field_constant->next;
+      }
+      return;
+   }
+
+   int loc = _mesa_get_uniform_location(ctx, shader_program, name);
+
+   if (loc == -1) {
+      fail_link(shader_program,
+        	"Couldn't find uniform for initializer %s\n", name);
+      return;
+   }
+
+   /* Arrays are uploaded one element at a time; anything else is one element. */
+   for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
+      ir_constant *element;
+      const glsl_type *element_type;
+      if (type->is_array()) {
+         element = val->array_elements[i];
+         element_type = type->fields.array;
+      } else {
+         element = val;
+         element_type = type;
+      }
+
+      void *values;
+
+      if (element_type->base_type == GLSL_TYPE_BOOL) {
+         /* Booleans are copied into an int array and uploaded as an int type. */
+         int *conv = ralloc_array(mem_ctx, int, element_type->components());
+         for (unsigned int j = 0; j < element_type->components(); j++) {
+            conv[j] = element->value.b[j];
+         }
+         values = (void *)conv;
+         element_type = glsl_type::get_instance(GLSL_TYPE_INT,
+        					element_type->vector_elements,
+        					1);
+      } else {
+         values = &element->value;
+      }
+
+      if (element_type->is_matrix()) {
+         _mesa_uniform_matrix(ctx, shader_program,
+        		      element_type->matrix_columns,
+        		      element_type->vector_elements,
+        		      loc, 1, GL_FALSE, (GLfloat *)values);
+         loc += element_type->matrix_columns; /* advance past this matrix's columns */
+      } else {
+         _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
+        	       values, element_type->gl_type);
+         loc += type_size(element_type);
+      }
+   }
+}
+
+/* Apply the constant initializer of every uniform declared in the linked
+ * shaders of \p shader_program.
+ */
+static void
+set_uniform_initializers(struct gl_context *ctx,
+                         struct gl_shader_program *shader_program)
+{
+   void *scratch = NULL;
+
+   for (unsigned int stage = 0; stage < MESA_SHADER_TYPES; stage++) {
+      struct gl_shader *const linked = shader_program->_LinkedShaders[stage];
+      if (linked != NULL) {
+         foreach_iter(exec_list_iterator, it, *linked->ir) {
+            ir_variable *const var = ((ir_instruction *)it.get())->as_variable();
+            const bool wanted = var != NULL && var->mode == ir_var_uniform &&
+                                var->constant_value != NULL;
+            if (!wanted)
+               continue;
+
+            /* Temporary context, created lazily on the first initializer. */
+            if (scratch == NULL)
+               scratch = ralloc_context(NULL);
+            set_uniform_initializer(ctx, scratch, shader_program, var->name,
+                                    var->type, var->constant_value);
+         }
+      }
+   }
+   ralloc_free(scratch);
+}
+
+/*
+ * Scan/rewrite the program to remove reads of registers in the given file,
+ * which must be PROGRAM_OUTPUT or PROGRAM_VARYING (the latter only for
+ * vertex programs; both conditions are asserted below).
+ * In GLSL shaders, varying vars can be read and written, but on some
+ * hardware trying to read an output register causes trouble.  So every
+ * register that is read gets its reads and writes redirected to a temporary,
+ * and a MOV from that temporary to the real register is appended at the end.
+ * Based on _mesa_remove_output_reads from programopt.c.
+ */
+void
+glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
+{
+   GLuint i;
+   GLint outputMap[VERT_RESULT_MAX]; /* register index -> replacement temp, -1 if never read */
+   GLint outputTypes[VERT_RESULT_MAX]; /* only valid where outputMap[] is not -1 */
+   GLuint numVaryingReads = 0;
+   GLboolean usedTemps[MAX_TEMPS];
+   GLuint firstTemp = 0; /* where the search for the next free temp starts */
+
+   _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
+                             usedTemps, MAX_TEMPS);
+
+   assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
+   assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
+
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      outputMap[i] = -1;
+
+   /* look for instructions which read from varying vars */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      const GLuint numSrc = num_inst_src_regs(inst->op);
+      GLuint j;
+      for (j = 0; j < numSrc; j++) {
+         if (inst->src[j].file == type) {
+            /* replace the read with a temp reg */
+            const GLuint var = inst->src[j].index; /* NOTE(review): not checked against VERT_RESULT_MAX */
+            if (outputMap[var] == -1) {
+               numVaryingReads++;
+               outputMap[var] = _mesa_find_free_register(usedTemps,
+                                                         MAX_TEMPS,
+                                                         firstTemp);
+               outputTypes[var] = inst->src[j].type;
+               firstTemp = outputMap[var] + 1; /* so the next search skips this temp */
+            }
+            inst->src[j].file = PROGRAM_TEMPORARY;
+            inst->src[j].index = outputMap[var];
+         }
+      }
+   }
+
+   if (numVaryingReads == 0)
+      return; /* nothing to be done */
+
+   /* look for instructions which write to the varying vars identified above */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) { /* NOTE(review): index unchecked too */
+         /* change inst to write to the temp reg, instead of the varying */
+         inst->dst.file = PROGRAM_TEMPORARY;
+         inst->dst.index = outputMap[inst->dst.index];
+      }
+   }
+   
+   /* insert new MOV instructions at the end */
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (outputMap[i] >= 0) {
+         /* MOV VAR[i], TEMP[tmp]; */
+         st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
+         st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
+         dst.index = i;
+         this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
+      }
+   }
+}
+
+/**
+ * Returns the mask of source channels (bitmask of WRITEMASK_X,Y,Z,W) that
+ * the instruction reads from src: for every destination channel enabled in
+ * dst.writemask, the channel src.swizzle selects for it.
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int channels_read = 0;
+
+   for (int chan = 0; chan < 4; chan++) {
+      const unsigned selected = GET_SWZ(src.swizzle, chan);
+      ASSERT(selected < 4);
+
+      /* Channels that are not written never fetch their source. */
+      if ((dst.writemask & (1 << chan)) == 0 || selected > SWIZZLE_W)
+         continue;
+      channels_read |= 1 << selected;
+   }
+   return channels_read;
+}
+
+/**
+ * Replace CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP is the first
+ * instruction to write to register T0.  Several GLSL IR lowering passes
+ * (e.g. for branches and relative addressing) create many conditional
+ * assignments that end up as CMP instructions of this form.
+ *
+ * Why this is safe: CMP T0, T1 T2 T0 means
+ *    if (T1 < 0.0) MOV T0, T2; else MOV T0, T0;
+ * When the condition holds the MOV T0, T2 replacement matches the original
+ * program.  Otherwise MOV T0, T0 would store a garbage value, since T0 is
+ * uninitialized, so anything reading T0 afterwards was going to read
+ * garbage anyway and storing T2 instead makes no difference.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   /* Channels written so far, per temporary and per output register. */
+   unsigned tempWrites[MAX_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned *written = NULL;
+
+      /* Give up on relative addressing or any kind of flow control. */
+      if (inst->dst.reladdr || tgsi_get_opcode_info(inst->op)->is_branch)
+         return;
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNSUB:
+      case TGSI_OPCODE_CONT:
+      case TGSI_OPCODE_END:
+      case TGSI_OPCODE_ENDSUB:
+      case TGSI_OPCODE_RET:
+         return;
+      default:
+         break;
+      }
+
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         written = &outputWrites[inst->dst.index];
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_TEMPS);
+         written = &tempWrites[inst->dst.index];
+      }
+
+      /* Remember what was written before, then account for this write. */
+      const unsigned before = written ? *written : 0;
+      if (written)
+         *written |= inst->dst.writemask;
+
+      /* A CMP is a conditional write only when it has not been preceded by
+       * a write to the same channels and src[2] is the destination itself. */
+      if (inst->op != TGSI_OPCODE_CMP || (inst->dst.writemask & before))
+         continue;
+      if (inst->src[2].file != inst->dst.file ||
+          inst->src[2].index != inst->dst.index)
+         continue;
+      if (inst->dst.writemask != get_src_arg_mask(inst->dst, inst->src[2]))
+         continue;
+
+      inst->op = TGSI_OPCODE_MOV;
+      inst->src[0] = inst->src[1];
+   }
+}
+
+/* Rewrites every source and destination operand that names temporary
+ * register `index` so that it names temporary `new_index` instead. */
+void
+glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
+{
+   foreach_iter(exec_list_iterator, it, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)it.get();
+      const unsigned num_srcs = num_inst_src_regs(cur->op);
+
+      for (unsigned s = 0; s < num_srcs; s++) {
+         st_src_reg &operand = cur->src[s];
+         if (operand.file == PROGRAM_TEMPORARY && operand.index == index)
+            operand.index = new_index;
+      }
+
+      st_dst_reg &result = cur->dst;
+      if (result.file == PROGRAM_TEMPORARY && result.index == index)
+         result.index = new_index;
+   }
+}
+
+/* Returns the index of the first instruction that reads temporary `index`,
+ * the index of the outermost enclosing BGNLOOP when that read happens inside
+ * a loop, or -1 when the temporary is never read. */
+int
+glsl_to_tgsi_visitor::get_first_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
+   int i = 0; /* signed, like loop_start and the return value */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      for (unsigned j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            return (depth == 0) ? i : loop_start;
+         }
+      }
+
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         if (depth++ == 0)
+            loop_start = i;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         if (--depth == 0)
+            loop_start = -1;
+      }
+      assert(depth >= 0);
+      i++;
+   }
+   return -1;
+}
+
+/* Finds the first instruction writing temporary `index`.  A write inside a
+ * loop is reported at the position of the outermost enclosing BGNLOOP;
+ * -1 means that nothing writes the temporary. */
+int
+glsl_to_tgsi_visitor::get_first_temp_write(int index)
+{
+   int nesting = 0;        /* how many loops are currently open */
+   int outer_bgnloop = -1; /* position of the outermost open BGNLOOP */
+   int pos = 0;            /* position of the instruction being visited */
+
+   foreach_iter(exec_list_iterator, it, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)it.get();
+
+      if (cur->dst.file == PROGRAM_TEMPORARY && cur->dst.index == index)
+         return nesting ? outer_bgnloop : pos;
+
+      if (cur->op == TGSI_OPCODE_BGNLOOP && nesting++ == 0)
+         outer_bgnloop = pos;
+      else if (cur->op == TGSI_OPCODE_ENDLOOP && --nesting == 0)
+         outer_bgnloop = -1;
+
+      assert(nesting >= 0);
+
+      pos++;
+   }
+
+   return -1;
+}
+
+/* Returns the index of the last instruction that reads temporary `index`, or
+ * -1 if it is never read.  A read inside a loop is provisionally recorded as
+ * -2 and resolved to the ENDLOOP that closes the outermost enclosing loop. */
+int
+glsl_to_tgsi_visitor::get_last_temp_read(int index)
+{
+   int depth = 0; /* loop depth */
+   int last = -1; /* index of last instruction that reads the temporary */
+   int i = 0;     /* signed: it is mixed with the -2 marker and stored in last */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      for (unsigned j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
+             inst->src[j].index == index) {
+            last = (depth == 0) ? i : -2;
+         }
+      }
+      if (inst->op == TGSI_OPCODE_BGNLOOP) {
+         depth++;
+      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
+         if (--depth == 0 && last == -2)
+            last = i;
+      }
+      assert(depth >= 0);
+      i++;
+   }
+   assert(last >= -1);
+   return last;
+}
+
+/* Locates the last instruction writing temporary `index` (-1 if none).  A write
+ * inside a loop is attributed to the ENDLOOP closing the outermost loop. */
+int
+glsl_to_tgsi_visitor::get_last_temp_write(int index)
+{
+   const int IN_LOOP = -2; /* placeholder until the outermost loop closes */
+   int nesting = 0;        /* how many loops are currently open */
+   int result = -1;
+   int pos = 0;
+
+   foreach_iter(exec_list_iterator, it, this->instructions) {
+      glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)it.get();
+      if (cur->dst.file == PROGRAM_TEMPORARY && cur->dst.index == index)
+         result = nesting ? IN_LOOP : pos;
+      if (cur->op == TGSI_OPCODE_BGNLOOP) {
+         nesting++;
+      } else if (cur->op == TGSI_OPCODE_ENDLOOP) {
+         if (--nesting == 0 && result == IN_LOOP)
+            result = pos;
+      }
+      assert(nesting >= 0);
+      pos++;
+   }
+   assert(result >= -1);
+   return result;
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY register
+ * channels for copy propagation and updates following instructions to
+ * use the original versions.  Each temporary channel (4 * index + channel)
+ * has a slot holding the MOV that wrote it, or NULL when no copy is known.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
+ *
+ * and after:
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * which allows for dead code elimination on TEMP[1]'s writes.
+ */
+void
+glsl_to_tgsi_visitor::copy_propagate(void)
+{
+   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
+        					    glsl_to_tgsi_instruction *,
+        					    this->next_temp * 4);
+   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); /* IF depth at which each slot was filled */
+   int level = 0; /* current IF nesting depth */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      /* First, do any copy propagation possible into the src regs. */
+      for (int r = 0; r < 3; r++) {
+         glsl_to_tgsi_instruction *first = NULL;
+         bool good = true;
+         int acp_base = inst->src[r].index * 4; /* only used once src[r] is known to be a temporary */
+
+         if (inst->src[r].file != PROGRAM_TEMPORARY ||
+             inst->src[r].reladdr)
+            continue;
+
+         /* See if we can find entries in the ACP consisting of MOVs
+          * from the same src register for all the swizzled channels
+          * of this src register reference.
+          */
+         for (int i = 0; i < 4; i++) {
+            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
+
+            if (!copy_chan) {
+               good = false;
+               break;
+            }
+
+            assert(acp_level[acp_base + src_chan] <= level);
+
+            if (!first) {
+               first = copy_chan;
+            } else {
+               if (first->src[0].file != copy_chan->src[0].file ||
+        	   first->src[0].index != copy_chan->src[0].index) {
+        	  good = false;
+        	  break;
+               }
+            }
+         }
+
+         if (good) {
+            /* We've now validated that we can copy-propagate to
+             * replace this src register reference.  Do it.
+             */
+            inst->src[r].file = first->src[0].file;
+            inst->src[r].index = first->src[0].index;
+
+            int swizzle = 0; /* new swizzle, composed through each channel's MOV */
+            for (int i = 0; i < 4; i++) {
+               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
+               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
+        		   (3 * i));
+            }
+            inst->src[r].swizzle = swizzle;
+         }
+      }
+
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the ACP entirely. */
+         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the block from the ACP, but
+          * leaving those that were not touched.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!acp[4 * r + c])
+        	  continue;
+
+               if (acp_level[4 * r + c] >= level)
+        	  acp[4 * r + c] = NULL;
+            }
+         }
+         if (inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         break;
+
+      default:
+         /* Continuing the block, clear any written channels from
+          * the ACP.
+          */
+         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
+            /* Any temporary might be written, so no copy propagation
+             * across this instruction.
+             */
+            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+         } else if (inst->dst.file == PROGRAM_OUTPUT &&
+        	    inst->dst.reladdr) {
+            /* Any output might be written, so no copy propagation
+             * from outputs across this instruction.
+             */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+        	     acp[4 * r + c] = NULL;
+               }
+            }
+         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
+        	    inst->dst.file == PROGRAM_OUTPUT) {
+            /* Clear where it's used as dst. */
+            if (inst->dst.file == PROGRAM_TEMPORARY) {
+               for (int c = 0; c < 4; c++) {
+        	  if (inst->dst.writemask & (1 << c)) {
+        	     acp[4 * inst->dst.index + c] = NULL;
+        	  }
+               }
+            }
+
+            /* Clear where it's used as src. */
+            for (int r = 0; r < this->next_temp; r++) {
+               for (int c = 0; c < 4; c++) {
+        	  if (!acp[4 * r + c])
+        	     continue;
+
+        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
+        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
+        	      inst->dst.writemask & (1 << src_chan))
+        	  {
+        	     acp[4 * r + c] = NULL;
+        	  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this is a copy, add it to the ACP. */
+      if (inst->op == TGSI_OPCODE_MOV &&
+          inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate &&
+          !inst->src[0].reladdr &&
+          !inst->src[0].negate) {
+         for (int i = 0; i < 4; i++) {
+            if (inst->dst.writemask & (1 << i)) {
+               acp[4 * inst->dst.index + i] = inst;
+               acp_level[4 * inst->dst.index + i] = level;
+            }
+         }
+      }
+   }
+
+   ralloc_free(acp_level);
+   ralloc_free(acp);
+}
+
+/*
+ * Whole-register dead code elimination for PROGRAM_TEMPORARY registers:
+ * every write to a temporary that comes after the last read of that
+ * temporary is deleted, which removes all writes to a never-read register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.  As an example, a TXP production after copy propagation but
+ * before this pass:
+ *
+ * 0: MOV TEMP[1], INPUT[4].xyyy;
+ * 1: MOV TEMP[1].w, INPUT[4].wwww;
+ * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * and after this pass:
+ *
+ * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
+ *
+ * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
+ * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
+ */
+void
+glsl_to_tgsi_visitor::eliminate_dead_code(void)
+{
+   for (int temp = 0; temp < this->next_temp; temp++) {
+      const int final_read = get_last_temp_read(temp);
+      int pos = 0; /* position in the list as it was when this scan began */
+
+      foreach_iter(exec_list_iterator, it, this->instructions) {
+         glsl_to_tgsi_instruction *cur = (glsl_to_tgsi_instruction *)it.get();
+         const bool writes_temp = cur->dst.file == PROGRAM_TEMPORARY &&
+                                  cur->dst.index == temp;
+
+         /* Positions keep counting removed instructions, so they stay
+          * comparable with final_read. */
+         if (pos++ > final_read && writes_temp) {
+            it.remove();
+            delete cur;
+         }
+      }
+   }
+}
+
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.  This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code.  However, there is some dead code that can be eliminated by
+ * eliminate_dead_code() but not this function - for example, this function
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ *
+ * Returns the number of instructions that were removed entirely.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   /* writes[4 * reg + chan]: the write to that channel not read so far;
+    * write_level[]: the IF nesting depth at which it was recorded. */
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                     glsl_to_tgsi_instruction *,
+                                                     this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+      case TGSI_OPCODE_ELSE:
+         /* Writes recorded inside the block that ends here are conditional.
+          * Promote them to the enclosing level: otherwise a write in a
+          * later, unrelated IF block at the same nesting depth would flag
+          * them as overwritten although that later block may not execute.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (writes[4 * r + c] && write_level[4 * r + c] >= level)
+                  write_level[4 * r + c] = level - 1;
+            }
+         }
+         if (inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.  Visits src[0..2], the same three
+          * sources that copy_propagate() handles.
+          */
+         for (int i = 0; i < 3; i++) {
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src. */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+
+               for (int c = 0; c < 4; c++) {
+                  if (src_chans & (1 << c)) {
+                     writes[4 * inst->src[i].index + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+
+   return removed;
+}
+
+/* Merges temporary registers together where possible to reduce the number of
+ * registers needed to run a program.
+ *
+ * Produces optimal code only after copy propagation and dead code elimination
+ * have been run. */
+void
+glsl_to_tgsi_visitor::merge_registers(void)
+{
+   int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
+   int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
+   int i, j;
+
+   /* Cache the index of the last read of and the first write to each temp
+    * register so that the instruction list is not traversed as much. */
+   for (i=0; i < this->next_temp; i++) {
+      last_reads[i] = get_last_temp_read(i);
+      first_writes[i] = get_first_temp_write(i);
+   }
+
+   /* Start looking for registers with non-overlapping usages that can be
+    * merged together. */
+   for (i=0; i < this->next_temp; i++) {
+      /* Don't touch unused registers. */
+      if (last_reads[i] < 0 || first_writes[i] < 0) continue;
+
+      for (j=0; j < this->next_temp; j++) {
+         /* Don't touch unused registers, and never merge a register with
+          * itself: that would mark i as unused below and make every later
+          * candidate look mergeable. */
+         if (j == i || last_reads[j] < 0 || first_writes[j] < 0) continue;
+
+         /* We can merge the two registers if the first write to j is after or
+          * in the same instruction as the last read from i.  Note that the
+          * register at index i will always be used earlier or at the same time
+          * as the register at index j. */
+         if (first_writes[i] <= first_writes[j] &&
+             last_reads[i] <= first_writes[j])
+         {
+            rename_temp_register(j, i); /* Replace all references to j with i.*/
+
+            /* The merged register takes over j's last read; mark the newly
+             * unused register index j as such. */
+            last_reads[i] = last_reads[j];
+            first_writes[j] = -1;
+            last_reads[j] = -1;
+         }
+      }
+   }
+
+   ralloc_free(last_reads);
+   ralloc_free(first_writes);
+}
+
+/* Compacts the temporary register indices: each temporary that is read
+ * somewhere is moved down to the lowest index not yet handed out, and
+ * next_temp is set to the number of temporaries kept. */
+void
+glsl_to_tgsi_visitor::renumber_registers(void)
+{
+   int kept = 0; /* also the next index to hand out */
+
+   for (int old_index = 0; old_index < this->next_temp; old_index++) {
+      const bool is_read = get_first_temp_read(old_index) >= 0;
+      if (is_read && old_index != kept)
+         rename_temp_register(old_index, kept);
+      if (is_read)
+         kept++;
+   }
+   this->next_temp = kept;
+}
+
+/**
+ * Builds, in fp->glsl_to_tgsi, a visitor that applies the current pixel
+ * transfer ops (scale/bias, pixel maps) before a copy of `original`'s code.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /* Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions: */
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      glsl_to_tgsi_instruction *copy;
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      copy = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      /* emit() only gets the opcode and registers; carry the rest by hand. */
+      copy->sampler = inst->sampler;
+      copy->tex_target = inst->tex_target;
+      copy->saturate = inst->saturate;
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ *
+ * A new glsl_to_tgsi_visitor is created, seeded with the state of
+ * \p original, given a TEX + KIL prologue, and then filled with a copy of
+ * every instruction of \p original.
+ *
+ * \param fp            fragment program whose gl_program info is updated and
+ *                      whose glsl_to_tgsi member receives the new visitor
+ * \param original      visitor holding the user's shader
+ * \param samplerIndex  sampler unit the bitmap texture is sampled from
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   /* Bitwise copy of the list head only: no immediate_storage node is
+    * duplicated here. */
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+   /* The immediate count must travel with the list: st_translate_program()
+    * sizes its immediates array from num_immediates and then stores one
+    * entry per list element. */
+   v->num_immediates = original->num_immediates;
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel>0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      /* Record every output the user's shader writes... */
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      /* ...and every input it reads. */
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
+   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/* ------------------------- TGSI conversion stuff -------------------------- */
+struct label {
+   unsigned branch_target;   /* value given to get_label(); indexes st_translate::insn[] at fix-up time */
+   unsigned token;           /* get_label() returns the address of this slot; its value is passed to ureg_fixup_label() */
+};
+
+/**
+ * Intermediate state used during shader translation.
+ *
+ * One instance lives on the stack of st_translate_program(), which
+ * zero-fills it, populates the register tables below and frees the
+ * heap-allocated members (insn, labels, constants, immediates) on exit.
+ */
+struct st_translate {
+   struct ureg_program *ureg;
+
+   struct ureg_dst temps[MAX_TEMPS];  /* declared on demand by dst_register()/src_register(), or all up front when temps are indirectly addressed */
+   struct ureg_src *constants;        /* heap array, one entry per proginfo->Parameters element */
+   struct ureg_src *immediates;       /* heap array sized from the visitor's num_immediates */
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst address[1];        /* address[0] is declared only when the program uses an address register */
+   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
+
+   /* Extra info for handling point size clamping in vertex shader */
+   struct ureg_dst pointSizeResult; /**< Actual point size output register */
+   struct ureg_src pointSizeConst;  /**< Point size range constant register */
+   GLint pointSizeOutIndex;         /**< Temp point size output register */
+   GLboolean prevInstWrotePointSize;
+
+   const GLuint *inputMapping;
+   const GLuint *outputMapping;
+
+   /* For every instruction that contains a label (eg CALL), keep
+    * details so that we can go back afterwards and emit the correct
+    * tgsi instruction number for each label.
+    */
+   struct label *labels;
+   unsigned labels_size;
+   unsigned labels_count;
+
+   /* Keep a record of the tgsi instruction number that each mesa
+    * instruction starts at, will be used to fix up labels after
+    * translation.
+    */
+   unsigned *insn;
+   unsigned insn_size;
+   unsigned insn_count;
+
+   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
+
+   boolean error;      /* set to TRUE when growing labels[] or insn[] fails */
+};
+
+/**
+ * Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x.
+ *
+ * Indexed by the bit position of a system value in
+ * gl_program::SystemValuesRead (see st_translate_program()).  Only the
+ * first two entries are given explicit initializers.
+ */
+static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
+   TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_INSTANCEID
+};
+
+/**
+ * Make note of a branch to a label in the TGSI code.
+ * After we've emitted all instructions, we'll go over the list
+ * of labels built here and patch the TGSI code with the actual
+ * location of each label.
+ *
+ * \param t              translation state owning the labels[] array
+ * \param branch_target  value stored with the label; used to index
+ *                       t->insn[] when the labels are fixed up
+ * \return  address of the new label's token slot.  If labels[] cannot be
+ *          grown, t->error is set, the existing array is left untouched
+ *          and the address of a static dummy slot is returned instead.
+ */
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
+{
+   unsigned i;
+
+   if (t->labels_count + 1 >= t->labels_size) {
+      /* Grow through temporaries: a failed realloc() must neither leak the
+       * old array nor leave labels/labels_size describing storage that
+       * does not exist, since later calls would then write through NULL.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->labels_size) + 1);
+      struct label *new_labels =
+         (struct label *)realloc(t->labels, new_size * sizeof(struct label));
+
+      if (new_labels == NULL) {
+         static unsigned dummy;
+         t->error = TRUE;
+         return &dummy;
+      }
+
+      t->labels = new_labels;
+      t->labels_size = new_size;
+   }
+
+   i = t->labels_count++;
+   t->labels[i].branch_target = branch_target;
+   return &t->labels[i].token;
+}
+
+/**
+ * Called prior to emitting the TGSI code for each instruction.
+ * Allocate additional space for instructions if needed.
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
+ * the next TGSI instruction.
+ *
+ * \param t      translation state owning the insn[] array
+ * \param start  TGSI instruction number to record
+ *
+ * If insn[] cannot be grown, t->error is set, the existing array is left
+ * untouched and \p start is not recorded.
+ */
+static void set_insn_start(struct st_translate *t, unsigned start)
+{
+   if (t->insn_count + 1 >= t->insn_size) {
+      /* Grow through temporaries: a failed realloc() must neither leak the
+       * old array nor leave insn/insn_size describing storage that does
+       * not exist, since later calls would then write through NULL.
+       */
+      unsigned new_size = 1 << (util_logbase2(t->insn_size) + 1);
+      unsigned *new_insn =
+         (unsigned *)realloc(t->insn, new_size * sizeof(t->insn[0]));
+
+      if (new_insn == NULL) {
+         t->error = TRUE;
+         return;
+      }
+
+      t->insn = new_insn;
+      t->insn_size = new_size;
+   }
+
+   t->insn[t->insn_count++] = start;
+}
+
+/**
+ * Declare a TGSI immediate for a glsl_to_tgsi constant/immediate.
+ *
+ * \param t       translation state (supplies the ureg program)
+ * \param values  the constant's components
+ * \param type    GL_FLOAT, GL_INT, GL_UNSIGNED_INT or GL_BOOL
+ * \param size    component count forwarded to the ureg declaration
+ * \return  the declared immediate, or an undefined source for any other type
+ */
+static struct ureg_src
+emit_immediate(struct st_translate *t,
+               gl_constant_value values[4],
+               int type, int size)
+{
+   struct ureg_program *prog = t->ureg;
+
+   if (type == GL_FLOAT)
+      return ureg_DECL_immediate(prog, &values[0].f, size);
+
+   if (type == GL_INT)
+      return ureg_DECL_immediate_int(prog, &values[0].i, size);
+
+   /* Booleans share the unsigned declaration path. */
+   if (type == GL_UNSIGNED_INT || type == GL_BOOL)
+      return ureg_DECL_immediate_uint(prog, &values[0].u, size);
+
+   assert(!"should not get here - type must be float, int, uint, or bool");
+   return ureg_src_undef();
+}
+
+/**
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
+ *
+ * \param t      translation state holding the register tables
+ * \param file   register file of the destination
+ * \param index  register index within \p file
+ * \return  the matching ureg_dst, or an undefined destination for
+ *          PROGRAM_UNDEFINED and for unknown files
+ *
+ * Temporaries are declared the first time they are referenced.  A write to
+ * the vertex point size output also sets t->prevInstWrotePointSize.
+ */
+static struct ureg_dst
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
+{
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_dst_undef();
+
+   case PROGRAM_TEMPORARY:
+      /* Same bounds check src_register() performs on temps[]. */
+      assert(index < Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+
+      return t->temps[index];
+
+   case PROGRAM_OUTPUT:
+      if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
+         t->prevInstWrotePointSize = GL_TRUE;
+
+      if (t->procType == TGSI_PROCESSOR_VERTEX)
+         assert(index < VERT_RESULT_MAX);
+      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
+         assert(index < FRAG_RESULT_MAX);
+      else
+         assert(index < GEOM_RESULT_MAX);
+
+      assert(t->outputMapping[index] < Elements(t->outputs));
+
+      return t->outputs[t->outputMapping[index]];
+
+   case PROGRAM_ADDRESS:
+      assert(index < Elements(t->address));
+      return t->address[index];
+
+   default:
+      assert(!"unknown dst register file");
+      return ureg_dst_undef();
+   }
+}
+
+/**
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
+ *
+ * \param t      translation state holding the register tables
+ * \param file   register file of the source
+ * \param index  register index within \p file.  translate_src() passes a
+ *               signed st_src_reg index here, so a negative index arrives
+ *               as a very large unsigned value.
+ * \return  the matching ureg_src, or an undefined source for
+ *          PROGRAM_UNDEFINED and for unknown files
+ */
+static struct ureg_src
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
+{
+   /* Signed view of the index.  Testing the unsigned parameter against
+    * zero is always true/false, which made the negative-index handling
+    * below unreachable.
+    */
+   const GLint signed_index = (GLint) index;
+
+   switch(file) {
+   case PROGRAM_UNDEFINED:
+      return ureg_src_undef();
+
+   case PROGRAM_TEMPORARY:
+      assert(signed_index >= 0);
+      assert(index < Elements(t->temps));
+      if (ureg_dst_is_undef(t->temps[index]))
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
+      return ureg_src(t->temps[index]);
+
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_UNIFORM:
+      assert(signed_index >= 0);
+      return t->constants[index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:       /* ie, immediate */
+      /* A negative index is replaced by constant 0 here; translate_src()
+       * restores the real index for relative addressing.
+       */
+      if (signed_index < 0)
+         return ureg_DECL_constant(t->ureg, 0);
+      else
+         return t->constants[index];
+
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
+   case PROGRAM_INPUT:
+      assert(t->inputMapping[index] < Elements(t->inputs));
+      return t->inputs[t->inputMapping[index]];
+
+   case PROGRAM_OUTPUT:
+      assert(t->outputMapping[index] < Elements(t->outputs));
+      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
+
+   case PROGRAM_ADDRESS:
+      return ureg_src(t->address[index]);
+
+   case PROGRAM_SYSTEM_VALUE:
+      assert(index < Elements(t->systemValues));
+      return t->systemValues[index];
+
+   default:
+      assert(!"unknown src register file");
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Build the TGSI ureg_dst for an st_dst_reg.
+ *
+ * \param t         translation state
+ * \param dst_reg   destination register to translate
+ * \param saturate  whether the destination gets the saturate modifier
+ * \return  the register from dst_register() with the write mask applied,
+ *          plus saturation and indirection through address register 0
+ *          when requested
+ */
+static struct ureg_dst
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
+{
+   struct ureg_dst result =
+      ureg_writemask(dst_register(t, dst_reg->file, dst_reg->index),
+                     dst_reg->writemask);
+
+   if (saturate)
+      result = ureg_saturate(result);
+
+   /* Relative addressing always goes through address register 0. */
+   if (dst_reg->reladdr)
+      result = ureg_dst_indirect(result, ureg_src(t->address[0]));
+
+   return result;
+}
+
+/**
+ * Create a TGSI ureg_src register from an st_src_reg.
+ *
+ * The register is looked up with src_register(), then the swizzle and the
+ * negate modifier are applied.  When src_reg->reladdr is set, the result is
+ * additionally marked as indirect through address register 0.
+ *
+ * \param t        translation state
+ * \param src_reg  source register to translate
+ * \return  the translated ureg_src
+ */
+static struct ureg_src
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
+{
+   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
+
+   src = ureg_swizzle(src,
+                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
+
+   if ((src_reg->negate & 0xf) == NEGATE_XYZW)   /* only a full XYZW negate is translated */
+      src = ureg_negate(src);
+
+   if (src_reg->reladdr != NULL) {
+      /* Normally ureg_src_indirect() would be used here, but a stupid compiler 
+       * bug in g++ makes ureg_src_indirect (an inline C function) erroneously 
+       * set the bit for src.Negate.  So we have to do the operation manually
+       * here to work around the compiler's problems. */
+      /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
+      struct ureg_src addr = ureg_src(t->address[0]);
+      src.Indirect = 1;
+      src.IndirectFile = addr.File;
+      src.IndirectIndex = addr.Index;
+      src.IndirectSwizzle = addr.SwizzleX;
+      
+      if (src_reg->file != PROGRAM_INPUT &&
+          src_reg->file != PROGRAM_OUTPUT) {
+         /* If src_reg->index was negative, it was set to zero in
+          * src_register().  Reassign it now.  But don't do this
+          * for input/output regs since they get remapped while
+          * const buffers don't.
+          */
+         src.Index = src_reg->index;
+      }
+   }
+
+   return src;
+}
+
+/**
+ * Emit the TGSI for one glsl_to_tgsi_instruction.
+ *
+ * The destination and sources are translated first; the opcode then decides
+ * which ureg emitter is used: label instructions for flow control and CAL,
+ * texture instructions (with the sampler appended as an extra source) for
+ * the TEX family, and plain instructions for everything else.
+ *
+ * \param t     translation state
+ * \param inst  instruction to translate
+ */
+static void
+compile_tgsi_instruction(struct st_translate *t,
+                         const struct glsl_to_tgsi_instruction *inst)
+{
+   struct ureg_program *ureg = t->ureg;
+   struct ureg_dst dst[1];
+   struct ureg_src src[4];
+   unsigned num_dst = num_inst_dst_regs(inst->op);
+   unsigned num_src = num_inst_src_regs(inst->op);
+   unsigned s;
+
+   if (num_dst)
+      dst[0] = translate_dst(t, &inst->dst, inst->saturate);
+
+   for (s = 0; s < num_src; s++)
+      src[s] = translate_src(t, &inst->src[s]);
+
+   switch(inst->op) {
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+   case TGSI_OPCODE_ELSE:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_IF: {
+      unsigned *token;
+
+      assert(num_dst == 0);
+
+      /* Only CAL records a real target (the callee's signature id). */
+      if (inst->op == TGSI_OPCODE_CAL)
+         token = get_label(t, inst->function->sig_id);
+      else
+         token = get_label(t, 0);
+
+      ureg_label_insn(ureg, inst->op, src, num_src, token);
+      return;
+   }
+
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXP:
+      /* The sampler goes in as one more source operand. */
+      src[num_src] = t->samplers[inst->sampler];
+      num_src++;
+      ureg_tex_insn(ureg,
+                    inst->op,
+                    dst, num_dst,
+                    translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    src, num_src);
+      return;
+
+   case TGSI_OPCODE_SCS:
+      /* SCS is emitted with its destination restricted to XY. */
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+      /* fallthrough */
+   default:
+      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
+      break;
+   }
+}
+
+/**
+ * Emit the TGSI instructions that adjust the WPOS pixel center convention:
+ * (adjX, adjY) is added to the fragment position.
+ *
+ * \param t        translation state; its WPOS input entry is redirected to
+ *                 the adjusted temporary
+ * \param program  not referenced by this function
+ * \param adjX     bias added to X
+ * \param adjY     bias added to Y
+ */
+static void
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
+{
+   struct ureg_program *ureg = t->ureg;
+   const GLuint wpos_slot = t->inputMapping[FRAG_ATTRIB_WPOS];
+   struct ureg_dst adjusted = ureg_DECL_temporary(ureg);
+
+   /* Only X and Y are biased; Z and W pass through unchanged because the
+    * shader may also read gl_FragCoord.z and .w.
+    */
+   struct ureg_src bias = ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f);
+
+   ureg_ADD(ureg, adjusted, t->inputs[wpos_slot], bias);
+
+   t->inputs[wpos_slot] = ureg_src(adjusted);
+}
+
+
+/**
+ * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * This code is unavoidable because it also depends on whether
+ * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
+ *
+ * \param t        translation state; its WPOS input entry is redirected to
+ *                 the transformed temporary
+ * \param program  program whose parameter list receives the state reference
+ * \param invert   selects which pair of transform components is used:
+ *                 (x, y) when true, (z, w) when false
+ */
+static void
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
+{
+   static const gl_state_index wposTransformState[STATE_LENGTH]
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
+          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+
+   struct ureg_program *ureg = t->ureg;
+   const GLuint wpos_slot = t->inputMapping[FRAG_ATTRIB_WPOS];
+
+   /* XXX: note we are modifying the incoming shader here!  This has to
+    * happen before the constant declarations are emitted, or the new
+    * state reference will be missed.
+    */
+   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
+                                                       wposTransformState);
+
+   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
+   struct ureg_src wpos_input = t->inputs[wpos_slot];
+   struct ureg_dst wpos_temp;
+
+   /* Scale/bias channels of the transform constant. */
+   const int scale_chan = invert ? 0 : 2;
+   const int bias_chan = invert ? 1 : 3;
+
+   /* Fragment program uses fragment position input.  Every use of
+    * INPUT[WPOS] is replaced by a temp T where T = INPUT[WPOS] but with
+    * y transformed.  An input that already lives in a temporary is
+    * reused; otherwise: MOV wpos_temp, input[wpos]
+    */
+   if (wpos_input.File != TGSI_FILE_TEMPORARY) {
+      wpos_temp = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_temp, wpos_input);
+   }
+   else {
+      wpos_temp = ureg_dst(wpos_input);
+   }
+
+   /* MAD wpos_temp.y, wpos_input, wpostrans.<scale>, wpostrans.<bias> */
+   ureg_MAD(ureg,
+            ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+            wpos_input,
+            ureg_scalar(wpostrans, scale_chan),
+            ureg_scalar(wpostrans, bias_chan));
+
+   /* Use wpos_temp as position input from here on. */
+   t->inputs[wpos_slot] = ureg_src(wpos_temp);
+}
+
+
+/**
+ * Emit fragment position/coordinate code.
+ *
+ * Queries the screen's PIPE_CAP_TGSI_FS_COORD_* capabilities to reconcile
+ * the origin and pixel-center conventions requested by the fragment program
+ * (fp->OriginUpperLeft, fp->PixelCenterInteger) with what the driver
+ * supports.  Depending on the outcome it sets the matching ureg fs_coord
+ * properties, biases X/Y through emit_adjusted_wpos(), and finally calls
+ * emit_wpos_inversion() with the computed invert flag.
+ *
+ * \param st       state tracker context; supplies the pipe screen
+ * \param t        translation state
+ * \param program  the fragment program (cast to gl_fragment_program)
+ * \param ureg     ureg program receiving the properties
+ */
+static void
+emit_wpos(struct st_context *st,
+          struct st_translate *t,
+          const struct gl_program *program,
+          struct ureg_program *ureg)
+{
+   const struct gl_fragment_program *fp =
+      (const struct gl_fragment_program *) program;
+   struct pipe_screen *pscreen = st->pipe->screen;
+   boolean invert = FALSE;
+
+   if (fp->OriginUpperLeft) {
+      /* Fragment shader wants origin in upper-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
+         /* the driver supports upper-left origin */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
+         /* the driver supports lower-left origin, need to invert Y */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+         invert = TRUE;
+      }
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants origin in lower-left */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
+         /* the driver supports lower-left origin */
+         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
+         /* the driver supports upper-left origin, need to invert Y */
+         invert = TRUE;
+      else
+         assert(0);
+   }
+   
+   if (fp->PixelCenterInteger) {
+      /* Fragment shader wants pixel center integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+         /* the driver supports pixel center integer */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+         /* the driver supports pixel center half integer, need to bias X,Y */
+         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+      else
+         assert(0);
+   }
+   else {
+      /* Fragment shader wants pixel center half integer */
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
+         /* the driver supports pixel center half integer */
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
+         /* the driver supports pixel center integer, need to bias X,Y */
+         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
+      }
+      else
+         assert(0);
+   }
+
+   /* we invert after adjustment so that we avoid the MOV to temporary,
+    * and reuse the adjustment ADD instead */
+   emit_wpos_inversion(t, program, invert);
+}
+
+/**
+ * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
+ * TGSI uses +1 for front, -1 for back.
+ * Convert the TGSI value to the GL value by saturating it to [0,1] into a
+ * temporary, and make that temporary the face input from here on.
+ *
+ * \param t  translation state; its FACE input entry is redirected
+ */
+static void
+emit_face_var(struct st_translate *t)
+{
+   struct ureg_program *ureg = t->ureg;
+   const GLuint face_slot = t->inputMapping[FRAG_ATTRIB_FACE];
+
+   /* MOV_SAT face_temp, input[face] */
+   struct ureg_dst clamped = ureg_saturate(ureg_DECL_temporary(ureg));
+   ureg_MOV(ureg, clamped, t->inputs[face_slot]);
+
+   t->inputs[face_slot] = ureg_src(clamped);
+}
+
+/**
+ * Pass the edge flag through: MOV output[EDGE], input[EDGEFLAG].
+ *
+ * \param t  translation state holding the mapped input/output registers
+ */
+static void
+emit_edgeflags(struct st_translate *t)
+{
+   const GLuint out_slot = t->outputMapping[VERT_RESULT_EDGE];
+   const GLuint in_slot = t->inputMapping[VERT_ATTRIB_EDGEFLAG];
+
+   ureg_MOV(t->ureg, t->outputs[out_slot], t->inputs[in_slot]);
+}
+
+/**
+ * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
+ * \param ctx  GL context; supplies the state tracker context and the
+ *             texture image unit limit
+ * \param procType  TGSI_PROCESSOR_FRAGMENT, _GEOMETRY or _VERTEX
+ * \param ureg  the ureg program that receives declarations and instructions
+ * \param program  the program to translate
+ * \param proginfo  gl_program info for \p program (inputs read, parameters,
+ *                  system values)
+ * \param numInputs  number of input registers used
+ * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
+ *                      input indexes
+ * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                            each input
+ * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ * \param numOutputs  number of output registers used
+ * \param outputMapping  maps Mesa fragment program outputs to TGSI
+ *                       generic outputs
+ * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
+ *                             each output
+ * \param passthrough_edgeflags  for vertex programs, emit a MOV from the
+ *                               edge flag input to the edge flag output
+ *
+ * \return  PIPE_OK, PIPE_ERROR_OUT_OF_MEMORY (an allocation failed, including
+ *          growth of the label/instruction tables during translation), or
+ *          PIPE_ERROR_BAD_INPUT (unexpected fragment output semantic)
+ */
+extern "C" enum pipe_error
+st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags)
+{
+   struct st_translate translate, *t;
+   unsigned i;
+   enum pipe_error ret = PIPE_OK;
+
+   assert(numInputs <= Elements(t->inputs));
+   assert(numOutputs <= Elements(t->outputs));
+
+   t = &translate;
+   memset(t, 0, sizeof *t);
+
+   t->procType = procType;
+   t->inputMapping = inputMapping;
+   t->outputMapping = outputMapping;
+   t->ureg = ureg;
+   t->pointSizeOutIndex = -1;
+   t->prevInstWrotePointSize = GL_FALSE;
+
+   /*
+    * Declare input attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_fs_input(ureg,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i],
+                                           interpMode[i]);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
+         /* Must do this after setting up t->inputs, and before
+          * emitting constant references, below:
+          */
+          emit_wpos(st_context(ctx), t, proginfo, ureg);
+      }
+
+      if (proginfo->InputsRead & FRAG_BIT_FACE)
+         emit_face_var(t);
+
+      /*
+       * Declare output attributes.
+       */
+      for (i = 0; i < numOutputs; i++) {
+         switch (outputSemanticName[i]) {
+         case TGSI_SEMANTIC_POSITION:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
+            break;
+         case TGSI_SEMANTIC_STENCIL:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
+            break;
+         case TGSI_SEMANTIC_COLOR:
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_COLOR,
+                                             outputSemanticIndex[i]);
+            break;
+         default:
+            /* Nothing has been heap-allocated yet, so a plain return is
+             * safe here.
+             */
+            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
+            return PIPE_ERROR_BAD_INPUT;
+         }
+      }
+   }
+   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_gs_input(ureg,
+                                           i,
+                                           inputSemanticName[i],
+                                           inputSemanticIndex[i]);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
+      }
+   }
+   else {
+      assert(procType == TGSI_PROCESSOR_VERTEX);
+
+      for (i = 0; i < numInputs; i++) {
+         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
+      }
+
+      for (i = 0; i < numOutputs; i++) {
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
+         if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
+            /* Writing to the point size result register requires special
+             * handling to implement clamping.
+             */
+            static const gl_state_index pointSizeClampState[STATE_LENGTH]
+               = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
+            /* XXX: note we are modifying the incoming shader here!  Need to
+             * do this before emitting the constant decls below, or this
+             * will be missed.
+             */
+            unsigned pointSizeClampConst =
+               _mesa_add_state_reference(proginfo->Parameters,
+                                         pointSizeClampState);
+            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
+            t->pointSizeResult = t->outputs[i];
+            t->pointSizeOutIndex = i;
+            t->outputs[i] = psizregtemp;
+         }
+      }
+      if (passthrough_edgeflags)
+         emit_edgeflags(t);
+   }
+
+   /* Declare address register.
+    */
+   if (program->num_address_regs > 0) {
+      assert(program->num_address_regs == 1);
+      t->address[0] = ureg_DECL_address(ureg);
+   }
+
+   /* Declare misc input registers
+    */
+   {
+      GLbitfield sysInputs = proginfo->SystemValuesRead;
+      unsigned numSys = 0;
+      for (i = 0; sysInputs; i++) {
+         if (sysInputs & (1 << i)) {
+            unsigned semName = mesa_sysval_to_semantic[i];
+            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
+            numSys++;
+            sysInputs &= ~(1 << i);
+         }
+      }
+   }
+
+   if (program->indirect_addr_temps) {
+      /* If temps are accessed with indirect addressing, declare temporaries
+       * in sequential order.  Else, we declare them on demand elsewhere.
+       * (Note: the number of temporaries is equal to program->next_temp)
+       */
+      for (i = 0; i < (unsigned)program->next_temp; i++) {
+         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
+         t->temps[i] = ureg_DECL_temporary(t->ureg);
+      }
+   }
+
+   /* Emit constants and uniforms.  TGSI uses a single index space for these,
+    * so we put all the translated regs in t->constants.
+    */
+   if (proginfo->Parameters) {
+      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
+      if (t->constants == NULL) {
+         ret = PIPE_ERROR_OUT_OF_MEMORY;
+         goto out;
+      }
+
+      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+         switch (proginfo->Parameters->Parameters[i].Type) {
+         case PROGRAM_ENV_PARAM:
+         case PROGRAM_LOCAL_PARAM:
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_NAMED_PARAM:
+         case PROGRAM_UNIFORM:
+            t->constants[i] = ureg_DECL_constant(ureg, i);
+            break;
+
+         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+          * addressing of the const buffer.
+          * FIXME: Be smarter and recognize param arrays:
+          * indirect addressing is only valid within the referenced
+          * array.
+          */
+         case PROGRAM_CONSTANT:
+            if (program->indirect_addr_consts)
+               t->constants[i] = ureg_DECL_constant(ureg, i);
+            else
+               t->constants[i] = emit_immediate(t,
+                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->Parameters[i].DataType,
+                                                4);
+            break;
+         default:
+            break;
+         }
+      }
+   }
+
+   /* Emit immediate values.
+    */
+   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
+   if (t->immediates == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, program->immediates) {
+      immediate_storage *imm = (immediate_storage *)iter.get();
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+   }
+
+   /* texture samplers */
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
+      if (program->samplers_used & (1 << i)) {
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
+      }
+   }
+
+   /* Emit each instruction in turn:
+    */
+   foreach_iter(exec_list_iterator, iter, program->instructions) {
+      set_insn_start(t, ureg_get_instruction_number(ureg));
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
+
+      if (t->prevInstWrotePointSize && proginfo->Id) {
+         /* The previous instruction wrote to the (fake) vertex point size
+          * result register.  Now we need to clamp that value to the min/max
+          * point size range, putting the result into the real point size
+          * register.
+          * Note that we can't do this easily at the end of program due to
+          * possible early return.
+          */
+         set_insn_start(t, ureg_get_instruction_number(ureg));
+         ureg_MAX(t->ureg,
+                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+      }
+      t->prevInstWrotePointSize = GL_FALSE;
+   }
+
+   /* t->error is set when growing labels[] or insn[] failed.  In that case
+    * those tables are incomplete (or gone), so they must not be walked, and
+    * the failure has to reach the caller instead of being reported as
+    * success.
+    */
+   if (t->error) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+
+   /* Fix up all emitted labels:
+    */
+   for (i = 0; i < t->labels_count; i++) {
+      ureg_fixup_label(ureg, t->labels[i].token,
+                       t->insn[t->labels[i].branch_target]);
+   }
+
+out:
+   FREE(t->insn);
+   FREE(t->labels);
+   FREE(t->constants);
+   FREE(t->immediates);
+
+   if (t->error) {
+      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+   }
+
+   return ret;
+}
+/* ----------------------------- End TGSI code ------------------------------ */
+
+/**
+ * Convert a shader's GLSL IR into a Mesa gl_program without generating Mesa
+ * IR.  The visitor is allocated after the early NULL returns so none leak it.
+ */
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+                 struct gl_shader *shader)
+{
+   struct gl_program *prog;
+   GLenum target;
+   const char *target_string;
+   bool progress;
+   struct gl_shader_compiler_options *options =
+         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      target = GL_VERTEX_PROGRAM_ARB;
+      target_string = "vertex";
+      break;
+   case GL_FRAGMENT_SHADER:
+      target = GL_FRAGMENT_PROGRAM_ARB;
+      target_string = "fragment";
+      break;
+   case GL_GEOMETRY_SHADER:
+      target = GL_GEOMETRY_PROGRAM_NV;
+      target_string = "geometry";
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog)
+      return NULL;
+   glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
+   prog->Parameters = _mesa_new_parameter_list();
+   prog->Varying = _mesa_new_parameter_list();
+   prog->Attributes = _mesa_new_parameter_list();
+   v->ctx = ctx;
+   v->prog = prog;
+   v->shader_program = shader_program;
+   v->options = options;
+   v->glsl_version = ctx->Const.GLSLVersion;
+
+   add_uniforms_to_parameters_list(shader_program, shader, prog);
+
+   /* Emit intermediate IR for main(). */
+   visit_exec_list(shader->ir, v);
+
+   /* Now emit bodies for any functions that were used. */
+   do {
+      progress = GL_FALSE;
+
+      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
+         function_entry *entry = (function_entry *)iter.get();
+
+         if (!entry->bgn_inst) {
+            v->current_function = entry;
+
+            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
+            entry->bgn_inst->function = entry;
+
+            visit_exec_list(&entry->sig->body, v);
+
+            glsl_to_tgsi_instruction *last;
+            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
+            if (last->op != TGSI_OPCODE_RET)
+               v->emit(NULL, TGSI_OPCODE_RET);
+
+            glsl_to_tgsi_instruction *end;
+            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
+            end->function = entry;
+
+            progress = GL_TRUE;
+         }
+      }
+   } while (progress);
+
+#if 0
+   /* Print out some information (for debugging purposes) used by the
+    * optimization passes. */
+   for (i=0; i < v->next_temp; i++) {
+      int fr = v->get_first_temp_read(i);
+      int fw = v->get_first_temp_write(i);
+      int lr = v->get_last_temp_read(i);
+      int lw = v->get_last_temp_write(i);
+
+      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
+      assert(fw <= fr);
+   }
+#endif
+
+   /* Remove reads to output registers, and to varyings in vertex shaders. */
+   v->remove_output_reads(PROGRAM_OUTPUT);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      v->remove_output_reads(PROGRAM_VARYING);
+
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
+
+   /* FIXME: These passes to optimize temporary registers don't work when there
+    * is indirect addressing of the temporary register space.  We need proper
+    * array support so that we don't have to give up these passes in every
+    * shader that uses arrays.
+    */
+   if (!v->indirect_addr_temps) {
+      v->eliminate_dead_code();
+      v->merge_registers();
+      v->renumber_registers();
+   }
+
+   /* Write the END instruction. */
+   v->emit(NULL, TGSI_OPCODE_END);
+
+   if (ctx->Shader.Flags & GLSL_DUMP) {
+      printf("\n");
+      printf("GLSL IR for linked %s program %d:\n", target_string,
+             shader_program->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+      printf("\n");
+   }
+
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
+
+   do_set_program_inouts(shader->ir, prog);
+   count_resources(v, prog);
+
+   check_resources(ctx, shader_program, v, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct st_geometry_program *stgp;
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->glsl_to_tgsi = v;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->glsl_to_tgsi = v;
+      break;
+   case GL_GEOMETRY_SHADER:
+      stgp = (struct st_geometry_program *)prog;
+      stgp->glsl_to_tgsi = v;
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+extern "C" {
+
+/** Allocate a gl_shader with rzalloc() and set it up; NULL on failure. */
+struct gl_shader *
+st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
+{
+   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
+          type == GL_GEOMETRY_SHADER_ARB);
+   struct gl_shader *sh = rzalloc(NULL, struct gl_shader);
+   if (sh != NULL) {
+      sh->Name = name;
+      sh->Type = type;
+      _mesa_init_shader(ctx, sh);
+   }
+   return sh;
+}
+
+/** Allocate a gl_shader_program via rzalloc(); NULL is returned on failure. */
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name)
+{
+   struct gl_shader_program *program = rzalloc(NULL, struct gl_shader_program);
+   if (program == NULL)
+      return NULL;
+   program->Name = name;
+   _mesa_init_shader_program(ctx, program);
+   return program;
+}
+
+/**
+ * Link a shader.  Called via ctx->Driver.LinkShader().
+ * Lowers and optimizes the GLSL IR of every linked stage, then converts each
+ * stage into an intermediate TGSI-like IR attached to a new gl_program.
+ * \return GL_FALSE if ProgramStringNotify rejects a program, else GL_TRUE.
+ */
+GLboolean
+st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   assert(prog->LinkStatus);
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool progress;
+      exec_list *ir = prog->_LinkedShaders[i]->ir;
+      const struct gl_shader_compiler_options *options =
+            &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
+
+      do {
+         progress = false;
+
+         /* Lowering */
+         do_mat_op_to_vec(ir);
+         lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+                                 | LOG_TO_LOG2
+                                 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+
+         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
+
+         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+
+         progress = lower_quadop_vector(ir, false) || progress;
+
+         if (options->EmitNoIfs) {
+            progress = lower_discard(ir) || progress;
+            progress = lower_if_to_cond_assign(ir) || progress;
+         }
+
+         if (options->EmitNoNoise)
+            progress = lower_noise(ir) || progress;
+
+         /* If there are forms of indirect addressing that the driver
+          * cannot handle, perform the lowering pass.
+          */
+         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
+             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
+           progress =
+             lower_variable_index_to_cond_assign(ir,
+                                                 options->EmitNoIndirectInput,
+                                                 options->EmitNoIndirectOutput,
+                                                 options->EmitNoIndirectTemp,
+                                                 options->EmitNoIndirectUniform)
+             || progress;
+
+         progress = do_vec_index_to_cond_assign(ir) || progress;
+      } while (progress);
+
+      validate_ir_tree(ir);
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
+      struct gl_program *linked_prog;
+
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+      if (linked_prog) {
+         bool ok = true;
+
+         switch (prog->_LinkedShaders[i]->Type) {
+         case GL_VERTEX_SHADER:
+            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
+                                     (struct gl_vertex_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_FRAGMENT_SHADER:
+            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
+                                     (struct gl_fragment_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                                                 linked_prog);
+            break;
+         case GL_GEOMETRY_SHADER:
+            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
+                                     (struct gl_geometry_program *)linked_prog);
+            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
+                                                 linked_prog);
+            break;
+         }
+         if (!ok) {
+            /* Drop the local reference too, as the success path does below. */
+            _mesa_reference_program(ctx, &linked_prog, NULL);
+            return GL_FALSE;
+         }
+      }
+      _mesa_reference_program(ctx, &linked_prog, NULL);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Link a GLSL shader program.  Called via glLinkProgram().
+ *
+ * Fails the link when any attached shader is uncompiled; otherwise runs
+ * link_shaders() and then the driver's LinkShader hook, leaving the outcome
+ * in prog->LinkStatus.
+ * With GLSL_DUMP set, the failure notice and the info log are printed.
+ */
+void
+st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   _mesa_clear_shader_program_data(ctx, prog);
+   prog->LinkStatus = GL_TRUE;
+
+   for (unsigned int n = 0; n < prog->NumShaders; n++) {
+      if (prog->Shaders[n]->CompileStatus)
+         continue;
+      fail_link(prog, "linking with uncompiled shader");
+      prog->LinkStatus = GL_FALSE;
+   }
+
+   /* Install a new varying list and clear the per-stage program references. */
+   prog->Varying = _mesa_new_parameter_list();
+   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+
+   /* LinkStatus is re-tested after link_shaders() before calling the driver. */
+   if (prog->LinkStatus)
+      link_shaders(ctx, prog);
+   if (prog->LinkStatus && !ctx->Driver.LinkShader(ctx, prog))
+      prog->LinkStatus = GL_FALSE;
+
+   set_uniform_initializers(ctx, prog);
+
+   /* Everything below is debug output only. */
+   if (!(ctx->Shader.Flags & GLSL_DUMP))
+      return;
+
+   if (!prog->LinkStatus)
+      printf("GLSL shader program %d failed to link\n", prog->Name);
+
+   if (prog->InfoLog && prog->InfoLog[0] != 0) {
+      printf("GLSL shader program %d info log:\n", prog->Name);
+      printf("%s\n", prog->InfoLog);
+   }
+}
+
+} /* extern "C" */
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.h b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.h
new file mode 100644
index 000000000..d87747178
--- /dev/null
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2011 Bryan Cain
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef ST_GLSL_TO_TGSI_H
+#define ST_GLSL_TO_TGSI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+#include "tgsi/tgsi_ureg.h"
+
+/* Forward declarations: without them, a struct first named in a prototype
+ * below would only have prototype scope when this header is compiled as C.
+ */
+struct gl_context;
+struct gl_program;
+struct gl_shader;
+struct gl_shader_program;
+struct glsl_to_tgsi_visitor;
+struct st_fragment_program;
+
+enum pipe_error st_translate_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   struct glsl_to_tgsi_visitor *program,
+   const struct gl_program *proginfo,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags);
+
+void free_glsl_to_tgsi_visitor(struct glsl_to_tgsi_visitor *v);
+void get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                                struct glsl_to_tgsi_visitor *original,
+                                int scale_and_bias, int pixel_maps);
+void get_bitmap_visitor(struct st_fragment_program *fp,
+                        struct glsl_to_tgsi_visitor *original,
+                        int samplerIndex);
+
+struct gl_shader *st_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
+
+struct gl_shader_program *
+st_new_shader_program(struct gl_context *ctx, GLuint name);
+
+void st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+GLboolean st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ST_GLSL_TO_TGSI_H */
diff --git a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
index a41e5b16a..656c985d7 100644
--- a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -267,7 +267,7 @@ src_register( struct st_translate *t,
 /**
  * Map mesa texture target to TGSI texture target.
  */
-static unsigned
+unsigned
 translate_texture_target( GLuint textarget,
                           GLboolean shadow )
 {
@@ -511,7 +511,7 @@ static void emit_ddy( struct st_translate *t,
 
 
 
-static unsigned
+unsigned
 translate_opcode( unsigned op )
 {
    switch( op ) {
@@ -1207,7 +1207,7 @@ st_translate_mesa_program(
             else
                t->constants[i] = 
                   ureg_DECL_immediate( ureg,
-                                       program->Parameters->ParameterValues[i],
+                                       (const float*) program->Parameters->ParameterValues[i],
                                        4 );
             break;
          default:
diff --git a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.h b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.h
index aa42fc0cc..0dbdf5f61 100644
--- a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -1,73 +1,79 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef ST_MESA_TO_TGSI_H
-#define ST_MESA_TO_TGSI_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#include "main/glheader.h"
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_defines.h"
-
-struct gl_context;
-struct gl_program;
-struct tgsi_token;
-struct ureg_program;
-
-
-enum pipe_error
-st_translate_mesa_program(
-   struct gl_context *ctx,
-   uint procType,
-   struct ureg_program *ureg,
-   const struct gl_program *program,
-   GLuint numInputs,
-   const GLuint inputMapping[],
-   const ubyte inputSemanticName[],
-   const ubyte inputSemanticIndex[],
-   const GLuint interpMode[],
-   GLuint numOutputs,
-   const GLuint outputMapping[],
-   const ubyte outputSemanticName[],
-   const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags );
-
-void
-st_free_tokens(const struct tgsi_token *tokens);
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* ST_MESA_TO_TGSI_H */
-
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef ST_MESA_TO_TGSI_H
+#define ST_MESA_TO_TGSI_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#include "main/glheader.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+struct gl_context;
+struct gl_program;
+struct tgsi_token;
+struct ureg_program;
+
+
+enum pipe_error
+st_translate_mesa_program(
+   struct gl_context *ctx,
+   uint procType,
+   struct ureg_program *ureg,
+   const struct gl_program *program,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   boolean passthrough_edgeflags );
+
+void
+st_free_tokens(const struct tgsi_token *tokens);
+
+unsigned
+translate_opcode(unsigned op);
+
+unsigned
+translate_texture_target(GLuint textarget, GLboolean shadow);
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* ST_MESA_TO_TGSI_H */
+
diff --git a/mesalib/src/mesa/state_tracker/st_program.c b/mesalib/src/mesa/state_tracker/st_program.c
index 132ebdbad..ca01d2e19 100644
--- a/mesalib/src/mesa/state_tracker/st_program.c
+++ b/mesalib/src/mesa/state_tracker/st_program.c
@@ -174,8 +174,8 @@ st_release_gp_variants(struct st_context *st, struct st_geometry_program *stgp)
  * \param tokensOut  destination for TGSI tokens
  * \return  pointer to cached pipe_shader object.
  */
-static void
-st_prepare_vertex_program(struct st_context *st,
+void
+st_prepare_vertex_program(struct gl_context *ctx,
                             struct st_vertex_program *stvp)
 {
    GLuint attr;
@@ -184,9 +184,10 @@ st_prepare_vertex_program(struct st_context *st,
    stvp->num_outputs = 0;
 
    if (stvp->Base.IsPositionInvariant)
-      _mesa_insert_mvp_code(st->ctx, &stvp->Base);
+      _mesa_insert_mvp_code(ctx, &stvp->Base);
 
-   assert(stvp->Base.Base.NumInstructions > 1);
+   if (!stvp->glsl_to_tgsi)
+      assert(stvp->Base.Base.NumInstructions > 1);
 
    /*
     * Determine number of inputs, the mappings between VERT_ATTRIB_x
@@ -292,10 +293,13 @@ st_translate_vertex_program(struct st_context *st,
    enum pipe_error error;
    unsigned num_outputs;
 
-   st_prepare_vertex_program( st, stvp );
+   st_prepare_vertex_program(st->ctx, stvp);
 
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
-   _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   if (!stvp->glsl_to_tgsi)
+   {
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
+      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_VARYING);
+   }
 
    ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
    if (ureg == NULL) {
@@ -318,22 +322,41 @@ st_translate_vertex_program(struct st_context *st,
       debug_printf("\n");
    }
 
-   error = st_translate_mesa_program(st->ctx,
-                                     TGSI_PROCESSOR_VERTEX,
-                                     ureg,
-                                     &stvp->Base.Base,
-                                     /* inputs */
-                                     vpv->num_inputs,
-                                     stvp->input_to_index,
-                                     NULL, /* input semantic name */
-                                     NULL, /* input semantic index */
-                                     NULL,
-                                     /* outputs */
-                                     num_outputs,
-                                     stvp->result_to_output,
-                                     stvp->output_semantic_name,
-                                     stvp->output_semantic_index,
-                                     key->passthrough_edgeflags );
+   if (stvp->glsl_to_tgsi)
+      error = st_translate_program(st->ctx,
+                                   TGSI_PROCESSOR_VERTEX,
+                                   ureg,
+                                   stvp->glsl_to_tgsi,
+                                   &stvp->Base.Base,
+                                   /* inputs */
+                                   stvp->num_inputs,
+                                   stvp->input_to_index,
+                                   NULL, /* input semantic name */
+                                   NULL, /* input semantic index */
+                                   NULL, /* interp mode */
+                                   /* outputs */
+                                   stvp->num_outputs,
+                                   stvp->result_to_output,
+                                   stvp->output_semantic_name,
+                                   stvp->output_semantic_index,
+                                   key->passthrough_edgeflags );
+   else
+      error = st_translate_mesa_program(st->ctx,
+                                        TGSI_PROCESSOR_VERTEX,
+                                        ureg,
+                                        &stvp->Base.Base,
+                                        /* inputs */
+                                        vpv->num_inputs,
+                                        stvp->input_to_index,
+                                        NULL, /* input semantic name */
+                                        NULL, /* input semantic index */
+                                        NULL,
+                                        /* outputs */
+                                        num_outputs,
+                                        stvp->result_to_output,
+                                        stvp->output_semantic_name,
+                                        stvp->output_semantic_index,
+                                        key->passthrough_edgeflags );
 
    if (error)
       goto fail;
@@ -393,6 +416,151 @@ st_get_vp_variant(struct st_context *st,
    return vpv;
 }
 
+/**
+ * Translate Mesa fragment shader attributes to TGSI attributes, (re)building
+ * stfp's input/output slot mappings, semantics and interpolation modes.
+ * \return GL_TRUE if color output should be written to all render targets
+ */
+GLboolean
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp)
+{
+   GLuint attr;
+   const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
+   GLboolean write_all = GL_FALSE;
+
+   /* Restart slot numbering so a repeated call does not accumulate counts. */
+   stfp->num_inputs = 0;
+   stfp->num_outputs = 0;
+
+   /* Convert Mesa program inputs to TGSI input register semantics. */
+   for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
+      if (inputsRead & (1u << attr)) {
+         const GLuint slot = stfp->num_inputs++;
+
+         stfp->input_to_index[attr] = slot;
+
+         switch (attr) {
+         case FRAG_ATTRIB_WPOS:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL0:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_COL1:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
+            stfp->input_semantic_index[slot] = 1;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            break;
+         case FRAG_ATTRIB_FOGC:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         case FRAG_ATTRIB_FACE:
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
+            stfp->input_semantic_index[slot] = 0;
+            stfp->interp_mode[slot] = TGSI_INTERPOLATE_CONSTANT;
+            break;
+            /* In most cases, there is nothing special about these
+             * inputs, so adopt a convention to use the generic
+             * semantic name and the mesa FRAG_ATTRIB_ number as the
+             * index.
+             *
+             * All that is required is that the vertex shader labels
+             * its own outputs similarly, and that the vertex shader
+             * generates at least every output required by the
+             * fragment shader plus fixed-function hardware (such as
+             * BFC).
+             *
+             * There is no requirement that semantic indexes start at
+             * zero or be restricted to a particular range -- nobody
+             * should be building tables based on semantic index.
+             */
+         case FRAG_ATTRIB_PNTC:
+         case FRAG_ATTRIB_TEX0:
+         case FRAG_ATTRIB_TEX1:
+         case FRAG_ATTRIB_TEX2:
+         case FRAG_ATTRIB_TEX3:
+         case FRAG_ATTRIB_TEX4:
+         case FRAG_ATTRIB_TEX5:
+         case FRAG_ATTRIB_TEX6:
+         case FRAG_ATTRIB_TEX7:
+         case FRAG_ATTRIB_VAR0:
+         default:
+            /* Actually, let's try and zero-base this just for
+             * readability of the generated TGSI.
+             */
+            assert(attr >= FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
+            stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
+            if (attr == FRAG_ATTRIB_PNTC)
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_LINEAR;
+            else
+               stfp->interp_mode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
+            break;
+         }
+      }
+      else {
+         stfp->input_to_index[attr] = -1;
+      }
+   }
+
+   /* Semantics and mapping for outputs */
+   {
+      uint numColors = 0;
+      GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
+
+      /* if z is written, emit that first */
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_POSITION;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_DEPTH] = stfp->num_outputs;
+         stfp->num_outputs++;
+         outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+      }
+
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+         stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_STENCIL;
+         stfp->output_semantic_index[stfp->num_outputs] = 0;
+         stfp->result_to_output[FRAG_RESULT_STENCIL] = stfp->num_outputs;
+         stfp->num_outputs++;
+         outputsWritten &= ~BITFIELD64_BIT(FRAG_RESULT_STENCIL);
+      }
+
+      /* handle remaining outputs (color) */
+      for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
+         if (outputsWritten & BITFIELD64_BIT(attr)) {
+            switch (attr) {
+            case FRAG_RESULT_DEPTH:
+            case FRAG_RESULT_STENCIL:
+               /* handled above */
+               assert(0);
+               break;
+            case FRAG_RESULT_COLOR:
+               write_all = GL_TRUE; /* fallthrough */
+            default:
+               assert(attr == FRAG_RESULT_COLOR ||
+                      (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+               stfp->output_semantic_name[stfp->num_outputs] = TGSI_SEMANTIC_COLOR;
+               stfp->output_semantic_index[stfp->num_outputs] = numColors;
+               stfp->result_to_output[attr] = stfp->num_outputs;
+               numColors++;
+               break;
+            }
+
+            stfp->num_outputs++;
+         }
+      }
+   }
+
+   return write_all;
+}
+
 
 /**
  * Translate a Mesa fragment shader into a TGSI shader using extra info in
@@ -445,153 +613,11 @@ st_translate_fragment_program(struct st_context *st,
 
    if (!stfp->tgsi.tokens) {
       /* need to translate Mesa instructions to TGSI now */
-      GLuint outputMapping[FRAG_RESULT_MAX];
-      GLuint inputMapping[FRAG_ATTRIB_MAX];
-      GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
-      GLuint attr;
-      const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
       struct ureg_program *ureg;
-      GLboolean write_all = GL_FALSE;
-
-      ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-      ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-      uint fs_num_inputs = 0;
-
-      ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS];
-      ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS];
-      uint fs_num_outputs = 0;
-
-
-      _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
-
-      /*
-       * Convert Mesa program inputs to TGSI input register semantics.
-       */
-      for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) {
-         if (inputsRead & (1 << attr)) {
-            const GLuint slot = fs_num_inputs++;
-
-            inputMapping[attr] = slot;
-
-            switch (attr) {
-            case FRAG_ATTRIB_WPOS:
-               input_semantic_name[slot] = TGSI_SEMANTIC_POSITION;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL0:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_COL1:
-               input_semantic_name[slot] = TGSI_SEMANTIC_COLOR;
-               input_semantic_index[slot] = 1;
-               interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               break;
-            case FRAG_ATTRIB_FOGC:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FOG;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            case FRAG_ATTRIB_FACE:
-               input_semantic_name[slot] = TGSI_SEMANTIC_FACE;
-               input_semantic_index[slot] = 0;
-               interpMode[slot] = TGSI_INTERPOLATE_CONSTANT;
-               break;
-               /* In most cases, there is nothing special about these
-                * inputs, so adopt a convention to use the generic
-                * semantic name and the mesa FRAG_ATTRIB_ number as the
-                * index. 
-                * 
-                * All that is required is that the vertex shader labels
-                * its own outputs similarly, and that the vertex shader
-                * generates at least every output required by the
-                * fragment shader plus fixed-function hardware (such as
-                * BFC).
-                * 
-                * There is no requirement that semantic indexes start at
-                * zero or be restricted to a particular range -- nobody
-                * should be building tables based on semantic index.
-                */
-            case FRAG_ATTRIB_PNTC:
-            case FRAG_ATTRIB_TEX0:
-            case FRAG_ATTRIB_TEX1:
-            case FRAG_ATTRIB_TEX2:
-            case FRAG_ATTRIB_TEX3:
-            case FRAG_ATTRIB_TEX4:
-            case FRAG_ATTRIB_TEX5:
-            case FRAG_ATTRIB_TEX6:
-            case FRAG_ATTRIB_TEX7:
-            case FRAG_ATTRIB_VAR0:
-            default:
-               /* Actually, let's try and zero-base this just for
-                * readability of the generated TGSI.
-                */
-               assert(attr >= FRAG_ATTRIB_TEX0);
-               input_semantic_index[slot] = (attr - FRAG_ATTRIB_TEX0);
-               input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
-               if (attr == FRAG_ATTRIB_PNTC)
-                  interpMode[slot] = TGSI_INTERPOLATE_LINEAR;
-               else
-                  interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
-               break;
-            }
-         }
-         else {
-            inputMapping[attr] = -1;
-         }
-      }
-
-      /*
-       * Semantics and mapping for outputs
-       */
-      {
-         uint numColors = 0;
-         GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten;
-
-         /* if z is written, emit that first */
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
-         }
-
-         if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
-            fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
-            fs_output_semantic_index[fs_num_outputs] = 0;
-            outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
-            fs_num_outputs++;
-            outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
-         }
-
-         /* handle remaning outputs (color) */
-         for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
-            if (outputsWritten & BITFIELD64_BIT(attr)) {
-               switch (attr) {
-               case FRAG_RESULT_DEPTH:
-               case FRAG_RESULT_STENCIL:
-                  /* handled above */
-                  assert(0);
-                  break;
-               case FRAG_RESULT_COLOR:
-                  write_all = GL_TRUE; /* fallthrough */
-               default:
-                  assert(attr == FRAG_RESULT_COLOR ||
-                         (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
-                  fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
-                  fs_output_semantic_index[fs_num_outputs] = numColors;
-                  outputMapping[attr] = fs_num_outputs;
-                  numColors++;
-                  break;
-               }
-
-               fs_num_outputs++;
-            }
-         }
-      }
+      GLboolean write_all = st_prepare_fragment_program(st->ctx, stfp);
+      
+      if (!stfp->glsl_to_tgsi)
+         _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
 
       ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
       if (ureg == NULL)
@@ -605,21 +631,39 @@ st_translate_fragment_program(struct st_context *st,
       if (write_all == GL_TRUE)
          ureg_property_fs_color0_writes_all_cbufs(ureg, 1);
 
-      st_translate_mesa_program(st->ctx,
-                                TGSI_PROCESSOR_FRAGMENT,
-                                ureg,
-                                &stfp->Base.Base,
-                                /* inputs */
-                                fs_num_inputs,
-                                inputMapping,
-                                input_semantic_name,
-                                input_semantic_index,
-                                interpMode,
-                                /* outputs */
-                                fs_num_outputs,
-                                outputMapping,
-                                fs_output_semantic_name,
-                                fs_output_semantic_index, FALSE );
+      if (stfp->glsl_to_tgsi)
+         st_translate_program(st->ctx,
+                              TGSI_PROCESSOR_FRAGMENT,
+                              ureg,
+                              stfp->glsl_to_tgsi,
+                              &stfp->Base.Base,
+                              /* inputs */
+                              stfp->num_inputs,
+                              stfp->input_to_index,
+                              stfp->input_semantic_name,
+                              stfp->input_semantic_index,
+                              stfp->interp_mode,
+                              /* outputs */
+                              stfp->num_outputs,
+                              stfp->result_to_output,
+                              stfp->output_semantic_name,
+                              stfp->output_semantic_index, FALSE );
+      else
+         st_translate_mesa_program(st->ctx,
+                                   TGSI_PROCESSOR_FRAGMENT,
+                                   ureg,
+                                   &stfp->Base.Base,
+                                   /* inputs */
+                                   stfp->num_inputs,
+                                   stfp->input_to_index,
+                                   stfp->input_semantic_name,
+                                   stfp->input_semantic_index,
+                                   stfp->interp_mode,
+                                   /* outputs */
+                                   stfp->num_outputs,
+                                   stfp->result_to_output,
+                                   stfp->output_semantic_name,
+                                   stfp->output_semantic_index, FALSE );
 
       stfp->tgsi.tokens = ureg_get_tokens( ureg, NULL );
       ureg_destroy( ureg );
diff --git a/mesalib/src/mesa/state_tracker/st_program.h b/mesalib/src/mesa/state_tracker/st_program.h
index a240d4ef4..67723de6d 100644
--- a/mesalib/src/mesa/state_tracker/st_program.h
+++ b/mesalib/src/mesa/state_tracker/st_program.h
@@ -1,300 +1,327 @@
-/**************************************************************************
- * 
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-    
-
-#ifndef ST_PROGRAM_H
-#define ST_PROGRAM_H
-
-#include "main/mtypes.h"
-#include "program/program.h"
-#include "pipe/p_state.h"
-#include "st_context.h"
-
-
-/** Fragment program variant key */
-struct st_fp_variant_key
-{
-   struct st_context *st;         /**< variants are per-context */
-
-   /** for glBitmap */
-   GLuint bitmap:1;               /**< glBitmap variant? */
-
-   /** for glDrawPixels */
-   GLuint drawpixels:1;           /**< glDrawPixels variant */
-   GLuint scaleAndBias:1;         /**< glDrawPixels w/ scale and/or bias? */
-   GLuint pixelMaps:1;            /**< glDrawPixels w/ pixel lookup map? */
-   GLuint drawpixels_z:1;         /**< glDrawPixels(GL_DEPTH) */
-   GLuint drawpixels_stencil:1;   /**< glDrawPixels(GL_STENCIL) */
-};
-
-
-/**
- * Variant of a fragment program.
- */
-struct st_fp_variant
-{
-   /** Parameters which generated this version of fragment program */
-   struct st_fp_variant_key key;
-
-   /** Driver's compiled shader */
-   void *driver_shader;
-
-   /** For glBitmap variants */
-   struct gl_program_parameter_list *parameters;
-   uint bitmap_sampler;
-
-   /** next in linked list */
-   struct st_fp_variant *next;
-};
-
-
-/**
- * Derived from Mesa gl_fragment_program:
- */
-struct st_fragment_program
-{
-   struct gl_fragment_program Base;
-
-   struct pipe_shader_state tgsi;
-
-   struct st_fp_variant *variants;
-};
-
-
-
-/** Vertex program variant key */
-struct st_vp_variant_key
-{
-   struct st_context *st;          /**< variants are per-context */
-   boolean passthrough_edgeflags;
-};
-
-
-/**
- * This represents a vertex program, especially translated to match
- * the inputs of a particular fragment shader.
- */
-struct st_vp_variant
-{
-   /* Parameters which generated this translated version of a vertex
-    * shader:
-    */
-   struct st_vp_variant_key key;
-
-   /**
-    * TGSI tokens (to later generate a 'draw' module shader for
-    * selection/feedback/rasterpos)
-    */
-   struct pipe_shader_state tgsi;
-
-   /** Driver's compiled shader */
-   void *driver_shader;
-
-   /** For using our private draw module (glRasterPos) */
-   struct draw_vertex_shader *draw_shader;
-
-   /** Next in linked list */
-   struct st_vp_variant *next;  
-
-   /** similar to that in st_vertex_program, but with edgeflags info too */
-   GLuint num_inputs;
-};
-
-
-/**
- * Derived from Mesa gl_fragment_program:
- */
-struct st_vertex_program
-{
-   struct gl_vertex_program Base;  /**< The Mesa vertex program */
-
-   /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
-   GLuint input_to_index[VERT_ATTRIB_MAX];
-   /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
-   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
-   GLuint num_inputs;
-
-   /** Maps VERT_RESULT_x to slot */
-   GLuint result_to_output[VERT_RESULT_MAX];
-   ubyte output_semantic_name[VERT_RESULT_MAX];
-   ubyte output_semantic_index[VERT_RESULT_MAX];
-   GLuint num_outputs;
-
-   /** List of translated variants of this vertex program.
-    */
-   struct st_vp_variant *variants;
-};
-
-
-
-/** Geometry program variant key */
-struct st_gp_variant_key
-{
-   struct st_context *st;          /**< variants are per-context */
-   /* no other fields yet */
-};
-
-
-/**
- * Geometry program variant.
- */
-struct st_gp_variant
-{
-   /* Parameters which generated this translated version of a vertex */
-   struct st_gp_variant_key key;
-
-   void *driver_shader;
-
-   struct st_gp_variant *next;
-};
-
-
-/**
- * Derived from Mesa gl_geometry_program:
- */
-struct st_geometry_program
-{
-   struct gl_geometry_program Base;  /**< The Mesa geometry program */
-
-   /** map GP input back to VP output */
-   GLuint input_map[PIPE_MAX_SHADER_INPUTS];
-
-   /** maps a Mesa GEOM_ATTRIB_x to a packed TGSI input index */
-   GLuint input_to_index[GEOM_ATTRIB_MAX];
-   /** maps a TGSI input index back to a Mesa GEOM_ATTRIB_x */
-   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
-
-   GLuint num_inputs;
-
-   GLuint input_to_slot[GEOM_ATTRIB_MAX];  /**< Maps GEOM_ATTRIB_x to slot */
-   GLuint num_input_slots;
-
-   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
-   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
-
-   struct pipe_shader_state tgsi;
-
-   struct st_gp_variant *variants;
-};
-
-
-
-static INLINE struct st_fragment_program *
-st_fragment_program( struct gl_fragment_program *fp )
-{
-   return (struct st_fragment_program *)fp;
-}
-
-
-static INLINE struct st_vertex_program *
-st_vertex_program( struct gl_vertex_program *vp )
-{
-   return (struct st_vertex_program *)vp;
-}
-
-static INLINE struct st_geometry_program *
-st_geometry_program( struct gl_geometry_program *gp )
-{
-   return (struct st_geometry_program *)gp;
-}
-
-static INLINE void
-st_reference_vertprog(struct st_context *st,
-                      struct st_vertex_program **ptr,
-                      struct st_vertex_program *prog)
-{
-   _mesa_reference_program(st->ctx,
-                           (struct gl_program **) ptr,
-                           (struct gl_program *) prog);
-}
-
-static INLINE void
-st_reference_geomprog(struct st_context *st,
-                      struct st_geometry_program **ptr,
-                      struct st_geometry_program *prog)
-{
-   _mesa_reference_program(st->ctx,
-                           (struct gl_program **) ptr,
-                           (struct gl_program *) prog);
-}
-
-static INLINE void
-st_reference_fragprog(struct st_context *st,
-                      struct st_fragment_program **ptr,
-                      struct st_fragment_program *prog)
-{
-   _mesa_reference_program(st->ctx,
-                           (struct gl_program **) ptr,
-                           (struct gl_program *) prog);
-}
-
-
-extern struct st_vp_variant *
-st_get_vp_variant(struct st_context *st,
-                  struct st_vertex_program *stvp,
-                  const struct st_vp_variant_key *key);
-
-
-extern struct st_fp_variant *
-st_get_fp_variant(struct st_context *st,
-                  struct st_fragment_program *stfp,
-                  const struct st_fp_variant_key *key);
-
-
-extern struct st_gp_variant *
-st_get_gp_variant(struct st_context *st,
-                  struct st_geometry_program *stgp,
-                  const struct st_gp_variant_key *key);
-
-
-
-extern void
-st_release_vp_variants( struct st_context *st,
-                        struct st_vertex_program *stvp );
-
-extern void
-st_release_fp_variants( struct st_context *st,
-                        struct st_fragment_program *stfp );
-
-extern void
-st_release_gp_variants(struct st_context *st,
-                       struct st_geometry_program *stgp);
-
-
-extern void
-st_print_shaders(struct gl_context *ctx);
-
-extern void
-st_destroy_program_variants(struct st_context *st);
-
-
-#endif
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#ifndef ST_PROGRAM_H
+#define ST_PROGRAM_H
+
+#include "main/mtypes.h"
+#include "program/program.h"
+#include "pipe/p_state.h"
+#include "st_context.h"
+#include "st_glsl_to_tgsi.h"
+
+
+/** Fragment program variant key */
+struct st_fp_variant_key
+{
+   struct st_context *st;         /**< variants are per-context */
+
+   /** for glBitmap */
+   GLuint bitmap:1;               /**< glBitmap variant? */
+
+   /** for glDrawPixels */
+   GLuint drawpixels:1;           /**< glDrawPixels variant */
+   GLuint scaleAndBias:1;         /**< glDrawPixels w/ scale and/or bias? */
+   GLuint pixelMaps:1;            /**< glDrawPixels w/ pixel lookup map? */
+   GLuint drawpixels_z:1;         /**< glDrawPixels(GL_DEPTH) */
+   GLuint drawpixels_stencil:1;   /**< glDrawPixels(GL_STENCIL) */
+};
+
+
+/**
+ * Variant of a fragment program.
+ */
+struct st_fp_variant
+{
+   /** Parameters which generated this version of fragment program */
+   struct st_fp_variant_key key;
+
+   /** Driver's compiled shader */
+   void *driver_shader;
+
+   /** For glBitmap variants */
+   struct gl_program_parameter_list *parameters;
+   uint bitmap_sampler;
+
+   /** next in linked list */
+   struct st_fp_variant *next;
+};
+
+
+/**
+ * Derived from Mesa gl_fragment_program; also holds the TGSI in/out mapping.
+ */
+struct st_fragment_program
+{
+   struct gl_fragment_program Base;  /**< The Mesa fragment program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi; /**< if set, st_translate_program() is used */
+   
+   /** maps a Mesa FRAG_ATTRIB_x to a packed TGSI input index */
+   GLuint input_to_index[FRAG_ATTRIB_MAX];
+   /** maps a TGSI input index back to a Mesa FRAG_ATTRIB_x */
+   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+   GLuint num_inputs;  /**< input count handed to the TGSI translator */
+   GLuint interp_mode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
+
+   /** Maps FRAG_RESULT_x to slot */
+   GLuint result_to_output[FRAG_RESULT_MAX];
+   ubyte output_semantic_name[FRAG_RESULT_MAX];  /**< TGSI_SEMANTIC_x per slot */
+   ubyte output_semantic_index[FRAG_RESULT_MAX]; /**< semantic index per slot */
+   GLuint num_outputs;  /**< output slots filled by st_prepare_fragment_program() */
+
+   struct pipe_shader_state tgsi;  /**< tokens set by st_translate_fragment_program() */
+
+   struct st_fp_variant *variants;  /**< linked via st_fp_variant::next */
+};
+
+
+
+/** Vertex program variant key */
+struct st_vp_variant_key
+{
+   struct st_context *st;          /**< variants are per-context */
+   boolean passthrough_edgeflags;
+};
+
+
+/**
+ * This represents a vertex program, especially translated to match
+ * the inputs of a particular fragment shader.
+ */
+struct st_vp_variant
+{
+   /* Parameters which generated this translated version of a vertex
+    * shader:
+    */
+   struct st_vp_variant_key key;
+
+   /**
+    * TGSI tokens (to later generate a 'draw' module shader for
+    * selection/feedback/rasterpos)
+    */
+   struct pipe_shader_state tgsi;
+
+   /** Driver's compiled shader */
+   void *driver_shader;
+
+   /** For using our private draw module (glRasterPos) */
+   struct draw_vertex_shader *draw_shader;
+
+   /** Next in linked list */
+   struct st_vp_variant *next;  
+
+   /** similar to that in st_vertex_program, but with edgeflags info too */
+   GLuint num_inputs;
+};
+
+
+/**
+ * Derived from Mesa gl_vertex_program:
+ */
+struct st_vertex_program
+{
+   struct gl_vertex_program Base;  /**< The Mesa vertex program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+   /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
+   GLuint input_to_index[VERT_ATTRIB_MAX];
+   /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
+   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
+   GLuint num_inputs;
+
+   /** Maps VERT_RESULT_x to slot */
+   GLuint result_to_output[VERT_RESULT_MAX];
+   ubyte output_semantic_name[VERT_RESULT_MAX];
+   ubyte output_semantic_index[VERT_RESULT_MAX];
+   GLuint num_outputs;
+
+   /** List of translated variants of this vertex program.
+    */
+   struct st_vp_variant *variants;
+};
+
+
+
+/** Geometry program variant key */
+struct st_gp_variant_key
+{
+   struct st_context *st;          /**< variants are per-context */
+   /* no other fields yet */
+};
+
+
+/**
+ * Geometry program variant.
+ */
+struct st_gp_variant
+{
+   /* Parameters which generated this version of the geometry program */
+   struct st_gp_variant_key key;
+
+   void *driver_shader;  /* presumably the driver's compiled shader, as in st_fp_variant */
+
+   struct st_gp_variant *next;  /* next variant in the linked list */
+};
+
+
+/**
+ * Derived from Mesa gl_geometry_program:
+ */
+struct st_geometry_program
+{
+   struct gl_geometry_program Base;  /**< The Mesa geometry program */
+   struct glsl_to_tgsi_visitor* glsl_to_tgsi;
+
+   /** map GP input back to VP output */
+   GLuint input_map[PIPE_MAX_SHADER_INPUTS];
+
+   /** maps a Mesa GEOM_ATTRIB_x to a packed TGSI input index */
+   GLuint input_to_index[GEOM_ATTRIB_MAX];
+   /** maps a TGSI input index back to a Mesa GEOM_ATTRIB_x */
+   GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
+
+   GLuint num_inputs;
+
+   GLuint input_to_slot[GEOM_ATTRIB_MAX];  /**< Maps GEOM_ATTRIB_x to slot */
+   GLuint num_input_slots;
+
+   ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS];
+   ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS];
+
+   struct pipe_shader_state tgsi;
+
+   struct st_gp_variant *variants;
+};
+
+
+
+static INLINE struct st_fragment_program *
+st_fragment_program( struct gl_fragment_program *fp )
+{
+   return (struct st_fragment_program *)fp;  /* downcast: Base is the first member */
+}
+
+
+static INLINE struct st_vertex_program *
+st_vertex_program( struct gl_vertex_program *vp )
+{
+   return (struct st_vertex_program *)vp;  /* downcast: Base is the first member */
+}
+
+static INLINE struct st_geometry_program *
+st_geometry_program( struct gl_geometry_program *gp )
+{
+   return (struct st_geometry_program *)gp;  /* downcast: Base is the first member */
+}
+
+static INLINE void
+st_reference_vertprog(struct st_context *st,
+                      struct st_vertex_program **ptr,
+                      struct st_vertex_program *prog)
+{
+   _mesa_reference_program(st->ctx,
+                           (struct gl_program **) ptr,
+                           (struct gl_program *) prog);
+}
+
+static INLINE void
+st_reference_geomprog(struct st_context *st,
+                      struct st_geometry_program **ptr,
+                      struct st_geometry_program *prog)
+{
+   _mesa_reference_program(st->ctx,
+                           (struct gl_program **) ptr,
+                           (struct gl_program *) prog);
+}
+
+static INLINE void
+st_reference_fragprog(struct st_context *st,
+                      struct st_fragment_program **ptr,
+                      struct st_fragment_program *prog)
+{
+   _mesa_reference_program(st->ctx,
+                           (struct gl_program **) ptr,
+                           (struct gl_program *) prog);
+}
+
+
+extern struct st_vp_variant *
+st_get_vp_variant(struct st_context *st,
+                  struct st_vertex_program *stvp,
+                  const struct st_vp_variant_key *key);
+
+
+extern struct st_fp_variant *
+st_get_fp_variant(struct st_context *st,
+                  struct st_fragment_program *stfp,
+                  const struct st_fp_variant_key *key);
+
+
+extern struct st_gp_variant *
+st_get_gp_variant(struct st_context *st,
+                  struct st_geometry_program *stgp,
+                  const struct st_gp_variant_key *key);
+
+
+extern void
+st_prepare_vertex_program(struct gl_context *ctx,
+                          struct st_vertex_program *stvp);
+
+extern GLboolean  /* GL_TRUE when the program writes FRAG_RESULT_COLOR */
+st_prepare_fragment_program(struct gl_context *ctx,
+                            struct st_fragment_program *stfp);
+
+
+extern void
+st_release_vp_variants( struct st_context *st,
+                        struct st_vertex_program *stvp );
+
+extern void
+st_release_fp_variants( struct st_context *st,
+                        struct st_fragment_program *stfp );
+
+extern void
+st_release_gp_variants(struct st_context *st,
+                       struct st_geometry_program *stgp);
+
+
+extern void
+st_print_shaders(struct gl_context *ctx);
+
+extern void
+st_destroy_program_variants(struct st_context *st);
+
+
+#endif
diff --git a/mesalib/src/mesa/state_tracker/st_texture.c b/mesalib/src/mesa/state_tracker/st_texture.c
index 22ec1306c..0e857fddc 100644
--- a/mesalib/src/mesa/state_tracker/st_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_texture.c
@@ -1,398 +1,418 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include <stdio.h>
-
-#include "st_context.h"
-#include "st_format.h"
-#include "st_texture.h"
-#include "st_cb_fbo.h"
-#include "main/enums.h"
-
-#include "pipe/p_state.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_format.h"
-#include "util/u_rect.h"
-#include "util/u_math.h"
-
-
-#define DBG if(0) printf
-
-
-/**
- * Allocate a new pipe_resource object
- * width0, height0, depth0 are the dimensions of the level 0 image
- * (the highest resolution).  last_level indicates how many mipmap levels
- * to allocate storage for.  For non-mipmapped textures, this will be zero.
- */
-struct pipe_resource *
-st_texture_create(struct st_context *st,
-                  enum pipe_texture_target target,
-		  enum pipe_format format,
-		  GLuint last_level,
-		  GLuint width0,
-		  GLuint height0,
-		  GLuint depth0,
-                  GLuint layers,
-                  GLuint bind )
-{
-   struct pipe_resource pt, *newtex;
-   struct pipe_screen *screen = st->pipe->screen;
-
-   assert(target < PIPE_MAX_TEXTURE_TYPES);
-   assert(width0 > 0);
-   assert(height0 > 0);
-   assert(depth0 > 0);
-   if (target == PIPE_TEXTURE_CUBE)
-      assert(layers == 6);
-
-   DBG("%s target %s format %s last_level %d\n", __FUNCTION__,
-       _mesa_lookup_enum_by_nr(target),
-       _mesa_lookup_enum_by_nr(format), last_level);
-
-   assert(format);
-   assert(screen->is_format_supported(screen, format, target, 0,
-                                      PIPE_BIND_SAMPLER_VIEW));
-
-   memset(&pt, 0, sizeof(pt));
-   pt.target = target;
-   pt.format = format;
-   pt.last_level = last_level;
-   pt.width0 = width0;
-   pt.height0 = height0;
-   pt.depth0 = depth0;
-   pt.array_size = (target == PIPE_TEXTURE_CUBE ? 6 : layers);
-   pt.usage = PIPE_USAGE_DEFAULT;
-   pt.bind = bind;
-   pt.flags = 0;
-
-   newtex = screen->resource_create(screen, &pt);
-
-   assert(!newtex || pipe_is_referenced(&newtex->reference));
-
-   return newtex;
-}
-
-
-/**
- * In OpenGL the number of 1D array texture layers is the "height" and
- * the number of 2D array texture layers is the "depth".  In Gallium the
- * number of layers in an array texture is a separate 'array_size' field.
- * This function converts dimensions from the former to the later.
- */
-void
-st_gl_texture_dims_to_pipe_dims(GLenum texture,
-                                GLuint widthIn,
-                                GLuint heightIn,
-                                GLuint depthIn,
-                                GLuint *widthOut,
-                                GLuint *heightOut,
-                                GLuint *depthOut,
-                                GLuint *layersOut)
-{
-   switch (texture) {
-   case GL_TEXTURE_1D:
-      assert(heightIn == 1);
-      assert(depthIn == 1);
-      *widthOut = widthIn;
-      *heightOut = 1;
-      *depthOut = 1;
-      *layersOut = 1;
-      break;
-   case GL_TEXTURE_1D_ARRAY:
-      assert(depthIn == 1);
-      *widthOut = widthIn;
-      *heightOut = 1;
-      *depthOut = 1;
-      *layersOut = heightIn;
-      break;
-   case GL_TEXTURE_2D:
-   case GL_TEXTURE_RECTANGLE:
-      assert(depthIn == 1);
-      *widthOut = widthIn;
-      *heightOut = heightIn;
-      *depthOut = 1;
-      *layersOut = 1;
-      break;
-   case GL_TEXTURE_CUBE_MAP:
-      assert(depthIn == 1);
-      *widthOut = widthIn;
-      *heightOut = heightIn;
-      *depthOut = 1;
-      *layersOut = 6;
-      break;
-   case GL_TEXTURE_2D_ARRAY:
-      *widthOut = widthIn;
-      *heightOut = heightIn;
-      *depthOut = 1;
-      *layersOut = depthIn;
-      break;
-   default:
-      assert(0 && "Unexpected texture in st_gl_texture_dims_to_pipe_dims()");
-      /* fall-through */
-   case GL_TEXTURE_3D:
-      *widthOut = widthIn;
-      *heightOut = heightIn;
-      *depthOut = depthIn;
-      *layersOut = 1;
-      break;
-   }
-}
-
-
-/**
- * Check if a texture image can be pulled into a unified mipmap texture.
- */
-GLboolean
-st_texture_match_image(const struct pipe_resource *pt,
-                       const struct gl_texture_image *image,
-                       GLuint face, GLuint level)
-{
-   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
-
-   /* Images with borders are never pulled into mipmap textures. 
-    */
-   if (image->Border) 
-      return GL_FALSE;
-
-   /* Check if this image's format matches the established texture's format.
-    */
-   if (st_mesa_format_to_pipe_format(image->TexFormat) != pt->format)
-      return GL_FALSE;
-
-   st_gl_texture_dims_to_pipe_dims(image->TexObject->Target,
-                                   image->Width, image->Height, image->Depth,
-                                   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
-
-   /* Test if this image's size matches what's expected in the
-    * established texture.
-    */
-   if (ptWidth != u_minify(pt->width0, level) ||
-       ptHeight != u_minify(pt->height0, level) ||
-       ptDepth != u_minify(pt->depth0, level) ||
-       ptLayers != pt->array_size)
-      return GL_FALSE;
-
-   return GL_TRUE;
-}
-
-
-/**
- * Map a texture image and return the address for a particular 2D face/slice/
- * layer.  The stImage indicates the cube face and mipmap level.  The slice
- * of the 3D texture is passed in 'zoffset'.
- * \param usage  one of the PIPE_TRANSFER_x values
- * \param x, y, w, h  the region of interest of the 2D image.
- * \return address of mapping or NULL if any error
- */
-GLubyte *
-st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
-                     GLuint zoffset, enum pipe_transfer_usage usage,
-                     GLuint x, GLuint y, GLuint w, GLuint h)
-{
-   struct pipe_context *pipe = st->pipe;
-   struct pipe_resource *pt = stImage->pt;
-
-   DBG("%s \n", __FUNCTION__);
-
-   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
-                                         stImage->face + zoffset,
-                                         usage, x, y, w, h);
-
-   if (stImage->transfer)
-      return pipe_transfer_map(pipe, stImage->transfer);
-   else
-      return NULL;
-}
-
-
-void
-st_texture_image_unmap(struct st_context *st,
-                       struct st_texture_image *stImage)
-{
-   struct pipe_context *pipe = st->pipe;
-
-   DBG("%s\n", __FUNCTION__);
-
-   pipe_transfer_unmap(pipe, stImage->transfer);
-
-   pipe->transfer_destroy(pipe, stImage->transfer);
-}
-
-
-
-/**
- * Upload data to a rectangular sub-region.  Lots of choices how to do this:
- *
- * - memcpy by span to current destination
- * - upload data as new buffer and blit
- *
- * Currently always memcpy.
- */
-static void
-st_surface_data(struct pipe_context *pipe,
-		struct pipe_transfer *dst,
-		unsigned dstx, unsigned dsty,
-		const void *src, unsigned src_stride,
-		unsigned srcx, unsigned srcy, unsigned width, unsigned height)
-{
-   void *map = pipe_transfer_map(pipe, dst);
-
-   assert(dst->resource);
-   util_copy_rect(map,
-                  dst->resource->format,
-                  dst->stride,
-                  dstx, dsty, 
-                  width, height, 
-                  src, src_stride, 
-                  srcx, srcy);
-
-   pipe_transfer_unmap(pipe, dst);
-}
-
-
-/* Upload data for a particular image.
- */
-void
-st_texture_image_data(struct st_context *st,
-                      struct pipe_resource *dst,
-                      GLuint face,
-                      GLuint level,
-                      void *src,
-                      GLuint src_row_stride, GLuint src_image_stride)
-{
-   struct pipe_context *pipe = st->pipe;
-   GLuint i;
-   const GLubyte *srcUB = src;
-   struct pipe_transfer *dst_transfer;
-   GLuint layers;
-
-   if (dst->target == PIPE_TEXTURE_1D_ARRAY ||
-       dst->target == PIPE_TEXTURE_2D_ARRAY)
-      layers = dst->array_size;
-   else
-      layers = u_minify(dst->depth0, level);
-
-   DBG("%s\n", __FUNCTION__);
-
-   for (i = 0; i < layers; i++) {
-      dst_transfer = pipe_get_transfer(st->pipe, dst, level, face + i,
-                                       PIPE_TRANSFER_WRITE, 0, 0,
-                                       u_minify(dst->width0, level),
-                                       u_minify(dst->height0, level));
-
-      st_surface_data(pipe, dst_transfer,
-		      0, 0,                             /* dstx, dsty */
-		      srcUB,
-		      src_row_stride,
-		      0, 0,                             /* source x, y */
-		      u_minify(dst->width0, level),
-                      u_minify(dst->height0, level));    /* width, height */
-
-      pipe->transfer_destroy(pipe, dst_transfer);
-
-      srcUB += src_image_stride;
-   }
-}
-
-
-/**
- * For debug only: get/print center pixel in the src resource.
- */
-static void
-print_center_pixel(struct pipe_context *pipe, struct pipe_resource *src)
-{
-   struct pipe_transfer *xfer;
-   struct pipe_box region;
-   ubyte *map;
-
-   region.x = src->width0 / 2;
-   region.y = src->height0 / 2;
-   region.z = 0;
-   region.width = 1;
-   region.height = 1;
-   region.depth = 1;
-
-   xfer = pipe->get_transfer(pipe, src, 0, PIPE_TRANSFER_READ, &region);
-   map = pipe->transfer_map(pipe, xfer);
-
-   printf("center pixel: %d %d %d %d\n", map[0], map[1], map[2], map[3]);
-
-   pipe->transfer_unmap(pipe, xfer);
-   pipe->transfer_destroy(pipe, xfer);
-}
-
-
-/**
- * Copy the image at level=0 in 'src' to the 'dst' resource at 'dstLevel'.
- * This is used to copy mipmap images from one texture buffer to another.
- * This typically happens when our initial guess at the total texture size
- * is incorrect (see the guess_and_alloc_texture() function).
- */
-void
-st_texture_image_copy(struct pipe_context *pipe,
-                      struct pipe_resource *dst, GLuint dstLevel,
-                      struct pipe_resource *src, GLuint srcLevel,
-                      GLuint face)
-{
-   GLuint width = u_minify(dst->width0, dstLevel);
-   GLuint height = u_minify(dst->height0, dstLevel);
-   GLuint depth = u_minify(dst->depth0, dstLevel);
-   struct pipe_box src_box;
-   GLuint i;
-
-   assert(u_minify(src->width0, srcLevel) == width);
-   assert(u_minify(src->height0, srcLevel) == height);
-   assert(u_minify(src->depth0, srcLevel) == depth);
-
-   src_box.x = 0;
-   src_box.y = 0;
-   src_box.width = width;
-   src_box.height = height;
-   src_box.depth = 1;
-   /* Loop over 3D image slices */
-   /* could (and probably should) use "true" 3d box here -
-      but drivers can't quite handle it yet */
-   for (i = face; i < face + depth; i++) {
-      src_box.z = i;
-
-      if (0)  {
-         print_center_pixel(pipe, src);
-      }
-
-      pipe->resource_copy_region(pipe,
-                                 dst,
-                                 dstLevel,
-                                 0, 0, i,/* destX, Y, Z */
-                                 src,
-                                 srcLevel,
-                                 &src_box);
-   }
-}
-
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "st_context.h"
+#include "st_format.h"
+#include "st_texture.h"
+#include "st_cb_fbo.h"
+#include "main/enums.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_rect.h"
+#include "util/u_math.h"
+
+
+#define DBG if(0) printf
+
+
+/**
+ * Allocate a new pipe_resource object
+ * width0, height0, depth0 are the dimensions of the level 0 image
+ * (the highest resolution).  last_level is the index of the last mipmap
+ * level to allocate storage for.  For non-mipmapped textures, this is zero.
+ */
+struct pipe_resource *
+st_texture_create(struct st_context *st,
+                  enum pipe_texture_target target,
+		  enum pipe_format format,
+		  GLuint last_level,
+		  GLuint width0,
+		  GLuint height0,
+		  GLuint depth0,
+                  GLuint layers,
+                  GLuint bind )
+{
+   struct pipe_resource pt, *newtex;
+   struct pipe_screen *screen = st->pipe->screen;
+
+   assert(target < PIPE_MAX_TEXTURE_TYPES);
+   assert(width0 > 0);
+   assert(height0 > 0);
+   assert(depth0 > 0);
+   if (target == PIPE_TEXTURE_CUBE)
+      assert(layers == 6);
+
+   DBG("%s target %s format %s last_level %d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       _mesa_lookup_enum_by_nr(format), last_level);
+
+   assert(format);
+   assert(screen->is_format_supported(screen, format, target, 0,
+                                      PIPE_BIND_SAMPLER_VIEW));
+
+   memset(&pt, 0, sizeof(pt));
+   pt.target = target;
+   pt.format = format;
+   pt.last_level = last_level;
+   pt.width0 = width0;
+   pt.height0 = height0;
+   pt.depth0 = depth0;
+   pt.array_size = (target == PIPE_TEXTURE_CUBE ? 6 : layers);
+   pt.usage = PIPE_USAGE_DEFAULT;
+   pt.bind = bind;
+   pt.flags = 0;
+
+   newtex = screen->resource_create(screen, &pt);
+
+   assert(!newtex || pipe_is_referenced(&newtex->reference));
+
+   return newtex;
+}
+
+
+/**
+ * In OpenGL the number of 1D array texture layers is the "height" and
+ * the number of 2D array texture layers is the "depth".  In Gallium the
+ * number of layers in an array texture is a separate 'array_size' field.
+ * This function converts dimensions from the former to the latter.
+ */
+void
+st_gl_texture_dims_to_pipe_dims(GLenum texture,
+                                GLuint widthIn,
+                                GLuint heightIn,
+                                GLuint depthIn,
+                                GLuint *widthOut,
+                                GLuint *heightOut,
+                                GLuint *depthOut,
+                                GLuint *layersOut)
+{
+   switch (texture) {
+   case GL_TEXTURE_1D:
+      assert(heightIn == 1);
+      assert(depthIn == 1);
+      *widthOut = widthIn;
+      *heightOut = 1;
+      *depthOut = 1;
+      *layersOut = 1;
+      break;
+   case GL_TEXTURE_1D_ARRAY:
+      assert(depthIn == 1);
+      *widthOut = widthIn;
+      *heightOut = 1;
+      *depthOut = 1;
+      *layersOut = heightIn;
+      break;
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE:
+      assert(depthIn == 1);
+      *widthOut = widthIn;
+      *heightOut = heightIn;
+      *depthOut = 1;
+      *layersOut = 1;
+      break;
+   case GL_TEXTURE_CUBE_MAP:
+      assert(depthIn == 1);
+      *widthOut = widthIn;
+      *heightOut = heightIn;
+      *depthOut = 1;
+      *layersOut = 6;
+      break;
+   case GL_TEXTURE_2D_ARRAY:
+      *widthOut = widthIn;
+      *heightOut = heightIn;
+      *depthOut = 1;
+      *layersOut = depthIn;
+      break;
+   default:
+      assert(0 && "Unexpected texture in st_gl_texture_dims_to_pipe_dims()");
+      /* fall-through */
+   case GL_TEXTURE_3D:
+      *widthOut = widthIn;
+      *heightOut = heightIn;
+      *depthOut = depthIn;
+      *layersOut = 1;
+      break;
+   }
+}
+
+
+/**
+ * Check if a texture image can be pulled into a unified mipmap texture.
+ */
+GLboolean
+st_texture_match_image(const struct pipe_resource *pt,
+                       const struct gl_texture_image *image,
+                       GLuint face, GLuint level)
+{
+   GLuint ptWidth, ptHeight, ptDepth, ptLayers;
+
+   /* Images with borders are never pulled into mipmap textures. 
+    */
+   if (image->Border) 
+      return GL_FALSE;
+
+   /* Check if this image's format matches the established texture's format.
+    */
+   if (st_mesa_format_to_pipe_format(image->TexFormat) != pt->format)
+      return GL_FALSE;
+
+   st_gl_texture_dims_to_pipe_dims(image->TexObject->Target,
+                                   image->Width, image->Height, image->Depth,
+                                   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
+
+   /* Test if this image's size matches what's expected in the
+    * established texture.
+    */
+   if (ptWidth != u_minify(pt->width0, level) ||
+       ptHeight != u_minify(pt->height0, level) ||
+       ptDepth != u_minify(pt->depth0, level) ||
+       ptLayers != pt->array_size)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Map a texture image and return the address for a particular 2D face/slice/
+ * layer.  The stImage indicates the cube face and mipmap level.  The slice
+ * of the 3D texture is passed in 'zoffset'.
+ * \param usage  one of the PIPE_TRANSFER_x values
+ * \param x, y, w, h  the region of interest of the 2D image.
+ * \return address of mapping or NULL if any error
+ */
+GLubyte *
+st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
+                     GLuint zoffset, enum pipe_transfer_usage usage,
+                     GLuint x, GLuint y, GLuint w, GLuint h)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt = stImage->pt;
+
+   DBG("%s \n", __FUNCTION__);
+
+   stImage->transfer = pipe_get_transfer(st->pipe, pt, stImage->level,
+                                         stImage->face + zoffset,
+                                         usage, x, y, w, h);
+
+   if (stImage->transfer)
+      return pipe_transfer_map(pipe, stImage->transfer);
+   else
+      return NULL;
+}
+
+
+void
+st_texture_image_unmap(struct st_context *st,
+                       struct st_texture_image *stImage)
+{
+   struct pipe_context *pipe = st->pipe;
+
+   DBG("%s\n", __FUNCTION__);
+
+   pipe_transfer_unmap(pipe, stImage->transfer);
+
+   pipe->transfer_destroy(pipe, stImage->transfer);
+}
+
+
+
+/**
+ * Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy.
+ */
+static void
+st_surface_data(struct pipe_context *pipe,
+		struct pipe_transfer *dst,
+		unsigned dstx, unsigned dsty,
+		const void *src, unsigned src_stride,
+		unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+   void *map = pipe_transfer_map(pipe, dst);
+
+   assert(dst->resource);
+   util_copy_rect(map,
+                  dst->resource->format,
+                  dst->stride,
+                  dstx, dsty, 
+                  width, height, 
+                  src, src_stride, 
+                  srcx, srcy);
+
+   pipe_transfer_unmap(pipe, dst);
+}
+
+
+/* Upload data for a particular image.
+ */
+void
+st_texture_image_data(struct st_context *st,
+                      struct pipe_resource *dst,
+                      GLuint face,
+                      GLuint level,
+                      void *src,
+                      GLuint src_row_stride, GLuint src_image_stride)
+{
+   struct pipe_context *pipe = st->pipe;
+   GLuint i;
+   const GLubyte *srcUB = src;
+   struct pipe_transfer *dst_transfer;
+   GLuint layers;
+
+   if (dst->target == PIPE_TEXTURE_1D_ARRAY ||
+       dst->target == PIPE_TEXTURE_2D_ARRAY)
+      layers = dst->array_size;
+   else
+      layers = u_minify(dst->depth0, level);
+
+   DBG("%s\n", __FUNCTION__);
+
+   for (i = 0; i < layers; i++) {
+      dst_transfer = pipe_get_transfer(st->pipe, dst, level, face + i,
+                                       PIPE_TRANSFER_WRITE, 0, 0,
+                                       u_minify(dst->width0, level),
+                                       u_minify(dst->height0, level));
+
+      st_surface_data(pipe, dst_transfer,
+		      0, 0,                             /* dstx, dsty */
+		      srcUB,
+		      src_row_stride,
+		      0, 0,                             /* source x, y */
+		      u_minify(dst->width0, level),
+                      u_minify(dst->height0, level));    /* width, height */
+
+      pipe->transfer_destroy(pipe, dst_transfer);
+
+      srcUB += src_image_stride;
+   }
+}
+
+
+/**
+ * For debug only: get/print center pixel in the src resource.
+ */
+static void
+print_center_pixel(struct pipe_context *pipe, struct pipe_resource *src)
+{
+   struct pipe_transfer *xfer;
+   struct pipe_box region;
+   ubyte *map;
+
+   region.x = src->width0 / 2;
+   region.y = src->height0 / 2;
+   region.z = 0;
+   region.width = 1;
+   region.height = 1;
+   region.depth = 1;
+
+   xfer = pipe->get_transfer(pipe, src, 0, PIPE_TRANSFER_READ, &region);
+   map = pipe->transfer_map(pipe, xfer);
+
+   printf("center pixel: %d %d %d %d\n", map[0], map[1], map[2], map[3]);
+
+   pipe->transfer_unmap(pipe, xfer);
+   pipe->transfer_destroy(pipe, xfer);
+}
+
+
+/**
+ * Copy the image at 'srcLevel' in 'src' to the 'dst' resource at 'dstLevel'.
+ * This is used to copy mipmap images from one texture buffer to another.
+ * This typically happens when our initial guess at the total texture size
+ * is incorrect (see the guess_and_alloc_texture() function).
+ */
+void
+st_texture_image_copy(struct pipe_context *pipe,
+                      struct pipe_resource *dst, GLuint dstLevel,
+                      struct pipe_resource *src, GLuint srcLevel,
+                      GLuint face)
+{
+   GLuint width = u_minify(dst->width0, dstLevel);
+   GLuint height = u_minify(dst->height0, dstLevel);
+   GLuint depth = u_minify(dst->depth0, dstLevel);
+   struct pipe_box src_box;
+   GLuint i;
+
+   assert(u_minify(src->width0, srcLevel) == width);
+   assert(u_minify(src->height0, srcLevel) == height);
+   assert(u_minify(src->depth0, srcLevel) == depth);
+
+   src_box.x = 0;
+   src_box.y = 0;
+   src_box.width = width;
+   src_box.height = height;
+   src_box.depth = 1;
+   /* Loop over 3D image slices */
+   /* could (and probably should) use "true" 3d box here -
+      but drivers can't quite handle it yet */
+   for (i = face; i < face + depth; i++) {
+      src_box.z = i;
+
+      if (0)  {
+         print_center_pixel(pipe, src);
+      }
+
+      pipe->resource_copy_region(pipe,
+                                 dst,
+                                 dstLevel,
+                                 0, 0, i,/* destX, Y, Z */
+                                 src,
+                                 srcLevel,
+                                 &src_box);
+   }
+}
+
+/** Create the texSize x texSize 2D sampler texture for a color map/table. */
+struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_resource *pt;
+   enum pipe_format format;
+   const uint texSize = 256; /* width and height; simple, and usually perfect */
+
+   /* ask for a GL_RGBA format (2D target, sampler-view binding) */
+   format = st_choose_format(pipe->screen, GL_RGBA, GL_NONE, GL_NONE,
+                             PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW);
+
+   /* create the color map/table texture: last_level 0, depth 1, 1 layer */
+   pt = st_texture_create(st, PIPE_TEXTURE_2D, format, 0,
+                          texSize, texSize, 1, 1, PIPE_BIND_SAMPLER_VIEW);
+   return pt;
+}
+
diff --git a/mesalib/src/mesa/state_tracker/st_texture.h b/mesalib/src/mesa/state_tracker/st_texture.h
index d50c3c9af..b822f47cf 100644
--- a/mesalib/src/mesa/state_tracker/st_texture.h
+++ b/mesalib/src/mesa/state_tracker/st_texture.h
@@ -232,4 +232,8 @@ st_texture_image_copy(struct pipe_context *pipe,
                       struct pipe_resource *src, GLuint srcLevel,
                       GLuint face);
 
+
+extern struct pipe_resource *
+st_create_color_map_texture(struct gl_context *ctx);
+
 #endif
diff --git a/mesalib/src/mesa/vbo/vbo_exec_array.c b/mesalib/src/mesa/vbo/vbo_exec_array.c
index b908d5aea..32ce0e4a8 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_array.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_array.c
@@ -909,11 +909,10 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
       if (0)
          _mesa_print_arrays(ctx);
 
-#ifdef DEBUG
       /* 'end' was out of bounds, but now let's check the actual array
        * indexes to see if any of them are out of bounds.
        */
-      {
+      if (0) {
          GLuint max = _mesa_max_buffer_index(ctx, count, type, indices,
                                              ctx->Array.ElementArrayBufferObj);
          if (max >= ctx->Array.ArrayObj->_MaxElement) {
@@ -934,7 +933,6 @@ vbo_exec_DrawRangeElementsBaseVertex(GLenum mode,
           * upper bound wrong.
           */
       }
-#endif
 
       /* Set 'end' to the max possible legal value */
       assert(ctx->Array.ArrayObj->_MaxElement >= 1);
-- 
cgit v1.2.3