diff options
| author | marha <marha@users.sourceforge.net> | 2011-03-13 20:15:26 +0000 | 
|---|---|---|
| committer | marha <marha@users.sourceforge.net> | 2011-03-13 20:15:26 +0000 | 
| commit | b5d1fd89898edb34f73679b542c754d837d44cf8 (patch) | |
| tree | b3d14f22d0c5fd984f5ec1ed71ad5263a46e1583 /mesalib/src/mesa | |
| parent | 77ec02adbc8f9657e7749b307d3cc86ccbd163ea (diff) | |
| download | vcxsrv-b5d1fd89898edb34f73679b542c754d837d44cf8.tar.gz vcxsrv-b5d1fd89898edb34f73679b542c754d837d44cf8.tar.bz2 vcxsrv-b5d1fd89898edb34f73679b542c754d837d44cf8.zip | |
xkeyboard-config libxcb pixman mesalib git update 13 Mar 2011
Diffstat (limited to 'mesalib/src/mesa')
| -rw-r--r-- | mesalib/src/mesa/main/ff_fragment_shader.cpp | 1435 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/mtypes.h | 1 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/state.c | 19 | ||||
| -rw-r--r-- | mesalib/src/mesa/main/texenvprogram.h | 2 | ||||
| -rw-r--r-- | mesalib/src/mesa/program/program.c | 2 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_clear.c | 1180 | ||||
| -rw-r--r-- | mesalib/src/mesa/state_tracker/st_cb_texture.c | 2 | 
7 files changed, 1386 insertions, 1255 deletions
| diff --git a/mesalib/src/mesa/main/ff_fragment_shader.cpp b/mesalib/src/mesa/main/ff_fragment_shader.cpp index ed513397a..0bc534df5 100644 --- a/mesalib/src/mesa/main/ff_fragment_shader.cpp +++ b/mesalib/src/mesa/main/ff_fragment_shader.cpp @@ -3,7 +3,6 @@   * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   * All Rights Reserved.
   * Copyright 2009 VMware, Inc.  All Rights Reserved.
 - * Copyright © 2010 Intel Corporation
   * 
   * Permission is hereby granted, free of charge, to any person obtaining a
   * copy of this software and associated documentation files (the
 @@ -31,8 +30,6 @@ extern "C" {  #include "glheader.h"
  #include "imports.h"
  #include "mtypes.h"
 -#include "main/uniforms.h"
 -#include "main/macros.h"
  #include "program/program.h"
  #include "program/prog_parameter.h"
  #include "program/prog_cache.h"
 @@ -42,13 +39,6 @@ extern "C" {  #include "program/programopt.h"
  #include "texenvprogram.h"
  }
 -#include "../glsl/glsl_types.h"
 -#include "../glsl/ir.h"
 -#include "../glsl/glsl_symbol_table.h"
 -#include "../glsl/glsl_parser_extras.h"
 -#include "../glsl/ir_optimization.h"
 -#include "../glsl/ir_print_visitor.h"
 -#include "../program/ir_to_mesa.h"
  /*
   * Note on texture units:
 @@ -69,7 +59,7 @@ struct texenvprog_cache_item  {
     GLuint hash;
     void *key;
 -   struct gl_shader_program *data;
 +   struct gl_fragment_program *data;
     struct texenvprog_cache_item *next;
  };
 @@ -86,6 +76,13 @@ texenv_doing_secondary_color(struct gl_context *ctx)     return GL_FALSE;
  }
 +/**
 + * Up to nine instructions per tex unit, plus fog, specular color.
 + */
 +#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12)
 +
 +#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)
 +
  struct mode_opt {
  #ifdef __GNUC__
     __extension__ GLubyte Source:4;  /**< SRC_x */
 @@ -119,6 +116,8 @@ struct state_key {        GLuint NumArgsA:3;  /**< up to MAX_COMBINER_TERMS */
        GLuint ModeA:5;     /**< MODE_x */
 +      GLuint texture_cyl_wrap:1; /**< For gallium test/debug only */
 +
        struct mode_opt OptRGB[MAX_COMBINER_TERMS];
        struct mode_opt OptA[MAX_COMBINER_TERMS];
     } unit[MAX_TEXTURE_UNITS];
 @@ -471,6 +470,10 @@ static GLuint make_state_key( struct gl_context *ctx,  struct state_key *key )           key->unit[i].OptRGB[1].Operand = OPR_SRC_COLOR;
           key->unit[i].OptRGB[1].Source = texUnit->BumpTarget - GL_TEXTURE0 + SRC_TEXTURE0;
         }
 +
 +      /* this is a back-door for enabling cylindrical texture wrap mode */
 +      if (texObj->Priority == 0.125)
 +         key->unit[i].texture_cyl_wrap = 1;
     }
     /* _NEW_LIGHT | _NEW_FOG */
 @@ -499,15 +502,40 @@ static GLuint make_state_key( struct gl_context *ctx,  struct state_key *key )  }
 +/**
 + * Use uregs to represent registers internally, translate to Mesa's
 + * expected formats on emit.  
 + *
 + * NOTE: These are passed by value extensively in this file rather
 + * than as usual by pointer reference.  If this disturbs you, try
 +  * remembering they are just 32 bits in size.
 + *
 + * GCC is smart enough to deal with these dword-sized structures in
 + * much the same way as if I had defined them as dwords and was using
 + * macros to access and set the fields.  This is much nicer and easier
 + * to evolve.
 + */
 +struct ureg {
 +   GLuint file:4;
 +   GLuint idx:8;
 +   GLuint negatebase:1;
 +   GLuint swz:12;
 +   GLuint pad:7;
 +};
 +
 +static const struct ureg undef = { 
 +   PROGRAM_UNDEFINED,
 +   255,
 +   0,
 +   0,
 +   0
 +};
 +
 +
  /** State used to build the fragment program:
   */
  struct texenv_fragment_program {
 -   struct gl_shader_program *shader_program;
 -   struct gl_shader *shader;
     struct gl_fragment_program *program;
 -   exec_list *instructions;
 -   exec_list *top_instructions;
 -   void *mem_ctx;
     struct state_key *state;
     GLbitfield alu_temps;	/**< Track texture indirections, see spec. */
 @@ -515,35 +543,385 @@ struct texenv_fragment_program {     GLbitfield temp_in_use;	/**< Tracks temporary regs which are in use. */
     GLboolean error;
 -   ir_variable *src_texture[MAX_TEXTURE_COORD_UNITS];
 +   struct ureg src_texture[MAX_TEXTURE_COORD_UNITS];   
     /* Reg containing each texture unit's sampled texture color,
      * else undef.
      */
 -   /* Texcoord override from bumpmapping. */
 -   struct ir_variable *texcoord_tex[MAX_TEXTURE_COORD_UNITS];
 -
 +   struct ureg texcoord_tex[MAX_TEXTURE_COORD_UNITS];
     /* Reg containing texcoord for a texture unit,
      * needed for bump mapping, else undef.
      */
 -   ir_rvalue *src_previous;	/**< Reg containing color from previous
 +   struct ureg src_previous;	/**< Reg containing color from previous 
  				 * stage.  May need to be decl'd.
  				 */
     GLuint last_tex_stage;	/**< Number of last enabled texture unit */
 +
 +   struct ureg half;
 +   struct ureg one;
 +   struct ureg zero;
  };
 -static ir_rvalue *
 -get_source(struct texenv_fragment_program *p,
 -	   GLuint src, GLuint unit)
 +
 +
 +static struct ureg make_ureg(GLuint file, GLuint idx)
 +{
 +   struct ureg reg;
 +   reg.file = file;
 +   reg.idx = idx;
 +   reg.negatebase = 0;
 +   reg.swz = SWIZZLE_NOOP;
 +   reg.pad = 0;
 +   return reg;
 +}
 +
 +static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
 +{
 +   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
 +			   GET_SWZ(reg.swz, y),
 +			   GET_SWZ(reg.swz, z),
 +			   GET_SWZ(reg.swz, w));
 +
 +   return reg;
 +}
 +
 +static struct ureg swizzle1( struct ureg reg, int x )
 +{
 +   return swizzle(reg, x, x, x, x);
 +}
 +
 +static struct ureg negate( struct ureg reg )
 +{
 +   reg.negatebase ^= 1;
 +   return reg;
 +}
 +
 +static GLboolean is_undef( struct ureg reg )
 +{
 +   return reg.file == PROGRAM_UNDEFINED;
 +}
 +
 +
 +static struct ureg get_temp( struct texenv_fragment_program *p )
 +{
 +   GLint bit;
 +   
 +   /* First try to reuse temps which have been used already:
 +    */
 +   bit = _mesa_ffs( ~p->temp_in_use & p->alu_temps );
 +
 +   /* Then any unused temporary:
 +    */
 +   if (!bit)
 +      bit = _mesa_ffs( ~p->temp_in_use );
 +
 +   if (!bit) {
 +      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 +      exit(1);
 +   }
 +
 +   if ((GLuint) bit > p->program->Base.NumTemporaries)
 +      p->program->Base.NumTemporaries = bit;
 +
 +   p->temp_in_use |= 1<<(bit-1);
 +   return make_ureg(PROGRAM_TEMPORARY, (bit-1));
 +}
 +
 +static struct ureg get_tex_temp( struct texenv_fragment_program *p )
 +{
 +   int bit;
 +   
 +   /* First try to find available temp not previously used (to avoid
 +    * starting a new texture indirection).  According to the spec, the
 +    * ~p->temps_output isn't necessary, but will keep it there for
 +    * now:
 +    */
 +   bit = _mesa_ffs( ~p->temp_in_use & ~p->alu_temps & ~p->temps_output );
 +
 +   /* Then any unused temporary:
 +    */
 +   if (!bit) 
 +      bit = _mesa_ffs( ~p->temp_in_use );
 +
 +   if (!bit) {
 +      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
 +      exit(1);
 +   }
 +
 +   if ((GLuint) bit > p->program->Base.NumTemporaries)
 +      p->program->Base.NumTemporaries = bit;
 +
 +   p->temp_in_use |= 1<<(bit-1);
 +   return make_ureg(PROGRAM_TEMPORARY, (bit-1));
 +}
 +
 +
 +/** Mark a temp reg as being no longer allocatable. */
 +static void reserve_temp( struct texenv_fragment_program *p, struct ureg r )
 +{
 +   if (r.file == PROGRAM_TEMPORARY)
 +      p->temps_output |= (1 << r.idx);
 +}
 +
 +
 +static void release_temps(struct gl_context *ctx, struct texenv_fragment_program *p )
 +{
 +   GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps;
 +
 +   /* KW: To support tex_env_crossbar, don't release the registers in
 +    * temps_output.
 +    */
 +   if (max_temp >= sizeof(int) * 8)
 +      p->temp_in_use = p->temps_output;
 +   else
 +      p->temp_in_use = ~((1<<max_temp)-1) | p->temps_output;
 +}
 +
 +
 +static struct ureg register_param5( struct texenv_fragment_program *p, 
 +				    GLint s0,
 +				    GLint s1,
 +				    GLint s2,
 +				    GLint s3,
 +				    GLint s4)
 +{
 +   int tokens[STATE_LENGTH];
 +   GLuint idx;
 +   tokens[0] = s0;
 +   tokens[1] = s1;
 +   tokens[2] = s2;
 +   tokens[3] = s3;
 +   tokens[4] = s4;
 +   idx = _mesa_add_state_reference(p->program->Base.Parameters,
 +				   (gl_state_index *)tokens);
 +   return make_ureg(PROGRAM_STATE_VAR, idx);
 +}
 +
 +
 +#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
 +#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
 +#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
 +#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
 +
 +static GLuint frag_to_vert_attrib( GLuint attrib )
 +{
 +   switch (attrib) {
 +   case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0;
 +   case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1;
 +   default:
 +      assert(attrib >= FRAG_ATTRIB_TEX0);
 +      assert(attrib <= FRAG_ATTRIB_TEX7);
 +      return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0;
 +   }
 +}
 +
 +
 +static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
 +{
 +   if (p->state->inputs_available & (1<<input)) {
 +      p->program->Base.InputsRead |= (1 << input);
 +      return make_ureg(PROGRAM_INPUT, input);
 +   }
 +   else {
 +      GLuint idx = frag_to_vert_attrib( input );
 +      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
 +   }
 +}
 +
 +
 +static void emit_arg( struct prog_src_register *reg,
 +		      struct ureg ureg )
 +{
 +   reg->File = ureg.file;
 +   reg->Index = ureg.idx;
 +   reg->Swizzle = ureg.swz;
 +   reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE;
 +   reg->Abs = GL_FALSE;
 +}
 +
 +static void emit_dst( struct prog_dst_register *dst,
 +		      struct ureg ureg, GLuint mask )
 +{
 +   dst->File = ureg.file;
 +   dst->Index = ureg.idx;
 +   dst->WriteMask = mask;
 +   dst->CondMask = COND_TR;  /* always pass cond test */
 +   dst->CondSwizzle = SWIZZLE_NOOP;
 +}
 +
 +static struct prog_instruction *
 +emit_op(struct texenv_fragment_program *p,
 +	enum prog_opcode op,
 +	struct ureg dest,
 +	GLuint mask,
 +	GLboolean saturate,
 +	struct ureg src0,
 +	struct ureg src1,
 +	struct ureg src2 )
 +{
 +   const GLuint nr = p->program->Base.NumInstructions++;
 +   struct prog_instruction *inst = &p->program->Base.Instructions[nr];
 +
 +   assert(nr < MAX_INSTRUCTIONS);
 +
 +   _mesa_init_instructions(inst, 1);
 +   inst->Opcode = op;
 +   
 +   emit_arg( &inst->SrcReg[0], src0 );
 +   emit_arg( &inst->SrcReg[1], src1 );
 +   emit_arg( &inst->SrcReg[2], src2 );
 +   
 +   inst->SaturateMode = saturate ? SATURATE_ZERO_ONE : SATURATE_OFF;
 +
 +   emit_dst( &inst->DstReg, dest, mask );
 +
 +#if 0
 +   /* Accounting for indirection tracking:
 +    */
 +   if (dest.file == PROGRAM_TEMPORARY)
 +      p->temps_output |= 1 << dest.idx;
 +#endif
 +
 +   return inst;
 +}
 +   
 +
 +static struct ureg emit_arith( struct texenv_fragment_program *p,
 +			       enum prog_opcode op,
 +			       struct ureg dest,
 +			       GLuint mask,
 +			       GLboolean saturate,
 +			       struct ureg src0,
 +			       struct ureg src1,
 +			       struct ureg src2 )
 +{
 +   emit_op(p, op, dest, mask, saturate, src0, src1, src2);
 +   
 +   /* Accounting for indirection tracking:
 +    */
 +   if (src0.file == PROGRAM_TEMPORARY)
 +      p->alu_temps |= 1 << src0.idx;
 +
 +   if (!is_undef(src1) && src1.file == PROGRAM_TEMPORARY)
 +      p->alu_temps |= 1 << src1.idx;
 +
 +   if (!is_undef(src2) && src2.file == PROGRAM_TEMPORARY)
 +      p->alu_temps |= 1 << src2.idx;
 +
 +   if (dest.file == PROGRAM_TEMPORARY)
 +      p->alu_temps |= 1 << dest.idx;
 +       
 +   p->program->Base.NumAluInstructions++;
 +   return dest;
 +}
 +
 +static struct ureg emit_texld( struct texenv_fragment_program *p,
 +			       enum prog_opcode op,
 +			       struct ureg dest,
 +			       GLuint destmask,
 +			       GLuint tex_unit,
 +			       GLuint tex_idx,
 +                               GLuint tex_shadow,
 +			       struct ureg coord )
 +{
 +   struct prog_instruction *inst = emit_op( p, op, 
 +					  dest, destmask, 
 +					  GL_FALSE,	/* don't saturate? */
 +					  coord, 	/* arg 0? */
 +					  undef,
 +					  undef);
 +   
 +   inst->TexSrcTarget = tex_idx;
 +   inst->TexSrcUnit = tex_unit;
 +   inst->TexShadow = tex_shadow;
 +
 +   p->program->Base.NumTexInstructions++;
 +
 +   /* Accounting for indirection tracking:
 +    */
 +   reserve_temp(p, dest);
 +
 +#if 0
 +   /* Is this a texture indirection?
 +    */
 +   if ((coord.file == PROGRAM_TEMPORARY &&
 +	(p->temps_output & (1<<coord.idx))) ||
 +       (dest.file == PROGRAM_TEMPORARY &&
 +	(p->alu_temps & (1<<dest.idx)))) {
 +      p->program->Base.NumTexIndirections++;
 +      p->temps_output = 1<<coord.idx;
 +      p->alu_temps = 0;
 +      assert(0);		/* KW: texture env crossbar */
 +   }
 +#endif
 +
 +   return dest;
 +}
 +
 +
 +static struct ureg register_const4f( struct texenv_fragment_program *p, 
 +				     GLfloat s0,
 +				     GLfloat s1,
 +				     GLfloat s2,
 +				     GLfloat s3)
 +{
 +   GLfloat values[4];
 +   GLuint idx, swizzle;
 +   struct ureg r;
 +   values[0] = s0;
 +   values[1] = s1;
 +   values[2] = s2;
 +   values[3] = s3;
 +   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
 +                                     &swizzle );
 +   r = make_ureg(PROGRAM_CONSTANT, idx);
 +   r.swz = swizzle;
 +   return r;
 +}
 +
 +#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
 +#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
 +#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
 +#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
 +
 +
 +static struct ureg get_one( struct texenv_fragment_program *p )
 +{
 +   if (is_undef(p->one)) 
 +      p->one = register_scalar_const(p, 1.0);
 +   return p->one;
 +}
 +
 +static struct ureg get_half( struct texenv_fragment_program *p )
  {
 -   ir_variable *var;
 -   ir_dereference *deref;
 +   if (is_undef(p->half)) 
 +      p->half = register_scalar_const(p, 0.5);
 +   return p->half;
 +}
 +
 +static struct ureg get_zero( struct texenv_fragment_program *p )
 +{
 +   if (is_undef(p->zero)) 
 +      p->zero = register_scalar_const(p, 0.0);
 +   return p->zero;
 +}
 +
 +
 +static void program_error( struct texenv_fragment_program *p, const char *msg )
 +{
 +   _mesa_problem(NULL, "%s", msg);
 +   p->error = 1;
 +}
 +static struct ureg get_source( struct texenv_fragment_program *p, 
 +			       GLuint src, GLuint unit )
 +{
     switch (src) {
     case SRC_TEXTURE: 
 -      return new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]);
 +      assert(!is_undef(p->src_texture[unit]));
 +      return p->src_texture[unit];
     case SRC_TEXTURE0:
     case SRC_TEXTURE1:
 @@ -553,69 +931,66 @@ get_source(struct texenv_fragment_program *p,     case SRC_TEXTURE5:
     case SRC_TEXTURE6:
     case SRC_TEXTURE7: 
 -      return new(p->mem_ctx)
 -	 ir_dereference_variable(p->src_texture[src - SRC_TEXTURE0]);
 +      assert(!is_undef(p->src_texture[src - SRC_TEXTURE0]));
 +      return p->src_texture[src - SRC_TEXTURE0];
     case SRC_CONSTANT:
 -      var = p->shader->symbols->get_variable("gl_TextureEnvColor");
 -      assert(var);
 -      deref = new(p->mem_ctx) ir_dereference_variable(var);
 -      var->max_array_access = MAX2(var->max_array_access, unit);
 -      return new(p->mem_ctx) ir_dereference_array(deref,
 -						  new(p->mem_ctx) ir_constant(unit));
 +      return register_param2(p, STATE_TEXENV_COLOR, unit);
     case SRC_PRIMARY_COLOR:
 -      var = p->shader->symbols->get_variable("gl_Color");
 -      assert(var);
 -      return new(p->mem_ctx) ir_dereference_variable(var);
 +      return register_input(p, FRAG_ATTRIB_COL0);
     case SRC_ZERO:
 -      return new(p->mem_ctx) ir_constant(0.0f);
 +      return get_zero(p);
     case SRC_PREVIOUS:
 -      if (!p->src_previous) {
 -	 var = p->shader->symbols->get_variable("gl_Color");
 -	 assert(var);
 -	 return new(p->mem_ctx) ir_dereference_variable(var);
 -      } else {
 -	 return p->src_previous->clone(p->mem_ctx, NULL);
 -      }
 +      if (is_undef(p->src_previous))
 +	 return register_input(p, FRAG_ATTRIB_COL0);
 +      else
 +	 return p->src_previous;
     default:
        assert(0);
 -      return NULL;
 +      return undef;
     }
  }
 -static ir_rvalue *
 -emit_combine_source(struct texenv_fragment_program *p,
 -		    GLuint unit,
 -		    GLuint source,
 -		    GLuint operand)
 +static struct ureg emit_combine_source( struct texenv_fragment_program *p, 
 +					GLuint mask,
 +					GLuint unit,
 +					GLuint source, 
 +					GLuint operand )
  {
 -   ir_rvalue *src;
 +   struct ureg arg, src, one;
     src = get_source(p, source, unit);
     switch (operand) {
     case OPR_ONE_MINUS_SRC_COLOR: 
 -      return new(p->mem_ctx) ir_expression(ir_binop_sub,
 -					   new(p->mem_ctx) ir_constant(1.0f),
 -					   src);
 +      /* Get unused tmp,
 +       * Emit tmp = 1.0 - arg.xyzw
 +       */
 +      arg = get_temp( p );
 +      one = get_one( p );
 +      return emit_arith( p, OPCODE_SUB, arg, mask, 0, one, src, undef);
     case OPR_SRC_ALPHA: 
 -      return new(p->mem_ctx) ir_swizzle(src, 3, 3, 3, 3, 1);
 -
 +      if (mask == WRITEMASK_W)
 +	 return src;
 +      else
 +	 return swizzle1( src, SWIZZLE_W );
     case OPR_ONE_MINUS_SRC_ALPHA: 
 -      return new(p->mem_ctx) ir_expression(ir_binop_sub,
 -					   new(p->mem_ctx) ir_constant(1.0f),
 -					   new(p->mem_ctx) ir_swizzle(src,
 -								      3, 3,
 -								      3, 3, 1));
 +      /* Get unused tmp,
 +       * Emit tmp = 1.0 - arg.wwww
 +       */
 +      arg = get_temp(p);
 +      one = get_one(p);
 +      return emit_arith(p, OPCODE_SUB, arg, mask, 0,
 +			one, swizzle1(src, SWIZZLE_W), undef);
     case OPR_ZERO:
 -      return new(p->mem_ctx) ir_constant(0.0f);
 +      return get_zero(p);
     case OPR_ONE:
 -      return new(p->mem_ctx) ir_constant(1.0f);
 +      return get_one(p);
     case OPR_SRC_COLOR: 
        return src;
     default:
 @@ -664,104 +1039,112 @@ static GLboolean args_match( const struct state_key *key, GLuint unit )     return GL_TRUE;
  }
 -static ir_rvalue *
 -smear(struct texenv_fragment_program *p, ir_rvalue *val)
 -{
 -   if (!val->type->is_scalar())
 -      return val;
 -
 -   return new(p->mem_ctx) ir_swizzle(val, 0, 0, 0, 0, 4);
 -}
 -
 -static ir_rvalue *
 -emit_combine(struct texenv_fragment_program *p,
 -	     GLuint unit,
 -	     GLuint nr,
 -	     GLuint mode,
 -	     const struct mode_opt *opt)
 +static struct ureg emit_combine( struct texenv_fragment_program *p,
 +				 struct ureg dest,
 +				 GLuint mask,
 +				 GLboolean saturate,
 +				 GLuint unit,
 +				 GLuint nr,
 +				 GLuint mode,
 +				 const struct mode_opt *opt)
  {
 -   ir_rvalue *src[MAX_COMBINER_TERMS];
 -   ir_rvalue *tmp0, *tmp1;
 +   struct ureg src[MAX_COMBINER_TERMS];
 +   struct ureg tmp, half;
     GLuint i;
     assert(nr <= MAX_COMBINER_TERMS);
     for (i = 0; i < nr; i++)
 -      src[i] = emit_combine_source( p, unit, opt[i].Source, opt[i].Operand );
 +      src[i] = emit_combine_source( p, mask, unit, opt[i].Source, opt[i].Operand );
     switch (mode) {
     case MODE_REPLACE: 
 -      return src[0];
 -
 +      if (mask == WRITEMASK_XYZW && !saturate)
 +	 return src[0];
 +      else
 +	 return emit_arith( p, OPCODE_MOV, dest, mask, saturate, src[0], undef, undef );
     case MODE_MODULATE: 
 -      return new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]);
 -
 +      return emit_arith( p, OPCODE_MUL, dest, mask, saturate,
 +			 src[0], src[1], undef );
     case MODE_ADD: 
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, src[0], src[1]);
 -
 +      return emit_arith( p, OPCODE_ADD, dest, mask, saturate, 
 +			 src[0], src[1], undef );
     case MODE_ADD_SIGNED:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, src[0], src[1]);
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0,
 -					   new(p->mem_ctx) ir_constant(-0.5f));
 -
 +      /* tmp = arg0 + arg1
 +       * result = tmp - .5
 +       */
 +      half = get_half(p);
 +      tmp = get_temp( p );
 +      emit_arith( p, OPCODE_ADD, tmp, mask, 0, src[0], src[1], undef );
 +      emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp, half, undef );
 +      return dest;
     case MODE_INTERPOLATE: 
 -      /* Arg0 * (Arg2) + Arg1 * (1-Arg2) */
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]);
 -
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_sub,
 -					   new(p->mem_ctx) ir_constant(1.0f),
 -					   src[2]->clone(p->mem_ctx, NULL));
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[1], tmp1);
 -
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1);
 +      /* Arg0 * (Arg2) + Arg1 * (1-Arg2) -- note arguments are reordered:
 +       */
 +      return emit_arith( p, OPCODE_LRP, dest, mask, saturate, src[2], src[0], src[1] );
     case MODE_SUBTRACT: 
 -      return new(p->mem_ctx) ir_expression(ir_binop_sub, src[0], src[1]);
 +      return emit_arith( p, OPCODE_SUB, dest, mask, saturate, src[0], src[1], undef );
     case MODE_DOT3_RGBA:
     case MODE_DOT3_RGBA_EXT: 
     case MODE_DOT3_RGB_EXT:
     case MODE_DOT3_RGB: {
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0],
 -					   new(p->mem_ctx) ir_constant(2.0f));
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0,
 -					   new(p->mem_ctx) ir_constant(-1.0f));
 -      tmp0 = new(p->mem_ctx) ir_swizzle(smear(p, tmp0), 0, 1, 2, 3, 3);
 -
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[1],
 -					   new(p->mem_ctx) ir_constant(2.0f));
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp1,
 -					   new(p->mem_ctx) ir_constant(-1.0f));
 -      tmp1 = new(p->mem_ctx) ir_swizzle(smear(p, tmp1), 0, 1, 2, 3, 3);
 -
 -      return new(p->mem_ctx) ir_expression(ir_binop_dot, tmp0, tmp1);
 -   }
 -   case MODE_MODULATE_ADD_ATI:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]);
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, src[1]);
 +      struct ureg tmp0 = get_temp( p );
 +      struct ureg tmp1 = get_temp( p );
 +      struct ureg neg1 = register_scalar_const(p, -1);
 +      struct ureg two  = register_scalar_const(p, 2);
 -   case MODE_MODULATE_SIGNED_ADD_ATI:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]);
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, src[1]);
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0,
 -					   new(p->mem_ctx) ir_constant(-0.5f));
 +      /* tmp0 = 2*src0 - 1
 +       * tmp1 = 2*src1 - 1
 +       *
 +       * dst = tmp0 dot3 tmp1 
 +       */
 +      emit_arith( p, OPCODE_MAD, tmp0, WRITEMASK_XYZW, 0, 
 +		  two, src[0], neg1);
 +      if (memcmp(&src[0], &src[1], sizeof(struct ureg)) == 0)
 +	 tmp1 = tmp0;
 +      else
 +	 emit_arith( p, OPCODE_MAD, tmp1, WRITEMASK_XYZW, 0, 
 +		     two, src[1], neg1);
 +      emit_arith( p, OPCODE_DP3, dest, mask, saturate, tmp0, tmp1, undef);
 +      return dest;
 +   }
 +   case MODE_MODULATE_ADD_ATI:
 +      /* Arg0 * Arg2 + Arg1 */
 +      return emit_arith( p, OPCODE_MAD, dest, mask, saturate,
 +			 src[0], src[2], src[1] );
 +   case MODE_MODULATE_SIGNED_ADD_ATI: {
 +      /* Arg0 * Arg2 + Arg1 - 0.5 */
 +      struct ureg tmp0 = get_temp(p);
 +      half = get_half(p);
 +      emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[0], src[2], src[1] );
 +      emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
 +      return dest;
 +   }
     case MODE_MODULATE_SUBTRACT_ATI:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[2]);
 -      return new(p->mem_ctx) ir_expression(ir_binop_sub, tmp0, src[1]);
 -
 +      /* Arg0 * Arg2 - Arg1 */
 +      emit_arith( p, OPCODE_MAD, dest, mask, 0, src[0], src[2], negate(src[1]) );
 +      return dest;
     case MODE_ADD_PRODUCTS:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]);
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[2], src[3]);
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1);
 -
 +      /* Arg0 * Arg1 + Arg2 * Arg3 */
 +      {
 +         struct ureg tmp0 = get_temp(p);
 +         emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
 +         emit_arith( p, OPCODE_MAD, dest, mask, saturate, src[2], src[3], tmp0 );
 +      }
 +      return dest;
     case MODE_ADD_PRODUCTS_SIGNED:
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[0], src[1]);
 -      tmp1 = new(p->mem_ctx) ir_expression(ir_binop_mul, src[2], src[3]);
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add, tmp0, tmp1);
 -      return new(p->mem_ctx) ir_expression(ir_binop_add, tmp0,
 -					   new(p->mem_ctx) ir_constant(-0.5f));
 -
 +      /* Arg0 * Arg1 + Arg2 * Arg3 - 0.5 */
 +      {
 +         struct ureg tmp0 = get_temp(p);
 +         half = get_half(p);
 +         emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
 +         emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[2], src[3], tmp0 );
 +         emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
 +      }
 +      return dest;
     case MODE_BUMP_ENVMAP_ATI:
        /* special - not handled here */
        assert(0);
 @@ -772,24 +1155,17 @@ emit_combine(struct texenv_fragment_program *p,     }
  }
 -static ir_rvalue *
 -saturate(struct texenv_fragment_program *p, ir_rvalue *val)
 -{
 -   val = new(p->mem_ctx) ir_expression(ir_binop_min, val,
 -				       new(p->mem_ctx) ir_constant(1.0f));
 -   return new(p->mem_ctx) ir_expression(ir_binop_max, val,
 -					new(p->mem_ctx) ir_constant(0.0f));
 -}
  /**
   * Generate instructions for one texture unit's env/combiner mode.
   */
 -static ir_rvalue *
 +static struct ureg
  emit_texenv(struct texenv_fragment_program *p, GLuint unit)
  {
     const struct state_key *key = p->state;
     GLboolean rgb_saturate, alpha_saturate;
     GLuint rgb_shift, alpha_shift;
 +   struct ureg out, dest;
     if (!key->unit[unit].enabled) {
        return get_source(p, SRC_PREVIOUS, 0);
 @@ -831,232 +1207,129 @@ emit_texenv(struct texenv_fragment_program *p, GLuint unit)     else
        alpha_saturate = GL_FALSE;
 -   ir_variable *temp_var = new(p->mem_ctx) ir_variable(glsl_type::vec4_type,
 -						       "texenv_combine",
 -						       ir_var_temporary);
 -   p->instructions->push_tail(temp_var);
 -
 -   ir_dereference *deref;
 -   ir_assignment *assign;
 -   ir_rvalue *val;
 +   /* If this is the very last calculation (and various other conditions
 +    * are met), emit directly to the color output register.  Otherwise,
 +    * emit to a temporary register.
 +    */
 +   if (key->separate_specular ||
 +       unit != p->last_tex_stage ||
 +       alpha_shift ||
 +       key->num_draw_buffers != 1 ||
 +       rgb_shift)
 +      dest = get_temp( p );
 +   else
 +      dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
     /* Emit the RGB and A combine ops
      */
     if (key->unit[unit].ModeRGB == key->unit[unit].ModeA &&
         args_match(key, unit)) {
 -      val = emit_combine(p, unit,
 -			 key->unit[unit].NumArgsRGB,
 -			 key->unit[unit].ModeRGB,
 -			 key->unit[unit].OptRGB);
 -      val = smear(p, val);
 -      if (rgb_saturate)
 -	 val = saturate(p, val);
 -
 -      deref = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      assign = new(p->mem_ctx) ir_assignment(deref, val, NULL);
 -      p->instructions->push_tail(assign);
 +      out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
 +			  unit,
 +			  key->unit[unit].NumArgsRGB,
 +			  key->unit[unit].ModeRGB,
 +			  key->unit[unit].OptRGB);
     }
     else if (key->unit[unit].ModeRGB == MODE_DOT3_RGBA_EXT ||
  	    key->unit[unit].ModeRGB == MODE_DOT3_RGBA) {
 -      ir_rvalue *val = emit_combine(p, unit,
 -				    key->unit[unit].NumArgsRGB,
 -				    key->unit[unit].ModeRGB,
 -				    key->unit[unit].OptRGB);
 -      val = smear(p, val);
 -      if (rgb_saturate)
 -	 val = saturate(p, val);
 -      deref = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      assign = new(p->mem_ctx) ir_assignment(deref, val, NULL);
 -      p->instructions->push_tail(assign);
 +      out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
 +			  unit,
 +			  key->unit[unit].NumArgsRGB,
 +			  key->unit[unit].ModeRGB,
 +			  key->unit[unit].OptRGB);
     }
     else {
        /* Need to do something to stop from re-emitting identical
         * argument calculations here:
         */
 -      val = emit_combine(p, unit,
 -			 key->unit[unit].NumArgsRGB,
 -			 key->unit[unit].ModeRGB,
 -			 key->unit[unit].OptRGB);
 -      val = smear(p, val);
 -      val = new(p->mem_ctx) ir_swizzle(val, 0, 1, 2, 3, 3);
 -      if (rgb_saturate)
 -	 val = saturate(p, val);
 -      deref = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      assign = new(p->mem_ctx) ir_assignment(deref, val, NULL, WRITEMASK_XYZ);
 -      p->instructions->push_tail(assign);
 -
 -      val = emit_combine(p, unit,
 -			 key->unit[unit].NumArgsA,
 -			 key->unit[unit].ModeA,
 -			 key->unit[unit].OptA);
 -      val = smear(p, val);
 -      val = new(p->mem_ctx) ir_swizzle(val, 3, 3, 3, 3, 1);
 -      if (alpha_saturate)
 -	 val = saturate(p, val);
 -      deref = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      assign = new(p->mem_ctx) ir_assignment(deref, val, NULL, WRITEMASK_W);
 -      p->instructions->push_tail(assign);
 +      out = emit_combine( p, dest, WRITEMASK_XYZ, rgb_saturate,
 +			  unit,
 +			  key->unit[unit].NumArgsRGB,
 +			  key->unit[unit].ModeRGB,
 +			  key->unit[unit].OptRGB);
 +      out = emit_combine( p, dest, WRITEMASK_W, alpha_saturate,
 +			  unit,
 +			  key->unit[unit].NumArgsA,
 +			  key->unit[unit].ModeA,
 +			  key->unit[unit].OptA);
     }
 -   deref = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -
     /* Deal with the final shift:
      */
     if (alpha_shift || rgb_shift) {
 -      ir_constant *shift;
 +      struct ureg shift;
 +      GLboolean saturate = GL_TRUE;  /* always saturate at this point */
        if (rgb_shift == alpha_shift) {
 -	 shift = new(p->mem_ctx) ir_constant((float)(1 << rgb_shift));
 +	 shift = register_scalar_const(p, (GLfloat)(1<<rgb_shift));
        }
        else {
 -	 float const_data[4] = {
 -	    1 << rgb_shift,
 -	    1 << rgb_shift,
 -	    1 << rgb_shift,
 -	    1 << alpha_shift
 -	 };
 -	 shift = new(p->mem_ctx) ir_constant(glsl_type::vec4_type,
 -					     (ir_constant_data *)const_data);
 +	 shift = register_const4f(p, 
 +				  (GLfloat)(1<<rgb_shift),
 +				  (GLfloat)(1<<rgb_shift),
 +				  (GLfloat)(1<<rgb_shift),
 +				  (GLfloat)(1<<alpha_shift));
        }
 -
 -      return saturate(p, new(p->mem_ctx) ir_expression(ir_binop_mul,
 -						       deref, shift));
 +      return emit_arith( p, OPCODE_MUL, dest, WRITEMASK_XYZW, 
 +			 saturate, out, shift, undef );
     }
     else
 -      return deref;
 +      return out;
  }
  /**
   * Generate instruction for getting a texture source term.
   */
 - static void load_texture( struct texenv_fragment_program *p, GLuint unit )
 - {
 -    ir_dereference *deref;
 -    ir_assignment *assign;
 -
 -    if (p->src_texture[unit])
 -       return;
 -
 -    const GLuint texTarget = p->state->unit[unit].source_index;
 -    ir_rvalue *texcoord;
 -
 -    if (p->texcoord_tex[unit]) {
 -       texcoord = new(p->mem_ctx) ir_dereference_variable(p->texcoord_tex[unit]);
 -    }
 -    else {
 -       ir_variable *tc_array = p->shader->symbols->get_variable("gl_TexCoord");
 -       assert(tc_array);
 -       texcoord = new(p->mem_ctx) ir_dereference_variable(tc_array);
 -       ir_rvalue *index = new(p->mem_ctx) ir_constant(unit);
 -       texcoord = new(p->mem_ctx) ir_dereference_array(texcoord, index);
 -       tc_array->max_array_access = MAX2(tc_array->max_array_access, unit);
 -    }
 -
 -    if (!p->state->unit[unit].enabled) {
 -       p->src_texture[unit] = new(p->mem_ctx) ir_variable(glsl_type::vec4_type,
 -							  "dummy_tex",
 -							  ir_var_temporary);
 -       p->instructions->push_tail(p->src_texture[unit]);
 -
 -       deref = new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]);
 -       assign = new(p->mem_ctx) ir_assignment(deref,
 -					      new(p->mem_ctx) ir_constant(0.0f),
 -					      NULL);
 -       p->instructions->push_tail(assign);
 -       return ;
 -    }
 -
 -    const glsl_type *sampler_type = NULL;
 -    int coords = 0;
 -
 -    switch (texTarget) {
 -    case TEXTURE_1D_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("sampler1DShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("sampler1D");
 -       coords = 1;
 -       break;
 -    case TEXTURE_1D_ARRAY_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("sampler1DArrayShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("sampler1DArray");
 -       coords = 2;
 -       break;
 -    case TEXTURE_2D_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("sampler2DShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("sampler2D");
 -       coords = 2;
 -       break;
 -    case TEXTURE_2D_ARRAY_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("sampler2DArrayShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("sampler2DArray");
 -       coords = 3;
 -       break;
 -    case TEXTURE_RECT_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("sampler2DRectShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("sampler2DRect");
 -       coords = 2;
 -       break;
 -    case TEXTURE_3D_INDEX:
 -       assert(!p->state->unit[unit].shadow);
 -       sampler_type = p->shader->symbols->get_type("sampler3D");
 -       coords = 3;
 -       break;
 -    case TEXTURE_CUBE_INDEX:
 -       if (p->state->unit[unit].shadow)
 -	  sampler_type = p->shader->symbols->get_type("samplerCubeShadow");
 -       else
 -	  sampler_type = p->shader->symbols->get_type("samplerCube");
 -       coords = 3;
 -       break;
 -    }
 -
 -    p->src_texture[unit] = new(p->mem_ctx) ir_variable(glsl_type::vec4_type,
 -						       "tex",
 -						       ir_var_temporary);
 -    p->instructions->push_tail(p->src_texture[unit]);
 -
 -    ir_texture *tex = new(p->mem_ctx) ir_texture(ir_tex);
 -
 -
 -    char *sampler_name = ralloc_asprintf(p->mem_ctx, "sampler_%d", unit);
 -    ir_variable *sampler = new(p->mem_ctx) ir_variable(sampler_type,
 -						       sampler_name,
 -						       ir_var_uniform);
 -    p->top_instructions->push_head(sampler);
 -    deref = new(p->mem_ctx) ir_dereference_variable(sampler);
 -    tex->set_sampler(deref);
 -
 -    tex->coordinate = new(p->mem_ctx) ir_swizzle(texcoord, 0, 1, 2, 3, coords);
 -
 -    if (p->state->unit[unit].shadow) {
 -       texcoord = texcoord->clone(p->mem_ctx, NULL);
 -       tex->shadow_comparitor = new(p->mem_ctx) ir_swizzle(texcoord,
 -							   coords, 0, 0, 0,
 -							   1);
 -       coords++;
 -    }
 -
 -    texcoord = texcoord->clone(p->mem_ctx, NULL);
 -    tex->projector = new(p->mem_ctx) ir_swizzle(texcoord, 3, 0, 0, 0, 1);
 -
 -    deref = new(p->mem_ctx) ir_dereference_variable(p->src_texture[unit]);
 -    assign = new(p->mem_ctx) ir_assignment(deref, tex, NULL);
 -    p->instructions->push_tail(assign);
 - }
 +static void load_texture( struct texenv_fragment_program *p, GLuint unit )
 +{
 +   if (is_undef(p->src_texture[unit])) {
 +      const GLuint texTarget = p->state->unit[unit].source_index;
 +      struct ureg texcoord;
 +      struct ureg tmp = get_tex_temp( p );
 -static void
 -load_texenv_source(struct texenv_fragment_program *p,
 -		   GLuint src, GLuint unit)
 +      if (is_undef(p->texcoord_tex[unit])) {
 +         texcoord = register_input(p, FRAG_ATTRIB_TEX0+unit);
 +      }
 +      else {
 +         /* might want to reuse this reg for tex output actually */
 +         texcoord = p->texcoord_tex[unit];
 +      }
 +
 +      /* TODO: Use D0_MASK_XY where possible.
 +       */
 +      if (p->state->unit[unit].enabled) {
 +         GLboolean shadow = GL_FALSE;
 +
 +	 if (p->state->unit[unit].shadow) {
 +	    p->program->Base.ShadowSamplers |= 1 << unit;
 +            shadow = GL_TRUE;
 +         }
 +
 +	 p->src_texture[unit] = emit_texld( p, OPCODE_TXP,
 +					    tmp, WRITEMASK_XYZW, 
 +					    unit, texTarget, shadow,
 +                                            texcoord );
 +
 +         p->program->Base.SamplersUsed |= (1 << unit);
 +         /* This identity mapping should already be in place
 +          * (see _mesa_init_program_struct()) but let's be safe.
 +          */
 +         p->program->Base.SamplerUnits[unit] = unit;
 +      }
 +      else
 +	 p->src_texture[unit] = get_zero(p);
 +
 +      if (p->state->unit[unit].texture_cyl_wrap) {
 +         /* set flag which is checked by Mesa->Gallium program translation */
 +         p->program->Base.InputFlags[0] |= PROG_PARAM_BIT_CYL_WRAP;
 +      }
 +
 +   }
 +}
 +
 +static GLboolean load_texenv_source( struct texenv_fragment_program *p, 
 +				     GLuint src, GLuint unit )
  {
     switch (src) {
     case SRC_TEXTURE:
 @@ -1078,6 +1351,8 @@ load_texenv_source(struct texenv_fragment_program *p,        /* not a texture src - do nothing */
        break;
     }
 + 
 +   return GL_TRUE;
  }
 @@ -1104,214 +1379,108 @@ load_texunit_sources( struct texenv_fragment_program *p, GLuint unit )  /**
   * Generate instructions for loading bump map textures.
   */
 -static void
 +static GLboolean
  load_texunit_bumpmap( struct texenv_fragment_program *p, GLuint unit )
  {
     const struct state_key *key = p->state;
     GLuint bumpedUnitNr = key->unit[unit].OptRGB[1].Source - SRC_TEXTURE0;
 -   ir_rvalue *bump;
 -   ir_rvalue *texcoord;
 -   ir_variable *rot_mat_0_var, *rot_mat_1_var;
 -   ir_dereference_variable *rot_mat_0, *rot_mat_1;
 -
 -   rot_mat_0_var = p->shader->symbols->get_variable("gl_MESABumpRotMatrix0");
 -   rot_mat_1_var = p->shader->symbols->get_variable("gl_MESABumpRotMatrix1");
 -   rot_mat_0 = new(p->mem_ctx) ir_dereference_variable(rot_mat_0_var);
 -   rot_mat_1 = new(p->mem_ctx) ir_dereference_variable(rot_mat_1_var);
 -
 -   ir_variable *tc_array = p->shader->symbols->get_variable("gl_TexCoord");
 -   assert(tc_array);
 -   texcoord = new(p->mem_ctx) ir_dereference_variable(tc_array);
 -   ir_rvalue *index = new(p->mem_ctx) ir_constant(bumpedUnitNr);
 -   texcoord = new(p->mem_ctx) ir_dereference_array(texcoord, index);
 -   tc_array->max_array_access = MAX2(tc_array->max_array_access, unit);
 +   struct ureg texcDst, bumpMapRes;
 +   struct ureg constdudvcolor = register_const4f(p, 0.0, 0.0, 0.0, 1.0);
 +   struct ureg texcSrc = register_input(p, FRAG_ATTRIB_TEX0 + bumpedUnitNr);
 +   struct ureg rotMat0 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_0, unit );
 +   struct ureg rotMat1 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_1, unit );
     load_texenv_source( p, unit + SRC_TEXTURE0, unit );
 +   bumpMapRes = get_source(p, key->unit[unit].OptRGB[0].Source, unit);
 +   texcDst = get_tex_temp( p );
 +   p->texcoord_tex[bumpedUnitNr] = texcDst;
 +
     /* Apply rot matrix and add coords to be available in next phase.
 -    * dest = Arg1 + (Arg0.xx * rotMat0) + (Arg0.yy * rotMat1)
 +    * dest = (Arg0.xxxx * rotMat0 + Arg1) + (Arg0.yyyy * rotMat1)
      * note only 2 coords are affected the rest are left unchanged (mul by 0)
      */
 -   ir_dereference *deref;
 -   ir_assignment *assign;
 -   ir_rvalue *bump_x, *bump_y;
 -
 -   texcoord = smear(p, texcoord);
 -
 -   /* bump_texcoord = texcoord */
 -   ir_variable *bumped = new(p->mem_ctx) ir_variable(texcoord->type,
 -						     "bump_texcoord",
 -						     ir_var_temporary);
 -   p->instructions->push_tail(bumped);
 -
 -   deref = new(p->mem_ctx) ir_dereference_variable(bumped);
 -   assign = new(p->mem_ctx) ir_assignment(deref, texcoord, NULL);
 -   p->instructions->push_tail(assign);
 -
 -   /* bump_texcoord.xy += arg0.x * rotmat0 + arg0.y * rotmat1 */
 -   bump = get_source(p, key->unit[unit].OptRGB[0].Source, unit);
 -   bump_x = new(p->mem_ctx) ir_swizzle(bump, 0, 0, 0, 0, 1);
 -   bump = bump->clone(p->mem_ctx, NULL);
 -   bump_y = new(p->mem_ctx) ir_swizzle(bump, 1, 0, 0, 0, 1);
 -
 -   bump_x = new(p->mem_ctx) ir_expression(ir_binop_mul, bump_x, rot_mat_0);
 -   bump_y = new(p->mem_ctx) ir_expression(ir_binop_mul, bump_y, rot_mat_1);
 -
 -   ir_expression *expr;
 -   expr = new(p->mem_ctx) ir_expression(ir_binop_add, bump_x, bump_y);
 -
 -   deref = new(p->mem_ctx) ir_dereference_variable(bumped);
 -   expr = new(p->mem_ctx) ir_expression(ir_binop_add,
 -					new(p->mem_ctx) ir_swizzle(deref,
 -								   0, 1, 1, 1,
 -								   2),
 -					expr);
 -
 -   deref = new(p->mem_ctx) ir_dereference_variable(bumped);
 -   assign = new(p->mem_ctx) ir_assignment(deref, expr, NULL, WRITEMASK_XY);
 -   p->instructions->push_tail(assign);
 -
 -   p->texcoord_tex[bumpedUnitNr] = bumped;
 +   emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
 +               swizzle1(bumpMapRes, SWIZZLE_X), rotMat0, texcSrc );
 +   emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
 +               swizzle1(bumpMapRes, SWIZZLE_Y), rotMat1, texcDst );
 +
 +   /* Move 0,0,0,1 into bumpmap src if someone (crossbar) is foolish
 +    * enough to access this later, should optimize away.
 +    */
 +   emit_arith( p, OPCODE_MOV, bumpMapRes, WRITEMASK_XYZW, 0,
 +               constdudvcolor, undef, undef );
 +
 +   return GL_TRUE;
  }
  /**
 - * Applies the fog calculations.
 - *
 - * This is basically like the ARB_fragment_prorgam fog options.  Note
 - * that ffvertex_prog.c produces fogcoord for us when
 - * GL_FOG_COORDINATE_EXT is set to GL_FRAGMENT_DEPTH_EXT.
 + * Generate a new fragment program which implements the context's
 + * current texture env/combine mode.
   */
 -static ir_rvalue *
 -emit_fog_instructions(struct texenv_fragment_program *p,
 -		      ir_rvalue *fragcolor)
 +static void
 +create_new_program(struct gl_context *ctx, struct state_key *key,
 +                   struct gl_fragment_program *program)
  {
 -   struct state_key *key = p->state;
 -   ir_rvalue *f, *temp;
 -   ir_variable *params, *oparams;
 -   ir_variable *fogcoord;
 -   ir_assignment *assign;
 -
 -   /* Temporary storage for the whole fog result.  Fog calculations
 -    * only affect rgb so we're hanging on to the .a value of fragcolor
 -    * this way.
 -    */
 -   ir_variable *fog_result = new(p->mem_ctx) ir_variable(glsl_type::vec4_type,
 -							 "fog_result",
 -							 ir_var_auto);
 -   p->instructions->push_tail(fog_result);
 -   temp = new(p->mem_ctx) ir_dereference_variable(fog_result);
 -   assign = new(p->mem_ctx) ir_assignment(temp, fragcolor, NULL);
 -   p->instructions->push_tail(assign);
 -
 -   temp = new(p->mem_ctx) ir_dereference_variable(fog_result);
 -   fragcolor = new(p->mem_ctx) ir_swizzle(temp, 0, 1, 2, 3, 3);
 -
 -   oparams = p->shader->symbols->get_variable("gl_MESAFogParamsOptimized");
 -   fogcoord = p->shader->symbols->get_variable("gl_FogFragCoord");
 -   params = p->shader->symbols->get_variable("gl_Fog");
 -   f = new(p->mem_ctx) ir_dereference_variable(fogcoord);
 -
 -   ir_variable *f_var = new(p->mem_ctx) ir_variable(glsl_type::float_type,
 -						    "fog_factor", ir_var_auto);
 -   p->instructions->push_tail(f_var);
 -
 -   switch (key->fog_mode) {
 -   case FOG_LINEAR:
 -      /* f = (end - z) / (end - start)
 -       *
 -       * gl_MesaFogParamsOptimized gives us (-1 / (end - start)) and
 -       * (end / (end - start)) so we can generate a single MAD.
 -       */
 -      temp = new(p->mem_ctx) ir_dereference_variable(oparams);
 -      temp = new(p->mem_ctx) ir_swizzle(temp, 0, 0, 0, 0, 1);
 -      f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp);
 -
 -      temp = new(p->mem_ctx) ir_dereference_variable(oparams);
 -      temp = new(p->mem_ctx) ir_swizzle(temp, 1, 0, 0, 0, 1);
 -      f = new(p->mem_ctx) ir_expression(ir_binop_add, f, temp);
 -      break;
 -   case FOG_EXP:
 -      /* f = e^(-(density * fogcoord))
 -       *
 -       * gl_MesaFogParamsOptimized gives us density/ln(2) so we can
 -       * use EXP2 which is generally the native instruction without
 -       * having to do any further math on the fog density uniform.
 -       */
 -      temp = new(p->mem_ctx) ir_dereference_variable(oparams);
 -      temp = new(p->mem_ctx) ir_swizzle(temp, 2, 0, 0, 0, 1);
 -      f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp);
 -      f = new(p->mem_ctx) ir_expression(ir_unop_neg, f);
 -      f = new(p->mem_ctx) ir_expression(ir_unop_exp2, f);
 -      break;
 -   case FOG_EXP2:
 -      /* f = e^(-(density * fogcoord)^2)
 -       *
 -       * gl_MesaFogParamsOptimized gives us density/sqrt(ln(2)) so we
 -       * can do this like FOG_EXP but with a squaring after the
 -       * multiply by density.
 -       */
 -      ir_variable *temp_var = new(p->mem_ctx) ir_variable(glsl_type::float_type,
 -							  "fog_temp",
 -							  ir_var_auto);
 -      p->instructions->push_tail(temp_var);
 -
 -      temp = new(p->mem_ctx) ir_dereference_variable(oparams);
 -      temp = new(p->mem_ctx) ir_swizzle(temp, 3, 0, 0, 0, 1);
 -      f = new(p->mem_ctx) ir_expression(ir_binop_mul,
 -					f, temp);
 -
 -      temp = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      ir_assignment *assign = new(p->mem_ctx) ir_assignment(temp, f, NULL);
 -      p->instructions->push_tail(assign);
 -
 -      f = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      temp = new(p->mem_ctx) ir_dereference_variable(temp_var);
 -      f = new(p->mem_ctx) ir_expression(ir_binop_mul, f, temp);
 -      f = new(p->mem_ctx) ir_expression(ir_unop_neg, f);
 -      f = new(p->mem_ctx) ir_expression(ir_unop_exp2, f);
 -      break;
 -   }
 -
 -   f = saturate(p, f);
 +   struct prog_instruction instBuffer[MAX_INSTRUCTIONS];
 +   struct texenv_fragment_program p;
 +   GLuint unit;
 +   struct ureg cf, out;
 +   int i;
 -   temp = new(p->mem_ctx) ir_dereference_variable(f_var);
 -   assign = new(p->mem_ctx) ir_assignment(temp, f, NULL);
 -   p->instructions->push_tail(assign);
 +   memset(&p, 0, sizeof(p));
 +   p.state = key;
 +   p.program = program;
 -   f = new(p->mem_ctx) ir_dereference_variable(f_var);
 -   f = new(p->mem_ctx) ir_expression(ir_binop_sub,
 -				     new(p->mem_ctx) ir_constant(1.0f),
 -				     f);
 -   temp = new(p->mem_ctx) ir_dereference_variable(params);
 -   temp = new(p->mem_ctx) ir_dereference_record(temp, "color");
 -   temp = new(p->mem_ctx) ir_swizzle(temp, 0, 1, 2, 3, 3);
 -   temp = new(p->mem_ctx) ir_expression(ir_binop_mul, temp, f);
 +   /* During code generation, use locally-allocated instruction buffer,
 +    * then alloc dynamic storage below.
 +    */
 +   p.program->Base.Instructions = instBuffer;
 +   p.program->Base.Target = GL_FRAGMENT_PROGRAM_ARB;
 +   p.program->Base.String = NULL;
 +   p.program->Base.NumTexIndirections = 1; /* is this right? */
 +   p.program->Base.NumTexInstructions = 0;
 +   p.program->Base.NumAluInstructions = 0;
 +   p.program->Base.NumInstructions = 0;
 +   p.program->Base.NumTemporaries = 0;
 +   p.program->Base.NumParameters = 0;
 +   p.program->Base.NumAttributes = 0;
 +   p.program->Base.NumAddressRegs = 0;
 +   p.program->Base.Parameters = _mesa_new_parameter_list();
 +   p.program->Base.InputsRead = 0x0;
 +
 +   if (key->num_draw_buffers == 1)
 +      p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR;
 +   else {
 +      for (i = 0; i < key->num_draw_buffers; i++)
 +	 p.program->Base.OutputsWritten |= (1 << (FRAG_RESULT_DATA0 + i));
 +   }
 -   f = new(p->mem_ctx) ir_dereference_variable(f_var);
 -   f = new(p->mem_ctx) ir_expression(ir_binop_mul, fragcolor, f);
 -   f = new(p->mem_ctx) ir_expression(ir_binop_add, temp, f);
 +   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 +      p.src_texture[unit] = undef;
 +      p.texcoord_tex[unit] = undef;
 +   }
 -   ir_dereference *deref = new(p->mem_ctx) ir_dereference_variable(fog_result);
 -   assign = new(p->mem_ctx) ir_assignment(deref, f, NULL, WRITEMASK_XYZ);
 -   p->instructions->push_tail(assign);
 +   p.src_previous = undef;
 +   p.half = undef;
 +   p.zero = undef;
 +   p.one = undef;
 -   return new(p->mem_ctx) ir_dereference_variable(fog_result);
 -}
 +   p.last_tex_stage = 0;
 +   release_temps(ctx, &p);
 -static void
 -emit_instructions(struct texenv_fragment_program *p)
 -{
 -   struct state_key *key = p->state;
 -   GLuint unit;
 +   if (key->enabled_units && key->num_draw_buffers) {
 +      GLboolean needbumpstage = GL_FALSE;
 -   if (key->enabled_units) {
        /* Zeroth pass - bump map textures first */
 -      for (unit = 0; unit < key->nr_enabled_units; unit++) {
 +      for (unit = 0; unit < key->nr_enabled_units; unit++)
  	 if (key->unit[unit].enabled &&
               key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
 -	    load_texunit_bumpmap(p, unit);
 +	    needbumpstage = GL_TRUE;
 +	    load_texunit_bumpmap( &p, unit );
  	 }
 -      }
 +      if (needbumpstage)
 +	 p.program->Base.NumTexIndirections++;
        /* First pass - to support texture_env_crossbar, first identify
         * all referenced texture sources and emit texld instructions
 @@ -1319,157 +1488,104 @@ emit_instructions(struct texenv_fragment_program *p)         */
        for (unit = 0; unit < key->nr_enabled_units; unit++)
  	 if (key->unit[unit].enabled) {
 -	    load_texunit_sources(p, unit);
 -	    p->last_tex_stage = unit;
 +	    load_texunit_sources( &p, unit );
 +	    p.last_tex_stage = unit;
  	 }
        /* Second pass - emit combine instructions to build final color:
         */
 -      for (unit = 0; unit < key->nr_enabled_units; unit++) {
 +      for (unit = 0; unit < key->nr_enabled_units; unit++)
  	 if (key->unit[unit].enabled) {
 -	    p->src_previous = emit_texenv(p, unit);
 +	    p.src_previous = emit_texenv( &p, unit );
 +            reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */
 +	    release_temps(ctx, &p);	/* release all temps */
  	 }
 -      }
     }
 -   ir_rvalue *cf = get_source(p, SRC_PREVIOUS, 0);
 -   ir_dereference_variable *deref;
 -   ir_assignment *assign;
 -
 -   if (key->separate_specular) {
 -      ir_rvalue *tmp0, *tmp1;
 -      ir_variable *spec_result = new(p->mem_ctx) ir_variable(glsl_type::vec4_type,
 -							    "specular_add",
 -							    ir_var_temporary);
 -
 -      p->instructions->push_tail(spec_result);
 -
 -      deref = new(p->mem_ctx) ir_dereference_variable(spec_result);
 -      assign = new(p->mem_ctx) ir_assignment(deref, cf, NULL);
 -      p->instructions->push_tail(assign);
 -
 -      deref = new(p->mem_ctx) ir_dereference_variable(spec_result);
 -      tmp0 = new(p->mem_ctx) ir_swizzle(deref, 0, 1, 2, 3, 3);
 +   cf = get_source( &p, SRC_PREVIOUS, 0 );
 -      ir_variable *secondary =
 -	 p->shader->symbols->get_variable("gl_SecondaryColor");
 -      assert(secondary);
 -      deref = new(p->mem_ctx) ir_dereference_variable(secondary);
 -      tmp1 = new(p->mem_ctx) ir_swizzle(deref, 0, 1, 2, 3, 3);
 -
 -      tmp0 = new(p->mem_ctx) ir_expression(ir_binop_add,
 -					  tmp0, tmp1);
 -
 -      deref = new(p->mem_ctx) ir_dereference_variable(spec_result);
 -      assign = new(p->mem_ctx) ir_assignment(deref, tmp0, NULL, WRITEMASK_XYZ);
 -      p->instructions->push_tail(assign);
 +   for (i = 0; i < key->num_draw_buffers; i++) {
 +      if (key->num_draw_buffers == 1)
 +	 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR );
 +      else {
 +	 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i );
 +      }
 -      cf = new(p->mem_ctx) ir_dereference_variable(spec_result);
 +      if (key->separate_specular) {
 +	 /* Emit specular add.
 +	  */
 +	 struct ureg s = register_input(&p, FRAG_ATTRIB_COL1);
 +	 emit_arith( &p, OPCODE_ADD, out, WRITEMASK_XYZ, 0, cf, s, undef );
 +	 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_W, 0, cf, undef, undef );
 +      }
 +      else if (memcmp(&cf, &out, sizeof(cf)) != 0) {
 +	 /* Will wind up in here if no texture enabled or a couple of
 +	  * other scenarios (GL_REPLACE for instance).
 +	  */
 +	 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_XYZW, 0, cf, undef, undef );
 +      }
     }
 +   /* Finish up:
 +    */
 +   emit_arith( &p, OPCODE_END, undef, WRITEMASK_XYZW, 0, undef, undef, undef);
     if (key->fog_enabled) {
 -      cf = emit_fog_instructions(p, cf);
 +      /* Pull fog mode from struct gl_context, the value in the state key is
 +       * a reduced value and not what is expected in FogOption
 +       */
 +      p.program->FogOption = ctx->Fog.Mode;
 +      p.program->Base.InputsRead |= FRAG_BIT_FOGC;
     }
 -
 -   ir_variable *frag_color = p->shader->symbols->get_variable("gl_FragColor");
 -   assert(frag_color);
 -   deref = new(p->mem_ctx) ir_dereference_variable(frag_color);
 -   assign = new(p->mem_ctx) ir_assignment(deref, cf, NULL);
 -   p->instructions->push_tail(assign);
 -}
 -
 -/**
 - * Generate a new fragment program which implements the context's
 - * current texture env/combine mode.
 - */
 -static struct gl_shader_program *
 -create_new_program(struct gl_context *ctx, struct state_key *key)
 -{
 -   struct texenv_fragment_program p;
 -   unsigned int unit;
 -   _mesa_glsl_parse_state *state;
 -
 -   memset(&p, 0, sizeof(p));
 -   p.mem_ctx = ralloc_context(NULL);
 -   p.shader = ctx->Driver.NewShader(ctx, 0, GL_FRAGMENT_SHADER);
 -   p.shader->ir = new(p.shader) exec_list;
 -   state = new(p.shader) _mesa_glsl_parse_state(ctx, GL_FRAGMENT_SHADER,
 -						p.shader);
 -   p.shader->symbols = state->symbols;
 -   p.top_instructions = p.shader->ir;
 -   p.instructions = p.shader->ir;
 -   p.state = key;
 -   p.shader_program = ctx->Driver.NewShaderProgram(ctx, 0);
 -
 -   state->language_version = 120;
 -   _mesa_glsl_initialize_types(state);
 -   _mesa_glsl_initialize_variables(p.instructions, state);
 -
 -   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
 -      p.src_texture[unit] = NULL;
 -      p.texcoord_tex[unit] = NULL;
 +   else {
 +      p.program->FogOption = GL_NONE;
     }
 -   p.src_previous = NULL;
 +   if (p.program->Base.NumTexIndirections > ctx->Const.FragmentProgram.MaxTexIndirections) 
 +      program_error(&p, "Exceeded max nr indirect texture lookups");
 -   p.last_tex_stage = 0;
 +   if (p.program->Base.NumTexInstructions > ctx->Const.FragmentProgram.MaxTexInstructions)
 +      program_error(&p, "Exceeded max TEX instructions");
 -   ir_function *main_f = new(p.mem_ctx) ir_function("main");
 -   p.instructions->push_tail(main_f);
 -   state->symbols->add_function(main_f);
 +   if (p.program->Base.NumAluInstructions > ctx->Const.FragmentProgram.MaxAluInstructions)
 +      program_error(&p, "Exceeded max ALU instructions");
 -   ir_function_signature *main_sig =
 -      new(p.mem_ctx) ir_function_signature(p.shader->symbols->get_type("void"));
 -   main_sig->is_defined = true;
 -   main_f->add_signature(main_sig);
 +   ASSERT(p.program->Base.NumInstructions <= MAX_INSTRUCTIONS);
 -   p.instructions = &main_sig->body;
 -   if (key->num_draw_buffers)
 -      emit_instructions(&p);
 -
 -   validate_ir_tree(p.shader->ir);
 -
 -   while (do_common_optimization(p.shader->ir, false, 32))
 -      ;
 -   reparent_ir(p.shader->ir, p.shader->ir);
 +   /* Allocate final instruction array */
 +   p.program->Base.Instructions
 +      = _mesa_alloc_instructions(p.program->Base.NumInstructions);
 +   if (!p.program->Base.Instructions) {
 +      _mesa_error(ctx, GL_OUT_OF_MEMORY,
 +                  "generating tex env program");
 +      return;
 +   }
 +   _mesa_copy_instructions(p.program->Base.Instructions, instBuffer,
 +                           p.program->Base.NumInstructions);
 -   p.shader->CompileStatus = true;
 -   p.shader->Version = state->language_version;
 -   p.shader->num_builtins_to_link = state->num_builtins_to_link;
 -   p.shader_program->Shaders =
 -      (gl_shader **)malloc(sizeof(*p.shader_program->Shaders));
 -   p.shader_program->Shaders[0] = p.shader;
 -   p.shader_program->NumShaders = 1;
 +   if (key->num_draw_buffers && p.program->FogOption) {
 +      _mesa_append_fog_code(ctx, p.program);
 +      p.program->FogOption = GL_NONE;
 +   }
 -   _mesa_glsl_link_shader(ctx, p.shader_program);
 -   /* Set the sampler uniforms, and relink to get them into the linked
 -    * program.
 +   /* Notify driver the fragment program has (actually) changed.
      */
 -   struct gl_fragment_program *fp = p.shader_program->FragmentProgram;
 -   for (unsigned int i = 0; i < MAX_TEXTURE_UNITS; i++) {
 -      char *name = ralloc_asprintf(p.mem_ctx, "sampler_%d", i);
 -      int loc = _mesa_get_uniform_location(ctx, p.shader_program, name);
 -      if (loc != -1) {
 -	 /* Avoid using _mesa_uniform() because it flags state
 -	  * updates, so if we're generating this shader_program in a
 -	  * state update, we end up recursing.  Instead, just set the
 -	  * value, which is picked up at re-link.
 -	  */
 -	 loc = (loc & 0xffff) + (loc >> 16);
 -	 int sampler = fp->Base.Parameters->ParameterValues[loc][0];
 -	 fp->Base.SamplerUnits[sampler] = i;
 -      }
 +   if (ctx->Driver.ProgramStringNotify) {
 +      GLboolean ok = ctx->Driver.ProgramStringNotify(ctx,
 +                                                     GL_FRAGMENT_PROGRAM_ARB, 
 +                                                     &p.program->Base);
 +      /* Driver should be able to handle any texenv programs as long as
 +       * the driver correctly reported max number of texture units correctly,
 +       * etc.
 +       */
 +      ASSERT(ok);
 +      (void) ok; /* silence unused var warning */
     }
 -   _mesa_update_shader_textures_used(&fp->Base);
 -   (void) ctx->Driver.ProgramStringNotify(ctx, fp->Base.Target, &fp->Base);
 -   if (!p.shader_program->LinkStatus)
 -      _mesa_problem(ctx, "Failed to link fixed function fragment shader: %s\n",
 -		    p.shader_program->InfoLog);
 -
 -   ralloc_free(p.mem_ctx);
 -   return p.shader_program;
 +   if (DISASSEM) {
 +      _mesa_print_program(&p.program->Base);
 +      printf("\n");
 +   }
  }
  extern "C" {
 @@ -1478,27 +1594,30 @@ extern "C" {   * Return a fragment program which implements the current
   * fixed-function texture, fog and color-sum operations.
   */
 -struct gl_shader_program *
 +struct gl_fragment_program *
  _mesa_get_fixed_func_fragment_program(struct gl_context *ctx)
  {
 -   struct gl_shader_program *shader_program;
 +   struct gl_fragment_program *prog;
     struct state_key key;
     GLuint keySize;
 -
 +	
     keySize = make_state_key(ctx, &key);
 -
 -   shader_program = (struct gl_shader_program *)
 +      
 +   prog = (struct gl_fragment_program *)
        _mesa_search_program_cache(ctx->FragmentProgram.Cache,
                                   &key, keySize);
 -   if (!shader_program) {
 -      shader_program = create_new_program(ctx, &key);
 +   if (!prog) {
 +      prog = (struct gl_fragment_program *) 
 +         ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
 +
 +      create_new_program(ctx, &key, prog);
 -      _mesa_shader_cache_insert(ctx, ctx->FragmentProgram.Cache,
 -				&key, keySize, shader_program);
 +      _mesa_program_cache_insert(ctx, ctx->FragmentProgram.Cache,
 +                                 &key, keySize, &prog->Base);
     }
 -   return shader_program;
 +   return prog;
  }
  }
 diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h index 520d96689..39b6f72cc 100644 --- a/mesalib/src/mesa/main/mtypes.h +++ b/mesalib/src/mesa/main/mtypes.h @@ -2191,7 +2191,6 @@ struct gl_shader_state     struct gl_shader_program *CurrentVertexProgram;
     struct gl_shader_program *CurrentGeometryProgram;
     struct gl_shader_program *CurrentFragmentProgram;
 -   struct gl_shader_program *_CurrentFragmentProgram;
     /**
      * Program used by glUniform calls.
 diff --git a/mesalib/src/mesa/main/state.c b/mesalib/src/mesa/main/state.c index 5651e3263..f50f2af1e 100644 --- a/mesalib/src/mesa/main/state.c +++ b/mesalib/src/mesa/main/state.c @@ -43,7 +43,6 @@  #include "pixel.h"
  #include "program/program.h"
  #include "program/prog_parameter.h"
 -#include "shaderobj.h"
  #include "state.h"
  #include "stencil.h"
  #include "texenvprogram.h"
 @@ -250,7 +249,7 @@ update_program(struct gl_context *ctx)  {
     const struct gl_shader_program *vsProg = ctx->Shader.CurrentVertexProgram;
     const struct gl_shader_program *gsProg = ctx->Shader.CurrentGeometryProgram;
 -   struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram;
 +   const struct gl_shader_program *fsProg = ctx->Shader.CurrentFragmentProgram;
     const struct gl_vertex_program *prevVP = ctx->VertexProgram._Current;
     const struct gl_fragment_program *prevFP = ctx->FragmentProgram._Current;
     const struct gl_geometry_program *prevGP = ctx->GeometryProgram._Current;
 @@ -276,31 +275,23 @@ update_program(struct gl_context *ctx)        /* Use shader programs */
        _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
                                 fsProg->FragmentProgram);
 -      _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram,
 -				     fsProg);
     }
     else if (ctx->FragmentProgram._Enabled) {
 -      /* use user-defined fragment program */
 +      /* use user-defined vertex program */
        _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
                                 ctx->FragmentProgram.Current);
 -      _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram,
 -				     NULL);
     }
     else if (ctx->FragmentProgram._MaintainTexEnvProgram) {
        /* Use fragment program generated from fixed-function state.
         */
 -      struct gl_shader_program *f = _mesa_get_fixed_func_fragment_program(ctx);
 -      _mesa_reference_shader_program(ctx,
 -				     &ctx->Shader._CurrentFragmentProgram, f);
 -
        _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current,
 -                               f->FragmentProgram);
 +                               _mesa_get_fixed_func_fragment_program(ctx));
 +      _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram,
 +                               ctx->FragmentProgram._Current);
     }
     else {
        /* no fragment program */
        _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL);
 -      _mesa_reference_shader_program(ctx, &ctx->Shader._CurrentFragmentProgram,
 -				     NULL);
     }
     if (gsProg && gsProg->LinkStatus && gsProg->GeometryProgram) {
 diff --git a/mesalib/src/mesa/main/texenvprogram.h b/mesalib/src/mesa/main/texenvprogram.h index dba775feb..0895ebacb 100644 --- a/mesalib/src/mesa/main/texenvprogram.h +++ b/mesalib/src/mesa/main/texenvprogram.h @@ -29,7 +29,7 @@  struct gl_context;
 -extern struct gl_shader_program *
 +extern struct gl_fragment_program *
  _mesa_get_fixed_func_fragment_program(struct gl_context *ctx);
  #endif
 diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c index 43f894a9b..6c97787e8 100644 --- a/mesalib/src/mesa/program/program.c +++ b/mesalib/src/mesa/program/program.c @@ -140,7 +140,7 @@ _mesa_free_program_data(struct gl_context *ctx)  #endif
  #if FEATURE_NV_fragment_program || FEATURE_ARB_fragment_program
     _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, NULL);
 -   _mesa_delete_shader_cache(ctx, ctx->FragmentProgram.Cache);
 +   _mesa_delete_program_cache(ctx, ctx->FragmentProgram.Cache);
  #endif
  #if FEATURE_ARB_geometry_shader4
     _mesa_reference_geomprog(ctx, &ctx->GeometryProgram.Current, NULL);
 diff --git a/mesalib/src/mesa/state_tracker/st_cb_clear.c b/mesalib/src/mesa/state_tracker/st_cb_clear.c index 0e0c4326e..0130c7a5a 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_clear.c +++ b/mesalib/src/mesa/state_tracker/st_cb_clear.c @@ -1,579 +1,601 @@ -/************************************************************************** - *  - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc.  All Rights Reserved. - *  - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - *  - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - *  - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *  - **************************************************************************/ - - /* -  * Authors: -  *   Keith Whitwell <keith@tungstengraphics.com> -  *   Brian Paul -  *   Michel Dänzer -  */ - -#include "main/glheader.h" -#include "main/formats.h" -#include "main/macros.h" -#include "program/prog_instruction.h" -#include "st_context.h" -#include "st_atom.h" -#include "st_cb_accum.h" -#include "st_cb_clear.h" -#include "st_cb_fbo.h" -#include "st_format.h" -#include "st_program.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_state.h" -#include "pipe/p_defines.h" -#include "util/u_format.h" -#include "util/u_inlines.h" -#include "util/u_simple_shaders.h" -#include "util/u_draw_quad.h" - -#include "cso_cache/cso_context.h" - - -/** - * Do per-context initialization for glClear. - */ -void -st_init_clear(struct st_context *st) -{ -   struct pipe_context *pipe = st->pipe; -   struct pipe_screen *pscreen = st->pipe->screen; - -   memset(&st->clear, 0, sizeof(st->clear)); - -   st->clear.raster.gl_rasterization_rules = 1; -   st->clear.enable_ds_separate = pscreen->get_param(pscreen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE); - -   /* fragment shader state: color pass-through program */ -   st->clear.fs = util_make_fragment_passthrough_shader(pipe); - -   /* vertex shader state: color/position pass-through */ -   { -      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, -                                      TGSI_SEMANTIC_COLOR }; -      const uint semantic_indexes[] = { 0, 0 }; -      st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, -                                                         semantic_names, -                                                         semantic_indexes); -   } -} - - -/** - * Free per-context state for glClear. 
- */ -void -st_destroy_clear(struct st_context *st) -{ -   if (st->clear.fs) { -      cso_delete_fragment_shader(st->cso_context, st->clear.fs); -      st->clear.fs = NULL; -   } -   if (st->clear.vs) { -      cso_delete_vertex_shader(st->cso_context, st->clear.vs); -      st->clear.vs = NULL; -   } -   if (st->clear.vbuf) { -      pipe_resource_reference(&st->clear.vbuf, NULL); -      st->clear.vbuf = NULL; -   } -} - - -/** - * Draw a screen-aligned quadrilateral. - * Coords are clip coords with y=0=bottom. - */ -static void -draw_quad(struct st_context *st, -          float x0, float y0, float x1, float y1, GLfloat z, -          const GLfloat color[4]) -{ -   struct pipe_context *pipe = st->pipe; - -   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as -    * no_flush) updates to buffers where we know there is no conflict -    * with previous data.  Currently using max_slots > 1 will cause -    * synchronous rendering if the driver flushes its command buffers -    * between one bitmap and the next.  Our flush hook below isn't -    * sufficient to catch this as the driver doesn't tell us when it -    * flushes its own command buffers.  Until this gets fixed, pay the -    * price of allocating a new buffer for each bitmap cache-flush to -    * avoid synchronous rendering. 
-    */ -   const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */ -   GLuint i; - -   if (st->clear.vbuf_slot >= max_slots) { -      pipe_resource_reference(&st->clear.vbuf, NULL); -      st->clear.vbuf_slot = 0; -   } - -   if (!st->clear.vbuf) { -      st->clear.vbuf = pipe_buffer_create(pipe->screen, -                                          PIPE_BIND_VERTEX_BUFFER, -                                          PIPE_USAGE_STREAM, -                                          max_slots * sizeof(st->clear.vertices)); -   } - -   /* positions */ -   st->clear.vertices[0][0][0] = x0; -   st->clear.vertices[0][0][1] = y0; - -   st->clear.vertices[1][0][0] = x1; -   st->clear.vertices[1][0][1] = y0; - -   st->clear.vertices[2][0][0] = x1; -   st->clear.vertices[2][0][1] = y1; - -   st->clear.vertices[3][0][0] = x0; -   st->clear.vertices[3][0][1] = y1; - -   /* same for all verts: */ -   for (i = 0; i < 4; i++) { -      st->clear.vertices[i][0][2] = z; -      st->clear.vertices[i][0][3] = 1.0; -      st->clear.vertices[i][1][0] = color[0]; -      st->clear.vertices[i][1][1] = color[1]; -      st->clear.vertices[i][1][2] = color[2]; -      st->clear.vertices[i][1][3] = color[3]; -   } - -   /* put vertex data into vbuf */ -   pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf, -                                           st->clear.vbuf_slot -                                             * sizeof(st->clear.vertices), -                                           sizeof(st->clear.vertices), -                                           st->clear.vertices); - -   /* draw */ -   util_draw_vertex_buffer(pipe, -                           st->cso_context, -                           st->clear.vbuf,  -                           st->clear.vbuf_slot * sizeof(st->clear.vertices), -                           PIPE_PRIM_TRIANGLE_FAN, -                           4,  /* verts */ -                           2); /* attribs/vert */ - -   /* Increment slot */ -   
st->clear.vbuf_slot++; -} - - - -/** - * Do glClear by drawing a quadrilateral. - * The vertices of the quad will be computed from the - * ctx->DrawBuffer->_X/Ymin/max fields. - */ -static void -clear_with_quad(struct gl_context *ctx, -                GLboolean color, GLboolean depth, GLboolean stencil) -{ -   struct st_context *st = st_context(ctx); -   const struct gl_framebuffer *fb = ctx->DrawBuffer; -   const GLfloat fb_width = (GLfloat) fb->Width; -   const GLfloat fb_height = (GLfloat) fb->Height; -   const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin / fb_width * 2.0f - 1.0f; -   const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax / fb_width * 2.0f - 1.0f; -   const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin / fb_height * 2.0f - 1.0f; -   const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax / fb_height * 2.0f - 1.0f; -   float clearColor[4]; - -   /* -   printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__,  -	  color ? "color, " : "", -	  depth ? "depth, " : "", -	  stencil ? "stencil" : "", -	  x0, y0, -	  x1, y1); -   */ - -   cso_save_blend(st->cso_context); -   cso_save_stencil_ref(st->cso_context); -   cso_save_depth_stencil_alpha(st->cso_context); -   cso_save_rasterizer(st->cso_context); -   cso_save_viewport(st->cso_context); -   cso_save_clip(st->cso_context); -   cso_save_fragment_shader(st->cso_context); -   cso_save_vertex_shader(st->cso_context); -   cso_save_vertex_elements(st->cso_context); -   cso_save_vertex_buffers(st->cso_context); - -   /* blend state: RGBA masking */ -   { -      struct pipe_blend_state blend; -      memset(&blend, 0, sizeof(blend)); -      blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE; -      blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE; -      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; -      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; -      if (color) { -         if (ctx->Color.ColorMask[0][0]) -            blend.rt[0].colormask |= PIPE_MASK_R; -         if (ctx->Color.ColorMask[0][1]) -  
          blend.rt[0].colormask |= PIPE_MASK_G; -         if (ctx->Color.ColorMask[0][2]) -            blend.rt[0].colormask |= PIPE_MASK_B; -         if (ctx->Color.ColorMask[0][3]) -            blend.rt[0].colormask |= PIPE_MASK_A; -         if (st->ctx->Color.DitherFlag) -            blend.dither = 1; -      } -      cso_set_blend(st->cso_context, &blend); -   } - -   /* depth_stencil state: always pass/set to ref value */ -   { -      struct pipe_depth_stencil_alpha_state depth_stencil; -      memset(&depth_stencil, 0, sizeof(depth_stencil)); -      if (depth) { -         depth_stencil.depth.enabled = 1; -         depth_stencil.depth.writemask = 1; -         depth_stencil.depth.func = PIPE_FUNC_ALWAYS; -      } - -      if (stencil) { -         struct pipe_stencil_ref stencil_ref; -         memset(&stencil_ref, 0, sizeof(stencil_ref)); -         depth_stencil.stencil[0].enabled = 1; -         depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; -         depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; -         depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; -         depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; -         depth_stencil.stencil[0].valuemask = 0xff; -         depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff; -         stencil_ref.ref_value[0] = ctx->Stencil.Clear; -         cso_set_stencil_ref(st->cso_context, &stencil_ref); -      } - -      cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); -   } - -   cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw); - -   cso_set_rasterizer(st->cso_context, &st->clear.raster); - -   /* viewport state: viewport matching window dims */ -   { -      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); -      struct pipe_viewport_state vp; -      vp.scale[0] = 0.5f * fb_width; -      vp.scale[1] = fb_height * (invert ? 
-0.5f : 0.5f); -      vp.scale[2] = 1.0f; -      vp.scale[3] = 1.0f; -      vp.translate[0] = 0.5f * fb_width; -      vp.translate[1] = 0.5f * fb_height; -      vp.translate[2] = 0.0f; -      vp.translate[3] = 0.0f; -      cso_set_viewport(st->cso_context, &vp); -   } - -   cso_set_clip(st->cso_context, &st->clear.clip); -   cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); -   cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); - -   if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { -      st_translate_color(ctx->Color.ClearColor, -                         ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, -                         clearColor); -   } - -   /* draw quad matching scissor rect */ -   draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, clearColor); - -   /* Restore pipe state */ -   cso_restore_blend(st->cso_context); -   cso_restore_stencil_ref(st->cso_context); -   cso_restore_depth_stencil_alpha(st->cso_context); -   cso_restore_rasterizer(st->cso_context); -   cso_restore_viewport(st->cso_context); -   cso_restore_clip(st->cso_context); -   cso_restore_fragment_shader(st->cso_context); -   cso_restore_vertex_shader(st->cso_context); -   cso_restore_vertex_elements(st->cso_context); -   cso_restore_vertex_buffers(st->cso_context); -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ -   if (ctx->Scissor.Enabled && -       (ctx->Scissor.X != 0 || -        ctx->Scissor.Y != 0 || -        ctx->Scissor.Width < rb->Width || -        ctx->Scissor.Height < rb->Height)) -      return GL_TRUE; - -   if (!ctx->Color.ColorMask[0][0] || -       !ctx->Color.ColorMask[0][1] || -       !ctx->Color.ColorMask[0][2] || -       !ctx->Color.ColorMask[0][3]) -      return GL_TRUE; - -   return GL_FALSE; -} - - -/** - * Determine if we need to clear the combiend depth/stencil buffer by - * drawing a quad. 
- */ -static INLINE GLboolean -check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb) -{ -   const GLuint stencilMax = 0xff; -   GLboolean maskStencil -      = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - -   assert(rb->Format == MESA_FORMAT_S8 || -          rb->Format == MESA_FORMAT_Z24_S8 || -          rb->Format == MESA_FORMAT_S8_Z24); - -   if (ctx->Scissor.Enabled && -       (ctx->Scissor.X != 0 || -        ctx->Scissor.Y != 0 || -        ctx->Scissor.Width < rb->Width || -        ctx->Scissor.Height < rb->Height)) -      return GL_TRUE; - -   if (maskStencil) -      return GL_TRUE; - -   return GL_FALSE; -} - - -/** - * Determine if we need to clear the depth buffer by drawing a quad. - */ -static INLINE GLboolean -check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, -                            boolean ds_separate) -{ -   const struct st_renderbuffer *strb = st_renderbuffer(rb); -   const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); - -   if (ctx->Scissor.Enabled && -       (ctx->Scissor.X != 0 || -        ctx->Scissor.Y != 0 || -        ctx->Scissor.Width < rb->Width || -        ctx->Scissor.Height < rb->Height)) -      return GL_TRUE; - -   if (!ds_separate && isDS && ctx->DrawBuffer->Visual.stencilBits > 0) -      return GL_TRUE; - -   return GL_FALSE; -} - - -/** - * Determine if we need to clear the stencil buffer by drawing a quad. 
- */ -static INLINE GLboolean -check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb, -                              boolean ds_separate) -{ -   const struct st_renderbuffer *strb = st_renderbuffer(rb); -   const GLboolean isDS = util_format_is_depth_and_stencil(strb->surface->format); -   const GLuint stencilMax = 0xff; -   const GLboolean maskStencil -      = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; - -   assert(rb->Format == MESA_FORMAT_S8 || -          rb->Format == MESA_FORMAT_Z24_S8 || -          rb->Format == MESA_FORMAT_S8_Z24); - -   if (maskStencil)  -      return GL_TRUE; - -   if (ctx->Scissor.Enabled && -       (ctx->Scissor.X != 0 || -        ctx->Scissor.Y != 0 || -        ctx->Scissor.Width < rb->Width || -        ctx->Scissor.Height < rb->Height)) -      return GL_TRUE; - -   /* This is correct, but it is necessary to look at the depth clear -    * value held in the surface when it comes time to issue the clear, -    * rather than taking depth and stencil clear values from the -    * current state. -    */ -   if (!ds_separate && isDS && ctx->DrawBuffer->Visual.depthBits > 0) -      return GL_TRUE; - -   return GL_FALSE; -} - - - -/** - * Called when we need to flush. - */ -void -st_flush_clear(struct st_context *st) -{ -   /* Release vertex buffer to avoid synchronous rendering if we were -    * to map it in the next frame. 
-    */ -   pipe_resource_reference(&st->clear.vbuf, NULL); -   st->clear.vbuf_slot = 0; -} -  - - -/** - * Called via ctx->Driver.Clear() - */ -static void -st_Clear(struct gl_context *ctx, GLbitfield mask) -{ -   static const GLbitfield BUFFER_BITS_DS -      = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); -   struct st_context *st = st_context(ctx); -   struct gl_renderbuffer *depthRb -      = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; -   struct gl_renderbuffer *stencilRb -      = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; -   GLbitfield quad_buffers = 0x0; -   GLbitfield clear_buffers = 0x0; -   GLuint i; - -   /* This makes sure the pipe has the latest scissor, etc values */ -   st_validate_state( st ); - -   if (mask & BUFFER_BITS_COLOR) { -      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { -         GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i]; - -         if (mask & (1 << b)) { -            struct gl_renderbuffer *rb -               = ctx->DrawBuffer->Attachment[b].Renderbuffer; -            struct st_renderbuffer *strb = st_renderbuffer(rb); - -            if (!strb || !strb->surface) -               continue; - -            if (check_clear_color_with_quad( ctx, rb )) -               quad_buffers |= PIPE_CLEAR_COLOR; -            else -               clear_buffers |= PIPE_CLEAR_COLOR; -         } -      } -   } - -   if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { -      /* clearing combined depth + stencil */ -      struct st_renderbuffer *strb = st_renderbuffer(depthRb); - -      if (strb->surface) { -         if (check_clear_depth_stencil_with_quad(ctx, depthRb)) -            quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL; -         else -            clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; -      } -   } -   else { -      /* separate depth/stencil clears */ -      /* I don't think truly separate buffers are actually possible in gallium or hw? 
*/ -      if (mask & BUFFER_BIT_DEPTH) { -         struct st_renderbuffer *strb = st_renderbuffer(depthRb); - -         if (strb->surface) { -            if (check_clear_depth_with_quad(ctx, depthRb, -                                            st->clear.enable_ds_separate)) -               quad_buffers |= PIPE_CLEAR_DEPTH; -            else -               clear_buffers |= PIPE_CLEAR_DEPTH; -         } -      } -      if (mask & BUFFER_BIT_STENCIL) { -         struct st_renderbuffer *strb = st_renderbuffer(stencilRb); - -         if (strb->surface) { -            if (check_clear_stencil_with_quad(ctx, stencilRb, -                                              st->clear.enable_ds_separate)) -               quad_buffers |= PIPE_CLEAR_STENCIL; -            else -               clear_buffers |= PIPE_CLEAR_STENCIL; -         } -      } -   } - -   /* -    * If we're going to use clear_with_quad() for any reason, use it for -    * everything possible. -    */ -   if (quad_buffers) { -      quad_buffers |= clear_buffers; -      clear_with_quad(ctx, -                      quad_buffers & PIPE_CLEAR_COLOR, -                      quad_buffers & PIPE_CLEAR_DEPTH, -                      quad_buffers & PIPE_CLEAR_STENCIL); -   } else if (clear_buffers) { -      /* driver cannot know it can clear everything if the buffer -       * is a combined depth/stencil buffer but this wasn't actually -       * required from the visual. Hence fix this up to avoid potential -       * read-modify-write in the driver. 
-       */ -      float clearColor[4]; - -      if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) && -          ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) && -          (depthRb == stencilRb) && -          (ctx->DrawBuffer->Visual.depthBits == 0 || -           ctx->DrawBuffer->Visual.stencilBits == 0)) -         clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL; - -      if (ctx->DrawBuffer->_ColorDrawBuffers[0]) { -         st_translate_color(ctx->Color.ClearColor, -                            ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat, -                            clearColor); -      } - -      st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor, -                      ctx->Depth.Clear, ctx->Stencil.Clear); -   } -   if (mask & BUFFER_BIT_ACCUM) -      st_clear_accum_buffer(ctx, -                            ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); -} - - -void -st_init_clear_functions(struct dd_function_table *functions) -{ -   functions->Clear = st_Clear; -} +/**************************************************************************
 + * 
 + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 + * All Rights Reserved.
 + * Copyright 2009 VMware, Inc.  All Rights Reserved.
 + * 
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the
 + * "Software"), to deal in the Software without restriction, including
 + * without limitation the rights to use, copy, modify, merge, publish,
 + * distribute, sub license, and/or sell copies of the Software, and to
 + * permit persons to whom the Software is furnished to do so, subject to
 + * the following conditions:
 + * 
 + * The above copyright notice and this permission notice (including the
 + * next paragraph) shall be included in all copies or substantial portions
 + * of the Software.
 + * 
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + * 
 + **************************************************************************/
 +
 + /*
 +  * Authors:
 +  *   Keith Whitwell <keith@tungstengraphics.com>
 +  *   Brian Paul
 +  *   Michel Dänzer
 +  */
 +
 +#include "main/glheader.h"
 +#include "main/formats.h"
 +#include "main/macros.h"
 +#include "program/prog_instruction.h"
 +#include "st_context.h"
 +#include "st_atom.h"
 +#include "st_cb_accum.h"
 +#include "st_cb_clear.h"
 +#include "st_cb_fbo.h"
 +#include "st_format.h"
 +#include "st_program.h"
 +
 +#include "pipe/p_context.h"
 +#include "pipe/p_shader_tokens.h"
 +#include "pipe/p_state.h"
 +#include "pipe/p_defines.h"
 +#include "util/u_format.h"
 +#include "util/u_inlines.h"
 +#include "util/u_simple_shaders.h"
 +#include "util/u_draw_quad.h"
 +
 +#include "cso_cache/cso_context.h"
 +
 +
 +/**
 + * Set up the per-context state used to implement glClear.
 + *
 + * The whole st->clear structure is zeroed first, so every field not
 + * assigned below (shader handles, vertex buffer, slot counter) starts
 + * out as zero.
 + */
 +void
 +st_init_clear(struct st_context *st)
 +{
 +   struct pipe_screen *screen = st->pipe->screen;
 +
 +   memset(&st->clear, 0, sizeof(st->clear));
 +
 +   /* ask the driver for its PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE value */
 +   st->clear.enable_ds_separate =
 +      screen->get_param(screen, PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE);
 +   st->clear.raster.gl_rasterization_rules = 1;
 +}
 +
 +
 +/**
 + * Release the per-context glClear state: the two pass-through shaders
 + * and the quad vertex buffer.  Each pointer is reset to NULL once its
 + * object has been released.
 + */
 +void
 +st_destroy_clear(struct st_context *st)
 +{
 +   if (st->clear.fs != NULL) {
 +      cso_delete_fragment_shader(st->cso_context, st->clear.fs);
 +      st->clear.fs = NULL;
 +   }
 +
 +   if (st->clear.vs != NULL) {
 +      cso_delete_vertex_shader(st->cso_context, st->clear.vs);
 +      st->clear.vs = NULL;
 +   }
 +
 +   if (st->clear.vbuf != NULL) {
 +      pipe_resource_reference(&st->clear.vbuf, NULL);
 +      st->clear.vbuf = NULL;
 +   }
 +}
 +
 +
 +/**
 + * Bind the pass-through fragment shader used for quad clears, creating
 + * it the first time it is needed.
 + */
 +static INLINE void
 +set_fragment_shader(struct st_context *st)
 +{
 +   if (st->clear.fs == NULL) {
 +      /* created on first use and cached in st->clear.fs */
 +      st->clear.fs = util_make_fragment_passthrough_shader(st->pipe);
 +   }
 +
 +   cso_set_fragment_shader_handle(st->cso_context, st->clear.fs);
 +}
 +
 +
 +/**
 + * Bind the pass-through vertex shader used for quad clears, creating it
 + * the first time it is needed.
 + */
 +static INLINE void
 +set_vertex_shader(struct st_context *st)
 +{
 +   /* A vertex shader is still required to provide the linkage between
 +    * fragment shader input semantics and vertex_element/buffers.
 +    */
 +   if (st->clear.vs == NULL) {
 +      /* two attributes per vertex: position, then color */
 +      const uint attrib_names[] = { TGSI_SEMANTIC_POSITION,
 +                                    TGSI_SEMANTIC_COLOR };
 +      const uint attrib_indexes[] = { 0, 0 };
 +
 +      st->clear.vs =
 +         util_make_vertex_passthrough_shader(st->pipe, 2,
 +                                             attrib_names,
 +                                             attrib_indexes);
 +   }
 +
 +   cso_set_vertex_shader_handle(st->cso_context, st->clear.vs);
 +}
 +
 +
 +/**
 + * Draw a screen-aligned quadrilateral.
 + * Coords are clip coords with y=0=bottom.
 + *
 + * The quad is emitted as a four-vertex triangle fan with two attributes
 + * per vertex (position and color), staged in st->clear.vertices and
 + * copied into st->clear.vbuf before drawing.
 + *
 + * \param st     state tracker context; its clear.vbuf, clear.vbuf_slot
 + *               and clear.vertices fields are updated by this call
 + * \param x0,y0  first corner of the quad
 + * \param x1,y1  opposite corner of the quad
 + * \param z      depth value written to every vertex
 + * \param color  RGBA color written to every vertex; all four components
 + *               are read unconditionally
 + *
 + * NOTE(review): the result of pipe_buffer_create() is not checked before
 + * the buffer is written to and drawn from - confirm whether creation can
 + * fail on the drivers of interest.
 + */
 +static void
 +draw_quad(struct st_context *st,
 +          float x0, float y0, float x1, float y1, GLfloat z,
 +          const GLfloat color[4])
 +{
 +   struct pipe_context *pipe = st->pipe;
 +
 +   /* XXX: Need to improve buffer_write to allow NO_WAIT (as well as
 +    * no_flush) updates to buffers where we know there is no conflict
 +    * with previous data.  Currently using max_slots > 1 will cause
 +    * synchronous rendering if the driver flushes its command buffers
 +    * between one bitmap and the next.  Our flush hook below isn't
 +    * sufficient to catch this as the driver doesn't tell us when it
 +    * flushes its own command buffers.  Until this gets fixed, pay the
 +    * price of allocating a new buffer for each bitmap cache-flush to
 +    * avoid synchronous rendering.
 +    *
 +    * NOTE(review): the "bitmap" wording looks inherited from similar
 +    * code elsewhere; in this function the unit of work is one clear
 +    * quad - confirm.
 +    */
 +   const GLuint max_slots = 1; /* 1024 / sizeof(st->clear.vertices); */
 +   GLuint i;
 +
 +   if (st->clear.vbuf_slot >= max_slots) {
 +      pipe_resource_reference(&st->clear.vbuf, NULL);
 +      st->clear.vbuf_slot = 0;
 +   }
 +
 +   if (!st->clear.vbuf) {
 +      st->clear.vbuf = pipe_buffer_create(pipe->screen,
 +                                          PIPE_BIND_VERTEX_BUFFER,
 +                                          PIPE_USAGE_STREAM,
 +                                          max_slots * sizeof(st->clear.vertices));
 +   }
 +
 +   /* positions: x/y of the four corners, in triangle-fan order */
 +   st->clear.vertices[0][0][0] = x0;
 +   st->clear.vertices[0][0][1] = y0;
 +
 +   st->clear.vertices[1][0][0] = x1;
 +   st->clear.vertices[1][0][1] = y0;
 +
 +   st->clear.vertices[2][0][0] = x1;
 +   st->clear.vertices[2][0][1] = y1;
 +
 +   st->clear.vertices[3][0][0] = x0;
 +   st->clear.vertices[3][0][1] = y1;
 +
 +   /* same for all verts: z, w = 1.0 and the color attribute */
 +   for (i = 0; i < 4; i++) {
 +      st->clear.vertices[i][0][2] = z;
 +      st->clear.vertices[i][0][3] = 1.0;
 +      st->clear.vertices[i][1][0] = color[0];
 +      st->clear.vertices[i][1][1] = color[1];
 +      st->clear.vertices[i][1][2] = color[2];
 +      st->clear.vertices[i][1][3] = color[3];
 +   }
 +
 +   /* put vertex data into vbuf, at the byte offset of the current slot */
 +   pipe_buffer_write_nooverlap(st->pipe, st->clear.vbuf,
 +                                           st->clear.vbuf_slot
 +                                             * sizeof(st->clear.vertices),
 +                                           sizeof(st->clear.vertices),
 +                                           st->clear.vertices);
 +
 +   /* draw, reading from the same slot offset that was just written */
 +   util_draw_vertex_buffer(pipe,
 +                           st->cso_context,
 +                           st->clear.vbuf, 
 +                           st->clear.vbuf_slot * sizeof(st->clear.vertices),
 +                           PIPE_PRIM_TRIANGLE_FAN,
 +                           4,  /* verts */
 +                           2); /* attribs/vert */
 +
 +   /* Increment slot; with max_slots == 1 the next call therefore drops
 +    * this buffer and allocates a new one.
 +    */
 +   st->clear.vbuf_slot++;
 +}
 +
 +
 +
 +/**
 + * Do glClear by drawing a quadrilateral.
 + * The vertices of the quad will be computed from the
 + * ctx->DrawBuffer->_X/Ymin/max fields.
 + *
 + * Every piece of CSO state changed here is saved on entry and restored
 + * before returning.
 + *
 + * \param ctx      GL context whose draw buffer is cleared
 + * \param color    if true, enable color writes per ctx->Color.ColorMask[0]
 + * \param depth    if true, write ctx->Depth.Clear to the depth buffer
 + * \param stencil  if true, write ctx->Stencil.Clear, limited to the low
 + *                 8 bits of ctx->Stencil.WriteMask[0]
 + */
 +static void
 +clear_with_quad(struct gl_context *ctx,
 +                GLboolean color, GLboolean depth, GLboolean stencil)
 +{
 +   struct st_context *st = st_context(ctx);
 +   const struct gl_framebuffer *fb = ctx->DrawBuffer;
 +   const GLfloat fb_width = (GLfloat) fb->Width;
 +   const GLfloat fb_height = (GLfloat) fb->Height;
 +   /* map the framebuffer's clear bounds from pixels to [-1, 1] */
 +   const GLfloat x0 = (GLfloat) fb->_Xmin / fb_width * 2.0f - 1.0f;
 +   const GLfloat x1 = (GLfloat) fb->_Xmax / fb_width * 2.0f - 1.0f;
 +   const GLfloat y0 = (GLfloat) fb->_Ymin / fb_height * 2.0f - 1.0f;
 +   const GLfloat y1 = (GLfloat) fb->_Ymax / fb_height * 2.0f - 1.0f;
 +   float clearColor[4];
 +
 +   /*
 +   printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, 
 +	  color ? "color, " : "",
 +	  depth ? "depth, " : "",
 +	  stencil ? "stencil" : "",
 +	  x0, y0,
 +	  x1, y1);
 +   */
 +
 +   cso_save_blend(st->cso_context);
 +   cso_save_stencil_ref(st->cso_context);
 +   cso_save_depth_stencil_alpha(st->cso_context);
 +   cso_save_rasterizer(st->cso_context);
 +   cso_save_viewport(st->cso_context);
 +   cso_save_clip(st->cso_context);
 +   cso_save_fragment_shader(st->cso_context);
 +   cso_save_vertex_shader(st->cso_context);
 +   cso_save_vertex_elements(st->cso_context);
 +   cso_save_vertex_buffers(st->cso_context);
 +
 +   /* blend state: RGBA masking */
 +   {
 +      struct pipe_blend_state blend;
 +      memset(&blend, 0, sizeof(blend));
 +      blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
 +      blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
 +      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
 +      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
 +      /* when color is not being cleared the colormask stays 0, so the
 +       * quad writes no color channels
 +       */
 +      if (color) {
 +         if (ctx->Color.ColorMask[0][0])
 +            blend.rt[0].colormask |= PIPE_MASK_R;
 +         if (ctx->Color.ColorMask[0][1])
 +            blend.rt[0].colormask |= PIPE_MASK_G;
 +         if (ctx->Color.ColorMask[0][2])
 +            blend.rt[0].colormask |= PIPE_MASK_B;
 +         if (ctx->Color.ColorMask[0][3])
 +            blend.rt[0].colormask |= PIPE_MASK_A;
 +         if (st->ctx->Color.DitherFlag)
 +            blend.dither = 1;
 +      }
 +      cso_set_blend(st->cso_context, &blend);
 +   }
 +
 +   /* depth_stencil state: always pass/set to ref value */
 +   {
 +      struct pipe_depth_stencil_alpha_state depth_stencil;
 +      memset(&depth_stencil, 0, sizeof(depth_stencil));
 +      if (depth) {
 +         depth_stencil.depth.enabled = 1;
 +         depth_stencil.depth.writemask = 1;
 +         depth_stencil.depth.func = PIPE_FUNC_ALWAYS;
 +      }
 +
 +      if (stencil) {
 +         struct pipe_stencil_ref stencil_ref;
 +         memset(&stencil_ref, 0, sizeof(stencil_ref));
 +         depth_stencil.stencil[0].enabled = 1;
 +         depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS;
 +         depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE;
 +         depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
 +         depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE;
 +         depth_stencil.stencil[0].valuemask = 0xff;
 +         depth_stencil.stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
 +         stencil_ref.ref_value[0] = ctx->Stencil.Clear;
 +         cso_set_stencil_ref(st->cso_context, &stencil_ref);
 +      }
 +
 +      cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil);
 +   }
 +
 +   cso_set_vertex_elements(st->cso_context, 2, st->velems_util_draw);
 +
 +   cso_set_rasterizer(st->cso_context, &st->clear.raster);
 +
 +   /* viewport state: viewport matching window dims */
 +   {
 +      const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP);
 +      struct pipe_viewport_state vp;
 +      vp.scale[0] = 0.5f * fb_width;
 +      vp.scale[1] = fb_height * (invert ? -0.5f : 0.5f);
 +      vp.scale[2] = 1.0f;
 +      vp.scale[3] = 1.0f;
 +      vp.translate[0] = 0.5f * fb_width;
 +      vp.translate[1] = 0.5f * fb_height;
 +      vp.translate[2] = 0.0f;
 +      vp.translate[3] = 0.0f;
 +      cso_set_viewport(st->cso_context, &vp);
 +   }
 +
 +   cso_set_clip(st->cso_context, &st->clear.clip);
 +   set_fragment_shader(st);
 +   set_vertex_shader(st);
 +
 +   if (fb->_ColorDrawBuffers[0]) {
 +      st_translate_color(ctx->Color.ClearColor,
 +                         fb->_ColorDrawBuffers[0]->_BaseFormat,
 +                         clearColor);
 +   }
 +   else {
 +      /* No color buffer at draw-buffer index 0, so there is no base
 +       * format to translate for.  draw_quad() copies all four color
 +       * components into every vertex unconditionally, so hand it the
 +       * untranslated clear color instead of uninitialized stack data.
 +       */
 +      clearColor[0] = ctx->Color.ClearColor[0];
 +      clearColor[1] = ctx->Color.ClearColor[1];
 +      clearColor[2] = ctx->Color.ClearColor[2];
 +      clearColor[3] = ctx->Color.ClearColor[3];
 +   }
 +
 +   /* draw quad matching scissor rect */
 +   draw_quad(st, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, clearColor);
 +
 +   /* Restore pipe state */
 +   cso_restore_blend(st->cso_context);
 +   cso_restore_stencil_ref(st->cso_context);
 +   cso_restore_depth_stencil_alpha(st->cso_context);
 +   cso_restore_rasterizer(st->cso_context);
 +   cso_restore_viewport(st->cso_context);
 +   cso_restore_clip(st->cso_context);
 +   cso_restore_fragment_shader(st->cso_context);
 +   cso_restore_vertex_shader(st->cso_context);
 +   cso_restore_vertex_elements(st->cso_context);
 +   cso_restore_vertex_buffers(st->cso_context);
 +}
 +
 +
 +/**
 + * Determine if we need to clear a color buffer by drawing a quad.
 + *
 + * \param ctx  the GL context whose scissor and color-mask state is examined
 + * \param rb   the color renderbuffer; only its Width/Height are read
 + * \return GL_TRUE if scissoring is enabled and the scissor rectangle does
 + *         not start at (0,0) or is narrower/shorter than \p rb, or if any
 + *         channel of ColorMask[0] is disabled; GL_FALSE otherwise.
 + */
 +static INLINE GLboolean
 +check_clear_color_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
 +{
 +   GLuint chan;
 +
 +   if (ctx->Scissor.Enabled) {
 +      /* scissor origin is not the buffer origin */
 +      if (ctx->Scissor.X != 0 || ctx->Scissor.Y != 0)
 +         return GL_TRUE;
 +
 +      /* scissor is smaller than the renderbuffer */
 +      if (ctx->Scissor.Width < rb->Width || ctx->Scissor.Height < rb->Height)
 +         return GL_TRUE;
 +   }
 +
 +   /* any disabled channel in the first color mask requires the quad */
 +   for (chan = 0; chan < 4; chan++) {
 +      if (!ctx->Color.ColorMask[0][chan])
 +         return GL_TRUE;
 +   }
 +
 +   return GL_FALSE;
 +}
 +
 +
 +/**
 + * Determine if we need to clear the combined depth/stencil buffer by
 + * drawing a quad.
 + *
 + * \param ctx  the GL context whose scissor and stencil state is examined
 + * \param rb   the depth/stencil renderbuffer; its Format is asserted to be
 + *             S8, Z24_S8 or S8_Z24 and its Width/Height are read
 + * \return GL_TRUE if scissoring is enabled and the scissor rectangle does
 + *         not start at (0,0) or is narrower/shorter than \p rb, or if the
 + *         low 8 bits of Stencil.WriteMask[0] are not all set;
 + *         GL_FALSE otherwise.
 + */
 +static INLINE GLboolean
 +check_clear_depth_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb)
 +{
 +   const GLuint allBits = 0xff;
 +   const GLuint writeBits = ctx->Stencil.WriteMask[0] & allBits;
 +
 +   assert(rb->Format == MESA_FORMAT_S8 ||
 +          rb->Format == MESA_FORMAT_Z24_S8 ||
 +          rb->Format == MESA_FORMAT_S8_Z24);
 +
 +   if (ctx->Scissor.Enabled) {
 +      /* scissor origin is not the buffer origin */
 +      if (ctx->Scissor.X != 0 || ctx->Scissor.Y != 0)
 +         return GL_TRUE;
 +
 +      /* scissor is smaller than the renderbuffer */
 +      if (ctx->Scissor.Width < rb->Width || ctx->Scissor.Height < rb->Height)
 +         return GL_TRUE;
 +   }
 +
 +   /* a partial stencil write mask requires the quad */
 +   return (writeBits != allBits) ? GL_TRUE : GL_FALSE;
 +}
 +
 +
 +/**
 + * Determine if we need to clear the depth buffer by drawing a quad.
 + *
 + * \param ctx          the GL context whose scissor state and draw-buffer
 + *                     visual are examined
 + * \param rb           the depth renderbuffer; its surface format and
 + *                     Width/Height are read
 + * \param ds_separate  when false, a combined depth+stencil surface format
 + *                     with stencil bits in the visual forces the quad path
 + * \return GL_TRUE if scissoring is enabled and the scissor rectangle does
 + *         not start at (0,0) or is narrower/shorter than \p rb, or if the
 + *         combined-format condition above holds; GL_FALSE otherwise.
 + */
 +static INLINE GLboolean
 +check_clear_depth_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
 +                            boolean ds_separate)
 +{
 +   const struct st_renderbuffer *strb = st_renderbuffer(rb);
 +   const GLboolean packedDS
 +      = util_format_is_depth_and_stencil(strb->surface->format);
 +
 +   if (ctx->Scissor.Enabled) {
 +      /* scissor origin is not the buffer origin */
 +      if (ctx->Scissor.X != 0 || ctx->Scissor.Y != 0)
 +         return GL_TRUE;
 +
 +      /* scissor is smaller than the renderbuffer */
 +      if (ctx->Scissor.Width < rb->Width || ctx->Scissor.Height < rb->Height)
 +         return GL_TRUE;
 +   }
 +
 +   /* only a combined format without separate DS clears can still need it */
 +   if (ds_separate || !packedDS)
 +      return GL_FALSE;
 +
 +   return (ctx->DrawBuffer->Visual.stencilBits > 0) ? GL_TRUE : GL_FALSE;
 +}
 +
 +
 +/**
 + * Determine if we need to clear the stencil buffer by drawing a quad.
 + *
 + * \param ctx          the GL context whose scissor/stencil state and
 + *                     draw-buffer visual are examined
 + * \param rb           the stencil renderbuffer; its Format is asserted to
 + *                     be S8, Z24_S8 or S8_Z24, and its surface format and
 + *                     Width/Height are read
 + * \param ds_separate  when false, a combined depth+stencil surface format
 + *                     with depth bits in the visual forces the quad path
 + * \return GL_TRUE if the low 8 bits of Stencil.WriteMask[0] are not all
 + *         set, if scissoring is enabled and the scissor rectangle does not
 + *         start at (0,0) or is narrower/shorter than \p rb, or if the
 + *         combined-format condition above holds; GL_FALSE otherwise.
 + */
 +static INLINE GLboolean
 +check_clear_stencil_with_quad(struct gl_context *ctx, struct gl_renderbuffer *rb,
 +                              boolean ds_separate)
 +{
 +   const struct st_renderbuffer *strb = st_renderbuffer(rb);
 +   const GLboolean packedDS
 +      = util_format_is_depth_and_stencil(strb->surface->format);
 +   const GLuint allBits = 0xff;
 +   const GLuint writeBits = ctx->Stencil.WriteMask[0] & allBits;
 +
 +   assert(rb->Format == MESA_FORMAT_S8 ||
 +          rb->Format == MESA_FORMAT_Z24_S8 ||
 +          rb->Format == MESA_FORMAT_S8_Z24);
 +
 +   /* a partial stencil write mask requires the quad */
 +   if (writeBits != allBits)
 +      return GL_TRUE;
 +
 +   if (ctx->Scissor.Enabled) {
 +      /* scissor origin is not the buffer origin */
 +      if (ctx->Scissor.X != 0 || ctx->Scissor.Y != 0)
 +         return GL_TRUE;
 +
 +      /* scissor is smaller than the renderbuffer */
 +      if (ctx->Scissor.Width < rb->Width || ctx->Scissor.Height < rb->Height)
 +         return GL_TRUE;
 +   }
 +
 +   /* This is correct, but it is necessary to look at the depth clear
 +    * value held in the surface when it comes time to issue the clear,
 +    * rather than taking depth and stencil clear values from the
 +    * current state.
 +    */
 +   if (ds_separate || !packedDS)
 +      return GL_FALSE;
 +
 +   return (ctx->DrawBuffer->Visual.depthBits > 0) ? GL_TRUE : GL_FALSE;
 +}
 +
 +
 +
 +/**
 + * Called when we need to flush.
 + *
 + * Passes NULL to pipe_resource_reference() for st->clear.vbuf and resets
 + * st->clear.vbuf_slot to 0.
 + *
 + * \param st  the state tracker context whose clear.vbuf / clear.vbuf_slot
 + *            fields are reset
 + */
 +void
 +st_flush_clear(struct st_context *st)
 +{
 +   /* Release vertex buffer to avoid synchronous rendering if we were
 +    * to map it in the next frame.
 +    */
 +   pipe_resource_reference(&st->clear.vbuf, NULL);
 +   st->clear.vbuf_slot = 0;
 +}
 + 
 +
 +
 +/**
 + * Called via ctx->Driver.Clear()
 + *
 + * Each buffer selected by \p mask is sorted into one of two sets: buffers
 + * that must be cleared by drawing a quad (as decided by the
 + * check_clear_*_with_quad() helpers) and buffers that can be cleared with
 + * pipe->clear().  If any buffer needs the quad path, all of them are
 + * cleared with clear_with_quad(); otherwise a single pipe->clear() call is
 + * issued.  The accumulation buffer is handled separately through
 + * st_clear_accum_buffer().
 + *
 + * \param ctx   the GL context
 + * \param mask  bitmask of BUFFER_BIT_x values selecting the buffers to clear
 + */
 +static void
 +st_Clear(struct gl_context *ctx, GLbitfield mask)
 +{
 +   static const GLbitfield BUFFER_BITS_DS
 +      = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
 +   struct st_context *st = st_context(ctx);
 +   struct gl_renderbuffer *depthRb
 +      = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
 +   struct gl_renderbuffer *stencilRb
 +      = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
 +   GLbitfield quad_buffers = 0x0;   /* buffers that need the quad path */
 +   GLbitfield clear_buffers = 0x0;  /* buffers that can use pipe->clear() */
 +   GLuint i;
 +
 +   /* This makes sure the pipe has the latest scissor, etc values */
 +   st_validate_state( st );
 +
 +   if (mask & BUFFER_BITS_COLOR) {
 +      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 +         GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
 +
 +         if (mask & (1 << b)) {
 +            struct gl_renderbuffer *rb
 +               = ctx->DrawBuffer->Attachment[b].Renderbuffer;
 +            struct st_renderbuffer *strb = st_renderbuffer(rb);
 +
 +            if (!strb || !strb->surface)
 +               continue;
 +
 +            if (check_clear_color_with_quad( ctx, rb ))
 +               quad_buffers |= PIPE_CLEAR_COLOR;
 +            else
 +               clear_buffers |= PIPE_CLEAR_COLOR;
 +         }
 +      }
 +   }
 +
 +   if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) {
 +      /* clearing combined depth + stencil */
 +      struct st_renderbuffer *strb = st_renderbuffer(depthRb);
 +
 +      /* skip a missing renderbuffer, as the color path above does */
 +      if (strb && strb->surface) {
 +         if (check_clear_depth_stencil_with_quad(ctx, depthRb))
 +            quad_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
 +         else
 +            clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
 +      }
 +   }
 +   else {
 +      /* separate depth/stencil clears */
 +      /* I don't think truly separate buffers are actually possible in gallium or hw? */
 +      if (mask & BUFFER_BIT_DEPTH) {
 +         struct st_renderbuffer *strb = st_renderbuffer(depthRb);
 +
 +         if (strb && strb->surface) {
 +            if (check_clear_depth_with_quad(ctx, depthRb,
 +                                            st->clear.enable_ds_separate))
 +               quad_buffers |= PIPE_CLEAR_DEPTH;
 +            else
 +               clear_buffers |= PIPE_CLEAR_DEPTH;
 +         }
 +      }
 +      if (mask & BUFFER_BIT_STENCIL) {
 +         struct st_renderbuffer *strb = st_renderbuffer(stencilRb);
 +
 +         if (strb && strb->surface) {
 +            if (check_clear_stencil_with_quad(ctx, stencilRb,
 +                                              st->clear.enable_ds_separate))
 +               quad_buffers |= PIPE_CLEAR_STENCIL;
 +            else
 +               clear_buffers |= PIPE_CLEAR_STENCIL;
 +         }
 +      }
 +   }
 +
 +   /*
 +    * If we're going to use clear_with_quad() for any reason, use it for
 +    * everything possible.
 +    */
 +   if (quad_buffers) {
 +      quad_buffers |= clear_buffers;
 +      clear_with_quad(ctx,
 +                      quad_buffers & PIPE_CLEAR_COLOR,
 +                      quad_buffers & PIPE_CLEAR_DEPTH,
 +                      quad_buffers & PIPE_CLEAR_STENCIL);
 +   } else if (clear_buffers) {
 +      float clearColor[4];
 +
 +      /* driver cannot know it can clear everything if the buffer
 +       * is a combined depth/stencil buffer but this wasn't actually
 +       * required from the visual. Hence fix this up to avoid potential
 +       * read-modify-write in the driver.
 +       */
 +      if ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) &&
 +          ((clear_buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL) &&
 +          (depthRb == stencilRb) &&
 +          (ctx->DrawBuffer->Visual.depthBits == 0 ||
 +           ctx->DrawBuffer->Visual.stencilBits == 0))
 +         clear_buffers |= PIPE_CLEAR_DEPTHSTENCIL;
 +
 +      if (ctx->DrawBuffer->_ColorDrawBuffers[0]) {
 +         st_translate_color(ctx->Color.ClearColor,
 +                            ctx->DrawBuffer->_ColorDrawBuffers[0]->_BaseFormat,
 +                            clearColor);
 +      }
 +      else {
 +         /* no color draw buffer to translate for: pass the GL clear
 +          * color through unchanged so clearColor is never uninitialized
 +          */
 +         clearColor[0] = ctx->Color.ClearColor[0];
 +         clearColor[1] = ctx->Color.ClearColor[1];
 +         clearColor[2] = ctx->Color.ClearColor[2];
 +         clearColor[3] = ctx->Color.ClearColor[3];
 +      }
 +
 +      /* Use the translated color here, as the quad path does; the
 +       * translated value was previously computed but never passed on.
 +       */
 +      st->pipe->clear(st->pipe, clear_buffers, clearColor,
 +                      ctx->Depth.Clear, ctx->Stencil.Clear);
 +   }
 +   if (mask & BUFFER_BIT_ACCUM)
 +      st_clear_accum_buffer(ctx,
 +                            ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
 +}
 +
 +
 +/**
 + * Plug the clear entry point into the device driver function table.
 + *
 + * \param functions  the table whose Clear hook is set to st_Clear
 + */
 +void
 +st_init_clear_functions(struct dd_function_table *functions)
 +{
 +   functions->Clear = st_Clear;
 +}
 diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c index 8bdb3c801..1b824c0de 100644 --- a/mesalib/src/mesa/state_tracker/st_cb_texture.c +++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c @@ -1530,7 +1530,7 @@ st_copy_texsubimage(struct gl_context *ctx,           GLint srcY0, srcY1;
           struct pipe_surface surf_tmpl;
           memset(&surf_tmpl, 0, sizeof(surf_tmpl));
 -         surf_tmpl.format = stImage->pt->format;
 +         surf_tmpl.format = util_format_linear(stImage->pt->format);
           surf_tmpl.usage = PIPE_BIND_RENDER_TARGET;
           surf_tmpl.u.tex.level = stImage->level;
           surf_tmpl.u.tex.first_layer = stImage->face + destZ;
 | 
