8 files changed, 214 insertions, 159 deletions
diff --git a/mesalib/src/gallium/auxiliary/util/u_format.csv b/mesalib/src/gallium/auxiliary/util/u_format.csv
index a3d2aae62..6c008cc71 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format.csv
+++ b/mesalib/src/gallium/auxiliary/util/u_format.csv
@@ -265,5 +265,9 @@ PIPE_FORMAT_YV16                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, y
 PIPE_FORMAT_IYUV                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
 PIPE_FORMAT_NV12                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
 PIPE_FORMAT_NV21                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_IA44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
-PIPE_FORMAT_AI44                  , other, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+
+# Usually used to implement IA44 and AI44 formats in video decoding
+PIPE_FORMAT_R4A4_UNORM            , plain, 1, 1, un4 , un4 ,     ,     , y00x, rgb
+PIPE_FORMAT_A4R4_UNORM            , plain, 1, 1, un4 , un4 ,     ,     , x00y, rgb
+PIPE_FORMAT_R8A8_UNORM            , plain, 1, 1, un8 , un8 ,     ,     , x00y, rgb
+PIPE_FORMAT_A8R8_UNORM            , plain, 1, 1, un8 , un8 ,     ,     , y00x, rgb
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_yuv.c b/mesalib/src/gallium/auxiliary/util/u_format_yuv.c
index 64ea0b353..38a25b186 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format_yuv.c
+++ b/mesalib/src/gallium/auxiliary/util/u_format_yuv.c
@@ -1142,41 +1142,3 @@ util_format_nv21_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
 void
 util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src,
                              unsigned i, unsigned j) {}
-void
-util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
-                             const float *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src,
-                             unsigned i, unsigned j) {}
-void
-util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
-                             const float *src_row, unsigned src_stride,
-                             unsigned width, unsigned height) {}
-void
-util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src,
-                             unsigned i, unsigned j) {}
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_yuv.h b/mesalib/src/gallium/auxiliary/util/u_format_yuv.h
index 9f2365a52..4cb22df7a 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format_yuv.h
+++ b/mesalib/src/gallium/auxiliary/util/u_format_yuv.h
@@ -266,46 +266,6 @@ void
 util_format_nv21_fetch_rgba_float(float *dst, const uint8_t *src,
                              unsigned i, unsigned j);
 void
-util_format_ia44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ia44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ia44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ia44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
-                             const float *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ia44_fetch_rgba_float(float *dst, const uint8_t *src,
-                             unsigned i, unsigned j);
-void
-util_format_ai44_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ai44_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ai44_unpack_rgba_float(float *dst_row, unsigned dst_stride,
-                             const uint8_t *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ai44_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
-                             const float *src_row, unsigned src_stride,
-                             unsigned width, unsigned height);
-void
-util_format_ai44_fetch_rgba_float(float *dst, const uint8_t *src,
-                             unsigned i, unsigned j);
-
-
-void
 util_format_r8g8_b8g8_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride,
                                          const uint8_t *src_row, unsigned src_stride,
                                          unsigned width, unsigned height);
diff --git a/mesalib/src/mesa/main/bufferobj.c b/mesalib/src/mesa/main/bufferobj.c
index c453f9c85..a30340174 100644
--- a/mesalib/src/mesa/main/bufferobj.c
+++ b/mesalib/src/mesa/main/bufferobj.c
@@ -1004,6 +1004,12 @@ _mesa_MapBufferARB(GLenum target, GLenum access)
       return NULL;
    }
 
+   if (!bufObj->Size) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY,
+                  "glMapBuffer(buffer size = 0)");
+      return NULL;
+   }
+
    ASSERT(ctx->Driver.MapBufferRange);
    map = ctx->Driver.MapBufferRange(ctx, 0, bufObj->Size, accessFlags, bufObj);
    if (!map) {
@@ -1410,7 +1416,23 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
                   "glMapBufferRange(buffer already mapped)");
       return NULL;
    }
-      
+
+   if (!bufObj->Size) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY,
+                  "glMapBufferRange(buffer size = 0)");
+      return NULL;
+   }
+
+   /* Mapping zero bytes should return a non-null pointer. */
+   if (!length) {
+      static long dummy = 0;
+      bufObj->Pointer = &dummy;
+      bufObj->Length = length;
+      bufObj->Offset = offset;
+      bufObj->AccessFlags = access;
+      return bufObj->Pointer;
+   }
+
    ASSERT(ctx->Driver.MapBufferRange);
    map = ctx->Driver.MapBufferRange(ctx, offset, length, access, bufObj);
    if (!map) {
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
index a451b4404..aab7444e2 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -229,13 +229,6 @@ st_bufferobj_data(struct gl_context *ctx,
 
 
 /**
- * Dummy data whose's pointer is used for zero size buffers or ranges.
- */
-static long st_bufferobj_zero_length = 0;
-
-
-
-/**
  * Called via glMapBufferRange().
  */
 static void *
@@ -280,24 +273,15 @@ st_bufferobj_map_range(struct gl_context *ctx,
    assert(offset < obj->Size);
    assert(offset + length <= obj->Size);
 
-   /*
-    * We go out of way here to hide the degenerate yet valid case of zero
-    * length range from the pipe driver.
-    */
-   if (!length) {
-      obj->Pointer = &st_bufferobj_zero_length;
-   }
-   else {
-      obj->Pointer = pipe_buffer_map_range(pipe, 
-                                           st_obj->buffer,
-                                           offset, length,
-                                           flags,
-                                           &st_obj->transfer);
-      if (obj->Pointer) {
-         obj->Pointer = (ubyte *) obj->Pointer + offset;
-      }
+   obj->Pointer = pipe_buffer_map_range(pipe,
+                                        st_obj->buffer,
+                                        offset, length,
+                                        flags,
+                                        &st_obj->transfer);
+   if (obj->Pointer) {
+      obj->Pointer = (ubyte *) obj->Pointer + offset;
    }
-   
+
    if (obj->Pointer) {
       obj->Offset = offset;
       obj->Length = length;
diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c
index 322dbbfd5..aa7f3b52c 100644
--- a/mesalib/src/mesa/state_tracker/st_extensions.c
+++ b/mesalib/src/mesa/state_tracker/st_extensions.c
@@ -203,6 +203,9 @@ void st_init_limits(struct st_context *st)
 
    /* XXX we'll need a better query here someday */
    if (screen->get_param(screen, PIPE_CAP_GLSL)) {
+      c->MinProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MIN_TEXEL_OFFSET);
+      c->MaxProgramTexelOffset = screen->get_param(screen, PIPE_CAP_MAX_TEXEL_OFFSET);
+
       c->GLSLVersion = 120;
    }
 }
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e2857edf3..9394bea00 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -80,6 +80,9 @@ extern "C" {
 
 #define MAX_TEMPS         4096
 
+/* will be 4 for GLSL 4.00 */
+#define MAX_GLSL_TEXTURE_OFFSET 1
+
 class st_src_reg;
 class st_dst_reg;
 
@@ -211,6 +214,8 @@ public:
    int sampler; /**< sampler index */
    int tex_target; /**< One of TEXTURE_*_INDEX */
    GLboolean tex_shadow;
+   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+   unsigned tex_offset_num_offset;
    int dead_mask; /**< Used in dead code elimination */
 
    class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
@@ -385,6 +390,8 @@ public:
    void emit_scalar(ir_instruction *ir, unsigned op,
         	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
+   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
    void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
 
    void emit_scs(ir_instruction *ir, unsigned op,
@@ -562,7 +569,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    }
 
    this->instructions.push_tail(inst);
-   
+
+   if (native_integers)
+      try_emit_float_set(ir, op, dst);
+
    return inst;
 }
 
@@ -588,11 +598,28 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
+ /**
+ * Emits the code to convert the result of float SET instructions to integers.
+ */
+void
+glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
+        		 st_dst_reg dst)
+{
+   if ((op == TGSI_OPCODE_SEQ ||
+        op == TGSI_OPCODE_SNE ||
+        op == TGSI_OPCODE_SGE ||
+        op == TGSI_OPCODE_SLT))
+   {
+      st_src_reg src = st_src_reg(dst);
+      src.negate = ~src.negate;
+      dst.type = GLSL_TYPE_FLOAT;
+      emit(ir, TGSI_OPCODE_F2I, dst, src);
+   }
+}
+
 /**
  * Determines whether to use an integer, unsigned integer, or float opcode 
  * based on the operands and input opcode, then emits the result.
- * 
- * TODO: type checking for remaining TGSI opcodes
  */
 unsigned
 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
@@ -604,7 +631,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
       type = GLSL_TYPE_FLOAT;
    else if (native_integers)
-      type = src0.type;
+      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 
 #define case4(c, f, i, u) \
    case TGSI_OPCODE_##c: \
@@ -630,12 +657,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
       case3(SGE, ISGE, USGE);
       case3(SLT, ISLT, USLT);
       
-      case2iu(SHL, SHL);
       case2iu(ISHR, USHR);
-      case2iu(NOT, NOT);
-      case2iu(AND, AND);
-      case2iu(OR, OR);
-      case2iu(XOR, XOR);
       
       default: break;
    }
@@ -1389,7 +1411,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    switch (ir->operation) {
    case ir_unop_logic_not:
       if (result_dst.type != GLSL_TYPE_FLOAT)
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
       else {
          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
           * older GPUs implement SEQ using multiple instructions (i915 uses two
@@ -1489,10 +1511,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
       break;
    case ir_binop_greater:
-      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
       break;
    case ir_binop_lequal:
-      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
       break;
    case ir_binop_gequal:
       emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
@@ -1605,41 +1627,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    }
 
    case ir_binop_logic_xor:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_logic_or: {
-      /* After the addition, the value will be an integer on the
-       * range [0,2].  Zero stays zero, and positive values become 1.0.
-       */
-      glsl_to_tgsi_instruction *add =
-         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
-          result_dst.type == GLSL_TYPE_FLOAT) {
-         /* The clamping to [0,1] can be done for free in the fragment
-          * shader with a saturate if floats are being used as boolean values.
-          */
-         add->saturate = true;
-      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
-         /* Negating the result of the addition gives values on the range
-          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
-          * is achieved using SLT.
+      if (native_integers) {
+         /* If integers are used as booleans, we can use an actual "or" 
+          * instruction.
           */
-         st_src_reg slt_src = result_src;
-         slt_src.negate = ~slt_src.negate;
-         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         assert(native_integers);
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
       } else {
-         /* Use an SNE on the result of the addition.  Zero stays zero,
-          * 1 stays 1, and 2 becomes 1.
+         /* After the addition, the value will be an integer on the
+          * range [0,2].  Zero stays zero, and positive values become 1.0.
           */
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+         glsl_to_tgsi_instruction *add =
+            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate if floats are being used as boolean values.
+             */
+            add->saturate = true;
+         } else {
+            /* Negating the result of the addition gives values on the range
+             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         }
       }
       break;
    }
 
    case ir_binop_logic_and:
-      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      /* If native integers are disabled, the bool args are stored as float 0.0
+       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
+       * actual AND opcode.
+       */
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
 
    case ir_binop_dot:
@@ -1662,18 +1695,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
       break;
    case ir_unop_i2f:
-   case ir_unop_b2f:
       if (native_integers) {
          emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
          break;
       }
+      /* fallthrough to next case otherwise */
+   case ir_unop_b2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+         break;
+      }
+      /* fallthrough to next case otherwise */
    case ir_unop_i2u:
    case ir_unop_u2i:
       /* Converting between signed and unsigned integers is a no-op. */
-   case ir_unop_b2i:
-      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
       result_src = op[0];
       break;
+   case ir_unop_b2i:
+      if (native_integers) {
+         /* Booleans are stored as integers using ~0 for true and 0 for false.
+          * GLSL requires that int(bool) return 1 for true and 0 for false.
+          * This conversion is done with AND, but it could be done with NEG.
+          */
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+      } else {
+         /* Booleans and integers are both stored as floats when native 
+          * integers are disabled.
+          */
+         result_src = op[0];
+      }
+      break;
    case ir_unop_f2i:
       if (native_integers)
          emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
@@ -1681,9 +1732,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
       break;
    case ir_unop_f2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+      break;
    case ir_unop_i2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
-            st_src_reg_for_type(result_dst.type, 0));
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
    case ir_unop_trunc:
       emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1711,7 +1766,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_bit_not:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
          break;
       }
@@ -1721,27 +1776,27 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
          break;
       }
    case ir_binop_lshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
          break;
       }
    case ir_binop_rshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
          break;
       }
    case ir_binop_bit_and:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
          break;
       }
    case ir_binop_bit_xor:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
          break;
       }
    case ir_binop_bit_or:
-      if (glsl_version >= 130) {
+      if (native_integers) {
          emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
          break;
       }
@@ -1898,17 +1953,19 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
       if (element_size == 1) {
          index_reg = this->result;
       } else {
-         index_reg = get_temp(glsl_type::float_type);
+         index_reg = get_temp(native_integers ?
+                              glsl_type::int_type : glsl_type::float_type);
 
          emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
-              this->result, st_src_reg_for_float(element_size));
+              this->result, st_src_reg_for_type(index_reg.type, element_size));
       }
 
       /* If there was already a relative address register involved, add the
        * new and the old together to get the new offset.
        */
       if (src.reladdr != NULL) {
-         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+         st_src_reg accum_reg = get_temp(native_integers ?
+                                glsl_type::int_type : glsl_type::float_type);
 
          emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
               index_reg, *src.reladdr);
@@ -2129,12 +2186,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          st_src_reg l_src = st_src_reg(l);
+         st_src_reg condition_temp = condition;
          l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
          
+         if (native_integers) {
+            /* This is necessary because TGSI's CMP instruction expects the
+             * condition to be a float, and we store booleans as integers.
+             * If TGSI had a UCMP instruction or similar, this extra
+             * instruction would not be necessary.
+             */
+            condition_temp = get_temp(glsl_type::vec4_type);
+            condition.negate = 0;
+            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
+            condition_temp.swizzle = condition.swizzle;
+         }
+         
          if (switch_order) {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
          } else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
          }
 
          l.index++;
@@ -2154,6 +2224,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
       new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
       new_inst->saturate = inst->saturate;
+      inst->dead_mask = inst->dst.writemask;
    } else {
       for (i = 0; i < type_size(ir->lhs->type); i++) {
          emit(ir, TGSI_OPCODE_MOV, l, r);
@@ -2421,7 +2492,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
 void
 glsl_to_tgsi_visitor::visit(ir_texture *ir)
 {
-   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
    st_dst_reg result_dst, coord_dst;
    glsl_to_tgsi_instruction *inst = NULL;
    unsigned opcode = TGSI_OPCODE_NOP;
@@ -2480,6 +2551,10 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       opcode = TGSI_OPCODE_TXF;
       ir->lod_info.lod->accept(this);
       lod_info = this->result;
+      if (ir->offset) {
+	 ir->offset->accept(this);
+	 offset = this->result;
+      }
       break;
    }
 
@@ -2555,7 +2630,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
       inst = emit(ir, opcode, result_dst, coord, dx, dy);
    else if (opcode == TGSI_OPCODE_TXQ)
       inst = emit(ir, opcode, result_dst, lod_info);
-   else
+   else if (opcode == TGSI_OPCODE_TXF) {
+      inst = emit(ir, opcode, result_dst, coord);
+   } else
       inst = emit(ir, opcode, result_dst, coord);
 
    if (ir->shadow_comparitor)
@@ -2565,6 +2642,15 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
         					   this->shader_program,
         					   this->prog);
 
+   if (ir->offset) {
+       inst->tex_offset_num_offset = 1;
+       inst->tex_offsets[0].Index = offset.index;
+       inst->tex_offsets[0].File = offset.file;
+       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
+       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
+       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+   }
+
    const glsl_type *sampler_type = ir->sampler->type;
 
    switch (sampler_type->sampler_dimensionality) {
@@ -4216,6 +4302,23 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
    return src;
 }
 
+static struct tgsi_texture_offset
+translate_tex_offset(struct st_translate *t,
+                     const struct tgsi_texture_offset *in_offset)
+{
+   struct tgsi_texture_offset offset;
+
+   assert(in_offset->File == PROGRAM_IMMEDIATE);
+
+   offset.File = TGSI_FILE_IMMEDIATE;
+   offset.Index = in_offset->Index;
+   offset.SwizzleX = in_offset->SwizzleX;
+   offset.SwizzleY = in_offset->SwizzleY;
+   offset.SwizzleZ = in_offset->SwizzleZ;
+
+   return offset;
+}
+
 static void
 compile_tgsi_instruction(struct st_translate *t,
                          const glsl_to_tgsi_instruction *inst)
@@ -4224,6 +4327,8 @@ compile_tgsi_instruction(struct st_translate *t,
    GLuint i;
    struct ureg_dst dst[1];
    struct ureg_src src[4];
+   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
+
    unsigned num_dst;
    unsigned num_src;
 
@@ -4260,10 +4365,14 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_TXQ:
    case TGSI_OPCODE_TXF:
       src[num_src++] = t->samplers[inst->sampler];
+      for (i = 0; i < inst->tex_offset_num_offset; i++) {
+         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+      }
       ureg_tex_insn(ureg,
                     inst->op,
                     dst, num_dst, 
                     translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    texoffsets, inst->tex_offset_num_offset,
                     src, num_src);
       return;
 
@@ -5035,21 +5144,31 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                                      (struct gl_vertex_program *)linked_prog);
             ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
                                                  linked_prog);
+            if (!ok) {
+               _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
+            }
             break;
          case GL_FRAGMENT_SHADER:
             _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
                                      (struct gl_fragment_program *)linked_prog);
             ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
                                                  linked_prog);
+            if (!ok) {
+               _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
+            }
             break;
          case GL_GEOMETRY_SHADER:
             _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
                                      (struct gl_geometry_program *)linked_prog);
             ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
                                                  linked_prog);
+            if (!ok) {
+               _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
+            }
             break;
          }
          if (!ok) {
+            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL);
             _mesa_reference_program(ctx, &linked_prog, NULL);
             return GL_FALSE;
          }
diff --git a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 656c985d7..f4263c64c 100644
--- a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -705,6 +705,7 @@ compile_instruction(
                      dst, num_dst, 
                      translate_texture_target( inst->TexSrcTarget,
                                                inst->TexShadow ),
+                     NULL, 0,
                      src, num_src );
       return;