18 files changed, 331 insertions, 182 deletions
diff --git a/mesalib/Makefile.am b/mesalib/Makefile.am
index d2916da43..c9aadc827 100644
--- a/mesalib/Makefile.am
+++ b/mesalib/Makefile.am
@@ -64,7 +64,6 @@ IGNORE_FILES = \
 
 parsers: configure
 	$(MAKE) -C src/glsl glsl_parser.cpp glsl_parser.h glsl_lexer.cpp glcpp/glcpp-lex.c glcpp/glcpp-parse.c glcpp/glcpp-parse.h
-	$(MAKE) -C src/mesa program/lex.yy.c program/program_parse.tab.c program/program_parse.tab.h
 
 # Everything for new a Mesa release:
 ARCHIVES = $(PACKAGE_NAME).tar.gz \
diff --git a/mesalib/docs/GL3.txt b/mesalib/docs/GL3.txt
index 56c4994e5..f5d5e72c9 100644
--- a/mesalib/docs/GL3.txt
+++ b/mesalib/docs/GL3.txt
@@ -104,7 +104,7 @@ GL 4.0, GLSL 4.00:
   - Fused multiply-add                                 DONE ()
   - Packing/bitfield/conversion functions              DONE (r600)
   - Enhanced textureGather                             DONE (r600, radeonsi)
-  - Geometry shader instancing                         DONE ()
+  - Geometry shader instancing                         DONE (r600)
   - Geometry shader multiple streams                   DONE ()
   - Enhanced per-sample shading                        DONE (r600)
   - Interpolation functions                            DONE ()
@@ -151,7 +151,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_arrays_of_arrays                              started (Timothy)
   GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
   GL_ARB_clear_buffer_object                           DONE (all drivers)
-  GL_ARB_compute_shader                                started (currently stalled)
+  GL_ARB_compute_shader                                in progress (jljusten)
   GL_ARB_copy_image                                    DONE (i965)
   GL_KHR_debug                                         DONE (all drivers)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
@@ -202,7 +202,7 @@ GL 4.5, GLSL 4.50:
 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
   GL_ARB_arrays_of_arrays                              started (Timothy)
-  GL_ARB_compute_shader                                started (currently stalled)
+  GL_ARB_compute_shader                                in progress (jljusten)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
   GL_ARB_framebuffer_no_attachments                    not started
   GL_ARB_program_interface_query                       not started
diff --git a/mesalib/src/gallium/auxiliary/util/u_blitter.c b/mesalib/src/gallium/auxiliary/util/u_blitter.c
index 609e02fd1..f3fe949c2 100644
--- a/mesalib/src/gallium/auxiliary/util/u_blitter.c
+++ b/mesalib/src/gallium/auxiliary/util/u_blitter.c
@@ -130,6 +130,7 @@ struct blitter_context_priv
    unsigned dst_height;
 
    boolean has_geometry_shader;
+   boolean has_layered;
    boolean has_stream_out;
    boolean has_stencil_export;
    boolean has_texture_multisample;
@@ -288,28 +289,24 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
       }
    }
 
-   /* Fragment shaders are created on-demand, except these.
-    * The interpolation must be constant for integer texture clearing to work.
-    */
-   ctx->fs_empty = util_make_empty_fragment_shader(pipe);
-   ctx->fs_write_one_cbuf =
-      util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
-                                            TGSI_INTERPOLATE_CONSTANT, FALSE);
-   ctx->fs_write_all_cbufs =
-      util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
-                                            TGSI_INTERPOLATE_CONSTANT, TRUE);
-
-   /* vertex shaders */
-   {
-      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
-                                      TGSI_SEMANTIC_GENERIC };
-      const uint semantic_indices[] = { 0, 0 };
-      ctx->vs =
-         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
-                                             semantic_indices);
-   }
+   ctx->has_layered =
+      pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) &&
+      pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT);
 
-   if (ctx->has_stream_out) {
+   /* set invariant vertex coordinates */
+   for (i = 0; i < 4; i++)
+      ctx->vertices[i][0][3] = 1; /*v.w*/
+
+   ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+
+   return &ctx->base;
+}
+
+static void bind_vs_pos_only(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->vs_pos_only) {
       struct pipe_stream_output_info so;
       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
       const uint semantic_indices[] = { 0 };
@@ -324,18 +321,71 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
                                                      semantic_indices, &so);
    }
 
-   if (pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_INSTANCEID) &&
-       pipe->screen->get_param(pipe->screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+}
+
+static void bind_vs_passthrough(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->vs) {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indices[] = { 0, 0 };
+      ctx->vs =
+         util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+                                             semantic_indices);
+   }
+
+   pipe->bind_vs_state(pipe, ctx->vs);
+}
+
+static void bind_vs_layered(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->vs_layered) {
       ctx->vs_layered = util_make_layered_clear_vertex_shader(pipe);
    }
 
-   /* set invariant vertex coordinates */
-   for (i = 0; i < 4; i++)
-      ctx->vertices[i][0][3] = 1; /*v.w*/
+   pipe->bind_vs_state(pipe, ctx->vs_layered);
+}
 
-   ctx->upload = u_upload_create(pipe, 65536, 4, PIPE_BIND_VERTEX_BUFFER);
+static void bind_fs_empty(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
 
-   return &ctx->base;
+   if (!ctx->fs_empty) {
+      ctx->fs_empty = util_make_empty_fragment_shader(pipe);
+   }
+
+   pipe->bind_fs_state(pipe, ctx->fs_empty);
+}
+
+static void bind_fs_write_one_cbuf(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->fs_write_one_cbuf) {
+      ctx->fs_write_one_cbuf =
+         util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                               TGSI_INTERPOLATE_CONSTANT, FALSE);
+   }
+
+   pipe->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+}
+
+static void bind_fs_write_all_cbufs(struct blitter_context_priv *ctx)
+{
+   struct pipe_context *pipe = ctx->base.pipe;
+
+   if (!ctx->fs_write_all_cbufs) {
+      ctx->fs_write_all_cbufs =
+         util_make_fragment_passthrough_shader(pipe, TGSI_SEMANTIC_GENERIC,
+                                               TGSI_INTERPOLATE_CONSTANT, TRUE);
+   }
+
+   pipe->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
 }
 
 void util_blitter_destroy(struct blitter_context *blitter)
@@ -361,7 +411,8 @@ void util_blitter_destroy(struct blitter_context *blitter)
    pipe->delete_rasterizer_state(pipe, ctx->rs_state_scissor);
    if (ctx->rs_discard_state)
       pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
-   pipe->delete_vs_state(pipe, ctx->vs);
+   if (ctx->vs)
+      pipe->delete_vs_state(pipe, ctx->vs);
    if (ctx->vs_pos_only)
       pipe->delete_vs_state(pipe, ctx->vs_pos_only);
    if (ctx->vs_layered)
@@ -408,9 +459,12 @@ void util_blitter_destroy(struct blitter_context *blitter)
                ctx->delete_fs_state(pipe, ctx->fs_resolve_uint[i][j][f]);
    }
 
-   ctx->delete_fs_state(pipe, ctx->fs_empty);
-   ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
-   ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
+   if (ctx->fs_empty)
+      ctx->delete_fs_state(pipe, ctx->fs_empty);
+   if (ctx->fs_write_one_cbuf)
+      ctx->delete_fs_state(pipe, ctx->fs_write_one_cbuf);
+   if (ctx->fs_write_all_cbufs)
+      ctx->delete_fs_state(pipe, ctx->fs_write_all_cbufs);
 
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect_linear);
    pipe->delete_sampler_state(pipe, ctx->sampler_state_rect);
@@ -1021,7 +1075,11 @@ static void blitter_set_common_draw_rect_state(struct blitter_context_priv *ctx,
 
    pipe->bind_rasterizer_state(pipe, scissor ? ctx->rs_state_scissor
                                              : ctx->rs_state);
-   pipe->bind_vs_state(pipe, vs_layered ? ctx->vs_layered : ctx->vs);
+   if (vs_layered)
+      bind_vs_layered(ctx);
+   else
+      bind_vs_passthrough(ctx);
+
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    if (ctx->has_stream_out)
@@ -1118,7 +1176,7 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
    struct pipe_context *pipe = ctx->base.pipe;
    struct pipe_stencil_ref sr = { { 0 } };
 
-   assert(ctx->vs_layered || num_layers <= 1);
+   assert(ctx->has_layered || num_layers <= 1);
 
    blitter_set_running_flag(ctx);
    blitter_check_saved_vertex_states(ctx);
@@ -1148,12 +1206,12 @@ static void util_blitter_clear_custom(struct blitter_context *blitter,
    pipe->set_stencil_ref(pipe, &sr);
 
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
-   ctx->bind_fs_state(pipe, ctx->fs_write_all_cbufs);
+   bind_fs_write_all_cbufs(ctx);
    pipe->set_sample_mask(pipe, ~0);
 
    blitter_set_dst_dimensions(ctx, width, height);
 
-   if (num_layers > 1 && ctx->vs_layered) {
+   if (num_layers > 1 && ctx->has_layered) {
       blitter_set_common_draw_rect_state(ctx, FALSE, TRUE);
       blitter_set_clear_color(ctx, color);
       blitter_draw(ctx, 0, 0, width, height, depth, num_layers);
@@ -1680,7 +1738,7 @@ void util_blitter_clear_render_target(struct blitter_context *blitter,
    /* bind states */
    pipe->bind_blend_state(pipe, ctx->blend[PIPE_MASK_RGBA]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-   ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+   bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1748,7 +1806,7 @@ void util_blitter_clear_depth_stencil(struct blitter_context *blitter,
       /* hmm that should be illegal probably, or make it a no-op somewhere */
       pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
 
-   ctx->bind_fs_state(pipe, ctx->fs_empty);
+   bind_fs_empty(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1799,7 +1857,10 @@ void util_blitter_custom_depth_stencil(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, cbsurf ? ctx->blend[PIPE_MASK_RGBA] :
                                          ctx->blend[0]);
    pipe->bind_depth_stencil_alpha_state(pipe, dsa_stage);
-   ctx->bind_fs_state(pipe, cbsurf ? ctx->fs_write_one_cbuf : ctx->fs_empty);
+   if (cbsurf)
+      bind_fs_write_one_cbuf(ctx);
+   else
+      bind_fs_empty(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
 
    /* set a framebuffer state */
@@ -1876,7 +1937,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
 
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
-   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   bind_vs_pos_only(ctx);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
@@ -1936,7 +1997,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe,
                                     ctx->velem_state_readbuf[num_channels-1]);
-   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   bind_vs_pos_only(ctx);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
@@ -1978,7 +2039,7 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, custom_blend);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
-   ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+   bind_fs_write_one_cbuf(ctx);
    pipe->set_sample_mask(pipe, sample_mask);
 
    memset(&surf_tmpl, 0, sizeof(surf_tmpl));
@@ -2041,7 +2102,7 @@ void util_blitter_custom_color(struct blitter_context *blitter,
    pipe->bind_blend_state(pipe, custom_blend ? custom_blend
                                              : ctx->blend[PIPE_MASK_RGBA]);
    pipe->bind_depth_stencil_alpha_state(pipe, ctx->dsa_keep_depth_stencil);
-   ctx->bind_fs_state(pipe, ctx->fs_write_one_cbuf);
+   bind_fs_write_one_cbuf(ctx);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state);
    pipe->set_sample_mask(pipe, (1ull << MAX2(1, dstsurf->texture->nr_samples)) - 1);
 
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index b289c2975..739a9f412 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -255,6 +255,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_dFdy_fine:
    case ir_unop_bitfield_reverse:
    case ir_unop_interpolate_at_centroid:
+   case ir_unop_saturate:
       this->type = op0->type;
       break;
 
@@ -534,6 +535,7 @@ static const char *const operator_strs[] = {
    "bit_count",
    "find_msb",
    "find_lsb",
+   "sat",
    "noise",
    "interpolate_at_centroid",
    "+",
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index e9051732b..8003f88ce 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -1250,6 +1250,7 @@ enum ir_expression_operation {
    ir_unop_find_lsb,
    /*@}*/
 
+   ir_unop_saturate,
    ir_unop_noise,
 
    /**
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp
index f03941443..a2f6f2967 100644
--- a/mesalib/src/glsl/ir_builder.cpp
+++ b/mesalib/src/glsl/ir_builder.cpp
@@ -271,11 +271,7 @@ clamp(operand a, operand b, operand c)
 ir_expression *
 saturate(operand a)
 {
-   void *mem_ctx = ralloc_parent(a.val);
-
-   return expr(ir_binop_max,
-	       expr(ir_binop_min, a, new(mem_ctx) ir_constant(1.0f)),
-	       new(mem_ctx) ir_constant(0.0f));
+   return expr(ir_unop_saturate, a);
 }
 
 ir_expression *
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 96060217c..1e8b3a3cc 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -1469,6 +1469,12 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       }
       break;
 
+   case ir_unop_saturate:
+      for (unsigned c = 0; c < components; c++) {
+         data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f);
+      }
+      break;
+
    case ir_triop_bitfield_extract: {
       int offset = op[1]->value.i[0];
       int bits = op[2]->value.i[0];
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index b83c22592..1c6f72b54 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -40,6 +40,7 @@
 #define LDEXP_TO_ARITH     0x100
 #define CARRY_TO_ARITH     0x200
 #define BORROW_TO_ARITH    0x400
+#define SAT_TO_CLAMP       0x800
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 5b2067782..97a581dc2 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -241,6 +241,7 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_unop_log:
    case ir_unop_exp2:
    case ir_unop_log2:
+   case ir_unop_saturate:
       assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
       assert(ir->type == ir->operands[0]->type);
       break;
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index d5473adc3..57be4931d 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -2439,8 +2439,10 @@ check_explicit_uniform_locations(struct gl_context *ctx,
          ir_variable *var = node->as_variable();
          if ((var && var->data.mode == ir_var_uniform) &&
              var->data.explicit_location) {
-            if (!reserve_explicit_locations(prog, uniform_map, var))
+            if (!reserve_explicit_locations(prog, uniform_map, var)) {
+               delete uniform_map;
                return;
+            }
          }
       }
    }
diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp
index 176070c87..684285350 100644
--- a/mesalib/src/glsl/lower_instructions.cpp
+++ b/mesalib/src/glsl/lower_instructions.cpp
@@ -41,6 +41,7 @@
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - CARRY_TO_ARITH
  * - BORROW_TO_ARITH
+ * - SAT_TO_CLAMP
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -104,6 +105,10 @@
  * ----------------
  * Converts ir_borrow into (x < y).
  *
+ * SAT_TO_CLAMP:
+ * -------------
+ * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
+ *
  */
 
 #include "main/core.h" /* for M_LOG2E */
@@ -139,6 +144,7 @@ private:
    void ldexp_to_arith(ir_expression *);
    void carry_to_arith(ir_expression *);
    void borrow_to_arith(ir_expression *);
+   void sat_to_clamp(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -484,6 +490,24 @@ lower_instructions_visitor::borrow_to_arith(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
+{
+   /* Translates
+    *   ir_unop_saturate x
+    * into
+    *   ir_binop_min (ir_binop_max(x, 0.0), 1.0)
+    */
+
+   ir->operation = ir_binop_min;
+   ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type,
+                                           ir->operands[0],
+                                           new(ir) ir_constant(0.0f));
+   ir->operands[1] = new(ir) ir_constant(1.0f);
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -540,6 +564,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          borrow_to_arith(ir);
       break;
 
+   case ir_unop_saturate:
+      if (lowering(SAT_TO_CLAMP))
+         sat_to_clamp(ir);
+      break;
+
    default:
       return visit_continue;
    }
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index ac7514acf..447618f9e 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -110,6 +110,48 @@ is_vec_basis(ir_constant *ir)
    return (ir == NULL) ? false : ir->is_basis();
 }
 
+static inline bool
+is_valid_vec_const(ir_constant *ir)
+{
+   if (ir == NULL)
+      return false;
+
+   if (!ir->type->is_scalar() && !ir->type->is_vector())
+      return false;
+
+   return true;
+}
+
+static inline bool
+is_less_than_one(ir_constant *ir)
+{
+   if (!is_valid_vec_const(ir))
+      return false;
+
+   unsigned component = 0;
+   for (int c = 0; c < ir->type->vector_elements; c++) {
+      if (ir->get_float_component(c) < 1.0f)
+         component++;
+   }
+
+   return (component == ir->type->vector_elements);
+}
+
+static inline bool
+is_greater_than_zero(ir_constant *ir)
+{
+   if (!is_valid_vec_const(ir))
+      return false;
+
+   unsigned component = 0;
+   for (int c = 0; c < ir->type->vector_elements; c++) {
+      if (ir->get_float_component(c) > 0.0f)
+         component++;
+   }
+
+   return (component == ir->type->vector_elements);
+}
+
 static void
 update_type(ir_expression *ir)
 {
@@ -614,6 +656,62 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
       break;
 
+   case ir_binop_min:
+   case ir_binop_max:
+      if (ir->type->base_type != GLSL_TYPE_FLOAT)
+         break;
+
+      /* Replace min(max) operations and its commutative combinations with
+       * a saturate operation
+       */
+      for (int op = 0; op < 2; op++) {
+         ir_expression *minmax = op_expr[op];
+         ir_constant *outer_const = op_const[1 - op];
+         ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
+            ir_binop_min : ir_binop_max;
+
+         if (!minmax || !outer_const || (minmax->operation != op_cond))
+            continue;
+
+         /* Found a min(max) combination. Now try to see if its operands
+          * meet our conditions that we can do just a single saturate operation
+          */
+         for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
+            ir_rvalue *inner_val_a = minmax->operands[minmax_op];
+            ir_rvalue *inner_val_b = minmax->operands[1 - minmax_op];
+
+            if (!inner_val_a || !inner_val_b)
+               continue;
+
+            /* Found a {min|max} ({max|min} (x, 0.0), 1.0) operation and its variations */
+            if ((outer_const->is_one() && inner_val_a->is_zero()) ||
+                (inner_val_a->is_one() && outer_const->is_zero()))
+               return saturate(inner_val_b);
+
+            /* Found a {min|max} ({max|min} (x, 0.0), b) where b < 1.0
+             * and its variations
+             */
+            if (is_less_than_one(outer_const) && inner_val_b->is_zero())
+               return expr(ir_binop_min, saturate(inner_val_a), outer_const);
+
+            if (!inner_val_b->as_constant())
+               continue;
+
+            if (is_less_than_one(inner_val_b->as_constant()) && outer_const->is_zero())
+               return expr(ir_binop_min, saturate(inner_val_a), inner_val_b);
+
+            /* Found a {min|max} ({max|min} (x, b), 1.0), where b > 0.0
+             * and its variations
+             */
+            if (outer_const->is_one() && is_greater_than_zero(inner_val_b->as_constant()))
+               return expr(ir_binop_max, saturate(inner_val_a), inner_val_b);
+            if (inner_val_b->as_constant()->is_one() && is_greater_than_zero(outer_const))
+               return expr(ir_binop_max, saturate(inner_val_a), outer_const);
+         }
+      }
+
+      break;
+
    case ir_unop_rcp:
       if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
 	 return op_expr[0]->operands[0];
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index dc25ea467..4fb30ffba 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -2348,6 +2348,20 @@ struct gl_fragment_program_state
 
 
 /**
+ * Context state for compute programs.
+ */
+struct gl_compute_program_state
+{
+   struct gl_compute_program *Current;  /**< user-bound compute program */
+
+   /** Currently enabled and valid program (including internal programs
+    * and compiled shader programs).
+    */
+   struct gl_compute_program *_Current;
+};
+
+
+/**
  * ATI_fragment_shader runtime state
  */
 #define ATI_FS_INPUT_PRIMARY 0
@@ -3908,32 +3922,32 @@ typedef enum
 struct gl_driver_flags
 {
    /** gl_context::Array::_DrawArrays (vertex array state) */
-   GLbitfield NewArray;
+   uint64_t NewArray;
 
    /** gl_context::TransformFeedback::CurrentObject */
-   GLbitfield NewTransformFeedback;
+   uint64_t NewTransformFeedback;
 
    /** gl_context::TransformFeedback::CurrentObject::shader_program */
-   GLbitfield NewTransformFeedbackProg;
+   uint64_t NewTransformFeedbackProg;
 
    /** gl_context::RasterDiscard */
-   GLbitfield NewRasterizerDiscard;
+   uint64_t NewRasterizerDiscard;
 
    /**
     * gl_context::UniformBufferBindings
     * gl_shader_program::UniformBlocks
     */
-   GLbitfield NewUniformBuffer;
+   uint64_t NewUniformBuffer;
 
    /**
     * gl_context::AtomicBufferBindings
     */
-   GLbitfield NewAtomicBuffer;
+   uint64_t NewAtomicBuffer;
 
    /**
     * gl_context::ImageUnits
     */
-   GLbitfield NewImageUnits;
+   uint64_t NewImageUnits;
 };
 
 struct gl_uniform_buffer_binding
@@ -4153,6 +4167,7 @@ struct gl_context
    struct gl_vertex_program_state VertexProgram;
    struct gl_fragment_program_state FragmentProgram;
    struct gl_geometry_program_state GeometryProgram;
+   struct gl_compute_program_state ComputeProgram;
    struct gl_ati_fragment_shader_state ATIFragmentShader;
 
    struct gl_pipeline_shader_state Pipeline; /**< GLSL pipeline shader object state */
@@ -4240,7 +4255,7 @@ struct gl_context
 
    GLenum RenderMode;        /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */
    GLbitfield NewState;      /**< bitwise-or of _NEW_* flags */
-   GLbitfield NewDriverState;/**< bitwise-or of flags from DriverFlags */
+   uint64_t NewDriverState;  /**< bitwise-or of flags from DriverFlags */
 
    struct gl_driver_flags DriverFlags;
 
diff --git a/mesalib/src/mesa/main/textureview.c b/mesalib/src/mesa/main/textureview.c
index b3521e219..6e86a9a44 100644
--- a/mesalib/src/mesa/main/textureview.c
+++ b/mesalib/src/mesa/main/textureview.c
@@ -536,6 +536,9 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
    /* Adjust width, height, depth to be appropriate for new target */
    switch (target) {
    case GL_TEXTURE_1D:
+      height = 1;
+      break;
+
    case GL_TEXTURE_3D:
       break;
 
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index e5844c3c4..49e4a7a40 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -311,7 +311,6 @@ public:
 			  int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
 				 int mul_operand);
-   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -866,50 +865,6 @@ ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
    return true;
 }
 
-bool
-ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
-{
-   /* Saturates were only introduced to vertex programs in
-    * NV_vertex_program3, so don't give them to drivers in the VP.
-    */
-   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
-      return false;
-
-   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
-   if (!sat_src)
-      return false;
-
-   sat_src->accept(this);
-   src_reg src = this->result;
-
-   /* If we generated an expression instruction into a temporary in
-    * processing the saturate's operand, apply the saturate to that
-    * instruction.  Otherwise, generate a MOV to do the saturate.
-    *
-    * Note that we have to be careful to only do this optimization if
-    * the instruction in question was what generated src->result.  For
-    * example, ir_dereference_array might generate a MUL instruction
-    * to create the reladdr, and return us a src reg using that
-    * reladdr.  That MUL result is not the value we're trying to
-    * saturate.
-    */
-   ir_expression *sat_src_expr = sat_src->as_expression();
-   ir_to_mesa_instruction *new_inst;
-   new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
-   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
-			sat_src_expr->operation == ir_binop_add ||
-			sat_src_expr->operation == ir_binop_dot)) {
-      new_inst->saturate = true;
-   } else {
-      this->result = get_temp(ir->type);
-      ir_to_mesa_instruction *inst;
-      inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
-      inst->saturate = true;
-   }
-
-   return true;
-}
-
 void
 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
 				    src_reg *reg, int *num_reladdr)
@@ -1072,9 +1027,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	 return;
    }
 
-   if (try_emit_sat(ir))
-      return;
-
    if (ir->operation == ir_quadop_vector) {
       this->emit_swz(ir);
       return;
@@ -1171,6 +1123,12 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       emit(ir, OPCODE_DDY, result_dst, op[0]);
       break;
 
+   case ir_unop_saturate: {
+      ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
+                                          result_dst, op[0]);
+      inst->saturate = true;
+      break;
+   }
    case ir_unop_noise: {
       const enum prog_opcode opcode =
 	 prog_opcode(OPCODE_NOISE1
@@ -2990,9 +2948,12 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 /* Lowering */
 	 do_mat_op_to_vec(ir);
+	 GLenum target = _mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage);
 	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
 				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
-				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
+				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)
+				 | ((target == GL_VERTEX_PROGRAM_ARB) ? SAT_TO_CLAMP
+                                    : 0)));
 
 	 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
 
diff --git a/mesalib/src/mesa/program/prog_cache.c b/mesalib/src/mesa/program/prog_cache.c
index 07192a96a..34609f056 100644
--- a/mesalib/src/mesa/program/prog_cache.c
+++ b/mesalib/src/mesa/program/prog_cache.c
@@ -143,7 +143,7 @@ _mesa_new_program_cache(void)
    if (cache) {
       cache->size = 17;
       cache->items =
-         calloc(1, cache->size * sizeof(struct cache_item));
+         calloc(cache->size, sizeof(struct cache_item *));
       if (!cache->items) {
          free(cache);
          return NULL;
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h
index 6d572bd49..58f14f954 100644
--- a/mesalib/src/mesa/state_tracker/st_context.h
+++ b/mesalib/src/mesa/state_tracker/st_context.h
@@ -56,7 +56,7 @@ struct u_upload_mgr;
 
 struct st_state_flags {
    GLuint mesa;
-   GLuint st;
+   uint64_t st;
 };
 
 struct st_tracked_state {
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 84bdc4f06..dd9c84f1a 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -75,14 +75,6 @@ extern "C" {
                            (1 << PROGRAM_UNIFORM))
 
 /**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS         4096
-
-/**
  * Maximum number of arrays
  */
 #define MAX_ARRAYS        256
@@ -446,7 +438,6 @@ public:
               int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
               int mul_operand);
-   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -1270,53 +1261,6 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
    return true;
 }
 
-bool
-glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
-{
-   /* Emit saturates in the vertex shader only if SM 3.0 is supported.
-    */
-   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
-       !st_context(this->ctx)->has_shader_model3) {
-      return false;
-   }
-
-   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
-   if (!sat_src)
-      return false;
-
-   sat_src->accept(this);
-   st_src_reg src = this->result;
-
-   /* If we generated an expression instruction into a temporary in
-    * processing the saturate's operand, apply the saturate to that
-    * instruction.  Otherwise, generate a MOV to do the saturate.
-    *
-    * Note that we have to be careful to only do this optimization if
-    * the instruction in question was what generated src->result.  For
-    * example, ir_dereference_array might generate a MUL instruction
-    * to create the reladdr, and return us a src reg using that
-    * reladdr.  That MUL result is not the value we're trying to
-    * saturate.
-    */
-   ir_expression *sat_src_expr = sat_src->as_expression();
-   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
-			sat_src_expr->operation == ir_binop_add ||
-			sat_src_expr->operation == ir_binop_dot)) {
-      glsl_to_tgsi_instruction *new_inst;
-      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      new_inst->saturate = true;
-   } else {
-      this->result = get_temp(ir->type);
-      st_dst_reg result_dst = st_dst_reg(this->result);
-      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
-      glsl_to_tgsi_instruction *inst;
-      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
-      inst->saturate = true;
-   }
-
-   return true;
-}
-
 void
 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
         			    st_src_reg *reg, int *num_reladdr)
@@ -1363,9 +1307,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 	 return;
    }
 
-   if (try_emit_sat(ir))
-      return;
-
    if (ir->operation == ir_quadop_vector)
       assert(!"ir_quadop_vector should have been lowered");
 
@@ -1460,6 +1401,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_cos_reduced:
       emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
       break;
+   case ir_unop_saturate: {
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      inst->saturate = true;
+      break;
+   }
 
    case ir_unop_dFdx:
    case ir_unop_dFdx_coarse:
@@ -3301,14 +3248,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
 void
 glsl_to_tgsi_visitor::simplify_cmp(void)
 {
-   unsigned *tempWrites;
+   int tempWritesSize = 0;
+   unsigned *tempWrites = NULL;
    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
 
-   tempWrites = new unsigned[MAX_TEMPS];
-   if (!tempWrites) {
-      return;
-   }
-   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
    memset(outputWrites, 0, sizeof(outputWrites));
 
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
@@ -3330,7 +3273,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
          prevWriteMask = outputWrites[inst->dst.index];
          outputWrites[inst->dst.index] |= inst->dst.writemask;
       } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         assert(inst->dst.index < MAX_TEMPS);
+         if (inst->dst.index >= tempWritesSize) {
+            const int inc = 4096;
+
+            tempWrites = (unsigned*)
+                         realloc(tempWrites,
+                                 (tempWritesSize + inc) * sizeof(unsigned));
+            if (!tempWrites)
+               return;
+
+            memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
+            tempWritesSize += inc;
+         }
+
          prevWriteMask = tempWrites[inst->dst.index];
          tempWrites[inst->dst.index] |= inst->dst.writemask;
       } else
@@ -3349,7 +3304,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
       }
    }
 
-   delete [] tempWrites;
+   free(tempWrites);
 }
 
 /* Replaces all references to a temporary register index with another index. */
@@ -4158,7 +4113,9 @@ struct label {
 struct st_translate {
    struct ureg_program *ureg;
 
-   struct ureg_dst temps[MAX_TEMPS];
+   unsigned temps_size;
+   struct ureg_dst *temps;
+
    struct ureg_dst arrays[MAX_ARRAYS];
    struct ureg_src *constants;
    struct ureg_src *immediates;
@@ -4299,7 +4256,19 @@ dst_register(struct st_translate *t,
       return ureg_dst_undef();
 
    case PROGRAM_TEMPORARY:
-      assert(index < Elements(t->temps));
+      /* Allocate space for temporaries on demand. */
+      if (index >= t->temps_size) {
+         const int inc = 4096;
+
+         t->temps = (struct ureg_dst*)
+                    realloc(t->temps,
+                            (t->temps_size + inc) * sizeof(struct ureg_dst));
+         if (!t->temps)
+            return ureg_dst_undef();
+
+         memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
+         t->temps_size += inc;
+      }
 
       if (ureg_dst_is_undef(t->temps[index]))
          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
@@ -5158,6 +5127,7 @@ st_translate_program(
 
 out:
    if (t) {
+      free(t->temps);
       free(t->insn);
       free(t->labels);
       free(t->constants);
@@ -5429,6 +5399,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
          lower_offset_arrays(ir);
       do_mat_op_to_vec(ir);
+      /* Emit saturates in the vertex shader only if SM 3.0 is supported. */
+      bool vs_sm3 = (_mesa_shader_stage_to_program(prog->_LinkedShaders[i]->Stage) ==
+                         GL_VERTEX_PROGRAM_ARB) && st_context(ctx)->has_shader_model3;
       lower_instructions(ir,
                          MOD_TO_FRACT |
                          DIV_TO_MUL_RCP |
@@ -5438,7 +5411,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                          CARRY_TO_ARITH |
                          BORROW_TO_ARITH |
                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
-                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
+                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
+                         (vs_sm3 ? SAT_TO_CLAMP : 0));
 
       lower_ubo_reference(prog->_LinkedShaders[i], ir);
       do_vec_index_to_cond_assign(ir);