22 files changed, 668 insertions, 96 deletions
diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h
index 502490200..e4c00a408 100644
--- a/mesalib/src/glsl/ast.h
+++ b/mesalib/src/glsl/ast.h
@@ -464,6 +464,12 @@ struct ast_type_qualifier {
 	 unsigned prim_type:1;
 	 unsigned max_vertices:1;
 	 /** \} */
+
+         /**
+          * local_size_{x,y,z} flags for compute shaders.  Bit 0 represents
+          * local_size_x, and so on.
+          */
+         unsigned local_size:3;
       }
       /** \brief Set of flags, accessed by name. */
       q;
@@ -514,6 +520,13 @@ struct ast_type_qualifier {
    int offset;
 
    /**
+    * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}"
+    * layout qualifier.  Element i of this array is only valid if
+    * flags.q.local_size & (1 << i) is set.
+    */
+   int local_size[3];
+
+   /**
     * Return true if and only if an interpolation qualifier is present.
     */
    bool has_interpolation() const;
@@ -892,14 +905,13 @@ public:
 
    ast_node *body;
 
-private:
    /**
     * Generate IR from the condition of a loop
     *
     * This is factored out of ::hir because some loops have the condition
     * test at the top (for and while), and others have it at the end (do-while).
     */
-   void condition_to_hir(class ir_loop *, struct _mesa_glsl_parse_state *);
+   void condition_to_hir(exec_list *, struct _mesa_glsl_parse_state *);
 };
 
 
@@ -994,6 +1006,27 @@ private:
    const GLenum prim_type;
 };
 
+
+/**
+ * AST node representing a decalaration of the input layout for compute
+ * shaders.
+ */
+class ast_cs_input_layout : public ast_node
+{
+public:
+   ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size)
+   {
+      memcpy(this->local_size, local_size, sizeof(this->local_size));
+      set_location(locp);
+   }
+
+   virtual ir_rvalue *hir(exec_list *instructions,
+                          struct _mesa_glsl_parse_state *state);
+
+private:
+   unsigned local_size[3];
+};
+
 /*@}*/
 
 extern void
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index 1bfb4e531..c89a26bf9 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -77,6 +77,7 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
    state->toplevel_ir = instructions;
 
    state->gs_input_prim_type_specified = false;
+   state->cs_input_local_size_specified = false;
 
    /* Section 4.2 of the GLSL 1.20 specification states:
     * "The built-in functions are scoped in a scope outside the global scope
@@ -2155,6 +2156,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
 
       fail = true;
       break;
+
+   case MESA_SHADER_COMPUTE:
+      _mesa_glsl_error(loc, state,
+                       "compute shader variables cannot be given "
+                       "explicit locations");
+      return;
    };
 
    if (fail) {
@@ -2275,6 +2282,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
       var->data.mode = ir_var_uniform;
 
    if (!is_parameter && is_varying_var(var, state->stage)) {
+      /* User-defined ins/outs are not permitted in compute shaders. */
+      if (state->stage == MESA_SHADER_COMPUTE) {
+         _mesa_glsl_error(loc, state,
+                          "user-defined input and output variables are not "
+                          "permitted in compute shaders");
+      }
+
       /* This variable is being used to link data between shader stages (in
        * pre-glsl-1.30 parlance, it's a "varying").  Check that it has a type
        * that is allowed for such purposes.
@@ -2337,6 +2351,9 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
 	 if (var->data.mode == ir_var_shader_in)
 	    var->data.invariant = true;
 	 break;
+      case MESA_SHADER_COMPUTE:
+         /* Invariance isn't meaningful in compute shaders. */
+         break;
       }
    }
 
@@ -4029,17 +4046,22 @@ ast_jump_statement::hir(exec_list *instructions,
 	 _mesa_glsl_error(& loc, state,
 			  "break may only appear in a loop or a switch");
       } else {
-	 /* For a loop, inline the for loop expression again,
-	  * since we don't know where near the end of
-	  * the loop body the normal copy of it
-	  * is going to be placed.
+	 /* For a loop, inline the for loop expression again, since we don't
+	  * know where near the end of the loop body the normal copy of it is
+	  * going to be placed.  Same goes for the condition for a do-while
+	  * loop.
 	  */
 	 if (state->loop_nesting_ast != NULL &&
-	     mode == ast_continue &&
-	     state->loop_nesting_ast->rest_expression) {
-	    state->loop_nesting_ast->rest_expression->hir(instructions,
-							  state);
-	 }
+	     mode == ast_continue) {
+            if (state->loop_nesting_ast->rest_expression) {
+               state->loop_nesting_ast->rest_expression->hir(instructions,
+                                                             state);
+            }
+            if (state->loop_nesting_ast->mode ==
+                ast_iteration_statement::ast_do_while) {
+               state->loop_nesting_ast->condition_to_hir(instructions, state);
+            }
+         }
 
 	 if (state->switch_state.is_switch_innermost &&
 	     mode == ast_break) {
@@ -4369,14 +4391,14 @@ ast_case_label::hir(exec_list *instructions,
 }
 
 void
-ast_iteration_statement::condition_to_hir(ir_loop *stmt,
+ast_iteration_statement::condition_to_hir(exec_list *instructions,
 					  struct _mesa_glsl_parse_state *state)
 {
    void *ctx = state;
 
    if (condition != NULL) {
       ir_rvalue *const cond =
-	 condition->hir(& stmt->body_instructions, state);
+	 condition->hir(instructions, state);
 
       if ((cond == NULL)
 	  || !cond->type->is_boolean() || !cond->type->is_scalar()) {
@@ -4397,7 +4419,7 @@ ast_iteration_statement::condition_to_hir(ir_loop *stmt,
 	    new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
 
 	 if_stmt->then_instructions.push_tail(break_stmt);
-	 stmt->body_instructions.push_tail(if_stmt);
+	 instructions->push_tail(if_stmt);
       }
    }
 }
@@ -4432,7 +4454,7 @@ ast_iteration_statement::hir(exec_list *instructions,
    state->switch_state.is_switch_innermost = false;
 
    if (mode != ast_do_while)
-      condition_to_hir(stmt, state);
+      condition_to_hir(&stmt->body_instructions, state);
 
    if (body != NULL)
       body->hir(& stmt->body_instructions, state);
@@ -4441,7 +4463,7 @@ ast_iteration_statement::hir(exec_list *instructions,
       rest_expression->hir(& stmt->body_instructions, state);
 
    if (mode == ast_do_while)
-      condition_to_hir(stmt, state);
+      condition_to_hir(&stmt->body_instructions, state);
 
    if (mode != ast_do_while)
       state->symbols->pop_scope();
@@ -5289,6 +5311,84 @@ ast_gs_input_layout::hir(exec_list *instructions,
 }
 
 
+ir_rvalue *
+ast_cs_input_layout::hir(exec_list *instructions,
+                         struct _mesa_glsl_parse_state *state)
+{
+   YYLTYPE loc = this->get_location();
+
+   /* If any compute input layout declaration preceded this one, make sure it
+    * was consistent with this one.
+    */
+   if (state->cs_input_local_size_specified) {
+      for (int i = 0; i < 3; i++) {
+         if (state->cs_input_local_size[i] != this->local_size[i]) {
+            _mesa_glsl_error(&loc, state,
+                             "compute shader input layout does not match"
+                             " previous declaration");
+            return NULL;
+         }
+      }
+   }
+
+   /* From the ARB_compute_shader specification:
+    *
+    *     If the local size of the shader in any dimension is greater
+    *     than the maximum size supported by the implementation for that
+    *     dimension, a compile-time error results.
+    *
+    * It is not clear from the spec how the error should be reported if
+    * the total size of the work group exceeds
+    * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to
+    * report it at compile time as well.
+    */
+   GLuint64 total_invocations = 1;
+   for (int i = 0; i < 3; i++) {
+      if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+         _mesa_glsl_error(&loc, state,
+                          "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
+                          " (%d)", 'x' + i,
+                          state->ctx->Const.MaxComputeWorkGroupSize[i]);
+         break;
+      }
+      total_invocations *= this->local_size[i];
+      if (total_invocations >
+          state->ctx->Const.MaxComputeWorkGroupInvocations) {
+         _mesa_glsl_error(&loc, state,
+                          "product of local_sizes exceeds "
+                          "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)",
+                          state->ctx->Const.MaxComputeWorkGroupInvocations);
+         break;
+      }
+   }
+
+   state->cs_input_local_size_specified = true;
+   for (int i = 0; i < 3; i++)
+      state->cs_input_local_size[i] = this->local_size[i];
+
+   /* We may now declare the built-in constant gl_WorkGroupSize (see
+    * builtin_variable_generator::generate_constants() for why we didn't
+    * declare it earlier).
+    */
+   ir_variable *var = new(state->symbols)
+      ir_variable(glsl_type::ivec3_type, "gl_WorkGroupSize", ir_var_auto);
+   var->data.how_declared = ir_var_declared_implicitly;
+   var->data.read_only = true;
+   instructions->push_tail(var);
+   state->symbols->add_variable(var);
+   ir_constant_data data;
+   memset(&data, 0, sizeof(data));
+   for (int i = 0; i < 3; i++)
+      data.i[i] = this->local_size[i];
+   var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data);
+   var->constant_initializer =
+      new(var) ir_constant(glsl_type::ivec3_type, &data);
+   var->data.has_initializer = true;
+
+   return NULL;
+}
+
+
 static void
 detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
 			       exec_list *instructions)
diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp
index 637da0dfb..bbc430808 100644
--- a/mesalib/src/glsl/ast_type.cpp
+++ b/mesalib/src/glsl/ast_type.cpp
@@ -118,6 +118,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    ubo_layout_mask.flags.q.shared = 1;
 
    ast_type_qualifier ubo_binding_mask;
+   ubo_binding_mask.flags.i = 0;
    ubo_binding_mask.flags.q.explicit_binding = 1;
    ubo_binding_mask.flags.q.explicit_offset = 1;
 
@@ -158,6 +159,20 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
    if ((q.flags.i & ubo_layout_mask.flags.i) != 0)
       this->flags.i &= ~ubo_layout_mask.flags.i;
 
+   for (int i = 0; i < 3; i++) {
+      if (q.flags.q.local_size & (1 << i)) {
+         if ((this->flags.q.local_size & (1 << i)) &&
+             this->local_size[i] != q.local_size[i]) {
+            _mesa_glsl_error(loc, state,
+                             "compute shader set conflicting values for "
+                             "local_size_%c (%d and %d)", 'x' + i,
+                             this->local_size[i], q.local_size[i]);
+            return false;
+         }
+         this->local_size[i] = q.local_size[i];
+      }
+   }
+
    this->flags.i |= q.flags.i;
 
    if (q.flags.q.explicit_location)
diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp
index d6bc3c073..cc423383d 100644
--- a/mesalib/src/glsl/builtin_variables.cpp
+++ b/mesalib/src/glsl/builtin_variables.cpp
@@ -356,6 +356,7 @@ public:
    void generate_vs_special_vars();
    void generate_gs_special_vars();
    void generate_fs_special_vars();
+   void generate_cs_special_vars();
    void generate_varyings();
 
 private:
@@ -389,6 +390,7 @@ private:
                              enum ir_variable_mode mode, int slot);
    ir_variable *add_uniform(const glsl_type *type, const char *name);
    ir_variable *add_const(const char *name, int value);
+   ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
    void add_varying(int slot, const glsl_type *type, const char *name,
                     const char *name_as_gs_input);
 
@@ -529,6 +531,25 @@ builtin_variable_generator::add_const(const char *name, int value)
 }
 
 
+ir_variable *
+builtin_variable_generator::add_const_ivec3(const char *name, int x, int y,
+                                            int z)
+{
+   ir_variable *const var = add_variable(name, glsl_type::ivec3_type,
+                                         ir_var_auto, -1);
+   ir_constant_data data;
+   memset(&data, 0, sizeof(data));
+   data.i[0] = x;
+   data.i[1] = y;
+   data.i[2] = z;
+   var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data);
+   var->constant_initializer =
+      new(var) ir_constant(glsl_type::ivec3_type, &data);
+   var->data.has_initializer = true;
+   return var;
+}
+
+
 void
 builtin_variable_generator::generate_constants()
 {
@@ -659,6 +680,37 @@ builtin_variable_generator::generate_constants()
       add_const("gl_MaxTessControlAtomicCounters", 0);
       add_const("gl_MaxTessEvaluationAtomicCounters", 0);
    }
+
+   if (state->is_version(430, 0) || state->ARB_compute_shader_enable) {
+      add_const_ivec3("gl_MaxComputeWorkGroupCount",
+                      state->Const.MaxComputeWorkGroupCount[0],
+                      state->Const.MaxComputeWorkGroupCount[1],
+                      state->Const.MaxComputeWorkGroupCount[2]);
+      add_const_ivec3("gl_MaxComputeWorkGroupSize",
+                      state->Const.MaxComputeWorkGroupSize[0],
+                      state->Const.MaxComputeWorkGroupSize[1],
+                      state->Const.MaxComputeWorkGroupSize[2]);
+
+      /* From the GLSL 4.40 spec, section 7.1 (Built-In Language Variables):
+       *
+       *     The built-in constant gl_WorkGroupSize is a compute-shader
+       *     constant containing the local work-group size of the shader.  The
+       *     size of the work group in the X, Y, and Z dimensions is stored in
+       *     the x, y, and z components.  The constants values in
+       *     gl_WorkGroupSize will match those specified in the required
+       *     local_size_x, local_size_y, and local_size_z layout qualifiers
+       *     for the current shader.  This is a constant so that it can be
+       *     used to size arrays of memory that can be shared within the local
+       *     work group.  It is a compile-time error to use gl_WorkGroupSize
+       *     in a shader that does not declare a fixed local group size, or
+       *     before that shader has declared a fixed local group size, using
+       *     local_size_x, local_size_y, and local_size_z.
+       *
+       * To prevent the shader from trying to refer to gl_WorkGroupSize before
+       * the layout declaration, we don't define it here.  Intead we define it
+       * in ast_cs_input_layout::hir().
+       */
+   }
 }
 
 
@@ -868,6 +920,16 @@ builtin_variable_generator::generate_fs_special_vars()
 
 
 /**
+ * Generate variables which only exist in compute shaders.
+ */
+void
+builtin_variable_generator::generate_cs_special_vars()
+{
+   /* TODO: finish this. */
+}
+
+
+/**
  * Add a single "varying" variable.  The variable's type and direction (input
  * or output) are adjusted as appropriate for the type of shader being
  * compiled.  For geometry shaders using {ARB,EXT}_geometry_shader4,
@@ -888,6 +950,9 @@ builtin_variable_generator::add_varying(int slot, const glsl_type *type,
    case MESA_SHADER_FRAGMENT:
       add_input(slot, type, name);
       break;
+   case MESA_SHADER_COMPUTE:
+      /* Compute shaders don't have varyings. */
+      break;
    }
 }
 
@@ -975,5 +1040,8 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
    case MESA_SHADER_FRAGMENT:
       gen.generate_fs_special_vars();
       break;
+   case MESA_SHADER_COMPUTE:
+      gen.generate_cs_special_vars();
+      break;
    }
 }
diff --git a/mesalib/src/glsl/glcpp/glcpp-lex.l b/mesalib/src/glsl/glcpp/glcpp-lex.l
index f1fa192c5..ea3b862e4 100644
--- a/mesalib/src/glsl/glcpp/glcpp-lex.l
+++ b/mesalib/src/glsl/glcpp/glcpp-lex.l
@@ -155,8 +155,6 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 
 	/* Single-line comments */
 "//"[^\n]* {
-	if (parser->commented_newlines)
-		BEGIN NEWLINE_CATCHUP;
 }
 
 	/* Multi-line comments */
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index 184e5c237..eeafa4d39 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -30,6 +30,7 @@
 
 #include "glcpp.h"
 #include "main/core.h" /* for struct gl_extensions */
+#include "main/mtypes.h" /* for gl_api enum */
 
 static void
 yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error);
@@ -194,7 +195,7 @@ line:
 		ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n");
 	}
 |	HASH_LINE {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} pp_tokens NEWLINE {
 
 		if (parser->skip_stack == NULL ||
@@ -254,10 +255,10 @@ define:
 
 control_line:
 	HASH_DEFINE {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} define
 |	HASH_UNDEF {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} IDENTIFIER NEWLINE {
 		macro_t *macro = hash_table_find (parser->defines, $3);
 		if (macro) {
@@ -267,7 +268,7 @@ control_line:
 		ralloc_free ($3);
 	}
 |	HASH_IF {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} conditional_tokens NEWLINE {
 		/* Be careful to only evaluate the 'if' expression if
 		 * we are not skipping. When we are skipping, we
@@ -299,14 +300,14 @@ control_line:
 		_glcpp_parser_skip_stack_push_if (parser, & @1, 0);
 	}
 |	HASH_IFDEF {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} IDENTIFIER junk NEWLINE {
 		macro_t *macro = hash_table_find (parser->defines, $3);
 		ralloc_free ($3);
 		_glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL);
 	}
 |	HASH_IFNDEF {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	} IDENTIFIER junk NEWLINE {
 		macro_t *macro = hash_table_find (parser->defines, $3);
 		ralloc_free ($3);
@@ -374,13 +375,19 @@ control_line:
 		_glcpp_parser_skip_stack_pop (parser, & @1);
 	} NEWLINE
 |	HASH_VERSION integer_constant NEWLINE {
+		if (parser->version_resolved) {
+			glcpp_error(& @1, parser, "#version must appear on the first line");
+		}
 		_glcpp_parser_handle_version_declaration(parser, $2, NULL, true);
 	}
 |	HASH_VERSION integer_constant IDENTIFIER NEWLINE {
+		if (parser->version_resolved) {
+			glcpp_error(& @1, parser, "#version must appear on the first line");
+		}
 		_glcpp_parser_handle_version_declaration(parser, $2, $3, true);
 	}
 |	HASH NEWLINE {
-		glcpp_parser_resolve_version(parser);
+		glcpp_parser_resolve_implicit_version(parser);
 	}
 ;
 
@@ -1186,7 +1193,7 @@ static void add_builtin_define(glcpp_parser_t *parser,
 }
 
 glcpp_parser_t *
-glcpp_parser_create (const struct gl_extensions *extensions)
+glcpp_parser_create (const struct gl_extensions *extensions, gl_api api)
 {
 	glcpp_parser_t *parser;
 
@@ -1215,6 +1222,7 @@ glcpp_parser_create (const struct gl_extensions *extensions)
 	parser->error = 0;
 
         parser->extensions = extensions;
+        parser->api = api;
         parser->version_resolved = false;
 
 	parser->has_new_line_number = 0;
@@ -2024,6 +2032,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 {
 	const struct gl_extensions *extensions = parser->extensions;
 
+	if (parser->version_resolved)
+		return;
+
 	parser->version_resolved = true;
 
 	add_builtin_define (parser, "__VERSION__", version);
@@ -2043,6 +2054,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	} else {
 	   add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
 	   add_builtin_define(parser, "GL_ARB_texture_rectangle", 1);
+           add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1);
+
 
 	   if (extensions != NULL) {
 	      if (extensions->EXT_texture_array)
@@ -2108,11 +2121,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	      if (extensions->ARB_shader_atomic_counters)
 	         add_builtin_define(parser, "GL_ARB_shader_atomic_counters", 1);
 
-	      if (extensions->AMD_shader_trinary_minmax)
-	         add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1);
-
 	      if (extensions->ARB_viewport_array)
 	         add_builtin_define(parser, "GL_ARB_viewport_array", 1);
+
+              if (extensions->ARB_compute_shader)
+                 add_builtin_define(parser, "GL_ARB_compute_shader", 1);
 	   }
 	}
 
@@ -2140,15 +2153,19 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	}
 }
 
-/* GLSL version is no version is explicitly specified. */
+/* GLSL version if no version is explicitly specified. */
 #define IMPLICIT_GLSL_VERSION 110
 
+/* GLSL ES version if no version is explicitly specified. */
+#define IMPLICIT_GLSL_ES_VERSION 100
+
 void
-glcpp_parser_resolve_version(glcpp_parser_t *parser)
+glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser)
 {
-	if (parser->version_resolved)
-		return;
+	int language_version = parser->api == API_OPENGLES2 ?
+			       IMPLICIT_GLSL_ES_VERSION :
+			       IMPLICIT_GLSL_VERSION;
 
-	_glcpp_parser_handle_version_declaration(parser, IMPLICIT_GLSL_VERSION,
+	_glcpp_parser_handle_version_declaration(parser, language_version,
 						 NULL, false);
 }
diff --git a/mesalib/src/glsl/glcpp/glcpp.c b/mesalib/src/glsl/glcpp/glcpp.c
index b632f1715..0144be85b 100644..100755
--- a/mesalib/src/glsl/glcpp/glcpp.c
+++ b/mesalib/src/glsl/glcpp/glcpp.c
@@ -38,7 +38,7 @@
 #define close _close
 #endif
 
-extern int yydebug;
+extern int glcpp_parser_debug;
 
 void
 _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
@@ -109,6 +109,7 @@ load_text_file(void *ctx, const char *filename)
 static void
 init_fake_gl_context (struct gl_context *gl_ctx)
 {
+	gl_ctx->API = API_OPENGL_COMPAT;
 	gl_ctx->Const.DisableGLSLLineContinuations = false;
 }
 
diff --git a/mesalib/src/glsl/glcpp/glcpp.h b/mesalib/src/glsl/glcpp/glcpp.h
index 4aa200a63..79ccb234f 100644
--- a/mesalib/src/glsl/glcpp/glcpp.h
+++ b/mesalib/src/glsl/glcpp/glcpp.h
@@ -183,6 +183,7 @@ struct glcpp_parser {
 	size_t info_log_length;
 	int error;
 	const struct gl_extensions *extensions;
+	gl_api api;
 	bool version_resolved;
 	bool has_new_line_number;
 	int new_line_number;
@@ -194,7 +195,7 @@ struct glcpp_parser {
 struct gl_extensions;
 
 glcpp_parser_t *
-glcpp_parser_create (const struct gl_extensions *extensions);
+glcpp_parser_create (const struct gl_extensions *extensions, gl_api api);
 
 int
 glcpp_parser_parse (glcpp_parser_t *parser);
@@ -203,7 +204,7 @@ void
 glcpp_parser_destroy (glcpp_parser_t *parser);
 
 void
-glcpp_parser_resolve_version(glcpp_parser_t *parser);
+glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser);
 
 int
 glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
diff --git a/mesalib/src/glsl/glcpp/pp.c b/mesalib/src/glsl/glcpp/pp.c
index 637a58f9c..4a623f81e 100644
--- a/mesalib/src/glsl/glcpp/pp.c
+++ b/mesalib/src/glsl/glcpp/pp.c
@@ -139,7 +139,7 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
 	   const struct gl_extensions *extensions, struct gl_context *gl_ctx)
 {
 	int errors;
-	glcpp_parser_t *parser = glcpp_parser_create (extensions);
+	glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx->API);
 
 	if (! gl_ctx->Const.DisableGLSLLineContinuations)
 		*shader = remove_line_continuations(parser, *shader);
@@ -151,7 +151,7 @@ glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
 	if (parser->skip_stack)
 		glcpp_error (&parser->skip_stack->loc, parser, "Unterminated #if\n");
 
-	glcpp_parser_resolve_version(parser);
+	glcpp_parser_resolve_implicit_version(parser);
 
 	ralloc_strcat(info_log, parser->info_log);
 
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 50875bf3b..3208b32da 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -349,7 +349,8 @@ layout		{
 		      || yyextra->ARB_explicit_attrib_location_enable
 		      || yyextra->ARB_uniform_buffer_object_enable
 		      || yyextra->ARB_fragment_coord_conventions_enable
-                      || yyextra->ARB_shading_language_420pack_enable) {
+                      || yyextra->ARB_shading_language_420pack_enable
+                      || yyextra->ARB_compute_shader_enable) {
 		      return LAYOUT_TOK;
 		   } else {
 		      yylval->identifier = strdup(yytext);
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index 5d8dfac02..c4d01a8cd 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -1292,6 +1292,34 @@ layout_qualifier_id:
          }
       }
 
+      static const char *local_size_qualifiers[3] = {
+         "local_size_x",
+         "local_size_y",
+         "local_size_z",
+      };
+      for (int i = 0; i < 3; i++) {
+         if (match_layout_qualifier(local_size_qualifiers[i], $1,
+                                    state) == 0) {
+            if ($3 <= 0) {
+               _mesa_glsl_error(& @3, state,
+                                "invalid %s of %d specified",
+                                local_size_qualifiers[i], $3);
+               YYERROR;
+            } else if (!state->is_version(430, 0) &&
+                       !state->ARB_compute_shader_enable) {
+               _mesa_glsl_error(& @3, state,
+                                "%s qualifier requires GLSL 4.30 or "
+                                "ARB_compute_shader",
+                                local_size_qualifiers[i]);
+               YYERROR;
+            } else {
+               $$.flags.q.local_size |= (1 << i);
+               $$.local_size[i] = $3;
+            }
+            break;
+         }
+      }
+
       /* If the identifier didn't match any known layout identifiers,
        * emit an error.
        */
@@ -1467,7 +1495,7 @@ type_qualifier:
                           "just before storage qualifiers");
       }
       $$ = $1;
-      $$.flags.i |= $2.flags.i;
+      $$.merge_qualifier(&@1, state, $2);
    }
    | storage_qualifier type_qualifier
    {
@@ -2335,29 +2363,53 @@ layout_defaults:
    {
       void *ctx = state;
       $$ = NULL;
-      if (state->stage != MESA_SHADER_GEOMETRY) {
+      switch (state->stage) {
+      case MESA_SHADER_GEOMETRY: {
+         if (!$1.flags.q.prim_type) {
+            _mesa_glsl_error(& @1, state,
+                             "input layout qualifiers must specify a primitive"
+                             " type");
+         } else {
+            /* Make sure this is a valid input primitive type. */
+            switch ($1.prim_type) {
+            case GL_POINTS:
+            case GL_LINES:
+            case GL_LINES_ADJACENCY:
+            case GL_TRIANGLES:
+            case GL_TRIANGLES_ADJACENCY:
+               $$ = new(ctx) ast_gs_input_layout(@1, $1.prim_type);
+               break;
+            default:
+               _mesa_glsl_error(&@1, state,
+                                "invalid geometry shader input primitive type");
+               break;
+            }
+         }
+      }
+         break;
+      case MESA_SHADER_COMPUTE: {
+         if ($1.flags.q.local_size == 0) {
+            _mesa_glsl_error(& @1, state,
+                             "input layout qualifiers must specify a local "
+                             "size");
+         } else {
+            /* Infer a local_size of 1 for every unspecified dimension */
+            unsigned local_size[3];
+            for (int i = 0; i < 3; i++) {
+               if ($1.flags.q.local_size & (1 << i))
+                  local_size[i] = $1.local_size[i];
+               else
+                  local_size[i] = 1;
+            }
+            $$ = new(ctx) ast_cs_input_layout(@1, local_size);
+         }
+      }
+         break;
+      default:
          _mesa_glsl_error(& @1, state,
                           "input layout qualifiers only valid in "
-                          "geometry shaders");
-      } else if (!$1.flags.q.prim_type) {
-         _mesa_glsl_error(& @1, state,
-                          "input layout qualifiers must specify a primitive"
-                          " type");
-      } else {
-         /* Make sure this is a valid input primitive type. */
-         switch ($1.prim_type) {
-         case GL_POINTS:
-         case GL_LINES:
-         case GL_LINES_ADJACENCY:
-         case GL_TRIANGLES:
-         case GL_TRIANGLES_ADJACENCY:
-            $$ = new(ctx) ast_gs_input_layout(@1, $1.prim_type);
-            break;
-         default:
-            _mesa_glsl_error(&@1, state,
-                             "invalid geometry shader input primitive type");
-            break;
-         }
+                          "geometry and compute shaders");
+         break;
       }
    }
 
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index 87784ed69..b822d2292 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -56,7 +56,8 @@ static unsigned known_desktop_glsl_versions[] =
 _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
 					       gl_shader_stage stage,
                                                void *mem_ctx)
-   : ctx(_ctx), switch_state()
+   : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(),
+     switch_state()
 {
    assert(stage < MESA_SHADER_STAGES);
    this->stage = stage;
@@ -123,6 +124,12 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters;
    this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings;
 
+   /* Compute shader constants */
+   for (unsigned i = 0; i < Elements(this->Const.MaxComputeWorkGroupCount); i++)
+      this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i];
+   for (unsigned i = 0; i < Elements(this->Const.MaxComputeWorkGroupSize); i++)
+      this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i];
+
    this->current_function = NULL;
    this->toplevel_ir = NULL;
    this->found_return = false;
@@ -519,6 +526,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_sample_shading,             true,  false,     ARB_sample_shading),
    EXT(AMD_shader_trinary_minmax,      true,  false,     dummy_true),
    EXT(ARB_viewport_array,             true,  false,     ARB_viewport_array),
+   EXT(ARB_compute_shader,             true,  false,     ARB_compute_shader),
 };
 
 #undef EXT
@@ -1332,23 +1340,45 @@ set_shader_inout_layout(struct gl_shader *shader,
       /* Should have been prevented by the parser. */
       assert(!state->gs_input_prim_type_specified);
       assert(!state->out_qualifier->flags.i);
-      return;
    }
 
-   shader->Geom.VerticesOut = 0;
-   if (state->out_qualifier->flags.q.max_vertices)
-      shader->Geom.VerticesOut = state->out_qualifier->max_vertices;
-
-   if (state->gs_input_prim_type_specified) {
-      shader->Geom.InputType = state->gs_input_prim_type;
-   } else {
-      shader->Geom.InputType = PRIM_UNKNOWN;
+   if (shader->Stage != MESA_SHADER_COMPUTE) {
+      /* Should have been prevented by the parser. */
+      assert(!state->cs_input_local_size_specified);
    }
 
-   if (state->out_qualifier->flags.q.prim_type) {
-      shader->Geom.OutputType = state->out_qualifier->prim_type;
-   } else {
-      shader->Geom.OutputType = PRIM_UNKNOWN;
+   switch (shader->Stage) {
+   case MESA_SHADER_GEOMETRY:
+      shader->Geom.VerticesOut = 0;
+      if (state->out_qualifier->flags.q.max_vertices)
+         shader->Geom.VerticesOut = state->out_qualifier->max_vertices;
+
+      if (state->gs_input_prim_type_specified) {
+         shader->Geom.InputType = state->gs_input_prim_type;
+      } else {
+         shader->Geom.InputType = PRIM_UNKNOWN;
+      }
+
+      if (state->out_qualifier->flags.q.prim_type) {
+         shader->Geom.OutputType = state->out_qualifier->prim_type;
+      } else {
+         shader->Geom.OutputType = PRIM_UNKNOWN;
+      }
+      break;
+
+   case MESA_SHADER_COMPUTE:
+      if (state->cs_input_local_size_specified) {
+         for (int i = 0; i < 3; i++)
+            shader->Comp.LocalSize[i] = state->cs_input_local_size[i];
+      } else {
+         for (int i = 0; i < 3; i++)
+            shader->Comp.LocalSize[i] = 0;
+      }
+      break;
+
+   default:
+      /* Nothing to do. */
+      break;
    }
 }
 
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 8c900e637..7b0fbbfa1 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -200,6 +200,21 @@ struct _mesa_glsl_parse_state {
     */
    GLenum gs_input_prim_type;
 
+   /**
+    * True if a compute shader input local size was specified using a layout
+    * directive.
+    *
+    * Note: this value is computed at ast_to_hir time rather than at parse
+    * time.
+    */
+   bool cs_input_local_size_specified;
+
+   /**
+    * If cs_input_local_size_specified is true, the local size that was
+    * specified.  Otherwise ignored.
+    */
+   unsigned cs_input_local_size[3];
+
    /** Output layout qualifiers from GLSL 1.50. (geometry shader controls)*/
    struct ast_type_qualifier *out_qualifier;
 
@@ -254,6 +269,10 @@ struct _mesa_glsl_parse_state {
       unsigned MaxFragmentAtomicCounters;
       unsigned MaxCombinedAtomicCounters;
       unsigned MaxAtomicBufferBindings;
+
+      /* ARB_compute_shader */
+      unsigned MaxComputeWorkGroupCount[3];
+      unsigned MaxComputeWorkGroupSize[3];
    } Const;
 
    /**
@@ -360,6 +379,8 @@ struct _mesa_glsl_parse_state {
    bool AMD_shader_trinary_minmax_warn;
    bool ARB_viewport_array_enable;
    bool ARB_viewport_array_warn;
+   bool ARB_compute_shader_enable;
+   bool ARB_compute_shader_warn;
    /*@}*/
 
    /** Extensions supported by the OpenGL implementation. */
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index 19e8383b2..e266328b2 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -471,7 +471,7 @@ public:
    void reinit_interface_type(const struct glsl_type *type)
    {
       if (this->max_ifc_array_access != NULL) {
-#ifndef _NDEBUG
+#ifndef NDEBUG
          /* Redeclaring gl_PerVertex is only allowed if none of the built-ins
           * it defines have been accessed yet; so it's safe to throw away the
           * old max_ifc_array_access pointer, since all of its values are
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index c24d3c96b..48cf47f53 100755
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -1390,6 +1390,23 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
    }
 
+   case ir_binop_bfm: {
+      int bits = op[0]->value.i[0];
+      int offset = op[1]->value.i[0];
+
+      for (unsigned c = 0; c < components; c++) {
+         if (bits == 0)
+            data.u[c] = op[0]->value.u[c];
+         else if (offset < 0 || bits < 0)
+            data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */
+         else if (offset + bits > 32)
+            data.u[c] = 0; /* Undefined for bitfieldInsert, per spec. */
+         else
+            data.u[c] = ((1 << bits) - 1) << offset;
+      }
+      break;
+   }
+
    case ir_binop_ldexp:
       for (unsigned c = 0; c < components; c++) {
          data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 93b475497..bcd739476 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -1287,6 +1287,69 @@ link_gs_inout_layout_qualifiers(struct gl_shader_program *prog,
    prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut;
 }
 
+
+/**
+ * Perform cross-validation of compute shader local_size_{x,y,z} layout
+ * qualifiers for the attached compute shaders, and propagate them to the
+ * linked CS and linked shader program.
+ */
+static void
+link_cs_input_layout_qualifiers(struct gl_shader_program *prog,
+                                struct gl_shader *linked_shader,
+                                struct gl_shader **shader_list,
+                                unsigned num_shaders)
+{
+   for (int i = 0; i < 3; i++)
+      linked_shader->Comp.LocalSize[i] = 0;
+
+   /* This function is called for all shader stages, but it only has an effect
+    * for compute shaders.
+    */
+   if (linked_shader->Stage != MESA_SHADER_COMPUTE)
+      return;
+
+   /* From the ARB_compute_shader spec, in the section describing local size
+    * declarations:
+    *
+    *     If multiple compute shaders attached to a single program object
+    *     declare local work-group size, the declarations must be identical;
+    *     otherwise a link-time error results. Furthermore, if a program
+    *     object contains any compute shaders, at least one must contain an
+    *     input layout qualifier specifying the local work sizes of the
+    *     program, or a link-time error will occur.
+    */
+   for (unsigned sh = 0; sh < num_shaders; sh++) {
+      struct gl_shader *shader = shader_list[sh];
+
+      if (shader->Comp.LocalSize[0] != 0) {
+         if (linked_shader->Comp.LocalSize[0] != 0) {
+            for (int i = 0; i < 3; i++) {
+               if (linked_shader->Comp.LocalSize[i] !=
+                   shader->Comp.LocalSize[i]) {
+                  linker_error(prog, "compute shader defined with conflicting "
+                               "local sizes\n");
+                  return;
+               }
+            }
+         }
+         for (int i = 0; i < 3; i++)
+            linked_shader->Comp.LocalSize[i] = shader->Comp.LocalSize[i];
+      }
+   }
+
+   /* Just do the intrastage -> interstage propagation right now,
+    * since we already know we're in the right type of shader program
+    * for doing it.
+    */
+   if (linked_shader->Comp.LocalSize[0] == 0) {
+      linker_error(prog, "compute shader didn't declare local size\n");
+      return;
+   }
+   for (int i = 0; i < 3; i++)
+      prog->Comp.LocalSize[i] = linked_shader->Comp.LocalSize[i];
+}
+
+
 /**
  * Combine a group of shaders for a single stage to generate a linked shader
  *
@@ -1391,6 +1454,7 @@ link_intrastage_shaders(void *mem_ctx,
    ralloc_steal(linked, linked->UniformBlocks);
 
    link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders);
+   link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders);
 
    populate_symbol_table(linked);
 
@@ -2045,6 +2109,13 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       goto done;
    }
 
+   /* Compute shaders have additional restrictions. */
+   if (num_shaders[MESA_SHADER_COMPUTE] > 0 &&
+       num_shaders[MESA_SHADER_COMPUTE] != prog->NumShaders) {
+      linker_error(prog, "Compute shaders may not be linked with any other "
+                   "type of shader\n");
+   }
+
    for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
       if (prog->_LinkedShaders[i] != NULL)
 	 ctx->Driver.DeleteShader(ctx, prog->_LinkedShaders[i]);
@@ -2098,7 +2169,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
    unsigned prev;
 
-   for (prev = 0; prev < MESA_SHADER_STAGES; prev++) {
+   for (prev = 0; prev <= MESA_SHADER_FRAGMENT; prev++) {
       if (prog->_LinkedShaders[prev] != NULL)
          break;
    }
@@ -2106,7 +2177,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    /* Validate the inputs of each stage with the output of the preceding
     * stage.
     */
-   for (unsigned i = prev + 1; i < MESA_SHADER_STAGES; i++) {
+   for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
       if (prog->_LinkedShaders[i] == NULL)
          continue;
 
@@ -2201,7 +2272,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    }
 
    unsigned first;
-   for (first = 0; first < MESA_SHADER_STAGES; first++) {
+   for (first = 0; first <= MESA_SHADER_FRAGMENT; first++) {
       if (prog->_LinkedShaders[first] != NULL)
 	 break;
    }
@@ -2233,7 +2304,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     * eliminated if they are (transitively) not used in a later stage.
     */
    int last, next;
-   for (last = MESA_SHADER_STAGES-1; last >= 0; last--) {
+   for (last = MESA_SHADER_FRAGMENT; last >= 0; last--) {
       if (prog->_LinkedShaders[last] != NULL)
          break;
    }
diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp
index 8f8d448ea..44a6e8021 100644
--- a/mesalib/src/glsl/lower_instructions.cpp
+++ b/mesalib/src/glsl/lower_instructions.cpp
@@ -384,10 +384,10 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
    /* Constants */
    ir_constant *zeroi = ir_constant::zero(ir, ivec);
 
-   ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem);
    ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
 
    ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem);
+   ir_constant *exp_width = new(ir) ir_constant(8u, vec_elem);
 
    /* Temporary variables */
    ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
@@ -449,11 +449,17 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
     */
 
    ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
-   ir->operation = ir_unop_bitcast_u2f;
-   ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),
-                            lshift(i2u(resulting_biased_exp), exp_shift_clone));
+   ir->operation = ir_unop_bitcast_i2f;
+   ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
+                                     exp_shift_clone, exp_width);
    ir->operands[1] = NULL;
 
+   /* Don't generate new IR that would need to be lowered in an additional
+    * pass.
+    */
+   if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
+      bitfield_insert_to_bfm_bfi(ir->operands[0]->as_expression());
+
    this->progress = true;
 }
 
diff --git a/mesalib/src/glsl/main.cpp b/mesalib/src/glsl/main.cpp
index 56c32e688..48ab1c73a 100644
--- a/mesalib/src/glsl/main.cpp
+++ b/mesalib/src/glsl/main.cpp
@@ -61,6 +61,17 @@ initialize_context(struct gl_context *ctx, gl_api api)
     */
    ctx->Const.GLSLVersion = glsl_version;
    ctx->Extensions.ARB_ES3_compatibility = true;
+   ctx->Const.MaxComputeWorkGroupCount[0] = 65535;
+   ctx->Const.MaxComputeWorkGroupCount[1] = 65535;
+   ctx->Const.MaxComputeWorkGroupCount[2] = 65535;
+   ctx->Const.MaxComputeWorkGroupSize[0] = 1024;
+   ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
+   ctx->Const.MaxComputeWorkGroupSize[2] = 64;
+   ctx->Const.MaxComputeWorkGroupInvocations = 1024;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
 
    switch (ctx->Const.GLSLVersion) {
    case 100:
@@ -232,7 +243,7 @@ load_text_file(void *ctx, const char *file_name)
 			if (bytes < size - total_read) {
 				free(text);
 				text = NULL;
-				break;
+				goto error;
 			}
 
 			if (bytes == 0) {
@@ -243,6 +254,7 @@ load_text_file(void *ctx, const char *file_name)
 		} while (total_read < size);
 
 		text[total_read] = '\0';
+error:;
 	}
 
 	fclose(fp);
@@ -371,6 +383,8 @@ main(int argc, char **argv)
 	 shader->Type = GL_GEOMETRY_SHADER;
       else if (strncmp(".frag", ext, 5) == 0)
 	 shader->Type = GL_FRAGMENT_SHADER;
+      else if (strncmp(".comp", ext, 5) == 0)
+         shader->Type = GL_COMPUTE_SHADER;
       else
 	 usage_fail(argv[0]);
       shader->Stage = _mesa_shader_enum_to_shader_stage(shader->Type);
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index d1f6435f4..1b4d31936 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -218,6 +218,11 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       this->mem_ctx = ralloc_parent(ir);
 
    switch (ir->operation) {
+   case ir_unop_bit_not:
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
+         return op_expr[0]->operands[0];
+      break;
+
    case ir_unop_abs:
       if (op_expr[0] == NULL)
 	 break;
@@ -240,6 +245,42 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       }
       break;
 
+   case ir_unop_exp:
+      if (op_expr[0] == NULL)
+	 break;
+
+      if (op_expr[0]->operation == ir_unop_log) {
+         return op_expr[0]->operands[0];
+      }
+      break;
+
+   case ir_unop_log:
+      if (op_expr[0] == NULL)
+	 break;
+
+      if (op_expr[0]->operation == ir_unop_exp) {
+         return op_expr[0]->operands[0];
+      }
+      break;
+
+   case ir_unop_exp2:
+      if (op_expr[0] == NULL)
+	 break;
+
+      if (op_expr[0]->operation == ir_unop_log2) {
+         return op_expr[0]->operands[0];
+      }
+      break;
+
+   case ir_unop_log2:
+      if (op_expr[0] == NULL)
+	 break;
+
+      if (op_expr[0]->operation == ir_unop_exp2) {
+         return op_expr[0]->operands[0];
+      }
+      break;
+
    case ir_unop_logic_not: {
       enum ir_expression_operation new_op = ir_unop_logic_not;
 
@@ -479,6 +520,10 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (is_vec_one(op_const[0]))
          return op_const[0];
 
+      /* x^1 == x */
+      if (is_vec_one(op_const[1]))
+         return ir->operands[0];
+
       /* pow(2,x) == exp2(x) */
       if (is_vec_two(op_const[0]))
          return expr(ir_unop_exp2, ir->operands[1]);
@@ -502,15 +547,37 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
       break;
 
+   case ir_triop_fma:
+      /* Operands are op0 * op1 + op2. */
+      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
+         return ir->operands[2];
+      } else if (is_vec_zero(op_const[2])) {
+         return mul(ir->operands[0], ir->operands[1]);
+      } else if (is_vec_one(op_const[0])) {
+         return add(ir->operands[1], ir->operands[2]);
+      } else if (is_vec_one(op_const[1])) {
+         return add(ir->operands[0], ir->operands[2]);
+      }
+      break;
+
    case ir_triop_lrp:
       /* Operands are (x, y, a). */
       if (is_vec_zero(op_const[2])) {
          return ir->operands[0];
       } else if (is_vec_one(op_const[2])) {
          return ir->operands[1];
+      } else if (ir->operands[0]->equals(ir->operands[1])) {
+         return ir->operands[0];
       }
       break;
 
+   case ir_triop_csel:
+      if (is_vec_one(op_const[0]))
+	 return ir->operands[1];
+      if (is_vec_zero(op_const[0]))
+	 return ir->operands[2];
+      break;
+
    default:
       break;
    }
diff --git a/mesalib/src/glsl/opt_vectorize.cpp b/mesalib/src/glsl/opt_vectorize.cpp
index 9ca811a86..8ee81f1a3 100644
--- a/mesalib/src/glsl/opt_vectorize.cpp
+++ b/mesalib/src/glsl/opt_vectorize.cpp
@@ -82,6 +82,8 @@ public:
 
    virtual ir_visitor_status visit_enter(ir_assignment *);
    virtual ir_visitor_status visit_enter(ir_swizzle *);
+   virtual ir_visitor_status visit_enter(ir_if *);
+   virtual ir_visitor_status visit_enter(ir_loop *);
 
    virtual ir_visitor_status visit_leave(ir_assignment *);
 
@@ -104,9 +106,10 @@ public:
  * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
  * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
  *
- * The function modifies only ir_expressions and ir_swizzles. For expressions
- * it sets a new type and swizzles any scalar dereferences into appropriately
- * sized vector arguments. For example, if combining
+ * The function operates on ir_expressions (and its operands) and ir_swizzles.
+ * For expressions it sets a new type and swizzles any non-expression and non-
+ * swizzle scalar operands into appropriately sized vector arguments. For
+ * example, if combining
  *
  * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
  * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
@@ -144,9 +147,10 @@ rewrite_swizzle(ir_instruction *ir, void *data)
                                            mask->num_components, 1);
       for (unsigned i = 0; i < 4; i++) {
          if (expr->operands[i]) {
-            ir_dereference *deref = expr->operands[i]->as_dereference();
-            if (deref && deref->type->is_scalar()) {
-               expr->operands[i] = new(ir) ir_swizzle(deref, 0, 0, 0, 0,
+            ir_rvalue *rval = expr->operands[i]->as_rvalue();
+            if (rval && rval->type->is_scalar() &&
+                !rval->as_expression() && !rval->as_swizzle()) {
+               expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
                                                       mask->num_components);
             }
          }
@@ -170,22 +174,31 @@ void
 ir_vectorize_visitor::try_vectorize()
 {
    if (this->last_assignment && this->channels > 1) {
-      ir_swizzle_mask mask = {0, 1, 2, 3, channels, 0};
-
-      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
+      ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
 
       this->last_assignment->write_mask = 0;
 
-      for (unsigned i = 0; i < 4; i++) {
+      for (unsigned i = 0, j = 0; i < 4; i++) {
          if (this->assignment[i]) {
             this->last_assignment->write_mask |= 1 << i;
 
             if (this->assignment[i] != this->last_assignment) {
                this->assignment[i]->remove();
             }
+
+            switch (j) {
+            case 0: mask.x = i; break;
+            case 1: mask.y = i; break;
+            case 2: mask.z = i; break;
+            case 3: mask.w = i; break;
+            }
+
+            j++;
          }
       }
 
+      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
+
       this->progress = true;
    }
    clear();
@@ -276,6 +289,39 @@ ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
    return visit_continue;
 }
 
+/* Since there is no statement to visit between the "then" and "else"
+ * instructions try to vectorize before, in between, and after them to avoid
+ * combining statements from different basic blocks.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_if *ir)
+{
+   try_vectorize();
+
+   visit_list_elements(this, &ir->then_instructions);
+   try_vectorize();
+
+   visit_list_elements(this, &ir->else_instructions);
+   try_vectorize();
+
+   return visit_continue_with_parent;
+}
+
+/* Since there is no statement to visit between the instructions in the body of
+ * the loop and the instructions after it try to vectorize before and after the
+ * body to avoid combining statements from different basic blocks.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_loop *ir)
+{
+   try_vectorize();
+
+   visit_list_elements(this, &ir->body_instructions);
+   try_vectorize();
+
+   return visit_continue_with_parent;
+}
+
 /**
  * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
  * the swizzle mask(s) found were appropriate. Also save a pointer in
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 91794719b..6c25010b7 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -91,6 +91,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
 
    ctx->Extensions.dummy_false = false;
    ctx->Extensions.dummy_true = true;
+   ctx->Extensions.ARB_compute_shader = true;
    ctx->Extensions.ARB_conservative_depth = true;
    ctx->Extensions.ARB_draw_instanced = true;
    ctx->Extensions.ARB_ES2_compatibility = true;
@@ -140,6 +141,17 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
    ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32;
 
    ctx->Const.MaxDrawBuffers = 1;
+   ctx->Const.MaxComputeWorkGroupCount[0] = 65535;
+   ctx->Const.MaxComputeWorkGroupCount[1] = 65535;
+   ctx->Const.MaxComputeWorkGroupCount[2] = 65535;
+   ctx->Const.MaxComputeWorkGroupSize[0] = 1024;
+   ctx->Const.MaxComputeWorkGroupSize[1] = 1024;
+   ctx->Const.MaxComputeWorkGroupSize[2] = 64;
+   ctx->Const.MaxComputeWorkGroupInvocations = 1024;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024;
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */
+   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */
 
    /* Set up default shader compiler options. */
    struct gl_shader_compiler_options options;
diff --git a/mesalib/src/glsl/standalone_scaffolding.h b/mesalib/src/glsl/standalone_scaffolding.h
index 327fef2df..df783afdb 100644
--- a/mesalib/src/glsl/standalone_scaffolding.h
+++ b/mesalib/src/glsl/standalone_scaffolding.h
@@ -58,6 +58,8 @@ _mesa_shader_enum_to_shader_stage(GLenum v)
       return MESA_SHADER_FRAGMENT;
    case GL_GEOMETRY_SHADER:
       return MESA_SHADER_GEOMETRY;
+   case GL_COMPUTE_SHADER:
+      return MESA_SHADER_COMPUTE;
    default:
       assert(!"bad value in _mesa_shader_enum_to_shader_stage()");
       return MESA_SHADER_VERTEX;