29 files changed, 1132 insertions, 297 deletions
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index 5945590a5..b54eae72d 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -96,6 +96,7 @@ LIBGLSL_FILES = \
 	$(GLSL_SRCDIR)/opt_function_inlining.cpp \
 	$(GLSL_SRCDIR)/opt_if_simplification.cpp \
 	$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
+	$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
 	$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
 	$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
 	$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index d1c77f1ec..7ba04a808 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -49,7 +49,6 @@
  * parser (and lexer) sources.
  */
 
-#include "main/core.h" /* for struct gl_extensions */
 #include "glsl_symbol_table.h"
 #include "glsl_parser_extras.h"
 #include "ast.h"
@@ -2182,6 +2181,41 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
 {
    bool fail = false;
 
+   /* Checks for GL_ARB_explicit_uniform_location. */
+   if (qual->flags.q.uniform) {
+      if (!state->check_explicit_uniform_location_allowed(loc, var))
+         return;
+
+      const struct gl_context *const ctx = state->ctx;
+      unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+
+      /* ARB_explicit_uniform_location specification states:
+       *
+       *     "The explicitly defined locations and the generated locations
+       *     must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
+       *
+       *     "Valid locations for default-block uniform variable locations
+       *     are in the range of 0 to the implementation-defined maximum
+       *     number of uniform locations."
+       */
+      if (qual->location < 0) {
+         _mesa_glsl_error(loc, state,
+                          "explicit location < 0 for uniform %s", var->name);
+         return;
+      }
+
+      if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+         _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+                          ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+                          ctx->Const.MaxUserAssignableUniformLocations);
+         return;
+      }
+
+      var->data.explicit_location = true;
+      var->data.location = qual->location;
+      return;
+   }
+
    /* Between GL_ARB_explicit_attrib_location an
     * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
     * stage can be assigned explicit locations.  The checking here associates
@@ -2435,6 +2469,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
                        _mesa_shader_stage_to_string(state->stage));
    }
 
+   /* Disallow layout qualifiers which may only appear on layout declarations. */
+   if (qual->flags.q.prim_type) {
+      _mesa_glsl_error(loc, state,
+                       "Primitive type may only be specified on GS input or output "
+                       "layout declaration, not on variables.");
+   }
+
    /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says:
     *
     *     "However, the const qualifier cannot be used with out or inout."
@@ -2649,6 +2690,36 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
    const bool uses_deprecated_qualifier = qual->flags.q.attribute
       || qual->flags.q.varying;
 
+
+   /* Validate auxiliary storage qualifiers */
+
+   /* From section 4.3.4 of the GLSL 1.30 spec:
+    *    "It is an error to use centroid in in a vertex shader."
+    *
+    * From section 4.3.4 of the GLSL ES 3.00 spec:
+    *    "It is an error to use centroid in or interpolation qualifiers in
+    *    a vertex shader input."
+    */
+
+   /* Section 4.3.6 of the GLSL 1.30 specification states:
+    * "It is an error to use centroid out in a fragment shader."
+    *
+    * The GL_ARB_shading_language_420pack extension specification states:
+    * "It is an error to use auxiliary storage qualifiers or interpolation
+    *  qualifiers on an output in a fragment shader."
+    */
+   if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) {
+      _mesa_glsl_error(loc, state,
+                       "sample qualifier may only be used on `in` or `out` "
+                       "variables between shader stages");
+   }
+   if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) {
+      _mesa_glsl_error(loc, state,
+                       "centroid qualifier may only be used with `in', "
+                       "`out' or `varying' variables between shader stages");
+   }
+
+
    /* Is the 'layout' keyword used with parameters that allow relaxed checking.
     * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
     * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
@@ -3606,45 +3677,6 @@ ast_declarator_list::hir(exec_list *instructions,
       }
 
 
-      /* From section 4.3.4 of the GLSL 1.30 spec:
-       *    "It is an error to use centroid in in a vertex shader."
-       *
-       * From section 4.3.4 of the GLSL ES 3.00 spec:
-       *    "It is an error to use centroid in or interpolation qualifiers in
-       *    a vertex shader input."
-       */
-      if (state->is_version(130, 300)
-          && this->type->qualifier.flags.q.centroid
-          && this->type->qualifier.flags.q.in
-          && state->stage == MESA_SHADER_VERTEX) {
-
-         _mesa_glsl_error(&loc, state,
-                          "'centroid in' cannot be used in a vertex shader");
-      }
-
-      if (state->stage == MESA_SHADER_VERTEX
-          && this->type->qualifier.flags.q.sample
-          && this->type->qualifier.flags.q.in) {
-
-         _mesa_glsl_error(&loc, state,
-                        "'sample in' cannot be used in a vertex shader");
-      }
-
-      /* Section 4.3.6 of the GLSL 1.30 specification states:
-       * "It is an error to use centroid out in a fragment shader."
-       *
-       * The GL_ARB_shading_language_420pack extension specification states:
-       * "It is an error to use auxiliary storage qualifiers or interpolation
-       *  qualifiers on an output in a fragment shader."
-       */
-      if (state->stage == MESA_SHADER_FRAGMENT &&
-          this->type->qualifier.flags.q.out &&
-          this->type->qualifier.has_auxiliary_storage()) {
-         _mesa_glsl_error(&loc, state,
-                          "auxiliary storage qualifiers cannot be used on "
-                          "fragment shader outputs");
-      }
-
       /* Precision qualifiers exists only in GLSL versions 1.00 and >= 1.30.
        */
       if (this->type->qualifier.precision != ast_precision_none) {
@@ -4632,9 +4664,51 @@ ast_case_label::hir(exec_list *instructions,
       ir_dereference_variable *deref_test_var =
          new(ctx) ir_dereference_variable(state->switch_state.test_var);
 
-      ir_rvalue *const test_cond = new(ctx) ir_expression(ir_binop_all_equal,
-                                                          label_const,
-                                                          deref_test_var);
+      ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+                                                        label_const,
+                                                        deref_test_var);
+
+      /*
+       * From GLSL 4.40 specification section 6.2 ("Selection"):
+       *
+       *     "The type of the init-expression value in a switch statement must
+       *     be a scalar int or uint. The type of the constant-expression value
+       *     in a case label also must be a scalar int or uint. When any pair
+       *     of these values is tested for "equal value" and the types do not
+       *     match, an implicit conversion will be done to convert the int to a
+       *     uint (see section 4.1.10 “Implicit Conversions”) before the compare
+       *     is done."
+       */
+      if (label_const->type != state->switch_state.test_var->type) {
+         YYLTYPE loc = this->test_value->get_location();
+
+         const glsl_type *type_a = label_const->type;
+         const glsl_type *type_b = state->switch_state.test_var->type;
+
+         /* Check if int->uint implicit conversion is supported. */
+         bool integer_conversion_supported =
+            glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type,
+                                                           state);
+
+         if ((!type_a->is_integer() || !type_b->is_integer()) ||
+              !integer_conversion_supported) {
+            _mesa_glsl_error(&loc, state, "type mismatch with switch "
+                             "init-expression and case label (%s != %s)",
+                             type_a->name, type_b->name);
+         } else {
+            /* Conversion of the case label. */
+            if (type_a->base_type == GLSL_TYPE_INT) {
+               if (!apply_implicit_conversion(glsl_type::uint_type,
+                                              test_cond->operands[0], state))
+                  _mesa_glsl_error(&loc, state, "implicit type conversion error");
+            } else {
+               /* Conversion of the init-expression value. */
+               if (!apply_implicit_conversion(glsl_type::uint_type,
+                                              test_cond->operands[1], state))
+                  _mesa_glsl_error(&loc, state, "implicit type conversion error");
+            }
+         }
+      }
 
       ir_assignment *set_fallthru_on_test =
          new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
@@ -5041,6 +5115,13 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "with uniform interface blocks");
          }
 
+         if ((qual->flags.q.uniform || !is_interface) &&
+             qual->has_auxiliary_storage()) {
+            _mesa_glsl_error(&loc, state,
+                             "auxiliary storage qualifiers cannot be used "
+                             "in uniform blocks or structures.");
+         }
+
          if (field_type->is_matrix() ||
              (field_type->is_array() && field_type->fields.array->is_matrix())) {
             fields[i].row_major = block_row_major;
@@ -5090,7 +5171,7 @@ ast_struct_specifier::hir(exec_list *instructions,
     */
    if (state->language_version != 110 && state->struct_specifier_depth != 0)
       _mesa_glsl_error(&loc, state,
-		       "embedded structure declartions are not allowed");
+		       "embedded structure declarations are not allowed");
 
    state->struct_specifier_depth++;
 
@@ -5206,6 +5287,12 @@ ast_interface_block::hir(exec_list *instructions,
    bool block_row_major = this->layout.flags.q.row_major;
    exec_list declared_variables;
    glsl_struct_field *fields;
+
+   /* Treat an interface block as one level of nesting, so that embedded struct
+    * specifiers will be disallowed.
+    */
+   state->struct_specifier_depth++;
+
    unsigned int num_variables =
       ast_process_structure_or_interface_block(&declared_variables,
                                                state,
@@ -5217,6 +5304,8 @@ ast_interface_block::hir(exec_list *instructions,
                                                redeclaring_per_vertex,
                                                var_mode);
 
+   state->struct_specifier_depth--;
+
    if (!redeclaring_per_vertex)
       validate_identifier(this->block_name, loc, state);
 
diff --git a/mesalib/src/glsl/ast_type.cpp b/mesalib/src/glsl/ast_type.cpp
index 0ee2c495a..77053d5b1 100644
--- a/mesalib/src/glsl/ast_type.cpp
+++ b/mesalib/src/glsl/ast_type.cpp
@@ -247,7 +247,7 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
          q.flags.q.local_size != 0 &&
          state->in_qualifier->flags.q.local_size == 0;
 
-      valid_in_mask.flags.q.local_size = 1;
+      valid_in_mask.flags.q.local_size = 7;
       break;
    default:
       _mesa_glsl_error(loc, state,
diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp
index 9b35850ee..b9c69d23c 100644
--- a/mesalib/src/glsl/builtin_variables.cpp
+++ b/mesalib/src/glsl/builtin_variables.cpp
@@ -26,7 +26,6 @@
 #include "glsl_symbol_table.h"
 #include "main/core.h"
 #include "main/uniforms.h"
-#include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
 #include "program/prog_instruction.h"
 
@@ -939,6 +938,11 @@ builtin_variable_generator::generate_fs_special_vars()
    if (state->ARB_gpu_shader5_enable) {
       add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn");
    }
+
+   if (state->ARB_fragment_layer_viewport_enable) {
+      add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+      add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+   }
 }
 
 
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index 98875837c..d8c395778 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -2086,9 +2086,15 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	         add_builtin_define(parser, "GL_ARB_fragment_coord_conventions",
 				    1);
 
+              if (extensions->ARB_fragment_layer_viewport)
+                 add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1);
+
 	      if (extensions->ARB_explicit_attrib_location)
 	         add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1);
 
+	      if (extensions->ARB_explicit_uniform_location)
+	         add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1);
+
 	      if (extensions->ARB_shader_texture_lod)
 	         add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1);
 
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 6c3f9b692..db7b1d179 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -396,6 +396,7 @@ layout		{
 		      || yyextra->AMD_conservative_depth_enable
 		      || yyextra->ARB_conservative_depth_enable
 		      || yyextra->ARB_explicit_attrib_location_enable
+		      || yyextra->ARB_explicit_uniform_location_enable
                       || yyextra->has_separate_shader_objects()
 		      || yyextra->ARB_uniform_buffer_object_enable
 		      || yyextra->ARB_fragment_coord_conventions_enable
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index b69802ddb..2b2de3047 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -1559,11 +1559,6 @@ type_qualifier:
       if ($2.flags.q.invariant)
          _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier");
 
-      if ($2.has_layout()) {
-         _mesa_glsl_error(&@1, state,
-                          "\"invariant\" cannot be used with layout(...)");
-      }
-
       if (!state->ARB_shading_language_420pack_enable && $2.flags.q.precise)
          _mesa_glsl_error(&@1, state,
                           "\"invariant\" must come after \"precise\"");
@@ -1586,11 +1581,6 @@ type_qualifier:
       if ($2.has_interpolation())
          _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier");
 
-      if ($2.has_layout()) {
-         _mesa_glsl_error(&@1, state, "interpolation qualifiers cannot be used "
-                          "with layout(...)");
-      }
-
       if (!state->ARB_shading_language_420pack_enable &&
           ($2.flags.q.precise || $2.flags.q.invariant)) {
          _mesa_glsl_error(&@1, state, "interpolation qualifiers must come "
@@ -1602,28 +1592,18 @@ type_qualifier:
    }
    | layout_qualifier type_qualifier
    {
-      /* The GLSL 1.50 grammar indicates that a layout(...) declaration can be
-       * used standalone or immediately before a storage qualifier.  It cannot
-       * be used with interpolation qualifiers or invariant.  There does not
-       * appear to be any text indicating that it must come before the storage
-       * qualifier, but always seems to in examples.
+      /* In the absence of ARB_shading_language_420pack, layout qualifiers may
+       * appear no later than auxiliary storage qualifiers. There is no
+       * particularly clear spec language mandating this, but in all examples
+       * the layout qualifier precedes the storage qualifier.
+       *
+       * We allow combinations of layout with interpolation, invariant or
+       * precise qualifiers since these are useful in ARB_separate_shader_objects.
+       * There is no clear spec guidance on this either.
        */
       if (!state->ARB_shading_language_420pack_enable && $2.has_layout())
          _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
 
-      if ($2.flags.q.invariant)
-         _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
-                          "the \"invariant\" qualifier");
-
-      if ($2.flags.q.precise)
-         _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
-                          "the \"precise\" qualifier");
-
-      if ($2.has_interpolation()) {
-         _mesa_glsl_error(&@1, state, "layout(...) cannot be used with "
-                          "interpolation qualifiers");
-      }
-
       $$ = $1;
       $$.merge_qualifier(&@1, state, $2);
    }
@@ -2181,7 +2161,7 @@ condition:
    ;
 
 /*
- * siwtch_statement grammar is based on the syntax described in the body
+ * switch_statement grammar is based on the syntax described in the body
  * of the GLSL spec, not in it's appendix!!!
  */
 switch_statement:
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index f3c5bd049..11a9a4320 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -515,7 +515,9 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_draw_buffers,               true,  false,     dummy_true),
    EXT(ARB_draw_instanced,             true,  false,     ARB_draw_instanced),
    EXT(ARB_explicit_attrib_location,   true,  false,     ARB_explicit_attrib_location),
+   EXT(ARB_explicit_uniform_location,  true,  false,     ARB_explicit_uniform_location),
    EXT(ARB_fragment_coord_conventions, true,  false,     ARB_fragment_coord_conventions),
+   EXT(ARB_fragment_layer_viewport,    true,  false,     ARB_fragment_layer_viewport),
    EXT(ARB_gpu_shader5,                true,  false,     ARB_gpu_shader5),
    EXT(ARB_sample_shading,             true,  false,     ARB_sample_shading),
    EXT(ARB_separate_shader_objects,    true,  false,     dummy_true),
@@ -1568,7 +1570,8 @@ do_common_optimization(exec_list *ir, bool linked,
       progress = do_constant_variable_unlinked(ir) || progress;
    progress = do_constant_folding(ir) || progress;
    progress = do_cse(ir) || progress;
-   progress = do_algebraic(ir, native_integers) || progress;
+   progress = do_rebalance_tree(ir) || progress;
+   progress = do_algebraic(ir, native_integers, options) || progress;
    progress = do_lower_jumps(ir) || progress;
    progress = do_vec_index_to_swizzle(ir) || progress;
    progress = lower_vector_insert(ir, false) || progress;
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 49402fa21..2a5aea477 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -155,6 +155,21 @@ struct _mesa_glsl_parse_state {
       return true;
    }
 
+   bool check_explicit_uniform_location_allowed(YYLTYPE *locp,
+                                                const ir_variable *)
+   {
+      if (!this->has_explicit_attrib_location() ||
+          !this->ARB_explicit_uniform_location_enable) {
+         _mesa_glsl_error(locp, this,
+                          "uniform explicit location requires "
+                          "GL_ARB_explicit_uniform_location and either "
+                          "GL_ARB_explicit_attrib_location or GLSL 330.");
+         return false;
+      }
+
+      return true;
+   }
+
    bool has_explicit_attrib_location() const
    {
       return ARB_explicit_attrib_location_enable || is_version(330, 300);
@@ -192,7 +207,7 @@ struct _mesa_glsl_parse_state {
    /**
     * Number of nested struct_specifier levels
     *
-    * Outside a struct_specifer, this is zero.
+    * Outside a struct_specifier, this is zero.
     */
    unsigned struct_specifier_depth;
 
@@ -367,8 +382,12 @@ struct _mesa_glsl_parse_state {
    bool ARB_draw_instanced_warn;
    bool ARB_explicit_attrib_location_enable;
    bool ARB_explicit_attrib_location_warn;
+   bool ARB_explicit_uniform_location_enable;
+   bool ARB_explicit_uniform_location_warn;
    bool ARB_fragment_coord_conventions_enable;
    bool ARB_fragment_coord_conventions_warn;
+   bool ARB_fragment_layer_viewport_enable;
+   bool ARB_fragment_layer_viewport_warn;
    bool ARB_gpu_shader5_enable;
    bool ARB_gpu_shader5_warn;
    bool ARB_sample_shading_enable;
diff --git a/mesalib/src/glsl/glsl_symbol_table.h b/mesalib/src/glsl/glsl_symbol_table.h
index 83d7935d8..db8863a20 100644
--- a/mesalib/src/glsl/glsl_symbol_table.h
+++ b/mesalib/src/glsl/glsl_symbol_table.h
@@ -32,9 +32,9 @@ extern "C" {
 #include "program/symbol_table.h"
 }
 #include "ir.h"
-#include "glsl_types.h"
 
 class symbol_table_entry;
+struct glsl_type;
 
 /**
  * Facade class for _mesa_symbol_table
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index e77146cdf..f9cd258fe 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -22,9 +22,7 @@
  */
 
 #include <stdio.h>
-#include <stdlib.h>
-#include "main/core.h" /* for Elements */
-#include "glsl_symbol_table.h"
+#include "main/core.h" /* for Elements, MAX2 */
 #include "glsl_parser_extras.h"
 #include "glsl_types.h"
 extern "C" {
@@ -677,6 +675,32 @@ glsl_type::component_slots() const
    return 0;
 }
 
+unsigned
+glsl_type::uniform_locations() const
+{
+   if (this->is_matrix())
+      return 1;
+
+   unsigned size = 0;
+
+   switch (this->base_type) {
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_INTERFACE:
+      for (unsigned i = 0; i < this->length; i++)
+         size += this->fields.structure[i].type->uniform_locations();
+      return size;
+   case GLSL_TYPE_ARRAY:
+      return this->length * this->fields.array->uniform_locations();
+   default:
+      break;
+   }
+
+   /* The location count for many types matches the component_slots() result;
+    * all exceptions should be handled above.
+    */
+   return component_slots();
+}
+
 bool
 glsl_type::can_implicitly_convert_to(const glsl_type *desired,
                                      _mesa_glsl_parse_state *state) const
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 35a4e6acc..f6d4a02ab 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -256,6 +256,12 @@ struct glsl_type {
    unsigned component_slots() const;
 
    /**
+    * Calculate the number of unique values from glGetUniformLocation for the
+    * elements of the type.
+    */
+   unsigned uniform_locations() const;
+
+   /**
     * Calculate the number of attribute slots required to hold this type
     *
     * This implements the language rules of GLSL 1.50 for counting the number
diff --git a/mesalib/src/glsl/hir_field_selection.cpp b/mesalib/src/glsl/hir_field_selection.cpp
index 1e92c89ae..0fa976811 100644
--- a/mesalib/src/glsl/hir_field_selection.cpp
+++ b/mesalib/src/glsl/hir_field_selection.cpp
@@ -22,7 +22,6 @@
  */
 
 #include "ir.h"
-#include "program/symbol_table.h"
 #include "glsl_parser_extras.h"
 #include "ast.h"
 #include "glsl_types.h"
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index 2f4a0bec8..67dbac1eb 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -23,7 +23,6 @@
 #include <string.h>
 #include "main/core.h" /* for MAX2 */
 #include "ir.h"
-#include "ir_visitor.h"
 #include "glsl_types.h"
 
 ir_rvalue::ir_rvalue(enum ir_node_type t)
diff --git a/mesalib/src/glsl/ir_basic_block.cpp b/mesalib/src/glsl/ir_basic_block.cpp
index 426fda2f2..74ee4b696 100644
--- a/mesalib/src/glsl/ir_basic_block.cpp
+++ b/mesalib/src/glsl/ir_basic_block.cpp
@@ -28,9 +28,7 @@
  */
 
 #include "ir.h"
-#include "ir_visitor.h"
 #include "ir_basic_block.h"
-#include "glsl_types.h"
 
 /**
  * Calls a user function for every basic block in the instruction stream.
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index f5cb12343..73380d243 100755
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -36,7 +36,6 @@
 #include <math.h>
 #include "main/core.h" /* for MAX2, MIN2, CLAMP */
 #include "ir.h"
-#include "ir_visitor.h"
 #include "glsl_types.h"
 #include "program/hash_table.h"
 
diff --git a/mesalib/src/glsl/ir_expression_flattening.cpp b/mesalib/src/glsl/ir_expression_flattening.cpp
index c1cadb122..0b1ada519 100644
--- a/mesalib/src/glsl/ir_expression_flattening.cpp
+++ b/mesalib/src/glsl/ir_expression_flattening.cpp
@@ -32,10 +32,8 @@
  */
 
 #include "ir.h"
-#include "ir_visitor.h"
 #include "ir_rvalue_visitor.h"
 #include "ir_expression_flattening.h"
-#include "glsl_types.h"
 
 class ir_expression_flattening_visitor : public ir_rvalue_visitor {
 public:
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index c63921c26..b83c22592 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -71,7 +71,9 @@ bool do_common_optimization(exec_list *ir, bool linked,
                             const struct gl_shader_compiler_options *options,
                             bool native_integers);
 
-bool do_algebraic(exec_list *instructions, bool native_integers);
+bool do_rebalance_tree(exec_list *instructions);
+bool do_algebraic(exec_list *instructions, bool native_integers,
+                  const struct gl_shader_compiler_options *options);
 bool do_constant_folding(exec_list *instructions);
 bool do_constant_variable(exec_list *instructions);
 bool do_constant_variable_unlinked(exec_list *instructions);
diff --git a/mesalib/src/glsl/ir_uniform.h b/mesalib/src/glsl/ir_uniform.h
index 3508509d4..2f7352825 100644
--- a/mesalib/src/glsl/ir_uniform.h
+++ b/mesalib/src/glsl/ir_uniform.h
@@ -32,12 +32,17 @@
 
 #include "program/prog_parameter.h"  /* For union gl_constant_value. */
 
+/**
+ * Used by GL_ARB_explicit_uniform_location extension code in the linker
+ * and glUniform* functions to identify inactive explicit uniform locations.
+ */
+#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1)
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-enum gl_uniform_driver_format {
+enum PACKED gl_uniform_driver_format {
    uniform_native = 0,          /**< Store data in the native format. */
    uniform_int_float,           /**< Store integer data as floats. */
    uniform_bool_float,          /**< Store boolean data as floats. */
@@ -66,11 +71,8 @@ struct gl_uniform_driver_storage {
 
    /**
     * Base format of the stored data.
-    *
-    * This field must have a value from \c GLSL_TYPE_UINT through \c
-    * GLSL_TYPE_SAMPLER.
     */
-   uint8_t format;
+   enum gl_uniform_driver_format format;
 
    /**
     * Pointer to the base of the data.
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.h b/mesalib/src/glsl/link_uniform_block_active_visitor.h
index d76dbcaf1..524cd6b91 100644
--- a/mesalib/src/glsl/link_uniform_block_active_visitor.h
+++ b/mesalib/src/glsl/link_uniform_block_active_visitor.h
@@ -26,8 +26,6 @@
 #define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H
 
 #include "ir.h"
-#include "ir_visitor.h"
-#include "glsl_types.h"
 #include "main/hash_table.h"
 
 struct link_uniform_block_active {
diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp
index 2100e0517..d755cec98 100644
--- a/mesalib/src/glsl/link_uniform_initializers.cpp
+++ b/mesalib/src/glsl/link_uniform_initializers.cpp
@@ -25,8 +25,6 @@
 #include "ir.h"
 #include "linker.h"
 #include "ir_uniform.h"
-#include "glsl_symbol_table.h"
-#include "program/hash_table.h"
 
 /* These functions are put in a "private" namespace instead of being marked
  * static so that the unit tests can access them.  See
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index 377fed64a..5dcb7b571 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -37,6 +37,11 @@
  */
 
 /**
+ * Used by linker to indicate uniforms that have no location set.
+ */
+#define UNMAPPED_UNIFORM_LOC ~0u
+
+/**
  * Count the backing storage requirements for a type
  */
 static unsigned
@@ -386,6 +391,9 @@ public:
    void set_and_process(struct gl_shader_program *prog,
 			ir_variable *var)
    {
+      current_var = var;
+      field_counter = 0;
+
       ubo_block_index = -1;
       if (var->is_in_uniform_block()) {
          if (var->is_interface_instance() && var->type->is_array()) {
@@ -542,6 +550,22 @@ private:
          return;
       }
 
+      /* Assign explicit locations. */
+      if (current_var->data.explicit_location) {
+         /* Set sequential locations for struct fields. */
+         if (record_type != NULL) {
+            const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
+            this->uniforms[id].remap_location =
+               current_var->data.location + field_counter;
+            field_counter += entries;
+         } else {
+            this->uniforms[id].remap_location = current_var->data.location;
+         }
+      } else {
+         /* Initialize to indicate that no location is set. */
+         this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC;
+      }
+
       this->uniforms[id].name = ralloc_strdup(this->uniforms, name);
       this->uniforms[id].type = base_type;
       this->uniforms[id].initialized = 0;
@@ -597,6 +621,17 @@ public:
    gl_texture_index targets[MAX_SAMPLERS];
 
    /**
+    * Current variable being processed.
+    */
+   ir_variable *current_var;
+
+   /**
+    * Field counter is used to ensure that uniform structures
+    * with explicit locations get sequential locations.
+    */
+   unsigned field_counter;
+
+   /**
     * Mask of samplers used by the current shader stage.
     */
    unsigned shader_samplers_used;
@@ -798,10 +833,6 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
    prog->UniformStorage = NULL;
    prog->NumUserUniformStorage = 0;
 
-   ralloc_free(prog->UniformRemapTable);
-   prog->UniformRemapTable = NULL;
-   prog->NumUniformRemapTable = 0;
-
    if (prog->UniformHash != NULL) {
       prog->UniformHash->clear();
    } else {
@@ -914,8 +945,28 @@ link_assign_uniform_locations(struct gl_shader_program *prog)
              sizeof(prog->_LinkedShaders[i]->SamplerTargets));
    }
 
-   /* Build the uniform remap table that is used to set/get uniform locations */
+   /* Reserve all the explicit locations of the active uniforms. */
    for (unsigned i = 0; i < num_user_uniforms; i++) {
+      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
+         /* How many new entries for this uniform? */
+         const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+         /* Point the remap table entries at the correct gl_uniform_storage. */
+         for (unsigned j = 0; j < entries; j++) {
+            unsigned element_loc = uniforms[i].remap_location + j;
+            assert(prog->UniformRemapTable[element_loc] ==
+                   INACTIVE_UNIFORM_EXPLICIT_LOCATION);
+            prog->UniformRemapTable[element_loc] = &uniforms[i];
+         }
+      }
+   }
+
+   /* Reserve locations for rest of the uniforms. */
+   for (unsigned i = 0; i < num_user_uniforms; i++) {
+
+      /* Explicit ones have been set already. */
+      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
+         continue;
 
       /* how many new entries for this uniform? */
       const unsigned entries = MAX2(1, uniforms[i].array_elements);
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index a43d23082..0b6a71679 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -74,6 +74,7 @@
 #include "link_varyings.h"
 #include "ir_optimization.h"
 #include "ir_rvalue_visitor.h"
+#include "ir_uniform.h"
 
 extern "C" {
 #include "main/shaderobj.h"
@@ -2224,6 +2225,115 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
       linker_error(prog, "Too many combined image uniforms and fragment outputs");
 }
 
+
+/**
+ * Marks every location slot explicitly requested by \p var as
+ * INACTIVE_UNIFORM_EXPLICIT_LOCATION, growing the program's uniform remap
+ * table when needed, and records the base location of \p var in \p map.
+ *
+ * \return false, after raising a linker error, if the table cannot be grown
+ * or a slot is already reserved by anything other than a uniform of the same
+ * name at the same base location; true otherwise.
+ */
+static bool
+reserve_explicit_locations(struct gl_shader_program *prog,
+                           string_to_uint_map *map, ir_variable *var)
+{
+   unsigned slots = var->type->uniform_locations();
+   unsigned max_loc = var->data.location + slots - 1;
+
+   /* Resize remap table if locations do not fit in the current one. */
+   if (max_loc + 1 > prog->NumUniformRemapTable) {
+      prog->UniformRemapTable =
+         reralloc(prog, prog->UniformRemapTable, gl_uniform_storage *,
+                  max_loc + 1);
+
+      if (!prog->UniformRemapTable) {
+         linker_error(prog, "Out of memory during linking.");
+         return false;
+      }
+
+      /* Initialize allocated space. */
+      for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++)
+         prog->UniformRemapTable[i] = NULL;
+
+      prog->NumUniformRemapTable = max_loc + 1;
+   }
+
+   for (unsigned i = 0; i < slots; i++) {
+      unsigned loc = var->data.location + i;
+
+      /* Check if location is already used. */
+      if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) {
+         /* Possibly same uniform from a different stage, this is ok. */
+         unsigned hash_loc;
+         if (map->get(hash_loc, var->name) && hash_loc == loc - i)
+            continue;
+
+         /* ARB_explicit_uniform_location specification states:
+          *
+          *     "No two default-block uniform variables in the program can have
+          *     the same location, even if they are unused, otherwise a compiler
+          *     or linker error will be generated."
+          */
+         linker_error(prog,
+                      "location qualifier for uniform %s overlaps "
+                      "previously used location",
+                      var->name);
+         return false;
+      }
+
+      /* Mark the slot inactive; location assignment claims active ones. */
+      prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION;
+   }
+
+   /* Note, base location used for arrays. */
+   map->put(var->data.location, var->name);
+
+   return true;
+}
+
+/**
+ * Check and reserve all explicit uniform locations.  Called before any
+ * optimizations happen so that inactive uniforms and array elements that
+ * may get trimmed away are handled too; overlaps raise a linker error.
+ */
+static void
+check_explicit_uniform_locations(struct gl_context *ctx,
+                                 struct gl_shader_program *prog)
+{
+   if (!ctx->Extensions.ARB_explicit_uniform_location)
+      return;
+
+   /* Name -> base location, so the same uniform in another stage is ok. */
+   string_to_uint_map *uniform_map = new string_to_uint_map;
+
+   if (!uniform_map) {
+      linker_error(prog, "Out of memory during linking.");
+      return;
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_shader *sh = prog->_LinkedShaders[i];
+
+      if (!sh)
+         continue;
+
+      foreach_list(node, sh->ir) {
+         ir_variable *var = ((ir_instruction *)node)->as_variable();
+         if ((var && var->data.mode == ir_var_uniform) &&
+             var->data.explicit_location) {
+            if (!reserve_explicit_locations(prog, uniform_map, var)) {
+               delete uniform_map;
+               return;
+            }
+         }
+      }
+   }
+
+   delete uniform_map;
+}
+
 void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 {
@@ -2372,6 +2482,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
          break;
    }
 
+   check_explicit_uniform_locations(ctx, prog);
+   if (!prog->LinkStatus)
+      goto done;
+
    /* Validate the inputs of each stage with the output of the preceding
     * stage.
     */
diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h
index b2e249657..fa6ec12cc 100644
--- a/mesalib/src/glsl/list.h
+++ b/mesalib/src/glsl/list.h
@@ -87,67 +87,29 @@ struct exec_node {
       /* empty */
    }
 
-   const exec_node *get_next() const
-   {
-      return next;
-   }
+   const exec_node *get_next() const;
+   exec_node *get_next();
 
-   exec_node *get_next()
-   {
-      return next;
-   }
+   const exec_node *get_prev() const;
+   exec_node *get_prev();
 
-   const exec_node *get_prev() const
-   {
-      return prev;
-   }
-
-   exec_node *get_prev()
-   {
-      return prev;
-   }
-
-   void remove()
-   {
-      next->prev = prev;
-      prev->next = next;
-      next = NULL;
-      prev = NULL;
-   }
+   void remove();
 
    /**
     * Link a node with itself
     *
     * This creates a sort of degenerate list that is occasionally useful.
     */
-   void self_link()
-   {
-      next = this;
-      prev = this;
-   }
+   void self_link();
 
    /**
     * Insert a node in the list after the current node
     */
-   void insert_after(exec_node *after)
-   {
-      after->next = this->next;
-      after->prev = this;
-
-      this->next->prev = after;
-      this->next = after;
-   }
+   void insert_after(exec_node *after);
    /**
     * Insert a node in the list before the current node
     */
-   void insert_before(exec_node *before)
-   {
-      before->next = this;
-      before->prev = this->prev;
-
-      this->prev->next = before;
-      this->prev = before;
-   }
+   void insert_before(exec_node *before);
 
    /**
     * Insert another list in the list before the current node
@@ -157,33 +119,165 @@ struct exec_node {
    /**
     * Replace the current node with the given node.
     */
-   void replace_with(exec_node *replacement)
-   {
-      replacement->prev = this->prev;
-      replacement->next = this->next;
-
-      this->prev->next = replacement;
-      this->next->prev = replacement;
-   }
+   void replace_with(exec_node *replacement);
 
    /**
     * Is this the sentinel at the tail of the list?
     */
-   bool is_tail_sentinel() const
-   {
-      return this->next == NULL;
-   }
+   bool is_tail_sentinel() const;
 
    /**
     * Is this the sentinel at the head of the list?
     */
-   bool is_head_sentinel() const
-   {
-      return this->prev == NULL;
-   }
+   bool is_head_sentinel() const;
 #endif
 };
 
+static inline void
+exec_node_init(struct exec_node *n)
+{
+   n->next = NULL;
+   n->prev = NULL;
+}
+
+static inline const struct exec_node *
+exec_node_get_next_const(const struct exec_node *n)
+{
+   return n->next;
+}
+
+static inline struct exec_node *
+exec_node_get_next(struct exec_node *n)
+{
+   return n->next;
+}
+
+static inline const struct exec_node *
+exec_node_get_prev_const(const struct exec_node *n)
+{
+   return n->prev;
+}
+
+static inline struct exec_node *
+exec_node_get_prev(struct exec_node *n)
+{
+   return n->prev;
+}
+
+static inline void
+exec_node_remove(struct exec_node *n)
+{
+   n->next->prev = n->prev;
+   n->prev->next = n->next;
+   n->next = NULL;
+   n->prev = NULL;
+}
+
+static inline void
+exec_node_self_link(struct exec_node *n)
+{
+   n->next = n;
+   n->prev = n;
+}
+
+static inline void
+exec_node_insert_after(struct exec_node *n, struct exec_node *after)
+{
+   after->next = n->next;
+   after->prev = n;
+
+   n->next->prev = after;
+   n->next = after;
+}
+
+static inline void
+exec_node_insert_node_before(struct exec_node *n, struct exec_node *before)
+{
+   before->next = n;
+   before->prev = n->prev;
+
+   n->prev->next = before;
+   n->prev = before;
+}
+
+static inline void
+exec_node_replace_with(struct exec_node *n, struct exec_node *replacement)
+{
+   replacement->prev = n->prev;
+   replacement->next = n->next;
+
+   n->prev->next = replacement;
+   n->next->prev = replacement;
+}
+
+static inline bool
+exec_node_is_tail_sentinel(const struct exec_node *n)
+{
+   return n->next == NULL;
+}
+
+static inline bool
+exec_node_is_head_sentinel(const struct exec_node *n)
+{
+   return n->prev == NULL;
+}
+
+#ifdef __cplusplus
+inline const exec_node *exec_node::get_next() const
+{
+   return exec_node_get_next_const(this);
+}
+
+inline exec_node *exec_node::get_next()
+{
+   return exec_node_get_next(this);
+}
+
+inline const exec_node *exec_node::get_prev() const
+{
+   return exec_node_get_prev_const(this);
+}
+
+inline exec_node *exec_node::get_prev()
+{
+   return exec_node_get_prev(this);
+}
+
+inline void exec_node::remove()
+{
+   exec_node_remove(this);
+}
+
+inline void exec_node::self_link()
+{
+   exec_node_self_link(this);
+}
+
+inline void exec_node::insert_after(exec_node *after)
+{
+   exec_node_insert_after(this, after);
+}
+
+inline void exec_node::insert_before(exec_node *before)
+{
+   exec_node_insert_node_before(this, before);
+}
+
+inline void exec_node::replace_with(exec_node *replacement)
+{
+   exec_node_replace_with(this, replacement);
+}
+
+inline bool exec_node::is_tail_sentinel() const
+{
+   return exec_node_is_tail_sentinel(this);
+}
+
+inline bool exec_node::is_head_sentinel() const
+{
+   return exec_node_is_head_sentinel(this);
+}
+#endif
 
 #ifdef __cplusplus
 /* This macro will not work correctly if `t' uses virtual inheritance.  If you
@@ -229,75 +323,19 @@ struct exec_list {
       make_empty();
    }
 
-   void make_empty()
-   {
-      head = (exec_node *) & tail;
-      tail = NULL;
-      tail_pred = (exec_node *) & head;
-   }
-
-   bool is_empty() const
-   {
-      /* There are three ways to test whether a list is empty or not.
-       *
-       * - Check to see if the \c head points to the \c tail.
-       * - Check to see if the \c tail_pred points to the \c head.
-       * - Check to see if the \c head is the sentinel node by test whether its
-       *   \c next pointer is \c NULL.
-       *
-       * The first two methods tend to generate better code on modern systems
-       * because they save a pointer dereference.
-       */
-      return head == (exec_node *) &tail;
-   }
-
-   const exec_node *get_head() const
-   {
-      return !is_empty() ? head : NULL;
-   }
-
-   exec_node *get_head()
-   {
-      return !is_empty() ? head : NULL;
-   }
-
-   const exec_node *get_tail() const
-   {
-      return !is_empty() ? tail_pred : NULL;
-   }
-
-   exec_node *get_tail()
-   {
-      return !is_empty() ? tail_pred : NULL;
-   }
+   void make_empty();
 
-   void push_head(exec_node *n)
-   {
-      n->next = head;
-      n->prev = (exec_node *) &head;
+   bool is_empty() const;
 
-      n->next->prev = n;
-      head = n;
-   }
+   const exec_node *get_head() const;
+   exec_node *get_head();
 
-   void push_tail(exec_node *n)
-   {
-      n->next = (exec_node *) &tail;
-      n->prev = tail_pred;
+   const exec_node *get_tail() const;
+   exec_node *get_tail();
 
-      n->prev->next = n;
-      tail_pred = n;
-   }
-
-   void push_degenerate_list_at_head(exec_node *n)
-   {
-      assert(n->prev->next == n);
-
-      n->prev->next = head;
-      head->prev = n->prev;
-      n->prev = (exec_node *) &head;
-      head = n;
-   }
+   void push_head(exec_node *n);
+   void push_tail(exec_node *n);
+   void push_degenerate_list_at_head(exec_node *n);
 
    /**
     * Remove the first node from a list and return it
@@ -307,87 +345,239 @@ struct exec_list {
     *
     * \sa exec_list::get_head
     */
-   exec_node *pop_head()
-   {
-      exec_node *const n = this->get_head();
-      if (n != NULL)
-	 n->remove();
-
-      return n;
-   }
+   exec_node *pop_head();
 
    /**
     * Move all of the nodes from this list to the target list
     */
-   void move_nodes_to(exec_list *target)
-   {
-      if (is_empty()) {
-	 target->make_empty();
-      } else {
-	 target->head = head;
-	 target->tail = NULL;
-	 target->tail_pred = tail_pred;
-
-	 target->head->prev = (exec_node *) &target->head;
-	 target->tail_pred->next = (exec_node *) &target->tail;
-
-	 make_empty();
-      }
-   }
+   void move_nodes_to(exec_list *target);
 
    /**
     * Append all nodes from the source list to the target list
     */
-   void
-   append_list(exec_list *source)
-   {
-      if (source->is_empty())
-	 return;
-
-      /* Link the first node of the source with the last node of the target list.
-       */
-      this->tail_pred->next = source->head;
-      source->head->prev = this->tail_pred;
-
-      /* Make the tail of the source list be the tail of the target list.
-       */
-      this->tail_pred = source->tail_pred;
-      this->tail_pred->next = (exec_node *) &this->tail;
-
-      /* Make the source list empty for good measure.
-       */
-      source->make_empty();
-   }
+   void append_list(exec_list *source);
 #endif
 };
 
+static inline void
+exec_list_make_empty(struct exec_list *list)
+{
+   list->head = (struct exec_node *) & list->tail;
+   list->tail = NULL;
+   list->tail_pred = (struct exec_node *) & list->head;
+}
 
-#ifdef __cplusplus
-inline void exec_node::insert_before(exec_list *before)
+static inline bool
+exec_list_is_empty(const struct exec_list *list)
+{
+   /* There are three ways to test whether a list is empty or not.
+    *
+    * - Check to see if the \c head points to the \c tail.
+    * - Check to see if the \c tail_pred points to the \c head.
+    * - Check to see if the \c head is the sentinel node by test whether its
+    *   \c next pointer is \c NULL.
+    *
+    * The first two methods tend to generate better code on modern systems
+    * because they save a pointer dereference.
+    */
+   return list->head == (struct exec_node *) &list->tail;
+}
+
+static inline const struct exec_node *
+exec_list_get_head_const(const struct exec_list *list)
+{
+   return !exec_list_is_empty(list) ? list->head : NULL;
+}
+
+static inline struct exec_node *
+exec_list_get_head(struct exec_list *list)
+{
+   return !exec_list_is_empty(list) ? list->head : NULL;
+}
+
+static inline const struct exec_node *
+exec_list_get_tail_const(const struct exec_list *list)
+{
+   return !exec_list_is_empty(list) ? list->tail_pred : NULL;
+}
+
+static inline struct exec_node *
+exec_list_get_tail(struct exec_list *list)
+{
+   return !exec_list_is_empty(list) ? list->tail_pred : NULL;
+}
+
+static inline void
+exec_list_push_head(struct exec_list *list, struct exec_node *n)
+{
+   n->next = list->head;
+   n->prev = (struct exec_node *) &list->head;
+
+   n->next->prev = n;
+   list->head = n;
+}
+
+static inline void
+exec_list_push_tail(struct exec_list *list, struct exec_node *n)
+{
+   n->next = (struct exec_node *) &list->tail;
+   n->prev = list->tail_pred;
+
+   n->prev->next = n;
+   list->tail_pred = n;
+}
+
+static inline void
+exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n)
+{
+   assert(n->prev->next == n);
+
+   n->prev->next = list->head;
+   list->head->prev = n->prev;
+   n->prev = (struct exec_node *) &list->head;
+   list->head = n;
+}
+
+static inline struct exec_node *
+exec_list_pop_head(struct exec_list *list)
+{
+   struct exec_node *const n = exec_list_get_head(list);
+   if (n != NULL)
+      exec_node_remove(n);
+
+   return n;
+}
+
+static inline void
+exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target)
+{
+   if (exec_list_is_empty(list)) {
+      exec_list_make_empty(target);
+   } else {
+      target->head = list->head;
+      target->tail = NULL;
+      target->tail_pred = list->tail_pred;
+
+      target->head->prev = (struct exec_node *) &target->head;
+      target->tail_pred->next = (struct exec_node *) &target->tail;
+
+      exec_list_make_empty(list);
+   }
+}
+
+static inline void
+exec_list_append(struct exec_list *list, struct exec_list *source)
+{
+   if (exec_list_is_empty(source))
+      return;
+
+   /* Link the first node of the source with the last node of the target list.
+    */
+   list->tail_pred->next = source->head;
+   source->head->prev = list->tail_pred;
+
+   /* Make the tail of the source list be the tail of the target list.
+    */
+   list->tail_pred = source->tail_pred;
+   list->tail_pred->next = (struct exec_node *) &list->tail;
+
+   /* Make the source list empty for good measure.
+    */
+   exec_list_make_empty(source);
+}
+
+static inline void
+exec_node_insert_list_before(struct exec_node *n, struct exec_list *before)
 {
-   if (before->is_empty())
+   if (exec_list_is_empty(before))
       return;
 
-   before->tail_pred->next = this;
-   before->head->prev = this->prev;
+   before->tail_pred->next = n;
+   before->head->prev = n->prev;
 
-   this->prev->next = before->head;
-   this->prev = before->tail_pred;
+   n->prev->next = before->head;
+   n->prev = before->tail_pred;
 
-   before->make_empty();
+   exec_list_make_empty(before);
+}
+
+#ifdef __cplusplus
+inline void exec_list::make_empty()
+{
+   exec_list_make_empty(this);
+}
+
+inline bool exec_list::is_empty() const
+{
+   return exec_list_is_empty(this);
+}
+
+inline const exec_node *exec_list::get_head() const
+{
+   return exec_list_get_head_const(this);
+}
+
+inline exec_node *exec_list::get_head()
+{
+   return exec_list_get_head(this);
+}
+
+inline const exec_node *exec_list::get_tail() const
+{
+   return exec_list_get_tail_const(this);
+}
+
+inline exec_node *exec_list::get_tail()
+{
+   return exec_list_get_tail(this);
+}
+
+inline void exec_list::push_head(exec_node *n)
+{
+   exec_list_push_head(this, n);
+}
+
+inline void exec_list::push_tail(exec_node *n)
+{
+   exec_list_push_tail(this, n);
+}
+
+inline void exec_list::push_degenerate_list_at_head(exec_node *n)
+{
+   exec_list_push_degenerate_list_at_head(this, n);
+}
+
+inline exec_node *exec_list::pop_head()
+{
+   return exec_list_pop_head(this);
+}
+
+inline void exec_list::move_nodes_to(exec_list *target)
+{
+   exec_list_move_nodes_to(this, target);
+}
+
+inline void exec_list::append_list(exec_list *source)
+{
+   exec_list_append(this, source);
+}
+
+inline void exec_node::insert_before(exec_list *before)
+{
+   exec_node_insert_list_before(this, before);
 }
 #endif
 
 /**
  * This version is safe even if the current node is removed.
  */ 
-#define foreach_list_safe(__node, __list)			     \
-   for (exec_node * __node = (__list)->head, * __next = __node->next \
-	; __next != NULL					     \
+#define foreach_list_safe(__node, __list)					\
+   for (struct exec_node * __node = (__list)->head, * __next = __node->next	\
+	; __next != NULL							\
 	; __node = __next, __next = __next->next)
 
 #define foreach_list(__node, __list)			\
-   for (exec_node * __node = (__list)->head		\
+   for (struct exec_node * __node = (__list)->head	\
 	; (__node)->next != NULL 			\
 	; (__node) = (__node)->next)
 
@@ -397,19 +587,19 @@ inline void exec_node::insert_before(exec_list *before)
  * This is safe against either current node being removed or replaced.
  */
 #define foreach_two_lists(__node1, __list1, __node2, __list2) \
-   for (exec_node * __node1 = (__list1)->head,                \
-                  * __node2 = (__list2)->head,                \
-                  * __next1 = __node1->next,                  \
-                  * __next2 = __node2->next                   \
+   for (struct exec_node * __node1 = (__list1)->head,         \
+                         * __node2 = (__list2)->head,         \
+                         * __next1 = __node1->next,           \
+                         * __next2 = __node2->next            \
 	; __next1 != NULL && __next2 != NULL                  \
 	; __node1 = __next1,                                  \
           __node2 = __next2,                                  \
           __next1 = __next1->next,                            \
           __next2 = __next2->next)
 
-#define foreach_list_const(__node, __list)		\
-   for (const exec_node * __node = (__list)->head	\
-	; (__node)->next != NULL 			\
+#define foreach_list_const(__node, __list)			\
+   for (const struct exec_node * __node = (__list)->head	\
+	; (__node)->next != NULL 				\
 	; (__node) = (__node)->next)
 
 #define foreach_list_typed(__type, __node, __field, __list)		\
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index 9d5539252..ac7514acf 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -45,7 +45,9 @@ namespace {
 
 class ir_algebraic_visitor : public ir_rvalue_visitor {
 public:
-   ir_algebraic_visitor(bool native_integers)
+   ir_algebraic_visitor(bool native_integers,
+                        const struct gl_shader_compiler_options *options)
+      : options(options)
    {
       this->progress = false;
       this->mem_ctx = NULL;
@@ -69,6 +71,7 @@ public:
    ir_rvalue *swizzle_if_required(ir_expression *expr,
 				  ir_rvalue *operand);
 
+   const struct gl_shader_compiler_options *options;
    void *mem_ctx;
 
    bool native_integers;
@@ -116,6 +119,46 @@ update_type(ir_expression *ir)
       ir->type = ir->operands[1]->type;
 }
 
+/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0); the four components may
+ * come in any order.  Returns NULL when the pattern does not match.
+ */
+static ir_expression *
+try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
+{
+   if (!expr0 || expr0->operation != ir_binop_add ||
+       !expr0->type->is_float() ||
+       !expr1 || expr1->operation != ir_binop_add ||
+       !expr1->type->is_float())
+      return NULL;
+
+   ir_swizzle *const terms[4] = {
+      expr0->operands[0]->as_swizzle(),
+      expr0->operands[1]->as_swizzle(),
+      expr1->operands[0]->as_swizzle(),
+      expr1->operands[1]->as_swizzle(),
+   };
+
+   /* Every term must select exactly one component... */
+   for (unsigned i = 0; i < 4; i++) {
+      if (!terms[i] || terms[i]->mask.num_components != 1)
+         return NULL;
+   }
+
+   /* ...and between them they must cover x, y, z and w. */
+   bool covered[4] = {false, false, false, false};
+   for (unsigned i = 0; i < 4; i++)
+      covered[terms[i]->mask.x] = true;
+   if (!covered[0] || !covered[1] || !covered[2] || !covered[3])
+      return NULL;
+
+   /* All four swizzles have to read the same value. */
+   for (unsigned i = 1; i < 4; i++) {
+      if (!terms[0]->val->equals(terms[i]->val))
+         return NULL;
+   }
+   return dot(terms[0]->val, new(mem_ctx) ir_constant(1.0f, 4));
+}
+
 void
 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
 					   int op1,
@@ -329,6 +372,14 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (op_const[1] && !op_const[0])
 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
 
+      /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
+      if (options->OptimizeForAOS) {
+         ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
+                                                    mem_ctx);
+         if (expr)
+            return expr;
+      }
+
       /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
        *
        * (-x + y) * a + x
@@ -380,6 +431,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
             }
          }
       }
+
       break;
 
    case ir_binop_sub:
@@ -647,9 +699,10 @@ ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
 }
 
 bool
-do_algebraic(exec_list *instructions, bool native_integers)
+do_algebraic(exec_list *instructions, bool native_integers,
+             const struct gl_shader_compiler_options *options)
 {
-   ir_algebraic_visitor v(native_integers);
+   ir_algebraic_visitor v(native_integers, options);
 
    visit_list_elements(&v, instructions);
 
diff --git a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
index 6612592aa..50c8aa763 100644
--- a/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/mesalib/src/glsl/opt_dead_builtin_varyings.cpp
@@ -334,7 +334,7 @@ public:
    }
 
    void prepare_array(exec_list *ir,
-                      struct ir_variable **new_var,
+                      ir_variable **new_var,
                       int max_elements, unsigned start_location,
                       const char *var_name, const char *mode_str,
                       unsigned usage, unsigned external_usage)
diff --git a/mesalib/src/glsl/opt_rebalance_tree.cpp b/mesalib/src/glsl/opt_rebalance_tree.cpp
new file mode 100644
index 000000000..773aab3f6
--- /dev/null
+++ b/mesalib/src/glsl/opt_rebalance_tree.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_rebalance_tree.cpp
+ *
+ * Rebalances a reduction expression tree.
+ *
+ * For reduction operations (e.g., x + y + z + w) we generate an expression
+ * tree like
+ *
+ *        +
+ *       / \
+ *      +   w
+ *     / \
+ *    +   z
+ *   / \
+ *  x   y
+ *
+ * which we can rebalance into
+ *
+ *       +
+ *      / \
+ *     /   \
+ *    +     +
+ *   / \   / \
+ *  x   y z   w
+ *
+ * to get a better instruction scheduling.
+ *
+ * See "Tree Rebalancing in Optimal Time and Space" by Quentin F. Stout and
+ * Bette L. Warren.
+ *
+ * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
+ * explanation of the tree_to_vine() (rightward rotation) and
+ * vine_to_tree() (leftward rotation) algorithms.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+
+/* DSW phase one: flatten the tree under root->operands[1] into a vine (really,
+ * a linked list through operands[1]) and return the number of nodes walked.
+ * The ternary operators below make sure we only rotate around ir_expression
+ * nodes, so no binary expression is left with only one operand.
+ */
+static unsigned
+tree_to_vine(ir_expression *root)
+{
+   unsigned size = 0;
+   ir_rvalue *vine_tail = root;
+   ir_rvalue *remainder = root->operands[1];
+
+   while (remainder != NULL) {
+      ir_expression *remainder_temp = remainder->as_expression();
+      ir_expression *remainder_left = remainder_temp ?
+         remainder_temp->operands[0]->as_expression() : NULL;
+
+      if (remainder_left == NULL) {
+         /* no left subtree left to unwind here: step down the vine */
+         vine_tail = remainder;
+         remainder = remainder->as_expression() ?
+            ((ir_expression *)remainder)->operands[1] : NULL;
+         size++;
+      } else {
+         /* rotate right: the left child takes remainder's place */
+         ir_expression *tempptr = remainder_left;
+         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
+         tempptr->operands[1] = remainder;
+         remainder = tempptr;
+         ((ir_expression *)vine_tail)->operands[1] = tempptr;
+      }
+   }
+
+   return size;
+}
+
+static void
+compression(ir_expression *root, unsigned count)
+{
+   for (ir_expression *parent = root; count > 0; count--) {
+      /* Rotate left: parent's grandchild is hoisted above parent's child. */
+      ir_expression *child = (ir_expression *)parent->operands[1];
+      ir_expression *grandchild = (ir_expression *)child->operands[1];
+      parent->operands[1] = grandchild;
+      child->operands[1] = grandchild->operands[0];
+      grandchild->operands[0] = child;
+      parent = grandchild;
+   }
+}
+
+static void
+vine_to_tree(ir_expression *root, unsigned size)
+{
+   /* Each pass folds about half of what is left of the vine into the tree. */
+   for (int rest = size - 1, fold = rest / 2; fold > 0; fold = rest / 2) {
+      compression(root, fold);
+      rest -= fold + 1;
+   }
+}
+
+namespace {
+
+/* Rvalue visitor that offers each reduction-rooted expression it enters to
+ * handle_expression() and records whether anything was replaced. */
+class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
+public:
+   ir_rebalance_visitor() : progress(false) { }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+
+   bool progress; /* set once an rvalue has been replaced */
+};
+
+/* State accumulated by is_reduction() while visit_tree() walks a tree. */
+struct is_reduction_data {
+   ir_expression_operation operation; /* shared by all exprs; 0 = unset */
+   const glsl_type *type;             /* shared by all exprs; NULL = unset */
+   unsigned num_expr;                 /* ir_expression nodes counted */
+   bool is_reduction;                 /* cleared once the tree is rejected */
+   bool contains_constant;            /* an ir_constant has been seen */
+};
+
+} /* anonymous namespace */
+
+static bool
+is_reduction_operation(ir_expression_operation operation)
+{
+   /* Binary operations this pass is willing to rebalance. */
+   static const ir_expression_operation reductions[] = {
+      ir_binop_add, ir_binop_mul,
+      ir_binop_bit_and, ir_binop_bit_xor, ir_binop_bit_or,
+      ir_binop_logic_and, ir_binop_logic_xor, ir_binop_logic_or,
+      ir_binop_min, ir_binop_max,
+   };
+   const unsigned count = sizeof(reductions) / sizeof(reductions[0]);
+
+   for (unsigned i = 0; i < count; i++) {
+      if (reductions[i] == operation)
+         return true;
+   }
+
+   return false;
+}
+
+/* visit_tree() callback that folds one node into the is_reduction_data at
+ * \c data. It does not try to recognize reduction trees already balanced.
+ *
+ * ird->is_reduction is cleared for a number of reasons other than an
+ * expression tree not being a mathematical reduction. Namely,
+ *
+ *    - if the tree contains multiple constants that we may be able to combine.
+ *    - if the tree contains matrices:
+ *       - they might contain vec4's with many constant components that we can
+ *         simplify after splitting.
+ *       - applying the matrix chain ordering optimization is more than just
+ *         balancing an expression tree.
+ *    - if the tree contains operations on multiple types.
+ *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
+ *      would trick the visiting pass.
+ */
+static void
+is_reduction(ir_instruction *ir, void *data)
+{
+   struct is_reduction_data *ird = (struct is_reduction_data *)data;
+   if (!ird->is_reduction)
+      return;
+
+   /* We don't want to balance a tree that contains multiple constants, since
+    * we'll be able to constant fold them if they're not in separate subtrees.
+    */
+   if (ir->as_constant()) {
+      if (ird->contains_constant) {
+         ird->is_reduction = false;
+      }
+      ird->contains_constant = true;
+      return;
+   }
+
+   /* Array/record dereferences have subtrees that are not part of the expr
+    * tree we're balancing. Skip trees containing them.
+    */
+   if (ir->ir_type == ir_type_dereference_array ||
+       ir->ir_type == ir_type_dereference_record) {
+      ird->is_reduction = false;
+      return;
+   }
+
+   ir_expression *expr = ir->as_expression();
+   if (!expr)
+      return;
+
+   /* Non-constant matrices might still contain constant vec4 that we can
+    * constant fold once split up. Handling matrices will need some more
+    * work.
+    */
+   if (expr->type->is_matrix()) {
+      ird->is_reduction = false;
+      return;
+   }
+
+   if (ird->type != NULL && ird->type != expr->type) {
+      ird->is_reduction = false;
+      return;
+   }
+   ird->type = expr->type;
+
+   ird->num_expr++;
+   if (is_reduction_operation(expr->operation)) {
+      if (ird->operation != 0 && ird->operation != expr->operation)
+         ird->is_reduction = false;
+      ird->operation = expr->operation;
+   } else {
+      ird->is_reduction = false;
+   }
+}
+
+static ir_rvalue *
+handle_expression(ir_expression *expr)
+{
+   struct is_reduction_data verdict;
+   verdict.operation = (ir_expression_operation)0;
+   verdict.type = NULL;
+   verdict.num_expr = 0;
+   verdict.is_reduction = true;
+   verdict.contains_constant = false;
+
+   visit_tree(expr, is_reduction, (void *)&verdict);
+   if (!verdict.is_reduction || verdict.num_expr <= 2)
+      return expr;
+
+   /* Hang the tree off a throwaway root so that its real root can be rotated
+    * like any other node; the rebalanced tree ends up in operands[1].
+    */
+   ir_constant dummy = ir_constant(0.0f);
+   ir_expression pseudo_root = ir_expression(ir_binop_add, &dummy, expr);
+
+   vine_to_tree(&pseudo_root, tree_to_vine(&pseudo_root));
+   return (ir_expression *)pseudo_root.operands[1];
+}
+
+void
+ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   ir_rvalue *const old_rvalue = *rvalue;
+   if (old_rvalue == NULL)
+      return;
+
+   ir_expression *root = old_rvalue->as_expression();
+   if (root == NULL || !is_reduction_operation(root->operation))
+      return;
+
+   ir_rvalue *balanced = handle_expression(root);
+
+   /* handle_expression() returns the root it was given when it declined to
+    * rebalance the tree (e.g., because it wasn't a reduction, or some other
+    * set of cases).
+    *
+    * Likewise, rebalancing a reduction that was already balanced shuffles
+    * the nodes around but ultimately yields an identical tree with the same
+    * root, so comparing roots covers that case too and progress is only
+    * reported for a real change.
+    */
+   if (balanced != old_rvalue) {
+      *rvalue = balanced;
+      this->progress = true;
+   }
+}
+
+bool
+do_rebalance_tree(exec_list *instructions)
+{
+   ir_rebalance_visitor rebalancer;
+   rebalancer.run(instructions);
+
+   /* Tell the caller whether any expression tree was replaced. */
+   return rebalancer.progress;
+}
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 6c25010b7..809732c7e 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -98,6 +98,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
    ctx->Extensions.ARB_ES3_compatibility = true;
    ctx->Extensions.ARB_explicit_attrib_location = true;
    ctx->Extensions.ARB_fragment_coord_conventions = true;
+   ctx->Extensions.ARB_fragment_layer_viewport = true;
    ctx->Extensions.ARB_gpu_shader5 = true;
    ctx->Extensions.ARB_sample_shading = true;
    ctx->Extensions.ARB_shader_bit_encoding = true;
diff --git a/mesalib/src/glsl/test_optpass.cpp b/mesalib/src/glsl/test_optpass.cpp
index db5cb2662..e4878bf15 100644
--- a/mesalib/src/glsl/test_optpass.cpp
+++ b/mesalib/src/glsl/test_optpass.cpp
@@ -65,7 +65,7 @@ do_optimization(struct exec_list *ir, const char *optimization,
    if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) {
       return do_common_optimization(ir, int_0 != 0, false, options, true);
    } else if (strcmp(optimization, "do_algebraic") == 0) {
-      return do_algebraic(ir, true);
+      return do_algebraic(ir, true, options);
    } else if (strcmp(optimization, "do_constant_folding") == 0) {
       return do_constant_folding(ir);
    } else if (strcmp(optimization, "do_constant_variable") == 0) {