51 files changed, 2033 insertions, 445 deletions
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am
index b466a3b5c..23c6fe8bb 100644
--- a/mesalib/src/glsl/Makefile.am
+++ b/mesalib/src/glsl/Makefile.am
@@ -46,6 +46,7 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README	\
 	glcpp/glcpp-lex.l				\
 	glcpp/glcpp-parse.y				\
 	nir/nir_algebraic.py				\
+	nir/nir_builder_opcodes_h.py			\
 	nir/nir_constant_expressions.py			\
 	nir/nir_opcodes.py				\
 	nir/nir_opcodes_c.py				\
@@ -67,7 +68,7 @@ TESTS_ENVIRONMENT= \
 	export PYTHON2=$(PYTHON2); \
 	export PYTHON_FLAGS=$(PYTHON_FLAGS);
 
-noinst_LTLIBRARIES = libglsl.la libglcpp.la
+noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la
 check_PROGRAMS =					\
 	glcpp/glcpp					\
 	glsl_test					\
@@ -147,6 +148,12 @@ libglsl_la_SOURCES =					\
 	$(LIBGLSL_FILES)				\
 	$(NIR_FILES)
 
+libnir_la_SOURCES =					\
+	glsl_types.cpp					\
+	builtin_types.cpp				\
+	glsl_symbol_table.cpp				\
+	$(NIR_FILES)
+
 glsl_compiler_SOURCES = \
 	$(GLSL_COMPILER_CXX_FILES)
 
@@ -251,8 +258,6 @@ nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
 	$(MKDIR_P) nir;							\
 	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
 
-nir/nir.h: nir/nir_opcodes.h
-
 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
 	$(MKDIR_P) nir;							\
 	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index b876642e8..c471eca23 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -22,6 +22,7 @@ NIR_FILES = \
 	nir/glsl_to_nir.h \
 	nir/nir.c \
 	nir/nir.h \
+	nir/nir_builder.h \
 	nir/nir_constant_expressions.h \
 	nir/nir_dominance.c \
 	nir/nir_from_ssa.c \
@@ -32,21 +33,25 @@ NIR_FILES = \
 	nir/nir_lower_atomics.c \
 	nir/nir_lower_global_vars_to_local.c \
 	nir/nir_lower_locals_to_regs.c \
+	nir/nir_lower_idiv.c \
 	nir/nir_lower_io.c \
 	nir/nir_lower_phis_to_scalar.c \
 	nir/nir_lower_samplers.cpp \
 	nir/nir_lower_system_values.c \
+	nir/nir_lower_tex_projector.c \
 	nir/nir_lower_to_source_mods.c \
 	nir/nir_lower_vars_to_ssa.c \
 	nir/nir_lower_var_copies.c \
 	nir/nir_lower_vec_to_movs.c \
 	nir/nir_metadata.c \
+	nir/nir_normalize_cubemap_coords.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_propagate.c \
 	nir/nir_opt_cse.c \
 	nir/nir_opt_dce.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_peephole_ffma.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
 	nir/nir_print.c \
@@ -54,9 +59,11 @@ NIR_FILES = \
 	nir/nir_search.c \
 	nir/nir_search.h \
 	nir/nir_split_var_copies.c \
+	nir/nir_sweep.c \
 	nir/nir_to_ssa.c \
 	nir/nir_types.h \
 	nir/nir_validate.c \
+	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
 	nir/nir_types.cpp \
@@ -183,7 +190,8 @@ LIBGLSL_FILES = \
 	opt_vectorize.cpp \
 	program.h \
 	s_expression.cpp \
-	s_expression.h
+	s_expression.h \
+	shader_enums.h
 
 # glsl_compiler
 
diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp
index 918be6966..87df93e68 100644
--- a/mesalib/src/glsl/ast_function.cpp
+++ b/mesalib/src/glsl/ast_function.cpp
@@ -1370,71 +1370,59 @@ emit_inline_matrix_constructor(const glsl_type *type,
    } else {
       const unsigned cols = type->matrix_columns;
       const unsigned rows = type->vector_elements;
+      unsigned remaining_slots = rows * cols;
       unsigned col_idx = 0;
       unsigned row_idx = 0;
 
       foreach_in_list(ir_rvalue, rhs, parameters) {
-	 const unsigned components_remaining_this_column = rows - row_idx;
-	 unsigned rhs_components = rhs->type->components();
-	 unsigned rhs_base = 0;
-
-	 /* Since the parameter might be used in the RHS of two assignments,
-	  * generate a temporary and copy the paramter there.
-	  */
-	 ir_variable *rhs_var =
-	    new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary);
-	 instructions->push_tail(rhs_var);
-
-	 ir_dereference *rhs_var_ref =
-	    new(ctx) ir_dereference_variable(rhs_var);
-	 ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
-	 instructions->push_tail(inst);
-
-	 /* Assign the current parameter to as many components of the matrix
-	  * as it will fill.
-	  *
-	  * NOTE: A single vector parameter can span two matrix columns.  A
-	  * single vec4, for example, can completely fill a mat2.
-	  */
-	 if (rhs_components >= components_remaining_this_column) {
-	    const unsigned count = MIN2(rhs_components,
-					components_remaining_this_column);
-
-	    rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
-
-	    ir_instruction *inst = assign_to_matrix_column(var, col_idx,
-							   row_idx,
-							   rhs_var_ref, 0,
-							   count, ctx);
-	    instructions->push_tail(inst);
-
-	    rhs_base = count;
-
-	    col_idx++;
-	    row_idx = 0;
-	 }
-
-	 /* If there is data left in the parameter and components left to be
-	  * set in the destination, emit another assignment.  It is possible
-	  * that the assignment could be of a vec4 to the last element of the
-	  * matrix.  In this case col_idx==cols, but there is still data
-	  * left in the source parameter.  Obviously, don't emit an assignment
-	  * to data outside the destination matrix.
-	  */
-	 if ((col_idx < cols) && (rhs_base < rhs_components)) {
-	    const unsigned count = rhs_components - rhs_base;
-
-	    rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
-
-	    ir_instruction *inst = assign_to_matrix_column(var, col_idx,
-							   row_idx,
-							   rhs_var_ref,
-							   rhs_base,
-							   count, ctx);
-	    instructions->push_tail(inst);
-
-	    row_idx += count;
-	 }
+         unsigned rhs_components = rhs->type->components();
+         unsigned rhs_base = 0;
+
+         if (remaining_slots == 0)
+            break;
+
+         /* Since the parameter might be used in the RHS of two assignments,
+          * generate a temporary and copy the parameter there.
+          */
+         ir_variable *rhs_var =
+            new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary);
+         instructions->push_tail(rhs_var);
+
+         ir_dereference *rhs_var_ref =
+            new(ctx) ir_dereference_variable(rhs_var);
+         ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
+         instructions->push_tail(inst);
+
+         do {
+            /* Assign the current parameter to as many components of the matrix
+             * as it will fill.
+             *
+             * NOTE: A single vector parameter can span two matrix columns.  A
+             * single vec4, for example, can completely fill a mat2.
+             */
+            unsigned count = MIN2(rows - row_idx,
+                                  rhs_components - rhs_base);
+
+            rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
+            ir_instruction *inst = assign_to_matrix_column(var, col_idx,
+                                                         row_idx,
+                                                         rhs_var_ref,
+                                                         rhs_base,
+                                                         count, ctx);
+            instructions->push_tail(inst);
+            rhs_base += count;
+            row_idx += count;
+            remaining_slots -= count;
+
+            /* There may still be data left in the parameter and components
+             * left to set in the destination, but in the next column; wrap
+             * around to the top of that column.
+             */
+            if (row_idx >= rows) {
+               row_idx = 0;
+               col_idx++;
+            }
+         } while(remaining_slots > 0 && rhs_base < rhs_components);
       }
    }
 
@@ -1791,7 +1779,7 @@ ast_function_expression::hir(exec_list *instructions,
       return value;
    }
 
-   return ir_rvalue::error_value(ctx);
+   unreachable("not reached");
 }
 
 ir_rvalue *
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index d387b2e35..78369360f 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -375,66 +375,14 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
       if (type_a == type_b)
          return type_a;
    } else {
-      if (type_a->is_matrix() && type_b->is_matrix()) {
-         /* Matrix multiply.  The columns of A must match the rows of B.  Given
-          * the other previously tested constraints, this means the vector type
-          * of a row from A must be the same as the vector type of a column from
-          * B.
-          */
-         if (type_a->row_type() == type_b->column_type()) {
-            /* The resulting matrix has the number of columns of matrix B and
-             * the number of rows of matrix A.  We get the row count of A by
-             * looking at the size of a vector that makes up a column.  The
-             * transpose (size of a row) is done for B.
-             */
-            const glsl_type *const type =
-               glsl_type::get_instance(type_a->base_type,
-                                       type_a->column_type()->vector_elements,
-                                       type_b->row_type()->vector_elements);
-            assert(type != glsl_type::error_type);
-
-            return type;
-         }
-      } else if (type_a->is_matrix()) {
-         /* A is a matrix and B is a column vector.  Columns of A must match
-          * rows of B.  Given the other previously tested constraints, this
-          * means the vector type of a row from A must be the same as the
-          * vector the type of B.
-          */
-         if (type_a->row_type() == type_b) {
-            /* The resulting vector has a number of elements equal to
-             * the number of rows of matrix A. */
-            const glsl_type *const type =
-               glsl_type::get_instance(type_a->base_type,
-                                       type_a->column_type()->vector_elements,
-                                       1);
-            assert(type != glsl_type::error_type);
-
-            return type;
-         }
-      } else {
-         assert(type_b->is_matrix());
+      const glsl_type *type = glsl_type::get_mul_type(type_a, type_b);
 
-         /* A is a row vector and B is a matrix.  Columns of A must match rows
-          * of B.  Given the other previously tested constraints, this means
-          * the type of A must be the same as the vector type of a column from
-          * B.
-          */
-         if (type_a == type_b->column_type()) {
-            /* The resulting vector has a number of elements equal to
-             * the number of columns of matrix B. */
-            const glsl_type *const type =
-               glsl_type::get_instance(type_a->base_type,
-                                       type_b->row_type()->vector_elements,
-                                       1);
-            assert(type != glsl_type::error_type);
-
-            return type;
-         }
+      if (type == glsl_type::error_type) {
+         _mesa_glsl_error(loc, state,
+                          "size mismatch for matrix multiplication");
       }
 
-      _mesa_glsl_error(loc, state, "size mismatch for matrix multiplication");
-      return glsl_type::error_type;
+      return type;
    }
 
 
@@ -5776,6 +5724,9 @@ ast_interface_block::hir(exec_list *instructions,
       var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
          ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
 
+      if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+         var->data.read_only = true;
+
       if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in)
          handle_geometry_shader_input_decl(state, loc, var);
 
@@ -5816,6 +5767,9 @@ ast_interface_block::hir(exec_list *instructions,
          var->data.sample = fields[i].sample;
          var->init_interface_type(block_type);
 
+         if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
+            var->data.read_only = true;
+
          if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) {
             var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
                ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp
index c6075722c..524b8d6e8 100644
--- a/mesalib/src/glsl/builtin_functions.cpp
+++ b/mesalib/src/glsl/builtin_functions.cpp
@@ -60,7 +60,7 @@
 #include "ir_builder.h"
 #include "glsl_parser_extras.h"
 #include "program/prog_instruction.h"
-#include <limits>
+#include <math.h>
 
 #define M_PIf   ((float) M_PI)
 #define M_PI_2f ((float) M_PI_2)
@@ -3215,7 +3215,7 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type
 
    ir_constant_data infinities;
    for (int i = 0; i < type->vector_elements; i++) {
-      infinities.f[i] = std::numeric_limits<float>::infinity();
+      infinities.f[i] = INFINITY;
    }
 
    body.emit(ret(equal(abs(x), imm(type, infinities))));
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 8dc3d106b..2785ed168 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -36,14 +36,13 @@ static int classify_identifier(struct _mesa_glsl_parse_state *, const char *);
 
 #define YY_USER_ACTION						\
    do {								\
-      yylloc->source = 0;					\
       yylloc->first_column = yycolumn + 1;			\
       yylloc->first_line = yylloc->last_line = yylineno + 1;	\
       yycolumn += yyleng;					\
       yylloc->last_column = yycolumn + 1;			\
    } while(0);
 
-#define YY_USER_INIT yylineno = 0; yycolumn = 0;
+#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0;
 
 /* A macro for handling reserved words and keywords across language versions.
  *
@@ -188,6 +187,15 @@ HASH		^{SPC}#{SPC}
 				    * one-based.
 				    */
 				   yylineno = strtol(ptr, &ptr, 0) - 1;
+
+                                   /* From GLSL 3.30 and GLSL ES on, after processing the
+                                    * line directive (including its new-line), the implementation
+                                    * will behave as if it is compiling at the line number passed
+                                    * as argument. It was line number + 1 in older specifications.
+                                    */
+                                   if (yyextra->is_version(330, 100))
+                                      yylineno--;
+
 				   yylloc->source = strtol(ptr, NULL, 0);
 				}
 {HASH}line{SPCP}{INT}{SPC}$	{
@@ -203,6 +211,14 @@ HASH		^{SPC}#{SPC}
 				    * one-based.
 				    */
 				   yylineno = strtol(ptr, &ptr, 0) - 1;
+
+                                   /* From GLSL 3.30 and GLSL ES on, after processing the
+                                    * line directive (including its new-line), the implementation
+                                    * will behave as if it is compiling at the line number passed
+                                    * as argument. It was line number + 1 in older specifications.
+                                    */
+                                   if (yyextra->is_version(330, 100))
+                                      yylineno--;
 				}
 ^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) {
 				  BEGIN PP;
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index 79624bc26..0aa3c54fc 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -73,8 +73,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->uses_builtin_functions = false;
 
    /* Set default language version and extensions */
-   this->language_version = ctx->Const.ForceGLSLVersion ?
-                            ctx->Const.ForceGLSLVersion : 110;
+   this->language_version = 110;
+   this->forced_language_version = ctx->Const.ForceGLSLVersion;
    this->es_shader = false;
    this->ARB_texture_rectangle_enable = true;
 
@@ -320,11 +320,14 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version,
       this->ARB_texture_rectangle_enable = false;
    }
 
-   this->language_version = version;
+   if (this->forced_language_version)
+      this->language_version = this->forced_language_version;
+   else
+      this->language_version = version;
 
    bool supported = false;
    for (unsigned i = 0; i < this->num_supported_versions; i++) {
-      if (this->supported_versions[i].ver == (unsigned) version
+      if (this->supported_versions[i].ver == this->language_version
           && this->supported_versions[i].es == this->es_shader) {
          supported = true;
          break;
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 0975c86ed..dae7864fd 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -105,8 +105,10 @@ struct _mesa_glsl_parse_state {
    {
       unsigned required_version = this->es_shader ?
          required_glsl_es_version : required_glsl_version;
+      unsigned this_version = this->forced_language_version
+         ? this->forced_language_version : this->language_version;
       return required_version != 0
-         && this->language_version >= required_version;
+         && this_version >= required_version;
    }
 
    bool check_version(unsigned required_glsl_version,
@@ -226,6 +228,7 @@ struct _mesa_glsl_parse_state {
 
    bool es_shader;
    unsigned language_version;
+   unsigned forced_language_version;
    gl_shader_stage stage;
 
    /**
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index 38b37a6a9..4aa36a794 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -825,6 +825,73 @@ glsl_type::get_interface_instance(const glsl_struct_field *fields,
 
 
 const glsl_type *
+glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
+{
+   if (type_a->is_matrix() && type_b->is_matrix()) {
+      /* Matrix multiply.  The columns of A must match the rows of B, i.e.
+       * the vector type of a row from A must equal the vector type of a
+       * column from B.  Check this before the type_a == type_b shortcut:
+       * identical non-square matrices (mat2x3 * mat2x3) cannot be multiplied.
+       */
+      if (type_a->row_type() == type_b->column_type()) {
+         /* The resulting matrix has the number of columns of matrix B and
+          * the number of rows of matrix A.  We get the row count of A by
+          * looking at the size of a vector that makes up a column.  The
+          * transpose (size of a row) is done for B.
+          */
+         const glsl_type *const type =
+            get_instance(type_a->base_type,
+                         type_a->column_type()->vector_elements,
+                         type_b->row_type()->vector_elements);
+         assert(type != error_type);
+
+         return type;
+      }
+   } else if (type_a == type_b) {
+      return type_a;
+   } else if (type_a->is_matrix()) {
+      /* A is a matrix and B is a column vector.  Columns of A must match
+       * rows of B.  Given the other previously tested constraints, this
+       * means the vector type of a row from A must be the same as the
+       * vector type of B.
+       */
+      if (type_a->row_type() == type_b) {
+         /* The resulting vector has a number of elements equal to
+          * the number of rows of matrix A. */
+         const glsl_type *const type =
+            get_instance(type_a->base_type,
+                         type_a->column_type()->vector_elements,
+                         1);
+         assert(type != error_type);
+
+         return type;
+      }
+   } else {
+      assert(type_b->is_matrix());
+
+      /* A is a row vector and B is a matrix.  Columns of A must match rows
+       * of B.  Given the other previously tested constraints, this means
+       * the type of A must be the same as the vector type of a column from
+       * B.
+       */
+      if (type_a == type_b->column_type()) {
+         /* The resulting vector has a number of elements equal to
+          * the number of columns of matrix B. */
+         const glsl_type *const type =
+            get_instance(type_a->base_type,
+                         type_b->row_type()->vector_elements,
+                         1);
+         assert(type != error_type);
+
+         return type;
+      }
+   }
+
+   return error_type;
+}
+
+
+const glsl_type *
 glsl_type::field_type(const char *name) const
 {
    if (this->base_type != GLSL_TYPE_STRUCT
@@ -1077,15 +1144,6 @@ glsl_type::std140_base_alignment(bool row_major) const
       return base_alignment;
    }
 
-   /* A sampler may never occur in a UBO (without bindless of some sort),
-    * however it is convenient to use this alignment function even with
-    * regular uniforms. This allows use of this function on uniform structs
-    * that contain samplers.
-    */
-   if (this->is_sampler()) {
-      return 0;
-   }
-
    assert(!"not reached");
    return -1;
 }
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 7359e9476..d383dd5be 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -276,6 +276,12 @@ struct glsl_type {
 						  const char *block_name);
 
    /**
+    * Get the type resulting from a multiplication of \p type_a * \p type_b
+    */
+   static const glsl_type *get_mul_type(const glsl_type *type_a,
+                                        const glsl_type *type_b);
+
+   /**
     * Query the total number of scalars that make up a scalar, vector or matrix
     */
    unsigned components() const
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index 54656f899..9e3238552 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -240,8 +240,6 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_round_even:
    case ir_unop_sin:
    case ir_unop_cos:
-   case ir_unop_sin_reduced:
-   case ir_unop_cos_reduced:
    case ir_unop_dFdx:
    case ir_unop_dFdx_coarse:
    case ir_unop_dFdx_fine:
@@ -380,10 +378,12 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
       } else if (op1->type->is_scalar()) {
 	 this->type = op0->type;
       } else {
-	 /* FINISHME: matrix types */
-	 assert(!op0->type->is_matrix() && !op1->type->is_matrix());
-	 assert(op0->type == op1->type);
-	 this->type = op0->type;
+         if (this->operation == ir_binop_mul) {
+            this->type = glsl_type::get_mul_type(op0->type, op1->type);
+         } else {
+            assert(op0->type == op1->type);
+            this->type = op0->type;
+         }
       }
       break;
 
@@ -540,8 +540,6 @@ static const char *const operator_strs[] = {
    "round_even",
    "sin",
    "cos",
-   "sin_reduced",
-   "cos_reduced",
    "dFdx",
    "dFdxCoarse",
    "dFdxFine",
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index fdc22edf1..fab1cd2d2 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -109,6 +109,31 @@ public:
    virtual ir_instruction *clone(void *mem_ctx,
 				 struct hash_table *ht) const = 0;
 
+   bool is_rvalue() const
+   {
+      return ir_type == ir_type_dereference_array ||
+             ir_type == ir_type_dereference_record ||
+             ir_type == ir_type_dereference_variable ||
+             ir_type == ir_type_constant ||
+             ir_type == ir_type_expression ||
+             ir_type == ir_type_swizzle ||
+             ir_type == ir_type_texture;
+   }
+
+   bool is_dereference() const
+   {
+      return ir_type == ir_type_dereference_array ||
+             ir_type == ir_type_dereference_record ||
+             ir_type == ir_type_dereference_variable;
+   }
+
+   bool is_jump() const
+   {
+      return ir_type == ir_type_loop_jump ||
+             ir_type == ir_type_return ||
+             ir_type == ir_type_discard;
+   }
+
    /**
     * \name IR instruction downcast functions
     *
@@ -117,45 +142,33 @@ public:
     * Additional downcast functions will be added as needed.
     */
    /*@{*/
-   class ir_rvalue *as_rvalue()
-   {
-      assume(this != NULL);
-      if (ir_type == ir_type_dereference_array ||
-          ir_type == ir_type_dereference_record ||
-          ir_type == ir_type_dereference_variable ||
-          ir_type == ir_type_constant ||
-          ir_type == ir_type_expression ||
-          ir_type == ir_type_swizzle ||
-          ir_type == ir_type_texture)
-         return (class ir_rvalue *) this;
-      return NULL;
-   }
-
-   class ir_dereference *as_dereference()
-   {
-      assume(this != NULL);
-      if (ir_type == ir_type_dereference_array ||
-          ir_type == ir_type_dereference_record ||
-          ir_type == ir_type_dereference_variable)
-         return (class ir_dereference *) this;
-      return NULL;
-   }
-
-   class ir_jump *as_jump()
-   {
-      assume(this != NULL);
-      if (ir_type == ir_type_loop_jump ||
-          ir_type == ir_type_return ||
-          ir_type == ir_type_discard)
-         return (class ir_jump *) this;
-      return NULL;
-   }
+   #define AS_BASE(TYPE)                                \
+   class ir_##TYPE *as_##TYPE()                         \
+   {                                                    \
+      assume(this != NULL);                             \
+      return is_##TYPE() ? (ir_##TYPE *) this : NULL;   \
+   }                                                    \
+   const class ir_##TYPE *as_##TYPE() const             \
+   {                                                    \
+      assume(this != NULL);                             \
+      return is_##TYPE() ? (const ir_##TYPE *) this : NULL; \
+   }
+
+   AS_BASE(rvalue)
+   AS_BASE(dereference)
+   AS_BASE(jump)
+   #undef AS_BASE
 
    #define AS_CHILD(TYPE) \
    class ir_##TYPE * as_##TYPE() \
    { \
       assume(this != NULL);                                         \
       return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \
+   }                                                                      \
+   const class ir_##TYPE * as_##TYPE() const                              \
+   {                                                                      \
+      assume(this != NULL);                                               \
+      return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \
    }
    AS_CHILD(variable)
    AS_CHILD(function)
@@ -183,7 +196,8 @@ public:
     * in particular.  No support for other instruction types (assignments,
     * jumps, calls, etc.) is planned.
     */
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
 protected:
    ir_instruction(enum ir_node_type t)
@@ -1300,8 +1314,6 @@ enum ir_expression_operation {
    /*@{*/
    ir_unop_sin,
    ir_unop_cos,
-   ir_unop_sin_reduced,    /**< Reduced range sin. [-pi, pi] */
-   ir_unop_cos_reduced,    /**< Reduced range cos. [-pi, pi] */
    /*@}*/
 
    /**
@@ -1598,7 +1610,8 @@ public:
     */
    ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
 
@@ -1909,7 +1922,8 @@ public:
 
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    /**
     * Return a string representing the ir_texture_opcode.
@@ -2010,7 +2024,8 @@ public:
 
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    bool is_lvalue() const
    {
@@ -2063,7 +2078,8 @@ public:
 
    virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    /**
     * Get the variable that is ultimately referenced by an r-value
@@ -2109,7 +2125,8 @@ public:
 
    virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    /**
     * Get the variable that is ultimately referenced by an r-value
@@ -2219,7 +2236,8 @@ public:
 
    virtual ir_visitor_status accept(ir_hierarchical_visitor *);
 
-   virtual bool equals(ir_instruction *ir, enum ir_node_type ignore = ir_type_unset);
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
 
    /**
     * Get a particular component of a constant as a specific type
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index ecebc3cdc..171b8e954 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -781,7 +781,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_sin:
-   case ir_unop_sin_reduced:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 data.f[c] = sinf(op[0]->value.f[c]);
@@ -789,7 +788,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_cos:
-   case ir_unop_cos_reduced:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 data.f[c] = cosf(op[0]->value.f[c]);
diff --git a/mesalib/src/glsl/ir_equals.cpp b/mesalib/src/glsl/ir_equals.cpp
index 65376cd94..cc1964eef 100644
--- a/mesalib/src/glsl/ir_equals.cpp
+++ b/mesalib/src/glsl/ir_equals.cpp
@@ -28,7 +28,8 @@
  * can't access a's vtable in that case.
  */
 static bool
-possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ignore)
+possibly_null_equals(const ir_instruction *a, const ir_instruction *b,
+                     enum ir_node_type ignore)
 {
    if (!a || !b)
       return !a && !b;
@@ -41,13 +42,13 @@ possibly_null_equals(ir_instruction *a, ir_instruction *b, enum ir_node_type ign
  * about.
  */
 bool
-ir_instruction::equals(ir_instruction *, enum ir_node_type)
+ir_instruction::equals(const ir_instruction *, enum ir_node_type) const
 {
    return false;
 }
 
 bool
-ir_constant::equals(ir_instruction *ir, enum ir_node_type)
+ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
 {
    const ir_constant *other = ir->as_constant();
    if (!other)
@@ -65,7 +66,8 @@ ir_constant::equals(ir_instruction *ir, enum ir_node_type)
 }
 
 bool
-ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type)
+ir_dereference_variable::equals(const ir_instruction *ir,
+                                enum ir_node_type) const
 {
    const ir_dereference_variable *other = ir->as_dereference_variable();
    if (!other)
@@ -75,7 +77,8 @@ ir_dereference_variable::equals(ir_instruction *ir, enum ir_node_type)
 }
 
 bool
-ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_dereference_array::equals(const ir_instruction *ir,
+                             enum ir_node_type ignore) const
 {
    const ir_dereference_array *other = ir->as_dereference_array();
    if (!other)
@@ -94,7 +97,8 @@ ir_dereference_array::equals(ir_instruction *ir, enum ir_node_type ignore)
 }
 
 bool
-ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_swizzle::equals(const ir_instruction *ir,
+                   enum ir_node_type ignore) const
 {
    const ir_swizzle *other = ir->as_swizzle();
    if (!other)
@@ -116,7 +120,7 @@ ir_swizzle::equals(ir_instruction *ir, enum ir_node_type ignore)
 }
 
 bool
-ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
 {
    const ir_texture *other = ir->as_texture();
    if (!other)
@@ -179,7 +183,7 @@ ir_texture::equals(ir_instruction *ir, enum ir_node_type ignore)
 }
 
 bool
-ir_expression::equals(ir_instruction *ir, enum ir_node_type ignore)
+ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const
 {
    const ir_expression *other = ir->as_expression();
    if (!other)
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 7a7688cb2..cfe0df3dc 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -334,8 +334,6 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
    case ir_unop_sin:
    case ir_unop_cos:
-   case ir_unop_sin_reduced:
-   case ir_unop_cos_reduced:
    case ir_unop_dFdx:
    case ir_unop_dFdx_coarse:
    case ir_unop_dFdx_fine:
@@ -543,9 +541,9 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_binop_logic_and:
    case ir_binop_logic_xor:
    case ir_binop_logic_or:
-      assert(ir->type == glsl_type::bool_type);
-      assert(ir->operands[0]->type == glsl_type::bool_type);
-      assert(ir->operands[1]->type == glsl_type::bool_type);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+      assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL);
       break;
 
    case ir_binop_dot:
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index 799c74bb9..59adc298b 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -547,6 +547,8 @@ private:
    virtual void enter_record(const glsl_type *type, const char *name,
                              bool row_major) {
       assert(type->is_record());
+      if (this->ubo_block_index == -1)
+         return;
       this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
@@ -554,6 +556,8 @@ private:
    virtual void leave_record(const glsl_type *type, const char *name,
                              bool row_major) {
       assert(type->is_record());
+      if (this->ubo_block_index == -1)
+         return;
       this->ubo_byte_offset = glsl_align(
             this->ubo_byte_offset, type->std140_base_alignment(row_major));
    }
diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp
index 22617990f..605748a9c 100644
--- a/mesalib/src/glsl/link_varyings.cpp
+++ b/mesalib/src/glsl/link_varyings.cpp
@@ -263,6 +263,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
          if (output != NULL) {
             cross_validate_types_and_qualifiers(prog, input, output,
                                                 consumer->Stage, producer->Stage);
+         } else {
+            /* Check for input vars with unmatched output vars in prev stage
+             * taking into account that interface blocks could have a matching
+             * output but with different name, so we ignore them.
+             */
+            assert(!input->data.assigned);
+            if (input->data.used && !input->get_interface_type() &&
+                !input->data.explicit_location && !prog->SeparateShader)
+               linker_error(prog,
+                            "%s shader input `%s' "
+                            "has no matching output in the previous stage\n",
+                            _mesa_shader_stage_to_string(consumer->Stage),
+                            input->name);
          }
       }
    }
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index 4349f0973..b6baa5d36 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -1377,24 +1377,13 @@ link_fs_input_layout_qualifiers(struct gl_shader_program *prog,
        *   "If gl_FragCoord is redeclared in any fragment shader in a program,
        *    it must be redeclared in all the fragment shaders in that program
        *    that have a static use gl_FragCoord."
-       *
-       * Exclude the case when one of the 'linked_shader' or 'shader' redeclares
-       * gl_FragCoord with no layout qualifiers but the other one doesn't
-       * redeclare it. If we strictly follow GLSL 1.50 spec's language, it
-       * should be a link error. But, generating link error for this case will
-       * be a wrong behaviour which spec didn't intend to do and it could also
-       * break some applications.
        */
       if ((linked_shader->redeclares_gl_fragcoord
            && !shader->redeclares_gl_fragcoord
-           && shader->uses_gl_fragcoord
-           && (linked_shader->origin_upper_left
-               || linked_shader->pixel_center_integer))
+           && shader->uses_gl_fragcoord)
           || (shader->redeclares_gl_fragcoord
               && !linked_shader->redeclares_gl_fragcoord
-              && linked_shader->uses_gl_fragcoord
-              && (shader->origin_upper_left
-                  || shader->pixel_center_integer))) {
+              && linked_shader->uses_gl_fragcoord)) {
              linker_error(prog, "fragment shader defined with conflicting "
                          "layout qualifiers for gl_FragCoord\n");
       }
@@ -2503,6 +2492,194 @@ check_explicit_uniform_locations(struct gl_context *ctx,
    delete uniform_map;
 }
 
+/* Appends a resource entry for data unless data is already in the list.
+ * Returns false, after reporting a linker error, if allocation fails.
+ */
+static bool
+add_program_resource(struct gl_shader_program *prog, GLenum type,
+                     const void *data, uint8_t stages)
+{
+   assert(data);
+
+   /* If resource already exists, do not add it again. */
+   for (unsigned i = 0; i < prog->NumProgramResourceList; i++)
+      if (prog->ProgramResourceList[i].Data == data)
+         return true;
+
+   /* Grow into a temporary so the old list survives a failed reralloc(). */
+   struct gl_program_resource *list =
+      reralloc(prog, prog->ProgramResourceList, gl_program_resource,
+               prog->NumProgramResourceList + 1);
+   if (!list) {
+      linker_error(prog, "Out of memory during linking.\n");
+      return false;
+   }
+   prog->ProgramResourceList = list;
+
+   struct gl_program_resource *res = &list[prog->NumProgramResourceList];
+
+   res->Type = type;
+   res->Data = data;
+   res->StageReferences = stages;
+   prog->NumProgramResourceList++;
+
+   return true;
+}
+
+/**
+ * Builds a bitmask with bit i set for every linked stage i whose symbol
+ * table contains a variable called name.
+ */
+static uint8_t
+build_stageref(struct gl_shader_program *shProg, const char *name)
+{
+   /* The mask is 8 bits wide; if more stages are ever added, the type used
+    * for the reference mask in gl_program_resource will need to be changed.
+    */
+   assert(MESA_SHADER_STAGES < 8);
+
+   uint8_t mask = 0;
+
+   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+      struct gl_shader *linked = shProg->_LinkedShaders[stage];
+
+      if (linked != NULL && linked->symbols->get_variable(name) != NULL)
+         mask |= (1 << stage);
+   }
+
+   return mask;
+}
+
+/* Adds sh's variables matching programInterface; returns false on failure. */
+static bool
+add_interface_variables(struct gl_shader_program *shProg,
+                        struct gl_shader *sh, GLenum programInterface)
+{
+   foreach_in_list(ir_instruction, node, sh->ir) {
+      ir_variable *var = node->as_variable();
+      if (!var)
+         continue;
+      switch (var->data.mode) {
+      /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
+       * "For GetActiveAttrib, all active vertex shader input variables
+       * are enumerated, including the special built-in inputs gl_VertexID
+       * and gl_InstanceID."
+       */
+      case ir_var_system_value:
+         if (var->data.location != SYSTEM_VALUE_VERTEX_ID &&
+             var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE &&
+             var->data.location != SYSTEM_VALUE_INSTANCE_ID)
+            continue;
+         /* FALLTHROUGH: these system values are enumerated as inputs. */
+      case ir_var_shader_in:
+         if (programInterface != GL_PROGRAM_INPUT)
+            continue;
+         break;
+      case ir_var_shader_out:
+         if (programInterface != GL_PROGRAM_OUTPUT)
+            continue;
+         break;
+      default:
+         continue;
+      }
+
+      if (!add_program_resource(shProg, programInterface, var,
+                                build_stageref(shProg, var->name)))
+         return false;
+   }
+   return true;
+}
+
+/**
+ * Builds up a list of program resources that point to existing
+ * resource data.  Any previous list is discarded first.  Stops early,
+ * leaving a partial list, if adding a resource fails.  ctx is unused.
+ */
+static void
+build_program_resource_list(struct gl_context *ctx,
+                            struct gl_shader_program *shProg)
+{
+   /* Rebuild resource list.  The count is reset unconditionally
+    * (ralloc_free() accepts NULL) so that a stale count can never be
+    * paired with a NULL list.
+    */
+   ralloc_free(shProg->ProgramResourceList);
+   shProg->ProgramResourceList = NULL;
+   shProg->NumProgramResourceList = 0;
+
+   int input_stage = MESA_SHADER_STAGES, output_stage = 0;
+
+   /* Determine first input and final output stage. These are used to
+    * detect which variables should be enumerated in the resource list
+    * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT.
+    */
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (!shProg->_LinkedShaders[i])
+         continue;
+      if (input_stage == MESA_SHADER_STAGES)
+         input_stage = i;
+      output_stage = i;
+   }
+
+   /* Empty shader, no resources. */
+   if (input_stage == MESA_SHADER_STAGES && output_stage == 0)
+      return;
+
+   /* Add inputs and outputs to the resource list. */
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage],
+                                GL_PROGRAM_INPUT))
+      return;
+
+   if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage],
+                                GL_PROGRAM_OUTPUT))
+      return;
+
+   /* Add transform feedback varyings. */
+   for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) {
+      uint8_t stageref =
+         build_stageref(shProg,
+                        shProg->LinkedTransformFeedback.Varyings[i].Name);
+      if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING,
+                                &shProg->LinkedTransformFeedback.Varyings[i],
+                                stageref))
+         return;
+   }
+
+   /* Add uniforms from uniform storage. */
+   for (unsigned i = 0; i < shProg->NumUserUniformStorage; i++) {
+      /* Do not add uniforms internally used by Mesa. */
+      if (shProg->UniformStorage[i].hidden)
+         continue;
+
+      uint8_t stageref =
+         build_stageref(shProg, shProg->UniformStorage[i].name);
+      if (!add_program_resource(shProg, GL_UNIFORM,
+                                &shProg->UniformStorage[i], stageref))
+         return;
+   }
+
+   /* Add program uniform blocks. */
+   for (unsigned i = 0; i < shProg->NumUniformBlocks; i++) {
+      if (!add_program_resource(shProg, GL_UNIFORM_BLOCK,
+                                &shProg->UniformBlocks[i], 0))
+         return;
+   }
+
+   /* Add atomic counter buffers. */
+   for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) {
+      if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER,
+                                &shProg->AtomicBuffers[i], 0))
+         return;
+   }
+
+   /* TODO - following extensions will require more resource types:
+    *
+    *    GL_ARB_shader_storage_buffer_object
+    *    GL_ARB_shader_subroutine
+    */
+}
+
+
 void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 {
@@ -2737,10 +2914,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       goto done;
    }
 
-   unsigned first;
-   for (first = 0; first <= MESA_SHADER_FRAGMENT; first++) {
-      if (prog->_LinkedShaders[first] != NULL)
-	 break;
+   unsigned first, last;
+
+   first = MESA_SHADER_STAGES;
+   last = 0;
+
+   /* Determine first and last stage. */
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (!prog->_LinkedShaders[i])
+         continue;
+      if (first == MESA_SHADER_STAGES)
+         first = i;
+      last = i;
    }
 
    if (num_tfeedback_decls != 0) {
@@ -2769,13 +2954,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
     * ensures that inter-shader outputs written to in an earlier stage are
     * eliminated if they are (transitively) not used in a later stage.
     */
-   int last, next;
-   for (last = MESA_SHADER_FRAGMENT; last >= 0; last--) {
-      if (prog->_LinkedShaders[last] != NULL)
-         break;
-   }
+   int next;
 
-   if (last >= 0 && last < MESA_SHADER_FRAGMENT) {
+   if (first < MESA_SHADER_FRAGMENT) {
       gl_shader *const sh = prog->_LinkedShaders[last];
 
       if (first == MESA_SHADER_GEOMETRY) {
@@ -2787,13 +2968,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
           * MESA_SHADER_GEOMETRY.
           */
          if (!assign_varying_locations(ctx, mem_ctx, prog,
-                                       NULL, sh,
+                                       NULL, prog->_LinkedShaders[first],
                                        num_tfeedback_decls, tfeedback_decls,
                                        prog->Geom.VerticesIn))
             goto done;
       }
 
-      if (num_tfeedback_decls != 0 || prog->SeparateShader) {
+      if (last != MESA_SHADER_FRAGMENT &&
+         (num_tfeedback_decls != 0 || prog->SeparateShader)) {
          /* There was no fragment shader, but we still have to assign varying
           * locations for use by transform feedback.
           */
@@ -2905,6 +3087,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
       }
    }
 
+   build_program_resource_list(ctx, prog);
+   if (!prog->LinkStatus)
+      goto done;
+
    /* FINISHME: Assign fragment shader output locations. */
 
 done:
diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp
index d7f0b2809..51804bb5f 100644
--- a/mesalib/src/glsl/loop_controls.cpp
+++ b/mesalib/src/glsl/loop_controls.cpp
@@ -139,7 +139,7 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
          iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
          break;
       default:
-          unreachable(!"Unsupported type for loop iterator.");
+          unreachable("Unsupported type for loop iterator.");
       }
 
       ir_expression *const mul =
diff --git a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
index 2243f479a..44967dcdb 100644
--- a/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
+++ b/mesalib/src/glsl/lower_const_arrays_to_uniforms.cpp
@@ -49,7 +49,6 @@ public:
    {
       instructions = insts;
       progress = false;
-      index = 0;
    }
 
    bool run()
@@ -63,7 +62,6 @@ public:
 private:
    exec_list *instructions;
    bool progress;
-   unsigned index;
 };
 
 void
@@ -82,7 +80,7 @@ lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue)
 
    void *mem_ctx = ralloc_parent(con);
 
-   char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%d", index++);
+   char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra);
 
    ir_variable *uni =
       new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform);
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
index 357944da6..f6b8331d4 100644
--- a/mesalib/src/glsl/nir/glsl_to_nir.cpp
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -88,6 +88,8 @@ private:
    exec_list *cf_node_list;
    nir_instr *result; /* result of the expression tree last visited */
 
+   nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
    /* the head of the dereference chain we're creating */
    nir_deref_var *deref_head;
    /* the tail of the dereference chain we're creating */
@@ -156,6 +158,14 @@ nir_visitor::~nir_visitor()
    _mesa_hash_table_destroy(this->overload_table, NULL);
 }
 
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+   ir->accept(this);                        /* visit ir; callers read the result from deref_head */
+   ralloc_steal(mem_ctx, this->deref_head); /* reparent the chain head to mem_ctx */
+   return this->deref_head;
+}
+
 static nir_constant *
 constant_copy(ir_constant *ir, void *mem_ctx)
 {
@@ -582,13 +592,11 @@ void
 nir_visitor::visit(ir_return *ir)
 {
    if (ir->value != NULL) {
-      ir->value->accept(this);
       nir_intrinsic_instr *copy =
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
 
-      copy->variables[0] = nir_deref_var_create(this->shader,
-                                                this->impl->return_var);
-      copy->variables[1] = this->deref_head;
+      copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
    }
 
    nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
@@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir)
       nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
       ir_dereference *param =
          (ir_dereference *) ir->actual_parameters.get_head();
-      param->accept(this);
-      instr->variables[0] = this->deref_head;
+      instr->variables[0] = evaluate_deref(&instr->instr, param);
       nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
 
       nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
       store_instr->num_components = 1;
 
-      ir->return_deref->accept(this);
-      store_instr->variables[0] = this->deref_head;
+      store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
       store_instr->src[0].is_ssa = true;
       store_instr->src[0].ssa = &instr->dest.ssa;
 
@@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir)
 
    unsigned i = 0;
    foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
-      param->accept(this);
-      instr->params[i] = this->deref_head;
+      instr->params[i] = evaluate_deref(&instr->instr, param);
       i++;
    }
 
-   ir->return_deref->accept(this);
-   instr->return_deref = this->deref_head;
+   instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
    nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
 }
 
@@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir)
       nir_intrinsic_instr *copy =
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
 
-      ir->lhs->accept(this);
-      copy->variables[0] = this->deref_head;
-
-      ir->rhs->accept(this);
-      copy->variables[1] = this->deref_head;
-
+      copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
 
       if (ir->condition) {
          nir_if *if_stmt = nir_if_create(this->shader);
@@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir)
       load->num_components = ir->lhs->type->vector_elements;
       nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
       load->variables[0] = lhs_deref;
+      ralloc_steal(load, load->variables[0]);
       nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
 
       nir_op vec_op;
@@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir)
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
    store->num_components = ir->lhs->type->vector_elements;
-   nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+   nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = src;
 
@@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
       load_instr->num_components = ir->type->vector_elements;
       load_instr->variables[0] = this->deref_head;
+      ralloc_steal(load_instr, load_instr->variables[0]);
       add_instr(&load_instr->instr, ir->type->vector_elements);
    }
 
@@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir)
       nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
       intrin->num_components = deref->type->vector_elements;
       intrin->variables[0] = this->deref_head;
+      ralloc_steal(intrin, intrin->variables[0]);
 
       if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
           intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
@@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
    case ir_unop_sin:   emit(nir_op_fsin,   dest_size, srcs); break;
    case ir_unop_cos:   emit(nir_op_fcos,   dest_size, srcs); break;
-   case ir_unop_sin_reduced:
-      emit(nir_op_fsin_reduced, dest_size, srcs);
-      break;
-   case ir_unop_cos_reduced:
-      emit(nir_op_fcos_reduced, dest_size, srcs);
-      break;
    case ir_unop_dFdx:        emit(nir_op_fddx,        dest_size, srcs); break;
    case ir_unop_dFdy:        emit(nir_op_fddy,        dest_size, srcs); break;
    case ir_unop_dFdx_fine:   emit(nir_op_fddx_fine,   dest_size, srcs); break;
@@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir)
    case ir_binop_bit_and:
    case ir_binop_bit_or:
    case ir_binop_bit_xor:
+   case ir_binop_logic_and:
+   case ir_binop_logic_or:
+   case ir_binop_logic_xor:
    case ir_binop_lshift:
    case ir_binop_rshift:
       switch (ir->operation) {
@@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir)
       case ir_binop_bit_xor:
          op = nir_op_ixor;
          break;
+      case ir_binop_logic_and:
+         if (supports_ints)
+            op = nir_op_iand;
+         else
+            op = nir_op_fand;
+         break;
+      case ir_binop_logic_or:
+         if (supports_ints)
+            op = nir_op_ior;
+         else
+            op = nir_op_for;
+         break;
+      case ir_binop_logic_xor:
+         if (supports_ints)
+            op = nir_op_ixor;
+         else
+            op = nir_op_fxor;
+         break;
       case ir_binop_lshift:
          op = nir_op_ishl;
          break;
@@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir)
          }
       }
       break;
-   case ir_binop_logic_and:
-      if (supports_ints)
-         emit(nir_op_iand, dest_size, srcs);
-      else
-         emit(nir_op_fand, dest_size, srcs);
-      break;
-   case ir_binop_logic_or:
-      if (supports_ints)
-         emit(nir_op_ior, dest_size, srcs);
-      else
-         emit(nir_op_for, dest_size, srcs);
-      break;
-   case ir_binop_logic_xor:
-      if (supports_ints)
-         emit(nir_op_ixor, dest_size, srcs);
-      else
-         emit(nir_op_fxor, dest_size, srcs);
-      break;
    case ir_binop_dot:
       switch (ir->operands[0]->type->vector_elements) {
          case 2: emit(nir_op_fdot2, dest_size, srcs); break;
@@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir)
       unreachable("not reached");
    }
 
-   ir->sampler->accept(this);
-   instr->sampler = this->deref_head;
+   instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
 
    unsigned src_number = 0;
 
@@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir)
    int field_index = this->deref_tail->type->field_index(ir->field);
    assert(field_index >= 0);
 
-   nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+   nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
    deref->deref.type = ir->type;
    this->deref_tail->child = &deref->deref;
    this->deref_tail = &deref->deref;
@@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir)
    ir->array->accept(this);
 
    this->deref_tail->child = &deref->deref;
+   ralloc_steal(this->deref_tail, deref);
    this->deref_tail = &deref->deref;
 }
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
index 6459d5108..c6e53612b 100644
--- a/mesalib/src/glsl/nir/nir.c
+++ b/mesalib/src/glsl/nir/nir.c
@@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
    nir_register *reg = ralloc(mem_ctx, nir_register);
 
    reg->parent_instr = NULL;
-   reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
 
    reg->num_components = 0;
@@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name)
 
    exec_list_push_tail(&shader->functions, &func->node);
    exec_list_make_empty(&func->overload_list);
-   func->name = name;
+   func->name = ralloc_strdup(func, name);
    func->shader = shader;
 
    return func;
@@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx)
    cf_init(&block->cf_node, nir_cf_node_block);
 
    block->successors[0] = block->successors[1] = NULL;
-   block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
    block->imm_dom = NULL;
-   block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
 
    exec_list_make_empty(&block->instr_list);
@@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src)
 }
 
 nir_alu_instr *
-nir_alu_instr_create(void *mem_ctx, nir_op op)
+nir_alu_instr_create(nir_shader *shader, nir_op op)
 {
    unsigned num_srcs = nir_op_infos[op].num_inputs;
    nir_alu_instr *instr =
-      ralloc_size(mem_ctx,
+      ralloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
 
    instr_init(&instr->instr, nir_instr_type_alu);
@@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op)
 }
 
 nir_jump_instr *
-nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
 {
-   nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
    instr_init(&instr->instr, nir_instr_type_jump);
    instr->type = type;
    return instr;
 }
 
 nir_load_const_instr *
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
 {
-   nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+   nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
    instr_init(&instr->instr, nir_instr_type_load_const);
 
    nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
 }
 
 nir_intrinsic_instr *
-nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
 {
    unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
    nir_intrinsic_instr *instr =
-      ralloc_size(mem_ctx,
+      ralloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
 
    instr_init(&instr->instr, nir_instr_type_intrinsic);
@@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
 }
 
 nir_call_instr *
-nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+nir_call_instr_create(nir_shader *shader, nir_function_overload *callee)
 {
-   nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+   nir_call_instr *instr = ralloc(shader, nir_call_instr);
    instr_init(&instr->instr, nir_instr_type_call);
 
    instr->callee = callee;
    instr->num_params = callee->num_params;
-   instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+   instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
    instr->return_deref = NULL;
 
    return instr;
 }
 
 nir_tex_instr *
-nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
 {
-   nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr);
+   nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
    instr_init(&instr->instr, nir_instr_type_tex);
 
    dest_init(&instr->dest);
 
    instr->num_srcs = num_srcs;
-   instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
    for (unsigned i = 0; i < num_srcs; i++)
       src_init(&instr->src[i].src);
 
@@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
 }
 
 nir_phi_instr *
-nir_phi_instr_create(void *mem_ctx)
+nir_phi_instr_create(nir_shader *shader)
 {
-   nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
    instr_init(&instr->instr, nir_instr_type_phi);
 
    dest_init(&instr->dest);
@@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx)
 }
 
 nir_parallel_copy_instr *
-nir_parallel_copy_instr_create(void *mem_ctx)
+nir_parallel_copy_instr_create(nir_shader *shader)
 {
-   nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
    instr_init(&instr->instr, nir_instr_type_parallel_copy);
 
    exec_list_make_empty(&instr->entries);
@@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx)
 }
 
 nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
 {
-   nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
    instr_init(&instr->instr, nir_instr_type_ssa_undef);
 
    nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
    nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
    }
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
    nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -1834,13 +1834,11 @@ void
 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                  unsigned num_components, const char *name)
 {
-   void *mem_ctx = ralloc_parent(instr);
-
    def->name = name;
    def->parent_instr = instr;
-   def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   def->uses = _mesa_set_create(instr, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
    def->num_components = num_components;
 
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
index 29fe94243..74772c798 100644
--- a/mesalib/src/glsl/nir/nir.h
+++ b/mesalib/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
 #include "util/set.h"
 #include "util/bitset.h"
 #include "nir_types.h"
+#include "glsl/shader_enums.h"
 #include <stdio.h>
 
 #include "nir_opcodes.h"
@@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg)
    return src;
 }
 
+/* Returns the parent_instr recorded on the SSA def or register src reads. */
+static inline nir_instr *
+nir_src_get_parent_instr(const nir_src *src)
+{
+   if (!src->is_ssa)
+      return src->reg.reg->parent_instr;
+
+   return src->ssa->parent_instr;
+}
+
 static inline nir_dest
 nir_dest_for_reg(nir_register *reg)
 {
@@ -1365,11 +1376,17 @@ typedef struct nir_function {
 
 typedef struct nir_shader_compiler_options {
    bool lower_ffma;
+   bool lower_flrp;
    bool lower_fpow;
    bool lower_fsat;
    bool lower_fsqrt;
    /** lowers fneg and ineg to fsub and isub. */
    bool lower_negate;
+   /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+   bool lower_sub;
+
+   /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */
+   bool lower_scmp;
 
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
@@ -1414,6 +1431,9 @@ typedef struct nir_shader {
     * access plus one
     */
    unsigned num_inputs, num_uniforms, num_outputs;
+
+   /** the number of uniforms that are only accessed directly */
+   unsigned num_direct_uniforms;
 } nir_shader;
 
 #define nir_foreach_overload(shader, overload)                        \
@@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
 
 /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
-nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
 
-nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
 
-nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
                                                   unsigned num_components);
 
-nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
                                                 nir_intrinsic_op op);
 
-nir_call_instr *nir_call_instr_create(void *mem_ctx,
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
                                       nir_function_overload *callee);
 
-nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
 
-nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
 
-nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
 
-nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
                                                 unsigned num_components);
 
 nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
@@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
 #else
-static inline void nir_validate_shader(nir_shader *shader) { }
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
 #endif /* DEBUG */
 
 void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_phis_to_scalar(nir_shader *shader);
 
 void nir_lower_samplers(nir_shader *shader,
-                        struct gl_shader_program *shader_program,
-                        struct gl_program *prog);
+                        const struct gl_shader_program *shader_program,
+                        gl_shader_stage stage);
 
 void nir_lower_system_values(nir_shader *shader);
+void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
 
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
+void nir_normalize_cubemap_coords(nir_shader *shader);
+
 void nir_live_variables_impl(nir_function_impl *impl);
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
 
@@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader);
 void nir_convert_from_ssa(nir_shader *shader);
 
 bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
 
 bool nir_opt_global_to_local(nir_shader *shader);
@@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
 
 bool nir_opt_remove_phis(nir_shader *shader);
 
+void nir_sweep(nir_shader *shader);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
index afab1a008..bbf4f08ef 100644
--- a/mesalib/src/glsl/nir/nir_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template("""
 #include "nir.h"
 #include "nir_search.h"
 
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
 struct transform {
    const nir_search_expression *search;
    const nir_search_value *replace;
    unsigned condition_offset;
 };
 
+struct opt_state {
+   void *mem_ctx;
+   bool progress;
+   const bool *condition_flags;
+};
+
+#endif
+
 % for (opcode, xform_list) in xform_dict.iteritems():
 % for xform in xform_list:
    ${xform.search.render()}
@@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = {
 };
 % endfor
 
-struct opt_state {
-   void *mem_ctx;
-   bool progress;
-   const bool *condition_flags;
-};
-
 static bool
 ${pass_name}_block(nir_block *block, void *void_state)
 {
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
index 7c4f7fd96..d1419ee21 100644
--- a/mesalib/src/glsl/nir/nir_builder.h
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -28,6 +28,9 @@ struct exec_list;
 
 typedef struct nir_builder {
    struct exec_list *cf_node_list;
+   nir_instr *before_instr;
+   nir_instr *after_instr;
+
    nir_shader *shader;
    nir_function_impl *impl;
 } nir_builder;
@@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build,
                                  struct exec_list *cf_node_list)
 {
    build->cf_node_list = cf_node_list;
+   build->before_instr = NULL;
+   build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
+{
+   build->cf_node_list = NULL;
+   build->before_instr = before_instr;
+   build->after_instr = NULL;
 }
 
+static inline void
+nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
+{
+   build->after_instr = after_instr;
+   build->before_instr = NULL;   /* clear the other two insertion modes so */
+   build->cf_node_list = NULL;   /* nir_builder_instr_insert() uses this one */
+}
+
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+   if (build->cf_node_list) {          /* cf-list mode takes priority */
+      nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+   } else if (build->before_instr) {   /* emit ahead of a fixed instruction */
+      nir_instr_insert_before(build->before_instr, instr);
+   } else {
+      assert(build->after_instr);
+      nir_instr_insert_after(build->after_instr, instr);
+      build->after_instr = instr;      /* next insertion goes after this one */
+   }
+}
+
+/* Emits a load_const of value via the builder; NULL if it cannot be created. */
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+   nir_load_const_instr *imm =
+      nir_load_const_instr_create(build->shader, num_components);
+   if (imm == NULL)
+      return NULL;
+
+   imm->value = value;
+   nir_builder_instr_insert(build, &imm->instr);
+
+   return &imm->def;
+}
+
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+   nir_const_value scalar = { { .f = {x, 0, 0, 0} } };   /* x in channel 0 */
+   return nir_build_imm(build, 1, scalar);
+}
+
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+   nir_const_value vec = { { .f = {x, y, z, w} } };   /* one float per channel */
+   return nir_build_imm(build, 4, vec);
+}
+
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+   nir_const_value scalar = { { .i = {x, 0, 0, 0} } };   /* x in channel 0 */
+   return nir_build_imm(build, 1, scalar);
+}
 
 static inline nir_ssa_def *
 nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
    nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
    instr->dest.write_mask = (1 << num_components) - 1;
 
-   nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(build, &instr->instr);
 
    return &instr->dest.dest.ssa;
 }
@@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
 
 #include "nir_builder_opcodes.h"
 
+/**
+ * Like nir_fmov, but the operand is a full nir_alu_src rather than a
+ * nir_ssa_def; the low num_components channels of the result are written.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *alu = nir_alu_instr_create(build->shader, nir_op_fmov);
+   alu->src[0] = src;
+   nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+   alu->dest.write_mask = (1 << num_components) - 1;
+   nir_builder_instr_insert(build, &alu->instr);
+   return &alu->dest.dest.ssa;
+}
+
+/* Integer twin of nir_fmov_alu: emits an imov of an arbitrary nir_alu_src. */
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *alu = nir_alu_instr_create(build->shader, nir_op_imov);
+   alu->src[0] = src;
+   nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+   alu->dest.write_mask = (1 << num_components) - 1;
+   nir_builder_instr_insert(build, &alu->instr);
+   return &alu->dest.dest.ssa;
+}
+
+/**
+ * Emit an fmov (use_fmov) or imov of src with its components picked by swiz.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+            unsigned num_components, bool use_fmov)
+{
+   nir_alu_src operand;
+   memset(&operand, 0, sizeof(operand));
+   operand.src = nir_src_for_ssa(src);
+   for (int c = 0; c < 4; c++)
+      operand.swizzle[c] = swiz[c];
+   if (use_fmov)
+      return nir_fmov_alu(build, operand, num_components);
+   return nir_imov_alu(build, operand, num_components);
+}
+
+/**
+ * Produces a nir_ssa_def * for a nir_src so it can feed nir_build_alu()-based
+ * builder calls; anything but an SSA value of matching width goes via an imov.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+   if (src.is_ssa && src.ssa->num_components == num_components)
+      return src.ssa;
+
+   /* Identity swizzle: the imov copies the source channels unchanged. */
+   nir_alu_src copy_src;
+   memset(&copy_src, 0, sizeof(copy_src));
+   copy_src.src = src;
+   for (int chan = 0; chan < 4; chan++)
+      copy_src.swizzle[chan] = chan;
+   return nir_imov_alu(build, copy_src, num_components);
+}
+
 #endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
index c3090fb06..184698abd 100644
--- a/mesalib/src/glsl/nir/nir_from_ssa.c
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
       reg->num_components = def->num_components;
       reg->num_array_elems = 0;
 
-      /* This register comes from an SSA definition that was not part of a
-       * phi-web.  Therefore, we know it has a single unique definition
-       * that dominates all of its uses.  Therefore, we can copy the
+      /* This register comes from an SSA definition that is defined and not
+       * part of a phi-web.  Therefore, we know it has a single unique
+       * definition that dominates all of its uses; we can copy the
        * parent_instr from the SSA def safely.
        */
-      reg->parent_instr = def->parent_instr;
+      if (def->parent_instr->type != nir_instr_type_ssa_undef)
+         reg->parent_instr = def->parent_instr;
 
       _mesa_hash_table_insert(state->ssa_table, def, reg);
       return reg;
diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000000000..7b6803207
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine..
+ */
+
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+   nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+   nir_op op = alu->op;
+   bool is_signed;
+   /* Only idiv/udiv/umod are lowered; any other ALU op is left untouched. */
+   if ((op != nir_op_idiv) &&
+       (op != nir_op_udiv) &&
+       (op != nir_op_umod))
+      return;
+
+   is_signed = (op == nir_op_idiv);
+   /* Everything built below is emitted just ahead of the original op. */
+   nir_builder_insert_before_instr(bld, &alu->instr);
+
+   numer = nir_ssa_for_src(bld, alu->src[0].src,
+                           nir_ssa_alu_instr_src_components(alu, 0));
+   denom = nir_ssa_for_src(bld, alu->src[1].src,
+                           nir_ssa_alu_instr_src_components(alu, 1));
+   /* a/b: operand magnitudes as integers; af/bf: the same values as floats. */
+   if (is_signed) {
+      af = nir_i2f(bld, numer);
+      bf = nir_i2f(bld, denom);
+      af = nir_fabs(bld, af);
+      bf = nir_fabs(bld, bf);
+      a  = nir_iabs(bld, numer);
+      b  = nir_iabs(bld, denom);
+   } else {
+      af = nir_u2f(bld, numer);
+      bf = nir_u2f(bld, denom);
+      a  = numer;
+      b  = denom;
+   }
+
+   /* get first result: */
+   bf = nir_frcp(bld, bf);
+   bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really: int op on float bits */
+   q  = nir_fmul(bld, af, bf);
+
+   if (is_signed) {
+      q = nir_f2i(bld, q);
+   } else {
+      q = nir_f2u(bld, q);
+   }
+
+   /* get error of first result: */
+   r = nir_imul(bld, q, b);
+   r = nir_isub(bld, a, r);
+   r = nir_u2f(bld, r);
+   r = nir_fmul(bld, r, bf);
+   r = nir_f2u(bld, r);
+
+   /* add quotients: */
+   q = nir_iadd(bld, q, r);
+
+   /* correction: if modulus >= divisor (compared as unsigned), add 1 */
+   r = nir_imul(bld, q, b);
+   r = nir_isub(bld, a, r);
+   /* r and b are magnitudes: a signed compare misfires once b >= 2^31. */
+   r = nir_uge(bld, r, b);
+   r = nir_b2i(bld, r);
+
+   q = nir_iadd(bld, q, r);
+   if (is_signed)  {
+      /* fix the sign: negate q when numer and denom differ in sign */
+      r = nir_ixor(bld, numer, denom);
+      r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+      r = nir_i2b(bld, r);
+      b = nir_ineg(bld, q);   /* b reused as scratch; umod never gets here */
+      q = nir_bcsel(bld, r, b, q);
+   }
+
+   if (op == nir_op_umod) {
+      /* division result in q; remainder = a - q * b */
+      r = nir_imul(bld, q, b);
+      q = nir_isub(bld, a, r);
+   }
+   /* Redirect users to q; the original instruction is not removed here. */
+   assert(alu->dest.dest.is_ssa);
+   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                            nir_src_for_ssa(q),
+                            ralloc_parent(alu));
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+   nir_builder *bld = state;   /* callback payload is the shared builder */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+      convert_instr(bld, nir_instr_as_alu(instr));
+   }
+
+   return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+   nir_builder bld;
+   nir_builder_init(&bld, impl);
+   nir_foreach_block(impl, convert_block, &bld);
+   /* Only instructions are inserted; blocks and control flow stay as-is. */
+   nir_metadata_preserve(impl,
+                         nir_metadata_dominance | nir_metadata_block_index);
+}
+
+void
+nir_lower_idiv(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl have nothing to lower */
+         convert_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
index 7cd93ea0a..4bdb80072 100644
--- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
             else
                nir_instr_insert_after_block(src->pred, &mov->instr);
 
-            nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+            nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
             new_src->pred = src->pred;
             new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
 
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
index 3015dbd09..cf8ab8325 100644
--- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,33 +36,26 @@ extern "C" {
 }
 
 static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
-                  const struct gl_program *prog)
+get_sampler_index(const struct gl_shader_program *shader_program,
+                  gl_shader_stage stage, const char *name)
 {
-   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
    unsigned location;
    if (!shader_program->UniformHash->get(location, name)) {
-      linker_error(shader_program,
-                   "failed to find sampler named %s.\n", name);
+      assert(!"failed to find sampler");
       return 0;
    }
 
-   if (!shader_program->UniformStorage[location].sampler[shader].active) {
-      assert(0 && "cannot return a sampler");
-      linker_error(shader_program,
-                   "cannot return a sampler named %s, because it is not "
-                   "used in this shader stage. This is a driver bug.\n",
-                   name);
+   if (!shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
       return 0;
    }
 
-   return shader_program->UniformStorage[location].sampler[shader].index;
+   return shader_program->UniformStorage[location].sampler[stage].index;
 }
 
 static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
-              const struct gl_program *prog, void *mem_ctx)
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, void *mem_ctx)
 {
    if (instr->sampler == NULL)
       return;
@@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
                ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
             break;
          case nir_deref_array_type_indirect: {
-            instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+            instr->src = reralloc(instr, instr->src, nir_tex_src,
                                   instr->num_srcs + 1);
             memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
             instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
@@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
       }
    }
 
-   instr->sampler_index += get_sampler_index(shader_program, name, prog);
+   instr->sampler_index += get_sampler_index(shader_program, stage, name);
 
    instr->sampler = NULL;
 }
 
 typedef struct {
    void *mem_ctx;
-   struct gl_shader_program *shader_program;
-   struct gl_program *prog;
+   const struct gl_shader_program *shader_program;
+   gl_shader_stage stage;
 } lower_state;
 
 static bool
@@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state)
    nir_foreach_instr(block, instr) {
       if (instr->type == nir_instr_type_tex) {
          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
-         lower_sampler(tex_instr, state->shader_program, state->prog,
+         lower_sampler(tex_instr, state->shader_program, state->stage,
                        state->mem_ctx);
       }
    }
@@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state)
 }
 
 static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
-           struct gl_program *prog)
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
 {
    lower_state state;
 
    state.mem_ctx = ralloc_parent(impl);
    state.shader_program = shader_program;
-   state.prog = prog;
+   state.stage = stage;
 
    nir_foreach_block(impl, lower_block_cb, &state);
 }
 
 extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
-                   struct gl_program *prog)
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program,
+                   gl_shader_stage stage)
 {
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         lower_impl(overload->impl, shader_program, prog);
+         lower_impl(overload->impl, shader_program, stage);
    }
 }
diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
new file mode 100644
index 000000000..6b0e9c340
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{  /* One-component copy of channel c of def, built with nir_swizzle (imov). */
+   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+nir_lower_tex_projector_block(nir_block *block, void *void_state)
+{
+   nir_builder *b = void_state;
+   /* Fold each tex projector into its coord/comparitor sources, then drop it. */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      nir_builder_insert_before_instr(b, &tex->instr);
+
+      /* Find the projector in the srcs list, if present. */
+      int proj_index;
+      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+         if (tex->src[proj_index].src_type == nir_tex_src_projector)
+            break;
+      }
+      if (proj_index == tex->num_srcs)
+         continue;
+      nir_ssa_def *inv_proj =
+         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+      /* Walk through the sources projecting the arguments. */
+      for (int i = 0; i < tex->num_srcs; i++) {
+         switch (tex->src[i].src_type) {
+         case nir_tex_src_coord:
+         case nir_tex_src_comparitor:
+            break;
+         default:
+            continue;
+         }
+         nir_ssa_def *unprojected =
+            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+         /* Array indices don't get projected, so make a new vector with the
+          * coordinate's array index untouched.
+          */
+         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+            switch (tex->coord_components) {
+            case 4:
+               projected = nir_vec4(b,
+                                    channel(b, projected, 0),
+                                    channel(b, projected, 1),
+                                    channel(b, projected, 2),
+                                    channel(b, unprojected, 3));
+               break;
+            case 3:
+               projected = nir_vec3(b,
+                                    channel(b, projected, 0),
+                                    channel(b, projected, 1),
+                                    channel(b, unprojected, 2));
+               break;
+            case 2:
+               projected = nir_vec2(b,
+                                    channel(b, projected, 0),
+                                    channel(b, unprojected, 1));
+               break;
+            default:
+               unreachable("bad texture coord count for array");
+               break;
+            }
+         }
+
+         nir_instr_rewrite_src(&tex->instr,
+                               &tex->src[i].src,
+                               nir_src_for_ssa(projected));
+      }
+
+      /* Drop the projector: slide the num_srcs - proj_index - 1 sources that
+       * follow it down one slot (one more would read past the src array).
+       */
+      nir_src dead;
+      memset(&dead, 0, sizeof dead);
+      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+      memmove(&tex->src[proj_index],
+              &tex->src[proj_index + 1],
+              (tex->num_srcs - proj_index - 1) * sizeof(*tex->src));
+      tex->num_srcs--;
+   }
+
+   return true;
+}
+
+static void
+nir_lower_tex_projector_impl(nir_function_impl *impl)
+{
+   nir_builder builder;
+
+   nir_builder_init(&builder, impl);
+   nir_foreach_block(impl, nir_lower_tex_projector_block, &builder);
+   /* The pass only touches instructions, never blocks or control flow. */
+   nir_metadata_preserve(impl,
+                         nir_metadata_dominance | nir_metadata_block_index);
+}
+
+void
+nir_lower_tex_projector(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl have nothing to lower */
+         nir_lower_tex_projector_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
index 85ebb281c..58389a7c7 100644
--- a/mesalib/src/glsl/nir/nir_lower_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
 
       unsigned num_components = glsl_get_vector_elements(src_tail->type);
 
-      nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
-      nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
-
       nir_intrinsic_instr *load =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
       load->num_components = num_components;
-      load->variables[0] = nir_deref_as_var(src_deref);
+      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
       nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
 
       nir_instr_insert_before(&copy_instr->instr, &load->instr);
@@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
       store->num_components = num_components;
-      store->variables[0] = nir_deref_as_var(dest_deref);
+      store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
       store->src[0].is_ssa = true;
       store->src[0].ssa = &load->dest.ssa;
 
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
index 86e6ab416..2ca74d71b 100644
--- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred,
 
       struct deref_node *node = entry->data;
 
-      nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+      nir_phi_src *src = ralloc(phi, nir_phi_src);
       src->pred = pred;
       src->src.is_ssa = true;
       src->src.ssa = get_ssa_def_for_block(node, pred, state);
diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 000000000..0da8447ac
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand <jason@jlekstrand.net>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0.  This is based on the old GLSL IR based pass by Eric.
+ */
+/* Swizzle def with c in every slot; the 1 is presumably a component count. */
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+/* Per-block callback: for every texture instruction whose sampler_dim is
+ * GLSL_SAMPLER_DIM_CUBE, rewrite each coordinate source with a normalized
+ * copy.  void_state is the nir_builder used to emit the new instructions.
+ */
+static bool
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+   nir_builder *b = void_state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+         continue;
+
+      nir_builder_insert_before_instr(b, &tex->instr);
+      for (unsigned i = 0; i < tex->num_srcs; i++) {
+         if (tex->src[i].src_type != nir_tex_src_coord)
+            continue;
+         nir_ssa_def *orig_coord =
+            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+         assert(orig_coord->num_components >= 3);
+
+         /* norm = max(|x|, |y|, |z|); the coordinate is scaled by 1/norm. */
+         nir_ssa_def *abs = nir_fabs(b, orig_coord);
+         nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
+                                         nir_fmax(b, channel(b, abs, 1),
+                                                     channel(b, abs, 2)));
+         nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
+
+         /* Array indices don't have to be normalized, so make a new vector
+          * with the coordinate's array index untouched.
+          */
+         if (tex->coord_components == 4) {
+            normalized = nir_vec4(b,
+                                  channel(b, normalized, 0),
+                                  channel(b, normalized, 1),
+                                  channel(b, normalized, 2),
+                                  channel(b, orig_coord, 3));
+         }
+
+         nir_instr_rewrite_src(&tex->instr,
+                               &tex->src[i].src,
+                               nir_src_for_ssa(normalized));
+      }
+   }
+   return true;
+}
+
+/* Run the per-block callback over impl, sharing one builder for all blocks. */
+static void
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   nir_foreach_block(impl, normalize_cubemap_coords_block, &b);
+   /* Presumably declares block-index and dominance metadata still valid. */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+/* Entry point: run the pass on every overload of shader that has an impl. */
+void
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload)
+      if (overload->impl)
+         normalize_cubemap_coords_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
index 062cd628b..264806f5d 100644
--- a/mesalib/src/glsl/nir/nir_opcodes.py
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
 unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
 unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
 # Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
 # Boolean-to-float conversion
 unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 == 0")
-unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
 unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
 
 unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
@@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
 
 unop("fsin", tfloat, "sinf(src0)")
 unop("fcos", tfloat, "cosf(src0)")
-unop("fsin_reduced", tfloat, "sinf(src0)")
-unop("fcos_reduced", tfloat, "cosf(src0)")
 
 
 # Partial derivatives.
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
index ef855aa77..cdb19241c 100644
--- a/mesalib/src/glsl/nir/nir_opt_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,9 @@ optimizations = [
    (('flrp', a, b, 1.0), b),
    (('flrp', a, a, b), a),
    (('flrp', 0.0, a, b), ('fmul', a, b)),
+   (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+   (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+   (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
    (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
    # Comparison simplifications
@@ -82,10 +85,6 @@ optimizations = [
    (('inot', ('fge', a, b)), ('flt', a, b)),
    (('inot', ('ilt', a, b)), ('ige', a, b)),
    (('inot', ('ige', a, b)), ('ilt', a, b)),
-   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
-   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
-   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
-   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
    (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
    (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
    (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
@@ -95,6 +94,18 @@ optimizations = [
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
    (('fsat', ('fsat', a)), ('fsat', a)),
    (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+   (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+   (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+   (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+   (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+   (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+   (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+   # Emulating booleans
+   (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+   (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+   (('iand', 'a@bool', 1.0), ('b2f', a)),
+   (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+   (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
    # Comparison with the same args.  Note that these are not done for
    # the float versions because NaN always returns false on float
    # inequalities.
@@ -122,7 +133,7 @@ optimizations = [
    (('ishr', 0, a), 0),
    (('ishr', a, 0), a),
    (('ushr', 0, a), 0),
-   (('ushr', a, 0), 0),
+   (('ushr', a, 0), a),
    # Exponential/logarithmic identities
    (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
    (('fexp',  ('flog',  a)), a), # e^ln(a)  = a
@@ -134,6 +145,26 @@ optimizations = [
    (('fpow', a, 1.0), a),
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', 2.0, a), ('fexp2', a)),
+   (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+   (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+   (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+   (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+   (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+   (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+   (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+   (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+   (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+   (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+   (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+   (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+   (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+   (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+   (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+   (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+   (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+   (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+   (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+   (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
    (('frcp', ('frcp', a)), a),
@@ -154,18 +185,21 @@ optimizations = [
    (('bcsel', a, b, b), b),
    (('fcsel', a, b, b), b),
 
+   # Conversions
+   (('f2i', ('ftrunc', a)), ('f2i', a)),
+   (('f2u', ('ftrunc', a)), ('f2u', a)),
+
    # Subtracts
    (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
    (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+   (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+   (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
    (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
    (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
    (('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
-   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
 ]
 
 # Add optimizations to handle the case where the result of a ternary is
@@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne',
        ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
    ]
 
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished.  Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+                                  late_optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
index 9b383202d..553906e12 100644
--- a/mesalib/src/glsl/nir/nir_opt_cse.c
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -37,20 +37,19 @@ struct cse_state {
 };
 
 static bool
-nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
+                   unsigned src2)
 {
-   if (src1.abs != src2.abs || src1.negate != src2.negate)
+   if (alu1->src[src1].abs != alu2->src[src2].abs ||
+       alu1->src[src1].negate != alu2->src[src2].negate)
       return false;
 
-   for (int i = 0; i < 4; ++i) {
-      if (!(read_mask & (1 << i)))
-         continue;
-
-      if (src1.swizzle[i] != src2.swizzle[i])
+   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
          return false;
    }
 
-   return nir_srcs_equal(src1.src, src2.src);
+   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
 }
 
 static bool
@@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
       if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
          return false;
 
-      for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
-         if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
-                                 (1 << alu1->dest.dest.ssa.num_components) - 1))
-            return false;
+      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+         assert(nir_op_infos[alu1->op].num_inputs == 2);
+         return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+                 nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+                (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+                 nir_alu_srcs_equal(alu1, alu2, 1, 0));
+      } else {
+         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+               return false;
+         }
       }
       return true;
    }
@@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
 static bool
 src_is_ssa(nir_src *src, void *data)
 {
+   (void) data;
    return src->is_ssa;
 }
 
 static bool
 dest_is_ssa(nir_dest *dest, void *data)
 {
+   (void) data;
    return dest->is_ssa;
 }
 
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
new file mode 100644
index 000000000..9d5646fe6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+struct peephole_ffma_state {
+   void *mem_ctx;              /* context passed when creating new instrs */
+   nir_function_impl *impl;    /* function implementation being processed */
+   bool progress;              /* set to true once an fadd was replaced */
+};
+
+/* True when def has no if_uses entries and every instruction using it is
+ * an fadd, or an imov/fmov/fneg/fabs whose own result passes this same test
+ * (checked recursively).  Any other user makes the answer false.
+ */
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+   if (def->if_uses->entries > 0)
+      return false;
+
+   struct set_entry *use_iter;
+   set_foreach(def->uses, use_iter) {
+      nir_instr *use_instr = (nir_instr *)use_iter->key;
+      if (use_instr->type != nir_instr_type_alu)
+         return false;
+      nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+      switch (use_alu->op) {
+      case nir_op_fadd:
+         break; /* This one's ok */
+      case nir_op_imov:
+      case nir_op_fmov:
+      case nir_op_fneg:
+      case nir_op_fabs:
+         assert(use_alu->dest.dest.is_ssa);
+         if (!are_all_uses_fadd(&use_alu->dest.dest.ssa))
+            return false;
+         break;
+      default:
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Follow src back through imov/fmov/fneg/fabs to the fmul that feeds it.
+ * Returns that fmul, or NULL if there is none or it has non-fadd uses.  Each
+ * hop's swizzle is composed into swizzle[]; *negate and *abs record modifiers.
+ */
+static nir_alu_instr *
+get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+{
+   assert(src->src.is_ssa && !src->abs && !src->negate);
+
+   nir_instr *instr = src->src.ssa->parent_instr;
+   if (instr->type != nir_instr_type_alu)
+      return NULL;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   switch (alu->op) {
+   case nir_op_imov:
+   case nir_op_fmov:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      break;
+   case nir_op_fneg:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      *negate = !*negate;
+      break;
+   case nir_op_fabs:
+      /* An fabs cancels whatever negation was collected below it. */
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      *negate = false;
+      *abs = true;
+      break;
+   case nir_op_fmul:
+      /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+       * operations.  This prevents us from being too aggressive with our
+       * fusing which can actually lead to more instructions.
+       */
+      if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+         return NULL;
+      break;
+   default:
+      return NULL;
+   }
+   if (!alu)
+      return NULL;
+
+   /* Compose from a snapshot: writing swizzle[] while still indexing it
+    * would let later channels read entries this loop already replaced.
+    */
+   const uint8_t prev[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+   for (unsigned i = 0; i < 4 && (alu->dest.write_mask & (1 << i)); i++)
+      swizzle[i] = prev[src->swizzle[i]];
+
+   return alu;
+}
+
+/* Per-block callback: replace each fadd that has an operand leading back to
+ * a fusable fmul (see get_mul_for_src) with an ffma and remove the fadd.
+ * Sets state->progress on every replacement; always returns true.
+ */
+static bool
+nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+   struct peephole_ffma_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *add = nir_instr_as_alu(instr);
+      if (add->op != nir_op_fadd)
+         continue;
+
+      /* TODO: Maybe bail if this expression is considered "precise"? */
+
+      assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+      /* This is the case a + a.  We would rather handle this with an
+       * algebraic reduction than fuse it.  Also, we want to only fuse
+       * things where the multiply is used only once and, in this case,
+       * it would be used twice by the same instruction.
+       */
+      if (add->src[0].src.ssa == add->src[1].src.ssa)
+         continue;
+      /* Try each fadd operand in turn; stop at the first that yields an fmul. */
+      nir_alu_instr *mul;
+      uint8_t add_mul_src, swizzle[4];
+      bool negate, abs;
+      for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+         for (unsigned i = 0; i < 4; i++)
+            swizzle[i] = i;
+         negate = false;
+         abs = false;
+         mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+         if (mul != NULL)
+            break;
+      }
+
+      if (mul == NULL)
+         continue;
+
+      nir_ssa_def *mul_src[2];
+      mul_src[0] = mul->src[0].src.ssa;
+      mul_src[1] = mul->src[1].src.ssa;
+      /* abs flag set: wrap both factors in fabs (local 'abs' shadows it). */
+      if (abs) {
+         for (unsigned i = 0; i < 2; i++) {
+            nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_fabs);
+            abs->src[0].src = nir_src_for_ssa(mul_src[i]);
+            nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
+                              mul_src[i]->num_components, NULL);
+            abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
+            nir_instr_insert_before(&add->instr, &abs->instr);
+            mul_src[i] = &abs->dest.dest.ssa;
+         }
+      }
+      /* negate flag set: an fneg is applied to the first factor only. */
+      if (negate) {
+         nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+                                                   nir_op_fneg);
+         neg->src[0].src = nir_src_for_ssa(mul_src[0]);
+         nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
+                           mul_src[0]->num_components, NULL);
+         neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
+         nir_instr_insert_before(&add->instr, &neg->instr);
+         mul_src[0] = &neg->dest.dest.ssa;
+      }
+
+      nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+      ffma->dest.saturate = add->dest.saturate;
+      ffma->dest.write_mask = add->dest.write_mask;
+      /* Each factor reads the fmul source's swizzle via the composed one. */
+      for (unsigned i = 0; i < 2; i++) {
+         ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+         for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+            ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
+      }
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
+                       state->mem_ctx);
+
+      assert(add->dest.dest.is_ssa);
+
+      nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+                        add->dest.dest.ssa.num_components,
+                        add->dest.dest.ssa.name);
+      nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+                               nir_src_for_ssa(&ffma->dest.dest.ssa),
+                               state->mem_ctx);
+
+      nir_instr_insert_before(&add->instr, &ffma->instr);
+      assert(add->dest.dest.ssa.uses->entries == 0);
+      nir_instr_remove(&add->instr);
+
+      state->progress = true;
+   }
+   return true;
+}
+
+/* Run the peephole over every block of impl.  Returns true if any fadd was
+ * replaced; only then is nir_metadata_preserve called for the impl.
+ */
+static bool
+nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+   struct peephole_ffma_state state;
+   state.mem_ctx = ralloc_parent(impl);
+   state.impl = impl;
+   state.progress = false;
+
+   nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   return state.progress;
+}
+
+/* Entry point: run the peephole on each implemented overload; true on change. */
+bool
+nir_opt_peephole_ffma(nir_shader *shader)
+{
+   bool progress = false;
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         progress |= nir_opt_peephole_ffma_impl(overload->impl);
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
index b89451b09..f400cfd66 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block)
       case nir_instr_type_alu: {
          /* It must be a move operation */
          nir_alu_instr *mov = nir_instr_as_alu(instr);
-         if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+         if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
+             mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
+             mov->op != nir_op_fabs && mov->op != nir_op_iabs)
             return false;
 
          /* Can't handle saturate */
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
index fa11a312e..fb8c9344c 100644
--- a/mesalib/src/glsl/nir/nir_print.c
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp)
 }
 
 static void
-print_alu_src(nir_alu_src *src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
 {
-   if (src->negate)
+   if (instr->src[src].negate)
       fprintf(fp, "-");
-   if (src->abs)
+   if (instr->src[src].abs)
       fprintf(fp, "abs(");
 
-   print_src(&src->src, fp);
+   print_src(&instr->src[src].src, fp);
 
-   if (src->swizzle[0] != 0 ||
-       src->swizzle[1] != 1 ||
-       src->swizzle[2] != 2 ||
-       src->swizzle[3] != 3) {
+   bool print_swizzle = false;
+   for (unsigned i = 0; i < 4; i++) {
+      if (!nir_alu_instr_channel_used(instr, src, i))
+         continue;
+
+      if (instr->src[src].swizzle[i] != i) {
+         print_swizzle = true;
+         break;
+      }
+   }
+
+   if (print_swizzle) {
       fprintf(fp, ".");
-      for (unsigned i = 0; i < 4; i++)
-         fprintf(fp, "%c", "xyzw"[src->swizzle[i]]);
+      for (unsigned i = 0; i < 4; i++) {
+         if (!nir_alu_instr_channel_used(instr, src, i))
+            continue;
+
+         fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+      }
    }
 
-   if (src->abs)
+   if (instr->src[src].abs)
       fprintf(fp, ")");
 }
 
@@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_alu_src(&instr->src[i], fp);
+      print_alu_src(instr, i, fp);
    }
 }
 
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
index e7f8aeacb..4417e2a48 100644
--- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live)
 }
 
 static void
-remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct set *live)
 {
-   foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+   foreach_list_typed_safe(nir_variable, var, node, var_list) {
       struct set_entry *entry = _mesa_set_search(live, var);
-      if (entry == NULL)
-         exec_node_remove(&var->node);
-   }
-}
-
-static void
-remove_dead_global_vars(nir_shader *shader, struct set *live)
-{
-   foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
-      struct set_entry *entry = _mesa_set_search(live, var);
-      if (entry == NULL)
+      if (entry == NULL) {
          exec_node_remove(&var->node);
+         ralloc_free(var);
+      }
    }
 }
 
@@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader)
 
    add_var_use_shader(shader, live);
 
-   remove_dead_global_vars(shader, live);
+   remove_dead_vars(&shader->globals, live);
 
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         remove_dead_local_vars(overload->impl, live);
+         remove_dead_vars(&overload->impl->locals, live);
    }
 
    _mesa_set_destroy(live, NULL);
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
index 73a802be7..5ba016085 100644
--- a/mesalib/src/glsl/nir/nir_search.c
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
    if (matched)
       return true;
 
-   if (nir_op_infos[instr->op].num_inputs == 2 &&
-       (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+   if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+      assert(nir_op_infos[instr->op].num_inputs == 2);
       if (!match_value(expr->srcs[0], instr, 1, num_components,
                        swizzle, state))
          return false;
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
index 4d663b51b..fc72c078c 100644
--- a/mesalib/src/glsl/nir/nir_split_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
           * belongs to the copy instruction and b) the deref chains may
           * have some of the same links due to the way we constructed them
           */
-         nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
-         nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+         nir_deref *src = nir_copy_deref(new_copy, src_head);
+         nir_deref *dest = nir_copy_deref(new_copy, dest_head);
 
          new_copy->variables[0] = nir_deref_as_var(dest);
          new_copy->variables[1] = nir_deref_as_var(src);
diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c
new file mode 100644
index 000000000..d3549756a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_sweep.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on).  However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+/* Call ralloc_steal(mem_ctx, obj) on each 'type' object linked into list. */
+#define steal_list(mem_ctx, type, list) \
+   foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+/* nir_foreach_src callback: steal a non-SSA source's reg.indirect, if set. */
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+   if (!src->is_ssa && src->reg.indirect)
+      ralloc_steal(nir, src->reg.indirect);
+   return true;
+}
+
+/* nir_foreach_dest callback: steal a non-SSA dest's reg.indirect, if set. */
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+   if (!dest->is_ssa && dest->reg.indirect)
+      ralloc_steal(nir, dest->reg.indirect);
+   return true;
+}
+
+/* Steal the block, each of its instructions, and their reg indirects. */
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+   ralloc_steal(nir, block);
+
+   nir_foreach_instr(block, instr) {
+      ralloc_steal(nir, instr);
+      nir_foreach_src(instr, sweep_src_indirect, nir);
+      nir_foreach_dest(instr, sweep_dest_indirect, nir);
+   }
+}
+
+/* Steal an if node, then sweep every CF node in its then- and else-lists. */
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+   ralloc_steal(nir, iff);
+
+   foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+      sweep_cf_node(nir, cf_node);
+   }
+   foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+      sweep_cf_node(nir, cf_node);
+   }
+}
+
+/* Steal a loop node, then sweep every CF node in its body. */
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+   ralloc_steal(nir, loop);
+   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+      sweep_cf_node(nir, cf_node);
+   }
+}
+/* Sweep one CF node by dispatching on its type (block, if or loop). */
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+   switch (cf_node->type) {
+   case nir_cf_node_block:
+      sweep_block(nir, nir_cf_node_as_block(cf_node));
+      break;
+   case nir_cf_node_if:
+      sweep_if(nir, nir_cf_node_as_if(cf_node));
+      break;
+   case nir_cf_node_loop:
+      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+      break;
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+/* Steal a function implementation: the impl, its params and return_var,
+ * its locals and registers, every CF node in its body, and its end block.
+ */
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+   ralloc_steal(nir, impl);
+   ralloc_steal(nir, impl->params);
+   ralloc_steal(nir, impl->return_var);
+   steal_list(nir, nir_variable, &impl->locals);
+   steal_list(nir, nir_register, &impl->registers);
+   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+      sweep_cf_node(nir, cf_node);
+   }
+   sweep_block(nir, impl->end_block);
+
+   /* Wipe out all the metadata, if any. */
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/* Steal a function, each overload with its params, and sweep any impl. */
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+   ralloc_steal(nir, f);
+   foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
+      ralloc_steal(nir, overload);
+      ralloc_steal(nir, overload->params);
+      if (overload->impl)
+         sweep_impl(nir, overload->impl);
+   }
+}
+
+/* Free every ralloc child of nir that the walk below does not steal back. */
+void
+nir_sweep(nir_shader *nir)
+{
+   void *rubbish = ralloc_context(NULL);
+   /* First, move ownership of all the memory to a temporary context; assume dead. */
+   ralloc_adopt(rubbish, nir);
+
+   /* Variables and registers are not dead.  Steal them back. */
+   steal_list(nir, nir_variable, &nir->uniforms);
+   steal_list(nir, nir_variable, &nir->inputs);
+   steal_list(nir, nir_variable, &nir->outputs);
+   steal_list(nir, nir_variable, &nir->globals);
+   steal_list(nir, nir_variable, &nir->system_values);
+   steal_list(nir, nir_register, &nir->registers);
+
+   /* Recurse into functions, stealing their contents back. */
+   foreach_list_typed(nir_function, func, node, &nir->functions) {
+      sweep_function(nir, func);
+   }
+
+   /* Free everything we didn't steal back. */
+   ralloc_free(rubbish);
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
index 47cf45393..53ff54766 100644
--- a/mesalib/src/glsl/nir/nir_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
    set_foreach(block->predecessors, entry) {
       nir_block *pred = (nir_block *) entry->key;
 
-      nir_phi_src *src = ralloc(mem_ctx, nir_phi_src);
+      nir_phi_src *src = ralloc(instr, nir_phi_src);
       src->pred = pred;
       src->src.is_ssa = false;
       src->src.reg.base_offset = 0;
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
index a13c3e12a..f0d0b46d2 100644
--- a/mesalib/src/glsl/nir/nir_types.cpp
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_void_type(void)
 }
 
 const glsl_type *
+glsl_float_type(void)
+{
+   return glsl_type::float_type;
+}
+
+const glsl_type *
 glsl_vec4_type(void)
 {
    return glsl_type::vec4_type;
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
index 494051a67..276d4ad62 100644
--- a/mesalib/src/glsl/nir/nir_types.h
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
 bool glsl_type_is_matrix(const struct glsl_type *type);
 
 const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_float_type(void);
 const struct glsl_type *glsl_vec4_type(void);
 const struct glsl_type *glsl_array_type(const struct glsl_type *base,
                                         unsigned elements);
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
index f247ae069..a7aa79837 100644
--- a/mesalib/src/glsl/nir/nir_validate.c
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
 static void
 validate_deref_chain(nir_deref *deref, validate_state *state)
 {
+   assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
    nir_deref *parent = NULL;
    while (deref != NULL) {
       switch (deref->deref_type) {
@@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state)
 }
 
 static void
-validate_deref_var(nir_deref_var *deref, validate_state *state)
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
 {
    assert(deref != NULL);
+   assert(ralloc_parent(deref) == parent_mem_ctx);
    assert(deref->deref.type == deref->var->type);
 
    validate_var_use(deref->var, state);
@@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
 
    unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
    for (unsigned i = 0; i < num_vars; i++) {
-      validate_deref_var(instr->variables[i], state);
+      validate_deref_var(instr, instr->variables[i], state);
    }
 
    switch (instr->intrinsic) {
@@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
    }
 
    if (instr->sampler != NULL)
-      validate_deref_var(instr->sampler, state);
+      validate_deref_var(instr, instr->sampler, state);
 }
 
 static void
@@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
 
    for (unsigned i = 0; i < instr->num_params; i++) {
       assert(instr->callee->params[i].type == instr->params[i]->deref.type);
-      validate_deref_var(instr->params[i], state);
+      validate_deref_var(instr, instr->params[i], state);
    }
 
-   validate_deref_var(instr->return_deref, state);
+   validate_deref_var(instr, instr->return_deref, state);
 }
 
 static void
@@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state)
       break;
 
    default:
-      assert(!"Invalid ALU instruction type");
-      break;
+      unreachable("Invalid CF node type");
    }
 }
 
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index 69c03ea8b..3d2f2ca0b 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -290,6 +290,20 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
    ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
    unsigned int i;
 
+   if (ir->operation == ir_binop_mul &&
+       ir->operands[0]->type->is_matrix() &&
+       ir->operands[1]->type->is_vector()) {
+      ir_expression *matrix_mul = ir->operands[0]->as_expression();
+
+      if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
+         matrix_mul->operands[0]->type->is_matrix() &&
+         matrix_mul->operands[1]->type->is_matrix()) {
+
+         return mul(matrix_mul->operands[0],
+                    mul(matrix_mul->operands[1], ir->operands[1]));
+      }
+   }
+
    assert(ir->get_num_operands() <= 4);
    for (i = 0; i < ir->get_num_operands(); i++) {
       if (ir->operands[i]->type->is_matrix())
@@ -421,6 +435,18 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       break;
    }
 
+   case ir_unop_saturate:
+      if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
+         ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
+         ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
+
+         if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
+             b2f_1 && b2f_1->operation == ir_unop_b2f) {
+            return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
+         }
+      }
+      break;
+
    case ir_binop_add:
       if (is_vec_zero(op_const[0]))
 	 return ir->operands[1];
@@ -518,6 +544,10 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (is_vec_negative_one(op_const[1]))
          return neg(ir->operands[0]);
 
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
+          op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
+         return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
+      }
 
       /* Reassociate multiplication of constants so that we can do
        * constant folding.
@@ -544,6 +574,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
             continue;
 
          ir_expression *add_expr = floor_expr->operands[0]->as_expression();
+         if (!add_expr)
+            continue;
 
          for (int j = 0; j < 2; j++) {
             ir_expression *abs_expr = add_expr->operands[j]->as_expression();
diff --git a/mesalib/src/glsl/opt_cse.cpp b/mesalib/src/glsl/opt_cse.cpp
index b0b67f496..4b8e9a07b 100644
--- a/mesalib/src/glsl/opt_cse.cpp
+++ b/mesalib/src/glsl/opt_cse.cpp
@@ -63,6 +63,17 @@ public:
       var = NULL;
    }
 
+   void init(ir_instruction *base_ir, ir_rvalue **val)
+   {
+      this->val = val;
+      this->base_ir = base_ir;
+      this->var = NULL;
+      /* Callers must supply a non-NULL val, *val and base_ir. */
+      assert(val);
+      assert(*val);
+      assert(base_ir);
+   }
+
    /**
     * The pointer to the expression that we might be able to reuse
     *
@@ -116,6 +127,18 @@ private:
    ir_rvalue *try_cse(ir_rvalue *rvalue);
    void add_to_ae(ir_rvalue **rvalue);
 
+   /**
+    * Move all nodes from the ae list to the free list
+    */
+   void empty_ae_list();
+
+   /**
+    * Get and initialize a new ae_entry
+    *
+    * This will either come from the free list or be freshly allocated.
+    */
+   ae_entry *get_ae_entry(ir_rvalue **rvalue);
+
    /** List of ae_entry: The available expressions to reuse */
    exec_list *ae;
 
@@ -126,6 +149,11 @@ private:
     * right.
     */
    exec_list *validate_instructions;
+
+   /**
+    * List of available-for-use ae_entry objects.
+    */
+   exec_list free_ae_entries;
 };
 
 /**
@@ -322,6 +350,25 @@ cse_visitor::try_cse(ir_rvalue *rvalue)
    return NULL;
 }
 
+void
+cse_visitor::empty_ae_list()
+{
+   free_ae_entries.append_list(ae);  /* kept for get_ae_entry to reuse */
+}
+
+ae_entry *
+cse_visitor::get_ae_entry(ir_rvalue **rvalue)
+{
+   /* Prefer recycling a node parked on the free list over a new allocation. */
+   ae_entry *recycled = (ae_entry *) free_ae_entries.pop_head();
+   if (recycled == NULL)
+      return new(mem_ctx) ae_entry(base_ir, rvalue);
+
+   /* Rebind the recycled node to the current instruction and rvalue. */
+   recycled->init(base_ir, rvalue);
+   return recycled;
+}
+
 /** Add the rvalue to the list of available expressions for CSE. */
 void
 cse_visitor::add_to_ae(ir_rvalue **rvalue)
@@ -332,7 +379,7 @@ cse_visitor::add_to_ae(ir_rvalue **rvalue)
       printf("\n");
    }
 
-   ae->push_tail(new(mem_ctx) ae_entry(base_ir, rvalue));
+   ae->push_tail(get_ae_entry(rvalue));
 
    if (debug)
       dump_ae(ae);
@@ -370,33 +417,33 @@ cse_visitor::visit_enter(ir_if *ir)
 {
    handle_rvalue(&ir->condition);
 
-   ae->make_empty();
+   empty_ae_list();
    visit_list_elements(this, &ir->then_instructions);
 
-   ae->make_empty();
+   empty_ae_list();
    visit_list_elements(this, &ir->else_instructions);
 
-   ae->make_empty();
+   empty_ae_list();
    return visit_continue_with_parent;
 }
 
 ir_visitor_status
 cse_visitor::visit_enter(ir_function_signature *ir)
 {
-   ae->make_empty();
+   empty_ae_list();
    visit_list_elements(this, &ir->body);
 
-   ae->make_empty();
+   empty_ae_list();
    return visit_continue_with_parent;
 }
 
 ir_visitor_status
 cse_visitor::visit_enter(ir_loop *ir)
 {
-   ae->make_empty();
+   empty_ae_list();
    visit_list_elements(this, &ir->body_instructions);
 
-   ae->make_empty();
+   empty_ae_list();
    return visit_continue_with_parent;
 }
 
diff --git a/mesalib/src/glsl/s_expression.cpp b/mesalib/src/glsl/s_expression.cpp
index 7eaa491e2..f82e155a6 100644
--- a/mesalib/src/glsl/s_expression.cpp
+++ b/mesalib/src/glsl/s_expression.cpp
@@ -23,8 +23,8 @@
  */
 
 #include <assert.h>
-#include <limits>
 #include <stdio.h>
+#include <math.h>
 #include "s_expression.h"
 
 s_symbol::s_symbol(const char *str, size_t n)
@@ -70,7 +70,7 @@ read_atom(void *ctx, const char *&src, char *&symbol_buffer)
    // requires strtof to parse '+INF' as +Infinity, but we still support some
    // non-C99-compliant compilers (e.g. MSVC).
    if (n == 4 && strncmp(src, "+INF", 4) == 0) {
-      expr = new(ctx) s_float(std::numeric_limits<float>::infinity());
+      expr = new(ctx) s_float(INFINITY);
    } else {
       // Check if the atom is a number.
       char *float_end = NULL;
diff --git a/mesalib/src/glsl/shader_enums.h b/mesalib/src/glsl/shader_enums.h
new file mode 100644
index 000000000..79e0f6b5f
--- /dev/null
+++ b/mesalib/src/glsl/shader_enums.h
@@ -0,0 +1,187 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SHADER_ENUMS_H
+#define SHADER_ENUMS_H
+
+/**
+ * Shader stages. Note that these will become 6 with tessellation.
+ *
+ * The order must match how shaders are ordered in the pipeline.
+ * The GLSL linker assumes that if i<j, then the j-th shader is
+ * executed later than the i-th shader.
+ */
+typedef enum
+{
+   MESA_SHADER_VERTEX = 0,
+   MESA_SHADER_GEOMETRY = 1,
+   MESA_SHADER_FRAGMENT = 2,
+   MESA_SHADER_COMPUTE = 3,
+} gl_shader_stage;
+
+#define MESA_SHADER_STAGES (MESA_SHADER_COMPUTE + 1)
+
+/**
+ * Bitflags for system values.
+ */
+#define SYSTEM_BIT_SAMPLE_ID ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_ID)
+#define SYSTEM_BIT_SAMPLE_POS ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_POS)
+#define SYSTEM_BIT_SAMPLE_MASK_IN ((uint64_t)1 << SYSTEM_VALUE_SAMPLE_MASK_IN)
+/**
+ * If the gl_register_file is PROGRAM_SYSTEM_VALUE, the register index will be
+ * one of these values.  If a NIR variable's mode is nir_var_system_value, it
+ * will be one of these values.
+ */
+typedef enum
+{
+   /**
+    * \name Vertex shader system values
+    */
+   /*@{*/
+   /**
+    * OpenGL-style vertex ID.
+    *
+    * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the
+    * OpenGL 3.3 core profile spec says:
+    *
+    *     "gl_VertexID holds the integer index i implicitly passed by
+    *     DrawArrays or one of the other drawing commands defined in section
+    *     2.8.3."
+    *
+    * Section 2.8.3 (Drawing Commands) of the same spec says:
+    *
+    *     "The commands....are equivalent to the commands with the same base
+    *     name (without the BaseVertex suffix), except that the ith element
+    *     transferred by the corresponding draw call will be taken from
+    *     element indices[i] + basevertex of each enabled array."
+    *
+    * Additionally, the overview in the GL_ARB_shader_draw_parameters spec
+    * says:
+    *
+    *     "In unextended GL, vertex shaders have inputs named gl_VertexID and
+    *     gl_InstanceID, which contain, respectively the index of the vertex
+    *     and instance. The value of gl_VertexID is the implicitly passed
+    *     index of the vertex being processed, which includes the value of
+    *     baseVertex, for those commands that accept it."
+    *
+    * gl_VertexID gets basevertex added in.  This differs from DirectX where
+    * SV_VertexID does \b not get basevertex added in.
+    *
+    * \note
+    * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be
+    * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus
+    * \c SYSTEM_VALUE_BASE_VERTEX.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX
+    */
+   SYSTEM_VALUE_VERTEX_ID,
+
+   /**
+    * Instance ID as supplied to gl_InstanceID
+    *
+    * Values assigned to gl_InstanceID always begin with zero, regardless of
+    * the value of baseinstance.
+    *
+    * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec
+    * says:
+    *
+    *     "gl_InstanceID holds the integer instance number of the current
+    *     primitive in an instanced draw call (see section 10.5)."
+    *
+    * Through a big chain of pseudocode, section 10.5 describes that
+    * baseinstance is not counted by gl_InstanceID.  In that section, notice
+    *
+    *     "If an enabled vertex attribute array is instanced (it has a
+    *     non-zero divisor as specified by VertexAttribDivisor), the element
+    *     index that is transferred to the GL, for all vertices, is given by
+    *
+    *         floor(instance/divisor) + baseinstance
+    *
+    *     If an array corresponding to an attribute required by a vertex
+    *     shader is not enabled, then the corresponding element is taken from
+    *     the current attribute state (see section 10.2)."
+    *
+    * Note that baseinstance is \b not included in the value of instance.
+    */
+   SYSTEM_VALUE_INSTANCE_ID,
+
+   /**
+    * DirectX-style vertex ID.
+    *
+    * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include
+    * the value of basevertex.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX
+    */
+   SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+
+   /**
+    * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar
+    * functions.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
+    */
+   SYSTEM_VALUE_BASE_VERTEX,
+   /*@}*/
+
+   /**
+    * \name Geometry shader system values
+    */
+   /*@{*/
+   SYSTEM_VALUE_INVOCATION_ID,
+   /*@}*/
+
+   /**
+    * \name Fragment shader system values
+    */
+   /*@{*/
+   SYSTEM_VALUE_FRONT_FACE,     /**< (not done yet) */
+   SYSTEM_VALUE_SAMPLE_ID,
+   SYSTEM_VALUE_SAMPLE_POS,
+   SYSTEM_VALUE_SAMPLE_MASK_IN,
+   /*@}*/
+
+   SYSTEM_VALUE_MAX             /**< Number of values */
+} gl_system_value;
+
+
+/**
+ * The possible interpolation qualifiers that can be applied to a fragment
+ * shader input in GLSL.
+ *
+ * Note: INTERP_QUALIFIER_NONE must be 0 so that memsetting the
+ * gl_fragment_program data structure to 0 causes the default behavior.
+ */
+enum glsl_interp_qualifier
+{
+   INTERP_QUALIFIER_NONE = 0,
+   INTERP_QUALIFIER_SMOOTH,
+   INTERP_QUALIFIER_FLAT,
+   INTERP_QUALIFIER_NOPERSPECTIVE,
+   INTERP_QUALIFIER_COUNT /**< Number of interpolation qualifiers */
+};
+
+
+#endif /* SHADER_ENUMS_H */