27 files changed, 1264 insertions, 200 deletions
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
index 357944da6..f6b8331d4 100644
--- a/mesalib/src/glsl/nir/glsl_to_nir.cpp
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -88,6 +88,8 @@ private:
    exec_list *cf_node_list;
    nir_instr *result; /* result of the expression tree last visited */
 
+   nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
+
    /* the head of the dereference chain we're creating */
    nir_deref_var *deref_head;
    /* the tail of the dereference chain we're creating */
@@ -156,6 +158,14 @@ nir_visitor::~nir_visitor()
    _mesa_hash_table_destroy(this->overload_table, NULL);
 }
 
+nir_deref_var *
+nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
+{
+   ir->accept(this); /* visiting ir builds the deref chain in deref_head */
+   ralloc_steal(mem_ctx, this->deref_head); /* reparent the head to mem_ctx */
+   return this->deref_head;
+}
+
 static nir_constant *
 constant_copy(ir_constant *ir, void *mem_ctx)
 {
@@ -582,13 +592,11 @@ void
 nir_visitor::visit(ir_return *ir)
 {
    if (ir->value != NULL) {
-      ir->value->accept(this);
       nir_intrinsic_instr *copy =
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
 
-      copy->variables[0] = nir_deref_var_create(this->shader,
-                                                this->impl->return_var);
-      copy->variables[1] = this->deref_head;
+      copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
    }
 
    nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
@@ -613,8 +621,7 @@ nir_visitor::visit(ir_call *ir)
       nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
       ir_dereference *param =
          (ir_dereference *) ir->actual_parameters.get_head();
-      param->accept(this);
-      instr->variables[0] = this->deref_head;
+      instr->variables[0] = evaluate_deref(&instr->instr, param);
       nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
 
       nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -623,8 +630,7 @@ nir_visitor::visit(ir_call *ir)
          nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
       store_instr->num_components = 1;
 
-      ir->return_deref->accept(this);
-      store_instr->variables[0] = this->deref_head;
+      store_instr->variables[0] = evaluate_deref(&store_instr->instr, ir->return_deref);
       store_instr->src[0].is_ssa = true;
       store_instr->src[0].ssa = &instr->dest.ssa;
 
@@ -642,13 +648,11 @@ nir_visitor::visit(ir_call *ir)
 
    unsigned i = 0;
    foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
-      param->accept(this);
-      instr->params[i] = this->deref_head;
+      instr->params[i] = evaluate_deref(&instr->instr, param);
       i++;
    }
 
-   ir->return_deref->accept(this);
-   instr->return_deref = this->deref_head;
+   instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
    nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
 }
 
@@ -663,12 +667,8 @@ nir_visitor::visit(ir_assignment *ir)
       nir_intrinsic_instr *copy =
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
 
-      ir->lhs->accept(this);
-      copy->variables[0] = this->deref_head;
-
-      ir->rhs->accept(this);
-      copy->variables[1] = this->deref_head;
-
+      copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
+      copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);
 
       if (ir->condition) {
          nir_if *if_stmt = nir_if_create(this->shader);
@@ -700,6 +700,7 @@ nir_visitor::visit(ir_assignment *ir)
       load->num_components = ir->lhs->type->vector_elements;
       nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
       load->variables[0] = lhs_deref;
+      ralloc_steal(load, load->variables[0]);
       nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
 
       nir_op vec_op;
@@ -741,7 +742,7 @@ nir_visitor::visit(ir_assignment *ir)
    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
    store->num_components = ir->lhs->type->vector_elements;
-   nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+   nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
    store->src[0] = src;
 
@@ -816,6 +817,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
          nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
       load_instr->num_components = ir->type->vector_elements;
       load_instr->variables[0] = this->deref_head;
+      ralloc_steal(load_instr, load_instr->variables[0]);
       add_instr(&load_instr->instr, ir->type->vector_elements);
    }
 
@@ -959,6 +961,7 @@ nir_visitor::visit(ir_expression *ir)
       nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
       intrin->num_components = deref->type->vector_elements;
       intrin->variables[0] = this->deref_head;
+      ralloc_steal(intrin, intrin->variables[0]);
 
       if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
           intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
@@ -1087,12 +1090,6 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
    case ir_unop_sin:   emit(nir_op_fsin,   dest_size, srcs); break;
    case ir_unop_cos:   emit(nir_op_fcos,   dest_size, srcs); break;
-   case ir_unop_sin_reduced:
-      emit(nir_op_fsin_reduced, dest_size, srcs);
-      break;
-   case ir_unop_cos_reduced:
-      emit(nir_op_fcos_reduced, dest_size, srcs);
-      break;
    case ir_unop_dFdx:        emit(nir_op_fddx,        dest_size, srcs); break;
    case ir_unop_dFdy:        emit(nir_op_fddy,        dest_size, srcs); break;
    case ir_unop_dFdx_fine:   emit(nir_op_fddx_fine,   dest_size, srcs); break;
@@ -1210,6 +1207,9 @@ nir_visitor::visit(ir_expression *ir)
    case ir_binop_bit_and:
    case ir_binop_bit_or:
    case ir_binop_bit_xor:
+   case ir_binop_logic_and:
+   case ir_binop_logic_or:
+   case ir_binop_logic_xor:
    case ir_binop_lshift:
    case ir_binop_rshift:
       switch (ir->operation) {
@@ -1270,6 +1270,24 @@ nir_visitor::visit(ir_expression *ir)
       case ir_binop_bit_xor:
          op = nir_op_ixor;
          break;
+      case ir_binop_logic_and:
+         if (supports_ints)
+            op = nir_op_iand;
+         else
+            op = nir_op_fand;
+         break;
+      case ir_binop_logic_or:
+         if (supports_ints)
+            op = nir_op_ior;
+         else
+            op = nir_op_for;
+         break;
+      case ir_binop_logic_xor:
+         if (supports_ints)
+            op = nir_op_ixor;
+         else
+            op = nir_op_fxor;
+         break;
       case ir_binop_lshift:
          op = nir_op_ishl;
          break;
@@ -1444,24 +1462,6 @@ nir_visitor::visit(ir_expression *ir)
          }
       }
       break;
-   case ir_binop_logic_and:
-      if (supports_ints)
-         emit(nir_op_iand, dest_size, srcs);
-      else
-         emit(nir_op_fand, dest_size, srcs);
-      break;
-   case ir_binop_logic_or:
-      if (supports_ints)
-         emit(nir_op_ior, dest_size, srcs);
-      else
-         emit(nir_op_for, dest_size, srcs);
-      break;
-   case ir_binop_logic_xor:
-      if (supports_ints)
-         emit(nir_op_ixor, dest_size, srcs);
-      else
-         emit(nir_op_fxor, dest_size, srcs);
-      break;
    case ir_binop_dot:
       switch (ir->operands[0]->type->vector_elements) {
          case 2: emit(nir_op_fdot2, dest_size, srcs); break;
@@ -1633,8 +1633,7 @@ nir_visitor::visit(ir_texture *ir)
       unreachable("not reached");
    }
 
-   ir->sampler->accept(this);
-   instr->sampler = this->deref_head;
+   instr->sampler = evaluate_deref(&instr->instr, ir->sampler);
 
    unsigned src_number = 0;
 
@@ -1759,7 +1758,7 @@ nir_visitor::visit(ir_dereference_record *ir)
    int field_index = this->deref_tail->type->field_index(ir->field);
    assert(field_index >= 0);
 
-   nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+   nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
    deref->deref.type = ir->type;
    this->deref_tail->child = &deref->deref;
    this->deref_tail = &deref->deref;
@@ -1783,5 +1782,6 @@ nir_visitor::visit(ir_dereference_array *ir)
    ir->array->accept(this);
 
    this->deref_tail->child = &deref->deref;
+   ralloc_steal(this->deref_tail, deref);
    this->deref_tail = &deref->deref;
 }
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
index 6459d5108..c6e53612b 100644
--- a/mesalib/src/glsl/nir/nir.c
+++ b/mesalib/src/glsl/nir/nir.c
@@ -58,11 +58,11 @@ reg_create(void *mem_ctx, struct exec_list *list)
    nir_register *reg = ralloc(mem_ctx, nir_register);
 
    reg->parent_instr = NULL;
-   reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->uses = _mesa_set_create(reg, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->defs = _mesa_set_create(reg, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   reg->if_uses = _mesa_set_create(reg, _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
 
    reg->num_components = 0;
@@ -108,7 +108,7 @@ nir_function_create(nir_shader *shader, const char *name)
 
    exec_list_push_tail(&shader->functions, &func->node);
    exec_list_make_empty(&func->overload_list);
-   func->name = name;
+   func->name = ralloc_strdup(func, name);
    func->shader = shader;
 
    return func;
@@ -285,10 +285,10 @@ nir_block_create(void *mem_ctx)
    cf_init(&block->cf_node, nir_cf_node_block);
 
    block->successors[0] = block->successors[1] = NULL;
-   block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
    block->imm_dom = NULL;
-   block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                           _mesa_key_pointer_equal);
 
    exec_list_make_empty(&block->instr_list);
@@ -381,11 +381,11 @@ alu_src_init(nir_alu_src *src)
 }
 
 nir_alu_instr *
-nir_alu_instr_create(void *mem_ctx, nir_op op)
+nir_alu_instr_create(nir_shader *shader, nir_op op)
 {
    unsigned num_srcs = nir_op_infos[op].num_inputs;
    nir_alu_instr *instr =
-      ralloc_size(mem_ctx,
+      ralloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
 
    instr_init(&instr->instr, nir_instr_type_alu);
@@ -398,18 +398,18 @@ nir_alu_instr_create(void *mem_ctx, nir_op op)
 }
 
 nir_jump_instr *
-nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
 {
-   nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
    instr_init(&instr->instr, nir_instr_type_jump);
    instr->type = type;
    return instr;
 }
 
 nir_load_const_instr *
-nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
 {
-   nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+   nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
    instr_init(&instr->instr, nir_instr_type_load_const);
 
    nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -418,11 +418,11 @@ nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
 }
 
 nir_intrinsic_instr *
-nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
 {
    unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
    nir_intrinsic_instr *instr =
-      ralloc_size(mem_ctx,
+      ralloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
 
    instr_init(&instr->instr, nir_instr_type_intrinsic);
@@ -438,29 +438,29 @@ nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
 }
 
 nir_call_instr *
-nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+nir_call_instr_create(nir_shader *shader, nir_function_overload *callee)
 {
-   nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+   nir_call_instr *instr = ralloc(shader, nir_call_instr);
    instr_init(&instr->instr, nir_instr_type_call);
 
    instr->callee = callee;
    instr->num_params = callee->num_params;
-   instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+   instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
    instr->return_deref = NULL;
 
    return instr;
 }
 
 nir_tex_instr *
-nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
 {
-   nir_tex_instr *instr = ralloc(mem_ctx, nir_tex_instr);
+   nir_tex_instr *instr = ralloc(shader, nir_tex_instr);
    instr_init(&instr->instr, nir_instr_type_tex);
 
    dest_init(&instr->dest);
 
    instr->num_srcs = num_srcs;
-   instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
    for (unsigned i = 0; i < num_srcs; i++)
       src_init(&instr->src[i].src);
 
@@ -472,9 +472,9 @@ nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
 }
 
 nir_phi_instr *
-nir_phi_instr_create(void *mem_ctx)
+nir_phi_instr_create(nir_shader *shader)
 {
-   nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
    instr_init(&instr->instr, nir_instr_type_phi);
 
    dest_init(&instr->dest);
@@ -483,9 +483,9 @@ nir_phi_instr_create(void *mem_ctx)
 }
 
 nir_parallel_copy_instr *
-nir_parallel_copy_instr_create(void *mem_ctx)
+nir_parallel_copy_instr_create(nir_shader *shader)
 {
-   nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
    instr_init(&instr->instr, nir_instr_type_parallel_copy);
 
    exec_list_make_empty(&instr->entries);
@@ -494,9 +494,9 @@ nir_parallel_copy_instr_create(void *mem_ctx)
 }
 
 nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
 {
-   nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
    instr_init(&instr->instr, nir_instr_type_ssa_undef);
 
    nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
@@ -543,7 +543,7 @@ copy_deref_var(void *mem_ctx, nir_deref_var *deref)
    nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -558,7 +558,7 @@ copy_deref_array(void *mem_ctx, nir_deref_array *deref)
    }
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -568,7 +568,7 @@ copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
    nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
    ret->deref.type = deref->deref.type;
    if (deref->deref.child)
-      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+      ret->deref.child = nir_copy_deref(ret, deref->deref.child);
    return ret;
 }
 
@@ -1834,13 +1834,11 @@ void
 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                  unsigned num_components, const char *name)
 {
-   void *mem_ctx = ralloc_parent(instr);
-
    def->name = name;
    def->parent_instr = instr;
-   def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   def->uses = _mesa_set_create(instr, _mesa_hash_pointer,
                                 _mesa_key_pointer_equal);
-   def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+   def->if_uses = _mesa_set_create(instr, _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
    def->num_components = num_components;
 
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
index 29fe94243..74772c798 100644
--- a/mesalib/src/glsl/nir/nir.h
+++ b/mesalib/src/glsl/nir/nir.h
@@ -34,6 +34,7 @@
 #include "util/set.h"
 #include "util/bitset.h"
 #include "nir_types.h"
+#include "glsl/shader_enums.h"
 #include <stdio.h>
 
 #include "nir_opcodes.h"
@@ -529,6 +530,16 @@ nir_src_for_reg(nir_register *reg)
    return src;
 }
 
+static inline nir_instr *
+nir_src_get_parent_instr(const nir_src *src)
+{
+   if (src->is_ssa) {
+      return src->ssa->parent_instr; /* instruction recorded on the SSA def */
+   } else {
+      return src->reg.reg->parent_instr; /* register's parent; may be NULL */
+   }
+}
+
 static inline nir_dest
 nir_dest_for_reg(nir_register *reg)
 {
@@ -1365,11 +1376,17 @@ typedef struct nir_function {
 
 typedef struct nir_shader_compiler_options {
    bool lower_ffma;
+   bool lower_flrp;
    bool lower_fpow;
    bool lower_fsat;
    bool lower_fsqrt;
    /** lowers fneg and ineg to fsub and isub. */
    bool lower_negate;
+   /** lowers fsub and isub to fadd+fneg and iadd+ineg. */
+   bool lower_sub;
+
+   /** lowers {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f. */
+   bool lower_scmp;
 
    /**
     * Does the driver support real 32-bit integers?  (Otherwise, integers
@@ -1414,6 +1431,9 @@ typedef struct nir_shader {
     * access plus one
     */
    unsigned num_inputs, num_uniforms, num_outputs;
+
+   /** the number of uniforms that are only accessed directly */
+   unsigned num_direct_uniforms;
 } nir_shader;
 
 #define nir_foreach_overload(shader, overload)                        \
@@ -1466,26 +1486,26 @@ void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
 
 /** creates an instruction with default swizzle/writemask/etc. with NULL registers */
-nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
 
-nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
 
-nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
                                                   unsigned num_components);
 
-nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
                                                 nir_intrinsic_op op);
 
-nir_call_instr *nir_call_instr_create(void *mem_ctx,
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
                                       nir_function_overload *callee);
 
-nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
 
-nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
 
-nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
 
-nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
                                                 unsigned num_components);
 
 nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
@@ -1550,7 +1570,7 @@ void nir_print_instr(const nir_instr *instr, FILE *fp);
 #ifdef DEBUG
 void nir_validate_shader(nir_shader *shader);
 #else
-static inline void nir_validate_shader(nir_shader *shader) { }
+static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
 #endif /* DEBUG */
 
 void nir_calc_dominance_impl(nir_function_impl *impl);
@@ -1596,14 +1616,18 @@ void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_phis_to_scalar(nir_shader *shader);
 
 void nir_lower_samplers(nir_shader *shader,
-                        struct gl_shader_program *shader_program,
-                        struct gl_program *prog);
+                        const struct gl_shader_program *shader_program,
+                        gl_shader_stage stage);
 
 void nir_lower_system_values(nir_shader *shader);
+void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
 
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
 
+void nir_normalize_cubemap_coords(nir_shader *shader);
+
 void nir_live_variables_impl(nir_function_impl *impl);
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
 
@@ -1612,6 +1636,7 @@ void nir_convert_to_ssa(nir_shader *shader);
 void nir_convert_from_ssa(nir_shader *shader);
 
 bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_algebraic_late(nir_shader *shader);
 bool nir_opt_constant_folding(nir_shader *shader);
 
 bool nir_opt_global_to_local(nir_shader *shader);
@@ -1631,6 +1656,8 @@ bool nir_opt_peephole_ffma(nir_shader *shader);
 
 bool nir_opt_remove_phis(nir_shader *shader);
 
+void nir_sweep(nir_shader *shader);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
index afab1a008..bbf4f08ef 100644
--- a/mesalib/src/glsl/nir/nir_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -181,12 +181,23 @@ _algebraic_pass_template = mako.template.Template("""
 #include "nir.h"
 #include "nir_search.h"
 
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
 struct transform {
    const nir_search_expression *search;
    const nir_search_value *replace;
    unsigned condition_offset;
 };
 
+struct opt_state {
+   void *mem_ctx;
+   bool progress;
+   const bool *condition_flags;
+};
+
+#endif
+
 % for (opcode, xform_list) in xform_dict.iteritems():
 % for xform in xform_list:
    ${xform.search.render()}
@@ -200,12 +211,6 @@ static const struct transform ${pass_name}_${opcode}_xforms[] = {
 };
 % endfor
 
-struct opt_state {
-   void *mem_ctx;
-   bool progress;
-   const bool *condition_flags;
-};
-
 static bool
 ${pass_name}_block(nir_block *block, void *void_state)
 {
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
index 7c4f7fd96..d1419ee21 100644
--- a/mesalib/src/glsl/nir/nir_builder.h
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -28,6 +28,9 @@ struct exec_list;
 
 typedef struct nir_builder {
    struct exec_list *cf_node_list;
+   nir_instr *before_instr;
+   nir_instr *after_instr;
+
    nir_shader *shader;
    nir_function_impl *impl;
 } nir_builder;
@@ -45,8 +48,75 @@ nir_builder_insert_after_cf_list(nir_builder *build,
                                  struct exec_list *cf_node_list)
 {
    build->cf_node_list = cf_node_list;
+   build->before_instr = NULL;
+   build->after_instr = NULL;
+}
+
+static inline void
+nir_builder_insert_before_instr(nir_builder *build, nir_instr *before_instr)
+{
+   build->cf_node_list = NULL; /* clear the other two insertion modes */
+   build->before_instr = before_instr; /* new instrs go before this one */
+   build->after_instr = NULL;
 }
 
+static inline void
+nir_builder_insert_after_instr(nir_builder *build, nir_instr *after_instr)
+{
+   build->cf_node_list = NULL; /* clear the other two insertion modes */
+   build->before_instr = NULL;
+   build->after_instr = after_instr; /* new instrs go after this one */
+}
+
+static inline void
+nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
+{
+   if (build->cf_node_list) { /* mode 1: insert after the CF list */
+      nir_instr_insert_after_cf_list(build->cf_node_list, instr);
+   } else if (build->before_instr) { /* mode 2: insert before a fixed instr */
+      nir_instr_insert_before(build->before_instr, instr);
+   } else { /* mode 3: insert after a moving cursor instr */
+      assert(build->after_instr);
+      nir_instr_insert_after(build->after_instr, instr);
+      build->after_instr = instr; /* the next insert lands after this one */
+   }
+}
+
+static inline nir_ssa_def *
+nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value)
+{
+   nir_load_const_instr *load_const =
+      nir_load_const_instr_create(build->shader, num_components);
+   if (!load_const) /* creation failed: nothing is inserted */
+      return NULL;
+
+   load_const->value = value;
+
+   nir_builder_instr_insert(build, &load_const->instr); /* at builder's point */
+
+   return &load_const->def; /* the SSA def owned by the load_const */
+}
+
+static inline nir_ssa_def *
+nir_imm_float(nir_builder *build, float x)
+{
+   nir_const_value v = { { .f = {x, 0, 0, 0} } }; /* x in component 0, rest 0 */
+   return nir_build_imm(build, 1, v); /* one-component float constant */
+}
+
+static inline nir_ssa_def *
+nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
+{
+   nir_const_value v = { { .f = {x, y, z, w} } }; /* components 0..3 in order */
+   return nir_build_imm(build, 4, v); /* four-component float constant */
+}
+
+static inline nir_ssa_def *
+nir_imm_int(nir_builder *build, int x)
+{
+   nir_const_value v = { { .i = {x, 0, 0, 0} } }; /* x in component 0, rest 0 */
+   return nir_build_imm(build, 1, v); /* one-component integer constant */
+}
 
 static inline nir_ssa_def *
 nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
@@ -90,7 +160,7 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
    nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
    instr->dest.write_mask = (1 << num_components) - 1;
 
-   nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(build, &instr->instr);
 
    return &instr->dest.dest.ssa;
 }
@@ -127,4 +197,67 @@ nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
 
 #include "nir_builder_opcodes.h"
 
+/**
+ * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def.
+ */
+static inline nir_ssa_def *
+nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+   mov->dest.write_mask = (1 << num_components) - 1; /* low num_components bits */
+   mov->src[0] = src; /* copied whole, including the caller's swizzle */
+   nir_builder_instr_insert(build, &mov->instr);
+
+   return &mov->dest.dest.ssa; /* SSA def written by the new fmov */
+}
+
+static inline nir_ssa_def *
+nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
+{
+   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+   mov->dest.write_mask = (1 << num_components) - 1; /* low num_components bits */
+   mov->src[0] = src; /* copied whole, including the caller's swizzle */
+   nir_builder_instr_insert(build, &mov->instr);
+
+   return &mov->dest.dest.ssa; /* SSA def written by the new imov */
+}
+
+/**
+ * Construct an fmov or imov that reswizzles the source's components.
+ */
+static inline nir_ssa_def *
+nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
+            unsigned num_components, bool use_fmov)
+{
+   nir_alu_src alu_src;
+   memset(&alu_src, 0, sizeof(alu_src)); /* zero every field not set below */
+   alu_src.src = nir_src_for_ssa(src);
+   for (int i = 0; i < 4; i++) /* all 4 swiz entries are read, whatever num_components is */
+      alu_src.swizzle[i] = swiz[i];
+
+   return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
+                     nir_imov_alu(build, alu_src, num_components);
+}
+
+/**
+ * Turns a nir_src into a nir_ssa_def * so it can be passed to
+ * nir_build_alu()-based builder calls.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
+{
+   if (src.is_ssa && src.ssa->num_components == num_components)
+      return src.ssa; /* already SSA with the requested size: no new instr */
+
+   nir_alu_src alu;
+   memset(&alu, 0, sizeof(alu)); /* zero every field not set below */
+   alu.src = src;
+   for (int j = 0; j < 4; j++) /* identity swizzle */
+      alu.swizzle[j] = j;
+
+   return nir_imov_alu(build, alu, num_components); /* otherwise emit an imov */
+}
+
 #endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
index c3090fb06..184698abd 100644
--- a/mesalib/src/glsl/nir/nir_from_ssa.c
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -509,12 +509,13 @@ get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
       reg->num_components = def->num_components;
       reg->num_array_elems = 0;
 
-      /* This register comes from an SSA definition that was not part of a
-       * phi-web.  Therefore, we know it has a single unique definition
-       * that dominates all of its uses.  Therefore, we can copy the
+      /* This register comes from an SSA definition that is not part of a
+       * phi-web.  Unless it is an ssa_undef, it has a single unique
+       * definition that dominates all of its uses, so we can copy the
        * parent_instr from the SSA def safely.
        */
-      reg->parent_instr = def->parent_instr;
+      if (def->parent_instr->type != nir_instr_type_ssa_undef)
+         reg->parent_instr = def->parent_instr;
 
       _mesa_hash_table_insert(state->ssa_table, def, reg);
       return reg;
diff --git a/mesalib/src/glsl/nir/nir_lower_idiv.c b/mesalib/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000000000..7b6803207
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+ * Or perhaps we assume if you can do compute shaders you can also
+ * branch out to a pre-optimized shader library routine.
+ */
+
+/* Replaces an idiv/udiv/umod ALU instruction with a float-reciprocal based
+ * sequence emitted in front of it; other opcodes are left untouched.
+ */
+static void
+convert_instr(nir_builder *bld, nir_alu_instr *alu)
+{
+   nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
+   nir_op op = alu->op;
+   bool is_signed;
+
+   if (op != nir_op_idiv && op != nir_op_udiv && op != nir_op_umod)
+      return;
+
+   is_signed = (op == nir_op_idiv);
+
+   nir_builder_insert_before_instr(bld, &alu->instr);
+
+   numer = nir_ssa_for_src(bld, alu->src[0].src,
+                           nir_ssa_alu_instr_src_components(alu, 0));
+   denom = nir_ssa_for_src(bld, alu->src[1].src,
+                           nir_ssa_alu_instr_src_components(alu, 1));
+
+   if (is_signed) {
+      af = nir_i2f(bld, numer);
+      bf = nir_i2f(bld, denom);
+      af = nir_fabs(bld, af);
+      bf = nir_fabs(bld, bf);
+      a  = nir_iabs(bld, numer);
+      b  = nir_iabs(bld, denom);
+   } else {
+      af = nir_u2f(bld, numer);
+      bf = nir_u2f(bld, denom);
+      a  = numer;
+      b  = denom;
+   }
+
+   /* get first result: */
+   bf = nir_frcp(bld, bf);
+   bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
+   q  = nir_fmul(bld, af, bf);
+   q  = is_signed ? nir_f2i(bld, q) : nir_f2u(bld, q);
+
+   /* get error of first result: */
+   r = nir_imul(bld, q, b);
+   r = nir_isub(bld, a, r);
+   r = nir_u2f(bld, r);
+   r = nir_fmul(bld, r, bf);
+   r = nir_f2u(bld, r);
+
+   /* add quotients: */
+   q = nir_iadd(bld, q, r);
+
+   /* correction: if modulus >= divisor, add 1.  Both values are unsigned
+    * magnitudes here (a and b are either the udiv/umod operands or iabs
+    * results), so the comparison has to be unsigned: a signed compare
+    * misfires for divisors with the top bit set.
+    */
+   r = nir_imul(bld, q, b);
+   r = nir_isub(bld, a, r);
+
+   r = nir_uge(bld, r, b);
+   r = nir_b2i(bld, r);
+
+   q = nir_iadd(bld, q, r);
+   if (is_signed) {
+      /* fix the sign: negate q when numer and denom differ in sign */
+      r = nir_ixor(bld, numer, denom);
+      r = nir_ushr(bld, r, nir_imm_int(bld, 31));
+      r = nir_i2b(bld, r);
+      b = nir_ineg(bld, q);
+      q = nir_bcsel(bld, r, b, q);
+   }
+
+   if (op == nir_op_umod) {
+      /* division result in q, so the remainder is a - q * b */
+      r = nir_imul(bld, q, b);
+      q = nir_isub(bld, a, r);
+   }
+
+   assert(alu->dest.dest.is_ssa);
+   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+                            nir_src_for_ssa(q),
+                            ralloc_parent(alu));
+}
+
+/* Block callback: offers every ALU instruction to convert_instr(). */
+static bool
+convert_block(nir_block *block, void *state)
+{
+   nir_builder *builder = state;
+
+   nir_foreach_instr_safe(block, cur)
+      if (cur->type == nir_instr_type_alu)
+         convert_instr(builder, nir_instr_as_alu(cur));
+
+   return true;
+}
+
+/* Runs the division lowering over every block of one function impl. */
+static void
+convert_impl(nir_function_impl *impl)
+{
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+   nir_foreach_block(impl, convert_block, &builder);
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point: lowers idiv/udiv/umod in every overload that has an impl. */
+void
+nir_lower_idiv(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl)
+      if (ovl->impl != NULL)
+         convert_impl(ovl->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
index 7cd93ea0a..4bdb80072 100644
--- a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -223,7 +223,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
             else
                nir_instr_insert_after_block(src->pred, &mov->instr);
 
-            nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+            nir_phi_src *new_src = ralloc(new_phi, nir_phi_src);
             new_src->pred = src->pred;
             new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
 
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
index 3015dbd09..cf8ab8325 100644
--- a/mesalib/src/glsl/nir/nir_lower_samplers.cpp
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -36,33 +36,26 @@ extern "C" {
 }
 
 static unsigned
-get_sampler_index(struct gl_shader_program *shader_program, const char *name,
-                  const struct gl_program *prog)
+get_sampler_index(const struct gl_shader_program *shader_program,
+                  gl_shader_stage stage, const char *name)
 {
-   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
-
    unsigned location;
    if (!shader_program->UniformHash->get(location, name)) {
-      linker_error(shader_program,
-                   "failed to find sampler named %s.\n", name);
+      assert(!"failed to find sampler");
       return 0;
    }
 
-   if (!shader_program->UniformStorage[location].sampler[shader].active) {
-      assert(0 && "cannot return a sampler");
-      linker_error(shader_program,
-                   "cannot return a sampler named %s, because it is not "
-                   "used in this shader stage. This is a driver bug.\n",
-                   name);
+   if (!shader_program->UniformStorage[location].sampler[stage].active) {
+      assert(!"cannot return a sampler");
       return 0;
    }
 
-   return shader_program->UniformStorage[location].sampler[shader].index;
+   return shader_program->UniformStorage[location].sampler[stage].index;
 }
 
 static void
-lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
-              const struct gl_program *prog, void *mem_ctx)
+lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program,
+              gl_shader_stage stage, void *mem_ctx)
 {
    if (instr->sampler == NULL)
       return;
@@ -90,7 +83,7 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
                ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
             break;
          case nir_deref_array_type_indirect: {
-            instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+            instr->src = reralloc(instr, instr->src, nir_tex_src,
                                   instr->num_srcs + 1);
             memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
             instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
@@ -133,15 +126,15 @@ lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
       }
    }
 
-   instr->sampler_index += get_sampler_index(shader_program, name, prog);
+   instr->sampler_index += get_sampler_index(shader_program, stage, name);
 
    instr->sampler = NULL;
 }
 
 typedef struct {
    void *mem_ctx;
-   struct gl_shader_program *shader_program;
-   struct gl_program *prog;
+   const struct gl_shader_program *shader_program;
+   gl_shader_stage stage;
 } lower_state;
 
 static bool
@@ -152,7 +145,7 @@ lower_block_cb(nir_block *block, void *_state)
    nir_foreach_instr(block, instr) {
       if (instr->type == nir_instr_type_tex) {
          nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
-         lower_sampler(tex_instr, state->shader_program, state->prog,
+         lower_sampler(tex_instr, state->shader_program, state->stage,
                        state->mem_ctx);
       }
    }
@@ -161,24 +154,24 @@ lower_block_cb(nir_block *block, void *_state)
 }
 
 static void
-lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
-           struct gl_program *prog)
+lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program,
+           gl_shader_stage stage)
 {
    lower_state state;
 
    state.mem_ctx = ralloc_parent(impl);
    state.shader_program = shader_program;
-   state.prog = prog;
+   state.stage = stage;
 
    nir_foreach_block(impl, lower_block_cb, &state);
 }
 
 extern "C" void
-nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
-                   struct gl_program *prog)
+nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program,
+                   gl_shader_stage stage)
 {
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         lower_impl(overload->impl, shader_program, prog);
+         lower_impl(overload->impl, shader_program, stage);
    }
 }
diff --git a/mesalib/src/glsl/nir/nir_lower_tex_projector.c b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
new file mode 100644
index 000000000..6b0e9c340
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_tex_projector.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * This lowering pass converts the coordinate division for texture projection
+ * to be done in ALU instructions instead of asking the texture operation to
+ * do so.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+/* Block callback: lowers the projector source of each tex instruction. */
+static bool
+nir_lower_tex_projector_block(nir_block *block, void *void_state)
+{
+   nir_builder *b = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      nir_builder_insert_before_instr(b, &tex->instr);
+
+      /* Find the projector in the srcs list, if present. */
+      int proj_index;
+      for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+         if (tex->src[proj_index].src_type == nir_tex_src_projector)
+            break;
+      }
+      if (proj_index == tex->num_srcs)
+         continue;
+      nir_ssa_def *inv_proj =
+         nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+      /* Walk through the sources projecting the arguments. */
+      for (int i = 0; i < tex->num_srcs; i++) {
+         switch (tex->src[i].src_type) {
+         case nir_tex_src_coord:
+         case nir_tex_src_comparitor:
+            break;
+         default:
+            continue;
+         }
+         nir_ssa_def *unprojected =
+            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+         nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+         /* Array indices don't get projected, so make a new vector with the
+          * coordinate's array index untouched.
+          */
+         if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+            switch (tex->coord_components) {
+            case 4:
+               projected = nir_vec4(b,
+                                    channel(b, projected, 0),
+                                    channel(b, projected, 1),
+                                    channel(b, projected, 2),
+                                    channel(b, unprojected, 3));
+               break;
+            case 3:
+               projected = nir_vec3(b,
+                                    channel(b, projected, 0),
+                                    channel(b, projected, 1),
+                                    channel(b, unprojected, 2));
+               break;
+            case 2:
+               projected = nir_vec2(b,
+                                    channel(b, projected, 0),
+                                    channel(b, unprojected, 1));
+               break;
+            default:
+               unreachable("bad texture coord count for array");
+               break;
+            }
+         }
+
+         nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,
+                               nir_src_for_ssa(projected));
+      }
+
+      /* Now move the later tex sources down the array so that the projector
+       * disappears.  Only num_srcs - proj_index - 1 sources follow it, so
+       * moving any more would read past the end of the array.
+       */
+      nir_src dead;
+      memset(&dead, 0, sizeof dead);
+      nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, dead);
+      memmove(&tex->src[proj_index], &tex->src[proj_index + 1],
+              (tex->num_srcs - proj_index - 1) * sizeof(*tex->src));
+      tex->num_srcs--;
+   }
+
+   return true;
+}
+
+/* Applies the projector lowering to every block of one function impl. */
+static void
+nir_lower_tex_projector_impl(nir_function_impl *impl)
+{
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+
+   nir_foreach_block(impl, nir_lower_tex_projector_block, &builder);
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point: lowers tex projectors in every overload that has an impl. */
+void
+nir_lower_tex_projector(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl)
+      if (ovl->impl != NULL)
+         nir_lower_tex_projector_impl(ovl->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
index 85ebb281c..58389a7c7 100644
--- a/mesalib/src/glsl/nir/nir_lower_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -148,13 +148,10 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
 
       unsigned num_components = glsl_get_vector_elements(src_tail->type);
 
-      nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
-      nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
-
       nir_intrinsic_instr *load =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
       load->num_components = num_components;
-      load->variables[0] = nir_deref_as_var(src_deref);
+      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
       nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
 
       nir_instr_insert_before(&copy_instr->instr, &load->instr);
@@ -162,7 +159,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
       nir_intrinsic_instr *store =
          nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
       store->num_components = num_components;
-      store->variables[0] = nir_deref_as_var(dest_deref);
+      store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
+
       store->src[0].is_ssa = true;
       store->src[0].ssa = &load->dest.ssa;
 
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
index 86e6ab416..2ca74d71b 100644
--- a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -642,7 +642,7 @@ add_phi_sources(nir_block *block, nir_block *pred,
 
       struct deref_node *node = entry->data;
 
-      nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+      nir_phi_src *src = ralloc(phi, nir_phi_src);
       src->pred = pred;
       src->src.is_ssa = true;
       src->src.ssa = get_ssa_def_for_block(node, pred, state);
diff --git a/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
new file mode 100644
index 000000000..0da8447ac
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_normalize_cubemap_coords.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand <jason@jlekstrand.net>
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * This file implements a NIR lowering pass to perform the normalization of
+ * the cubemap coordinates to have the largest magnitude component be -1.0
+ * or 1.0.  This is based on the old GLSL IR based pass by Eric.
+ */
+
+static nir_ssa_def *
+channel(nir_builder *b, nir_ssa_def *def, int c)
+{
+   return nir_swizzle(b, def, (unsigned[4]){c, c, c, c}, 1, false);
+}
+
+static bool
+normalize_cubemap_coords_block(nir_block *block, void *void_state)
+{
+   nir_builder *b = void_state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
+         continue;
+
+      nir_builder_insert_before_instr(b, &tex->instr);
+
+      for (unsigned i = 0; i < tex->num_srcs; i++) {
+         if (tex->src[i].src_type != nir_tex_src_coord)
+            continue;
+
+         nir_ssa_def *orig_coord =
+            nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+         assert(orig_coord->num_components >= 3);
+
+         nir_ssa_def *abs = nir_fabs(b, orig_coord);
+         nir_ssa_def *norm = nir_fmax(b, channel(b, abs, 0),
+                                         nir_fmax(b, channel(b, abs, 1),
+                                                     channel(b, abs, 2)));
+
+         nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm));
+
+         /* A fourth coordinate component is the array index, which doesn't
+          * have to be normalized: rebuild the vector with it left untouched.
+          */
+         if (tex->coord_components == 4) {
+            normalized = nir_vec4(b,
+                                  channel(b, normalized, 0),
+                                  channel(b, normalized, 1),
+                                  channel(b, normalized, 2),
+                                  channel(b, orig_coord, 3));
+         }
+
+         nir_instr_rewrite_src(&tex->instr,
+                               &tex->src[i].src,
+                               nir_src_for_ssa(normalized));
+      }
+   }
+
+   return true;
+}
+
+/* Applies the cubemap normalization to every block of one function impl. */
+static void
+normalize_cubemap_coords_impl(nir_function_impl *impl)
+{
+   nir_builder builder;
+   nir_builder_init(&builder, impl);
+
+   nir_foreach_block(impl, normalize_cubemap_coords_block, &builder);
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_normalize_cubemap_coords(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl)
+      if (ovl->impl != NULL)
+         normalize_cubemap_coords_impl(ovl->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
index 062cd628b..264806f5d 100644
--- a/mesalib/src/glsl/nir/nir_opcodes.py
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -161,12 +161,12 @@ unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
 unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
 unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
 # Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
 # Boolean-to-float conversion
 unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 == 0")
-unop_convert("b2i", tbool, tint, "src0 ? 0 : -1") # Boolean-to-int conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
 unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
 
 unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
@@ -191,8 +191,6 @@ unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
 
 unop("fsin", tfloat, "sinf(src0)")
 unop("fcos", tfloat, "cosf(src0)")
-unop("fsin_reduced", tfloat, "sinf(src0)")
-unop("fcos_reduced", tfloat, "cosf(src0)")
 
 
 # Partial derivatives.
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
index ef855aa77..cdb19241c 100644
--- a/mesalib/src/glsl/nir/nir_opt_algebraic.py
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -75,6 +75,9 @@ optimizations = [
    (('flrp', a, b, 1.0), b),
    (('flrp', a, a, b), a),
    (('flrp', 0.0, a, b), ('fmul', a, b)),
+   (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
+   (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
+   (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
    (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
    (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
    # Comparison simplifications
@@ -82,10 +85,6 @@ optimizations = [
    (('inot', ('fge', a, b)), ('flt', a, b)),
    (('inot', ('ilt', a, b)), ('ige', a, b)),
    (('inot', ('ige', a, b)), ('ilt', a, b)),
-   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
-   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
-   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
-   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
    (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
    (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
    (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
@@ -95,6 +94,18 @@ optimizations = [
    (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
    (('fsat', ('fsat', a)), ('fsat', a)),
    (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+   (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+   (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+   (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
+   (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
+   (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
+   (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'),
+   # Emulating booleans
+   (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))),
+   (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))),
+   (('iand', 'a@bool', 1.0), ('b2f', a)),
+   (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
+   (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF.
    # Comparison with the same args.  Note that these are not done for
    # the float versions because NaN always returns false on float
    # inequalities.
@@ -122,7 +133,7 @@ optimizations = [
    (('ishr', 0, a), 0),
    (('ishr', a, 0), a),
    (('ushr', 0, a), 0),
-   (('ushr', a, 0), 0),
+   (('ushr', a, 0), a),
    # Exponential/logarithmic identities
    (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
    (('fexp',  ('flog',  a)), a), # e^ln(a)  = a
@@ -134,6 +145,26 @@ optimizations = [
    (('fpow', a, 1.0), a),
    (('fpow', a, 2.0), ('fmul', a, a)),
    (('fpow', 2.0, a), ('fexp2', a)),
+   (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+   (('fsqrt', ('fexp', a)), ('fexp', ('fmul', 0.5, a))),
+   (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+   (('frcp', ('fexp', a)), ('fexp', ('fneg', a))),
+   (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+   (('frsq', ('fexp', a)), ('fexp', ('fmul', -0.5, a))),
+   (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+   (('flog', ('fsqrt', a)), ('fmul', 0.5, ('flog', a))),
+   (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+   (('flog', ('frcp', a)), ('fneg', ('flog', a))),
+   (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+   (('flog', ('frsq', a)), ('fmul', -0.5, ('flog', a))),
+   (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+   (('flog', ('fpow', a, b)), ('fmul', b, ('flog', a))),
+   (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+   (('fadd', ('flog', a), ('flog', b)), ('flog', ('fmul', a, b))),
+   (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+   (('fadd', ('flog', a), ('fneg', ('flog', b))), ('flog', ('fdiv', a, b))),
+   (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+   (('fmul', ('fexp', a), ('fexp', b)), ('fexp', ('fadd', a, b))),
    # Division and reciprocal
    (('fdiv', 1.0, a), ('frcp', a)),
    (('frcp', ('frcp', a)), a),
@@ -154,18 +185,21 @@ optimizations = [
    (('bcsel', a, b, b), b),
    (('fcsel', a, b, b), b),
 
+   # Conversions
+   (('f2i', ('ftrunc', a)), ('f2i', a)),
+   (('f2u', ('ftrunc', a)), ('f2u', a)),
+
    # Subtracts
    (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
    (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+   (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
+   (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
    (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
    (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
    (('iabs', ('isub', 0, a)), ('iabs', a)),
-
-# This one may not be exact
-   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
 ]
 
 # Add optimizations to handle the case where the result of a ternary is
@@ -189,4 +223,17 @@ for op in ['flt', 'fge', 'feq', 'fne',
        ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
    ]
 
+# This section contains "late" optimizations that should be run after the
+# regular optimizations have finished.  Optimizations should go here if
+# they help code generation but do not necessarily produce code that is
+# more easily optimizable.
+late_optimizations = [
+   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+]
+
 print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+                                  late_optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
index 9b383202d..553906e12 100644
--- a/mesalib/src/glsl/nir/nir_opt_cse.c
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -37,20 +37,19 @@ struct cse_state {
 };
 
 static bool
-nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
+                   unsigned src2)
 {
-   if (src1.abs != src2.abs || src1.negate != src2.negate)
+   if (alu1->src[src1].abs != alu2->src[src2].abs ||
+       alu1->src[src1].negate != alu2->src[src2].negate)
       return false;
 
-   for (int i = 0; i < 4; ++i) {
-      if (!(read_mask & (1 << i)))
-         continue;
-
-      if (src1.swizzle[i] != src2.swizzle[i])
+   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
+      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
          return false;
    }
 
-   return nir_srcs_equal(src1.src, src2.src);
+   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
 }
 
 static bool
@@ -73,10 +72,17 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
       if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
          return false;
 
-      for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
-         if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
-                                 (1 << alu1->dest.dest.ssa.num_components) - 1))
-            return false;
+      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+         assert(nir_op_infos[alu1->op].num_inputs == 2);
+         return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
+                 nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
+                (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
+                 nir_alu_srcs_equal(alu1, alu2, 1, 0));
+      } else {
+         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
+               return false;
+         }
       }
       return true;
    }
@@ -154,12 +160,14 @@ nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
 static bool
 src_is_ssa(nir_src *src, void *data)
 {
+   (void) data;
    return src->is_ssa;
 }
 
 static bool
 dest_is_ssa(nir_dest *dest, void *data)
 {
+   (void) data;
    return dest->is_ssa;
 }
 
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
new file mode 100644
index 000000000..9d5646fe6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for a multiply that
+ * is only ever used in an add and replaces both with an fma.
+ */
+
+struct peephole_ffma_state {
+   void *mem_ctx;
+   nir_function_impl *impl;
+   bool progress;
+};
+
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+   if (def->if_uses->entries > 0)
+      return false;
+
+   struct set_entry *use_iter;
+   set_foreach(def->uses, use_iter) {
+      nir_instr *use_instr = (nir_instr *)use_iter->key;
+
+      if (use_instr->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+      switch (use_alu->op) {
+      case nir_op_fadd:
+         break; /* This one's ok */
+
+      case nir_op_imov:
+      case nir_op_fmov:
+      case nir_op_fneg:
+      case nir_op_fabs:
+         assert(use_alu->dest.dest.is_ssa);
+         if (!are_all_uses_fadd(&use_alu->dest.dest.ssa))
+            return false;
+         break;
+
+      default:
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Walks up from src through mov/fneg/fabs to an fmul whose uses all feed
+ * fadds; returns it (or NULL) with swizzle/negate/abs describing the walk.
+ */
+static nir_alu_instr *
+get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+{
+   assert(src->src.is_ssa && !src->abs && !src->negate);
+   nir_instr *instr = src->src.ssa->parent_instr;
+   if (instr->type != nir_instr_type_alu)
+      return NULL;
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   switch (alu->op) {
+   case nir_op_imov:
+   case nir_op_fmov:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      break;
+
+   case nir_op_fneg:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      *negate = !*negate;
+      break;
+
+   case nir_op_fabs:
+      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+      *negate = false;
+      *abs = true;
+      break;
+
+   case nir_op_fmul:
+      /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+       * operations.  This prevents us from being too aggressive with our
+       * fusing which can actually lead to more instructions.
+       */
+      if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+         return NULL;
+      break;
+
+   default:
+      return NULL;
+   }
+   if (!alu)
+      return NULL;
+
+   /* Compose against a snapshot: updating swizzle[] in place would read
+    * entries already overwritten (e.g. src swizzle zyxx would yield zyzz).
+    */
+   const uint8_t prev[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+   for (unsigned i = 0; i < 4 && (alu->dest.write_mask & (1 << i)); i++)
+      swizzle[i] = prev[src->swizzle[i]];
+
+   return alu;
+}
+
+static bool
+nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+{
+   struct peephole_ffma_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *add = nir_instr_as_alu(instr);
+      if (add->op != nir_op_fadd)
+         continue;
+
+      /* TODO: Maybe bail if this expression is considered "precise"? */
+
+      assert(add->src[0].src.is_ssa && add->src[1].src.is_ssa);
+
+      /* This is the case a + a.  We would rather handle this with an
+       * algebraic reduction than fuse it.  Also, we want to only fuse
+       * things where the multiply is used only once and, in this case,
+       * it would be used twice by the same instruction.
+       */
+      if (add->src[0].src.ssa == add->src[1].src.ssa)
+         continue;
+
+      nir_alu_instr *mul;
+      uint8_t add_mul_src, swizzle[4];
+      bool negate, abs;
+      for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
+         for (unsigned i = 0; i < 4; i++)
+            swizzle[i] = i;
+
+         negate = false;
+         abs = false;
+
+         mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+
+         if (mul != NULL)
+            break;
+      }
+
+      if (mul == NULL)
+         continue;
+
+      nir_ssa_def *mul_src[2];
+      mul_src[0] = mul->src[0].src.ssa;
+      mul_src[1] = mul->src[1].src.ssa;
+
+      if (abs) {
+         for (unsigned i = 0; i < 2; i++) {
+            nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_fabs);
+            abs->src[0].src = nir_src_for_ssa(mul_src[i]);
+            nir_ssa_dest_init(&abs->instr, &abs->dest.dest,
+                              mul_src[i]->num_components, NULL);
+            abs->dest.write_mask = (1 << mul_src[i]->num_components) - 1;
+            nir_instr_insert_before(&add->instr, &abs->instr);
+            mul_src[i] = &abs->dest.dest.ssa;
+         }
+      }
+
+      if (negate) {
+         nir_alu_instr *neg = nir_alu_instr_create(state->mem_ctx,
+                                                   nir_op_fneg);
+         neg->src[0].src = nir_src_for_ssa(mul_src[0]);
+         nir_ssa_dest_init(&neg->instr, &neg->dest.dest,
+                           mul_src[0]->num_components, NULL);
+         neg->dest.write_mask = (1 << mul_src[0]->num_components) - 1;
+         nir_instr_insert_before(&add->instr, &neg->instr);
+         mul_src[0] = &neg->dest.dest.ssa;
+      }
+
+      nir_alu_instr *ffma = nir_alu_instr_create(state->mem_ctx, nir_op_ffma);
+      ffma->dest.saturate = add->dest.saturate;
+      ffma->dest.write_mask = add->dest.write_mask;
+
+      for (unsigned i = 0; i < 2; i++) {
+         ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
+         for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
+            ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
+      }
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
+                       state->mem_ctx);
+
+      assert(add->dest.dest.is_ssa);
+
+      nir_ssa_dest_init(&ffma->instr, &ffma->dest.dest,
+                        add->dest.dest.ssa.num_components,
+                        add->dest.dest.ssa.name);
+      nir_ssa_def_rewrite_uses(&add->dest.dest.ssa,
+                               nir_src_for_ssa(&ffma->dest.dest.ssa),
+                               state->mem_ctx);
+
+      nir_instr_insert_before(&add->instr, &ffma->instr);
+      assert(add->dest.dest.ssa.uses->entries == 0);
+      nir_instr_remove(&add->instr);
+
+      state->progress = true;
+   }
+
+   return true;
+}
+
+static bool
+nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+{
+   struct peephole_ffma_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.impl = impl;
+   state.progress = false;
+
+   nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return state.progress;
+}
+
+bool
+nir_opt_peephole_ffma(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         progress |= nir_opt_peephole_ffma_impl(overload->impl);
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
index b89451b09..f400cfd66 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_select.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -84,7 +84,9 @@ block_check_for_allowed_instrs(nir_block *block)
       case nir_instr_type_alu: {
          /* It must be a move operation */
          nir_alu_instr *mov = nir_instr_as_alu(instr);
-         if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+         if (mov->op != nir_op_fmov && mov->op != nir_op_imov &&
+             mov->op != nir_op_fneg && mov->op != nir_op_ineg &&
+             mov->op != nir_op_fabs && mov->op != nir_op_iabs)
             return false;
 
          /* Can't handle saturate */
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
index fa11a312e..fb8c9344c 100644
--- a/mesalib/src/glsl/nir/nir_print.c
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -137,25 +137,37 @@ print_dest(nir_dest *dest, FILE *fp)
 }
 
 static void
-print_alu_src(nir_alu_src *src, FILE *fp)
+print_alu_src(nir_alu_instr *instr, unsigned src, FILE *fp)
 {
-   if (src->negate)
+   if (instr->src[src].negate)
       fprintf(fp, "-");
-   if (src->abs)
+   if (instr->src[src].abs)
       fprintf(fp, "abs(");
 
-   print_src(&src->src, fp);
+   print_src(&instr->src[src].src, fp);
 
-   if (src->swizzle[0] != 0 ||
-       src->swizzle[1] != 1 ||
-       src->swizzle[2] != 2 ||
-       src->swizzle[3] != 3) {
+   bool print_swizzle = false;
+   for (unsigned i = 0; i < 4; i++) {
+      if (!nir_alu_instr_channel_used(instr, src, i))
+         continue;
+
+      if (instr->src[src].swizzle[i] != i) {
+         print_swizzle = true;
+         break;
+      }
+   }
+
+   if (print_swizzle) {
       fprintf(fp, ".");
-      for (unsigned i = 0; i < 4; i++)
-         fprintf(fp, "%c", "xyzw"[src->swizzle[i]]);
+      for (unsigned i = 0; i < 4; i++) {
+         if (!nir_alu_instr_channel_used(instr, src, i))
+            continue;
+
+         fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]);
+      }
    }
 
-   if (src->abs)
+   if (instr->src[src].abs)
       fprintf(fp, ")");
 }
 
@@ -189,7 +201,7 @@ print_alu_instr(nir_alu_instr *instr, FILE *fp)
       if (i != 0)
          fprintf(fp, ", ");
 
-      print_alu_src(&instr->src[i], fp);
+      print_alu_src(instr, i, fp);
    }
 }
 
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
index e7f8aeacb..4417e2a48 100644
--- a/mesalib/src/glsl/nir/nir_remove_dead_variables.c
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -98,22 +98,14 @@ add_var_use_shader(nir_shader *shader, struct set *live)
 }
 
 static void
-remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+remove_dead_vars(struct exec_list *var_list, struct set *live)
 {
-   foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+   foreach_list_typed_safe(nir_variable, var, node, var_list) {
       struct set_entry *entry = _mesa_set_search(live, var);
-      if (entry == NULL)
-         exec_node_remove(&var->node);
-   }
-}
-
-static void
-remove_dead_global_vars(nir_shader *shader, struct set *live)
-{
-   foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
-      struct set_entry *entry = _mesa_set_search(live, var);
-      if (entry == NULL)
+      if (entry == NULL) {
          exec_node_remove(&var->node);
+         ralloc_free(var);
+      }
    }
 }
 
@@ -125,11 +117,11 @@ nir_remove_dead_variables(nir_shader *shader)
 
    add_var_use_shader(shader, live);
 
-   remove_dead_global_vars(shader, live);
+   remove_dead_vars(&shader->globals, live);
 
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         remove_dead_local_vars(overload->impl, live);
+         remove_dead_vars(&overload->impl->locals, live);
    }
 
    _mesa_set_destroy(live, NULL);
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
index 73a802be7..5ba016085 100644
--- a/mesalib/src/glsl/nir/nir_search.c
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -218,8 +218,8 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
    if (matched)
       return true;
 
-   if (nir_op_infos[instr->op].num_inputs == 2 &&
-       (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+   if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
+      assert(nir_op_infos[instr->op].num_inputs == 2);
       if (!match_value(expr->srcs[0], instr, 1, num_components,
                        swizzle, state))
          return false;
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
index 4d663b51b..fc72c078c 100644
--- a/mesalib/src/glsl/nir/nir_split_var_copies.c
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -188,8 +188,8 @@ split_var_copy_instr(nir_intrinsic_instr *old_copy,
           * belongs to the copy instruction and b) the deref chains may
           * have some of the same links due to the way we constructed them
           */
-         nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
-         nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+         nir_deref *src = nir_copy_deref(new_copy, src_head);
+         nir_deref *dest = nir_copy_deref(new_copy, dest_head);
 
          new_copy->variables[0] = nir_deref_as_var(dest);
          new_copy->variables[1] = nir_deref_as_var(src);
diff --git a/mesalib/src/glsl/nir/nir_sweep.c b/mesalib/src/glsl/nir/nir_sweep.c
new file mode 100644
index 000000000..d3549756a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_sweep.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/**
+ * \file nir_sweep.c
+ *
+ * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated
+ * memory - anything still connected to the program will be kept, and any dead memory
+ * we dropped on the floor will be freed.
+ *
+ * The expectation is that drivers should call this when finished compiling the shader
+ * (after any optimization, lowering, and so on).  However, it's also fine to call it
+ * earlier, and even many times, trading CPU cycles for memory savings.
+ */
+
+#define steal_list(mem_ctx, type, list) \
+   foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); }
+
+static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node);
+
+static bool
+sweep_src_indirect(nir_src *src, void *nir)
+{
+   if (!src->is_ssa && src->reg.indirect)
+      ralloc_steal(nir, src->reg.indirect);
+
+   return true;
+}
+
+static bool
+sweep_dest_indirect(nir_dest *dest, void *nir)
+{
+   if (!dest->is_ssa && dest->reg.indirect)
+      ralloc_steal(nir, dest->reg.indirect);
+
+   return true;
+}
+
+static void
+sweep_block(nir_shader *nir, nir_block *block)
+{
+   ralloc_steal(nir, block);
+
+   nir_foreach_instr(block, instr) {
+      ralloc_steal(nir, instr);
+
+      nir_foreach_src(instr, sweep_src_indirect, nir);
+      nir_foreach_dest(instr, sweep_dest_indirect, nir);
+   }
+}
+
+static void
+sweep_if(nir_shader *nir, nir_if *iff)
+{
+   ralloc_steal(nir, iff);
+
+   foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) {
+      sweep_cf_node(nir, cf_node);
+   }
+
+   foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) {
+      sweep_cf_node(nir, cf_node);
+   }
+}
+
+static void
+sweep_loop(nir_shader *nir, nir_loop *loop)
+{
+   ralloc_steal(nir, loop);
+
+   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+      sweep_cf_node(nir, cf_node);
+   }
+}
+
+static void
+sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node)
+{
+   switch (cf_node->type) {
+   case nir_cf_node_block:
+      sweep_block(nir, nir_cf_node_as_block(cf_node));
+      break;
+   case nir_cf_node_if:
+      sweep_if(nir, nir_cf_node_as_if(cf_node));
+      break;
+   case nir_cf_node_loop:
+      sweep_loop(nir, nir_cf_node_as_loop(cf_node));
+      break;
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+static void
+sweep_impl(nir_shader *nir, nir_function_impl *impl)
+{
+   ralloc_steal(nir, impl);
+
+   ralloc_steal(nir, impl->params);
+   ralloc_steal(nir, impl->return_var);
+   steal_list(nir, nir_variable, &impl->locals);
+   steal_list(nir, nir_register, &impl->registers);
+
+   foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) {
+      sweep_cf_node(nir, cf_node);
+   }
+
+   sweep_block(nir, impl->end_block);
+
+   /* Wipe out all the metadata, if any. */
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+static void
+sweep_function(nir_shader *nir, nir_function *f)
+{
+   ralloc_steal(nir, f);
+
+   foreach_list_typed(nir_function_overload, overload, node, &f->overload_list) {
+      ralloc_steal(nir, overload);
+      ralloc_steal(nir, overload->params);
+      if (overload->impl)
+         sweep_impl(nir, overload->impl);
+   }
+}
+
+void
+nir_sweep(nir_shader *nir)
+{
+   void *rubbish = ralloc_context(NULL);
+
+   /* First, move ownership of all the memory to a temporary context; assume dead. */
+   ralloc_adopt(rubbish, nir);
+
+   /* Variables and registers are not dead.  Steal them back. */
+   steal_list(nir, nir_variable, &nir->uniforms);
+   steal_list(nir, nir_variable, &nir->inputs);
+   steal_list(nir, nir_variable, &nir->outputs);
+   steal_list(nir, nir_variable, &nir->globals);
+   steal_list(nir, nir_variable, &nir->system_values);
+   steal_list(nir, nir_register, &nir->registers);
+
+   /* Recurse into functions, stealing their contents back. */
+   foreach_list_typed(nir_function, func, node, &nir->functions) {
+      sweep_function(nir, func);
+   }
+
+   /* Free everything we didn't steal back. */
+   ralloc_free(rubbish);
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
index 47cf45393..53ff54766 100644
--- a/mesalib/src/glsl/nir/nir_to_ssa.c
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -47,7 +47,7 @@ insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
    set_foreach(block->predecessors, entry) {
       nir_block *pred = (nir_block *) entry->key;
 
-      nir_phi_src *src = ralloc(mem_ctx, nir_phi_src);
+      nir_phi_src *src = ralloc(instr, nir_phi_src);
       src->pred = pred;
       src->src.is_ssa = false;
       src->src.reg.base_offset = 0;
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
index a13c3e12a..f0d0b46d2 100644
--- a/mesalib/src/glsl/nir/nir_types.cpp
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_void_type(void)
 }
 
 const glsl_type *
+glsl_float_type(void)
+{
+   return glsl_type::float_type;
+}
+
+const glsl_type *
 glsl_vec4_type(void)
 {
    return glsl_type::vec4_type;
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
index 494051a67..276d4ad62 100644
--- a/mesalib/src/glsl/nir/nir_types.h
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -69,6 +69,7 @@ bool glsl_type_is_scalar(const struct glsl_type *type);
 bool glsl_type_is_matrix(const struct glsl_type *type);
 
 const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_float_type(void);
 const struct glsl_type *glsl_vec4_type(void);
 const struct glsl_type *glsl_array_type(const struct glsl_type *base,
                                         unsigned elements);
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
index f247ae069..a7aa79837 100644
--- a/mesalib/src/glsl/nir/nir_validate.c
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -295,6 +295,8 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
 static void
 validate_deref_chain(nir_deref *deref, validate_state *state)
 {
+   assert(deref->child == NULL || ralloc_parent(deref->child) == deref);
+
    nir_deref *parent = NULL;
    while (deref != NULL) {
       switch (deref->deref_type) {
@@ -336,9 +338,10 @@ validate_var_use(nir_variable *var, validate_state *state)
 }
 
 static void
-validate_deref_var(nir_deref_var *deref, validate_state *state)
+validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state)
 {
    assert(deref != NULL);
+   assert(ralloc_parent(deref) == parent_mem_ctx);
    assert(deref->deref.type == deref->var->type);
 
    validate_var_use(deref->var, state);
@@ -386,7 +389,7 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
 
    unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
    for (unsigned i = 0; i < num_vars; i++) {
-      validate_deref_var(instr->variables[i], state);
+      validate_deref_var(instr, instr->variables[i], state);
    }
 
    switch (instr->intrinsic) {
@@ -423,7 +426,7 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
    }
 
    if (instr->sampler != NULL)
-      validate_deref_var(instr->sampler, state);
+      validate_deref_var(instr, instr->sampler, state);
 }
 
 static void
@@ -438,10 +441,10 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
 
    for (unsigned i = 0; i < instr->num_params; i++) {
       assert(instr->callee->params[i].type == instr->params[i]->deref.type);
-      validate_deref_var(instr->params[i], state);
+      validate_deref_var(instr, instr->params[i], state);
    }
 
-   validate_deref_var(instr->return_deref, state);
+   validate_deref_var(instr, instr->return_deref, state);
 }
 
 static void
@@ -680,8 +683,7 @@ validate_cf_node(nir_cf_node *node, validate_state *state)
       break;
 
    default:
-      assert(!"Invalid ALU instruction type");
-      break;
+      unreachable("Invalid CF node type");
    }
 }