Merged origin/release (checked in because wanted to merge new stuff)

author: marha <marha@users.sourceforge.net> 2015-02-22 21:39:56 +0100
committer: marha <marha@users.sourceforge.net> 2015-02-22 21:39:56 +0100
commit: 462f18c7b25fe3e467f837647d07ab0a78aa8d2b (patch)
tree: fc8013c0a1bac05a1945846c1697e973f4c35013 /mesalib/src/glsl
parent: 36f711ee12b6dd5184198abed3aa551efb585587 (diff)
download: vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.tar.gz
vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.tar.bz2
vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.zip
106 files changed, 21580 insertions, 680 deletions
diff --git a/mesalib/src/glsl/Android.mk b/mesalib/src/glsl/Android.mk
index 1cbc5c6d2..38c2087a4 100644
--- a/mesalib/src/glsl/Android.mk
+++ b/mesalib/src/glsl/Android.mk
@@ -27,7 +27,6 @@ LOCAL_PATH := $(call my-dir)
 
 include $(LOCAL_PATH)/Makefile.sources
 
-GLSL_SRCDIR = .
 # ---------------------------------------
 # Build libmesa_glsl
 # ---------------------------------------
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am
index 0ccc81d75..5a0a643da 100644
--- a/mesalib/src/glsl/Makefile.am
+++ b/mesalib/src/glsl/Makefile.am
@@ -19,21 +19,41 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+AUTOMAKE_OPTIONS = subdir-objects
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/glsl/glcpp \
+	-I$(top_srcdir)/src/glsl/nir \
 	-I$(top_srcdir)/src/gtest/include \
+	-I$(top_builddir)/src/glsl/nir \
 	$(DEFINES)
 AM_CFLAGS = $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
 
+EXTRA_DIST = tests glcpp/tests README TODO glcpp/README	\
+	glsl_lexer.ll					\
+	glsl_parser.yy					\
+	glcpp/glcpp-lex.l				\
+	glcpp/glcpp-parse.y				\
+	nir/nir_algebraic.py				\
+	nir/nir_constant_expressions.py			\
+	nir/nir_opcodes.py				\
+	nir/nir_opcodes_c.py				\
+	nir/nir_opcodes_h.py				\
+	nir/nir_opt_algebraic.py			\
+	SConscript
+
 include Makefile.sources
 
 TESTS = glcpp/tests/glcpp-test				\
 	glcpp/tests/glcpp-test-cr-lf			\
+	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/optimization-test				\
 	tests/sampler-types-test                        \
@@ -47,17 +67,20 @@ noinst_LTLIBRARIES = libglsl.la libglcpp.la
 check_PROGRAMS =					\
 	glcpp/glcpp					\
 	glsl_test					\
+	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/sampler-types-test			\
 	tests/uniform-initializer-test
 
 noinst_PROGRAMS = glsl_compiler
 
+tests_blob_test_SOURCES =				\
+	tests/blob_test.c
+tests_blob_test_LDADD =					\
+	$(top_builddir)/src/glsl/libglsl.la
+
 tests_general_ir_test_SOURCES =		\
-	$(top_srcdir)/src/mesa/main/imports.c		\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
+	standalone_scaffolding.cpp			\
 	tests/builtin_variable_test.cpp			\
 	tests/invalidate_locations_test.cpp		\
 	tests/general_ir_test.cpp			\
@@ -68,26 +91,24 @@ tests_general_ir_test_CFLAGS =				\
 tests_general_ir_test_LDADD =				\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 tests_uniform_initializer_test_SOURCES =		\
-	$(top_srcdir)/src/mesa/main/imports.c		\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
 	tests/copy_constant_to_storage_tests.cpp	\
 	tests/set_uniform_initializer_tests.cpp		\
 	tests/uniform_initializer_utils.cpp		\
+	tests/uniform_initializer_utils.h		\
 	tests/common.c
 tests_uniform_initializer_test_CFLAGS =			\
 	$(PTHREAD_CFLAGS)
 tests_uniform_initializer_test_LDADD =			\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 tests_sampler_types_test_SOURCES =			\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
 	tests/sampler_types_test.cpp			\
 	tests/common.c
 tests_sampler_types_test_CFLAGS =			\
@@ -95,6 +116,7 @@ tests_sampler_types_test_CFLAGS =			\
 tests_sampler_types_test_LDADD =			\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 libglcpp_la_LIBADD =					\
@@ -102,42 +124,44 @@ libglcpp_la_LIBADD =					\
 libglcpp_la_SOURCES =					\
 	glcpp/glcpp-lex.c				\
 	glcpp/glcpp-parse.c				\
+	glcpp/glcpp-parse.h				\
 	$(LIBGLCPP_FILES)
 
 glcpp_glcpp_SOURCES =					\
 	glcpp/glcpp.c					\
-	tests/common.c					\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c
+	tests/common.c
 glcpp_glcpp_LDADD =					\
 	libglcpp.la					\
+	$(top_builddir)/src/libglsl_util.la		\
 	-lm
 
 libglsl_la_LIBADD = libglcpp.la
 libglsl_la_SOURCES =					\
 	glsl_lexer.cpp					\
 	glsl_parser.cpp					\
-	$(LIBGLSL_FILES)
+	glsl_parser.h					\
+	$(LIBGLSL_FILES)				\
+	$(NIR_FILES)
 
 glsl_compiler_SOURCES = \
-	$(top_srcdir)/src/mesa/main/imports.c \
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c \
-	$(top_srcdir)/src/mesa/program/symbol_table.c \
 	$(GLSL_COMPILER_CXX_FILES)
 
 glsl_compiler_LDADD =					\
 	libglsl.la					\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 glsl_test_SOURCES = \
-	$(top_srcdir)/src/mesa/main/imports.c \
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c \
-	$(top_srcdir)/src/mesa/program/symbol_table.c \
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
+	standalone_scaffolding.cpp \
 	tests/common.c \
 	test.cpp \
-	test_optpass.cpp
+	test_optpass.cpp \
+	test_optpass.h
 
-glsl_test_LDADD = libglsl.la
+glsl_test_LDADD =					\
+	libglsl.la					\
+	$(top_builddir)/src/libglsl_util.la		\
+	$(PTHREAD_LIBS)
 
 # We write our own rules for yacc and lex below. We'd rather use automake,
 # but automake makes it especially difficult for a number of reasons:
@@ -170,14 +194,14 @@ am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =
 
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(GLSL_BUILDDIR)/glsl_parser.h $<
+	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
 
 glsl_lexer.cpp: glsl_lexer.ll
 	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
 
 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
 	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(GLSL_BUILDDIR)/glcpp/glcpp-parse.h $<
+	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
 
 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
 	$(AM_V_at)$(MKDIR_P) glcpp
@@ -193,8 +217,42 @@ BUILT_SOURCES =						\
 	glsl_parser.cpp					\
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
-	glcpp/glcpp-lex.c
+	glcpp/glcpp-lex.c				\
+	nir/nir_builder_opcodes.h				\
+	nir/nir_constant_expressions.c			\
+	nir/nir_opcodes.c				\
+	nir/nir_opcodes.h				\
+	nir/nir_opt_algebraic.c
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
 	$(BUILT_SOURCES)
+
+clean-local:
+	$(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
+
+dist-hook:
+	$(RM) glcpp/tests/*.out
+	$(RM) glcpp/tests/subtest*/*.out
+
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+
+nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+
+nir/nir.h: nir/nir_opcodes.h
+
+nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+
+nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index 676fa0d7a..d0210d170 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -1,119 +1,197 @@
 # shared source lists for Makefile, SConscript, and Android.mk
 
-GLSL_SRCDIR = $(top_srcdir)/src/glsl
-GLSL_BUILDDIR = $(top_builddir)/src/glsl
-
 # libglcpp
 
 LIBGLCPP_FILES = \
-	$(GLSL_SRCDIR)/glcpp/pp.c
+	glcpp/glcpp.h \
+	glcpp/pp.c
 
 LIBGLCPP_GENERATED_FILES = \
-	$(GLSL_BUILDDIR)/glcpp/glcpp-lex.c \
-	$(GLSL_BUILDDIR)/glcpp/glcpp-parse.c
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c
+
+NIR_GENERATED_FILES = \
+	nir/nir_builder_opcodes.h \
+	nir/nir_constant_expressions.c \
+	nir/nir_opcodes.c \
+	nir/nir_opcodes.h \
+	nir/nir_opt_algebraic.c
+
+NIR_FILES = \
+	nir/glsl_to_nir.cpp \
+	nir/glsl_to_nir.h \
+	nir/nir.c \
+	nir/nir.h \
+	nir/nir_constant_expressions.h \
+	nir/nir_dominance.c \
+	nir/nir_from_ssa.c \
+	nir/nir_intrinsics.c \
+	nir/nir_intrinsics.h \
+	nir/nir_live_variables.c \
+	nir/nir_lower_alu_to_scalar.c \
+	nir/nir_lower_atomics.c \
+	nir/nir_lower_global_vars_to_local.c \
+	nir/nir_lower_locals_to_regs.c \
+	nir/nir_lower_io.c \
+	nir/nir_lower_phis_to_scalar.c \
+	nir/nir_lower_samplers.cpp \
+	nir/nir_lower_system_values.c \
+	nir/nir_lower_to_source_mods.c \
+	nir/nir_lower_vars_to_ssa.c \
+	nir/nir_lower_var_copies.c \
+	nir/nir_lower_vec_to_movs.c \
+	nir/nir_metadata.c \
+	nir/nir_opt_constant_folding.c \
+	nir/nir_opt_copy_propagate.c \
+	nir/nir_opt_cse.c \
+	nir/nir_opt_dce.c \
+	nir/nir_opt_gcm.c \
+	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_peephole_select.c \
+	nir/nir_opt_remove_phis.c \
+	nir/nir_print.c \
+	nir/nir_remove_dead_variables.c \
+	nir/nir_search.c \
+	nir/nir_search.h \
+	nir/nir_split_var_copies.c \
+	nir/nir_to_ssa.c \
+	nir/nir_types.h \
+	nir/nir_validate.c \
+	nir/nir_worklist.c \
+	nir/nir_worklist.h \
+	nir/nir_types.cpp \
+	$(NIR_GENERATED_FILES)
 
 # libglsl
 
 LIBGLSL_FILES = \
-	$(GLSL_SRCDIR)/ast_array_index.cpp \
-	$(GLSL_SRCDIR)/ast_expr.cpp \
-	$(GLSL_SRCDIR)/ast_function.cpp \
-	$(GLSL_SRCDIR)/ast_to_hir.cpp \
-	$(GLSL_SRCDIR)/ast_type.cpp \
-	$(GLSL_SRCDIR)/builtin_functions.cpp \
-	$(GLSL_SRCDIR)/builtin_types.cpp \
-	$(GLSL_SRCDIR)/builtin_variables.cpp \
-	$(GLSL_SRCDIR)/glsl_parser_extras.cpp \
-	$(GLSL_SRCDIR)/glsl_types.cpp \
-	$(GLSL_SRCDIR)/glsl_symbol_table.cpp \
-	$(GLSL_SRCDIR)/hir_field_selection.cpp \
-	$(GLSL_SRCDIR)/ir_basic_block.cpp \
-	$(GLSL_SRCDIR)/ir_builder.cpp \
-	$(GLSL_SRCDIR)/ir_clone.cpp \
-	$(GLSL_SRCDIR)/ir_constant_expression.cpp \
-	$(GLSL_SRCDIR)/ir.cpp \
-	$(GLSL_SRCDIR)/ir_equals.cpp \
-	$(GLSL_SRCDIR)/ir_expression_flattening.cpp \
-	$(GLSL_SRCDIR)/ir_function_can_inline.cpp \
-	$(GLSL_SRCDIR)/ir_function_detect_recursion.cpp \
-	$(GLSL_SRCDIR)/ir_function.cpp \
-	$(GLSL_SRCDIR)/ir_hierarchical_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_hv_accept.cpp \
-	$(GLSL_SRCDIR)/ir_import_prototypes.cpp \
-	$(GLSL_SRCDIR)/ir_print_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_reader.cpp \
-	$(GLSL_SRCDIR)/ir_rvalue_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_set_program_inouts.cpp \
-	$(GLSL_SRCDIR)/ir_validate.cpp \
-	$(GLSL_SRCDIR)/ir_variable_refcount.cpp \
-	$(GLSL_SRCDIR)/linker.cpp \
-	$(GLSL_SRCDIR)/link_atomics.cpp \
-	$(GLSL_SRCDIR)/link_functions.cpp \
-	$(GLSL_SRCDIR)/link_interface_blocks.cpp \
-	$(GLSL_SRCDIR)/link_uniforms.cpp \
-	$(GLSL_SRCDIR)/link_uniform_initializers.cpp \
-	$(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \
-	$(GLSL_SRCDIR)/link_uniform_blocks.cpp \
-	$(GLSL_SRCDIR)/link_varyings.cpp \
-	$(GLSL_SRCDIR)/loop_analysis.cpp \
-	$(GLSL_SRCDIR)/loop_controls.cpp \
-	$(GLSL_SRCDIR)/loop_unroll.cpp \
-	$(GLSL_SRCDIR)/lower_clip_distance.cpp \
-	$(GLSL_SRCDIR)/lower_const_arrays_to_uniforms.cpp \
-	$(GLSL_SRCDIR)/lower_discard.cpp \
-	$(GLSL_SRCDIR)/lower_discard_flow.cpp \
-	$(GLSL_SRCDIR)/lower_if_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_instructions.cpp \
-	$(GLSL_SRCDIR)/lower_jumps.cpp \
-	$(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \
-	$(GLSL_SRCDIR)/lower_noise.cpp \
-	$(GLSL_SRCDIR)/lower_offset_array.cpp \
-	$(GLSL_SRCDIR)/lower_packed_varyings.cpp \
-	$(GLSL_SRCDIR)/lower_named_interface_blocks.cpp \
-	$(GLSL_SRCDIR)/lower_packing_builtins.cpp \
-	$(GLSL_SRCDIR)/lower_texture_projection.cpp \
-	$(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_vec_index_to_swizzle.cpp \
-	$(GLSL_SRCDIR)/lower_vector.cpp \
-	$(GLSL_SRCDIR)/lower_vector_insert.cpp \
-	$(GLSL_SRCDIR)/lower_vertex_id.cpp \
-	$(GLSL_SRCDIR)/lower_output_reads.cpp \
-	$(GLSL_SRCDIR)/lower_ubo_reference.cpp \
-	$(GLSL_SRCDIR)/opt_algebraic.cpp \
-	$(GLSL_SRCDIR)/opt_array_splitting.cpp \
-	$(GLSL_SRCDIR)/opt_constant_folding.cpp \
-	$(GLSL_SRCDIR)/opt_constant_propagation.cpp \
-	$(GLSL_SRCDIR)/opt_constant_variable.cpp \
-	$(GLSL_SRCDIR)/opt_copy_propagation.cpp \
-	$(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
-	$(GLSL_SRCDIR)/opt_cse.cpp \
-	$(GLSL_SRCDIR)/opt_dead_builtin_variables.cpp \
-	$(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
-	$(GLSL_SRCDIR)/opt_dead_code.cpp \
-	$(GLSL_SRCDIR)/opt_dead_code_local.cpp \
-	$(GLSL_SRCDIR)/opt_dead_functions.cpp \
-	$(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \
-	$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
-	$(GLSL_SRCDIR)/opt_function_inlining.cpp \
-	$(GLSL_SRCDIR)/opt_if_simplification.cpp \
-	$(GLSL_SRCDIR)/opt_minmax.cpp \
-	$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
-	$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
-	$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
-	$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
-	$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
-	$(GLSL_SRCDIR)/opt_tree_grafting.cpp \
-	$(GLSL_SRCDIR)/opt_vectorize.cpp \
-	$(GLSL_SRCDIR)/s_expression.cpp
+	ast.h \
+	ast_array_index.cpp \
+	ast_expr.cpp \
+	ast_function.cpp \
+	ast_to_hir.cpp \
+	ast_type.cpp \
+	blob.c \
+	blob.h \
+	builtin_functions.cpp \
+	builtin_type_macros.h \
+	builtin_types.cpp \
+	builtin_variables.cpp \
+	glsl_parser_extras.cpp \
+	glsl_parser_extras.h \
+	glsl_symbol_table.cpp \
+	glsl_symbol_table.h \
+	glsl_types.cpp \
+	glsl_types.h \
+	hir_field_selection.cpp \
+	ir_basic_block.cpp \
+	ir_basic_block.h \
+	ir_builder.cpp \
+	ir_builder.h \
+	ir_clone.cpp \
+	ir_constant_expression.cpp \
+	ir.cpp \
+	ir.h \
+	ir_equals.cpp \
+	ir_expression_flattening.cpp \
+	ir_expression_flattening.h \
+	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
+	ir_function_inlining.h \
+	ir_function.cpp \
+	ir_hierarchical_visitor.cpp \
+	ir_hierarchical_visitor.h \
+	ir_hv_accept.cpp \
+	ir_import_prototypes.cpp \
+	ir_optimization.h \
+	ir_print_visitor.cpp \
+	ir_print_visitor.h \
+	ir_reader.cpp \
+	ir_reader.h \
+	ir_rvalue_visitor.cpp \
+	ir_rvalue_visitor.h \
+	ir_set_program_inouts.cpp \
+	ir_uniform.h \
+	ir_validate.cpp \
+	ir_variable_refcount.cpp \
+	ir_variable_refcount.h \
+	ir_visitor.h \
+	linker.cpp \
+	linker.h \
+	link_atomics.cpp \
+	link_functions.cpp \
+	link_interface_blocks.cpp \
+	link_uniforms.cpp \
+	link_uniform_initializers.cpp \
+	link_uniform_block_active_visitor.cpp \
+	link_uniform_block_active_visitor.h \
+	link_uniform_blocks.cpp \
+	link_varyings.cpp \
+	link_varyings.h \
+	list.h \
+	loop_analysis.cpp \
+	loop_analysis.h \
+	loop_controls.cpp \
+	loop_unroll.cpp \
+	lower_clip_distance.cpp \
+	lower_const_arrays_to_uniforms.cpp \
+	lower_discard.cpp \
+	lower_discard_flow.cpp \
+	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
+	lower_jumps.cpp \
+	lower_mat_op_to_vec.cpp \
+	lower_noise.cpp \
+	lower_offset_array.cpp \
+	lower_packed_varyings.cpp \
+	lower_named_interface_blocks.cpp \
+	lower_packing_builtins.cpp \
+	lower_texture_projection.cpp \
+	lower_variable_index_to_cond_assign.cpp \
+	lower_vec_index_to_cond_assign.cpp \
+	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
+	lower_vector_insert.cpp \
+	lower_vertex_id.cpp \
+	lower_output_reads.cpp \
+	lower_ubo_reference.cpp \
+	opt_algebraic.cpp \
+	opt_array_splitting.cpp \
+	opt_constant_folding.cpp \
+	opt_constant_propagation.cpp \
+	opt_constant_variable.cpp \
+	opt_copy_propagation.cpp \
+	opt_copy_propagation_elements.cpp \
+	opt_cse.cpp \
+	opt_dead_builtin_variables.cpp \
+	opt_dead_builtin_varyings.cpp \
+	opt_dead_code.cpp \
+	opt_dead_code_local.cpp \
+	opt_dead_functions.cpp \
+	opt_flatten_nested_if_blocks.cpp \
+	opt_flip_matrices.cpp \
+	opt_function_inlining.cpp \
+	opt_if_simplification.cpp \
+	opt_minmax.cpp \
+	opt_noop_swizzle.cpp \
+	opt_rebalance_tree.cpp \
+	opt_redundant_jumps.cpp \
+	opt_structure_splitting.cpp \
+	opt_swizzle_swizzle.cpp \
+	opt_tree_grafting.cpp \
+	opt_vectorize.cpp \
+	program.h \
+	s_expression.cpp \
+	s_expression.h
 
 # glsl_compiler
 
 GLSL_COMPILER_CXX_FILES = \
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
-	$(GLSL_SRCDIR)/main.cpp
+	standalone_scaffolding.cpp \
+	standalone_scaffolding.h \
+	main.cpp
 
 # libglsl generated sources
 LIBGLSL_GENERATED_CXX_FILES = \
-	$(GLSL_BUILDDIR)/glsl_lexer.cpp \
-	$(GLSL_BUILDDIR)/glsl_parser.cpp
+	glsl_lexer.cpp \
+	glsl_parser.cpp
diff --git a/mesalib/src/glsl/README b/mesalib/src/glsl/README
index 2f93f12ff..bfcf69f90 100644
--- a/mesalib/src/glsl/README
+++ b/mesalib/src/glsl/README
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
 
 You can then use the new expression from builtins (if all backends
 would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_fract, for example).
+expression type (see ir_mod_to_floor, for example).
 
 Q: How is memory management handled in the compiler?
 
diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript
index 847e96246..21c8266a6 100644
--- a/mesalib/src/glsl/SConscript
+++ b/mesalib/src/glsl/SConscript
@@ -11,6 +11,8 @@ env.Prepend(CPPPATH = [
     '#src',
     '#src/mapi',
     '#src/mesa',
+    '#src/gallium/include',
+    '#src/gallium/auxiliary',
     '#src/glsl',
     '#src/glsl/glcpp',
 ])
diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h
index deb8c7a7f..f1d1806f0 100644
--- a/mesalib/src/glsl/ast.h
+++ b/mesalib/src/glsl/ast.h
@@ -193,6 +193,7 @@ enum ast_operators {
    ast_uint_constant,
    ast_float_constant,
    ast_bool_constant,
+   ast_double_constant,
 
    ast_sequence,
    ast_aggregate
@@ -240,6 +241,7 @@ public:
       float float_constant;
       unsigned uint_constant;
       int bool_constant;
+      double double_constant;
    } primary_expression;
 
 
diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp
index cbff9d8b4..918be6966 100644
--- a/mesalib/src/glsl/ast_function.cpp
+++ b/mesalib/src/glsl/ast_function.cpp
@@ -573,6 +573,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
 	 result = new(ctx) ir_expression(ir_unop_i2u,
 		  new(ctx) ir_expression(ir_unop_b2i, src));
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2u, src);
+	 break;
       }
       break;
    case GLSL_TYPE_INT:
@@ -586,6 +589,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_BOOL:
 	 result = new(ctx) ir_expression(ir_unop_b2i, src);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2i, src);
+	 break;
       }
       break;
    case GLSL_TYPE_FLOAT:
@@ -599,6 +605,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_BOOL:
 	 result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL);
+	 break;
       }
       break;
    case GLSL_TYPE_BOOL:
@@ -613,8 +622,27 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_FLOAT:
 	 result = new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+         result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL);
+         break;
       }
       break;
+   case GLSL_TYPE_DOUBLE:
+      switch (b) {
+      case GLSL_TYPE_INT:
+         result = new(ctx) ir_expression(ir_unop_i2d, src);
+         break;
+      case GLSL_TYPE_UINT:
+         result = new(ctx) ir_expression(ir_unop_u2d, src);
+         break;
+      case GLSL_TYPE_BOOL:
+         result = new(ctx) ir_expression(ir_unop_f2d,
+                  new(ctx) ir_expression(ir_unop_b2f, src));
+         break;
+      case GLSL_TYPE_FLOAT:
+         result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL);
+         break;
+      }
    }
 
    assert(result != NULL);
@@ -711,9 +739,9 @@ process_vec_mat_constructor(exec_list *instructions,
 
       /* Apply implicit conversions (not the scalar constructor rules!). See
        * the spec quote above. */
-      if (constructor_type->is_float()) {
+      if (constructor_type->base_type != result->type->base_type) {
          const glsl_type *desired_type =
-            glsl_type::get_instance(GLSL_TYPE_FLOAT,
+            glsl_type::get_instance(constructor_type->base_type,
                                     ir->type->vector_elements,
                                     ir->type->matrix_columns);
          if (result->type->can_implicitly_convert_to(desired_type, state)) {
@@ -847,13 +875,17 @@ process_array_constructor(exec_list *instructions,
    foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
       ir_rvalue *result = ir;
 
+      const glsl_base_type element_base_type =
+         constructor_type->element_type()->base_type;
+
       /* Apply implicit conversions (not the scalar constructor rules!). See
        * the spec quote above. */
-      if (constructor_type->element_type()->is_float()) {
-	 const glsl_type *desired_type =
-	    glsl_type::get_instance(GLSL_TYPE_FLOAT,
-				    ir->type->vector_elements,
-				    ir->type->matrix_columns);
+      if (element_base_type != result->type->base_type) {
+         const glsl_type *desired_type =
+            glsl_type::get_instance(element_base_type,
+                                    ir->type->vector_elements,
+                                    ir->type->matrix_columns);
+
 	 if (result->type->can_implicitly_convert_to(desired_type, state)) {
 	    /* Even though convert_component() implements the constructor
 	     * conversion rules (not the implicit conversion rules), it's safe
@@ -1012,6 +1044,9 @@ emit_inline_vector_constructor(const glsl_type *type,
 	       case GLSL_TYPE_FLOAT:
 		  data.f[i + base_component] = c->get_float_component(i);
 		  break;
+	       case GLSL_TYPE_DOUBLE:
+		  data.d[i + base_component] = c->get_double_component(i);
+		  break;
 	       case GLSL_TYPE_BOOL:
 		  data.b[i + base_component] = c->get_bool_component(i);
 		  break;
@@ -1167,16 +1202,21 @@ emit_inline_matrix_constructor(const glsl_type *type,
       /* Assign the scalar to the X component of a vec4 (or dvec4 for a double
        * parameter), and fill the remaining components with zero.
        */
+      glsl_base_type param_base_type = first_param->type->base_type;
+      assert(param_base_type == GLSL_TYPE_FLOAT ||
+             param_base_type == GLSL_TYPE_DOUBLE);
       ir_variable *rhs_var =
-	 new(ctx) ir_variable(glsl_type::vec4_type, "mat_ctor_vec",
-			      ir_var_temporary);
+         new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1),
+                              "mat_ctor_vec",
+                              ir_var_temporary);
       instructions->push_tail(rhs_var);
 
       ir_constant_data zero;
-      zero.f[0] = 0.0;
-      zero.f[1] = 0.0;
-      zero.f[2] = 0.0;
-      zero.f[3] = 0.0;
+      for (unsigned i = 0; i < 4; i++)
+         if (param_base_type == GLSL_TYPE_FLOAT)
+            zero.f[i] = 0.0;
+         else
+            zero.d[i] = 0.0;
 
       ir_instruction *inst =
 	 new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
@@ -1524,10 +1564,10 @@ ast_function_expression::hir(exec_list *instructions,
       }
 
 
-      /* Constructors for samplers are illegal.
+      /* Constructors for opaque types are illegal.
        */
-      if (constructor_type->is_sampler()) {
-	 _mesa_glsl_error(& loc, state, "cannot construct sampler type `%s'",
+      if (constructor_type->contains_opaque()) {
+	 _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'",
 			  constructor_type->name);
 	 return ir_rvalue::error_value(ctx);
       }
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index fe1e1291e..acb5c763c 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -172,6 +172,7 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from,
       switch (from->base_type) {
       case GLSL_TYPE_INT: return ir_unop_i2f;
       case GLSL_TYPE_UINT: return ir_unop_u2f;
+      case GLSL_TYPE_DOUBLE: return ir_unop_d2f;
       default: return (ir_expression_operation)0;
       }
 
@@ -183,6 +184,16 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from,
          default: return (ir_expression_operation)0;
       }
 
+   case GLSL_TYPE_DOUBLE:
+      if (!state->has_double())
+         return (ir_expression_operation)0;
+      switch (from->base_type) {
+      case GLSL_TYPE_INT: return ir_unop_i2d;
+      case GLSL_TYPE_UINT: return ir_unop_u2d;
+      case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+      default: return (ir_expression_operation)0;
+      }
+
    default: return (ir_expression_operation)0;
    }
 }
@@ -340,8 +351,10 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
     * type of both operands must be float.
     */
    assert(type_a->is_matrix() || type_b->is_matrix());
-   assert(type_a->base_type == GLSL_TYPE_FLOAT);
-   assert(type_b->base_type == GLSL_TYPE_FLOAT);
+   assert(type_a->base_type == GLSL_TYPE_FLOAT ||
+          type_a->base_type == GLSL_TYPE_DOUBLE);
+   assert(type_b->base_type == GLSL_TYPE_FLOAT ||
+          type_b->base_type == GLSL_TYPE_DOUBLE);
 
    /*   "* The operator is add (+), subtract (-), or divide (/), and the
     *      operands are matrices with the same number of rows and the same
@@ -959,6 +972,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       return new(mem_ctx) ir_expression(operation, op0, op1);
 
    case GLSL_TYPE_ARRAY: {
@@ -1597,13 +1611,11 @@ ast_expression::do_hir(exec_list *instructions,
       }
 
       ir_constant *cond_val = op[0]->constant_expression_value();
-      ir_constant *then_val = op[1]->constant_expression_value();
-      ir_constant *else_val = op[2]->constant_expression_value();
 
       if (then_instructions.is_empty()
           && else_instructions.is_empty()
-          && (cond_val != NULL) && (then_val != NULL) && (else_val != NULL)) {
-         result = (cond_val->value.b[0]) ? then_val : else_val;
+          && cond_val != NULL) {
+         result = cond_val->value.b[0] ? op[1] : op[2];
       } else {
          ir_variable *const tmp =
             new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary);
@@ -1748,6 +1760,10 @@ ast_expression::do_hir(exec_list *instructions,
       result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant));
       break;
 
+   case ast_double_constant:
+      result = new(ctx) ir_constant(this->primary_expression.double_constant);
+      break;
+
    case ast_sequence: {
       /* It should not be possible to generate a sequence in the AST without
        * any expressions in it.
@@ -2562,6 +2578,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
          _mesa_glsl_error(loc, state,
                           "varying variables may not be of type struct");
          break;
+      case GLSL_TYPE_DOUBLE:
+         break;
       default:
          _mesa_glsl_error(loc, state, "illegal type for a varying variable");
          break;
@@ -3603,6 +3621,51 @@ ast_declarator_list::hir(exec_list *instructions,
 
             handle_geometry_shader_input_decl(state, loc, var);
          }
+      } else if (var->data.mode == ir_var_shader_out) {
+         const glsl_type *check_type = var->type->without_array();
+
+         /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+          *
+          *     It is a compile-time error to declare a vertex, tessellation
+          *     evaluation, tessellation control, or geometry shader output
+          *     that contains any of the following:
+          *
+          *     * A Boolean type (bool, bvec2 ...)
+          *     * An opaque type
+          */
+         if (check_type->is_boolean() || check_type->contains_opaque())
+            _mesa_glsl_error(&loc, state,
+                             "%s shader output cannot have type %s",
+                             _mesa_shader_stage_to_string(state->stage),
+                             check_type->name);
+
+         /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+          *
+          *     It is a compile-time error to declare a fragment shader output
+          *     that contains any of the following:
+          *
+          *     * A Boolean type (bool, bvec2 ...)
+          *     * A double-precision scalar or vector (double, dvec2 ...)
+          *     * An opaque type
+          *     * Any matrix type
+          *     * A structure
+          */
+         if (state->stage == MESA_SHADER_FRAGMENT) {
+            if (check_type->is_record() || check_type->is_matrix())
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader output "
+                                "cannot have struct or array type");
+            switch (check_type->base_type) {
+            case GLSL_TYPE_UINT:
+            case GLSL_TYPE_INT:
+            case GLSL_TYPE_FLOAT:
+               break;
+            default:
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader output cannot have "
+                                "type %s", check_type->name);
+            }
+         }
       }
 
       /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
@@ -3647,6 +3710,15 @@ ast_declarator_list::hir(exec_list *instructions,
                           var_type);
       }
 
+      /* Double fragment inputs must be qualified with 'flat'. */
+      if (var->type->contains_double() &&
+          var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+          state->stage == MESA_SHADER_FRAGMENT &&
+          var->data.mode == ir_var_shader_in) {
+         _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+                          "a double, then it must be qualified with 'flat'",
+                          var_type);
+      }
 
       /* Interpolation qualifiers cannot be applied to 'centroid' and
        * 'centroid varying'.
@@ -4133,6 +4205,27 @@ ast_function::hir(exec_list *instructions,
       emit_function(state, f);
    }
 
+   /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71:
+    *
+    * "A shader cannot redefine or overload built-in functions."
+    *
+    * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions":
+    *
+    * "User code can overload the built-in functions but cannot redefine
+    * them."
+    */
+   if (state->es_shader && state->language_version >= 300) {
+      /* Local shader has no exact candidates; check the built-ins. */
+      _mesa_glsl_initialize_builtin_functions();
+      if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+         YYLTYPE loc = this->get_location();
+         _mesa_glsl_error(& loc, state,
+                          "A shader cannot redefine or overload built-in "
+                          "function `%s' in GLSL ES 3.00", name);
+         return NULL;
+      }
+   }
+
    /* Verify that this function's signature either doesn't match a previously
     * seen signature for a function with the same name, or, if a match is found,
     * that the previously seen signature does not have an associated definition.
@@ -5203,6 +5296,13 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "members");
          }
 
+         if (qual->flags.q.constant) {
+            YYLTYPE loc = decl_list->get_location();
+            _mesa_glsl_error(&loc, state,
+                             "const storage qualifier cannot be applied "
+                             "to struct or interface block members");
+         }
+
          field_type = process_array_type(&loc, decl_type,
                                          decl->array_specifier, state);
          fields[i].type = field_type;
@@ -5383,6 +5483,14 @@ ast_interface_block::hir(exec_list *instructions,
 {
    YYLTYPE loc = this->get_location();
 
+   /* Interface blocks must be declared at global scope */
+   if (state->current_function != NULL) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block `%s' must be declared "
+                       "at global scope",
+                       this->block_name);
+   }
+
    /* The ast_interface_block has a list of ast_declarator_lists.  We
     * need to turn those into ir_variables with an association
     * with this uniform block.
@@ -5443,9 +5551,23 @@ ast_interface_block::hir(exec_list *instructions,
 
    state->struct_specifier_depth--;
 
-   if (!redeclaring_per_vertex)
+   if (!redeclaring_per_vertex) {
       validate_identifier(this->block_name, loc, state);
 
+      /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
+       *
+       *     "Block names have no other use within a shader beyond interface
+       *     matching; it is a compile-time error to use a block name at global
+       *     scope for anything other than as a block name."
+       */
+      ir_variable *var = state->symbols->get_variable(this->block_name);
+      if (var && !var->type->is_interface()) {
+         _mesa_glsl_error(&loc, state, "Block name `%s' is "
+                          "already used in the scope.",
+                          this->block_name);
+      }
+   }
+
    const glsl_type *earlier_per_vertex = NULL;
    if (redeclaring_per_vertex) {
       /* Find the previous declaration of gl_PerVertex.  If we're redeclaring
@@ -5908,7 +6030,7 @@ ast_cs_input_layout::hir(exec_list *instructions,
     * declare it earlier).
     */
    ir_variable *var = new(state->symbols)
-      ir_variable(glsl_type::ivec3_type, "gl_WorkGroupSize", ir_var_auto);
+      ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto);
    var->data.how_declared = ir_var_declared_implicitly;
    var->data.read_only = true;
    instructions->push_tail(var);
@@ -5916,10 +6038,10 @@ ast_cs_input_layout::hir(exec_list *instructions,
    ir_constant_data data;
    memset(&data, 0, sizeof(data));
    for (int i = 0; i < 3; i++)
-      data.i[i] = this->local_size[i];
-   var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data);
+      data.u[i] = this->local_size[i];
+   var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
    var->constant_initializer =
-      new(var) ir_constant(glsl_type::ivec3_type, &data);
+      new(var) ir_constant(glsl_type::uvec3_type, &data);
    var->data.has_initializer = true;
 
    return NULL;
diff --git a/mesalib/src/glsl/blob.c b/mesalib/src/glsl/blob.c
new file mode 100644
index 000000000..dd4341be9
--- /dev/null
+++ b/mesalib/src/glsl/blob.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "blob.h"
+
+#define BLOB_INITIAL_SIZE 4096
+
+/* Make room in \blob for \additional more bytes beyond blob->size.
+ *
+ * When the current allocation is too small it is replaced by one of twice the
+ * size (BLOB_INITIAL_SIZE for the first allocation), or by exactly
+ * blob->allocated + \additional bytes if doubling would still be too small.
+ *
+ * \return True if the space is available, false if reallocation failed (in
+ * which case the blob's fields are left unchanged).
+ */
+static bool
+grow_to_fit(struct blob *blob, size_t additional)
+{
+   const size_t minimum = blob->allocated + additional;
+   size_t capacity;
+   uint8_t *storage;
+
+   /* Fast path: the request already fits. */
+   if (blob->size + additional <= blob->allocated)
+      return true;
+
+   capacity = (blob->allocated == 0) ? BLOB_INITIAL_SIZE : blob->allocated * 2;
+   if (capacity < minimum)
+      capacity = minimum;
+
+   storage = reralloc_size(blob, blob->data, capacity);
+   if (storage == NULL)
+      return false;
+
+   /* Commit only once the reallocation has succeeded. */
+   blob->data = storage;
+   blob->allocated = capacity;
+
+   return true;
+}
+
+/* Align the blob->size so that reading or writing a value at (blob->data +
+ * blob->size) will result in an access aligned to a granularity of \alignment
+ * bytes.
+ *
+ * Any padding bytes added are set to zero, so that the serialized contents of
+ * the blob never include uninitialized memory.
+ *
+ * \return True unless allocation fails
+ */
+static bool
+align_blob(struct blob *blob, size_t alignment)
+{
+   const size_t new_size = ALIGN(blob->size, alignment);
+   const size_t padding = new_size - blob->size;
+
+   if (! grow_to_fit (blob, padding))
+      return false;
+
+   /* grow_to_fit does not initialize the storage it hands out.  When padding
+    * is non-zero the call above guarantees blob->data is allocated.
+    */
+   if (padding > 0)
+      memset(blob->data + blob->size, 0, padding);
+
+   blob->size = new_size;
+
+   return true;
+}
+
+/* Advance blob->current so that its offset from blob->data is rounded up to
+ * \alignment via ALIGN.  No bounds check is done here, so the result may lie
+ * past blob->end; the readers in this file call ensure_can_read() afterwards.
+ */
+static void
+align_blob_reader(struct blob_reader *blob, size_t alignment)
+{
+   blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
+}
+
+/* Allocate an empty blob as a ralloc child of \mem_ctx.
+ *
+ * \return The new blob, or NULL if the allocation failed.
+ */
+struct blob *
+blob_create(void *mem_ctx)
+{
+   struct blob *fresh = ralloc(mem_ctx, struct blob);
+
+   /* Start out with no storage at all; grow_to_fit allocates it when the
+    * first bytes are written.
+    */
+   if (fresh != NULL) {
+      fresh->data = NULL;
+      fresh->allocated = 0;
+      fresh->size = 0;
+   }
+
+   return fresh;
+}
+
+/* Copy \to_write bytes from \bytes over already-written blob data starting at
+ * \offset.
+ *
+ * \return True if the whole range [offset, offset + to_write) lies within the
+ * first blob->size bytes; otherwise false, and nothing is written.
+ */
+bool
+blob_overwrite_bytes(struct blob *blob,
+                     size_t offset,
+                     const void *bytes,
+                     size_t to_write)
+{
+   /* Detect an attempt to overwrite data out of bounds.
+    *
+    * offset is unsigned, so it must be compared against blob->size before
+    * subtracting: otherwise blob->size - offset wraps around for
+    * offset > blob->size and the range check would wrongly pass.
+    */
+   if (offset > blob->size || blob->size - offset < to_write)
+      return false;
+
+   /* Skip empty copies; blob->data is still NULL for a never-written blob. */
+   if (to_write > 0)
+      memcpy(blob->data + offset, bytes, to_write);
+
+   return true;
+}
+
+/* Append \to_write bytes copied from \bytes to the end of the blob, growing
+ * the allocation if needed.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
+{
+   if (! grow_to_fit(blob, to_write))
+       return false;
+
+   /* An empty write succeeds without touching memory: for a blob that has
+    * never been written to, blob->data is still NULL, and memcpy must not be
+    * given a null pointer even for a length of zero.
+    */
+   if (to_write > 0) {
+      memcpy(blob->data + blob->size, bytes, to_write);
+      blob->size += to_write;
+   }
+
+   return true;
+}
+
+/* Extend the blob by \to_write bytes without initializing them.
+ *
+ * \return Pointer to the start of the newly reserved range inside the blob,
+ * or NULL if the blob could not be grown.
+ */
+uint8_t *
+blob_reserve_bytes(struct blob *blob, size_t to_write)
+{
+   uint8_t *reserved = NULL;
+
+   if (grow_to_fit (blob, to_write)) {
+      /* Hand out the current tail and count it as written. */
+      reserved = blob->data + blob->size;
+      blob->size += to_write;
+   }
+
+   return reserved;
+}
+
+/* Append \value to the blob at the next offset aligned to sizeof(uint32_t).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint32(struct blob *blob, uint32_t value)
+{
+   /* Do not fall through to the write if the padding could not be added:
+    * the value would land at an unaligned offset, while blob_read_uint32
+    * always looks for it at an aligned one.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Overwrite the sizeof(uint32_t) bytes at \offset with \value by forwarding to
+ * blob_overwrite_bytes.  Unlike blob_write_uint32, no alignment is applied to
+ * \offset here.
+ */
+bool
+blob_overwrite_uint32 (struct blob *blob,
+                       size_t offset,
+                       uint32_t value)
+{
+   return blob_overwrite_bytes(blob, offset, &value, sizeof(value));
+}
+
+/* Append \value to the blob at the next offset aligned to sizeof(uint64_t).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint64(struct blob *blob, uint64_t value)
+{
+   /* Do not fall through to the write if the padding could not be added:
+    * the value would land at an unaligned offset, while blob_read_uint64
+    * always looks for it at an aligned one.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Append \value to the blob at the next offset aligned to sizeof(intptr_t).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_intptr(struct blob *blob, intptr_t value)
+{
+   /* Do not fall through to the write if the padding could not be added:
+    * the value would land at an unaligned offset, while blob_read_intptr
+    * always looks for it at an aligned one.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Append the characters of \str followed by its terminating zero byte.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_string(struct blob *blob, const char *str)
+{
+   /* The +1 stores the terminator, which blob_read_string searches for. */
+   const size_t length_with_nul = strlen(str) + 1;
+
+   return blob_write_bytes(blob, str, length_with_nul);
+}
+
+/* Point \blob at the \size bytes starting at \data and reset it so that the
+ * next read starts at the beginning with no overrun recorded.
+ */
+void
+blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size)
+{
+   blob->overrun = false;
+
+   blob->data = data;
+   blob->current = data;
+   blob->end = data + size;   /* one past the last readable byte */
+}
+
+/* Check that an object of size \size can be read from this blob.
+ *
+ * A zero-byte read positioned exactly at blob->end is valid.  Otherwise, if
+ * fewer than \size bytes remain (or blob->current is already past blob->end),
+ * set blob->overrun to indicate that we attempted to read too far.
+ *
+ * \return True if the read fits, false (with blob->overrun set) if not.
+ */
+static bool
+ensure_can_read(struct blob_reader *blob, size_t size)
+{
+   /* Use <= rather than <: with current == end nothing remains, which is only
+    * an overrun when size > 0.  The pointer comparison also guarantees the
+    * difference is non-negative before it is compared as an unsigned value.
+    */
+   if (blob->current <= blob->end &&
+       (size_t) (blob->end - blob->current) >= size)
+      return true;
+
+   blob->overrun = true;
+
+   return false;
+}
+
+/* Consume \size bytes at the current read position.
+ *
+ * \return Pointer into the reader's underlying data at the position before
+ * the call, or NULL if ensure_can_read rejected the read (the position is
+ * then left unchanged).
+ */
+void *
+blob_read_bytes(struct blob_reader *blob, size_t size)
+{
+   uint8_t *start;
+
+   if (! ensure_can_read (blob, size))
+      return NULL;
+
+   start = blob->current;
+   blob->current = start + size;
+
+   return start;
+}
+
+/* Consume \size bytes at the current read position and copy them to \dest.
+ * If the read is rejected by blob_read_bytes, \dest is left untouched.
+ */
+void
+blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size)
+{
+   const uint8_t *src = blob_read_bytes(blob, size);
+
+   if (src != NULL)
+      memcpy(dest, src, size);
+}
+
+/* These next three read functions have identical form. If we add any beyond
+ * these first three we should probably switch to generating these with a
+ * preprocessor macro.
+*/
+/* Read a uint32_t from the next offset (relative to blob->data) aligned to
+ * sizeof(uint32_t), and advance past it.
+ *
+ * \return The value read, or 0 if not enough data remains (blob->overrun is
+ * then set by ensure_can_read).
+ */
+uint32_t
+blob_read_uint32(struct blob_reader *blob)
+{
+   uint32_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy instead of dereferencing a cast pointer: only the offset from
+    * blob->data is aligned, and blob->data itself comes from the caller of
+    * blob_reader_init with no alignment guarantee.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read a uint64_t from the next offset (relative to blob->data) aligned to
+ * sizeof(uint64_t), and advance past it.
+ *
+ * \return The value read, or 0 if not enough data remains (blob->overrun is
+ * then set by ensure_can_read).
+ */
+uint64_t
+blob_read_uint64(struct blob_reader *blob)
+{
+   uint64_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy instead of dereferencing a cast pointer: only the offset from
+    * blob->data is aligned, and blob->data itself comes from the caller of
+    * blob_reader_init with no alignment guarantee.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read an intptr_t from the next offset (relative to blob->data) aligned to
+ * sizeof(intptr_t), and advance past it.
+ *
+ * \return The value read, or 0 if not enough data remains (blob->overrun is
+ * then set by ensure_can_read).
+ */
+intptr_t
+blob_read_intptr(struct blob_reader *blob)
+{
+   intptr_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy instead of dereferencing a cast pointer: only the offset from
+    * blob->data is aligned, and blob->data itself comes from the caller of
+    * blob_reader_init with no alignment guarantee.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read a zero-terminated string starting at the current read position and
+ * advance past its terminator.
+ *
+ * \return Pointer to the string inside the reader's underlying data, or NULL
+ * (with blob->overrun set) if the reader is at the end or no zero byte remains.
+ */
+char *
+blob_read_string(struct blob_reader *blob)
+{
+   size_t size;
+   char *ret;
+   uint8_t *nul;
+
+   /* If we're already at the end, then this is an overrun. */
+   if (blob->current >= blob->end) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   /* Similarly, if there is no zero byte in the data remaining in this blob,
+    * we also consider that an overrun.
+    */
+   nul = memchr(blob->current, 0, blob->end - blob->current);
+
+   if (nul == NULL) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   /* Length including the terminator.  nul lies within [current, end), so
+    * the difference is non-negative and the string is known to fit.
+    */
+   size = nul - blob->current + 1;
+
+   /* Keep the sanity check free of side effects, since the expression is
+    * not evaluated at all when NDEBUG is defined.
+    */
+   assert(size <= (size_t) (blob->end - blob->current));
+
+   ret = (char *) blob->current;
+
+   blob->current += size;
+
+   return ret;
+}
diff --git a/mesalib/src/glsl/blob.h b/mesalib/src/glsl/blob.h
new file mode 100644
index 000000000..ec903ec14
--- /dev/null
+++ b/mesalib/src/glsl/blob.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef BLOB_H
+#define BLOB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/* The blob functions implement a simple, low-level API for serializing and
+ * deserializing.
+ *
+ * All objects written to a blob will be serialized directly, (without any
+ * additional meta-data to describe the data written). Therefore, it is the
+ * caller's responsibility to ensure that any data can be read later, (either
+ * by knowing exactly what data is expected, or by writing to the blob
+ * sufficient meta-data to describe what has been written).
+ *
+ * A blob is efficient in that it dynamically grows by doubling in size, so
+ * allocation costs are logarithmic.
+ */
+
+/** Growable byte buffer that the blob_write_* functions append to. */
+struct blob {
+   /** The data actually written to the blob. */
+   uint8_t *data;
+
+   /** Number of bytes that have been allocated for \c data. */
+   size_t allocated;
+
+   /** The number of bytes that have actual data written to them. */
+   size_t size;
+};
+
+/* Read cursor over a caller-provided byte range, set up by blob_reader_init.
+ *
+ * When done reading, the caller can ensure that everything was consumed by
+ * checking the following:
+ *
+ *   1. blob->current should be equal to blob->end, (if not, too little was
+ *      read).
+ *
+ *   2. blob->overrun should be false, (otherwise, too much was read).
+ */
+struct blob_reader {
+   /** Start of the data being read. */
+   uint8_t *data;
+   /** One past the last readable byte (data + size at init time). */
+   uint8_t *end;
+   /** Position at which the next read starts. */
+   uint8_t *current;
+   /** Set once a read needed more data than remained. */
+   bool overrun;
+};
+
+/**
+ * Create a new, empty blob, belonging to \mem_ctx.
+ *
+ * \return The new blob, (or NULL in case of allocation failure).
+ */
+struct blob *
+blob_create (void *mem_ctx);
+
+/**
+ * Add some unstructured, fixed-size data to a blob.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write);
+
+/**
+ * Reserve space in \blob for a number of bytes.
+ *
+ * Space will be allocated within the blob for these bytes, but the bytes will
+ * be left uninitialized. The caller is expected to use the return value to
+ * write directly (and immediately) to these bytes.
+ *
+ * \note The return value is valid immediately upon return, but can be
+ * invalidated by any other call to a blob function. So the caller should call
+ * blob_reserve_bytes immediately before writing through the returned pointer.
+ *
+ * This function is intended to be used when interfacing with an existing API
+ * that is not aware of the blob API, (so that blob_write_bytes cannot be
+ * called).
+ *
+ * \return A pointer to space allocated within \blob to which \to_write bytes
+ * can be written, (or NULL in case of any allocation error).
+ */
+uint8_t *
+blob_reserve_bytes (struct blob *blob, size_t to_write);
+
+/**
+ * Overwrite some data previously written to the blob.
+ *
+ * Writes data to an existing portion of the blob at an offset of \offset.
+ * This data range must have previously been written to the blob by one of the
+ * blob_write_* calls.
+ *
+ * For example usage, see blob_overwrite_uint32
+ *
+ * \return True unless the requested offset or offset+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_bytes (struct blob *blob,
+                      size_t offset,
+                      const void *bytes,
+                      size_t to_write);
+
+/**
+ * Add a uint32_t to a blob.
+ *
+ * \note This function will only write to a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint32 (struct blob *blob, uint32_t value);
+
+/**
+ * Overwrite a uint32_t previously written to the blob.
+ *
+ * Writes a uint32_t value to an existing portion of the blob at an offset of
+ * \offset.  This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ *
+ * The expected usage is something like the following pattern:
+ *
+ *	size_t offset;
+ *
+ *	offset = blob->size;
+ *	blob_write_uint32 (blob, 0); // placeholder
+ *	... various blob write calls, writing N items ...
+ *	blob_overwrite_uint32 (blob, offset, N);
+ *
+ * \return True unless the requested offset or offset + sizeof(uint32_t) lie
+ * outside the current blob's size.
+ */
+bool
+blob_overwrite_uint32 (struct blob *blob,
+                       size_t offset,
+                       uint32_t value);
+
+/**
+ * Add a uint64_t to a blob.
+ *
+ * \note This function will only write to a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint64 (struct blob *blob, uint64_t value);
+
+/**
+ * Add an intptr_t to a blob.
+ *
+ * \note This function will only write to an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_intptr (struct blob *blob, intptr_t value);
+
+/**
+ * Add a NULL-terminated string to a blob, (including the NULL terminator).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_string (struct blob *blob, const char *str);
+
+/**
+ * Start reading a blob, (initializing the contents of \blob for reading).
+ *
+ * After this call, the caller can use the various blob_read_* functions to
+ * read elements from the data array.
+ *
+ * For all of the blob_read_* functions, if there is insufficient data
+ * remaining, the functions will do nothing, (perhaps returning default values
+ * such as 0). The caller can detect this by noting that the blob_reader's
+ * current value is unchanged before and after the call.
+ */
+void
+blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, (and
+ * update the current location to just past this data).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the data in order to use it after the lifetime of the data
+ * underlying the blob reader.
+ *
+ * \return The bytes read (see note above about memory lifetime).
+ */
+void *
+blob_read_bytes (struct blob_reader *blob, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, copying
+ * it to \dest (and update the current location to just past this data)
+ */
+void
+blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size);
+
+/**
+ * Read a uint32_t from the current location, (and update the current location
+ * to just past this uint32_t).
+ *
+ * \note This function will only read from a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint32_t read
+ */
+uint32_t
+blob_read_uint32 (struct blob_reader *blob);
+
+/**
+ * Read a uint64_t from the current location, (and update the current location
+ * to just past this uint64_t).
+ *
+ * \note This function will only read from a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint64_t read
+ */
+uint64_t
+blob_read_uint64 (struct blob_reader *blob);
+
+/**
+ * Read an intptr_t value from the current location, (and update the
+ * current location to just past this intptr_t).
+ *
+ * \note This function will only read from an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The intptr_t read
+ */
+intptr_t
+blob_read_intptr (struct blob_reader *blob);
+
+/**
+ * Read a NULL-terminated string from the current location, (and update the
+ * current location to just past this string).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the string in order to use the string after the lifetime
+ * of the data underlying the blob reader.
+ *
+ * \return The string read (see note above about memory lifetime). However, if
+ * there is no NULL byte remaining within the blob, this function returns
+ * NULL.
+ */
+char *
+blob_read_string (struct blob_reader *blob);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLOB_H */
diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp
index 4cd76f6b8..1a85c2dff 100755
--- a/mesalib/src/glsl/builtin_functions.cpp
+++ b/mesalib/src/glsl/builtin_functions.cpp
@@ -381,6 +381,12 @@ gs_streams(const _mesa_glsl_parse_state *state)
    return gpu_shader5(state) && gs_only(state);
 }
 
+/**
+ * Availability predicate for the double-precision built-in overloads: defers
+ * to state->has_double().
+ */
+static bool
+fp64(const _mesa_glsl_parse_state *state)
+{
+   return state->has_double();
+}
+
 /** @} */
 
 /******************************************************************************/
@@ -436,6 +442,7 @@ private:
    ir_constant *imm(float f, unsigned vector_elements=1);
    ir_constant *imm(int i, unsigned vector_elements=1);
    ir_constant *imm(unsigned u, unsigned vector_elements=1);
+   ir_constant *imm(double d, unsigned vector_elements=1);
    ir_constant *imm(const glsl_type *type, const ir_constant_data &);
    ir_dereference_variable *var_ref(ir_variable *var);
    ir_dereference_array *array_ref(ir_variable *var, int i);
@@ -526,29 +533,29 @@ private:
    B1(log)
    B1(exp2)
    B1(log2)
-   B1(sqrt)
-   B1(inversesqrt)
-   B1(abs)
-   B1(sign)
-   B1(floor)
-   B1(trunc)
-   B1(round)
-   B1(roundEven)
-   B1(ceil)
-   B1(fract)
+   BA1(sqrt)
+   BA1(inversesqrt)
+   BA1(abs)
+   BA1(sign)
+   BA1(floor)
+   BA1(trunc)
+   BA1(round)
+   BA1(roundEven)
+   BA1(ceil)
+   BA1(fract)
    B2(mod)
-   B1(modf)
+   BA1(modf)
    BA2(min)
    BA2(max)
    BA2(clamp)
-   B2(mix_lrp)
+   BA2(mix_lrp)
    ir_function_signature *_mix_sel(builtin_available_predicate avail,
                                    const glsl_type *val_type,
                                    const glsl_type *blend_type);
-   B2(step)
-   B2(smoothstep)
-   B1(isnan)
-   B1(isinf)
+   BA2(step)
+   BA2(smoothstep)
+   BA1(isnan)
+   BA1(isinf)
    B1(floatBitsToInt)
    B1(floatBitsToUint)
    B1(intBitsToFloat)
@@ -563,24 +570,27 @@ private:
    ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail);
    ir_function_signature *_packHalf2x16(builtin_available_predicate avail);
    ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail);
-   B1(length)
-   B1(distance);
-   B1(dot);
-   B1(cross);
-   B1(normalize);
+   ir_function_signature *_packDouble2x32(builtin_available_predicate avail);
+   ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail);
+
+   BA1(length)
+   BA1(distance);
+   BA1(dot);
+   BA1(cross);
+   BA1(normalize);
    B0(ftransform);
-   B1(faceforward);
-   B1(reflect);
-   B1(refract);
-   B1(matrixCompMult);
-   B1(outerProduct);
-   B0(determinant_mat2);
-   B0(determinant_mat3);
-   B0(determinant_mat4);
-   B0(inverse_mat2);
-   B0(inverse_mat3);
-   B0(inverse_mat4);
-   B1(transpose);
+   BA1(faceforward);
+   BA1(reflect);
+   BA1(refract);
+   BA1(matrixCompMult);
+   BA1(outerProduct);
+   BA1(determinant_mat2);
+   BA1(determinant_mat3);
+   BA1(determinant_mat4);
+   BA1(inverse_mat2);
+   BA1(inverse_mat3);
+   BA1(inverse_mat4);
+   BA1(transpose);
    BA1(lessThan);
    BA1(lessThanEqual);
    BA1(greaterThan);
@@ -644,9 +654,10 @@ private:
    B1(bitCount)
    B1(findLSB)
    B1(findMSB)
-   B1(fma_mesa)
+   BA1(fma)
    B2(ldexp)
    B2(frexp)
+   B2(dfrexp)
    B1(uaddCarry)
    B1(usubBorrow)
    B1(mulExtended)
@@ -815,6 +826,42 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::vec4_type),  \
                 NULL);
 
+#define FD(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(always_available, glsl_type::float_type), \
+                _##NAME(always_available, glsl_type::vec2_type),  \
+                _##NAME(always_available, glsl_type::vec3_type),  \
+                _##NAME(always_available, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
+#define FD130(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(v130, glsl_type::float_type), \
+                _##NAME(v130, glsl_type::vec2_type),  \
+                _##NAME(v130, glsl_type::vec3_type),                  \
+                _##NAME(v130, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
+#define FDGS5(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(gpu_shader5, glsl_type::float_type), \
+                _##NAME(gpu_shader5, glsl_type::vec2_type),  \
+                _##NAME(gpu_shader5, glsl_type::vec3_type),                  \
+                _##NAME(gpu_shader5, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
 #define FI(NAME)                                \
    add_function(#NAME,                          \
                 _##NAME(glsl_type::float_type), \
@@ -827,7 +874,23 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::ivec4_type), \
                 NULL);
 
-#define FIU(NAME)                                                 \
+#define FID(NAME)                                \
+   add_function(#NAME,                          \
+                _##NAME(always_available, glsl_type::float_type), \
+                _##NAME(always_available, glsl_type::vec2_type),  \
+                _##NAME(always_available, glsl_type::vec3_type),  \
+                _##NAME(always_available, glsl_type::vec4_type),  \
+                _##NAME(always_available, glsl_type::int_type),   \
+                _##NAME(always_available, glsl_type::ivec2_type), \
+                _##NAME(always_available, glsl_type::ivec3_type), \
+                _##NAME(always_available, glsl_type::ivec4_type), \
+                _##NAME(fp64, glsl_type::double_type), \
+                _##NAME(fp64, glsl_type::dvec2_type),  \
+                _##NAME(fp64, glsl_type::dvec3_type),  \
+                _##NAME(fp64, glsl_type::dvec4_type),  \
+                NULL);
+
+#define FIUD(NAME)                                                 \
    add_function(#NAME,                                            \
                 _##NAME(always_available, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type),  \
@@ -843,6 +906,10 @@ builtin_builder::create_builtins()
                 _##NAME(v130, glsl_type::uvec2_type),             \
                 _##NAME(v130, glsl_type::uvec3_type),             \
                 _##NAME(v130, glsl_type::uvec4_type),             \
+                _##NAME(fp64, glsl_type::double_type), \
+                _##NAME(fp64, glsl_type::dvec2_type),  \
+                _##NAME(fp64, glsl_type::dvec3_type),  \
+                _##NAME(fp64, glsl_type::dvec4_type),  \
                 NULL);
 
 #define IU(NAME)                                \
@@ -858,7 +925,7 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::uvec4_type), \
                 NULL);
 
-#define FIUB(NAME)                                                \
+#define FIUBD(NAME)                                                \
    add_function(#NAME,                                            \
                 _##NAME(always_available, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type),  \
@@ -879,9 +946,14 @@ builtin_builder::create_builtins()
                 _##NAME(always_available, glsl_type::bvec2_type), \
                 _##NAME(always_available, glsl_type::bvec3_type), \
                 _##NAME(always_available, glsl_type::bvec4_type), \
+                                                                  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type), \
+                _##NAME(fp64, glsl_type::dvec3_type), \
+                _##NAME(fp64, glsl_type::dvec4_type), \
                 NULL);
 
-#define FIU2_MIXED(NAME)                                                                 \
+#define FIUD2_MIXED(NAME)                                                                 \
    add_function(#NAME,                                                                   \
                 _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type,  glsl_type::float_type), \
@@ -909,6 +981,14 @@ builtin_builder::create_builtins()
                 _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type),             \
                 _##NAME(v130, glsl_type::uvec3_type, glsl_type::uvec3_type),             \
                 _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type),             \
+                                                                                         \
+                _##NAME(fp64, glsl_type::double_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),           \
+                _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),           \
+                _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),           \
                 NULL);
 
    F(radians)
@@ -941,16 +1021,16 @@ builtin_builder::create_builtins()
    F(log)
    F(exp2)
    F(log2)
-   F(sqrt)
-   F(inversesqrt)
-   FI(abs)
-   FI(sign)
-   F(floor)
-   F(trunc)
-   F(round)
-   F(roundEven)
-   F(ceil)
-   F(fract)
+   FD(sqrt)
+   FD(inversesqrt)
+   FID(abs)
+   FID(sign)
+   FD(floor)
+   FD(trunc)
+   FD(round)
+   FD(roundEven)
+   FD(ceil)
+   FD(fract)
 
    add_function("mod",
                 _mod(glsl_type::float_type, glsl_type::float_type),
@@ -961,29 +1041,52 @@ builtin_builder::create_builtins()
                 _mod(glsl_type::vec2_type,  glsl_type::vec2_type),
                 _mod(glsl_type::vec3_type,  glsl_type::vec3_type),
                 _mod(glsl_type::vec4_type,  glsl_type::vec4_type),
+
+                _mod(glsl_type::double_type, glsl_type::double_type),
+                _mod(glsl_type::dvec2_type,  glsl_type::double_type),
+                _mod(glsl_type::dvec3_type,  glsl_type::double_type),
+                _mod(glsl_type::dvec4_type,  glsl_type::double_type),
+
+                _mod(glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _mod(glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _mod(glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
 
-   F(modf)
+   FD(modf)
 
-   FIU2_MIXED(min)
-   FIU2_MIXED(max)
-   FIU2_MIXED(clamp)
+   FIUD2_MIXED(min)
+   FIUD2_MIXED(max)
+   FIUD2_MIXED(clamp)
 
    add_function("mix",
-                _mix_lrp(glsl_type::float_type, glsl_type::float_type),
-                _mix_lrp(glsl_type::vec2_type,  glsl_type::float_type),
-                _mix_lrp(glsl_type::vec3_type,  glsl_type::float_type),
-                _mix_lrp(glsl_type::vec4_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec2_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec3_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec4_type,  glsl_type::float_type),
+
+                _mix_lrp(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _mix_lrp(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _mix_lrp(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
 
-                _mix_lrp(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _mix_lrp(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _mix_lrp(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec2_type,  glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec3_type,  glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec4_type,  glsl_type::double_type),
+
+                _mix_lrp(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _mix_lrp(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _mix_lrp(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
 
                 _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type),
                 _mix_sel(v130, glsl_type::vec2_type,  glsl_type::bvec2_type),
                 _mix_sel(v130, glsl_type::vec3_type,  glsl_type::bvec3_type),
                 _mix_sel(v130, glsl_type::vec4_type,  glsl_type::bvec4_type),
 
+                _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type),
+                _mix_sel(fp64, glsl_type::dvec2_type,  glsl_type::bvec2_type),
+                _mix_sel(fp64, glsl_type::dvec3_type,  glsl_type::bvec3_type),
+                _mix_sel(fp64, glsl_type::dvec4_type,  glsl_type::bvec4_type),
+
                 _mix_sel(shader_integer_mix, glsl_type::int_type,   glsl_type::bool_type),
                 _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type),
                 _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type),
@@ -1001,29 +1104,45 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("step",
-                _step(glsl_type::float_type, glsl_type::float_type),
-                _step(glsl_type::float_type, glsl_type::vec2_type),
-                _step(glsl_type::float_type, glsl_type::vec3_type),
-                _step(glsl_type::float_type, glsl_type::vec4_type),
-
-                _step(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _step(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _step(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _step(always_available, glsl_type::float_type, glsl_type::float_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec2_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec3_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec4_type),
+
+                _step(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _step(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _step(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
+                _step(fp64, glsl_type::double_type, glsl_type::double_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec2_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec3_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec4_type),
+
+                _step(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _step(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _step(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
 
    add_function("smoothstep",
-                _smoothstep(glsl_type::float_type, glsl_type::float_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec2_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec3_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec4_type),
-
-                _smoothstep(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _smoothstep(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _smoothstep(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type),
+
+                _smoothstep(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _smoothstep(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _smoothstep(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type),
+
+                _smoothstep(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _smoothstep(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _smoothstep(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
  
-   F(isnan)
-   F(isinf)
+   FD130(isnan)
+   FD130(isinf)
 
    F(floatBitsToInt)
    F(floatBitsToUint)
@@ -1050,68 +1169,106 @@ builtin_builder::create_builtins()
    add_function("unpackSnorm4x8",  _unpackSnorm4x8(shader_packing_or_gpu_shader5),         NULL);
    add_function("packHalf2x16",    _packHalf2x16(shader_packing_or_es3),                   NULL);
    add_function("unpackHalf2x16",  _unpackHalf2x16(shader_packing_or_es3),                 NULL);
+   add_function("packDouble2x32",    _packDouble2x32(fp64),                   NULL);
+   add_function("unpackDouble2x32",  _unpackDouble2x32(fp64),                 NULL);
 
-   F(length)
-   F(distance)
-   F(dot)
 
-   add_function("cross", _cross(glsl_type::vec3_type), NULL);
+   FD(length)
+   FD(distance)
+   FD(dot)
 
-   F(normalize)
+   add_function("cross", _cross(always_available, glsl_type::vec3_type), 
+                _cross(fp64, glsl_type::dvec3_type), NULL);
+
+   FD(normalize)
    add_function("ftransform", _ftransform(), NULL);
-   F(faceforward)
-   F(reflect)
-   F(refract)
+   FD(faceforward)
+   FD(reflect)
+   FD(refract)
    // ...
    add_function("matrixCompMult",
-                _matrixCompMult(glsl_type::mat2_type),
-                _matrixCompMult(glsl_type::mat3_type),
-                _matrixCompMult(glsl_type::mat4_type),
-                _matrixCompMult(glsl_type::mat2x3_type),
-                _matrixCompMult(glsl_type::mat2x4_type),
-                _matrixCompMult(glsl_type::mat3x2_type),
-                _matrixCompMult(glsl_type::mat3x4_type),
-                _matrixCompMult(glsl_type::mat4x2_type),
-                _matrixCompMult(glsl_type::mat4x3_type),
+                _matrixCompMult(always_available, glsl_type::mat2_type),
+                _matrixCompMult(always_available, glsl_type::mat3_type),
+                _matrixCompMult(always_available, glsl_type::mat4_type),
+                _matrixCompMult(always_available, glsl_type::mat2x3_type),
+                _matrixCompMult(always_available, glsl_type::mat2x4_type),
+                _matrixCompMult(always_available, glsl_type::mat3x2_type),
+                _matrixCompMult(always_available, glsl_type::mat3x4_type),
+                _matrixCompMult(always_available, glsl_type::mat4x2_type),
+                _matrixCompMult(always_available, glsl_type::mat4x3_type),
+                _matrixCompMult(fp64, glsl_type::dmat2_type),
+                _matrixCompMult(fp64, glsl_type::dmat3_type),
+                _matrixCompMult(fp64, glsl_type::dmat4_type),
+                _matrixCompMult(fp64, glsl_type::dmat2x3_type),
+                _matrixCompMult(fp64, glsl_type::dmat2x4_type),
+                _matrixCompMult(fp64, glsl_type::dmat3x2_type),
+                _matrixCompMult(fp64, glsl_type::dmat3x4_type),
+                _matrixCompMult(fp64, glsl_type::dmat4x2_type),
+                _matrixCompMult(fp64, glsl_type::dmat4x3_type),
                 NULL);
    add_function("outerProduct",
-                _outerProduct(glsl_type::mat2_type),
-                _outerProduct(glsl_type::mat3_type),
-                _outerProduct(glsl_type::mat4_type),
-                _outerProduct(glsl_type::mat2x3_type),
-                _outerProduct(glsl_type::mat2x4_type),
-                _outerProduct(glsl_type::mat3x2_type),
-                _outerProduct(glsl_type::mat3x4_type),
-                _outerProduct(glsl_type::mat4x2_type),
-                _outerProduct(glsl_type::mat4x3_type),
+                _outerProduct(v120, glsl_type::mat2_type),
+                _outerProduct(v120, glsl_type::mat3_type),
+                _outerProduct(v120, glsl_type::mat4_type),
+                _outerProduct(v120, glsl_type::mat2x3_type),
+                _outerProduct(v120, glsl_type::mat2x4_type),
+                _outerProduct(v120, glsl_type::mat3x2_type),
+                _outerProduct(v120, glsl_type::mat3x4_type),
+                _outerProduct(v120, glsl_type::mat4x2_type),
+                _outerProduct(v120, glsl_type::mat4x3_type),
+                _outerProduct(fp64, glsl_type::dmat2_type),
+                _outerProduct(fp64, glsl_type::dmat3_type),
+                _outerProduct(fp64, glsl_type::dmat4_type),
+                _outerProduct(fp64, glsl_type::dmat2x3_type),
+                _outerProduct(fp64, glsl_type::dmat2x4_type),
+                _outerProduct(fp64, glsl_type::dmat3x2_type),
+                _outerProduct(fp64, glsl_type::dmat3x4_type),
+                _outerProduct(fp64, glsl_type::dmat4x2_type),
+                _outerProduct(fp64, glsl_type::dmat4x3_type),
                 NULL);
    add_function("determinant",
-                _determinant_mat2(),
-                _determinant_mat3(),
-                _determinant_mat4(),
+                _determinant_mat2(v120, glsl_type::mat2_type),
+                _determinant_mat3(v120, glsl_type::mat3_type),
+                _determinant_mat4(v120, glsl_type::mat4_type),
+                _determinant_mat2(fp64, glsl_type::dmat2_type),
+                _determinant_mat3(fp64, glsl_type::dmat3_type),
+                _determinant_mat4(fp64, glsl_type::dmat4_type),
+
                 NULL);
    add_function("inverse",
-                _inverse_mat2(),
-                _inverse_mat3(),
-                _inverse_mat4(),
+                _inverse_mat2(v120, glsl_type::mat2_type),
+                _inverse_mat3(v120, glsl_type::mat3_type),
+                _inverse_mat4(v120, glsl_type::mat4_type),
+                _inverse_mat2(fp64, glsl_type::dmat2_type),
+                _inverse_mat3(fp64, glsl_type::dmat3_type),
+                _inverse_mat4(fp64, glsl_type::dmat4_type),
                 NULL);
    add_function("transpose",
-                _transpose(glsl_type::mat2_type),
-                _transpose(glsl_type::mat3_type),
-                _transpose(glsl_type::mat4_type),
-                _transpose(glsl_type::mat2x3_type),
-                _transpose(glsl_type::mat2x4_type),
-                _transpose(glsl_type::mat3x2_type),
-                _transpose(glsl_type::mat3x4_type),
-                _transpose(glsl_type::mat4x2_type),
-                _transpose(glsl_type::mat4x3_type),
+                _transpose(v120, glsl_type::mat2_type),
+                _transpose(v120, glsl_type::mat3_type),
+                _transpose(v120, glsl_type::mat4_type),
+                _transpose(v120, glsl_type::mat2x3_type),
+                _transpose(v120, glsl_type::mat2x4_type),
+                _transpose(v120, glsl_type::mat3x2_type),
+                _transpose(v120, glsl_type::mat3x4_type),
+                _transpose(v120, glsl_type::mat4x2_type),
+                _transpose(v120, glsl_type::mat4x3_type),
+                _transpose(fp64, glsl_type::dmat2_type),
+                _transpose(fp64, glsl_type::dmat3_type),
+                _transpose(fp64, glsl_type::dmat4_type),
+                _transpose(fp64, glsl_type::dmat2x3_type),
+                _transpose(fp64, glsl_type::dmat2x4_type),
+                _transpose(fp64, glsl_type::dmat3x2_type),
+                _transpose(fp64, glsl_type::dmat3x4_type),
+                _transpose(fp64, glsl_type::dmat4x2_type),
+                _transpose(fp64, glsl_type::dmat4x3_type),
                 NULL);
-   FIU(lessThan)
-   FIU(lessThanEqual)
-   FIU(greaterThan)
-   FIU(greaterThanEqual)
-   FIUB(notEqual)
-   FIUB(equal)
+   FIUD(lessThan)
+   FIUD(lessThanEqual)
+   FIUD(greaterThan)
+   FIUD(greaterThanEqual)
+   FIUBD(notEqual)
+   FIUBD(equal)
 
    add_function("any",
                 _any(glsl_type::bvec2_type),
@@ -2180,13 +2337,17 @@ builtin_builder::create_builtins()
    IU(bitCount)
    IU(findLSB)
    IU(findMSB)
-   F(fma_mesa)
+   FDGS5(fma)
 
    add_function("ldexp",
                 _ldexp(glsl_type::float_type, glsl_type::int_type),
                 _ldexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
                 _ldexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
                 _ldexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+                _ldexp(glsl_type::double_type, glsl_type::int_type),
+                _ldexp(glsl_type::dvec2_type,  glsl_type::ivec2_type),
+                _ldexp(glsl_type::dvec3_type,  glsl_type::ivec3_type),
+                _ldexp(glsl_type::dvec4_type,  glsl_type::ivec4_type),
                 NULL);
 
    add_function("frexp",
@@ -2194,6 +2355,10 @@ builtin_builder::create_builtins()
                 _frexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
                 _frexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
                 _frexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+                _dfrexp(glsl_type::double_type, glsl_type::int_type),
+                _dfrexp(glsl_type::dvec2_type,  glsl_type::ivec2_type),
+                _dfrexp(glsl_type::dvec3_type,  glsl_type::ivec3_type),
+                _dfrexp(glsl_type::dvec4_type,  glsl_type::ivec4_type),
                 NULL);
    add_function("uaddCarry",
                 _uaddCarry(glsl_type::uint_type),
@@ -2310,8 +2475,8 @@ builtin_builder::create_builtins()
 
 #undef F
 #undef FI
-#undef FIU
-#undef FIUB
+#undef FIUD
+#undef FIUBD
 #undef FIU2_MIXED
 }
 
@@ -2470,11 +2635,19 @@ builtin_builder::imm(unsigned u, unsigned vector_elements)
 }
 
 ir_constant *
+builtin_builder::imm(double d, unsigned vector_elements)
+{
+   return new(mem_ctx) ir_constant(d, vector_elements);
+}
+
+ir_constant *
 builtin_builder::imm(const glsl_type *type, const ir_constant_data &data)
 {
    return new(mem_ctx) ir_constant(type, &data);
 }
 
+#define IMM_FP(type, val) (type->base_type == GLSL_TYPE_DOUBLE) ? imm(val) : imm((float)val)
+
 ir_dereference_variable *
 builtin_builder::var_ref(ir_variable *var)
 {
@@ -2550,6 +2723,13 @@ builtin_builder::_##NAME(const glsl_type *type) \
    return unop(&AVAIL, OPCODE, type, type);     \
 }
 
+#define UNOPA(NAME, OPCODE)               \
+ir_function_signature *                         \
+builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \
+{                                               \
+   return unop(avail, OPCODE, type, type);     \
+}
+
 ir_function_signature *
 builtin_builder::binop(ir_expression_operation opcode,
                        builtin_available_predicate avail,
@@ -2855,19 +3035,19 @@ UNOP(exp,         ir_unop_exp,  always_available)
 UNOP(log,         ir_unop_log,  always_available)
 UNOP(exp2,        ir_unop_exp2, always_available)
 UNOP(log2,        ir_unop_log2, always_available)
-UNOP(sqrt,        ir_unop_sqrt, always_available)
-UNOP(inversesqrt, ir_unop_rsq,  always_available)
+UNOPA(sqrt,        ir_unop_sqrt)
+UNOPA(inversesqrt, ir_unop_rsq)
 
 /** @} */
 
-UNOP(abs,       ir_unop_abs,        always_available)
-UNOP(sign,      ir_unop_sign,       always_available)
-UNOP(floor,     ir_unop_floor,      always_available)
-UNOP(trunc,     ir_unop_trunc,      v130)
-UNOP(round,     ir_unop_round_even, always_available)
-UNOP(roundEven, ir_unop_round_even, always_available)
-UNOP(ceil,      ir_unop_ceil,       always_available)
-UNOP(fract,     ir_unop_fract,      always_available)
+UNOPA(abs,       ir_unop_abs)
+UNOPA(sign,      ir_unop_sign)
+UNOPA(floor,     ir_unop_floor)
+UNOPA(trunc,     ir_unop_trunc)
+UNOPA(round,     ir_unop_round_even)
+UNOPA(roundEven, ir_unop_round_even)
+UNOPA(ceil,      ir_unop_ceil)
+UNOPA(fract,     ir_unop_fract)
 
 ir_function_signature *
 builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
@@ -2876,11 +3056,11 @@ builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
 }
 
 ir_function_signature *
-builtin_builder::_modf(const glsl_type *type)
+builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
    ir_variable *i = out_var(type, "i");
-   MAKE_SIG(type, v130, 2, x, i);
+   MAKE_SIG(type, avail, 2, x, i);
 
    ir_variable *t = body.make_temp(type, "t");
    body.emit(assign(t, expr(ir_unop_trunc, x)));
@@ -2919,12 +3099,12 @@ builtin_builder::_clamp(builtin_available_predicate avail,
 }
 
 ir_function_signature *
-builtin_builder::_mix_lrp(const glsl_type *val_type, const glsl_type *blend_type)
+builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type)
 {
    ir_variable *x = in_var(val_type, "x");
    ir_variable *y = in_var(val_type, "y");
    ir_variable *a = in_var(blend_type, "a");
-   MAKE_SIG(val_type, always_available, 3, x, y, a);
+   MAKE_SIG(val_type, avail, 3, x, y, a);
 
    body.emit(ret(lrp(x, y, a)));
 
@@ -2954,26 +3134,37 @@ builtin_builder::_mix_sel(builtin_available_predicate avail,
 }
 
 ir_function_signature *
-builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type)
+builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type)
 {
    ir_variable *edge = in_var(edge_type, "edge");
    ir_variable *x = in_var(x_type, "x");
-   MAKE_SIG(x_type, always_available, 2, edge, x);
+   MAKE_SIG(x_type, avail, 2, edge, x);
 
    ir_variable *t = body.make_temp(x_type, "t");
    if (x_type->vector_elements == 1) {
       /* Both are floats */
-      body.emit(assign(t, b2f(gequal(x, edge))));
+      if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+         body.emit(assign(t, f2d(b2f(gequal(x, edge)))));
+      else
+         body.emit(assign(t, b2f(gequal(x, edge))));
    } else if (edge_type->vector_elements == 1) {
       /* x is a vector but edge is a float */
       for (int i = 0; i < x_type->vector_elements; i++) {
-         body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i));
+         if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+            body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i));
+         else
+            body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i));
       }
    } else {
       /* Both are vectors */
       for (int i = 0; i < x_type->vector_elements; i++) {
-         body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))),
-                          1 << i));
+         if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+            body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))),
+                             1 << i));
+         else
+            body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))),
+                             1 << i));
+
       }
    }
    body.emit(ret(t));
@@ -2982,12 +3173,12 @@ builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type)
 }
 
 ir_function_signature *
-builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type)
+builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type)
 {
    ir_variable *edge0 = in_var(edge_type, "edge0");
    ir_variable *edge1 = in_var(edge_type, "edge1");
    ir_variable *x = in_var(x_type, "x");
-   MAKE_SIG(x_type, always_available, 3, edge0, edge1, x);
+   MAKE_SIG(x_type, avail, 3, edge0, edge1, x);
 
    /* From the GLSL 1.10 specification:
     *
@@ -2998,18 +3189,18 @@ builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type
 
    ir_variable *t = body.make_temp(x_type, "t");
    body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)),
-                             imm(0.0f), imm(1.0f))));
+                             IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0))));
 
-   body.emit(ret(mul(t, mul(t, sub(imm(3.0f), mul(imm(2.0f), t))))));
+   body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t))))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_isnan(const glsl_type *type)
+builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x);
+   MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x);
 
    body.emit(ret(nequal(x, x)));
 
@@ -3017,10 +3208,10 @@ builtin_builder::_isnan(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_isinf(const glsl_type *type)
+builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x);
+   MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x);
 
    ir_constant_data infinities;
    for (int i = 0; i < type->vector_elements; i++) {
@@ -3160,10 +3351,28 @@ builtin_builder::_unpackHalf2x16(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_length(const glsl_type *type)
+builtin_builder::_packDouble2x32(builtin_available_predicate avail)
+{
+   ir_variable *v = in_var(glsl_type::uvec2_type, "v");
+   MAKE_SIG(glsl_type::double_type, avail, 1, v);
+   body.emit(ret(expr(ir_unop_pack_double_2x32, v)));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_unpackDouble2x32(builtin_available_predicate avail)
+{
+   ir_variable *p = in_var(glsl_type::double_type, "p");
+   MAKE_SIG(glsl_type::uvec2_type, avail, 1, p);
+   body.emit(ret(expr(ir_unop_unpack_double_2x32, p)));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::float_type, always_available, 1, x);
+   MAKE_SIG(type->get_base_type(), avail, 1, x);
 
    body.emit(ret(sqrt(dot(x, x))));
 
@@ -3171,11 +3380,11 @@ builtin_builder::_length(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_distance(const glsl_type *type)
+builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *p0 = in_var(type, "p0");
    ir_variable *p1 = in_var(type, "p1");
-   MAKE_SIG(glsl_type::float_type, always_available, 2, p0, p1);
+   MAKE_SIG(type->get_base_type(), avail, 2, p0, p1);
 
    if (type->vector_elements == 1) {
       body.emit(ret(abs(sub(p0, p1))));
@@ -3189,21 +3398,21 @@ builtin_builder::_distance(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_dot(const glsl_type *type)
+builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type)
 {
    if (type->vector_elements == 1)
-      return binop(ir_binop_mul, always_available, type, type, type);
+      return binop(ir_binop_mul, avail, type, type, type);
 
-   return binop(ir_binop_dot, always_available,
-                glsl_type::float_type, type, type);
+   return binop(ir_binop_dot, avail,
+                type->get_base_type(), type, type);
 }
 
 ir_function_signature *
-builtin_builder::_cross(const glsl_type *type)
+builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *a = in_var(type, "a");
    ir_variable *b = in_var(type, "b");
-   MAKE_SIG(type, always_available, 2, a, b);
+   MAKE_SIG(type, avail, 2, a, b);
 
    int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
    int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0);
@@ -3215,10 +3424,10 @@ builtin_builder::_cross(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_normalize(const glsl_type *type)
+builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(type, always_available, 1, x);
+   MAKE_SIG(type, avail, 1, x);
 
    if (type->vector_elements == 1) {
       body.emit(ret(sign(x)));
@@ -3248,41 +3457,41 @@ builtin_builder::_ftransform()
 }
 
 ir_function_signature *
-builtin_builder::_faceforward(const glsl_type *type)
+builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *N = in_var(type, "N");
    ir_variable *I = in_var(type, "I");
    ir_variable *Nref = in_var(type, "Nref");
-   MAKE_SIG(type, always_available, 3, N, I, Nref);
+   MAKE_SIG(type, avail, 3, N, I, Nref);
 
-   body.emit(if_tree(less(dot(Nref, I), imm(0.0f)),
+   body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)),
                      ret(N), ret(neg(N))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_reflect(const glsl_type *type)
+builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *I = in_var(type, "I");
    ir_variable *N = in_var(type, "N");
-   MAKE_SIG(type, always_available, 2, I, N);
+   MAKE_SIG(type, avail, 2, I, N);
 
    /* I - 2 * dot(N, I) * N */
-   body.emit(ret(sub(I, mul(imm(2.0f), mul(dot(N, I), N)))));
+   body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N)))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_refract(const glsl_type *type)
+builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *I = in_var(type, "I");
    ir_variable *N = in_var(type, "N");
-   ir_variable *eta = in_var(glsl_type::float_type, "eta");
-   MAKE_SIG(type, always_available, 3, I, N, eta);
+   ir_variable *eta = in_var(type->get_base_type(), "eta");
+   MAKE_SIG(type, avail, 3, I, N, eta);
 
-   ir_variable *n_dot_i = body.make_temp(glsl_type::float_type, "n_dot_i");
+   ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i");
    body.emit(assign(n_dot_i, dot(N, I)));
 
    /* From the GLSL 1.10 specification:
@@ -3292,11 +3501,11 @@ builtin_builder::_refract(const glsl_type *type)
     * else
     *    return eta * I - (eta * dot(N, I) + sqrt(k)) * N
     */
-   ir_variable *k = body.make_temp(glsl_type::float_type, "k");
-   body.emit(assign(k, sub(imm(1.0f),
-                           mul(eta, mul(eta, sub(imm(1.0f),
+   ir_variable *k = body.make_temp(type->get_base_type(), "k");
+   body.emit(assign(k, sub(IMM_FP(type, 1.0),
+                           mul(eta, mul(eta, sub(IMM_FP(type, 1.0),
                                                  mul(n_dot_i, n_dot_i)))))));
-   body.emit(if_tree(less(k, imm(0.0f)),
+   body.emit(if_tree(less(k, IMM_FP(type, 0.0)),
                      ret(ir_constant::zero(mem_ctx, type)),
                      ret(sub(mul(eta, I),
                              mul(add(mul(eta, n_dot_i), sqrt(k)), N)))));
@@ -3305,11 +3514,11 @@ builtin_builder::_refract(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_matrixCompMult(const glsl_type *type)
+builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
    ir_variable *y = in_var(type, "y");
-   MAKE_SIG(type, always_available, 2, x, y);
+   MAKE_SIG(type, avail, 2, x, y);
 
    ir_variable *z = body.make_temp(type, "z");
    for (int i = 0; i < type->matrix_columns; i++) {
@@ -3321,11 +3530,19 @@ builtin_builder::_matrixCompMult(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_outerProduct(const glsl_type *type)
+builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *c = in_var(glsl_type::vec(type->vector_elements), "c");
-   ir_variable *r = in_var(glsl_type::vec(type->matrix_columns), "r");
-   MAKE_SIG(type, v120, 2, c, r);
+   ir_variable *c;
+   ir_variable *r;
+
+   if (type->base_type == GLSL_TYPE_DOUBLE) {
+      r = in_var(glsl_type::dvec(type->matrix_columns), "r");
+      c = in_var(glsl_type::dvec(type->vector_elements), "c");
+   } else {
+      r = in_var(glsl_type::vec(type->matrix_columns), "r");
+      c = in_var(glsl_type::vec(type->vector_elements), "c");
+   }
+   MAKE_SIG(type, avail, 2, c, r);
 
    ir_variable *m = body.make_temp(type, "m");
    for (int i = 0; i < type->matrix_columns; i++) {
@@ -3337,15 +3554,15 @@ builtin_builder::_outerProduct(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_transpose(const glsl_type *orig_type)
+builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type)
 {
    const glsl_type *transpose_type =
-      glsl_type::get_instance(GLSL_TYPE_FLOAT,
+      glsl_type::get_instance(orig_type->base_type,
                               orig_type->matrix_columns,
                               orig_type->vector_elements);
 
    ir_variable *m = in_var(orig_type, "m");
-   MAKE_SIG(transpose_type, v120, 1, m);
+   MAKE_SIG(transpose_type, avail, 1, m);
 
    ir_variable *t = body.make_temp(transpose_type, "t");
    for (int i = 0; i < orig_type->matrix_columns; i++) {
@@ -3361,10 +3578,10 @@ builtin_builder::_transpose(const glsl_type *orig_type)
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat2()
+builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat2_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type->get_base_type(), avail, 1, m);
 
    body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)),
                      mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1)))));
@@ -3373,10 +3590,10 @@ builtin_builder::_determinant_mat2()
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat3()
+builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat3_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type->get_base_type(), avail, 1, m);
 
    ir_expression *f1 =
       sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)),
@@ -3398,30 +3615,31 @@ builtin_builder::_determinant_mat3()
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat4()
-{
-   ir_variable *m = in_var(glsl_type::mat4_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
-
-   ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00");
-   ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01");
-   ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02");
-   ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03");
-   ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04");
-   ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05");
-   ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06");
-   ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07");
-   ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08");
-   ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09");
-   ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10");
-   ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11");
-   ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12");
-   ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13");
-   ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14");
-   ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15");
-   ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16");
-   ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17");
-   ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18");
+builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(btype, avail, 1, m);
+
+   ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00");
+   ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01");
+   ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02");
+   ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03");
+   ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04");
+   ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05");
+   ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06");
+   ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07");
+   ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08");
+   ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09");
+   ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10");
+   ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11");
+   ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12");
+   ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13");
+   ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14");
+   ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15");
+   ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16");
+   ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17");
+   ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18");
 
    body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3)))));
    body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3)))));
@@ -3443,7 +3661,7 @@ builtin_builder::_determinant_mat4()
    body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2)))));
    body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj_0 = body.make_temp(glsl_type::vec4_type, "adj_0");
+   ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0");
 
    body.emit(assign(adj_0,
                     add(sub(mul(matrix_elt(m, 1, 1), SubFactor00),
@@ -3472,12 +3690,12 @@ builtin_builder::_determinant_mat4()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat2()
+builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat2_type, "m");
-   MAKE_SIG(glsl_type::mat2_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type, avail, 1, m);
 
-   ir_variable *adj = body.make_temp(glsl_type::mat2_type, "adj");
+   ir_variable *adj = body.make_temp(type, "adj");
    body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0));
    body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1));
    body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0));
@@ -3492,14 +3710,15 @@ builtin_builder::_inverse_mat2()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat3()
+builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat3_type, "m");
-   MAKE_SIG(glsl_type::mat3_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(type, avail, 1, m);
 
-   ir_variable *f11_22_21_12 = body.make_temp(glsl_type::float_type, "f11_22_21_12");
-   ir_variable *f10_22_20_12 = body.make_temp(glsl_type::float_type, "f10_22_20_12");
-   ir_variable *f10_21_20_11 = body.make_temp(glsl_type::float_type, "f10_21_20_11");
+   ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12");
+   ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12");
+   ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11");
 
    body.emit(assign(f11_22_21_12,
                     sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)),
@@ -3511,7 +3730,7 @@ builtin_builder::_inverse_mat3()
                     sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)),
                         mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj = body.make_temp(glsl_type::mat3_type, "adj");
+   ir_variable *adj = body.make_temp(type, "adj");
    body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X));
    body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X));
    body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X));
@@ -3553,30 +3772,31 @@ builtin_builder::_inverse_mat3()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat4()
-{
-   ir_variable *m = in_var(glsl_type::mat4_type, "m");
-   MAKE_SIG(glsl_type::mat4_type, v120, 1, m);
-
-   ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00");
-   ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01");
-   ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02");
-   ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03");
-   ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04");
-   ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05");
-   ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06");
-   ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07");
-   ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08");
-   ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09");
-   ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10");
-   ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11");
-   ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12");
-   ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13");
-   ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14");
-   ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15");
-   ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16");
-   ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17");
-   ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18");
+builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(type, avail, 1, m);
+
+   ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00");
+   ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01");
+   ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02");
+   ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03");
+   ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04");
+   ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05");
+   ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06");
+   ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07");
+   ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08");
+   ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09");
+   ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10");
+   ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11");
+   ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12");
+   ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13");
+   ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14");
+   ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15");
+   ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16");
+   ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17");
+   ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18");
 
    body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3)))));
    body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3)))));
@@ -3598,7 +3818,7 @@ builtin_builder::_inverse_mat4()
    body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2)))));
    body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj = body.make_temp(glsl_type::mat4_type, "adj");
+   ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj");
    body.emit(assign(array_ref(adj, 0),
                     add(sub(mul(matrix_elt(m, 1, 1), SubFactor00),
                             mul(matrix_elt(m, 1, 2), SubFactor01)),
@@ -4270,12 +4490,12 @@ builtin_builder::_findMSB(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_fma_mesa(const glsl_type *type)
+builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *a = in_var(type, "a");
    ir_variable *b = in_var(type, "b");
    ir_variable *c = in_var(type, "c");
-   MAKE_SIG(type, gpu_shader5, 3, a, b, c);
+   MAKE_SIG(type, avail, 3, a, b, c);
 
    body.emit(ret(ir_builder::fma(a, b, c)));
 
@@ -4285,7 +4505,20 @@ builtin_builder::_fma_mesa(const glsl_type *type)
 ir_function_signature *
 builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type)
 {
-   return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type);
+   return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5, x_type, x_type, exp_type);
+}
+
+ir_function_signature *
+builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   /* frexp under the fp64 predicate: the ir_unop_frexp_exp result goes to
+    * the "exp" out parameter, the ir_unop_frexp_sig result is returned. */
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exp_out = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, fp64, 2, x, exp_out);
+   body.emit(assign(exp_out, expr(ir_unop_frexp_exp, x)));
+   body.emit(ret(expr(ir_unop_frexp_sig, x)));
+   return sig;
 }
 
 ir_function_signature *
@@ -4618,6 +4851,17 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
    return s;
 }
 
+ir_function *
+_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
+                                         const char *name)
+{
+   /* Name-only symbol lookup, serialized by builtins_lock; state is unused. */
+   mtx_lock(&builtins_lock);
+   ir_function *const found = builtins.shader->symbols->get_function(name);
+   mtx_unlock(&builtins_lock);
+   return found;
+}
+
 gl_shader *
 _mesa_glsl_get_builtin_function_shader()
 {
diff --git a/mesalib/src/glsl/builtin_type_macros.h b/mesalib/src/glsl/builtin_type_macros.h
index 236e1ce8c..8e16ae454 100644
--- a/mesalib/src/glsl/builtin_type_macros.h
+++ b/mesalib/src/glsl/builtin_type_macros.h
@@ -64,6 +64,22 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3)
 DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4)
 DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4)
 
+DECL_TYPE(double,  GL_DOUBLE,        GLSL_TYPE_DOUBLE, 1, 1)
+DECL_TYPE(dvec2,   GL_DOUBLE_VEC2,   GLSL_TYPE_DOUBLE, 2, 1)
+DECL_TYPE(dvec3,   GL_DOUBLE_VEC3,   GLSL_TYPE_DOUBLE, 3, 1)
+DECL_TYPE(dvec4,   GL_DOUBLE_VEC4,   GLSL_TYPE_DOUBLE, 4, 1)
+
+DECL_TYPE(dmat2,   GL_DOUBLE_MAT2,   GLSL_TYPE_DOUBLE, 2, 2)
+DECL_TYPE(dmat3,   GL_DOUBLE_MAT3,   GLSL_TYPE_DOUBLE, 3, 3)
+DECL_TYPE(dmat4,   GL_DOUBLE_MAT4,   GLSL_TYPE_DOUBLE, 4, 4)
+
+DECL_TYPE(dmat2x3, GL_DOUBLE_MAT2x3, GLSL_TYPE_DOUBLE, 3, 2)
+DECL_TYPE(dmat2x4, GL_DOUBLE_MAT2x4, GLSL_TYPE_DOUBLE, 4, 2)
+DECL_TYPE(dmat3x2, GL_DOUBLE_MAT3x2, GLSL_TYPE_DOUBLE, 2, 3)
+DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
+DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
+DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
+
 DECL_TYPE(sampler1D,         GL_SAMPLER_1D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler2D,         GL_SAMPLER_2D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler3D,         GL_SAMPLER_3D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D,   0, 0, GLSL_TYPE_FLOAT)
@@ -110,39 +126,39 @@ DECL_TYPE(sampler2DRectShadow,    GL_SAMPLER_2D_RECT_SHADOW,        GLSL_TYPE_SA
 
 DECL_TYPE(samplerExternalOES,     GL_SAMPLER_EXTERNAL_OES,          GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_EXTERNAL, 0, 0, GLSL_TYPE_FLOAT)
 
-DECL_TYPE(image1D,         GL_IMAGE_1D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2D,         GL_IMAGE_2D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image3D,         GL_IMAGE_3D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DRect,     GL_IMAGE_2D_RECT,                           GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageCube,       GL_IMAGE_CUBE,                              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageBuffer,     GL_IMAGE_BUFFER,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image1DArray,    GL_IMAGE_1D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DArray,    GL_IMAGE_2D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageCubeArray,  GL_IMAGE_CUBE_MAP_ARRAY,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DMS,       GL_IMAGE_2D_MULTISAMPLE,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DMSArray,  GL_IMAGE_2D_MULTISAMPLE_ARRAY,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(iimage1D,        GL_INT_IMAGE_1D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2D,        GL_INT_IMAGE_2D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage3D,        GL_INT_IMAGE_3D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DRect,    GL_INT_IMAGE_2D_RECT,                       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimageCube,      GL_INT_IMAGE_CUBE,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimageBuffer,    GL_INT_IMAGE_BUFFER,                        GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage1DArray,   GL_INT_IMAGE_1D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DArray,   GL_INT_IMAGE_2D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DMS,      GL_INT_IMAGE_2D_MULTISAMPLE,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY,          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(uimage1D,        GL_UNSIGNED_INT_IMAGE_1D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2D,        GL_UNSIGNED_INT_IMAGE_2D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage3D,        GL_UNSIGNED_INT_IMAGE_3D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DRect,    GL_UNSIGNED_INT_IMAGE_2D_RECT,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimageCube,      GL_UNSIGNED_INT_IMAGE_CUBE,                 GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimageBuffer,    GL_UNSIGNED_INT_IMAGE_BUFFER,               GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage1DArray,   GL_UNSIGNED_INT_IMAGE_1D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DArray,   GL_UNSIGNED_INT_IMAGE_2D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DMS,      GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_UINT);
+DECL_TYPE(image1D,         GL_IMAGE_1D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2D,         GL_IMAGE_2D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image3D,         GL_IMAGE_3D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DRect,     GL_IMAGE_2D_RECT,                           GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageCube,       GL_IMAGE_CUBE,                              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageBuffer,     GL_IMAGE_BUFFER,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image1DArray,    GL_IMAGE_1D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DArray,    GL_IMAGE_2D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageCubeArray,  GL_IMAGE_CUBE_MAP_ARRAY,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DMS,       GL_IMAGE_2D_MULTISAMPLE,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DMSArray,  GL_IMAGE_2D_MULTISAMPLE_ARRAY,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(iimage1D,        GL_INT_IMAGE_1D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2D,        GL_INT_IMAGE_2D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage3D,        GL_INT_IMAGE_3D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DRect,    GL_INT_IMAGE_2D_RECT,                       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimageCube,      GL_INT_IMAGE_CUBE,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimageBuffer,    GL_INT_IMAGE_BUFFER,                        GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage1DArray,   GL_INT_IMAGE_1D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DArray,   GL_INT_IMAGE_2D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DMS,      GL_INT_IMAGE_2D_MULTISAMPLE,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY,          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(uimage1D,        GL_UNSIGNED_INT_IMAGE_1D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2D,        GL_UNSIGNED_INT_IMAGE_2D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage3D,        GL_UNSIGNED_INT_IMAGE_3D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DRect,    GL_UNSIGNED_INT_IMAGE_2D_RECT,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimageCube,      GL_UNSIGNED_INT_IMAGE_CUBE,                 GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimageBuffer,    GL_UNSIGNED_INT_IMAGE_BUFFER,               GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage1DArray,   GL_UNSIGNED_INT_IMAGE_1D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DArray,   GL_UNSIGNED_INT_IMAGE_2D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DMS,      GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_UINT)
 
 DECL_TYPE(atomic_uint, GL_UNSIGNED_INT_ATOMIC_COUNTER, GLSL_TYPE_ATOMIC_UINT, 1, 1)
 
diff --git a/mesalib/src/glsl/builtin_types.cpp b/mesalib/src/glsl/builtin_types.cpp
index 10fac0f81..fef86df28 100644
--- a/mesalib/src/glsl/builtin_types.cpp
+++ b/mesalib/src/glsl/builtin_types.cpp
@@ -159,6 +159,20 @@ const static struct builtin_type_versions {
    T(mat4x2,                          120, 300)
    T(mat4x3,                          120, 300)
 
+   T(double,                          400, 999)
+   T(dvec2,                           400, 999)
+   T(dvec3,                           400, 999)
+   T(dvec4,                           400, 999)
+   T(dmat2,                           400, 999)
+   T(dmat3,                           400, 999)
+   T(dmat4,                           400, 999)
+   T(dmat2x3,                         400, 999)
+   T(dmat2x4,                         400, 999)
+   T(dmat3x2,                         400, 999)
+   T(dmat3x4,                         400, 999)
+   T(dmat4x2,                         400, 999)
+   T(dmat4x3,                         400, 999)
+
    T(sampler1D,                       110, 999)
    T(sampler2D,                       110, 100)
    T(sampler3D,                       110, 300)
@@ -361,5 +375,21 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
    if (state->ARB_shader_atomic_counters_enable) {
       add_type(symbols, glsl_type::atomic_uint_type);
    }
+
+   if (state->ARB_gpu_shader_fp64_enable) {
+      add_type(symbols, glsl_type::double_type);
+      add_type(symbols, glsl_type::dvec2_type);
+      add_type(symbols, glsl_type::dvec3_type);
+      add_type(symbols, glsl_type::dvec4_type);
+      add_type(symbols, glsl_type::dmat2_type);
+      add_type(symbols, glsl_type::dmat3_type);
+      add_type(symbols, glsl_type::dmat4_type);
+      add_type(symbols, glsl_type::dmat2x3_type);
+      add_type(symbols, glsl_type::dmat2x4_type);
+      add_type(symbols, glsl_type::dmat3x2_type);
+      add_type(symbols, glsl_type::dmat3x4_type);
+      add_type(symbols, glsl_type::dmat4x2_type);
+      add_type(symbols, glsl_type::dmat4x3_type);
+   }
 }
 /** @} */
diff --git a/mesalib/src/glsl/builtin_variables.cpp b/mesalib/src/glsl/builtin_variables.cpp
index c36d19831..65e32ad73 100644
--- a/mesalib/src/glsl/builtin_variables.cpp
+++ b/mesalib/src/glsl/builtin_variables.cpp
@@ -724,6 +724,10 @@ builtin_variable_generator::generate_constants()
       add_const("gl_MaxCombinedImageUniforms",
                 state->Const.MaxCombinedImageUniforms);
    }
+
+   if (state->is_version(410, 0) ||
+       state->ARB_viewport_array_enable)
+      add_const("gl_MaxViewports", state->Const.MaxViewports);
 }
 
 
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index f1119eb80..c2f5223a9 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -290,9 +290,10 @@ control_line_success:
 		macro_t *macro;
 		if (strcmp("__LINE__", $4) == 0
 		    || strcmp("__FILE__", $4) == 0
-		    || strcmp("__VERSION__", $4) == 0)
+		    || strcmp("__VERSION__", $4) == 0
+		    || strncmp("GL_", $4, 3) == 0)
 			glcpp_error(& @1, parser, "Built-in (pre-defined)"
-				    " macro names can not be undefined.");
+				    " macro names cannot be undefined.");
 
 		macro = hash_table_find (parser->defines, $4);
 		if (macro) {
@@ -2374,6 +2375,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	if (parser->is_gles) {
 	   add_builtin_define(parser, "GL_ES", 1);
            add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1);
+           add_builtin_define(parser, "GL_EXT_draw_buffers", 1);
 
 	   if (extensions != NULL) {
 	      if (extensions->OES_EGL_image_external)
@@ -2443,6 +2445,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	      if (extensions->ARB_gpu_shader5)
 	         add_builtin_define(parser, "GL_ARB_gpu_shader5", 1);
 
+              if (extensions->ARB_gpu_shader_fp64)
+                 add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1);
+
 	      if (extensions->AMD_vertex_shader_layer)
 	         add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1);
 
@@ -2472,6 +2477,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 
               if (extensions->ARB_derivative_control)
                  add_builtin_define(parser, "GL_ARB_derivative_control", 1);
+
+              if (extensions->ARB_shader_precision)
+                 add_builtin_define(parser, "GL_ARB_shader_precision", 1);
 	   }
 	}
 
diff --git a/mesalib/src/glsl/glcpp/glcpp.c b/mesalib/src/glsl/glcpp/glcpp.c
index 1a4b8b4f0..ece2fc3b7 100644
--- a/mesalib/src/glsl/glcpp/glcpp.c
+++ b/mesalib/src/glsl/glcpp/glcpp.c
@@ -129,7 +129,7 @@ enum {
 	DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1
 };
 
-const static struct option
+static const struct option
 long_options[] = {
 	{"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT },
         {"debug",                      no_argument, 0, 'd'},
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 57c46be84..8dc3d106b 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -458,6 +458,17 @@ layout		{
 			    return FLOATCONSTANT;
 			}
 
+[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF)	|
+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF)	|
+[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF)	|
+[0-9]+[eE][+-]?[0-9]+(lf|LF)		{
+			    /* Needs is_version(400, 0) or ARB_gpu_shader_fp64. */
+			    if (!(yyextra->is_version(400, 0) || yyextra->ARB_gpu_shader_fp64_enable))
+			        return ERROR_TOK;
+			    yylval->dreal = _mesa_strtod(yytext, NULL);
+			    return DOUBLECONSTANT;
+			}
+
 true			{
 			    yylval->n = 1;
 			    return BOOLCONSTANT;
@@ -489,7 +500,7 @@ external	KEYWORD(110, 100, 0, 0, EXTERNAL);
 interface	KEYWORD(110, 100, 0, 0, INTERFACE);
 long		KEYWORD(110, 100, 0, 0, LONG_TOK);
 short		KEYWORD(110, 100, 0, 0, SHORT_TOK);
-double		KEYWORD(110, 100, 400, 0, DOUBLE_TOK);
+double		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK);
 half		KEYWORD(110, 100, 0, 0, HALF);
 fixed		KEYWORD(110, 100, 0, 0, FIXED_TOK);
 unsigned	KEYWORD(110, 100, 0, 0, UNSIGNED);
@@ -498,9 +509,21 @@ output		KEYWORD(110, 100, 0, 0, OUTPUT);
 hvec2		KEYWORD(110, 100, 0, 0, HVEC2);
 hvec3		KEYWORD(110, 100, 0, 0, HVEC3);
 hvec4		KEYWORD(110, 100, 0, 0, HVEC4);
-dvec2		KEYWORD(110, 100, 400, 0, DVEC2);
-dvec3		KEYWORD(110, 100, 400, 0, DVEC3);
-dvec4		KEYWORD(110, 100, 400, 0, DVEC4);
+dvec2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2);
+dvec3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3);
+dvec4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4);
+dmat2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
+dmat3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
+dmat4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
+dmat2x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
+dmat2x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3);
+dmat2x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4);
+dmat3x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2);
+dmat3x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
+dmat3x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4);
+dmat4x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2);
+dmat4x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3);
+dmat4x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
 fvec2		KEYWORD(110, 100, 0, 0, FVEC2);
 fvec3		KEYWORD(110, 100, 0, 0, FVEC3);
 fvec4		KEYWORD(110, 100, 0, 0, FVEC4);
@@ -544,7 +567,13 @@ subroutine	KEYWORD(0, 300, 0, 0, SUBROUTINE);
 [_a-zA-Z][_a-zA-Z0-9]*	{
 			    struct _mesa_glsl_parse_state *state = yyextra;
 			    void *ctx = state;	
-			    yylval->identifier = ralloc_strdup(ctx, yytext);
+			    /* Set the identifier even on error; the token is still parsed. */
+			    yylval->identifier = ralloc_strdup(ctx, yytext);
+			    if (state->es_shader && strlen(yytext) > 1024) {
+			       _mesa_glsl_error(yylloc, state,
+			                        "Identifier `%s' exceeds 1024 characters",
+			                        yytext);
+			    }
 			    return classify_identifier(state, yytext);
 			}
 
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index cef968420..121b2fec0 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -95,6 +95,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %union {
    int n;
    float real;
+   double dreal;
    const char *identifier;
 
    struct ast_type_qualifier type_qualifier;
@@ -129,14 +130,17 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
    } selection_rest_statement;
 }
 
-%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK
+%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
 %token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
-%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4
+%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
 %token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
 %token NOPERSPECTIVE FLAT SMOOTH
 %token MAT2X2 MAT2X3 MAT2X4
 %token MAT3X2 MAT3X3 MAT3X4
 %token MAT4X2 MAT4X3 MAT4X4
+%token DMAT2X2 DMAT2X3 DMAT2X4
+%token DMAT3X2 DMAT3X3 DMAT3X4
+%token DMAT4X2 DMAT4X3 DMAT4X4
 %token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
 %token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW
 %token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW
@@ -163,6 +167,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %type <identifier> any_identifier
 %type <interface_block> instance_name_opt
 %token <real> FLOATCONSTANT
+%token <dreal> DOUBLECONSTANT
 %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
 %token <identifier> FIELD_SELECTION
 %token LEFT_OP RIGHT_OP
@@ -183,8 +188,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
     */
 %token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
 %token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL
-%token LONG_TOK SHORT_TOK DOUBLE_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK
-%token HVEC2 HVEC3 HVEC4 DVEC2 DVEC3 DVEC4 FVEC2 FVEC3 FVEC4
+%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK
+%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4
 %token SAMPLER3DRECT
 %token SIZEOF CAST NAMESPACE USING
 %token RESOURCE PATCH
@@ -332,7 +337,18 @@ pragma_statement:
    | PRAGMA_OPTIMIZE_OFF EOL
    | PRAGMA_INVARIANT_ALL EOL
    {
-      if (!state->is_version(120, 100)) {
+      /* Pragma invariant(all) cannot be used in a fragment shader.
+       *
+       * Page 27 of the GLSL 1.20 spec, Page 53 of the GLSL ES 3.00 spec:
+       *
+       *     "It is an error to use this pragma in a fragment shader."
+       */
+      if (state->is_version(120, 300) &&
+          state->stage == MESA_SHADER_FRAGMENT) {
+         _mesa_glsl_error(& @1, state,
+                          "pragma `invariant(all)' cannot be used "
+                          "in a fragment shader.");
+      } else if (!state->is_version(120, 100)) {
          _mesa_glsl_warning(& @1, state,
                             "pragma `invariant(all)' not supported in %s "
                             "(GLSL ES 1.00 or GLSL 1.20 required)",
@@ -424,6 +440,13 @@ primary_expression:
       $$->set_location(@1);
       $$->primary_expression.float_constant = $1;
    }
+   | DOUBLECONSTANT
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL);
+      $$->set_location(@1);
+      $$->primary_expression.double_constant = $1;
+   }
    | BOOLCONSTANT
    {
       void *ctx = state;
@@ -1592,6 +1615,17 @@ type_qualifier:
 
       $$ = $2;
       $$.flags.q.invariant = 1;
+
+      /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier":
+       *
+       * "Only variables output from a shader can be candidates for invariance.
+       * This includes user-defined output variables and the built-in output
+       * variables. As only outputs can be declared as invariant, an invariant
+       * output from one shader stage will still match an input of a subsequent
+       * stage without the input being declared as invariant."
+       */
+      if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+         _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
    }
    | interpolation_qualifier type_qualifier
    {
@@ -1843,6 +1877,7 @@ type_specifier_nonarray:
 basic_type_specifier_nonarray:
    VOID_TOK                 { $$ = "void"; }
    | FLOAT_TOK              { $$ = "float"; }
+   | DOUBLE_TOK             { $$ = "double"; }
    | INT_TOK                { $$ = "int"; }
    | UINT_TOK               { $$ = "uint"; }
    | BOOL_TOK               { $$ = "bool"; }
@@ -1858,6 +1893,9 @@ basic_type_specifier_nonarray:
    | UVEC2                  { $$ = "uvec2"; }
    | UVEC3                  { $$ = "uvec3"; }
    | UVEC4                  { $$ = "uvec4"; }
+   | DVEC2                  { $$ = "dvec2"; }
+   | DVEC3                  { $$ = "dvec3"; }
+   | DVEC4                  { $$ = "dvec4"; }
    | MAT2X2                 { $$ = "mat2"; }
    | MAT2X3                 { $$ = "mat2x3"; }
    | MAT2X4                 { $$ = "mat2x4"; }
@@ -1867,6 +1905,15 @@ basic_type_specifier_nonarray:
    | MAT4X2                 { $$ = "mat4x2"; }
    | MAT4X3                 { $$ = "mat4x3"; }
    | MAT4X4                 { $$ = "mat4"; }
+   | DMAT2X2                { $$ = "dmat2"; }
+   | DMAT2X3                { $$ = "dmat2x3"; }
+   | DMAT2X4                { $$ = "dmat2x4"; }
+   | DMAT3X2                { $$ = "dmat3x2"; }
+   | DMAT3X3                { $$ = "dmat3"; }
+   | DMAT3X4                { $$ = "dmat3x4"; }
+   | DMAT4X2                { $$ = "dmat4x2"; }
+   | DMAT4X3                { $$ = "dmat4x3"; }
+   | DMAT4X4                { $$ = "dmat4"; }
    | SAMPLER1D              { $$ = "sampler1D"; }
    | SAMPLER2D              { $$ = "sampler2D"; }
    | SAMPLER2DRECT          { $$ = "sampler2DRect"; }
@@ -2518,6 +2565,28 @@ basic_interface_block:
                              "interface block member does not match "
                              "the interface block");
          }
+
+         /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+          *
+          * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+          * outputs."
+          *
+          * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+          *
+          * "Only variables output from a shader can be candidates for
+          * invariance."
+          *
+          * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+          *
+          * "If optional qualifiers are used, they can include interpolation
+          * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+          * and they must declare an input, output, or uniform member
+          * consistent with the interface qualifier of the block"
+          */
+         if (qualifier.flags.q.invariant)
+            _mesa_glsl_error(&@1, state,
+                             "invariant qualifiers cannot be used "
+                             "with interface blocks members");
       }
 
       $$ = block;
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index 27e3301e2..9f7931380 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -25,12 +25,10 @@
 #include <string.h>
 #include <assert.h>
 
-extern "C" {
 #include "main/core.h" /* for struct gl_context */
 #include "main/context.h"
 #include "main/shaderobj.h"
-}
-
+#include "util/u_atomic.h" /* for p_atomic_cmpxchg */
 #include "util/ralloc.h"
 #include "ast.h"
 #include "glsl_parser_extras.h"
@@ -50,7 +48,7 @@ glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version)
 
 
 static const unsigned known_desktop_glsl_versions[] =
-   { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440 };
+   { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 };
 
 
 _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
@@ -134,6 +132,9 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms;
    this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms;
 
+   /* ARB_viewport_array */
+   this->Const.MaxViewports = ctx->Const.MaxViewports;
+
    this->current_function = NULL;
    this->toplevel_ir = NULL;
    this->found_return = false;
@@ -141,6 +142,12 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
    this->user_structures = NULL;
    this->num_user_structures = 0;
 
+   /* supported_versions should be large enough to support the known desktop
+    * GLSL versions plus 2 GLES versions (ES2 & ES3)
+    */
+   STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 2) ==
+                 ARRAY_SIZE(this->supported_versions));
+
    /* Populate the list of supported GLSL versions */
    /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or
     * the OpenGL 3.2 Core context is supported, this logic will need
@@ -168,8 +175,6 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
       this->supported_versions[this->num_supported_versions].es = true;
       this->num_supported_versions++;
    }
-   assert(this->num_supported_versions
-          <= ARRAY_SIZE(this->supported_versions));
 
    /* Create a string for use in error messages to tell the user which GLSL
     * versions are supported.
@@ -365,10 +370,27 @@ _mesa_shader_stage_to_string(unsigned stage)
    case MESA_SHADER_VERTEX:   return "vertex";
    case MESA_SHADER_FRAGMENT: return "fragment";
    case MESA_SHADER_GEOMETRY: return "geometry";
+   case MESA_SHADER_COMPUTE:  return "compute";
    }
 
-   assert(!"Should not get here.");
-   return "unknown";
+   unreachable("Unknown shader stage.");
+}
+
+/**
+ * Translate a gl_shader_stage to its two-letter abbreviation ("VS", "FS",
+ * "GS" or "CS") for debug printouts and error messages.
+ */
+const char *
+_mesa_shader_stage_to_abbrev(unsigned stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:   return "VS";
+   case MESA_SHADER_FRAGMENT: return "FS";
+   case MESA_SHADER_GEOMETRY: return "GS";
+   case MESA_SHADER_COMPUTE:  return "CS";
+   }
+
+   unreachable("Unknown shader stage.");
 }
 
 /* This helper function will append the given message to the shader's
@@ -522,11 +544,13 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_fragment_coord_conventions, true,  false,     ARB_fragment_coord_conventions),
    EXT(ARB_fragment_layer_viewport,    true,  false,     ARB_fragment_layer_viewport),
    EXT(ARB_gpu_shader5,                true,  false,     ARB_gpu_shader5),
+   EXT(ARB_gpu_shader_fp64,            true,  false,     ARB_gpu_shader_fp64),
    EXT(ARB_sample_shading,             true,  false,     ARB_sample_shading),
    EXT(ARB_separate_shader_objects,    true,  false,     dummy_true),
    EXT(ARB_shader_atomic_counters,     true,  false,     ARB_shader_atomic_counters),
    EXT(ARB_shader_bit_encoding,        true,  false,     ARB_shader_bit_encoding),
    EXT(ARB_shader_image_load_store,    true,  false,     ARB_shader_image_load_store),
+   EXT(ARB_shader_precision,           true,  false,     ARB_shader_precision),
    EXT(ARB_shader_stencil_export,      true,  false,     ARB_shader_stencil_export),
    EXT(ARB_shader_texture_lod,         true,  false,     ARB_shader_texture_lod),
    EXT(ARB_shading_language_420pack,   true,  false,     ARB_shading_language_420pack),
@@ -556,6 +580,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(AMD_shader_trinary_minmax,      true,  false,     dummy_true),
    EXT(AMD_vertex_shader_layer,        true,  false,     AMD_vertex_shader_layer),
    EXT(AMD_vertex_shader_viewport_index, true,  false,   AMD_vertex_shader_viewport_index),
+   EXT(EXT_draw_buffers,               false,  true,     dummy_true),
    EXT(EXT_separate_shader_objects,    false, true,      dummy_true),
    EXT(EXT_shader_integer_mix,         true,  true,      EXT_shader_integer_mix),
    EXT(EXT_texture_array,              true,  false,     EXT_texture_array),
@@ -954,6 +979,10 @@ ast_expression::print(void) const
       printf("%f ", primary_expression.float_constant);
       break;
 
+   case ast_double_constant:
+      printf("%f ", primary_expression.double_constant);
+      break;
+
    case ast_bool_constant:
       printf("%s ",
 	     primary_expression.bool_constant
@@ -1447,7 +1476,8 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
    const char *source = shader->Source;
 
    if (ctx->Const.GenerateTemporaryNames)
-      ir_variable::temporaries_allocate_names = true;
+      (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names,
+                              false, true);
 
    state->error = glcpp_preprocess(state, &source, &state->info_log,
                              &ctx->Extensions, ctx);
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index 36955d9af..c5670fdb1 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -209,6 +209,11 @@ struct _mesa_glsl_parse_state {
          || EXT_separate_shader_objects_enable;
    }
 
+   bool has_double() const
+   {
+      return ARB_gpu_shader_fp64_enable || is_version(400, 0);
+   }
+
    void process_version_directive(YYLTYPE *locp, int version,
                                   const char *ident);
 
@@ -221,7 +226,7 @@ struct _mesa_glsl_parse_state {
    struct {
       unsigned ver;
       bool es;
-   } supported_versions[12];
+   } supported_versions[14];
 
    bool es_shader;
    unsigned language_version;
@@ -347,6 +352,9 @@ struct _mesa_glsl_parse_state {
       unsigned MaxGeometryImageUniforms;
       unsigned MaxFragmentImageUniforms;
       unsigned MaxCombinedImageUniforms;
+
+      /* ARB_viewport_array */
+      unsigned MaxViewports;
    } Const;
 
    /**
@@ -415,6 +423,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_fragment_layer_viewport_warn;
    bool ARB_gpu_shader5_enable;
    bool ARB_gpu_shader5_warn;
+   bool ARB_gpu_shader_fp64_enable;
+   bool ARB_gpu_shader_fp64_warn;
    bool ARB_sample_shading_enable;
    bool ARB_sample_shading_warn;
    bool ARB_separate_shader_objects_enable;
@@ -425,6 +435,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_bit_encoding_warn;
    bool ARB_shader_image_load_store_enable;
    bool ARB_shader_image_load_store_warn;
+   bool ARB_shader_precision_enable;
+   bool ARB_shader_precision_warn;
    bool ARB_shader_stencil_export_enable;
    bool ARB_shader_stencil_export_warn;
    bool ARB_shader_texture_lod_enable;
@@ -474,6 +486,8 @@ struct _mesa_glsl_parse_state {
    bool AMD_vertex_shader_layer_warn;
    bool AMD_vertex_shader_viewport_index_enable;
    bool AMD_vertex_shader_viewport_index_warn;
+   bool EXT_draw_buffers_enable;
+   bool EXT_draw_buffers_warn;
    bool EXT_separate_shader_objects_enable;
    bool EXT_separate_shader_objects_warn;
    bool EXT_shader_integer_mix_enable;
@@ -573,6 +587,9 @@ extern "C" {
 extern const char *
 _mesa_shader_stage_to_string(unsigned stage);
 
+extern const char *
+_mesa_shader_stage_to_abbrev(unsigned stage);
+
 extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
                       const struct gl_extensions *extensions, struct gl_context *gl_ctx);
 
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index 5f9919348..38b37a6a9 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -25,9 +25,8 @@
 #include "main/core.h" /* for Elements, MAX2 */
 #include "glsl_parser_extras.h"
 #include "glsl_types.h"
-extern "C" {
 #include "program/hash_table.h"
-}
+
 
 mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
 hash_table *glsl_type::array_types = NULL;
@@ -195,6 +194,22 @@ glsl_type::contains_integer() const
 }
 
 bool
+glsl_type::contains_double() const
+{
+   if (this->is_array()) {
+      return this->fields.array->contains_double();
+   } else if (this->is_record()) {
+      for (unsigned int i = 0; i < this->length; i++) {
+	 if (this->fields.structure[i].type->contains_double())
+	    return true;
+      }
+      return false;
+   } else {
+      return this->is_double();
+   }
+}
+
+bool
 glsl_type::contains_opaque() const {
    switch (base_type) {
    case GLSL_TYPE_SAMPLER:
@@ -269,6 +284,8 @@ const glsl_type *glsl_type::get_base_type() const
       return int_type;
    case GLSL_TYPE_FLOAT:
       return float_type;
+   case GLSL_TYPE_DOUBLE:
+      return double_type;
    case GLSL_TYPE_BOOL:
       return bool_type;
    default:
@@ -293,6 +310,8 @@ const glsl_type *glsl_type::get_scalar_type() const
       return int_type;
    case GLSL_TYPE_FLOAT:
       return float_type;
+   case GLSL_TYPE_DOUBLE:
+      return double_type;
    case GLSL_TYPE_BOOL:
       return bool_type;
    default:
@@ -378,6 +397,17 @@ glsl_type::vec(unsigned components)
    return ts[components - 1];
 }
 
+const glsl_type *
+glsl_type::dvec(unsigned components)
+{
+   if (components == 0 || components > 4)
+      return error_type;
+
+   static const glsl_type *const ts[] = {
+      double_type, dvec2_type, dvec3_type, dvec4_type
+   };
+   return ts[components - 1];
+}
 
 const glsl_type *
 glsl_type::ivec(unsigned components)
@@ -437,13 +467,15 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
 	 return ivec(rows);
       case GLSL_TYPE_FLOAT:
 	 return vec(rows);
+      case GLSL_TYPE_DOUBLE:
+	 return dvec(rows);
       case GLSL_TYPE_BOOL:
 	 return bvec(rows);
       default:
 	 return error_type;
       }
    } else {
-      if ((base_type != GLSL_TYPE_FLOAT) || (rows == 1))
+      if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
 	 return error_type;
 
       /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
@@ -457,17 +489,32 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
        */
 #define IDX(c,r) (((c-1)*3) + (r-1))
 
-      switch (IDX(columns, rows)) {
-      case IDX(2,2): return mat2_type;
-      case IDX(2,3): return mat2x3_type;
-      case IDX(2,4): return mat2x4_type;
-      case IDX(3,2): return mat3x2_type;
-      case IDX(3,3): return mat3_type;
-      case IDX(3,4): return mat3x4_type;
-      case IDX(4,2): return mat4x2_type;
-      case IDX(4,3): return mat4x3_type;
-      case IDX(4,4): return mat4_type;
-      default: return error_type;
+      if (base_type == GLSL_TYPE_DOUBLE) {
+         switch (IDX(columns, rows)) {
+         case IDX(2,2): return dmat2_type;
+         case IDX(2,3): return dmat2x3_type;
+         case IDX(2,4): return dmat2x4_type;
+         case IDX(3,2): return dmat3x2_type;
+         case IDX(3,3): return dmat3_type;
+         case IDX(3,4): return dmat3x4_type;
+         case IDX(4,2): return dmat4x2_type;
+         case IDX(4,3): return dmat4x3_type;
+         case IDX(4,4): return dmat4_type;
+         default: return error_type;
+         }
+      } else {
+         switch (IDX(columns, rows)) {
+         case IDX(2,2): return mat2_type;
+         case IDX(2,3): return mat2x3_type;
+         case IDX(2,4): return mat2x4_type;
+         case IDX(3,2): return mat3x2_type;
+         case IDX(3,3): return mat3_type;
+         case IDX(3,4): return mat3x4_type;
+         case IDX(4,2): return mat4x2_type;
+         case IDX(4,3): return mat4x3_type;
+         case IDX(4,4): return mat4_type;
+         default: return error_type;
+         }
       }
    }
 
@@ -475,6 +522,117 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
    return error_type;
 }
 
+/**
+ * Get the built-in sampler type for a dimensionality, shadow/array flags and
+ * sampled data type.  Unsupported or unlisted combinations yield error_type.
+ */
+const glsl_type *
+glsl_type::get_sampler_instance(enum glsl_sampler_dim dim,
+                                bool shadow,
+                                bool array,
+                                glsl_base_type type)
+{
+   switch (type) {
+   case GLSL_TYPE_FLOAT:
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         if (shadow)
+            return (array ? sampler1DArrayShadow_type : sampler1DShadow_type);
+         return (array ? sampler1DArray_type : sampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         if (shadow)
+            return (array ? sampler2DArrayShadow_type : sampler2DShadow_type);
+         return (array ? sampler2DArray_type : sampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (shadow || array)
+            return error_type;
+         return sampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         if (shadow)
+            return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type);
+         return (array ? samplerCubeArray_type : samplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         if (shadow)
+            return sampler2DRectShadow_type;
+         return sampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (shadow || array)
+            return error_type;
+         return samplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         if (shadow)
+            return error_type;
+         return (array ? sampler2DMSArray_type : sampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         if (shadow || array)
+            return error_type;
+         return samplerExternalOES_type;
+      }
+      return error_type; /* unlisted dim: do not fall into the INT cases */
+   case GLSL_TYPE_INT:
+      if (shadow)
+         return error_type;
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         return (array ? isampler1DArray_type : isampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         return (array ? isampler2DArray_type : isampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (array)
+            return error_type;
+         return isampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         return (array ? isamplerCubeArray_type : isamplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         return isampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (array)
+            return error_type;
+         return isamplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         return (array ? isampler2DMSArray_type : isampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         return error_type;
+      }
+      return error_type; /* unlisted dim: do not fall into the UINT cases */
+   case GLSL_TYPE_UINT:
+      if (shadow)
+         return error_type;
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         return (array ? usampler1DArray_type : usampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         return (array ? usampler2DArray_type : usampler2DArray_type == NULL ? usampler2D_type : usampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (array)
+            return error_type;
+         return usampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         return (array ? usamplerCubeArray_type : usamplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         return usampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (array)
+            return error_type;
+         return usamplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         return (array ? usampler2DMSArray_type : usampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         return error_type;
+      }
+      return error_type; /* unlisted dim: do not fall into default */
+   default:
+      return error_type;
+   }
+
+   unreachable("switch statement above should be complete");
+}
 
 const glsl_type *
 glsl_type::get_array_instance(const glsl_type *base, unsigned array_size)
@@ -708,6 +866,9 @@ glsl_type::component_slots() const
    case GLSL_TYPE_BOOL:
       return this->components();
 
+   case GLSL_TYPE_DOUBLE:
+      return 2 * this->components();
+
    case GLSL_TYPE_STRUCT:
    case GLSL_TYPE_INTERFACE: {
       unsigned size = 0;
@@ -743,6 +904,7 @@ glsl_type::uniform_locations() const
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL:
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
@@ -787,12 +949,26 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
          desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
       return true;
 
+   /* No implicit conversions from double. */
+   if ((!state || state->has_double()) && this->is_double())
+      return false;
+
+   /* Conversions from different types to double. */
+   if ((!state || state->has_double()) && desired->is_double()) {
+      if (this->is_float())
+         return true;
+      if (this->is_integer())
+         return true;
+   }
+
    return false;
 }
 
 unsigned
 glsl_type::std140_base_alignment(bool row_major) const
 {
+   unsigned N = is_double() ? 8 : 4;
+
    /* (1) If the member is a scalar consuming <N> basic machine units, the
     *     base alignment is <N>.
     *
@@ -806,12 +982,12 @@ glsl_type::std140_base_alignment(bool row_major) const
    if (this->is_scalar() || this->is_vector()) {
       switch (this->vector_elements) {
       case 1:
-	 return 4;
+	 return N;
       case 2:
-	 return 8;
+	 return 2 * N;
       case 3:
       case 4:
-	 return 16;
+	 return 4 * N;
       }
    }
 
@@ -860,10 +1036,10 @@ glsl_type::std140_base_alignment(bool row_major) const
       int r = this->vector_elements;
 
       if (row_major) {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT, c, 1);
+	 vec_type = get_instance(base_type, c, 1);
 	 array_type = glsl_type::get_array_instance(vec_type, r);
       } else {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT, r, 1);
+	 vec_type = get_instance(base_type, r, 1);
 	 array_type = glsl_type::get_array_instance(vec_type, c);
       }
 
@@ -901,6 +1077,15 @@ glsl_type::std140_base_alignment(bool row_major) const
       return base_alignment;
    }
 
+   /* A sampler may never occur in a UBO (without bindless of some sort),
+    * however it is convenient to use this alignment function even with
+    * regular uniforms. This allows use of this function on uniform structs
+    * that contain samplers.
+    */
+   if (this->is_sampler()) {
+      return 0;
+   }
+
    assert(!"not reached");
    return -1;
 }
@@ -908,6 +1093,8 @@ glsl_type::std140_base_alignment(bool row_major) const
 unsigned
 glsl_type::std140_size(bool row_major) const
 {
+   unsigned N = is_double() ? 8 : 4;
+
    /* (1) If the member is a scalar consuming <N> basic machine units, the
     *     base alignment is <N>.
     *
@@ -919,7 +1106,7 @@ glsl_type::std140_size(bool row_major) const
     *     <N> basic machine units, the base alignment is 4<N>.
     */
    if (this->is_scalar() || this->is_vector()) {
-      return this->vector_elements * 4;
+      return this->vector_elements * N;
    }
 
    /* (5) If the member is a column-major matrix with <C> columns and
@@ -954,11 +1141,12 @@ glsl_type::std140_size(bool row_major) const
       }
 
       if (row_major) {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT,
-				 element_type->matrix_columns, 1);
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
 	 array_len *= element_type->vector_elements;
       } else {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT,
+	 vec_type = get_instance(element_type->base_type,
 				 element_type->vector_elements, 1);
 	 array_len *= element_type->matrix_columns;
       }
@@ -1061,6 +1249,7 @@ glsl_type::count_attribute_slots() const
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       return this->matrix_columns;
 
    case GLSL_TYPE_STRUCT:
@@ -1115,8 +1304,13 @@ glsl_type::coordinate_components() const
       break;
    }
 
-   /* Array textures need an additional component for the array index. */
-   if (sampler_array)
+   /* Array textures need an additional component for the array index, except
+    * for cubemap array images that behave like a 2D array of interleaved
+    * cubemap faces.
+    */
+   if (sampler_array &&
+       !(base_type == GLSL_TYPE_IMAGE &&
+         sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE))
       size += 1;
 
    return size;
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 474b12914..7359e9476 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -28,7 +28,6 @@
 
 #include <string.h>
 #include <assert.h>
-#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,6 +50,7 @@ enum glsl_base_type {
    GLSL_TYPE_UINT = 0,
    GLSL_TYPE_INT,
    GLSL_TYPE_FLOAT,
+   GLSL_TYPE_DOUBLE,
    GLSL_TYPE_BOOL,
    GLSL_TYPE_SAMPLER,
    GLSL_TYPE_IMAGE,
@@ -103,6 +103,7 @@ enum glsl_matrix_layout {
 #ifdef __cplusplus
 #include "GL/gl.h"
 #include "util/ralloc.h"
+#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
 
 struct glsl_type {
    GLenum gl_type;
@@ -199,6 +200,7 @@ struct glsl_type {
     * @{
     */
    static const glsl_type *vec(unsigned components);
+   static const glsl_type *dvec(unsigned components);
    static const glsl_type *ivec(unsigned components);
    static const glsl_type *uvec(unsigned components);
    static const glsl_type *bvec(unsigned components);
@@ -244,6 +246,15 @@ struct glsl_type {
 					unsigned columns);
 
    /**
+    * Get the instance of a sampler type
+    */
+   static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim,
+                                                bool shadow,
+                                                bool array,
+                                                glsl_base_type type);
+
+
+   /**
     * Get the instance of an array type
     */
    static const glsl_type *get_array_instance(const glsl_type *base,
@@ -378,7 +389,7 @@ struct glsl_type {
    bool is_matrix() const
    {
-      /* GLSL only has float matrices. */
-      return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT);
+      /* GLSL only has float and double matrices. */
+      return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE);
    }
 
    /**
@@ -386,7 +397,7 @@ struct glsl_type {
     */
    bool is_numeric() const
    {
-      return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_FLOAT);
+      return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE);
    }
 
    /**
@@ -404,6 +415,12 @@ struct glsl_type {
    bool contains_integer() const;
 
    /**
+    * Query whether or not type is a double type, or for struct and array
+    * types, contains a double type.
+    */
+   bool contains_double() const;
+
+   /**
     * Query whether or not a type is a float type
     */
    bool is_float() const
@@ -412,6 +429,14 @@ struct glsl_type {
    }
 
    /**
+    * Query whether or not a type is a double type
+    */
+   bool is_double() const
+   {
+      return base_type == GLSL_TYPE_DOUBLE;
+   }
+
+   /**
     * Query whether or not a type is a non-array boolean type
     */
    bool is_boolean() const
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index f6aeb6158..90c055f32 100755
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -257,6 +257,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_f2i:
    case ir_unop_b2i:
    case ir_unop_u2i:
+   case ir_unop_d2i:
    case ir_unop_bitcast_f2i:
    case ir_unop_bit_count:
    case ir_unop_find_msb:
@@ -268,6 +269,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_b2f:
    case ir_unop_i2f:
    case ir_unop_u2f:
+   case ir_unop_d2f:
    case ir_unop_bitcast_i2f:
    case ir_unop_bitcast_u2f:
       this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
@@ -276,12 +278,21 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
 
    case ir_unop_f2b:
    case ir_unop_i2b:
+   case ir_unop_d2b:
       this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
 					   op0->type->vector_elements, 1);
       break;
 
+   case ir_unop_f2d:
+   case ir_unop_i2d:
+   case ir_unop_u2d:
+      this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
+					   op0->type->vector_elements, 1);
+      break;
+
    case ir_unop_i2u:
    case ir_unop_f2u:
+   case ir_unop_d2u:
    case ir_unop_bitcast_f2u:
       this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
 					   op0->type->vector_elements, 1);
@@ -293,6 +304,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::float_type;
       break;
 
+   case ir_unop_unpack_double_2x32:
+      this->type = glsl_type::uvec2_type;
+      break;
+
    case ir_unop_any:
       this->type = glsl_type::bool_type;
       break;
@@ -305,6 +320,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::uint_type;
       break;
 
+   case ir_unop_pack_double_2x32:
+      this->type = glsl_type::double_type;
+      break;
+
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_half_2x16:
@@ -316,6 +335,14 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::vec4_type;
       break;
 
+   case ir_unop_frexp_sig:
+      this->type = op0->type;
+      break;
+   case ir_unop_frexp_exp:
+      this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+					   op0->type->vector_elements, 1);
+      break;
+
    default:
       assert(!"not reached: missing automatic type setup for ir_expression");
       this->type = op0->type;
@@ -390,7 +417,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
       break;
 
    case ir_binop_dot:
-      this->type = glsl_type::float_type;
+      this->type = op0->type->get_base_type();
       break;
 
    case ir_binop_pack_half_2x16_split:
@@ -494,6 +521,13 @@ static const char *const operator_strs[] = {
    "u2f",
    "i2u",
    "u2i",
+   "d2f",
+   "f2d",
+   "d2i",
+   "i2d",
+   "d2u",
+   "u2d",
+   "d2b",
    "bitcast_i2f",
    "bitcast_f2i",
    "bitcast_u2f",
@@ -531,6 +565,10 @@ static const char *const operator_strs[] = {
    "find_msb",
    "find_lsb",
    "sat",
+   "packDouble2x32",
+   "unpackDouble2x32",
+   "frexp_sig",
+   "frexp_exp",
    "noise",
    "interpolate_at_centroid",
    "+",
@@ -646,6 +684,19 @@ ir_constant::ir_constant(float f, unsigned vector_elements)
    }
 }
 
+ir_constant::ir_constant(double d, unsigned vector_elements)
+   : ir_rvalue(ir_type_constant)
+{
+   /* Broadcast d into the first vector_elements slots of a double vector
+    * constant and zero the rest of the 16-entry value storage.
+    */
+   assert(vector_elements <= 4);
+   this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1);
+
+   for (unsigned slot = 0; slot < 16; slot++)
+      this->value.d[slot] = (slot < vector_elements) ? d : 0.0;
+}
+
 ir_constant::ir_constant(unsigned int u, unsigned vector_elements)
    : ir_rvalue(ir_type_constant)
 {
@@ -695,6 +746,7 @@ ir_constant::ir_constant(const ir_constant *c, unsigned i)
    case GLSL_TYPE_INT:   this->value.i[0] = c->value.i[i]; break;
    case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
    case GLSL_TYPE_BOOL:  this->value.b[0] = c->value.b[i]; break;
+   case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
    default:              assert(!"Should not get here."); break;
    }
 }
@@ -746,9 +798,16 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
    if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
       if (type->is_matrix()) {
 	 /* Matrix - fill diagonal (rest is already set to 0) */
-	 assert(type->base_type == GLSL_TYPE_FLOAT);
-	 for (unsigned i = 0; i < type->matrix_columns; i++)
-	    this->value.f[i * type->vector_elements + i] = value->value.f[0];
+         assert(type->base_type == GLSL_TYPE_FLOAT ||
+                type->base_type == GLSL_TYPE_DOUBLE);
+         for (unsigned i = 0; i < type->matrix_columns; i++) {
+            if (type->base_type == GLSL_TYPE_FLOAT)
+               this->value.f[i * type->vector_elements + i] =
+                  value->value.f[0];
+            else
+               this->value.d[i * type->vector_elements + i] =
+                  value->value.d[0];
+         }
       } else {
 	 /* Vector or scalar - fill all components */
 	 switch (type->base_type) {
@@ -761,6 +820,10 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
 	    for (unsigned i = 0; i < type->components(); i++)
 	       this->value.f[i] = value->value.f[0];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    for (unsigned i = 0; i < type->components(); i++)
+	       this->value.d[i] = value->value.d[0];
+	    break;
 	 case GLSL_TYPE_BOOL:
 	    for (unsigned i = 0; i < type->components(); i++)
 	       this->value.b[i] = value->value.b[0];
@@ -819,6 +882,9 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
 	 case GLSL_TYPE_BOOL:
 	    this->value.b[i] = value->get_bool_component(j);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    this->value.d[i] = value->get_double_component(j);
+	    break;
 	 default:
 	    /* FINISHME: What to do?  Exceptions are not the answer.
 	     */
@@ -869,6 +935,7 @@ ir_constant::get_bool_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i] != 0;
    case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
    case GLSL_TYPE_BOOL:  return this->value.b[i];
+   case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
    default:              assert(!"Should not get here."); break;
    }
 
@@ -886,6 +953,25 @@ ir_constant::get_float_component(unsigned i) const
    case GLSL_TYPE_INT:   return (float) this->value.i[i];
    case GLSL_TYPE_FLOAT: return this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1.0f : 0.0f;
+   case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
+   default:              assert(!"Should not get here."); break;
+   }
+
+   /* Must return something to make the compiler happy.  This is clearly an
+    * error case.
+    */
+   return 0.0;
+}
+
+double
+ir_constant::get_double_component(unsigned i) const
+{
+   switch (this->type->base_type) {
+   case GLSL_TYPE_UINT:  return (double) this->value.u[i];
+   case GLSL_TYPE_INT:   return (double) this->value.i[i];
+   case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
+   case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1.0 : 0.0;
+   case GLSL_TYPE_DOUBLE: return this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -903,6 +989,7 @@ ir_constant::get_int_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i];
    case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1 : 0;
+   case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -920,6 +1007,7 @@ ir_constant::get_uint_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i];
    case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1 : 0;
+   case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -984,6 +1072,7 @@ ir_constant::copy_offset(ir_constant *src, int offset)
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL: {
       unsigned int size = src->type->components();
       assert (size <= this->type->components() - offset);
@@ -1001,6 +1090,9 @@ ir_constant::copy_offset(ir_constant *src, int offset)
 	 case GLSL_TYPE_BOOL:
 	    value.b[i+offset] = src->get_bool_component(i);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    value.d[i+offset] = src->get_double_component(i);
+	    break;
 	 default: // Shut up the compiler
 	    break;
 	 }
@@ -1057,6 +1149,9 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
 	 case GLSL_TYPE_BOOL:
 	    value.b[i+offset] = src->get_bool_component(id++);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    value.d[i+offset] = src->get_double_component(id++);
+	    break;
 	 default:
 	    assert(!"Should not get here.");
 	    return;
@@ -1117,6 +1212,10 @@ ir_constant::has_value(const ir_constant *c) const
 	 if (this->value.b[i] != c->value.b[i])
 	    return false;
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 if (this->value.d[i] != c->value.d[i])
+	    return false;
+	 break;
       default:
 	 assert(!"Should not get here.");
 	 return false;
@@ -1154,6 +1253,10 @@ ir_constant::is_value(float f, int i) const
 	 if (this->value.b[c] != bool(i))
 	    return false;
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 if (this->value.d[c] != double(f))
+	    return false;
+	 break;
       default:
 	 /* The only other base types are structures, arrays, and samplers.
 	  * Samplers cannot be constants, and the others should have been
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index a0f48b2af..25f2ecada 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -450,11 +450,8 @@ public:
     */
    inline bool is_interface_instance() const
    {
-      const glsl_type *const t = this->type;
-
-      return (t == this->interface_type)
-         || (t->is_array() && t->fields.array == this->interface_type);
-    }
+      return this->type->without_array() == this->interface_type;
+   }
 
    /**
     * Set this->interface_type on a newly created variable.
@@ -1269,6 +1266,13 @@ enum ir_expression_operation {
    ir_unop_u2f,         /**< Unsigned-to-float conversion. */
    ir_unop_i2u,         /**< Integer-to-unsigned conversion. */
    ir_unop_u2i,         /**< Unsigned-to-integer conversion. */
+   ir_unop_d2f,         /**< Double-to-float conversion. */
+   ir_unop_f2d,         /**< Float-to-double conversion. */
+   ir_unop_d2i,         /**< Double-to-integer conversion. */
+   ir_unop_i2d,         /**< Integer-to-double conversion. */
+   ir_unop_d2u,         /**< Double-to-unsigned conversion. */
+   ir_unop_u2d,         /**< Unsigned-to-double conversion. */
+   ir_unop_d2b,         /**< Double-to-boolean conversion. */
    ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
    ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
    ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
@@ -1345,6 +1349,18 @@ enum ir_expression_operation {
    /*@}*/
 
    ir_unop_saturate,
+
+   /**
+    * \name Double packing, part of ARB_gpu_shader_fp64.
+    */
+   /*@{*/
+   ir_unop_pack_double_2x32,
+   ir_unop_unpack_double_2x32,
+   /*@}*/
+
+   ir_unop_frexp_sig,
+   ir_unop_frexp_exp,
+
    ir_unop_noise,
 
    /**
@@ -2153,6 +2169,7 @@ union ir_constant_data {
       int i[16];
       float f[16];
       bool b[16];
+      double d[16];
 };
 
 
@@ -2163,6 +2180,7 @@ public:
    ir_constant(unsigned int u, unsigned vector_elements=1);
    ir_constant(int i, unsigned vector_elements=1);
    ir_constant(float f, unsigned vector_elements=1);
+   ir_constant(double d, unsigned vector_elements=1);
 
    /**
     * Construct an ir_constant from a list of ir_constant values
@@ -2209,6 +2227,7 @@ public:
    /*@{*/
    bool get_bool_component(unsigned i) const;
    float get_float_component(unsigned i) const;
+   double get_double_component(unsigned i) const;
    int get_int_component(unsigned i) const;
    unsigned get_uint_component(unsigned i) const;
    /*@}*/
@@ -2417,6 +2436,10 @@ extern ir_function_signature *
 _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
                                  const char *name, exec_list *actual_parameters);
 
+extern ir_function *
+_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
+                                         const char *name);
+
 extern gl_shader *
 _mesa_glsl_get_builtin_function_shader(void);
 
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp
index a2f6f2967..e44b05c99 100644
--- a/mesalib/src/glsl/ir_builder.cpp
+++ b/mesalib/src/glsl/ir_builder.cpp
@@ -246,11 +246,21 @@ ir_expression *borrow(operand a, operand b)
    return expr(ir_binop_borrow, a, b);
 }
 
+ir_expression *trunc(operand a)
+{
+   return expr(ir_unop_trunc, a);
+}
+
 ir_expression *round_even(operand a)
 {
    return expr(ir_unop_round_even, a);
 }
 
+ir_expression *fract(operand a)
+{
+   return expr(ir_unop_fract, a);
+}
+
 /* dot for vectors, mul for scalars */
 ir_expression *dot(operand a, operand b)
 {
@@ -515,6 +525,24 @@ interpolate_at_sample(operand a, operand b)
 }
 
 ir_expression *
+f2d(operand a)
+{
+   return expr(ir_unop_f2d, a);
+}
+
+ir_expression *
+i2d(operand a)
+{
+   return expr(ir_unop_i2d, a);
+}
+
+ir_expression *
+u2d(operand a)
+{
+   return expr(ir_unop_u2d, a);
+}
+
+ir_expression *
 fma(operand a, operand b, operand c)
 {
    return expr(ir_triop_fma, a, b, c);
diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h
index 573596cf1..870265881 100644
--- a/mesalib/src/glsl/ir_builder.h
+++ b/mesalib/src/glsl/ir_builder.h
@@ -137,7 +137,9 @@ ir_expression *imul_high(operand a, operand b);
 ir_expression *div(operand a, operand b);
 ir_expression *carry(operand a, operand b);
 ir_expression *borrow(operand a, operand b);
+ir_expression *trunc(operand a);
 ir_expression *round_even(operand a);
+ir_expression *fract(operand a);
 ir_expression *dot(operand a, operand b);
 ir_expression *clamp(operand a, operand b, operand c);
 ir_expression *saturate(operand a);
@@ -183,6 +185,10 @@ ir_expression *i2b(operand a);
 ir_expression *f2b(operand a);
 ir_expression *b2f(operand a);
 
+ir_expression *f2d(operand a);
+ir_expression *i2d(operand a);
+ir_expression *u2d(operand a);
+
 ir_expression *min2(operand a, operand b);
 ir_expression *max2(operand a, operand b);
 
diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp
index dffa57844..5c7279ca3 100644
--- a/mesalib/src/glsl/ir_clone.cpp
+++ b/mesalib/src/glsl/ir_clone.cpp
@@ -327,6 +327,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL:
       return new(mem_ctx) ir_constant(this->type, &this->value);
 
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 4593a13d1..4b521f4e8 100755
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -44,7 +44,7 @@ static int isnormal(double x)
 {
    return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
 }
-#elif defined(__SUNPRO_CC)
+#elif defined(__SUNPRO_CC) && !defined(isnormal)
 #include <ieeefp.h>
 static int isnormal(double x)
 {
@@ -53,7 +53,7 @@ static int isnormal(double x)
 #endif
 
 static float
-dot(ir_constant *op0, ir_constant *op1)
+dot_f(ir_constant *op0, ir_constant *op1)
 {
    assert(op0->type->is_float() && op1->type->is_float());
 
@@ -64,6 +64,18 @@ dot(ir_constant *op0, ir_constant *op1)
    return result;
 }
 
+static double
+dot_d(ir_constant *op0, ir_constant *op1)
+{
+   /* Double-precision dot product: accumulate the component-wise products
+    * of the two operands, walking as many components as op0 has. */
+   assert(op0->type->is_double() && op1->type->is_double());
+   double sum = 0;
+   for (unsigned idx = 0; idx < op0->type->components(); ++idx)
+      sum = sum + op0->value.d[idx] * op1->value.d[idx];
+   return sum;
+}
+
 /* This method is the only one supported by gcc.  Unions in particular
  * are iffy, and read-through-converted-pointer is killed by strict
  * aliasing.  OTOH, the compiler sees through the memcpy, so the
@@ -660,32 +672,81 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    data.b[0] = true;
       }
       break;
-
-   case ir_unop_trunc:
+   case ir_unop_d2f:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.f[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_f2d:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = truncf(op[0]->value.f[c]);
+	 data.d[c] = op[0]->value.f[c];
+      }
+      break;
+   case ir_unop_d2i:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.i[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_i2d:
+      assert(op[0]->type->base_type == GLSL_TYPE_INT);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.d[c] = op[0]->value.i[c];
+      }
+      break;
+   case ir_unop_d2u:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.u[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_u2d:
+      assert(op[0]->type->base_type == GLSL_TYPE_UINT);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.d[c] = op[0]->value.u[c];
+      }
+      break;
+   case ir_unop_d2b:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         data.b[c] = op[0]->value.d[c] != 0.0;
+      }
+      break;
+   case ir_unop_trunc:
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = trunc(op[0]->value.d[c]);
+         else
+            data.f[c] = truncf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_round_even:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = _mesa_round_to_even(op[0]->value.d[c]);
+         else
+            data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_ceil:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = ceilf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = ceil(op[0]->value.d[c]);
+         else
+            data.f[c] = ceilf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_floor:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = floorf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = floor(op[0]->value.d[c]);
+         else
+            data.f[c] = floorf(op[0]->value.f[c]);
       }
       break;
 
@@ -701,6 +762,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -735,6 +799,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = -op[0]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = -op[0]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -755,6 +822,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = fabs(op[0]->value.f[c]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = fabs(op[0]->value.d[c]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -773,6 +843,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0));
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = double((op[0]->value.d[c] > 0)-(op[0]->value.d[c] < 0));
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -780,7 +853,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_rcp:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 switch (this->type->base_type) {
 	 case GLSL_TYPE_UINT:
@@ -795,6 +867,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    if (op[0]->value.f[c] != 0.0)
 	       data.f[c] = 1.0F / op[0]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    if (op[0]->value.d[c] != 0.0)
+	       data.d[c] = 1.0 / op[0]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -802,16 +878,20 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_rsq:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = 1.0 / sqrt(op[0]->value.d[c]);
+         else
+            data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_sqrt:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = sqrtf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = sqrt(op[0]->value.d[c]);
+         else
+            data.f[c] = sqrtf(op[0]->value.f[c]);
       }
       break;
 
@@ -927,7 +1007,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_binop_dot:
-      data.f[0] = dot(op[0], op[1]);
+      if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+         data.d[0] = dot_d(op[0], op[1]);
+      else
+         data.f[0] = dot_f(op[0], op[1]);
       break;
 
    case ir_binop_min:
@@ -946,6 +1029,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -968,6 +1054,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -990,6 +1079,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1012,6 +1104,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1036,6 +1131,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    case GLSL_TYPE_FLOAT:
 	       data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1];
 	       break;
+	    case GLSL_TYPE_DOUBLE:
+	       data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1];
+	       break;
 	    default:
 	       assert(0);
 	    }
@@ -1059,7 +1157,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 for (unsigned j = 0; j < p; j++) {
 	    for (unsigned i = 0; i < n; i++) {
 	       for (unsigned k = 0; k < m; k++) {
-		  data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
+                  if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+                     data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j];
+                  else
+                     data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
 	       }
 	    }
 	 }
@@ -1091,6 +1192,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1126,6 +1230,13 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]
 	       * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    /* We don't use fmod because it rounds toward zero; GLSL specifies
+	     * the use of floor.
+	     */
+	    data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]
+	       * floor(op[0]->value.d[c0] / op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1162,6 +1273,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] < op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1180,6 +1294,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] > op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] > op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1198,6 +1315,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1216,6 +1336,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1237,6 +1360,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_BOOL:
 	    data.b[c] = op[0]->value.b[c] == op[1]->value.b[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] == op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1258,6 +1384,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_BOOL:
 	    data.b[c] = op[0]->value.b[c] != op[1]->value.b[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] != op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1368,6 +1497,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       case GLSL_TYPE_FLOAT:
          data.f[0] = op[0]->value.f[c];
          break;
+      case GLSL_TYPE_DOUBLE:
+         data.d[0] = op[0]->value.d[c];
+         break;
       case GLSL_TYPE_BOOL:
          data.b[0] = op[0]->value.b[c];
          break;
@@ -1467,6 +1599,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
          data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f);
       }
       break;
+   case ir_unop_pack_double_2x32: {
+      /* u[0] is the low word, u[1] the high; memcpy avoids aliasing UB. */
+      uint64_t temp = ((uint64_t)op[0]->value.u[1] << 32) | op[0]->value.u[0];
+      memcpy(&data.d[0], &temp, sizeof(temp));
+      break;
+   }
+   case ir_unop_unpack_double_2x32: {
+      uint64_t temp;   /* split by value: low word first, mirroring pack */
+      memcpy(&temp, &op[0]->value.d[0], sizeof(temp));
+      data.u[0] = (uint32_t)temp;
+      data.u[1] = (uint32_t)(temp >> 32);
+      break;
+   }
 
    case ir_triop_bitfield_extract: {
       int offset = op[1]->value.i[0];
@@ -1516,40 +1661,65 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 
    case ir_binop_ldexp:
       for (unsigned c = 0; c < components; c++) {
-         data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
-         /* Flush subnormal values to zero. */
-         if (!isnormal(data.f[c]))
-            data.f[c] = copysign(0.0f, op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+            data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero. */
+            if (!isnormal(data.d[c]))
+               data.d[c] = copysign(0.0, op[0]->value.d[c]);
+         } else {
+            data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero. */
+            if (!isnormal(data.f[c]))
+               data.f[c] = copysign(0.0f, op[0]->value.f[c]);
+         }
       }
       break;
 
    case ir_triop_fma:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
 
       for (unsigned c = 0; c < components; c++) {
-         data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
-                                       + op[2]->value.f[c];
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * op[1]->value.d[c]
+                                          + op[2]->value.d[c];
+         else
+            data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
+                                          + op[2]->value.f[c];
       }
       break;
 
    case ir_triop_lrp: {
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
 
       unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
       for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
-         data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
-                     (op[1]->value.f[c] * op[2]->value.f[c2]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) +
+               (op[1]->value.d[c] * op[2]->value.d[c2]);
+         else
+            data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
+               (op[1]->value.f[c] * op[2]->value.f[c2]);
       }
       break;
    }
 
    case ir_triop_csel:
       for (unsigned c = 0; c < components; c++) {
-         data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
+         if (op[1]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c]
+                                       : op[2]->value.d[c];
+         else
+            data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
                                        : op[2]->value.u[c];
       }
       break;
@@ -1572,6 +1742,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       case GLSL_TYPE_BOOL:
 	 data.b[idx] = op[1]->value.b[0];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 data.d[idx] = op[1]->value.d[0];
+	 break;
       default:
 	 assert(!"Should not get here.");
 	 break;
@@ -1618,6 +1791,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[c]->value.f[0];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[c]->value.d[0];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1659,6 +1835,7 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_INT:   data.u[i] = v->value.u[swiz_idx[i]]; break;
 	 case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
 	 case GLSL_TYPE_BOOL:  data.b[i] = v->value.b[swiz_idx[i]]; break;
+	 case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
 	 default:              assert(!"Should not get here."); break;
 	 }
       }
@@ -1733,6 +1910,12 @@ ir_dereference_array::constant_expression_value(struct hash_table *variable_cont
 
 	    break;
 
+	 case GLSL_TYPE_DOUBLE:
+	    for (unsigned i = 0; i < column_type->vector_elements; i++)
+	       data.d[i] = array->value.d[mat_idx + i];
+
+	    break;
+
 	 default:
 	    assert(!"Should not get here.");
 	    break;
diff --git a/mesalib/src/glsl/ir_function_can_inline.cpp b/mesalib/src/glsl/ir_function_can_inline.cpp
index 7b15d5df1..3b1d15f80 100644
--- a/mesalib/src/glsl/ir_function_can_inline.cpp
+++ b/mesalib/src/glsl/ir_function_can_inline.cpp
@@ -26,11 +26,10 @@
  *
  * Determines if we can inline a function call using ir_function_inlining.cpp.
  *
- * The primary restriction is that we can't return from the function
- * other than as the last instruction.  We could potentially work
- * around this for some constructs by flattening control flow and
- * moving the return to the end, or by using breaks from a do {} while
- * (0) loop surrounding the function body.
+ * The primary restriction is that we can't return from the function other
+ * than as the last instruction.  In lower_jumps.cpp, we can lower return
+ * statements not at the end of the function to other control flow in order to
+ * deal with this restriction.
  */
 
 #include "ir.h"
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index 34e0b4b94..7eb861ae5 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -34,13 +34,15 @@
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
-#define MOD_TO_FRACT       0x20
+#define MOD_TO_FLOOR       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
 #define BITFIELD_INSERT_TO_BFM_BFI 0x80
 #define LDEXP_TO_ARITH     0x100
 #define CARRY_TO_ARITH     0x200
 #define BORROW_TO_ARITH    0x400
 #define SAT_TO_CLAMP       0x800
+#define DOPS_TO_DFRAC      0x1000
+#define DFREXP_DLDEXP_TO_ARITH    0x2000
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp
index bd398052c..01f52e85f 100644
--- a/mesalib/src/glsl/ir_print_visitor.cpp
+++ b/mesalib/src/glsl/ir_print_visitor.cpp
@@ -436,6 +436,17 @@ void ir_print_visitor::visit(ir_constant *ir)
                fprintf(f, "%f", ir->value.f[i]);
             break;
 	 case GLSL_TYPE_BOOL:  fprintf(f, "%d", ir->value.b[i]); break;
+	 case GLSL_TYPE_DOUBLE:
+            if (ir->value.d[i] == 0.0)
+               /* 0.0 == -0.0, so print with %f to get the proper sign. */
+               fprintf(f, "%.1f", ir->value.d[i]);
+            else if (fabs(ir->value.d[i]) < 0.000001)
+               fprintf(f, "%a", ir->value.d[i]);
+            else if (fabs(ir->value.d[i]) > 1000000.0)
+               fprintf(f, "%e", ir->value.d[i]);
+            else
+               fprintf(f, "%f", ir->value.d[i]);
+            break;
 	 default: assert(0);
 	 }
       }
diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp
index 97ead750a..e877a2019 100644
--- a/mesalib/src/glsl/ir_set_program_inouts.cpp
+++ b/mesalib/src/glsl/ir_set_program_inouts.cpp
@@ -81,6 +81,13 @@ is_shader_inout(ir_variable *var)
           var->data.mode == ir_var_system_value;
 }
 
+static inline bool
+is_dual_slot(ir_variable *var)
+{
+   const glsl_type *type = var->type->without_array();
+   return type == glsl_type::dvec4_type || type == glsl_type::dvec3_type;
+}
+
 static void
 mark(struct gl_program *prog, ir_variable *var, int offset, int len,
      bool is_fragment_shader)
@@ -94,19 +101,32 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len,
     */
 
    for (int i = 0; i < len; i++) {
-      GLbitfield64 bitfield =
-         BITFIELD64_BIT(var->data.location + var->data.index + offset + i);
+      bool dual_slot = is_dual_slot(var);
+      int idx = var->data.location + var->data.index + offset + i;
+      GLbitfield64 bitfield = BITFIELD64_BIT(idx);
+
+      /* dvec3 and dvec4 take up 2 slots */
+      if (dual_slot) {
+         idx += i;
+         bitfield |= bitfield << 1;
+      }
       if (var->data.mode == ir_var_shader_in) {
 	 prog->InputsRead |= bitfield;
          if (is_fragment_shader) {
             gl_fragment_program *fprog = (gl_fragment_program *) prog;
-            fprog->InterpQualifier[var->data.location +
-                                   var->data.index + offset + i] =
+            fprog->InterpQualifier[idx] =
                (glsl_interp_qualifier) var->data.interpolation;
             if (var->data.centroid)
                fprog->IsCentroid |= bitfield;
             if (var->data.sample)
                fprog->IsSample |= bitfield;
+
+            /* Set the InterpQualifier of the next slot to the same as the
+             * current one, since dvec3 and dvec4 span 2 slots.
+             */
+            if (dual_slot)
+               fprog->InterpQualifier[idx + 1] =
+                  (glsl_interp_qualifier) var->data.interpolation;
          }
       } else if (var->data.mode == ir_var_system_value) {
          prog->SystemValuesRead |= bitfield;
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 5a6f8bbf5..667889480 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -313,6 +313,10 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_unop_ceil:
    case ir_unop_floor:
    case ir_unop_fract:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->operands[0]->type == ir->type);
+      break;
    case ir_unop_sin:
    case ir_unop_cos:
    case ir_unop_sin_reduced:
@@ -340,6 +344,11 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type == glsl_type::vec4_type);
       break;
 
+   case ir_unop_pack_double_2x32:
+      assert(ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type == glsl_type::uvec2_type);
+      break;
+
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_half_2x16:
@@ -359,6 +368,11 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type == glsl_type::uint_type);
       break;
 
+   case ir_unop_unpack_double_2x32:
+      assert(ir->type == glsl_type::uvec2_type);
+      assert(ir->operands[0]->type == glsl_type::double_type);
+      break;
+
    case ir_unop_bitfield_reverse:
       assert(ir->operands[0]->type == ir->type);
       assert(ir->type->is_integer());
@@ -381,6 +395,45 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_float());
       break;
 
+   case ir_unop_d2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_f2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_i2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+   case ir_unop_u2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2b:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      break;
+
+   case ir_unop_frexp_sig:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_frexp_exp:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
    case ir_binop_add:
    case ir_binop_sub:
    case ir_binop_mul:
@@ -481,8 +534,10 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_dot:
-      assert(ir->type == glsl_type::float_type);
-      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type == glsl_type::float_type ||
+             ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->operands[0]->type->is_vector());
       assert(ir->operands[0]->type == ir->operands[1]->type);
       break;
@@ -507,7 +562,8 @@ ir_validate::visit_leave(ir_expression *ir)
 
    case ir_binop_ldexp:
       assert(ir->operands[0]->type == ir->type);
-      assert(ir->operands[0]->type->is_float());
+      assert(ir->operands[0]->type->is_float() ||
+             ir->operands[0]->type->is_double());
       assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
       assert(ir->operands[0]->type->components() ==
              ir->operands[1]->type->components());
@@ -533,16 +589,20 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_triop_fma:
-      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->type == ir->operands[0]->type);
       assert(ir->type == ir->operands[1]->type);
       assert(ir->type == ir->operands[2]->type);
       break;
 
    case ir_triop_lrp:
-      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->operands[0]->type == ir->operands[1]->type);
-      assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
+      assert(ir->operands[2]->type == ir->operands[0]->type ||
+             ir->operands[2]->type == glsl_type::float_type ||
+             ir->operands[2]->type == glsl_type::double_type);
       break;
 
    case ir_triop_csel:
@@ -706,7 +766,7 @@ ir_validate::visit(ir_variable *ir)
    }
 
    if (ir->data.mode == ir_var_uniform
-       && strncmp(ir->name, "gl_", 3) == 0
+       && is_gl_identifier(ir->name)
        && ir->get_state_slots() == NULL) {
       printf("built-in uniform has no state\n");
       ir->print();
diff --git a/mesalib/src/glsl/ir_variable_refcount.cpp b/mesalib/src/glsl/ir_variable_refcount.cpp
index f67fe6784..e4d825c45 100644
--- a/mesalib/src/glsl/ir_variable_refcount.cpp
+++ b/mesalib/src/glsl/ir_variable_refcount.cpp
@@ -38,7 +38,8 @@
 ir_variable_refcount_visitor::ir_variable_refcount_visitor()
 {
    this->mem_ctx = ralloc_context(NULL);
-   this->ht = _mesa_hash_table_create(NULL, _mesa_key_pointer_equal);
+   this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
 }
 
 static void
@@ -70,15 +71,13 @@ ir_variable_refcount_visitor::get_variable_entry(ir_variable *var)
 {
    assert(var);
 
-   struct hash_entry *e = _mesa_hash_table_search(this->ht,
-						    _mesa_hash_pointer(var),
-						    var);
+   struct hash_entry *e = _mesa_hash_table_search(this->ht, var);
    if (e)
       return (ir_variable_refcount_entry *)e->data;
 
    ir_variable_refcount_entry *entry = new ir_variable_refcount_entry(var);
    assert(entry->referenced_count == 0);
-   _mesa_hash_table_insert(this->ht, _mesa_hash_pointer(var), var, entry);
+   _mesa_hash_table_insert(this->ht, var, entry);
 
    return entry;
 }
diff --git a/mesalib/src/glsl/link_uniform_block_active_visitor.cpp b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp
index 9da6a4bba..292cde343 100644
--- a/mesalib/src/glsl/link_uniform_block_active_visitor.cpp
+++ b/mesalib/src/glsl/link_uniform_block_active_visitor.cpp
@@ -27,9 +27,8 @@
 link_uniform_block_active *
 process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
 {
-   const uint32_t h = _mesa_hash_string(var->get_interface_type()->name);
    const hash_entry *const existing_block =
-      _mesa_hash_table_search(ht, h, var->get_interface_type()->name);
+      _mesa_hash_table_search(ht, var->get_interface_type()->name);
 
    const glsl_type *const block_type = var->is_interface_instance()
       ? var->type : var->get_interface_type();
@@ -54,8 +53,7 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
          b->binding = 0;
       }
 
-      _mesa_hash_table_insert(ht, h, var->get_interface_type()->name,
-			      (void *) b);
+      _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b);
       return b;
    } else {
       link_uniform_block_active *const b =
diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp
index 536fcd458..6ca41107e 100644
--- a/mesalib/src/glsl/link_uniform_blocks.cpp
+++ b/mesalib/src/glsl/link_uniform_blocks.cpp
@@ -67,6 +67,28 @@ private:
       assert(!"Should not get here.");
    }
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->offset = glsl_align(
+            this->offset, type->std140_base_alignment(row_major));
+   }
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+
+      /* Every field of the structure has been visited; apply rule #9.  The
+       * GL_ARB_uniform_buffer_object spec says:
+       *
+       *     "The structure may have padding at the end; the base offset of
+       *     the member following the sub-structure is rounded up to the next
+       *     multiple of the base alignment of the structure."
+       */
+      this->offset = glsl_align(
+            this->offset, type->std140_base_alignment(row_major));
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
                             bool last_field)
@@ -97,27 +119,13 @@ private:
          v->IndexName = v->Name;
       }
 
-      const unsigned alignment = record_type
-         ? record_type->std140_base_alignment(v->RowMajor)
-         : type->std140_base_alignment(v->RowMajor);
+      const unsigned alignment = type->std140_base_alignment(v->RowMajor);
       unsigned size = type->std140_size(v->RowMajor);
 
       this->offset = glsl_align(this->offset, alignment);
       v->Offset = this->offset;
 
-      /* If this is the last field of a structure, apply rule #9.  The
-       * GL_ARB_uniform_buffer_object spec says:
-       *
-       *     "The structure may have padding at the end; the base offset of
-       *     the member following the sub-structure is rounded up to the next
-       *     multiple of the base alignment of the structure."
-       *
-       * last_field won't be set if this is the last field of a UBO that is
-       * not a named instance.
-       */
       this->offset += size;
-      if (last_field)
-         this->offset = glsl_align(this->offset, 16);
 
       /* From the GL_ARB_uniform_buffer_object spec:
        *
@@ -131,16 +139,6 @@ private:
        */
       this->buffer_size = glsl_align(this->offset, 16);
    }
-
-   virtual void visit_field(const glsl_struct_field *field)
-   {
-      /* FINISHME: When support for doubles (dvec4, etc.) is added to the
-       * FINISHME: compiler, this may be incorrect for a structure in a UBO
-       * FINISHME: like struct s { struct { float f } s1; dvec4 v; };.
-       */
-      this->offset = glsl_align(this->offset,
-                                field->type->std140_base_alignment(false));
-   }
 };
 
 class count_block_size : public program_resource_visitor {
@@ -182,7 +180,8 @@ link_uniform_blocks(void *mem_ctx,
     * the hash is organized by block-name.
     */
    struct hash_table *block_hash =
-      _mesa_hash_table_create(mem_ctx, _mesa_key_string_equal);
+      _mesa_hash_table_create(mem_ctx, _mesa_key_hash_string,
+                              _mesa_key_string_equal);
 
    if (block_hash == NULL) {
       _mesa_error_no_memory(__func__);
diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp
index f6a60bce9..69073841e 100644
--- a/mesalib/src/glsl/link_uniform_initializers.cpp
+++ b/mesalib/src/glsl/link_uniform_initializers.cpp
@@ -75,6 +75,11 @@ copy_constant_to_storage(union gl_constant_value *storage,
       case GLSL_TYPE_FLOAT:
 	 storage[i].f = val->value.f[i];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+         /* XXX need to check on big-endian */
+         storage[i * 2].u = *(uint32_t *)&val->value.d[i];
+         storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
+         break;
       case GLSL_TYPE_BOOL:
 	 storage[i].b = val->value.b[i] ? boolean_true : 0;
 	 break;
@@ -200,6 +205,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
 	 val->array_elements[0]->type->base_type;
       const unsigned int elements = val->array_elements[0]->type->components();
       unsigned int idx = 0;
+      unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
 
       assert(val->type->length >= storage->array_elements);
       for (unsigned int i = 0; i < storage->array_elements; i++) {
@@ -209,7 +215,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
                                   elements,
                                   boolean_true);
 
-	 idx += elements;
+	 idx += elements * dmul;
       }
    } else {
       copy_constant_to_storage(storage->storage,
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index a77b5868a..972ba8100 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -169,6 +169,9 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
       if (record_type == NULL && t->is_record())
          record_type = t;
 
+      if (t->is_record())
+         this->enter_record(t, *name, row_major);
+
       for (unsigned i = 0; i < t->length; i++) {
 	 const char *field = t->fields.structure[i].name;
 	 size_t new_length = name_length;
@@ -208,6 +211,11 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
           */
          record_type = NULL;
       }
+
+      if (t->is_record()) {
+         (*name)[name_length] = '\0';
+         this->leave_record(t, *name, row_major);
+      }
    } else if (t->is_array() && (t->fields.array->is_record()
                                 || t->fields.array->is_interface())) {
       if (record_type == NULL && t->fields.array->is_record())
@@ -249,6 +257,16 @@ program_resource_visitor::visit_field(const glsl_struct_field *field)
    /* empty */
 }
 
+void
+program_resource_visitor::enter_record(const glsl_type *, const char *, bool)
+{
+}
+
+void
+program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
+{
+}
+
 namespace {
 
 /**
@@ -526,6 +544,20 @@ private:
       assert(!"Should not get here.");
    }
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std140_base_alignment(row_major));
+   }
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std140_base_alignment(row_major));
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
                             bool last_field)
@@ -590,16 +622,11 @@ private:
       if (this->ubo_block_index != -1) {
 	 this->uniforms[id].block_index = this->ubo_block_index;
 
-	 const unsigned alignment = record_type
-	    ? record_type->std140_base_alignment(row_major)
-	    : type->std140_base_alignment(row_major);
+	 const unsigned alignment = type->std140_base_alignment(row_major);
 	 this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
 	 this->uniforms[id].offset = this->ubo_byte_offset;
 	 this->ubo_byte_offset += type->std140_size(row_major);
 
-         if (last_field)
-            this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, 16);
-
 	 if (type->is_array()) {
 	    this->uniforms[id].array_stride =
 	       glsl_align(type->fields.array->std140_size(row_major), 16);
@@ -608,7 +635,12 @@ private:
 	 }
 
 	 if (type->without_array()->is_matrix()) {
-	    this->uniforms[id].matrix_stride = 16;
+            const glsl_type *matrix = type->without_array();
+            const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
+            const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
+
+            assert(items <= 4);
+            this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
 	    this->uniforms[id].row_major = row_major;
 	 } else {
 	    this->uniforms[id].matrix_stride = 0;
diff --git a/mesalib/src/glsl/link_varyings.cpp b/mesalib/src/glsl/link_varyings.cpp
index 1866ab265..22617990f 100644
--- a/mesalib/src/glsl/link_varyings.cpp
+++ b/mesalib/src/glsl/link_varyings.cpp
@@ -116,7 +116,7 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
       return;
    }
 
-   if (input->data.invariant != output->data.invariant) {
+   if (!prog->IsES && input->data.invariant != output->data.invariant) {
       linker_error(prog,
                    "%s shader output `%s' %s invariant qualifier, "
                    "but %s shader input %s invariant qualifier\n",
@@ -835,9 +835,11 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
        * regardless of where they appear.  We can trivially satisfy that
        * requirement by changing the interpolation type to flat here.
        */
-      producer_var->data.centroid = false;
-      producer_var->data.sample = false;
-      producer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+      if (producer_var) {
+         producer_var->data.centroid = false;
+         producer_var->data.sample = false;
+         producer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+      }
 
       if (consumer_var) {
          consumer_var->data.centroid = false;
diff --git a/mesalib/src/glsl/linker.cpp b/mesalib/src/glsl/linker.cpp
index de6b1fb9f..3f5eac1e2 100644
--- a/mesalib/src/glsl/linker.cpp
+++ b/mesalib/src/glsl/linker.cpp
@@ -76,10 +76,9 @@
 #include "ir_rvalue_visitor.h"
 #include "ir_uniform.h"
 
-extern "C" {
 #include "main/shaderobj.h"
 #include "main/enums.h"
-}
+
 
 void linker_error(gl_shader_program *, const char *, ...);
 
@@ -732,8 +731,27 @@ cross_validate_globals(struct gl_shader_program *prog,
 		   && ((var->type->length == 0)
 		       || (existing->type->length == 0))) {
 		  if (var->type->length != 0) {
+                     if (var->type->length <= existing->data.max_array_access) {
+                        linker_error(prog, "%s `%s' declared as type "
+                                     "`%s' but outermost dimension has an index"
+                                     " of `%i'\n",
+                                     mode_string(var),
+                                     var->name, var->type->name,
+                                     existing->data.max_array_access);
+                        return;
+                     }
 		     existing->type = var->type;
-		  }
+		  } else if (existing->type->length != 0
+                             && existing->type->length <=
+                                var->data.max_array_access) {
+                     linker_error(prog, "%s `%s' declared as type "
+                                  "`%s' but outermost dimension has an index"
+                                  " of `%i'\n",
+                                  mode_string(var),
+                                  var->name, existing->type->name,
+                                  var->data.max_array_access);
+                     return;
+                  }
                } else if (var->type->is_record()
 		   && existing->type->is_record()
 		   && existing->type->record_compare(var->type)) {
@@ -2746,6 +2764,21 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    if (last >= 0 && last < MESA_SHADER_FRAGMENT) {
       gl_shader *const sh = prog->_LinkedShaders[last];
 
+      if (first == MESA_SHADER_GEOMETRY) {
+         /* There was no vertex shader, but we still have to assign varying
+          * locations for use by geometry shader inputs in SSO.
+          *
+          * If the shader is not separable (i.e., prog->SeparateShader is
+          * false), linking will have already failed when first is
+          * MESA_SHADER_GEOMETRY.
+          */
+         if (!assign_varying_locations(ctx, mem_ctx, prog,
+                                       NULL, sh,
+                                       num_tfeedback_decls, tfeedback_decls,
+                                       prog->Geom.VerticesIn))
+            goto done;
+      }
+
       if (num_tfeedback_decls != 0 || prog->SeparateShader) {
          /* There was no fragment shader, but we still have to assign varying
           * locations for use by transform feedback.
diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h
index 6ee585898..be4da5e0a 100644
--- a/mesalib/src/glsl/linker.h
+++ b/mesalib/src/glsl/linker.h
@@ -170,6 +170,12 @@ protected:
     */
    virtual void visit_field(const glsl_struct_field *field);
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major);
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major);
+
 private:
    /**
     * \param name_length  Length of the current name \b not including the
diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h
index bebe17fcf..8554a4652 100644
--- a/mesalib/src/glsl/list.h
+++ b/mesalib/src/glsl/list.h
@@ -51,6 +51,10 @@
  * Therefore, if \c head->next is \c NULL or \c tail_prev->prev is \c NULL,
  * the list is empty.
  *
+ * Do note that this means that the list nodes will contain pointers into the
+ * list structure itself and as a result you may not \c realloc() an \c
+ * exec_list or any structure in which an \c exec_list is embedded.
+ *
  * To anyone familiar with "exec lists" on the Amiga, this structure should
  * be immediately recognizable.  See the following link for the original Amiga
  * operating system documentation on the subject.
@@ -542,9 +546,7 @@ exec_list_validate(const struct exec_list *list)
     * either require C++ or assume the exec_node is embedded in a structure
     * which is not the case for this function.
     */
-   for (node = exec_list_get_head_const(list);
-        !exec_node_is_tail_sentinel(node);
-        node = exec_node_get_next_const(node)) {
+   for (node = list->head; node->next != NULL; node = node->next) {
       assert(node->next->prev == node);
       assert(node->prev->next == node);
    }
@@ -646,6 +648,12 @@ inline void exec_node::insert_before(exec_list *before)
         __next != NULL;                              \
         __node = __next, __next = (__type *)__next->next)
 
+#define foreach_in_list_reverse_safe(__type, __node, __list) \
+   for (__type *__node = (__type *)(__list)->tail_pred,      \
+               *__prev = (__type *)__node->prev;             \
+        __prev != NULL;                                      \
+        __node = __prev, __prev = (__type *)__prev->prev)
+
 #define foreach_in_list_use_after(__type, __inst, __list) \
    __type *(__inst);                                      \
    for ((__inst) = (__type *)(__list)->head;              \
@@ -673,6 +681,12 @@ inline void exec_node::insert_before(exec_list *before)
 	(__node)->__field.next != NULL; 				\
 	(__node) = exec_node_data(__type, (__node)->__field.next, __field))
 
+#define foreach_list_typed_reverse(__type, __node, __field, __list)        \
+   for (__type * __node =                                                \
+           exec_node_data(__type, (__list)->tail_pred, __field);        \
+        (__node)->__field.prev != NULL;                                 \
+        (__node) = exec_node_data(__type, (__node)->__field.prev, __field))
+
 #define foreach_list_typed_safe(__type, __node, __field, __list)           \
    for (__type * __node =                                                  \
            exec_node_data(__type, (__list)->head, __field),                \
@@ -682,4 +696,13 @@ inline void exec_node::insert_before(exec_list *before)
         __node = __next, __next =                                          \
            exec_node_data(__type, (__next)->__field.next, __field))
 
+#define foreach_list_typed_safe_reverse(__type, __node, __field, __list)   \
+   for (__type * __node =                                                  \
+           exec_node_data(__type, (__list)->tail_pred, __field),           \
+               * __prev =                                                  \
+           exec_node_data(__type, (__node)->__field.prev, __field);        \
+        __prev != NULL;                                                    \
+        __node = __prev, __prev =                                          \
+           exec_node_data(__type, (__prev)->__field.prev, __field))
+
 #endif /* LIST_CONTAINER_H */
diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp
index 1c1d34fef..2459fc1c3 100644
--- a/mesalib/src/glsl/loop_controls.cpp
+++ b/mesalib/src/glsl/loop_controls.cpp
@@ -102,9 +102,10 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
       return -1;
 
    if (!iter->type->is_integer()) {
+      const ir_expression_operation op = iter->type->is_double()
+         ? ir_unop_d2i : ir_unop_f2i;
       ir_rvalue *cast =
-	 new(mem_ctx) ir_expression(ir_unop_f2i, glsl_type::int_type, iter,
-				    NULL);
+         new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL);
 
       iter = cast->constant_expression_value();
    }
@@ -134,6 +135,9 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
       case GLSL_TYPE_FLOAT:
          iter = new(mem_ctx) ir_constant(float(iter_value + bias[i]));
          break;
+      case GLSL_TYPE_DOUBLE:
+         iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
+         break;
       default:
           unreachable(!"Unsupported type for loop iterator.");
       }
diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp
index 684285350..4779de059 100644
--- a/mesalib/src/glsl/lower_instructions.cpp
+++ b/mesalib/src/glsl/lower_instructions.cpp
@@ -36,12 +36,14 @@
  * - EXP_TO_EXP2
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
  * - LDEXP_TO_ARITH
+ * - DFREXP_DLDEXP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - CARRY_TO_ARITH
  * - BORROW_TO_ARITH
  * - SAT_TO_CLAMP
+ * - DOPS_TO_DFRAC
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -77,17 +79,25 @@
  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  * x**y to 2**(y * log2(x)).
  *
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
  * -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
  *
  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  * if we have to break it down like this anyway, it gives an
  * opportunity to do things like constant fold the (1.0 / op1) easily.
  *
+ * Note: we used to implement this as op1 * fract(op0 / op1), but that
+ * implementation had significant precision errors.
+ *
  * LDEXP_TO_ARITH:
  * -------------
- * Converts ir_binop_ldexp to arithmetic and bit operations.
+ * Converts ir_binop_ldexp to arithmetic and bit operations for float sources.
+ *
+ * DFREXP_DLDEXP_TO_ARITH:
+ * ---------------
+ * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to
+ * arithmetic and bit ops for double arguments.
  *
  * BITFIELD_INSERT_TO_BFM_BFI:
  * ---------------------------
@@ -109,9 +119,13 @@
  * -------------
  * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
  *
+ * DOPS_TO_DFRAC:
+ * --------------
+ * Converts double trunc, ceil, floor, round_even to fract; double sign to csel
  */
 
 #include "main/core.h" /* for M_LOG2E */
+#include "program/prog_instruction.h" /* for swizzle */
 #include "glsl_types.h"
 #include "ir.h"
 #include "ir_builder.h"
@@ -136,15 +150,25 @@ private:
    void sub_to_add_neg(ir_expression *);
    void div_to_mul_rcp(ir_expression *);
    void int_div_to_mul_rcp(ir_expression *);
-   void mod_to_fract(ir_expression *);
+   void mod_to_floor(ir_expression *);
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
    void bitfield_insert_to_bfm_bfi(ir_expression *);
    void ldexp_to_arith(ir_expression *);
+   void dldexp_to_arith(ir_expression *);
+   void dfrexp_sig_to_arith(ir_expression *);
+   void dfrexp_exp_to_arith(ir_expression *);
    void carry_to_arith(ir_expression *);
    void borrow_to_arith(ir_expression *);
    void sat_to_clamp(ir_expression *);
+   void double_dot_to_fma(ir_expression *);
+   void double_lrp(ir_expression *);
+   void dceil_to_dfrac(ir_expression *);
+   void dfloor_to_dfrac(ir_expression *);
+   void dround_even_to_dfrac(ir_expression *);
+   void dtrunc_to_dfrac(ir_expression *);
+   void dsign_to_csel(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -175,7 +199,7 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 void
 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 {
-   assert(ir->operands[1]->type->is_float());
+   assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
 
    /* New expression for the 1.0 / op1 */
    ir_rvalue *expr;
@@ -276,37 +300,50 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
 }
 
 void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
 {
-   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
-					   ir_var_temporary);
-   this->base_ir->insert_before(temp);
-
-   ir_assignment *const assign =
-      new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
-			    ir->operands[1], NULL);
-
-   this->base_ir->insert_before(assign);
+   ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
+                                         ir_var_temporary);
+   ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
+                                         ir_var_temporary);
+   this->base_ir->insert_before(x);
+   this->base_ir->insert_before(y);
+
+   ir_assignment *const assign_x =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
+                            ir->operands[0], NULL);
+   ir_assignment *const assign_y =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
+                            ir->operands[1], NULL);
+
+   this->base_ir->insert_before(assign_x);
+   this->base_ir->insert_before(assign_y);
 
    ir_expression *const div_expr =
-      new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
-			    ir->operands[0],
-			    new(ir) ir_dereference_variable(temp));
+      new(ir) ir_expression(ir_binop_div, x->type,
+                            new(ir) ir_dereference_variable(x),
+                            new(ir) ir_dereference_variable(y));
 
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
     */
-   if (lowering(DIV_TO_MUL_RCP))
+   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
       div_to_mul_rcp(div_expr);
 
-   ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
-					   ir->operands[0]->type,
-					   div_expr,
-					   NULL);
+   ir_expression *const floor_expr =
+      new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 
-   ir->operation = ir_binop_mul;
-   ir->operands[0] = new(ir) ir_dereference_variable(temp);
-   ir->operands[1] = expr;
+   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+      dfloor_to_dfrac(floor_expr);
+
+   ir_expression *const mul_expr =
+      new(ir) ir_expression(ir_binop_mul,
+                            new(ir) ir_dereference_variable(y),
+                            floor_expr);
+
+   ir->operation = ir_binop_sub;
+   ir->operands[0] = new(ir) ir_dereference_variable(x);
+   ir->operands[1] = mul_expr;
    this->progress = true;
 }
 
@@ -455,6 +492,262 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 }
 
 void
+lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
+{
+   /* Double version of ldexp_to_arith (see it for structure). The unbiased
+    * exponent of <x> is obtained with ir_unop_frexp_exp.
+    */
+
+   const unsigned vec_elem = ir->type->vector_elements;
+
+   /* Types */
+   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+   /* Constants */
+   ir_constant *zeroi = ir_constant::zero(ir, ivec);
+
+   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
+
+   ir_constant *exp_shift = new(ir) ir_constant(20);
+   ir_constant *exp_width = new(ir) ir_constant(11);
+   ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
+
+   /* Temporary variables */
+   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
+   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
+
+   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
+                                                  ir_var_temporary);
+
+   ir_variable *extracted_biased_exp =
+      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
+   ir_variable *resulting_biased_exp =
+      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
+
+   ir_variable *is_not_zero_or_underflow =
+      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+
+   ir_instruction &i = *base_ir;
+
+   /* Copy <x> and <exp> arguments. */
+   i.insert_before(x);
+   i.insert_before(assign(x, ir->operands[0]));
+   i.insert_before(exp);
+   i.insert_before(assign(exp, ir->operands[1]));
+
+   ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
+   if (lowering(DFREXP_DLDEXP_TO_ARITH))
+      dfrexp_exp_to_arith(frexp_exp);
+
+   /* Extract the biased exponent from <x>. */
+   i.insert_before(extracted_biased_exp);
+   i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
+
+   i.insert_before(resulting_biased_exp);
+   i.insert_before(assign(resulting_biased_exp,
+                          add(extracted_biased_exp, exp)));
+
+   /* Test if result is ±0.0, subnormal, or underflow by checking if the
+    * resulting biased exponent would be less than 0x1. If so, the result is
+    * 0.0 with the sign of x. (Actually, invert the conditions so that
+    * immediate values are the second arguments, which is better for i965)
+    * TODO: Implement in a vector fashion.
+    */
+   i.insert_before(zero_sign_x);
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+      i.insert_before(unpacked);
+      i.insert_before(
+            assign(unpacked,
+                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
+      i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)),
+                             WRITEMASK_Y));
+      i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X));
+      i.insert_before(assign(zero_sign_x,
+                             expr(ir_unop_pack_double_2x32, unpacked),
+                             1 << elem));
+   }
+   i.insert_before(is_not_zero_or_underflow);
+   i.insert_before(assign(is_not_zero_or_underflow,
+                          gequal(resulting_biased_exp,
+                                  new(ir) ir_constant(0x1, vec_elem))));
+   i.insert_before(assign(x, csel(is_not_zero_or_underflow,
+                                  x, zero_sign_x)));
+   i.insert_before(assign(resulting_biased_exp,
+                          csel(is_not_zero_or_underflow,
+                               resulting_biased_exp, zeroi)));
+
+   /* We could test for overflows by checking if the resulting biased exponent
+    * would be greater than 0x7FE (the exponent field is 11 bits wide here).
+    * Turns out we don't need to because the GLSL spec says:
+    *
+    *    "If this product is too large to be represented in the
+    *     floating-point type, the result is undefined."
+    */
+   /* Insert the new exponent into each component's high word and repack. */
+   ir_rvalue *results[4] = {NULL};
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+      i.insert_before(unpacked);
+      i.insert_before(
+            assign(unpacked,
+                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
+
+      ir_expression *bfi = bitfield_insert(
+            swizzle_y(unpacked),
+            i2u(swizzle(resulting_biased_exp, elem, 1)),
+            exp_shift->clone(ir, NULL),
+            exp_width->clone(ir, NULL));
+
+      if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
+         bitfield_insert_to_bfm_bfi(bfi);
+
+      i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));
+
+      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
+   }
+
+   ir->operation = ir_quadop_vector;
+   ir->operands[0] = results[0];
+   ir->operands[1] = results[1];
+   ir->operands[2] = results[2];
+   ir->operands[3] = results[3];
+
+   /* Operands past vec_elem stay NULL: results[] was zero-initialized and
+    * only its first vec_elem entries were filled in by the loop above.
+    */
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir)
+{
+   const unsigned vec_elem = ir->type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+   /* Lower frexp_sig for doubles.  Double-precision values are stored as
+    *   1 sign bit;
+    *   11 exponent bits;
+    *   52 mantissa bits.
+    *
+    * We're just extracting the significand here, so we only need to modify
+    * the upper 32-bit uint. Unfortunately we must extract each double
+    * independently as there is no vector version of unpackDouble.
+    */
+
+   ir_instruction &i = *base_ir;
+
+   ir_variable *is_not_zero =
+      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
+   ir_rvalue *results[4] = {NULL};
+
+   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
+   i.insert_before(is_not_zero);
+   i.insert_before(
+         assign(is_not_zero,
+                nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero)));
+
+   /* TODO: Remake this as more vector-friendly when int64 support is
+    * available.
+    */
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_constant *zero = new(ir) ir_constant(0u, 1);
+      ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1);
+
+      /* Exponent of double floating-point values in the range [0.5, 1.0). */
+      ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1);
+
+      ir_variable *bits =
+         new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary);
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+
+      ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1);
+
+      i.insert_before(bits);
+      i.insert_before(unpacked);
+      i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x)));
+
+      /* Manipulate the high uint to remove the exponent and replace it with
+       * either the default exponent (non-zero input) or zero (zero input).
+       */
+      i.insert_before(assign(bits, swizzle_y(unpacked)));
+      i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask)));
+      i.insert_before(assign(bits, bit_or(bits,
+                                          csel(swizzle(is_not_zero, elem, 1),
+                                               exponent_value,
+                                               zero))));
+      i.insert_before(assign(unpacked, bits, WRITEMASK_Y));
+      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
+   }
+
+   /* Put the dvec back together; entries past vec_elem remain NULL. */
+   ir->operation = ir_quadop_vector;
+   ir->operands[0] = results[0];
+   ir->operands[1] = results[1];
+   ir->operands[2] = results[2];
+   ir->operands[3] = results[3];
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir)
+{
+   const unsigned vec_elem = ir->type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
+
+   /* Lower frexp_exp for doubles.  Double-precision values are stored as
+    *   1 sign bit;
+    *   11 exponent bits;
+    *   52 mantissa bits.
+    *
+    * We're just extracting the exponent here, so we only care about the upper
+    * 32-bit uint.
+    */
+
+   ir_instruction &i = *base_ir;
+
+   ir_variable *is_not_zero =
+      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
+   ir_variable *high_words =
+      new(ir) ir_variable(uvec, "high_words", ir_var_temporary);
+   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
+   ir_constant *izero = new(ir) ir_constant(0, vec_elem);
+
+   ir_rvalue *absval = abs(ir->operands[0]);
+
+   i.insert_before(is_not_zero);
+   i.insert_before(high_words);
+   i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero)));
+
+   /* Extract all of the upper uints (of |x|, so the sign bit is clear). */
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1);
+
+      i.insert_before(assign(high_words,
+                             swizzle_y(expr(ir_unop_unpack_double_2x32, x)),
+                             1 << elem));
+
+   }
+   ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem);
+   ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem);
+
+   /* For non-zero inputs, shift the exponent down and apply bias; else 0. */
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
+   ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
+   ir->operands[2] = izero;
+
+   this->progress = true;
+}
+
+void
 lower_instructions_visitor::carry_to_arith(ir_expression *ir)
 {
    /* Translates
@@ -508,10 +801,211 @@ lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::double_dot_to_fma(ir_expression *ir)
+{
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res",
+					   ir_var_temporary);
+   this->base_ir->insert_before(temp);
+   /* Lower dot(op0, op1): temp accumulates op0[i] * op1[i], i = nc-1 .. 1. */
+   int nc = ir->operands[0]->type->components();
+   for (int i = nc - 1; i >= 1; i--) {
+      ir_assignment *assig;
+      if (i == (nc - 1)) {
+         assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
+                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1)));
+      } else {
+         assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
+                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1),
+                                  temp));
+      }
+      this->base_ir->insert_before(assig);
+   }
+   /* The expression itself becomes fma(op0[0], op1[0], temp). */
+   ir->operation = ir_triop_fma;
+   ir->operands[0] = swizzle(ir->operands[0], 0, 1);
+   ir->operands[1] = swizzle(ir->operands[1], 0, 1);
+   ir->operands[2] = new(ir) ir_dereference_variable(temp);
+
+   this->progress = true;
+
+}
+
+void
+lower_instructions_visitor::double_lrp(ir_expression *ir)
+{
+   int swizval;
+   ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2];
+   ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements);
+   /* A scalar op2 is broadcast (XXXX); otherwise it is used as-is (XYZW). */
+   switch (op2->type->vector_elements) {
+   case 1:
+      swizval = SWIZZLE_XXXX;
+      break;
+   default:
+      assert(op0->type->vector_elements == op2->type->vector_elements);
+      swizval = SWIZZLE_XYZW;
+      break;
+   }
+   /* lrp becomes fma(op2, op1, (1 - op2) * op0); operand 1 is left as is. */
+   ir->operation = ir_triop_fma;
+   ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
+   ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
+{
+   /* Lower double ceil(x) using fract:
+    *    frtemp = frac(x);
+    *    temp = sub(x, frtemp);
+    *    result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
+    */
+   ir_instruction &i = *base_ir;
+   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+                                             ir_var_temporary);
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(ir->operands[0])));
+
+   ir->operation = ir_binop_add;
+   ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
+   ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
+{
+   /* Lower double floor(x) to x - fract(x).
+    * The expression is rewritten in place as a subtraction: operand 0 stays
+    * x, and operand 1 becomes fract() of a clone of x.
+    */
+   ir->operation = ir_binop_sub;
+   ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
+
+   this->progress = true;
+}
+void
+lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
+{
+   /*
+    * Lower double round_even(x) using fract:
+    * temp = x + 0.5;
+    * frtemp = frac(temp);
+    * t2 = sub(temp, frtemp);
+    * if (frac(x) == 0.5)            -- tie: keep t2 only if it is even
+    *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
+    *  else
+    *     result = t2;
+    *
+    */
+   ir_instruction &i = *base_ir;
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+                                             ir_var_temporary);
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
+                                           ir_var_temporary);
+   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
+                                           ir_var_temporary);
+   ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
+   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
+
+   i.insert_before(temp);
+   i.insert_before(assign(temp, add(ir->operands[0], p5)));
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(temp)));
+
+   i.insert_before(t2);
+   i.insert_before(assign(t2, sub(temp, frtemp)));
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
+                           p5->clone(ir, NULL));
+   ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
+                                zero),
+                          t2,
+                          sub(t2, one));
+   ir->operands[2] = new(ir) ir_dereference_variable(t2);
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
+{
+   /* Lower double trunc(x) using fract:
+    *    frtemp = frac(x);
+    *    temp = sub(x, frtemp);
+    *    result = x >= 0 ? temp : (temp + ((frtemp == 0.0) ? 0 : 1));
+    */
+   ir_rvalue *arg = ir->operands[0];
+   ir_instruction &i = *base_ir;
+
+   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
+   ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
+                                             ir_var_temporary);
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
+                                           ir_var_temporary);
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(arg)));
+   i.insert_before(temp);
+   i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
+   ir->operands[1] = new (ir) ir_dereference_variable(temp);
+   ir->operands[2] = add(temp,
+                         csel(equal(frtemp, zero->clone(ir, NULL)),
+                              zero->clone(ir, NULL),
+                              one));
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
+{
+   /* Lower double sign(x) to two nested csel expressions:
+    *    temp = x > 0.0 ? 1.0 : 0.0;
+    *    result = x < 0.0 ? -1.0 : temp;
+    */
+   ir_rvalue *arg = ir->operands[0];
+   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
+   ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = less(arg->clone(ir, NULL),
+                          zero->clone(ir, NULL));
+   ir->operands[1] = neg_one;
+   ir->operands[2] = csel(greater(arg, zero),
+                          one,
+                          zero->clone(ir, NULL));
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
    switch (ir->operation) {
+   case ir_binop_dot:
+      if (ir->operands[0]->type->is_double())
+         double_dot_to_fma(ir);
+      break;
+   case ir_triop_lrp:
+      if (ir->operands[0]->type->is_double())
+         double_lrp(ir);
+      break;
    case ir_binop_sub:
       if (lowering(SUB_TO_ADD_NEG))
 	 sub_to_add_neg(ir);
@@ -520,7 +1014,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
    case ir_binop_div:
       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 	 int_div_to_mul_rcp(ir);
-      else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() ||
+                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
 	 div_to_mul_rcp(ir);
       break;
 
@@ -535,8 +1030,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_mod:
-      if (lowering(MOD_TO_FRACT) && ir->type->is_float())
-	 mod_to_fract(ir);
+      if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
+	 mod_to_floor(ir);
       break;
 
    case ir_binop_pow:
@@ -550,8 +1045,20 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_ldexp:
-      if (lowering(LDEXP_TO_ARITH))
+      if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
          ldexp_to_arith(ir);
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double())
+         dldexp_to_arith(ir);
+      break;
+
+   case ir_unop_frexp_exp:
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
+         dfrexp_exp_to_arith(ir);
+      break;
+
+   case ir_unop_frexp_sig:
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
+         dfrexp_sig_to_arith(ir);
       break;
 
    case ir_binop_carry:
@@ -569,6 +1076,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          sat_to_clamp(ir);
       break;
 
+   case ir_unop_trunc:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dtrunc_to_dfrac(ir);
+      break;
+
+   case ir_unop_ceil:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dceil_to_dfrac(ir);
+      break;
+
+   case ir_unop_floor:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dfloor_to_dfrac(ir);
+      break;
+
+   case ir_unop_round_even:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dround_even_to_dfrac(ir);
+      break;
+
+   case ir_unop_sign:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dsign_to_csel(ir);
+      break;
    default:
       return visit_continue;
    }
diff --git a/mesalib/src/glsl/lower_mat_op_to_vec.cpp b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
index 105ee0d3f..dda754f91 100644
--- a/mesalib/src/glsl/lower_mat_op_to_vec.cpp
+++ b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
@@ -354,6 +354,8 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
 
    /* OK, time to break down this matrix operation. */
    switch (orig_expr->operation) {
+   case ir_unop_d2f:
+   case ir_unop_f2d:
    case ir_unop_neg: {
       /* Apply the operation to each column.*/
       for (i = 0; i < matrix_columns; i++) {
diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp
index 43dd067fa..4ea4ccb03 100644
--- a/mesalib/src/glsl/lower_ubo_reference.cpp
+++ b/mesalib/src/glsl/lower_ubo_reference.cpp
@@ -140,7 +140,8 @@ public:
 
    void handle_rvalue(ir_rvalue **rvalue);
    void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
-		       unsigned int deref_offset, bool row_major);
+                       unsigned int deref_offset, bool row_major,
+                       int matrix_columns);
    ir_expression *ubo_load(const struct glsl_type *type,
 			   ir_rvalue *offset);
 
@@ -265,6 +266,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
    unsigned const_offset = 0;
    bool row_major = is_dereferenced_thing_row_major(deref);
+   int matrix_columns = 1;
 
    /* Calculate the offset to the start of the region of the UBO
     * dereferenced by *rvalue.  This may be a variable offset if an
@@ -288,6 +290,9 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 	     * vector) is handled below in emit_ubo_loads.
 	     */
 	    array_stride = 4;
+            if (deref_array->array->type->is_double())
+               array_stride *= 2;
+            matrix_columns = deref_array->array->type->matrix_columns;
          } else if (deref_array->type->is_interface()) {
             /* We're processing an array dereference of an interface instance
 	     * array.  The thing being dereferenced *must* be a variable
@@ -334,15 +339,6 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 	 const glsl_type *struct_type = deref_record->record->type;
 	 unsigned intra_struct_offset = 0;
 
-         /* glsl_type::std140_base_alignment doesn't grok interfaces.  Use
-          * 16-bytes for the alignment because that is the general minimum of
-          * std140.
-          */
-         const unsigned struct_alignment = struct_type->is_interface()
-            ? 16
-            : struct_type->std140_base_alignment(row_major);
-
-
 	 for (unsigned int i = 0; i < struct_type->length; i++) {
 	    const glsl_type *type = struct_type->fields.structure[i].type;
 
@@ -372,7 +368,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
              */
             if (type->without_array()->is_record()) {
                intra_struct_offset = glsl_align(intra_struct_offset,
-                                                struct_alignment);
+                                                field_align);
 
             }
 	 }
@@ -405,7 +401,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    base_ir->insert_before(assign(load_offset, offset));
 
    deref = new(mem_ctx) ir_dereference_variable(load_var);
-   emit_ubo_loads(deref, load_offset, const_offset, row_major);
+   emit_ubo_loads(deref, load_offset, const_offset, row_major, matrix_columns);
    *rvalue = deref;
 
    progress = true;
@@ -436,7 +432,8 @@ void
 lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 					    ir_variable *base_offset,
                                             unsigned int deref_offset,
-                                            bool row_major)
+                                            bool row_major,
+                                            int matrix_columns)
 {
    if (deref->type->is_record()) {
       unsigned int field_offset = 0;
@@ -453,7 +450,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
                        field->type->std140_base_alignment(row_major));
 
 	 emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset,
-                        row_major);
+                        row_major, 1);
 
 	 field_offset += field->type->std140_size(row_major);
       }
@@ -472,7 +469,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 					      element);
 	 emit_ubo_loads(element_deref, base_offset,
 			deref_offset + i * array_stride,
-                        row_major);
+                        row_major, 1);
       }
       return;
    }
@@ -488,14 +485,18 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
             /* For a row-major matrix, the next column starts at the next
              * element.
              */
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * 4,
-                           row_major);
+            int size_mul = deref->type->is_double() ? 8 : 4;
+            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
+                           row_major, deref->type->matrix_columns);
          } else {
             /* std140 always rounds the stride of arrays (and matrices) to a
-             * vec4, so matrices are always 16 between columns/rows.
+             * vec4, so matrices are always 16 between columns/rows. With
+             * doubles, they will be 32 apart when there are more than 2 rows.
              */
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * 16,
-                           row_major);
+            int size_mul = (deref->type->is_double() &&
+                            deref->type->vector_elements > 2) ? 32 : 16;
+            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
+                           row_major, deref->type->matrix_columns);
          }
       }
       return;
@@ -510,16 +511,24 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
 				    ubo_load(deref->type, offset)));
    } else {
+      unsigned N = deref->type->is_double() ? 8 : 4;
+
       /* We're dereffing a column out of a row-major matrix, so we
        * gather the vector from each stored row.
       */
-      assert(deref->type->base_type == GLSL_TYPE_FLOAT);
+      assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
+             deref->type->base_type == GLSL_TYPE_DOUBLE);
       /* Matrices, row_major or not, are stored as if they were
        * arrays of vectors of the appropriate size in std140.
        * Arrays have their strides rounded up to a vec4, so the
-       * matrix stride is always 16.
+       * matrix stride is always 16. However, the stride of a double matrix
+       * may be either 16 or 32, depending on the number of columns.
        */
-      unsigned matrix_stride = 16;
+      assert(matrix_columns <= 4);
+      unsigned matrix_stride = glsl_align(matrix_columns * N, 16);
+
+      const glsl_type *ubo_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
+         glsl_type::float_type : glsl_type::double_type;
 
       for (unsigned i = 0; i < deref->type->vector_elements; i++) {
 	 ir_rvalue *chan_offset =
@@ -527,7 +536,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 		new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
 
 	 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-				       ubo_load(glsl_type::float_type,
+				       ubo_load(ubo_type,
 						chan_offset),
 				       (1U << i)));
       }
diff --git a/mesalib/src/glsl/main.cpp b/mesalib/src/glsl/main.cpp
index 432643707..47c0db81e 100644
--- a/mesalib/src/glsl/main.cpp
+++ b/mesalib/src/glsl/main.cpp
@@ -193,8 +193,6 @@ initialize_context(struct gl_context *ctx, gl_api api)
       ctx->Const.MaxGeometryOutputVertices = 256;
       ctx->Const.MaxGeometryTotalOutputComponents = 1024;
 
-//      ctx->Const.MaxGeometryVaryingComponents = 64;
-
       ctx->Const.MaxVarying = 60 / 4;
       break;
    case 300:
diff --git a/mesalib/src/glsl/nir/.gitignore b/mesalib/src/glsl/nir/.gitignore
new file mode 100644
index 000000000..64828eba6
--- /dev/null
+++ b/mesalib/src/glsl/nir/.gitignore
@@ -0,0 +1,5 @@
+nir_builder_opcodes.h
+nir_opt_algebraic.c
+nir_opcodes.c
+nir_opcodes.h
+nir_constant_expressions.c
diff --git a/mesalib/src/glsl/nir/README b/mesalib/src/glsl/nir/README
new file mode 100644
index 000000000..2c81db9db
--- /dev/null
+++ b/mesalib/src/glsl/nir/README
@@ -0,0 +1,118 @@
+New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR.
+Its design inherits from the various IRs that Mesa has used in the past, as
+well as Direct3D assembly, and it includes a few new ideas as well. It is a
+flat (in terms of using instructions instead of expressions), typeless IR,
+similar to TGSI and Mesa IR.  It also supports SSA (although it doesn't require
+it).
+
+Variables
+=========
+
+NIR includes support for source-level GLSL variables through a structure mostly
+copied from GLSL IR. These will be used for linking and conversion from GLSL IR
+(and later, from an AST), but for the most part, they will be lowered to
+registers (see below) and loads/stores.
+
+Registers
+=========
+
+Registers are light-weight; they consist of a structure that only contains its
+size, its index for liveness analysis, and an optional name for debugging. In
+addition, registers can be local to a function or global to the entire shader;
+the latter will be used in ARB_shader_subroutine for passing parameters and
+getting return values from subroutines. Registers can also be an array, in which
+case they can be accessed indirectly. Each ALU instruction (add, subtract, etc.)
+works directly with registers or SSA values (see below).
+
+SSA
+===
+
+Everywhere a register can be loaded/stored, an SSA value can be used instead.
+The only exception is that arrays/indirect addressing are not supported with
+SSA; although research has been done on extensions of SSA to arrays before, it's
+usually for the purpose of parallelization (which we're not interested in), and
+adds some overhead in the form of adding copies or extra arrays (which is much
+more expensive than introducing copies between non-array registers). SSA uses
+point directly to their corresponding definition, which in turn points to the
+instruction it is part of. This creates an implicit use-def chain and avoids the
+need for an external structure for each SSA register.
+
+Functions
+=========
+
+Support for function calls is mostly similar to GLSL IR. Each shader contains a
+list of functions, and each function has a list of overloads. Each overload
+contains a list of parameters, and may contain an implementation which specifies
+the variables that correspond to the parameters and return value. Inlining a
+function, assuming it has a single return point, is as simple as copying its
+instructions, registers, and local variables into the target function and then
+inserting copies to and from the new parameters as appropriate. After functions
+are inlined and any non-subroutine functions are deleted, parameters and return
+variables will be converted to global variables and then global registers. We
+don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons:
+
+- If we want to do optimizations before link time, we need to have the function
+signature available during link-time.
+
+- If we do any inlining before link time, then we might wind up with the
+inlined function and the non-inlined function using the same global
+variables/registers which would preclude optimization.
+
+Intrinsics
+==========
+
+Any operation (other than function calls and textures) which touches a variable
+or is not referentially transparent is represented by an intrinsic. Intrinsics
+are similar to the idea of a "builtin function," i.e. a function declaration
+whose implementation is provided by the backend, except they are more powerful
+in the following ways:
+
+- They can also load and store registers when appropriate, which limits the
+number of variables needed in later stages of the IR while obviating the need
+for a separate load/store variable instruction.
+
+- Intrinsics can be marked as side-effect free, which permits them to be
+treated like any other instruction when it comes to optimizations. This allows
+load intrinsics to be represented as intrinsics while still being optimized
+away by dead code elimination, common subexpression elimination, etc.
+
+Intrinsics are used for:
+
+- Atomic operations
+- Memory barriers
+- Subroutine calls
+- Geometry shader emitVertex and endPrimitive
+- Loading and storing variables (before lowering)
+- Loading and storing uniforms, shader inputs and outputs, etc (after lowering)
+- Copying variables (cases where in GLSL the destination is a structure or
+array)
+- The kitchen sink
+- ...
+
+Textures
+========
+
+Unfortunately, there are far too many texture operations to represent each one
+of them with an intrinsic, so there's a special texture instruction similar to
+the GLSL IR one. The biggest difference is that, while the texture instruction
+has a sampler dereference field used just like in GLSL IR, this gets lowered to
+a texture unit index (with a possible indirect offset) while the type
+information of the original sampler is kept around for backends. Also, all the
+non-constant sources are stored in a single array to make it easier for
+optimization passes to iterate over all the sources.
+
+Control Flow
+============
+
+Like in GLSL IR, control flow consists of a tree of "control flow nodes", which
+include if statements and loops, and jump instructions (break, continue, and
+return). Unlike GLSL IR, though, the leaves of the tree aren't statements but
+basic blocks. Each basic block also keeps track of its successors and
+predecessors, and function implementations keep track of the beginning basic
+block (the first basic block of the function) and the ending basic block (a fake
+basic block that every return statement points to). Together, these elements
+make up the control flow graph, in this case a redundant piece of information on
+top of the control flow tree that will be used by almost all the optimizations.
+There are helper functions to add and remove control flow nodes that also update
+the control flow graph, and so usually it doesn't need to be touched by passes
+that modify control flow nodes.
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
new file mode 100644
index 000000000..544d0d932
--- /dev/null
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -0,0 +1,1814 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "glsl_to_nir.h"
+#include "ir_visitor.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+
+/*
+ * pass to lower GLSL IR to NIR
+ *
+ * This will lower variable dereferences to loads/stores of corresponding
+ * variables in NIR - the variables will be converted to registers in a later
+ * pass.
+ */
+
+namespace {
+/* Main pass: the visitor that lowers each GLSL IR node it visits into 'shader'. */
+class nir_visitor : public ir_visitor
+{
+public:
+   nir_visitor(nir_shader *shader, bool supports_ints);
+   ~nir_visitor();
+
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_if *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_emit_vertex *);
+   virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_dereference_variable *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_dereference_array *);
+
+   void create_function(ir_function *ir); /* called by nir_function_visitor */
+
+private:
+   void create_overload(ir_function_signature *ir, nir_function *function);
+   void add_instr(nir_instr *instr, unsigned num_components);
+   nir_src evaluate_rvalue(ir_rvalue *ir);
+
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src *srcs);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
+                       nir_src src2);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
+                       nir_src src2, nir_src src3);
+
+   bool supports_ints; /* copied from the constructor argument */
+
+   nir_shader *shader; /* shader being built; passed to the *_create() calls */
+   nir_function_impl *impl; /* implementation currently being filled in */
+   exec_list *cf_node_list; /* list that new CF nodes and instructions go to */
+   nir_instr *result; /* result of the expression tree last visited */
+
+   /* the head of the dereference chain we're creating */
+   nir_deref_var *deref_head;
+   /* the tail of the dereference chain we're creating */
+   nir_deref *deref_tail;
+
+   nir_variable *var; /* variable created by ir_variable visitor */
+
+   /* whether the IR we're operating on is per-function or global */
+   bool is_global;
+
+   /* map of ir_variable -> nir_variable */
+   struct hash_table *var_table;
+
+   /* map of ir_function_signature -> nir_function_overload */
+   struct hash_table *overload_table;
+};
+
+/*
+ * This visitor runs before the main visitor, calling create_function() for
+ * each function so that the main visitor can resolve forward references in
+ * calls.
+ */
+
+class nir_function_visitor : public ir_hierarchical_visitor
+{
+public:
+   nir_function_visitor(nir_visitor *v) : visitor(v)
+   {
+   }
+   virtual ir_visitor_status visit_enter(ir_function *);
+
+private:
+   nir_visitor *visitor; /* main visitor whose create_function() is invoked */
+};
+
+}; /* end of anonymous namespace */
+
+static const nir_shader_compiler_options default_options = {
+}; /* empty initializer; used by glsl_to_nir() when it is given no parse state */
+
+nir_shader *
+glsl_to_nir(exec_list *ir, _mesa_glsl_parse_state *state,
+            bool native_integers)
+{ /* Builds a new nir_shader from the GLSL IR list 'ir' and returns it. */
+   const nir_shader_compiler_options *options;
+
+   if (state) { /* use, and create on first use, the context's per-stage options */
+      struct gl_context *ctx = state->ctx;
+      struct gl_shader_compiler_options *gl_options =
+         &ctx->Const.ShaderCompilerOptions[state->stage];
+
+      if (!gl_options->NirOptions) {
+         nir_shader_compiler_options *new_options =
+            rzalloc(ctx, nir_shader_compiler_options);
+         options = gl_options->NirOptions = new_options; /* cached for reuse */
+
+         if (gl_options->EmitNoPow)
+            new_options->lower_fpow = true;
+      } else {
+         options = gl_options->NirOptions;
+      }
+   } else { /* no parse state: fall back to the file-local default_options */
+      options = &default_options;
+   }
+
+   nir_shader *shader = nir_shader_create(NULL, options);
+
+   if (state) { /* copy the array of user structure type pointers */
+      shader->num_user_structures = state->num_user_structures;
+      shader->user_structures = ralloc_array(shader, glsl_type *,
+                                             shader->num_user_structures);
+      memcpy(shader->user_structures, state->user_structures,
+            shader->num_user_structures * sizeof(glsl_type *));
+   } else {
+      shader->num_user_structures = 0;
+      shader->user_structures = NULL;
+   }
+
+   nir_visitor v1(shader, native_integers);
+   nir_function_visitor v2(&v1);
+   v2.run(ir); /* pass 1: create functions/overloads so calls can resolve */
+   visit_exec_list(ir, &v1); /* pass 2: translate the IR itself */
+
+   return shader;
+}
+
+nir_visitor::nir_visitor(nir_shader *shader, bool supports_ints)
+{ /* Stores the arguments and creates the two lookup tables. */
+   this->supports_ints = supports_ints;
+   this->shader = shader;
+   this->is_global = true; /* cleared only while a function body is visited */
+   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+}
+
+nir_visitor::~nir_visitor()
+{ /* Destroys the two hash tables created by the constructor. */
+   _mesa_hash_table_destroy(this->var_table, NULL);
+   _mesa_hash_table_destroy(this->overload_table, NULL);
+}
+
+static nir_constant *
+constant_copy(ir_constant *ir, void *mem_ctx)
+{ /* Returns a nir_constant copy of 'ir' allocated from mem_ctx, or NULL. */
+   if (ir == NULL) /* NULL in, NULL out */
+      return NULL;
+
+   nir_constant *ret = ralloc(mem_ctx, nir_constant);
+
+   unsigned total_elems = ir->type->components();
+   unsigned i;
+   switch (ir->type->base_type) { /* numeric/bool types: copy component-wise */
+   case GLSL_TYPE_UINT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.u[i] = ir->value.u[i];
+      break;
+
+   case GLSL_TYPE_INT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.i[i] = ir->value.i[i];
+      break;
+
+   case GLSL_TYPE_FLOAT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.f[i] = ir->value.f[i];
+      break;
+
+   case GLSL_TYPE_BOOL:
+      for (i = 0; i < total_elems; i++)
+         ret->value.b[i] = ir->value.b[i];
+      break;
+
+   case GLSL_TYPE_STRUCT: /* one recursive copy per field in ir->components */
+      ret->elements = ralloc_array(mem_ctx, nir_constant *,
+                                   ir->type->length);
+      i = 0;
+      foreach_in_list(ir_constant, field, &ir->components) {
+         ret->elements[i] = constant_copy(field, mem_ctx);
+         i++;
+      }
+      break;
+
+   case GLSL_TYPE_ARRAY: /* one recursive copy per array element */
+      ret->elements = ralloc_array(mem_ctx, nir_constant *,
+                                   ir->type->length);
+
+      for (i = 0; i < ir->type->length; i++)
+         ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
+      break;
+
+   default: /* every other base type is treated as unreachable */
+      unreachable("not reached");
+   }
+
+   return ret;
+}
+
+void
+nir_visitor::visit(ir_variable *ir)
+{ /* Creates the nir_variable mirroring 'ir' and registers it by mode. */
+   nir_variable *var = ralloc(shader, nir_variable); /* local; published via this->var at the end */
+   var->type = ir->type;
+   var->name = ralloc_strdup(var, ir->name);
+
+   if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) {
+      unsigned size = ir->get_interface_type()->length;
+      var->max_ifc_array_access = ralloc_array(var, unsigned, size);
+      memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(),
+             size * sizeof(unsigned));
+   } else {
+      var->max_ifc_array_access = NULL;
+   }
+
+   var->data.read_only = ir->data.read_only;
+   var->data.centroid = ir->data.centroid;
+   var->data.sample = ir->data.sample;
+   var->data.invariant = ir->data.invariant;
+   var->data.location = ir->data.location; /* overridden below for VARYING_SLOT_FACE */
+
+   switch(ir->data.mode) { /* map the GLSL IR mode onto a NIR variable mode */
+   case ir_var_auto:
+   case ir_var_temporary:
+      if (is_global)
+         var->data.mode = nir_var_global;
+      else
+         var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_function_in:
+   case ir_var_function_out:
+   case ir_var_function_inout:
+   case ir_var_const_in:
+      var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_shader_in:
+      if (ir->data.location == VARYING_SLOT_FACE) {
+         /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
+         var->data.location = SYSTEM_VALUE_FRONT_FACE;
+         var->data.mode = nir_var_system_value;
+      } else {
+         var->data.mode = nir_var_shader_in;
+      }
+      break;
+
+   case ir_var_shader_out:
+      var->data.mode = nir_var_shader_out;
+      break;
+
+   case ir_var_uniform:
+      var->data.mode = nir_var_uniform;
+      break;
+
+
+   case ir_var_system_value:
+      var->data.mode = nir_var_system_value;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.interpolation = ir->data.interpolation;
+   var->data.origin_upper_left = ir->data.origin_upper_left;
+   var->data.pixel_center_integer = ir->data.pixel_center_integer;
+   var->data.explicit_location = ir->data.explicit_location;
+   var->data.explicit_index = ir->data.explicit_index;
+   var->data.explicit_binding = ir->data.explicit_binding;
+   var->data.has_initializer = ir->data.has_initializer;
+   var->data.is_unmatched_generic_inout = ir->data.is_unmatched_generic_inout;
+   var->data.location_frac = ir->data.location_frac;
+   var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
+   var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
+
+   switch (ir->data.depth_layout) { /* one-to-one enum translation */
+   case ir_depth_layout_none:
+      var->data.depth_layout = nir_depth_layout_none;
+      break;
+   case ir_depth_layout_any:
+      var->data.depth_layout = nir_depth_layout_any;
+      break;
+   case ir_depth_layout_greater:
+      var->data.depth_layout = nir_depth_layout_greater;
+      break;
+   case ir_depth_layout_less:
+      var->data.depth_layout = nir_depth_layout_less;
+      break;
+   case ir_depth_layout_unchanged:
+      var->data.depth_layout = nir_depth_layout_unchanged;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.index = ir->data.index;
+   var->data.binding = ir->data.binding;
+   /* XXX Get rid of buffer_index */
+   var->data.atomic.buffer_index = ir->data.binding; /* same value as data.binding */
+   var->data.atomic.offset = ir->data.atomic.offset;
+   var->data.image.read_only = ir->data.image_read_only;
+   var->data.image.write_only = ir->data.image_write_only;
+   var->data.image.coherent = ir->data.image_coherent;
+   var->data.image._volatile = ir->data.image_volatile;
+   var->data.image.restrict_flag = ir->data.image_restrict;
+   var->data.image.format = ir->data.image_format;
+   var->data.max_array_access = ir->data.max_array_access;
+
+   var->num_state_slots = ir->get_num_state_slots();
+   if (var->num_state_slots > 0) {
+      var->state_slots = ralloc_array(var, nir_state_slot,
+                                      var->num_state_slots);
+
+      ir_state_slot *state_slots = ir->get_state_slots();
+      for (unsigned i = 0; i < var->num_state_slots; i++) {
+         for (unsigned j = 0; j < 5; j++) /* five tokens are copied per slot */
+            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
+         var->state_slots[i].swizzle = state_slots[i].swizzle;
+      }
+   } else {
+      var->state_slots = NULL;
+   }
+
+   var->constant_initializer = constant_copy(ir->constant_initializer, var);
+
+   var->interface_type = ir->get_interface_type();
+
+   switch (var->data.mode) { /* add to the list or table matching its mode */
+   case nir_var_local:
+      exec_list_push_tail(&impl->locals, &var->node); /* uses the current impl */
+      break;
+
+   case nir_var_global:
+      exec_list_push_tail(&shader->globals, &var->node);
+      break;
+
+   case nir_var_shader_in: /* inputs, outputs and uniforms are keyed by name */
+      _mesa_hash_table_insert(shader->inputs, var->name, var);
+      break;
+
+   case nir_var_shader_out:
+      _mesa_hash_table_insert(shader->outputs, var->name, var);
+      break;
+
+   case nir_var_uniform:
+      _mesa_hash_table_insert(shader->uniforms, var->name, var);
+      break;
+
+   case nir_var_system_value:
+      exec_list_push_tail(&shader->system_values, &var->node);
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   _mesa_hash_table_insert(var_table, ir, var); /* record ir_variable -> nir_variable */
+   this->var = var; /* read back by visit(ir_function_signature *) for params */
+}
+
+ir_visitor_status
+nir_function_visitor::visit_enter(ir_function *ir)
+{ /* Forwards each function to the main visitor's create_function(). */
+   visitor->create_function(ir);
+   return visit_continue_with_parent; /* presumably skips the function's children — confirm */
+}
+
+
+void
+nir_visitor::create_function(ir_function *ir)
+{ /* Creates the nir_function named ir->name and processes each signature. */
+   nir_function *func = nir_function_create(this->shader, ir->name);
+   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
+      create_overload(sig, func); /* returns early for intrinsic signatures */
+   }
+}
+
+
+
+void
+nir_visitor::create_overload(ir_function_signature *ir, nir_function *function)
+{ /* Creates the overload for one signature and records it in overload_table. */
+   if (ir->is_intrinsic) /* intrinsics get no nir_function_overload */
+      return;
+
+   nir_function_overload *overload = nir_function_overload_create(function);
+
+   unsigned num_params = ir->parameters.length();
+   overload->num_params = num_params;
+   overload->params = ralloc_array(shader, nir_parameter, num_params);
+
+   unsigned i = 0;
+   foreach_in_list(ir_variable, param, &ir->parameters) {
+      switch (param->data.mode) { /* translate the parameter direction */
+      case ir_var_function_in:
+         overload->params[i].param_type = nir_parameter_in;
+         break;
+
+      case ir_var_function_out:
+         overload->params[i].param_type = nir_parameter_out;
+         break;
+
+      case ir_var_function_inout:
+         overload->params[i].param_type = nir_parameter_inout;
+         break;
+
+      default: /* NOTE(review): ir_var_const_in is accepted by visit(ir_variable *) but not here — confirm it cannot occur */
+         unreachable("not reached");
+      }
+
+      overload->params[i].type = param->type;
+      i++;
+   }
+
+   overload->return_type = ir->return_type;
+
+   _mesa_hash_table_insert(this->overload_table, ir, overload); /* looked up by visit(ir_function_signature *) and visit(ir_call *) */
+}
+
+void
+nir_visitor::visit(ir_function *ir)
+{ /* Visits every signature of the function in list order. */
+   foreach_in_list(ir_function_signature, sig, &ir->signatures)
+      sig->accept(this); /* presumably dispatches to visit(ir_function_signature *) */
+}
+
+void
+nir_visitor::visit(ir_function_signature *ir)
+{ /* Builds the nir_function_impl for a defined, non-intrinsic signature. */
+   if (ir->is_intrinsic)
+      return;
+
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir);
+
+   assert(entry); /* create_overload() must already have run for this signature */
+   nir_function_overload *overload = (nir_function_overload *) entry->data;
+
+   if (ir->is_defined) {
+      nir_function_impl *impl = nir_function_impl_create(overload);
+      this->impl = impl; /* visit(ir_variable *) adds locals to this impl */
+
+      unsigned num_params = overload->num_params;
+      impl->num_params = num_params;
+      impl->params = ralloc_array(this->shader, nir_variable *, num_params);
+      unsigned i = 0;
+      foreach_in_list(ir_variable, param, &ir->parameters) {
+         param->accept(this);
+         impl->params[i] = this->var; /* variable just created for 'param' */
+         i++;
+      }
+
+      if (overload->return_type == glsl_type::void_type) {
+         impl->return_var = NULL;
+      } else { /* only the name and type of return_var are filled in here */
+         impl->return_var = ralloc(this->shader, nir_variable);
+         impl->return_var->name = ralloc_strdup(impl->return_var,
+                                                "return_var");
+         impl->return_var->type = overload->return_type;
+      }
+
+      this->is_global = false; /* auto/temporary variables in the body become locals */
+
+      this->cf_node_list = &impl->body;
+      visit_exec_list(&ir->body, this);
+
+      this->is_global = true;
+   } else { /* declaration without a body */
+      overload->impl = NULL;
+   }
+}
+
+void
+nir_visitor::visit(ir_loop *ir)
+{ /* Appends a nir_loop and translates the loop body into it. */
+   exec_list *old_list = this->cf_node_list;
+
+   nir_loop *loop = nir_loop_create(this->shader);
+   nir_cf_node_insert_end(old_list, &loop->cf_node);
+   this->cf_node_list = &loop->body; /* body instructions go inside the loop */
+   visit_exec_list(&ir->body_instructions, this);
+
+   this->cf_node_list = old_list; /* restore the enclosing list */
+}
+
+void
+nir_visitor::visit(ir_if *ir)
+{ /* Appends a nir_if and translates both branches into it. */
+   nir_src condition = evaluate_rvalue(ir->condition); /* done before the if node is inserted */
+
+   exec_list *old_list = this->cf_node_list;
+
+   nir_if *if_stmt = nir_if_create(this->shader);
+   if_stmt->condition = condition;
+   nir_cf_node_insert_end(old_list, &if_stmt->cf_node);
+
+   this->cf_node_list = &if_stmt->then_list;
+   visit_exec_list(&ir->then_instructions, this);
+
+   this->cf_node_list = &if_stmt->else_list;
+   visit_exec_list(&ir->else_instructions, this);
+
+   this->cf_node_list = old_list; /* restore the enclosing list */
+}
+
+void
+nir_visitor::visit(ir_discard *ir)
+{ /* Emits a discard intrinsic; 'ir' itself is not read. */
+   /*
+    * Discards aren't treated as control flow, because before we lower them
+    * they can appear anywhere in the shader and the stuff after them may still
+    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
+    * discards will be immediately followed by a return.
+    */
+
+   nir_intrinsic_instr *discard =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
+   nir_instr_insert_after_cf_list(this->cf_node_list, &discard->instr);
+}
+
+void
+nir_visitor::visit(ir_emit_vertex *ir)
+{ /* Emits an emit_vertex intrinsic for the statement. */
+   nir_intrinsic_instr *instr =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
+   instr->const_index[0] = ir->stream_id(); /* stream id travels as const_index[0] */
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+void
+nir_visitor::visit(ir_end_primitive *ir)
+{ /* Emits an end_primitive intrinsic for the statement. */
+   nir_intrinsic_instr *instr =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
+   instr->const_index[0] = ir->stream_id(); /* stream id travels as const_index[0] */
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+void
+nir_visitor::visit(ir_loop_jump *ir)
+{ /* Emits a NIR break or continue jump matching ir->mode. */
+   nir_jump_type type;
+   switch (ir->mode) {
+   case ir_loop_jump::jump_break:
+      type = nir_jump_break;
+      break;
+   case ir_loop_jump::jump_continue:
+      type = nir_jump_continue;
+      break;
+   default: /* only break and continue are handled */
+      unreachable("not reached");
+   }
+
+   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+void
+nir_visitor::visit(ir_return *ir)
+{ /* Copies the returned value (if any) into return_var, then emits the jump. */
+   if (ir->value != NULL) {
+      ir->value->accept(this); /* expected to leave a deref of the value in deref_head */
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+      copy->variables[0] = nir_deref_var_create(this->shader,
+                                                this->impl->return_var);
+      copy->variables[1] = this->deref_head;
+      nir_instr_insert_after_cf_list(this->cf_node_list, &copy->instr); /* was never emitted before; must precede the jump */
+   }
+
+   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+void
+nir_visitor::visit(ir_call *ir)
+{ /* Intrinsic callees become NIR intrinsics; all others a nir_call_instr. */
+   if (ir->callee->is_intrinsic) {
+      nir_intrinsic_op op; /* chosen by callee name; only three names are handled */
+      if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) {
+         op = nir_intrinsic_atomic_counter_read_var;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) {
+         op = nir_intrinsic_atomic_counter_inc_var;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
+         op = nir_intrinsic_atomic_counter_dec_var;
+      } else {
+         unreachable("not reached");
+      }
+
+      nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+      ir_dereference *param = /* only the first actual parameter is used */
+         (ir_dereference *) ir->actual_parameters.get_head();
+      param->accept(this);
+      instr->variables[0] = this->deref_head;
+      nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); /* one-component SSA result */
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+      nir_intrinsic_instr *store_instr = /* stores that result through return_deref */
+         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+      store_instr->num_components = 1;
+
+      ir->return_deref->accept(this);
+      store_instr->variables[0] = this->deref_head;
+      store_instr->src[0].is_ssa = true;
+      store_instr->src[0].ssa = &instr->dest.ssa;
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr);
+
+      return;
+   }
+
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir->callee);
+   assert(entry); /* overloads are registered by create_overload() */
+   nir_function_overload *callee = (nir_function_overload *) entry->data;
+
+   nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
+
+   unsigned i = 0;
+   foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
+      param->accept(this);
+      instr->params[i] = this->deref_head; /* deref chain built by the visit above */
+      i++;
+   }
+
+   ir->return_deref->accept(this); /* NOTE(review): dereferenced unconditionally — confirm return_deref cannot be NULL (e.g. void callee) */
+   instr->return_deref = this->deref_head;
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+void
+nir_visitor::visit(ir_assignment *ir)
+{ /* Emits a copy_var for plain copies, otherwise a (possibly masked) store_var. */
+   unsigned num_components = ir->lhs->type->vector_elements;
+
+   if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
+       (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
+      /* We're doing a plain-as-can-be copy, so emit a copy_var */
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+      ir->lhs->accept(this);
+      copy->variables[0] = this->deref_head; /* destination */
+
+      ir->rhs->accept(this);
+      copy->variables[1] = this->deref_head; /* source */
+
+
+      if (ir->condition) { /* conditional assignment: wrap the copy in an if */
+         nir_if *if_stmt = nir_if_create(this->shader);
+         if_stmt->condition = evaluate_rvalue(ir->condition);
+         nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+         nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+      } else {
+         nir_instr_insert_after_cf_list(this->cf_node_list, &copy->instr);
+      }
+      return;
+   }
+
+   assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
+
+   ir->lhs->accept(this);
+   nir_deref_var *lhs_deref = this->deref_head;
+   nir_src src = evaluate_rvalue(ir->rhs);
+
+   if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
+      /*
+       * We have no good way to update only part of a variable, so just load
+       * the LHS and do a vec operation to combine the old with the new, and
+       * then store it back into the LHS. Copy propagation should get rid of
+       * the mess.
+       */
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load->num_components = ir->lhs->type->vector_elements;
+      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+      load->variables[0] = lhs_deref;
+      nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
+
+      nir_op vec_op; /* ALU op that assembles the merged vector */
+      switch (ir->lhs->type->vector_elements) {
+         case 1: vec_op = nir_op_imov; break;
+         case 2: vec_op = nir_op_vec2; break;
+         case 3: vec_op = nir_op_vec3; break;
+         case 4: vec_op = nir_op_vec4; break;
+         default: unreachable("Invalid number of components"); break;
+      }
+      nir_alu_instr *vec = nir_alu_instr_create(this->shader, vec_op);
+      nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+      vec->dest.write_mask = (1 << num_components) - 1;
+
+      unsigned component = 0; /* next unread component of the packed RHS */
+      for (unsigned i = 0; i < ir->lhs->type->vector_elements; i++) {
+         if (ir->write_mask & (1 << i)) {
+            vec->src[i].src = src;
+
+            /* GLSL IR will give us the input to the write-masked assignment
+             * in a single packed vector.  So, for example, if the
+             * writemask is xzw, then we have to swizzle x -> x, y -> z,
+             * and z -> w and get the y component from the load.
+             */
+            vec->src[i].swizzle[0] = component++;
+         } else { /* channel not written: keep the old value from the load */
+            vec->src[i].src.is_ssa = true;
+            vec->src[i].src.ssa = &load->dest.ssa;
+            vec->src[i].swizzle[0] = i;
+         }
+      }
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &vec->instr);
+
+      src.is_ssa = true; /* from here on, store the merged vector instead */
+      src.ssa = &vec->dest.dest.ssa;
+   }
+
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+   store->num_components = ir->lhs->type->vector_elements;
+   nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref); /* store uses a copy; lhs_deref may already belong to the load above */
+   store->variables[0] = nir_deref_as_var(store_deref);
+   store->src[0] = src;
+
+   if (ir->condition) { /* conditional assignment: wrap the store in an if */
+      nir_if *if_stmt = nir_if_create(this->shader);
+      if_stmt->condition = evaluate_rvalue(ir->condition);
+      nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+      nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
+   } else {
+      nir_instr_insert_after_cf_list(this->cf_node_list, &store->instr);
+   }
+}
+
+/*
+ * Looks up where an instruction writes its result.
+ *
+ * Returns a pointer to the nir_dest embedded in the instruction, or NULL for
+ * an intrinsic whose nir_intrinsic_infos entry reports no destination.
+ *
+ * Note that this only handles instructions we generate at this level (ALU,
+ * intrinsic and texture instructions); any other type hits unreachable().
+ */
+static nir_dest *
+get_instr_dest(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return &nir_instr_as_alu(instr)->dest.dest;
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
+         return NULL;
+      return &intrin->dest;
+   }
+
+   case nir_instr_type_tex:
+      return &nir_instr_as_tex(instr)->dest;
+
+   default:
+      unreachable("not reached");
+   }
+
+   return NULL;
+}
+
+/*
+ * Gives an instruction an SSA destination with num_components components,
+ * inserts it after this->cf_node_list and records it in this->result.
+ *
+ * get_instr_dest() returns NULL for intrinsics that have no destination; in
+ * that case there is nothing to initialize, so the instruction is only
+ * inserted and recorded.
+ */
+void
+nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+{
+   nir_dest *dest = get_instr_dest(instr);
+
+   if (dest != NULL)
+      nir_ssa_dest_init(instr, dest, num_components, NULL);
+
+   nir_instr_insert_after_cf_list(this->cf_node_list, instr);
+   this->result = instr;
+}
+
+/*
+ * Visits an rvalue and returns an SSA source referring to its value.
+ *
+ * Visiting a dereference or a constant does not leave an instruction in
+ * this->result, so for those a load_var of this->deref_head is emitted
+ * first.  The destination of this->result must be SSA.
+ */
+nir_src
+nir_visitor::evaluate_rvalue(ir_rvalue* ir)
+{
+   ir->accept(this);
+
+   const bool needs_load = ir->as_dereference() || ir->as_constant();
+   if (needs_load) {
+      /* The value lives in a variable, so it has to be loaded explicitly. */
+      const unsigned components = ir->type->vector_elements;
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load->num_components = components;
+      load->variables[0] = this->deref_head;
+      add_instr(&load->instr, components);
+   }
+
+   nir_dest *result_dest = get_instr_dest(this->result);
+   assert(result_dest->is_ssa);
+
+   nir_src value;
+   value.is_ssa = true;
+   value.ssa = &result_dest->ssa;
+   return value;
+}
+
+/*
+ * Creates an ALU instruction for op, copies in one source per input that
+ * nir_op_infos declares for the opcode, enables the low dest_size write-mask
+ * bits and hands the instruction to add_instr().
+ */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src *srcs)
+{
+   nir_alu_instr *alu = nir_alu_instr_create(this->shader, op);
+
+   const unsigned num_inputs = nir_op_infos[op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      alu->src[s].src = srcs[s];
+   }
+
+   alu->dest.write_mask = (1 << dest_size) - 1;
+   add_instr(&alu->instr, dest_size);
+
+   return alu;
+}
+
+/* Convenience wrapper around the array form of emit() for unary opcodes. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1)
+{
+   assert(nir_op_infos[op].num_inputs == 1);
+
+   nir_src operands[1];
+   operands[0] = src1;
+   return emit(op, dest_size, operands);
+}
+
+/* Convenience wrapper around the array form of emit() for binary opcodes. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
+                  nir_src src2)
+{
+   assert(nir_op_infos[op].num_inputs == 2);
+
+   nir_src operands[2];
+   operands[0] = src1;
+   operands[1] = src2;
+   return emit(op, dest_size, operands);
+}
+
+/* Convenience wrapper around the array form of emit() for ternary opcodes. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
+                  nir_src src2, nir_src src3)
+{
+   assert(nir_op_infos[op].num_inputs == 3);
+
+   nir_src operands[3];
+   operands[0] = src1;
+   operands[1] = src2;
+   operands[2] = src3;
+   return emit(op, dest_size, operands);
+}
+
+/*
+ * Translates a GLSL IR expression into NIR instructions.
+ *
+ * UBO loads and the interpolate_at_* operations become intrinsics and are
+ * handled up front.  Every other operation evaluates its operands with
+ * evaluate_rvalue() and emits a single ALU instruction through emit(), which
+ * leaves the new instruction in this->result.
+ *
+ * When supports_ints is false, every operand type and the result type are
+ * treated as float, and the float-only variant of each opcode is selected.
+ */
+void
+nir_visitor::visit(ir_expression *ir)
+{
+   /* Some special cases */
+   switch (ir->operation) {
+   case ir_binop_ubo_load: {
+      ir_constant *const_index = ir->operands[1]->as_constant();
+
+      nir_intrinsic_op op;
+      if (const_index) {
+         op = nir_intrinsic_load_ubo;
+      } else {
+         op = nir_intrinsic_load_ubo_indirect;
+      }
+      nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+      load->num_components = ir->type->vector_elements;
+      load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
+      load->const_index[1] = 1; /* number of vec4's */
+      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      if (!const_index)
+         load->src[1] = evaluate_rvalue(ir->operands[1]);
+      add_instr(&load->instr, ir->type->vector_elements);
+
+      /*
+       * In UBO's, a true boolean value is any non-zero value, but we consider
+       * a true boolean to be ~0. Fix this up with a != 0 comparison.
+       */
+
+      if (ir->type->base_type == GLSL_TYPE_BOOL) {
+         nir_load_const_instr *const_zero = nir_load_const_instr_create(shader, 1);
+         const_zero->value.u[0] = 0;
+         nir_instr_insert_after_cf_list(this->cf_node_list, &const_zero->instr);
+
+         nir_alu_instr *compare = nir_alu_instr_create(shader, nir_op_ine);
+         compare->src[0].src.is_ssa = true;
+         compare->src[0].src.ssa = &load->dest.ssa;
+         compare->src[1].src.is_ssa = true;
+         compare->src[1].src.ssa = &const_zero->def;
+         /* Broadcast the single zero component to every compared channel. */
+         for (unsigned i = 0; i < ir->type->vector_elements; i++)
+            compare->src[1].swizzle[i] = 0;
+         compare->dest.write_mask = (1 << ir->type->vector_elements) - 1;
+
+         add_instr(&compare->instr, ir->type->vector_elements);
+      }
+
+      return;
+   }
+
+   case ir_unop_interpolate_at_centroid:
+   case ir_binop_interpolate_at_offset:
+   case ir_binop_interpolate_at_sample: {
+      ir_dereference *deref = ir->operands[0]->as_dereference();
+      ir_swizzle *swizzle = NULL;
+      if (!deref) {
+         /* the api does not allow a swizzle here, but the varying packing code
+          * may have pushed one into here.
+          */
+         swizzle = ir->operands[0]->as_swizzle();
+         assert(swizzle);
+         deref = swizzle->val->as_dereference();
+         assert(deref);
+      }
+
+      deref->accept(this);
+
+      nir_intrinsic_op op;
+      if (this->deref_head->var->data.mode == nir_var_shader_in) {
+         switch (ir->operation) {
+         case ir_unop_interpolate_at_centroid:
+            op = nir_intrinsic_interp_var_at_centroid;
+            break;
+         case ir_binop_interpolate_at_offset:
+            op = nir_intrinsic_interp_var_at_offset;
+            break;
+         case ir_binop_interpolate_at_sample:
+            op = nir_intrinsic_interp_var_at_sample;
+            break;
+         default:
+            unreachable("Invalid interpolation intrinsic");
+         }
+      } else {
+         /* This case can happen if the vertex shader does not write the
+          * given varying.  In this case, the linker will lower it to a
+          * global variable.  Since interpolating a variable makes no
+          * sense, we'll just turn it into a load which will probably
+          * eventually end up as an SSA definition.
+          */
+         assert(this->deref_head->var->data.mode == nir_var_global);
+         op = nir_intrinsic_load_var;
+      }
+
+      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
+      intrin->num_components = deref->type->vector_elements;
+      intrin->variables[0] = this->deref_head;
+
+      /* Only the at_offset and at_sample forms take a second operand. */
+      if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
+          intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
+         intrin->src[0] = evaluate_rvalue(ir->operands[1]);
+
+      add_instr(&intrin->instr, deref->type->vector_elements);
+
+      if (swizzle) {
+         /* Re-apply the swizzle that was stripped off the operand above. */
+         nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+         mov->dest.write_mask = (1 << swizzle->type->vector_elements) - 1;
+         mov->src[0].src.is_ssa = true;
+         mov->src[0].src.ssa = &intrin->dest.ssa;
+
+         mov->src[0].swizzle[0] = swizzle->mask.x;
+         mov->src[0].swizzle[1] = swizzle->mask.y;
+         mov->src[0].swizzle[2] = swizzle->mask.z;
+         mov->src[0].swizzle[3] = swizzle->mask.w;
+         for (unsigned i = deref->type->vector_elements; i < 4; i++)
+            mov->src[0].swizzle[i] = 0;
+
+         add_instr(&mov->instr, swizzle->type->vector_elements);
+      }
+
+      return;
+   }
+
+   default:
+      break;
+   }
+
+   nir_src srcs[4];
+   for (unsigned i = 0; i < ir->get_num_operands(); i++)
+      srcs[i] = evaluate_rvalue(ir->operands[i]);
+
+   /* Without integer support every operand is treated as a float. */
+   glsl_base_type types[4];
+   for (unsigned i = 0; i < ir->get_num_operands(); i++)
+      if (supports_ints)
+         types[i] = ir->operands[i]->type->base_type;
+      else
+         types[i] = GLSL_TYPE_FLOAT;
+
+   glsl_base_type out_type;
+   if (supports_ints)
+      out_type = ir->type->base_type;
+   else
+      out_type = GLSL_TYPE_FLOAT;
+
+   unsigned dest_size = ir->type->vector_elements;
+
+   nir_alu_instr *instr;
+   nir_op op;
+
+   switch (ir->operation) {
+   case ir_unop_bit_not: emit(nir_op_inot, dest_size, srcs); break;
+   case ir_unop_logic_not:
+      emit(supports_ints ? nir_op_inot : nir_op_fnot, dest_size, srcs);
+      break;
+   case ir_unop_neg:
+      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fneg : nir_op_ineg,
+                   dest_size, srcs);
+      break;
+   case ir_unop_abs:
+      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fabs : nir_op_iabs,
+                   dest_size, srcs);
+      break;
+   case ir_unop_saturate:
+      assert(types[0] == GLSL_TYPE_FLOAT);
+      instr = emit(nir_op_fsat, dest_size, srcs);
+      break;
+   case ir_unop_sign:
+      emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fsign : nir_op_isign,
+           dest_size, srcs);
+      break;
+   case ir_unop_rcp:  emit(nir_op_frcp, dest_size, srcs);  break;
+   case ir_unop_rsq:  emit(nir_op_frsq, dest_size, srcs);  break;
+   case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break;
+   case ir_unop_exp:  emit(nir_op_fexp, dest_size, srcs);  break;
+   case ir_unop_log:  emit(nir_op_flog, dest_size, srcs);  break;
+   case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break;
+   case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break;
+   case ir_unop_i2f:
+      emit(supports_ints ? nir_op_i2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_u2f:
+      emit(supports_ints ? nir_op_u2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_b2f:
+      emit(supports_ints ? nir_op_b2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_f2i:  emit(nir_op_f2i, dest_size, srcs);   break;
+   case ir_unop_f2u:  emit(nir_op_f2u, dest_size, srcs);   break;
+   case ir_unop_f2b:  emit(nir_op_f2b, dest_size, srcs);   break;
+   case ir_unop_i2b:  emit(nir_op_i2b, dest_size, srcs);   break;
+   case ir_unop_b2i:  emit(nir_op_b2i, dest_size, srcs);   break;
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_bitcast_i2f:
+   case ir_unop_bitcast_f2i:
+   case ir_unop_bitcast_u2f:
+   case ir_unop_bitcast_f2u:
+      /* no-op */
+      emit(nir_op_imov, dest_size, srcs);
+      break;
+   case ir_unop_any:
+      switch (ir->operands[0]->type->vector_elements) {
+      case 2:
+         emit(supports_ints ? nir_op_bany2 : nir_op_fany2,
+              dest_size, srcs);
+         break;
+      case 3:
+         emit(supports_ints ? nir_op_bany3 : nir_op_fany3,
+              dest_size, srcs);
+         break;
+      case 4:
+         emit(supports_ints ? nir_op_bany4 : nir_op_fany4,
+              dest_size, srcs);
+         break;
+      default:
+         unreachable("not reached");
+      }
+      break;
+   case ir_unop_trunc: emit(nir_op_ftrunc, dest_size, srcs); break;
+   case ir_unop_ceil:  emit(nir_op_fceil,  dest_size, srcs); break;
+   case ir_unop_floor: emit(nir_op_ffloor, dest_size, srcs); break;
+   case ir_unop_fract: emit(nir_op_ffract, dest_size, srcs); break;
+   case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
+   case ir_unop_sin:   emit(nir_op_fsin,   dest_size, srcs); break;
+   case ir_unop_cos:   emit(nir_op_fcos,   dest_size, srcs); break;
+   case ir_unop_sin_reduced:
+      emit(nir_op_fsin_reduced, dest_size, srcs);
+      break;
+   case ir_unop_cos_reduced:
+      emit(nir_op_fcos_reduced, dest_size, srcs);
+      break;
+   case ir_unop_dFdx:        emit(nir_op_fddx,        dest_size, srcs); break;
+   case ir_unop_dFdy:        emit(nir_op_fddy,        dest_size, srcs); break;
+   case ir_unop_dFdx_fine:   emit(nir_op_fddx_fine,   dest_size, srcs); break;
+   case ir_unop_dFdy_fine:   emit(nir_op_fddy_fine,   dest_size, srcs); break;
+   case ir_unop_dFdx_coarse: emit(nir_op_fddx_coarse, dest_size, srcs); break;
+   case ir_unop_dFdy_coarse: emit(nir_op_fddy_coarse, dest_size, srcs); break;
+   case ir_unop_pack_snorm_2x16:
+      emit(nir_op_pack_snorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_pack_snorm_4x8:
+      emit(nir_op_pack_snorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_pack_unorm_2x16:
+      emit(nir_op_pack_unorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_pack_unorm_4x8:
+      emit(nir_op_pack_unorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_pack_half_2x16:
+      emit(nir_op_pack_half_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_snorm_2x16:
+      emit(nir_op_unpack_snorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_snorm_4x8:
+      emit(nir_op_unpack_snorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_unpack_unorm_2x16:
+      emit(nir_op_unpack_unorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_unorm_4x8:
+      emit(nir_op_unpack_unorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16:
+      emit(nir_op_unpack_half_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16_split_x:
+      emit(nir_op_unpack_half_2x16_split_x, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16_split_y:
+      emit(nir_op_unpack_half_2x16_split_y, dest_size, srcs);
+      break;
+   case ir_unop_bitfield_reverse:
+      emit(nir_op_bitfield_reverse, dest_size, srcs);
+      break;
+   case ir_unop_bit_count:
+      emit(nir_op_bit_count, dest_size, srcs);
+      break;
+   case ir_unop_find_msb:
+      switch (types[0]) {
+      case GLSL_TYPE_UINT:
+         emit(nir_op_ufind_msb, dest_size, srcs);
+         break;
+      case GLSL_TYPE_INT:
+         emit(nir_op_ifind_msb, dest_size, srcs);
+         break;
+      default:
+         unreachable("Invalid type for findMSB()");
+      }
+      break;
+   case ir_unop_find_lsb:
+      emit(nir_op_find_lsb,  dest_size, srcs);
+      break;
+
+   case ir_unop_noise:
+      /* Outer switch: result width.  Inner switch: operand width. */
+      switch (ir->type->vector_elements) {
+      case 1:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise1_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise1_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise1_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise1_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 2:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise2_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise2_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise2_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise2_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 3:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise3_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise3_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise3_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise3_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 4:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise4_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise4_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise4_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise4_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      default:
+         unreachable("not reached");
+      }
+      break;
+   case ir_binop_add:
+   case ir_binop_sub:
+   case ir_binop_mul:
+   case ir_binop_div:
+   case ir_binop_mod:
+   case ir_binop_min:
+   case ir_binop_max:
+   case ir_binop_pow:
+   case ir_binop_bit_and:
+   case ir_binop_bit_or:
+   case ir_binop_bit_xor:
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      switch (ir->operation) {
+      case ir_binop_add:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fadd;
+         else
+            op = nir_op_iadd;
+         break;
+      case ir_binop_sub:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fsub;
+         else
+            op = nir_op_isub;
+         break;
+      case ir_binop_mul:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmul;
+         else
+            op = nir_op_imul;
+         break;
+      case ir_binop_div:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fdiv;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_idiv;
+         else
+            op = nir_op_udiv;
+         break;
+      case ir_binop_mod:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmod;
+         else
+            op = nir_op_umod;
+         break;
+      case ir_binop_min:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmin;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_imin;
+         else
+            op = nir_op_umin;
+         break;
+      case ir_binop_max:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmax;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_imax;
+         else
+            op = nir_op_umax;
+         break;
+      case ir_binop_bit_and:
+         op = nir_op_iand;
+         break;
+      case ir_binop_bit_or:
+         op = nir_op_ior;
+         break;
+      case ir_binop_bit_xor:
+         op = nir_op_ixor;
+         break;
+      case ir_binop_lshift:
+         op = nir_op_ishl;
+         break;
+      case ir_binop_rshift:
+         if (out_type == GLSL_TYPE_INT)
+            op = nir_op_ishr;
+         else
+            op = nir_op_ushr;
+         break;
+      case ir_binop_pow:
+         op = nir_op_fpow;
+         break;
+
+      default:
+         unreachable("not reached");
+      }
+
+      instr = emit(op, dest_size, srcs);
+
+      /* vector OP scalar: broadcast the scalar's only component. */
+      if (ir->operands[0]->type->vector_elements != 1 &&
+          ir->operands[1]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
+              i++) {
+            instr->src[1].swizzle[i] = 0;
+         }
+      }
+
+      /* scalar OP vector: same, for the first operand. */
+      if (ir->operands[1]->type->vector_elements != 1 &&
+          ir->operands[0]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[1]->type->vector_elements;
+              i++) {
+            instr->src[0].swizzle[i] = 0;
+         }
+      }
+
+      break;
+   case ir_binop_imul_high:
+      emit(out_type == GLSL_TYPE_UINT ? nir_op_umul_high : nir_op_imul_high,
+           dest_size, srcs);
+      break;
+   case ir_binop_carry:  emit(nir_op_uadd_carry, dest_size, srcs);  break;
+   case ir_binop_borrow: emit(nir_op_usub_borrow, dest_size, srcs); break;
+   case ir_binop_less:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_flt, dest_size, srcs);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ilt, dest_size, srcs);
+         else
+            emit(nir_op_ult, dest_size, srcs);
+      } else {
+         emit(nir_op_slt, dest_size, srcs);
+      }
+      break;
+   case ir_binop_greater:
+      /* a > b is emitted as b < a. */
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_flt, dest_size, srcs[1], srcs[0]);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ilt, dest_size, srcs[1], srcs[0]);
+         else
+            emit(nir_op_ult, dest_size, srcs[1], srcs[0]);
+      } else {
+         emit(nir_op_slt, dest_size, srcs[1], srcs[0]);
+      }
+      break;
+   case ir_binop_lequal:
+      /* a <= b is emitted as b >= a. */
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fge, dest_size, srcs[1], srcs[0]);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ige, dest_size, srcs[1], srcs[0]);
+         else
+            emit(nir_op_uge, dest_size, srcs[1], srcs[0]);
+      } else {
+         /* Must be sge, not slt: slt with swapped operands would be a > b. */
+         emit(nir_op_sge, dest_size, srcs[1], srcs[0]);
+      }
+      break;
+   case ir_binop_gequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fge, dest_size, srcs);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ige, dest_size, srcs);
+         else
+            emit(nir_op_uge, dest_size, srcs);
+      } else {
+         /* Must be sge, not slt: slt here would compute a < b. */
+         emit(nir_op_sge, dest_size, srcs);
+      }
+      break;
+   case ir_binop_equal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_feq, dest_size, srcs);
+         else
+            emit(nir_op_ieq, dest_size, srcs);
+      } else {
+         emit(nir_op_seq, dest_size, srcs);
+      }
+      break;
+   case ir_binop_nequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fne, dest_size, srcs);
+         else
+            emit(nir_op_ine, dest_size, srcs);
+      } else {
+         emit(nir_op_sne, dest_size, srcs);
+      }
+      break;
+   case ir_binop_all_equal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT) {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_feq, dest_size, srcs); break;
+               case 2: emit(nir_op_ball_fequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_ball_fequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_ball_fequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         } else {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_ieq, dest_size, srcs); break;
+               case 2: emit(nir_op_ball_iequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_ball_iequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_ball_iequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         }
+      } else {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_seq, dest_size, srcs); break;
+            case 2: emit(nir_op_fall_equal2, dest_size, srcs); break;
+            case 3: emit(nir_op_fall_equal3, dest_size, srcs); break;
+            case 4: emit(nir_op_fall_equal4, dest_size, srcs); break;
+            default:
+               unreachable("not reached");
+         }
+      }
+      break;
+   case ir_binop_any_nequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT) {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_fne, dest_size, srcs); break;
+               case 2: emit(nir_op_bany_fnequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_bany_fnequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_bany_fnequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         } else {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_ine, dest_size, srcs); break;
+               case 2: emit(nir_op_bany_inequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_bany_inequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_bany_inequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         }
+      } else {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_sne, dest_size, srcs); break;
+            case 2: emit(nir_op_fany_nequal2, dest_size, srcs); break;
+            case 3: emit(nir_op_fany_nequal3, dest_size, srcs); break;
+            case 4: emit(nir_op_fany_nequal4, dest_size, srcs); break;
+            default:
+               unreachable("not reached");
+         }
+      }
+      break;
+   case ir_binop_logic_and:
+      if (supports_ints)
+         emit(nir_op_iand, dest_size, srcs);
+      else
+         emit(nir_op_fand, dest_size, srcs);
+      break;
+   case ir_binop_logic_or:
+      if (supports_ints)
+         emit(nir_op_ior, dest_size, srcs);
+      else
+         emit(nir_op_for, dest_size, srcs);
+      break;
+   case ir_binop_logic_xor:
+      if (supports_ints)
+         emit(nir_op_ixor, dest_size, srcs);
+      else
+         emit(nir_op_fxor, dest_size, srcs);
+      break;
+   case ir_binop_dot:
+      switch (ir->operands[0]->type->vector_elements) {
+         case 2: emit(nir_op_fdot2, dest_size, srcs); break;
+         case 3: emit(nir_op_fdot3, dest_size, srcs); break;
+         case 4: emit(nir_op_fdot4, dest_size, srcs); break;
+         default:
+            unreachable("not reached");
+      }
+      break;
+
+   case ir_binop_pack_half_2x16_split:
+         emit(nir_op_pack_half_2x16_split, dest_size, srcs);
+         break;
+   case ir_binop_bfm:   emit(nir_op_bfm, dest_size, srcs);   break;
+   case ir_binop_ldexp: emit(nir_op_ldexp, dest_size, srcs); break;
+   case ir_triop_fma:   emit(nir_op_ffma, dest_size, srcs);  break;
+   case ir_triop_lrp:
+      instr = emit(nir_op_flrp, dest_size, srcs);
+      /* A scalar third operand is broadcast across a vector lrp. */
+      if (ir->operands[0]->type->vector_elements != 1 &&
+          ir->operands[2]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
+              i++) {
+            instr->src[2].swizzle[i] = 0;
+         }
+      }
+      break;
+   case ir_triop_csel:
+      if (supports_ints)
+         emit(nir_op_bcsel, dest_size, srcs);
+      else
+         emit(nir_op_fcsel, dest_size, srcs);
+      break;
+   case ir_triop_bfi:
+      instr = emit(nir_op_bfi, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[1]->type->vector_elements; i++) {
+         instr->src[0].swizzle[i] = 0;
+      }
+      break;
+   case ir_triop_bitfield_extract:
+      instr = emit(out_type == GLSL_TYPE_INT ? nir_op_ibitfield_extract :
+                   nir_op_ubitfield_extract, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
+         instr->src[1].swizzle[i] = 0;
+         instr->src[2].swizzle[i] = 0;
+      }
+      break;
+   case ir_quadop_bitfield_insert:
+      instr = emit(nir_op_bitfield_insert, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
+         instr->src[2].swizzle[i] = 0;
+         instr->src[3].swizzle[i] = 0;
+      }
+      break;
+   case ir_quadop_vector:
+      switch (ir->type->vector_elements) {
+         case 2: emit(nir_op_vec2, dest_size, srcs); break;
+         case 3: emit(nir_op_vec3, dest_size, srcs); break;
+         case 4: emit(nir_op_vec4, dest_size, srcs); break;
+         default: unreachable("not reached");
+      }
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+}
+
+/*
+ * Lowers a GLSL IR swizzle to a move (imov when supports_ints is set, fmov
+ * otherwise) whose single source carries the swizzle mask.
+ */
+void
+nir_visitor::visit(ir_swizzle *ir)
+{
+   const nir_op mov_op = supports_ints ? nir_op_imov : nir_op_fmov;
+   const unsigned num_components = ir->type->vector_elements;
+
+   nir_src value = evaluate_rvalue(ir->val);
+   nir_alu_instr *mov = emit(mov_op, num_components, value);
+
+   /* Only the first num_components mask entries are copied over. */
+   const unsigned mask[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+   for (unsigned c = 0; c < num_components; c++) {
+      mov->src[0].swizzle[c] = mask[c];
+   }
+}
+
+void
+nir_visitor::visit(ir_texture *ir)
+{
+   unsigned num_srcs;
+   nir_texop op;
+   switch (ir->op) {
+   case ir_tex:
+      op = nir_texop_tex;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_txb:
+   case ir_txl:
+      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
+      num_srcs = 2; /* coordinate, bias/lod */
+      break;
+
+   case ir_txd:
+      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
+      num_srcs = 3;
+      break;
+
+   case ir_txf:
+      op = nir_texop_txf;
+      if (ir->lod_info.lod != NULL)
+         num_srcs = 2; /* coordinate, lod */
+      else
+         num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_txf_ms:
+      op = nir_texop_txf_ms;
+      num_srcs = 2; /* coordinate, sample_index */
+      break;
+
+   case ir_txs:
+      op = nir_texop_txs;
+      if (ir->lod_info.lod != NULL)
+         num_srcs = 1; /* lod */
+      else
+         num_srcs = 0;
+      break;
+
+   case ir_lod:
+      op = nir_texop_lod;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_tg4:
+      op = nir_texop_tg4;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_query_levels:
+      op = nir_texop_query_levels;
+      num_srcs = 0;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   if (ir->projector != NULL)
+      num_srcs++;
+   if (ir->shadow_comparitor != NULL)
+      num_srcs++;
+   if (ir->offset != NULL && ir->offset->as_constant() == NULL)
+      num_srcs++;
+
+   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
+
+   instr->op = op;
+   instr->sampler_dim =
+      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
+   instr->is_array = ir->sampler->type->sampler_array;
+   instr->is_shadow = ir->sampler->type->sampler_shadow;
+   if (instr->is_shadow)
+      instr->is_new_style_shadow = (ir->type->vector_elements == 1);
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      instr->dest_type = nir_type_float;
+      break;
+   case GLSL_TYPE_INT:
+      instr->dest_type = nir_type_int;
+      break;
+   case GLSL_TYPE_UINT:
+      instr->dest_type = nir_type_unsigned;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   ir->sampler->accept(this);
+   instr->sampler = this->deref_head;
+
+   unsigned src_number = 0;
+
+   if (ir->coordinate != NULL) {
+      instr->coord_components = ir->coordinate->type->vector_elements;
+      instr->src[src_number].src = evaluate_rvalue(ir->coordinate);
+      instr->src[src_number].src_type = nir_tex_src_coord;
+      src_number++;
+   }
+
+   if (ir->projector != NULL) {
+      instr->src[src_number].src = evaluate_rvalue(ir->projector);
+      instr->src[src_number].src_type = nir_tex_src_projector;
+      src_number++;
+   }
+
+   if (ir->shadow_comparitor != NULL) {
+      instr->src[src_number].src = evaluate_rvalue(ir->shadow_comparitor);
+      instr->src[src_number].src_type = nir_tex_src_comparitor;
+      src_number++;
+   }
+
+   if (ir->offset != NULL) {
+      /* we don't support multiple offsets yet */
+      assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+
+      ir_constant *const_offset = ir->offset->as_constant();
+      if (const_offset != NULL) {
+         for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
+            instr->const_offset[i] = const_offset->value.i[i];
+      } else {
+         instr->src[src_number].src = evaluate_rvalue(ir->offset);
+         instr->src[src_number].src_type = nir_tex_src_offset;
+         src_number++;
+      }
+   }
+
+   switch (ir->op) {
+   case ir_txb:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.bias);
+      instr->src[src_number].src_type = nir_tex_src_bias;
+      src_number++;
+      break;
+
+   case ir_txl:
+   case ir_txf:
+   case ir_txs:
+      if (ir->lod_info.lod != NULL) {
+         instr->src[src_number].src = evaluate_rvalue(ir->lod_info.lod);
+         instr->src[src_number].src_type = nir_tex_src_lod;
+         src_number++;
+      }
+      break;
+
+   case ir_txd:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdx);
+      instr->src[src_number].src_type = nir_tex_src_ddx;
+      src_number++;
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdy);
+      instr->src[src_number].src_type = nir_tex_src_ddy;
+      src_number++;
+      break;
+
+   case ir_txf_ms:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.sample_index);
+      instr->src[src_number].src_type = nir_tex_src_ms_index;
+      src_number++;
+      break;
+
+   case ir_tg4:
+      instr->component = ir->lod_info.component->as_constant()->value.u[0];
+      break;
+
+   default:
+      break;
+   }
+
+   assert(src_number == num_srcs);
+
+   add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+}
+
+void
+nir_visitor::visit(ir_constant *ir)
+{
+   /*
+    * We don't know if this variable is an array or struct that gets
+    * dereferenced, so do the safe thing and make it a variable with a
+    * constant initializer and return a dereference.
+    */
+   /* Zero-allocate: only a handful of nir_variable fields are set below. */
+   nir_variable *var = rzalloc(this->shader, nir_variable);
+   var->name = ralloc_strdup(var, "const_temp");
+   var->type = ir->type;
+   var->data.mode = nir_var_local;
+   var->data.read_only = true;
+   var->constant_initializer = constant_copy(ir, var);
+   exec_list_push_tail(&this->impl->locals, &var->node);
+
+   this->deref_head = nir_deref_var_create(this->shader, var);
+   this->deref_tail = &this->deref_head->deref;
+}
+
+void
+nir_visitor::visit(ir_dereference_variable *ir)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->var_table, ir->var);
+   assert(entry);
+   nir_variable *var = (nir_variable *) entry->data;
+
+   nir_deref_var *deref = nir_deref_var_create(this->shader, var);
+   this->deref_head = deref;
+   this->deref_tail = &deref->deref;
+}
+
+void
+nir_visitor::visit(ir_dereference_record *ir)
+{
+   ir->record->accept(this);
+
+   int field_index = this->deref_tail->type->field_index(ir->field);
+   assert(field_index >= 0);
+
+   nir_deref_struct *deref = nir_deref_struct_create(this->shader, field_index);
+   deref->deref.type = ir->type;
+   this->deref_tail->child = &deref->deref;
+   this->deref_tail = &deref->deref;
+}
+
+void
+nir_visitor::visit(ir_dereference_array *ir)
+{
+   nir_deref_array *deref = nir_deref_array_create(this->shader);
+   deref->deref.type = ir->type;
+
+   ir_constant *const_index = ir->array_index->as_constant();
+   if (const_index != NULL) {
+      deref->deref_array_type = nir_deref_array_type_direct;
+      deref->base_offset = const_index->value.u[0];
+   } else {
+      deref->deref_array_type = nir_deref_array_type_indirect;
+      deref->indirect = evaluate_rvalue(ir->array_index);
+   }
+
+   ir->array->accept(this);
+
+   this->deref_tail->child = &deref->deref;
+   this->deref_tail = &deref->deref;
+}
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.h b/mesalib/src/glsl/nir/glsl_to_nir.h
new file mode 100644
index 000000000..58b2cee6a
--- /dev/null
+++ b/mesalib/src/glsl/nir/glsl_to_nir.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "../glsl_parser_extras.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+nir_shader *glsl_to_nir(exec_list * ir, _mesa_glsl_parse_state *state,
+                        bool native_integers);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
new file mode 100644
index 000000000..5b0e4bc50
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir.c
@@ -0,0 +1,2085 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+nir_shader *
+nir_shader_create(void *mem_ctx, const nir_shader_compiler_options *options)
+{
+   nir_shader *shader = ralloc(mem_ctx, nir_shader);
+
+   shader->uniforms = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                              _mesa_key_string_equal);
+   shader->inputs = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                            _mesa_key_string_equal);
+   shader->outputs = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                             _mesa_key_string_equal);
+
+   shader->options = options;
+
+   shader->num_user_structures = 0;
+   shader->user_structures = NULL;
+
+   exec_list_make_empty(&shader->functions);
+   exec_list_make_empty(&shader->registers);
+   exec_list_make_empty(&shader->globals);
+   exec_list_make_empty(&shader->system_values);
+   shader->reg_alloc = 0;
+
+   shader->num_inputs = 0;
+   shader->num_outputs = 0;
+   shader->num_uniforms = 0;
+
+   return shader;
+}
+
+static nir_register *
+reg_create(void *mem_ctx, struct exec_list *list)
+{
+   nir_register *reg = ralloc(mem_ctx, nir_register);
+
+   reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                _mesa_key_pointer_equal);
+   reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                _mesa_key_pointer_equal);
+   reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                   _mesa_key_pointer_equal);
+
+   reg->num_components = 0;
+   reg->num_array_elems = 0;
+   reg->is_packed = false;
+   reg->name = NULL;
+
+   exec_list_push_tail(list, &reg->node);
+
+   return reg;
+}
+
+nir_register *
+nir_global_reg_create(nir_shader *shader)
+{
+   nir_register *reg = reg_create(shader, &shader->registers);
+   reg->index = shader->reg_alloc++;
+   reg->is_global = true;
+
+   return reg;
+}
+
+nir_register *
+nir_local_reg_create(nir_function_impl *impl)
+{
+   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
+   reg->index = impl->reg_alloc++;
+   reg->is_global = false;
+
+   return reg;
+}
+
+void
+nir_reg_remove(nir_register *reg)
+{
+   exec_node_remove(&reg->node);
+}
+
+nir_function *
+nir_function_create(nir_shader *shader, const char *name)
+{
+   nir_function *func = ralloc(shader, nir_function);
+
+   exec_list_push_tail(&shader->functions, &func->node);
+   exec_list_make_empty(&func->overload_list);
+   func->name = name;
+   func->shader = shader;
+
+   return func;
+}
+
+nir_function_overload *
+nir_function_overload_create(nir_function *func)
+{
+   void *mem_ctx = ralloc_parent(func);
+
+   nir_function_overload *overload = ralloc(mem_ctx, nir_function_overload);
+
+   overload->num_params = 0;
+   overload->params = NULL;
+   overload->return_type = glsl_void_type();
+   overload->impl = NULL;
+
+   exec_list_push_tail(&func->overload_list, &overload->node);
+   overload->function = func;
+
+   return overload;
+}
+
+void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
+{
+   dest->is_ssa = src->is_ssa;
+   if (src->is_ssa) {
+      dest->ssa = src->ssa;
+   } else {
+      dest->reg.base_offset = src->reg.base_offset;
+      dest->reg.reg = src->reg.reg;
+      if (src->reg.indirect) {
+         dest->reg.indirect = ralloc(mem_ctx, nir_src);
+         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+      } else {
+         dest->reg.indirect = NULL;
+      }
+   }
+}
+
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+{
+   dest->is_ssa = src->is_ssa;
+   if (src->is_ssa) {
+      dest->ssa = src->ssa;
+   } else {
+      dest->reg.base_offset = src->reg.base_offset;
+      dest->reg.reg = src->reg.reg;
+      if (src->reg.indirect) {
+         dest->reg.indirect = ralloc(mem_ctx, nir_src);
+         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+      } else {
+         dest->reg.indirect = NULL;
+      }
+   }
+}
+
+void
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+{
+   nir_src_copy(&dest->src, &src->src, mem_ctx);
+   dest->abs = src->abs;
+   dest->negate = src->negate;
+   for (unsigned i = 0; i < 4; i++)
+      dest->swizzle[i] = src->swizzle[i];
+}
+
+void
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+{
+   nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+   dest->write_mask = src->write_mask;
+   dest->saturate = src->saturate;
+}
+
+static inline void
+block_add_pred(nir_block *block, nir_block *pred)
+{
+   _mesa_set_add(block->predecessors, pred);
+}
+
+static void
+cf_init(nir_cf_node *node, nir_cf_node_type type)
+{
+   exec_node_init(&node->node);
+   node->parent = NULL;
+   node->type = type;
+}
+
+static void
+link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
+{
+   pred->successors[0] = succ1;
+   block_add_pred(succ1, pred);
+
+   pred->successors[1] = succ2;
+   if (succ2 != NULL)
+      block_add_pred(succ2, pred);
+}
+
+static void
+unlink_blocks(nir_block *pred, nir_block *succ)
+{
+   if (pred->successors[0] == succ) {
+      pred->successors[0] = pred->successors[1];
+      pred->successors[1] = NULL;
+   } else {
+      assert(pred->successors[1] == succ);
+      pred->successors[1] = NULL;
+   }
+   /* Slot 0 case moved slot 1 down; now drop pred from succ's predecessors. */
+   struct set_entry *entry = _mesa_set_search(succ->predecessors, pred);
+
+   assert(entry);
+
+   _mesa_set_remove(succ->predecessors, entry);
+}
+
+static void
+unlink_block_successors(nir_block *block)
+{
+   /* Slot 1 first: unlinking slot 0 shifts slot 1 down into slot 0. */
+   if (block->successors[1] != NULL)
+      unlink_blocks(block, block->successors[1]);
+   if (block->successors[0] != NULL)
+      unlink_blocks(block, block->successors[0]);
+}
+
+nir_function_impl *
+nir_function_impl_create(nir_function_overload *overload)
+{
+   assert(overload->impl == NULL);
+
+   void *mem_ctx = ralloc_parent(overload);
+
+   nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl);
+
+   overload->impl = impl;
+   impl->overload = overload;
+
+   cf_init(&impl->cf_node, nir_cf_node_function);
+
+   exec_list_make_empty(&impl->body);
+   exec_list_make_empty(&impl->registers);
+   exec_list_make_empty(&impl->locals);
+   impl->num_params = 0;
+   impl->params = NULL;
+   impl->return_var = NULL;
+   impl->reg_alloc = 0;
+   impl->ssa_alloc = 0;
+   impl->valid_metadata = nir_metadata_none;
+
+   /* create start & end blocks */
+   nir_block *start_block = nir_block_create(mem_ctx);
+   nir_block *end_block = nir_block_create(mem_ctx);
+   start_block->cf_node.parent = &impl->cf_node;
+   end_block->cf_node.parent = &impl->cf_node;
+   impl->start_block = start_block;
+   impl->end_block = end_block;
+
+   exec_list_push_tail(&impl->body, &start_block->cf_node.node);
+
+   start_block->successors[0] = end_block;
+   block_add_pred(end_block, start_block);
+
+   return impl;
+}
+
+nir_block *
+nir_block_create(void *mem_ctx)
+{
+   nir_block *block = ralloc(mem_ctx, nir_block);
+
+   cf_init(&block->cf_node, nir_cf_node_block);
+
+   block->successors[0] = block->successors[1] = NULL;
+   block->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                          _mesa_key_pointer_equal);
+   block->imm_dom = NULL;
+   block->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                          _mesa_key_pointer_equal);
+
+   exec_list_make_empty(&block->instr_list);
+
+   return block;
+}
+
+static inline void
+src_init(nir_src *src)
+{
+   src->is_ssa = false;
+   src->reg.reg = NULL;
+   src->reg.indirect = NULL;
+   src->reg.base_offset = 0;
+}
+
+nir_if *
+nir_if_create(void *mem_ctx)
+{
+   nir_if *if_stmt = ralloc(mem_ctx, nir_if);
+
+   cf_init(&if_stmt->cf_node, nir_cf_node_if);
+   src_init(&if_stmt->condition);
+
+   nir_block *then = nir_block_create(mem_ctx);
+   exec_list_make_empty(&if_stmt->then_list);
+   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
+   then->cf_node.parent = &if_stmt->cf_node;
+
+   nir_block *else_stmt = nir_block_create(mem_ctx);
+   exec_list_make_empty(&if_stmt->else_list);
+   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
+   else_stmt->cf_node.parent = &if_stmt->cf_node;
+
+   return if_stmt;
+}
+
+nir_loop *
+nir_loop_create(void *mem_ctx)
+{
+   nir_loop *loop = ralloc(mem_ctx, nir_loop);
+
+   cf_init(&loop->cf_node, nir_cf_node_loop);
+
+   nir_block *body = nir_block_create(mem_ctx);
+   exec_list_make_empty(&loop->body);
+   exec_list_push_tail(&loop->body, &body->cf_node.node);
+   body->cf_node.parent = &loop->cf_node;
+
+   body->successors[0] = body;
+   block_add_pred(body, body);
+
+   return loop;
+}
+
+static void
+instr_init(nir_instr *instr, nir_instr_type type)
+{
+   instr->type = type;
+   instr->block = NULL;
+   exec_node_init(&instr->node);
+}
+
+static void
+dest_init(nir_dest *dest)
+{
+   dest->is_ssa = false;
+   dest->reg.reg = NULL;
+   dest->reg.indirect = NULL;
+   dest->reg.base_offset = 0;
+}
+
+static void
+alu_dest_init(nir_alu_dest *dest)
+{
+   dest_init(&dest->dest);
+   dest->saturate = false;
+   dest->write_mask = 0xf;
+}
+
+static void
+alu_src_init(nir_alu_src *src)
+{
+   src_init(&src->src);
+   src->abs = src->negate = false;
+   src->swizzle[0] = 0;
+   src->swizzle[1] = 1;
+   src->swizzle[2] = 2;
+   src->swizzle[3] = 3;
+}
+
+nir_alu_instr *
+nir_alu_instr_create(void *mem_ctx, nir_op op)
+{
+   unsigned num_srcs = nir_op_infos[op].num_inputs;
+   nir_alu_instr *instr =
+      ralloc_size(mem_ctx,
+                  sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
+
+   instr_init(&instr->instr, nir_instr_type_alu);
+   instr->op = op;
+   alu_dest_init(&instr->dest);
+   for (unsigned i = 0; i < num_srcs; i++)
+      alu_src_init(&instr->src[i]);
+
+   return instr;
+}
+
+nir_jump_instr *
+nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+{
+   nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+   instr_init(&instr->instr, nir_instr_type_jump);
+   instr->type = type;
+   return instr;
+}
+
+nir_load_const_instr *
+nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+{
+   nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+   instr_init(&instr->instr, nir_instr_type_load_const);
+
+   nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+   return instr;
+}
+
+nir_intrinsic_instr *
+nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+{
+   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
+   nir_intrinsic_instr *instr =
+      ralloc_size(mem_ctx,
+                  sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
+
+   instr_init(&instr->instr, nir_instr_type_intrinsic);
+   instr->intrinsic = op;
+
+   if (nir_intrinsic_infos[op].has_dest)
+      dest_init(&instr->dest);
+
+   for (unsigned i = 0; i < num_srcs; i++)
+      src_init(&instr->src[i]);
+
+   return instr;
+}
+
+nir_call_instr *
+nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+{
+   nir_call_instr *instr = ralloc(mem_ctx, nir_call_instr);
+   instr_init(&instr->instr, nir_instr_type_call);
+
+   instr->callee = callee;
+   instr->num_params = callee->num_params;
+   instr->params = ralloc_array(mem_ctx, nir_deref_var *, instr->num_params);
+   instr->return_deref = NULL;
+
+   return instr;
+}
+
+nir_tex_instr *
+nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+{
+   /* Zeroed so fields callers set only conditionally have defined values. */
+   nir_tex_instr *instr = rzalloc(mem_ctx, nir_tex_instr);
+   instr_init(&instr->instr, nir_instr_type_tex);
+   dest_init(&instr->dest);
+
+   instr->num_srcs = num_srcs;
+   instr->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+   for (unsigned i = 0; i < num_srcs; i++)
+      src_init(&instr->src[i].src);
+
+   instr->sampler_index = 0;
+   instr->sampler_array_size = 0;
+   instr->sampler = NULL;
+
+   return instr;
+}
+
+nir_phi_instr *
+nir_phi_instr_create(void *mem_ctx)
+{
+   nir_phi_instr *instr = ralloc(mem_ctx, nir_phi_instr);
+   instr_init(&instr->instr, nir_instr_type_phi);
+
+   dest_init(&instr->dest);
+   exec_list_make_empty(&instr->srcs);
+   return instr;
+}
+
+nir_parallel_copy_instr *
+nir_parallel_copy_instr_create(void *mem_ctx)
+{
+   nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+   instr_init(&instr->instr, nir_instr_type_parallel_copy);
+
+   exec_list_make_empty(&instr->entries);
+
+   return instr;
+}
+
+nir_ssa_undef_instr *
+nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+{
+   nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+   instr_init(&instr->instr, nir_instr_type_ssa_undef);
+
+   nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+   return instr;
+}
+
+nir_deref_var *
+nir_deref_var_create(void *mem_ctx, nir_variable *var)
+{
+   nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var);
+   deref->deref.deref_type = nir_deref_type_var;
+   deref->deref.child = NULL;
+   deref->deref.type = var->type;
+   deref->var = var;
+   return deref;
+}
+
+nir_deref_array *
+nir_deref_array_create(void *mem_ctx)
+{
+   nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array);
+   deref->deref.deref_type = nir_deref_type_array;
+   deref->deref.child = NULL;
+   deref->deref_array_type = nir_deref_array_type_direct;
+   src_init(&deref->indirect);
+   deref->base_offset = 0;
+   return deref;
+}
+
+nir_deref_struct *
+nir_deref_struct_create(void *mem_ctx, unsigned field_index)
+{
+   nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct);
+   deref->deref.deref_type = nir_deref_type_struct;
+   deref->deref.child = NULL;
+   deref->index = field_index;
+   return deref;
+}
+
+static nir_deref_var *
+copy_deref_var(void *mem_ctx, nir_deref_var *deref)
+{
+   nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
+   ret->deref.type = deref->deref.type;
+   if (deref->deref.child)
+      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   return ret;
+}
+
+static nir_deref_array *
+copy_deref_array(void *mem_ctx, nir_deref_array *deref)
+{
+   nir_deref_array *ret = nir_deref_array_create(mem_ctx);
+   ret->base_offset = deref->base_offset;
+   ret->deref_array_type = deref->deref_array_type;
+   if (deref->deref_array_type == nir_deref_array_type_indirect) {
+      nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx);
+   }
+   ret->deref.type = deref->deref.type;
+   if (deref->deref.child)
+      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   return ret;
+}
+
+static nir_deref_struct *
+copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
+{
+   nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index);
+   ret->deref.type = deref->deref.type;
+   if (deref->deref.child)
+      ret->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   return ret;
+}
+
+nir_deref *
+nir_copy_deref(void *mem_ctx, nir_deref *deref)
+{
+   switch (deref->deref_type) {
+   case nir_deref_type_var:
+      return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref;
+   case nir_deref_type_array:
+      return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref;
+   case nir_deref_type_struct:
+      return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref;
+   default:
+      unreachable("Invalid dereference type");
+   }
+
+   return NULL;
+}
+
+
+/**
+ * \name Control flow modification
+ *
+ * These functions modify the control flow tree while keeping the control flow
+ * graph up-to-date. The invariants respected are:
+ * 1. Each then statement, else statement, or loop body must have at least one
+ *    control flow node.
+ * 2. Each if-statement and loop must have one basic block before it and one
+ *    after.
+ * 3. Two basic blocks cannot be directly next to each other.
+ * 4. If a basic block has a jump instruction, there must be only one and it
+ *    must be at the end of the block.
+ * 5. The CFG must always be connected - this means that we must insert a fake
+ *    CFG edge for loops with no break statement.
+ *
+ * The purpose of the second one is so that we have places to insert code during
+ * GCM, as well as eliminating the possibility of critical edges.
+ */
+/*@{*/
+
+static void
+link_non_block_to_block(nir_cf_node *node, nir_block *block)
+{
+   if (node->type == nir_cf_node_if) {
+      /*
+       * We're trying to link an if to a block after it; this just means linking
+       * the last block of the then and else branches.
+       */
+
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+
+      nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
+      assert(last_then->type == nir_cf_node_block);
+      nir_block *last_then_block = nir_cf_node_as_block(last_then);
+
+      nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
+      assert(last_else->type == nir_cf_node_block);
+      nir_block *last_else_block = nir_cf_node_as_block(last_else);
+
+      if (exec_list_is_empty(&last_then_block->instr_list) ||
+          nir_block_last_instr(last_then_block)->type != nir_instr_type_jump) {
+         unlink_block_successors(last_then_block);
+         link_blocks(last_then_block, block, NULL);
+      }
+
+      if (exec_list_is_empty(&last_else_block->instr_list) ||
+          nir_block_last_instr(last_else_block)->type != nir_instr_type_jump) {
+         unlink_block_successors(last_else_block);
+         link_blocks(last_else_block, block, NULL);
+      }
+   } else {
+      assert(node->type == nir_cf_node_loop);
+
+      /*
+       * We can only get to this codepath if we're inserting a new loop, or
+       * at least a loop with no break statements; we can't insert break
+       * statements into a loop when we haven't inserted it into the CFG
+       * because we wouldn't know which block comes after the loop
+       * and therefore, which block should be the successor of the block with
+       * the break). Therefore, we need to insert a fake edge (see invariant
+       * #5).
+       */
+
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      nir_cf_node *last = nir_loop_last_cf_node(loop);
+      assert(last->type == nir_cf_node_block);
+      nir_block *last_block =  nir_cf_node_as_block(last);
+
+      last_block->successors[1] = block;
+      block_add_pred(block, last_block);
+   }
+}
+
+static void
+link_block_to_non_block(nir_block *block, nir_cf_node *node)
+{
+   if (node->type == nir_cf_node_if) {
+      /*
+       * We're trying to link a block to an if after it; this just means linking
+       * the block to the first block of the then and else branches.
+       */
+
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+
+      nir_cf_node *first_then = nir_if_first_then_node(if_stmt);
+      assert(first_then->type == nir_cf_node_block);
+      nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+      nir_cf_node *first_else = nir_if_first_else_node(if_stmt);
+      assert(first_else->type == nir_cf_node_block);
+      nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+      unlink_block_successors(block);
+      link_blocks(block, first_then_block, first_else_block);
+   } else {
+      /*
+       * For similar reasons as the corresponding case in
+       * link_non_block_to_block(), don't worry about if the loop header has
+       * any predecessors that need to be unlinked.
+       */
+
+      assert(node->type == nir_cf_node_loop);
+
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      nir_cf_node *loop_header = nir_loop_first_cf_node(loop);
+      assert(loop_header->type == nir_cf_node_block);
+      nir_block *loop_header_block = nir_cf_node_as_block(loop_header);
+
+      unlink_block_successors(block);
+      link_blocks(block, loop_header_block, NULL);
+   }
+
+}
+
+/**
+ * Takes a basic block and inserts a new empty basic block before it, making its
+ * predecessors point to the new block. This essentially splits the block into
+ * an empty header and a body so that another non-block CF node can be inserted
+ * between the two. Note that this does *not* link the two basic blocks, so
+ * some kind of cleanup *must* be performed after this call.
+ */
+
+static nir_block *
+split_block_beginning(nir_block *block)
+{
+   nir_block *new_block = nir_block_create(ralloc_parent(block));
+   new_block->cf_node.parent = block->cf_node.parent;
+   exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
+
+   struct set_entry *entry;
+   set_foreach(block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+      /* Retarget only the edge into block; pred's other successor stays. */
+      pred->successors[pred->successors[0] == block ? 0 : 1] = new_block;
+      _mesa_set_remove(block->predecessors, entry);
+      block_add_pred(new_block, pred);
+   }
+   return new_block;
+}
+
+static void
+rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src(phi, src) {
+         if (src->pred == old_pred) {
+            src->pred = new_pred;
+            break;
+         }
+      }
+   }
+}
+
+/**
+ * Moves the successors of source to the successors of dest, leaving both
+ * successors of source NULL.
+ */
+
+static void
+move_successors(nir_block *source, nir_block *dest)
+{
+   nir_block *succ1 = source->successors[0];
+   nir_block *succ2 = source->successors[1];
+
+   if (succ1) {
+      unlink_blocks(source, succ1);
+      rewrite_phi_preds(succ1, source, dest);
+   }
+
+   if (succ2) {
+      unlink_blocks(source, succ2);
+      rewrite_phi_preds(succ2, source, dest);
+   }
+
+   unlink_block_successors(dest);
+   link_blocks(dest, succ1, succ2);
+}
+
+static nir_block *
+split_block_end(nir_block *block)
+{
+   nir_block *new_block = nir_block_create(ralloc_parent(block));
+   new_block->cf_node.parent = block->cf_node.parent;
+   exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node);
+
+   move_successors(block, new_block);
+
+   return new_block;
+}
+
+/**
+ * Inserts a non-basic block between two basic blocks and links them together.
+ */
+
+static void
+insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after)
+{
+   node->parent = before->cf_node.parent;
+   exec_node_insert_after(&before->cf_node.node, &node->node);
+   link_block_to_non_block(before, node);
+   link_non_block_to_block(node, after);
+}
+
+/**
+ * Inserts a non-basic block before a basic block.
+ */
+
+static void
+insert_non_block_before_block(nir_cf_node *node, nir_block *block)
+{
+   /* split off the beginning of block into new_block */
+   nir_block *new_block = split_block_beginning(block);
+
+   /* insert our node in between new_block and block */
+   insert_non_block(new_block, node, block);
+}
+
+static void
+insert_non_block_after_block(nir_block *block, nir_cf_node *node)
+{
+   /* split off the end of block into new_block */
+   nir_block *new_block = split_block_end(block);
+
+   /* insert our node in between block and new_block */
+   insert_non_block(block, node, new_block);
+}
+
+/* walk up the control flow tree to find the innermost enclosing loop */
+static nir_loop *
+nearest_loop(nir_cf_node *node)
+{
+   while (node->type != nir_cf_node_loop) {
+      node = node->parent;
+   }
+
+   return nir_cf_node_as_loop(node);
+}
+
+nir_function_impl *
+nir_cf_node_get_function(nir_cf_node *node)
+{
+   while (node->type != nir_cf_node_function) {
+      node = node->parent;
+   }
+
+   return nir_cf_node_as_function(node);
+}
+
+/*
+ * update the CFG after a jump instruction has been added to the end of a block
+ * (the jump is read back from the block as its last instruction)
+ */
+static void
+handle_jump(nir_block *block)
+{
+   nir_instr *instr = nir_block_last_instr(block);
+   nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+   /* drop the old outgoing edges; new ones are linked below per jump type */
+   unlink_block_successors(block);
+
+   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+
+   if (jump_instr->type == nir_jump_break ||
+       jump_instr->type == nir_jump_continue) {
+      nir_loop *loop = nearest_loop(&block->cf_node);
+      /* continue targets the loop's first block, break the block after it */
+      if (jump_instr->type == nir_jump_continue) {
+         nir_cf_node *first_node = nir_loop_first_cf_node(loop);
+         assert(first_node->type == nir_cf_node_block);
+         nir_block *first_block = nir_cf_node_as_block(first_node);
+         link_blocks(block, first_block, NULL);
+      } else {
+         nir_cf_node *after = nir_cf_node_next(&loop->cf_node);
+         assert(after->type == nir_cf_node_block);
+         nir_block *after_block = nir_cf_node_as_block(after);
+         link_blocks(block, after_block, NULL);
+
+         /* If we inserted a fake link, remove it */
+         nir_cf_node *last = nir_loop_last_cf_node(loop);
+         assert(last->type == nir_cf_node_block);
+         nir_block *last_block =  nir_cf_node_as_block(last);
+         if (last_block->successors[1] != NULL)
+            unlink_blocks(last_block, after_block);
+      }
+   } else {
+      assert(jump_instr->type == nir_jump_return);
+      link_blocks(block, impl->end_block, NULL);
+   }
+}
+/* Re-links block after a trailing jump of kind type was removed from it. */
+static void
+handle_remove_jump(nir_block *block, nir_jump_type type)
+{
+   unlink_block_successors(block);
+   /* first restore the ordinary fall-through edge(s) of block */
+   if (exec_node_is_tail_sentinel(block->cf_node.node.next)) {
+      nir_cf_node *parent = block->cf_node.parent;
+      if (parent->type == nir_cf_node_if) {
+         nir_cf_node *next = nir_cf_node_next(parent);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         link_blocks(block, next_block, NULL);
+      } else {
+         assert(parent->type == nir_cf_node_loop);
+         nir_loop *loop = nir_cf_node_as_loop(parent);
+
+         nir_cf_node *head = nir_loop_first_cf_node(loop);
+         assert(head->type == nir_cf_node_block);
+         nir_block *head_block = nir_cf_node_as_block(head);
+
+         link_blocks(block, head_block, NULL);
+      }
+   } else {
+      nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+      if (next->type == nir_cf_node_if) {
+         nir_if *next_if = nir_cf_node_as_if(next);
+
+         nir_cf_node *first_then = nir_if_first_then_node(next_if);
+         assert(first_then->type == nir_cf_node_block);
+         nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+         nir_cf_node *first_else = nir_if_first_else_node(next_if);
+         assert(first_else->type == nir_cf_node_block);
+         nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+         link_blocks(block, first_then_block, first_else_block);
+      } else {
+         assert(next->type == nir_cf_node_loop);
+         nir_loop *next_loop = nir_cf_node_as_loop(next);
+
+         nir_cf_node *first = nir_loop_first_cf_node(next_loop);
+         assert(first->type == nir_cf_node_block);
+         nir_block *first_block = nir_cf_node_as_block(first);
+
+         link_blocks(block, first_block, NULL);
+      }
+   }
+   /* the break may have been the only edge to the block after the loop */
+   if (type == nir_jump_break) {
+      nir_loop *loop = nearest_loop(&block->cf_node);
+
+      nir_cf_node *next = nir_cf_node_next(&loop->cf_node);
+      assert(next->type == nir_cf_node_block);
+      nir_block *next_block = nir_cf_node_as_block(next);
+
+      if (next_block->predecessors->entries == 0) {
+         /* insert fake link */
+         nir_cf_node *last = nir_loop_last_cf_node(loop);
+         assert(last->type == nir_cf_node_block);
+         nir_block *last_block = nir_cf_node_as_block(last);
+
+         last_block->successors[1] = next_block;
+         block_add_pred(next_block, last_block);
+      }
+   }
+
+   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/**
+ * Merges the instructions of one basic block into the front of another.
+ *
+ * @param block the block that receives the instructions
+ * @param before the block being inserted - must not have been inserted before
+ * @param has_jump whether \before ends in a jump instruction; if so, \block
+ *                 must be empty so that the jump ends up last
+ */
+static void
+insert_block_before_block(nir_block *block, nir_block *before, bool has_jump)
+{
+   struct exec_list *dst = &block->instr_list;
+   struct exec_list *src = &before->instr_list;
+   assert(!(has_jump && !exec_list_is_empty(dst)));
+
+   /* re-parent every incoming instruction, then splice the list in front */
+   foreach_list_typed(nir_instr, moved, node, src)
+      moved->block = block;
+   exec_list_prepend(dst, src);
+   if (has_jump)
+      handle_jump(block);
+}
+
+/**
+ * Merges the instructions of one basic block onto the end of another.
+ *
+ * @param block the block that receives the instructions
+ * @param after the block being inserted - must not have been inserted before
+ * @param has_jump whether \after ends in a jump instruction; when set, the
+ *                 CFG edges of \block are recomputed through handle_jump()
+ */
+static void
+insert_block_after_block(nir_block *block, nir_block *after, bool has_jump)
+{
+   struct exec_list *src = &after->instr_list;
+
+   /* every incoming instruction now belongs to block */
+   foreach_list_typed(nir_instr, moved, node, src)
+      moved->block = block;
+   exec_list_append(&block->instr_list, src);
+   if (has_jump)
+      handle_jump(block);
+}
+
+/* Records an if node as a user of its condition (other nodes are ignored).
+ * Register conditions go into the register's if_uses set, the set that
+ * cleanup_cf_node() searches when the if is removed again. */
+static void
+update_if_uses(nir_cf_node *node)
+{
+   if (node->type != nir_cf_node_if)
+      return;
+   nir_if *if_stmt = nir_cf_node_as_if(node);
+   struct set *if_uses_set = if_stmt->condition.is_ssa ?
+                             if_stmt->condition.ssa->if_uses :
+                             if_stmt->condition.reg.reg->if_uses;
+   _mesa_set_add(if_uses_set, if_stmt);
+}
+/* Inserts the CF node after directly behind node in the control-flow tree. */
+void
+nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after)
+{
+   update_if_uses(after);
+
+   if (after->type == nir_cf_node_block) {
+      /*
+       * either node or the one after it must be a basic block, by invariant #2;
+       * in either case, just merge the blocks together.
+       */
+      nir_block *after_block = nir_cf_node_as_block(after);
+
+      bool has_jump = !exec_list_is_empty(&after_block->instr_list) &&
+         nir_block_last_instr(after_block)->type == nir_instr_type_jump;
+
+      if (node->type == nir_cf_node_block) {
+         insert_block_after_block(nir_cf_node_as_block(node), after_block,
+                                  has_jump);
+      } else {
+         nir_cf_node *next = nir_cf_node_next(node);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         insert_block_before_block(next_block, after_block, has_jump);
+      }
+   } else {
+      if (node->type == nir_cf_node_block) {
+         insert_non_block_after_block(nir_cf_node_as_block(node), after);
+      } else {
+         /*
+          * We have to insert a non-basic block after a non-basic block. Since
+          * every non-basic block has a basic block after it, this is equivalent
+          * to inserting a non-basic block before a basic block.
+          */
+
+         nir_cf_node *next = nir_cf_node_next(node);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         insert_non_block_before_block(after, next_block);
+      }
+   }
+   /* structural change: tell the impl that no metadata is preserved */
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+/* Inserts the CF node before directly ahead of node in the control-flow tree. */
+void
+nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before)
+{
+   update_if_uses(before);
+
+   if (before->type == nir_cf_node_block) {
+      nir_block *before_block = nir_cf_node_as_block(before);
+
+      bool has_jump = !exec_list_is_empty(&before_block->instr_list) &&
+         nir_block_last_instr(before_block)->type == nir_instr_type_jump;
+
+      if (node->type == nir_cf_node_block) {
+         insert_block_before_block(nir_cf_node_as_block(node), before_block,
+                                   has_jump);
+      } else {
+         nir_cf_node *prev = nir_cf_node_prev(node);
+         assert(prev->type == nir_cf_node_block);
+         nir_block *prev_block = nir_cf_node_as_block(prev);
+
+         insert_block_after_block(prev_block, before_block, has_jump);
+      }
+   } else {
+      if (node->type == nir_cf_node_block) {
+         insert_non_block_before_block(before, nir_cf_node_as_block(node));
+      } else {
+         /*
+          * We have to insert a non-basic block before a non-basic block. This
+          * is equivalent to inserting a non-basic block after a basic block.
+          */
+
+         nir_cf_node *prev_node = nir_cf_node_prev(node);
+         assert(prev_node->type == nir_cf_node_block);
+         nir_block *prev_block = nir_cf_node_as_block(prev_node);
+
+         insert_non_block_after_block(prev_block, before);
+      }
+   }
+   /* structural change: tell the impl that no metadata is preserved */
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+/* Inserts node ahead of the CF node at the head of list. */
+void
+nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node)
+{
+   nir_cf_node_insert_before(
+      exec_node_data(nir_cf_node, list->head, node), node);
+}
+/* Inserts node behind the CF node at the tail of list. */
+void
+nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node)
+{
+   nir_cf_node_insert_after(
+      exec_node_data(nir_cf_node, list->tail_pred, node), node);
+}
+
+/**
+ * Fuses two basic blocks into a single one. The result keeps the
+ * predecessors of \p before and takes over the successors of \p after;
+ * \p after is unlinked from its CF list once its instructions have moved.
+ */
+static void
+stitch_blocks(nir_block *before, nir_block *after)
+{
+   struct exec_list *tail_instrs = &after->instr_list;
+
+   /*
+    * after is folded into before rather than the other way round: that way
+    * only after's (at most 2) successors need re-linking instead of a
+    * possibly large number of predecessors.
+    *
+    * TODO: special case when before is empty and after isn't?
+    */
+   move_successors(after, before);
+
+   foreach_list_typed(nir_instr, moved, node, tail_instrs)
+      moved->block = before;
+   exec_list_append(&before->instr_list, tail_instrs);
+   exec_node_remove(&after->cf_node.node);
+}
+
+/* defined further down in this file */
+static void
+remove_defs_uses(nir_instr *instr);
+
+/*
+ * Recursively detaches everything inside node from the def/use tracking
+ * sets: instructions via remove_defs_uses(), and each if statement from
+ * the if_uses set of its condition.
+ */
+static void
+cleanup_cf_node(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block: {
+      nir_block *block = nir_cf_node_as_block(node);
+      nir_foreach_instr(block, instr)
+         remove_defs_uses(instr);
+      break;
+   }
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+      foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list)
+         cleanup_cf_node(child);
+      foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list)
+         cleanup_cf_node(child);
+
+      /* the if itself is a user of its condition; forget that too */
+      struct set *cond_users = if_stmt->condition.is_ssa ?
+                               if_stmt->condition.ssa->if_uses :
+                               if_stmt->condition.reg.reg->if_uses;
+      struct set_entry *found = _mesa_set_search(cond_users, if_stmt);
+      assert(found);
+      _mesa_set_remove(cond_users, found);
+      break;
+   }
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(node);
+      foreach_list_typed(nir_cf_node, child, node, &loop->body)
+         cleanup_cf_node(child);
+      break;
+   }
+   case nir_cf_node_function: {
+      nir_function_impl *impl = nir_cf_node_as_function(node);
+      foreach_list_typed(nir_cf_node, child, node, &impl->body)
+         cleanup_cf_node(child);
+      break;
+   }
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+/*
+ * Removes a control-flow node. A basic block is only emptied, since blocks
+ * act as padding between the non-block nodes; any other node is unlinked
+ * and its two neighbouring blocks are stitched together.
+ */
+void
+nir_cf_node_remove(nir_cf_node *node)
+{
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+   if (node->type == nir_cf_node_block) {
+      /*
+       * Clean up while the instructions are still on the list: walking
+       * an already emptied list would leave stale defs/uses behind.
+       *
+       * TODO: could we assert here?
+       */
+      cleanup_cf_node(node);
+      exec_list_make_empty(&nir_cf_node_as_block(node)->instr_list);
+      return;
+   }
+
+   nir_cf_node *before = nir_cf_node_prev(node);
+   assert(before->type == nir_cf_node_block);
+   nir_cf_node *after = nir_cf_node_next(node);
+   assert(after->type == nir_cf_node_block);
+   exec_node_remove(&node->node);
+   stitch_blocks(nir_cf_node_as_block(before), nir_cf_node_as_block(after));
+   cleanup_cf_node(node);
+}
+
+/* nir_foreach_src callback: records the instruction passed as state in the
+ * uses set of whatever src reads (SSA def or register). Never stops. */
+static bool
+add_use_cb(nir_src *src, void *state)
+{
+   if (src->is_ssa)
+      _mesa_set_add(src->ssa->uses, state);
+   else
+      _mesa_set_add(src->reg.reg->uses, state);
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: gives def a real index from the enclosing
+ * function's ssa_alloc counter if it still carries the UINT_MAX placeholder
+ * and the instruction (state) already sits in a block. */
+static bool
+add_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   nir_instr *owner = state;
+   nir_function_impl *impl;
+   if (!owner->block || def->index != UINT_MAX)
+      return true;
+   impl = nir_cf_node_get_function(&owner->block->cf_node);
+   def->index = impl->ssa_alloc++;
+   return true;
+}
+
+/* nir_foreach_dest callback: for a register destination, adds the
+ * instruction (state) to that register's defs set; SSA dests are skipped. */
+static bool
+add_reg_def_cb(nir_dest *dest, void *state)
+{
+   if (dest->is_ssa)
+      return true;
+   _mesa_set_add(dest->reg.reg->defs, state);
+   return true;
+}
+/* Runs the add_* callbacks over instr's sources, dests and SSA defs. */
+static void
+add_defs_uses(nir_instr *instr)
+{
+   nir_foreach_src(instr, add_use_cb, instr);
+   nir_foreach_dest(instr, add_reg_def_cb, instr);
+   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
+}
+/* Inserts before ahead of instr in instr's block; jumps are not allowed. */
+void
+nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+{
+   assert(before->type != nir_instr_type_jump);
+   before->block = instr->block;
+   add_defs_uses(before);
+   exec_node_insert_node_before(&instr->node, &before->node);
+}
+
+/* Inserts after right behind instr. A jump may only follow the block's last
+ * instruction, which must not itself be a jump; the CFG is then updated. */
+void
+nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+{
+   const bool is_jump = after->type == nir_instr_type_jump;
+   nir_block *home = instr->block;
+   assert(!is_jump || instr == nir_block_last_instr(home));
+   assert(!is_jump || instr->type != nir_instr_type_jump);
+   after->block = home;
+   add_defs_uses(after);
+   exec_node_insert_after(&instr->node, &after->node);
+   if (is_jump)
+      handle_jump(home);
+}
+
+/* Puts before at the head of block. A jump is only accepted for an empty
+ * block, and the block's CFG edges are then updated via handle_jump(). */
+void
+nir_instr_insert_before_block(nir_block *block, nir_instr *before)
+{
+   const bool is_jump = before->type == nir_instr_type_jump;
+   assert(!is_jump || exec_list_is_empty(&block->instr_list));
+   before->block = block;
+   add_defs_uses(before);
+   exec_list_push_head(&block->instr_list, &before->node);
+   if (is_jump)
+      handle_jump(block);
+}
+
+/* Appends after to block. A jump is refused if block already ends in one;
+ * once a jump is appended the CFG edges are redone by handle_jump(). */
+void
+nir_instr_insert_after_block(nir_block *block, nir_instr *after)
+{
+   const bool is_jump = after->type == nir_instr_type_jump;
+   assert(!is_jump ||
+          exec_list_is_empty(&block->instr_list) ||
+          nir_block_last_instr(block)->type != nir_instr_type_jump);
+   after->block = block;
+   add_defs_uses(after);
+   exec_list_push_tail(&block->instr_list, &after->node);
+   if (is_jump)
+      handle_jump(block);
+}
+
+/* Inserts before immediately ahead of node: at the head of node if it is a
+ * block, otherwise at the tail of the block that precedes node. */
+void
+nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
+{
+   if (node->type == nir_cf_node_block) {
+      nir_instr_insert_before_block(nir_cf_node_as_block(node), before);
+      return;
+   }
+   nir_cf_node *prev = nir_cf_node_prev(node);
+   assert(prev->type == nir_cf_node_block);
+   nir_instr_insert_after_block(nir_cf_node_as_block(prev), before);
+}
+
+/* Inserts after immediately behind node: at the tail of node if it is a
+ * block, otherwise at the head of the block that follows node. */
+void
+nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
+{
+   if (node->type == nir_cf_node_block) {
+      nir_instr_insert_after_block(nir_cf_node_as_block(node), after);
+      return;
+   }
+   nir_cf_node *following = nir_cf_node_next(node);
+   assert(following->type == nir_cf_node_block);
+   nir_instr_insert_before_block(nir_cf_node_as_block(following), after);
+}
+
+/* Inserts before ahead of the first CF node of list. */
+void
+nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
+{
+   struct exec_node *head = exec_list_get_head(list);
+   nir_instr_insert_before_cf(exec_node_data(nir_cf_node, head, node), before);
+}
+
+/* Inserts after behind the last CF node of list. */
+void
+nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
+{
+   struct exec_node *tail = exec_list_get_tail(list);
+   nir_instr_insert_after_cf(exec_node_data(nir_cf_node, tail, node), after);
+}
+
+/* nir_foreach_src callback: drops the instruction (state) from the uses set
+ * of the SSA def or register that src reads, if it is recorded there. */
+static bool
+remove_use_cb(nir_src *src, void *state)
+{
+   struct set *users = src->is_ssa ? src->ssa->uses : src->reg.reg->uses;
+   struct set_entry *hit = _mesa_set_search(users, state);
+
+   if (hit != NULL)
+      _mesa_set_remove(users, hit);
+
+   return true;
+}
+
+/* nir_foreach_dest callback: for a register destination, drops the
+ * instruction (state) from the register's defs set if it is recorded
+ * there. SSA destinations are left alone. */
+static bool
+remove_def_cb(nir_dest *dest, void *state)
+{
+   if (!dest->is_ssa) {
+      struct set *definers = dest->reg.reg->defs;
+      struct set_entry *hit = _mesa_set_search(definers, state);
+
+      if (hit != NULL)
+         _mesa_set_remove(definers, hit);
+   }
+
+   return true;
+}
+/* Removes instr from the def sets of its dests and use sets of its srcs. */
+static void
+remove_defs_uses(nir_instr *instr)
+{
+   nir_foreach_dest(instr, remove_def_cb, instr);
+   nir_foreach_src(instr, remove_use_cb, instr);
+}
+
+/* Unlinks instr from its block and from the def/use sets; removing a jump
+ * additionally re-links the CFG through handle_remove_jump(). */
+void nir_instr_remove(nir_instr *instr)
+{
+   remove_defs_uses(instr);
+   exec_node_remove(&instr->node);
+   if (instr->type != nir_instr_type_jump)
+      return;
+   handle_remove_jump(instr->block, nir_instr_as_jump(instr)->type);
+}
+
+/*@}*/
+
+/* Numbers the function's registers in list order; count -> reg_alloc. */
+void
+nir_index_local_regs(nir_function_impl *impl)
+{
+   unsigned next = 0;
+   foreach_list_typed(nir_register, reg, node, &impl->registers)
+      reg->index = next++;
+   impl->reg_alloc = next;
+}
+
+/* Numbers the shader's global registers in list order; count -> reg_alloc. */
+void
+nir_index_global_regs(nir_shader *shader)
+{
+   unsigned next = 0;
+   foreach_list_typed(nir_register, reg, node, &shader->registers)
+      reg->index = next++;
+   shader->reg_alloc = next;
+}
+/* Calls cb on the destination of an ALU instruction (instr->dest.dest). */
+static bool
+visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest.dest, state);
+}
+
+/* Visits the destination of an intrinsic, if the intrinsic has one. */
+static bool
+visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
+                     void *state)
+{
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
+      return true;
+   return cb(&instr->dest, state);
+}
+/* Calls cb on the destination of a texture instruction. */
+static bool
+visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
+                   void *state)
+{
+   return cb(&instr->dest, state);
+}
+/* Calls cb on the destination of a phi instruction. */
+static bool
+visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest, state);
+}
+
+/* Calls cb on the destination of every parallel-copy entry. */
+static bool
+visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
+                         nir_foreach_dest_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      if (!cb(&copy->dest, state))
+         return false;
+   }
+   return true;
+}
+
+/* Calls cb on each destination of instr and returns false as soon as cb
+ * does. Types that carry no nir_dest here (load_const, ssa_undef, call,
+ * jump) visit nothing and yield true. */
+bool
+nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_load_const:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      return true;
+
+   case nir_instr_type_alu:
+      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+   case nir_instr_type_intrinsic:
+      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
+   case nir_instr_type_tex:
+      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
+   case nir_instr_type_phi:
+      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
+   case nir_instr_type_parallel_copy:
+      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
+                                      cb, state);
+   default:
+      unreachable("Invalid instruction type");
+      return true;
+   }
+}
+
+struct foreach_ssa_def_state {
+   nir_foreach_ssa_def_cb cb;   /* user callback invoked per SSA def */
+   void *client_state;          /* opaque pointer handed back to cb */
+};
+
+/* nir_foreach_dest adapter: forwards SSA destinations to the wrapped SSA-def
+ * callback and lets register destinations pass (returns true). */
+static inline bool
+nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+{
+   struct foreach_ssa_def_state *wrapped = void_state;
+   if (!dest->is_ssa)
+      return true;
+   return wrapped->cb(&dest->ssa, wrapped->client_state);
+}
+
+/* Calls cb on every SSA def instr produces; stops once cb returns false. */
+bool
+nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      return true;
+   case nir_instr_type_load_const:
+      return cb(&nir_instr_as_load_const(instr)->def, state);
+   case nir_instr_type_ssa_undef:
+      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
+   case nir_instr_type_alu:
+   case nir_instr_type_tex:
+   case nir_instr_type_intrinsic:
+   case nir_instr_type_phi:
+   case nir_instr_type_parallel_copy: {
+      struct foreach_ssa_def_state wrapped = {cb, state};
+      return nir_foreach_dest(instr, nir_ssa_def_visitor, &wrapped);
+   }
+   default:
+      unreachable("Invalid instruction type");
+   }
+}
+
+/* Calls cb on src and, for register sources, on its indirect (if any). */
+static bool
+visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
+{
+   if (!cb(src, state))
+      return false;
+   bool has_indirect = !src->is_ssa && src->reg.indirect;
+   return has_indirect ? cb(src->reg.indirect, state) : true;
+}
+
+/* Visits the index source of an array deref; only indirect ones are visited. */
+static bool
+visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
+                      void *state)
+{
+   return deref->deref_array_type != nir_deref_array_type_indirect ||
+          visit_src(&deref->indirect, cb, state);
+}
+
+/* Walks the deref chain that starts at deref and visits the sources of every
+ * array link; returns false as soon as a visit does. */
+static bool
+visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
+{
+   for (nir_deref *link = &deref->deref; link != NULL; link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+      if (!visit_deref_array_src(nir_deref_as_array(link), cb, state))
+         return false;
+   }
+
+   return true;
+}
+/* Visits one source per input of the ALU opcode; stops on the first false. */
+static bool
+visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+      if (!visit_src(&instr->src[i].src, cb, state))
+         return false;
+
+   return true;
+}
+
+/* Visits the num_srcs texture sources and then, if a sampler deref is
+ * attached, the sources along that deref chain. */
+static bool
+visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      if (!visit_src(&instr->src[i].src, cb, state))
+         return false;
+   }
+   if (instr->sampler == NULL)
+      return true;
+   return visit_deref_src(instr->sampler, cb, state);
+}
+
+/* Visits the intrinsic's sources, then the sources in its variable derefs. */
+static bool
+visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
+                    void *state)
+{
+   unsigned count = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   for (unsigned s = 0; s < count; s++) {
+      if (!visit_src(&instr->src[s], cb, state))
+         return false;
+   }
+   count = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned v = 0; v < count; v++) {
+      if (!visit_deref_src(instr->variables[v], cb, state))
+         return false;
+   }
+   return true;
+}
+/* Call instructions contribute no sources here; always returns true. */
+static bool
+visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   return true;
+}
+/* load_const instructions contribute no sources; always returns true. */
+static bool
+visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb,
+                     void *state)
+{
+   return true;
+}
+
+/* Visits the source of each phi entry, stopping on the first false. */
+static bool
+visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_phi_src(instr, phi_entry) {
+      if (!visit_src(&phi_entry->src, cb, state))
+         return false;
+   }
+   return true;
+}
+
+/* Visits the source of each parallel-copy entry, stopping on false. */
+static bool
+visit_parallel_copy_src(nir_parallel_copy_instr *instr,
+                        nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      if (!visit_src(&copy->src, cb, state))
+         return false;
+   }
+   return true;
+}
+
+typedef struct {
+   void *state;             /* caller's opaque pointer, passed through to cb */
+   nir_foreach_src_cb cb;   /* source callback to run on indirect sources */
+} visit_dest_indirect_state;
+
+/* nir_foreach_dest adapter: a register destination with an indirect has that
+ * indirect reported to the wrapped source callback; others return true. */
+static bool
+visit_dest_indirect(nir_dest *dest, void *_state)
+{
+   visit_dest_indirect_state *wrapped = _state;
+   if (dest->is_ssa || !dest->reg.indirect)
+      return true;
+   return wrapped->cb(dest->reg.indirect, wrapped->state);
+}
+
+/*
+ * Calls cb on every source that instr reads, including the indirect sources
+ * of its register destinations. Returns false as soon as cb does.
+ */
+bool
+nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   bool ok = true;
+
+   switch (instr->type) {
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return true;
+   case nir_instr_type_alu:
+      ok = visit_alu_src(nir_instr_as_alu(instr), cb, state);
+      break;
+   case nir_instr_type_intrinsic:
+      ok = visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state);
+      break;
+   case nir_instr_type_tex:
+      ok = visit_tex_src(nir_instr_as_tex(instr), cb, state);
+      break;
+   case nir_instr_type_call:
+      ok = visit_call_src(nir_instr_as_call(instr), cb, state);
+      break;
+   case nir_instr_type_load_const:
+      ok = visit_load_const_src(nir_instr_as_load_const(instr), cb, state);
+      break;
+   case nir_instr_type_phi:
+      ok = visit_phi_src(nir_instr_as_phi(instr), cb, state);
+      break;
+   case nir_instr_type_parallel_copy:
+      ok = visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
+                                   cb, state);
+      break;
+   default:
+      unreachable("Invalid instruction type");
+      break;
+   }
+
+   if (!ok)
+      return false;
+
+   /* indirect sources of register destinations count as sources too */
+   visit_dest_indirect_state dest_state = { .state = state, .cb = cb };
+   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
+}
+
+/* Returns the constant behind src when src is an SSA value produced by a
+ * load_const instruction, and NULL for anything else. */
+nir_const_value *
+nir_src_as_const_value(nir_src src)
+{
+   if (src.is_ssa) {
+      nir_instr *producer = src.ssa->parent_instr;
+
+      if (producer->type == nir_instr_type_load_const)
+         return &nir_instr_as_load_const(producer)->value;
+   }
+   return NULL;
+}
+
+/*
+ * Structural equality of two sources. SSA sources are equal when they name
+ * the same def. Register sources are equal when register and base offset
+ * match and their indirects are either both absent or themselves equal.
+ * An SSA source never equals a register source.
+ */
+bool
+nir_srcs_equal(nir_src src1, nir_src src2)
+{
+   if (src1.is_ssa != src2.is_ssa)
+      return false;
+
+   if (src1.is_ssa)
+      return src1.ssa == src2.ssa;
+
+   const bool has_indirect1 = src1.reg.indirect != NULL;
+   const bool has_indirect2 = src2.reg.indirect != NULL;
+   if (has_indirect1 != has_indirect2)
+      return false;
+   if (has_indirect1 &&
+       !nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+      return false;
+
+   return src1.reg.reg == src2.reg.reg &&
+          src1.reg.base_offset == src2.reg.base_offset;
+}
+
+/*
+ * nir_foreach_src callback: returns false (which stops the walk) exactly
+ * when src reads the SSA def passed in as void_def.
+ */
+static bool
+src_does_not_use_def(nir_src *src, void *void_def)
+{
+   const nir_ssa_def *wanted = void_def;
+
+   return !src->is_ssa || src->ssa != wanted;
+}
+
+/*
+ * nir_foreach_src callback: returns false (which stops the walk) exactly
+ * when src reads the register passed in as void_reg.
+ */
+static bool
+src_does_not_use_reg(nir_src *src, void *void_reg)
+{
+   const nir_register *wanted = void_reg;
+
+   return src->is_ssa || src->reg.reg != wanted;
+}
+/* Replaces *src (a source of instr) by new_src and fixes up the use sets. */
+void
+nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
+{
+   if (src->is_ssa) {
+      nir_ssa_def *old_ssa = src->ssa;
+      *src = new_src;
+      if (old_ssa && nir_foreach_src(instr, src_does_not_use_def, old_ssa)) {
+         struct set_entry *entry = _mesa_set_search(old_ssa->uses, instr);
+         assert(entry);
+         _mesa_set_remove(old_ssa->uses, entry);
+      }
+   } else {
+      if (src->reg.indirect)
+         nir_instr_rewrite_src(instr, src->reg.indirect, new_src);
+
+      nir_register *old_reg = src->reg.reg;
+      *src = new_src;
+      if (old_reg && nir_foreach_src(instr, src_does_not_use_reg, old_reg)) {
+         struct set_entry *entry = _mesa_set_search(old_reg->uses, instr);
+         assert(entry);
+         _mesa_set_remove(old_reg->uses, entry);
+      }
+   }
+   /* finally record instr as a user of whatever new_src reads */
+   if (new_src.is_ssa) {
+      if (new_src.ssa)
+         _mesa_set_add(new_src.ssa->uses, instr);
+   } else {
+      if (new_src.reg.reg)
+         _mesa_set_add(new_src.reg.reg->uses, instr);
+   }
+}
+
+/*
+ * Initializes def as a value produced by instr, with two empty use sets
+ * allocated under instr's ralloc parent. The index comes from the enclosing
+ * function if instr is already in a block, else it stays UINT_MAX.
+ */
+void
+nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+                 unsigned num_components, const char *name)
+{
+   void *mem_ctx = ralloc_parent(instr);
+   def->parent_instr = instr;
+   def->name = name;
+   def->num_components = num_components;
+   def->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                _mesa_key_pointer_equal);
+   def->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                   _mesa_key_pointer_equal);
+   def->index = UINT_MAX;
+   if (instr->block != NULL) {
+      nir_cf_node *home = &instr->block->cf_node;
+      def->index = nir_cf_node_get_function(home)->ssa_alloc++;
+   }
+}
+/* Marks dest as SSA and initializes its def through nir_ssa_def_init(). */
+void
+nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+                 unsigned num_components, const char *name)
+{
+   dest->is_ssa = true;
+   nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+}
+
+struct ssa_def_rewrite_state {
+   void *mem_ctx;      /* passed to nir_src_copy() for each rewritten source */
+   nir_ssa_def *old;   /* the def whose uses are being replaced */
+   nir_src new_src;    /* the source that replaces it */
+};
+
+/* nir_foreach_src callback: swaps in the new source where src reads old. */
+static bool
+ssa_def_rewrite_uses_src(nir_src *src, void *void_state)
+{
+   struct ssa_def_rewrite_state *rw = void_state;
+   if (!src->is_ssa || src->ssa != rw->old)
+      return true;
+   nir_src_copy(src, &rw->new_src, rw->mem_ctx);
+   return true;
+}
+/* Redirects every use of def (in instructions and in ifs) to new_src. */
+void
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
+{
+   struct ssa_def_rewrite_state state;
+   state.mem_ctx = mem_ctx;
+   state.old = def;
+   state.new_src = new_src;
+
+   assert(!new_src.is_ssa || def != new_src.ssa);
+   /* pick the use sets of the replacement (SSA def or register) */
+   struct set *new_uses, *new_if_uses;
+   if (new_src.is_ssa) {
+      new_uses = new_src.ssa->uses;
+      new_if_uses = new_src.ssa->if_uses;
+   } else {
+      new_uses = new_src.reg.reg->uses;
+      new_if_uses = new_src.reg.reg->if_uses;
+   }
+   /* move every instruction use over to the replacement */
+   struct set_entry *entry;
+   set_foreach(def->uses, entry) {
+      nir_instr *instr = (nir_instr *)entry->key;
+
+      _mesa_set_remove(def->uses, entry);
+      nir_foreach_src(instr, ssa_def_rewrite_uses_src, &state);
+      _mesa_set_add(new_uses, instr);
+   }
+   /* and likewise every if-condition use */
+   set_foreach(def->if_uses, entry) {
+      nir_if *if_use = (nir_if *)entry->key;
+
+      _mesa_set_remove(def->if_uses, entry);
+      nir_src_copy(&if_use->condition, &new_src, mem_ctx);
+      _mesa_set_add(new_if_uses, if_use);
+   }
+}
+
+/* forward declaration: foreach_if() and foreach_loop() recurse through it */
+static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                            bool reverse, void *state);
+
+/* Runs cb over the blocks of both branches of if_stmt: then-list before
+ * else-list going forward, else-list before then-list in reverse. */
+static inline bool
+foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (!reverse) {
+      foreach_list_typed_safe(nir_cf_node, child, node, &if_stmt->then_list) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+      foreach_list_typed_safe(nir_cf_node, child, node, &if_stmt->else_list) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+      return true;
+   }
+
+   foreach_list_typed_safe_reverse(nir_cf_node, child, node,
+                                   &if_stmt->else_list) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+   foreach_list_typed_safe_reverse(nir_cf_node, child, node,
+                                   &if_stmt->then_list) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+   return true;
+}
+
+/* Runs cb over the blocks in the loop body, front-to-back or in reverse. */
+static inline bool
+foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (!reverse) {
+      foreach_list_typed_safe(nir_cf_node, child, node, &loop->body) {
+         if (!foreach_cf_node(child, cb, reverse, state))
+            return false;
+      }
+      return true;
+   }
+   foreach_list_typed_safe_reverse(nir_cf_node, child, node, &loop->body) {
+      if (!foreach_cf_node(child, cb, reverse, state))
+         return false;
+   }
+   return true;
+}
+
+/*
+ * Dispatches on the CF node type: a block is handed to cb directly, while an
+ * if or a loop is walked recursively through foreach_if()/foreach_loop().
+ * A false result means "stop iterating". Function nodes are not expected.
+ */
+static bool
+foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                bool reverse, void *state)
+{
+   if (node->type == nir_cf_node_block)
+      return cb(nir_cf_node_as_block(node), state);
+   if (node->type == nir_cf_node_if)
+      return foreach_if(nir_cf_node_as_if(node), cb, reverse, state);
+   if (node->type == nir_cf_node_loop)
+      return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state);
+
+   /* any other node type lands here */
+   unreachable("Invalid CFG node type");
+   return false;
+}
+
+/* Calls cb on every block of impl in order, finishing with the end block. */
+bool
+nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
+{
+   foreach_list_typed_safe(nir_cf_node, child, node, &impl->body) {
+      if (!foreach_cf_node(child, cb, false, state))
+         return false;
+   }
+   return cb(impl->end_block, state);
+}
+
+/* Reverse counterpart of nir_foreach_block(): the end block comes first,
+ * then the body is walked back to front. Stops once cb returns false. */
+bool
+nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+                          void *state)
+{
+   if (!cb(impl->end_block, state))
+      return false;
+   foreach_list_typed_safe_reverse(nir_cf_node, child, node, &impl->body) {
+      if (!foreach_cf_node(child, cb, true, state))
+         return false;
+   }
+   return true;
+}
+
+/* Returns the if statement that directly follows block in its CF list, or
+ * NULL when block is the last node or is followed by something else. */
+nir_if *
+nir_block_get_following_if(nir_block *block)
+{
+   nir_cf_node *self = &block->cf_node;
+
+   if (exec_node_is_tail_sentinel(&self->node) || nir_cf_node_is_last(self))
+      return NULL;
+
+   nir_cf_node *following = nir_cf_node_next(self);
+   if (following->type == nir_cf_node_if)
+      return nir_cf_node_as_if(following);
+
+   return NULL;
+}
+/* Block callback: stores the running counter in block->index and bumps it. */
+static bool
+index_block(nir_block *block, void *state)
+{
+   unsigned *index = state;
+   block->index = (*index)++;
+   return true;
+}
+
+/* Numbers the blocks of impl in nir_foreach_block order and stores the count
+ * in num_blocks; does nothing if the block-index metadata is still valid. */
+void
+nir_index_blocks(nir_function_impl *impl)
+{
+   if (impl->valid_metadata & nir_metadata_block_index)
+      return;
+
+   unsigned next_index = 0;
+   nir_foreach_block(impl, index_block, &next_index);
+   impl->num_blocks = next_index;
+}
+
+/* Gives def the next value of the counter that state points to. */
+static bool
+index_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   unsigned *counter = state;
+   def->index = (*counter)++;
+   return true;
+}
+
+/* Block callback: re-indexes every SSA def defined in block, in order. */
+static bool
+index_ssa_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, cur)
+      nir_foreach_ssa_def(cur, index_ssa_def_cb, state);
+   return true;
+}
+/* Renumbers all SSA defs of impl from 0 and stores the count in ssa_alloc. */
+void
+nir_index_ssa_defs(nir_function_impl *impl)
+{
+   unsigned index = 0;
+   nir_foreach_block(impl, index_ssa_block, &index);
+   impl->ssa_alloc = index;
+}
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
new file mode 100644
index 000000000..d74caa959
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir.h
@@ -0,0 +1,1618 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#pragma once
+
+#include "util/hash_table.h"
+#include "../list.h"
+#include "GL/gl.h" /* GLenum */
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/bitset.h"
+#include "nir_types.h"
+#include <stdio.h>
+
+#include "nir_opcodes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct gl_program;
+struct gl_shader_program;
+
+#define NIR_FALSE 0u
+#define NIR_TRUE (~0u)
+
+/** Defines a cast function
+ *
+ * This macro defines a cast function from in_type to out_type where
+ * out_type is some structure type that contains a field of type in_type.
+ *
+ * Note that you have to be a bit careful as the generated cast function
+ * destroys constness: it takes a const pointer and returns a non-const one.
+ */
+#define NIR_DEFINE_CAST(name, in_type, out_type, field)  \
+static inline out_type *                                 \
+name(const in_type *parent)                              \
+{                                                        \
+   return exec_node_data(out_type, parent, field);       \
+}
+
+struct nir_function_overload;
+struct nir_function;
+struct nir_shader;
+
+
+/**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+typedef struct {
+   int tokens[5];
+   int swizzle;
+} nir_state_slot;
+
+typedef enum {
+   nir_var_shader_in,
+   nir_var_shader_out,
+   nir_var_global,
+   nir_var_local,
+   nir_var_uniform,
+   nir_var_system_value
+} nir_variable_mode;
+
+/**
+ * Data stored in an nir_constant
+ */
+union nir_constant_data {
+   unsigned u[16];
+   int i[16];
+   float f[16];
+   bool b[16];
+};
+
+typedef struct nir_constant {
+   /**
+    * Value of the constant.
+    *
+    * The field used to back the values supplied by the constant is determined
+    * by the type associated with the \c nir_variable.  Constants may be
+    * scalars, vectors, or matrices.
+    */
+   union nir_constant_data value;
+
+   /* Array elements / Structure Fields */
+   struct nir_constant **elements;
+} nir_constant;
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+typedef enum {
+    nir_depth_layout_none, /**< No depth layout is specified. */
+    nir_depth_layout_any,
+    nir_depth_layout_greater,
+    nir_depth_layout_less,
+    nir_depth_layout_unchanged
+} nir_depth_layout;
+
+/**
+ * Either a uniform, global variable, shader input, or shader output. Based on
+ * ir_variable - it should be easy to translate between the two.
+ */
+
+typedef struct {
+   struct exec_node node;
+
+   /**
+    * Declared type of the variable
+    */
+   const struct glsl_type *type;
+
+   /**
+    * Declared name of the variable
+    */
+   char *name;
+
+   /**
+    * For variables which satisfy the is_interface_instance() predicate, this
+    * points to an array of integers such that if the ith member of the
+    * interface block is an array, max_ifc_array_access[i] is the maximum
+    * array element of that member that has been accessed.  If the ith member
+    * of the interface block is not an array, max_ifc_array_access[i] is
+    * unused.
+    *
+    * For variables whose type is not an interface block, this pointer is
+    * NULL.
+    */
+   unsigned *max_ifc_array_access;
+
+   struct nir_variable_data {
+
+      /**
+       * Is the variable read-only?
+       *
+       * This is set for variables declared as \c const, shader inputs,
+       * and uniforms.
+       */
+      unsigned read_only:1;
+      unsigned centroid:1;
+      unsigned sample:1;
+      unsigned invariant:1;
+
+      /**
+       * Storage class of the variable.
+       *
+       * \sa nir_variable_mode
+       */
+      nir_variable_mode mode:4;
+
+      /**
+       * Interpolation mode for shader inputs / outputs
+       *
+       * \sa glsl_interp_qualifier
+       */
+      unsigned interpolation:2;
+
+      /**
+       * \name ARB_fragment_coord_conventions
+       * @{
+       */
+      unsigned origin_upper_left:1;
+      unsigned pixel_center_integer:1;
+      /*@}*/
+
+      /**
+       * Was the location explicitly set in the shader?
+       *
+       * If the location is explicitly set in the shader, it \b cannot be changed
+       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+       * no effect).
+       */
+      unsigned explicit_location:1;
+      unsigned explicit_index:1;
+
+      /**
+       * Was an initial binding explicitly set in the shader?
+       *
+       * If so, constant_initializer contains an integer nir_constant
+       * representing the initial binding point.
+       */
+      unsigned explicit_binding:1;
+
+      /**
+       * Does this variable have an initializer?
+       *
+       * This is used by the linker to cross-validate initializers of global
+       * variables.
+       */
+      unsigned has_initializer:1;
+
+      /**
+       * Is this variable a generic output or input that has not yet been matched
+       * up to a variable in another stage of the pipeline?
+       *
+       * This is used by the linker as scratch storage while assigning locations
+       * to generic inputs and outputs.
+       */
+      unsigned is_unmatched_generic_inout:1;
+
+      /**
+       * If non-zero, then this variable may be packed along with other variables
+       * into a single varying slot, so this offset should be applied when
+       * accessing components.  For example, an offset of 1 means that the x
+       * component of this variable is actually stored in component y of the
+       * location specified by \c location.
+       */
+      unsigned location_frac:2;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was not an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_array will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_nonarray:1;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_nonarray will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_array:1;
+
+      /**
+       * \brief Layout qualifier for gl_FragDepth.
+       *
+       * This is not equal to \c nir_depth_layout_none if and only if this
+       * variable is \c gl_FragDepth and a layout qualifier is specified.
+       */
+      nir_depth_layout depth_layout;
+
+      /**
+       * Storage location of the base of this variable
+       *
+       * The precise meaning of this field depends on the nature of the variable.
+       *
+       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
+       *   - Vertex shader output: one of the values from \c gl_varying_slot.
+       *   - Geometry shader input: one of the values from \c gl_varying_slot.
+       *   - Geometry shader output: one of the values from \c gl_varying_slot.
+       *   - Fragment shader input: one of the values from \c gl_varying_slot.
+       *   - Fragment shader output: one of the values from \c gl_frag_result.
+       *   - Uniforms: Per-stage uniform slot number for default uniform block.
+       *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Other: This field is not currently used.
+       *
+       * If the variable is a uniform, shader input, or shader output, and the
+       * slot has not been assigned, the value will be -1.
+       */
+      int location;
+
+      /**
+       * The actual location of the variable in the IR. Only valid for inputs
+       * and outputs.
+       */
+      unsigned int driver_location;
+
+      /**
+       * output index for dual source blending.
+       */
+      int index;
+
+      /**
+       * Initial binding point for a sampler or UBO.
+       *
+       * For array types, this represents the binding point for the first element.
+       */
+      int binding;
+
+      /**
+       * Location an atomic counter is stored at.
+       */
+      struct {
+         unsigned buffer_index;
+         unsigned offset;
+      } atomic;
+
+      /**
+       * ARB_shader_image_load_store qualifiers.
+       */
+      struct {
+         bool read_only; /**< "readonly" qualifier. */
+         bool write_only; /**< "writeonly" qualifier. */
+         bool coherent;
+         bool _volatile;
+         bool restrict_flag;
+
+         /** Image internal format if specified explicitly, otherwise GL_NONE. */
+         GLenum format;
+      } image;
+
+      /**
+       * Highest element accessed with a constant expression array index
+       *
+       * Not used for non-array variables.
+       */
+      unsigned max_array_access;
+
+   } data;
+
+   /**
+    * Built-in state that backs this uniform
+    *
+    * Once set at variable creation, \c state_slots must remain invariant.
+    * This is because, ideally, this array would be shared by all clones of
+    * this variable in the IR tree.  In other words, we'd really like for it
+    * to be a fly-weight.
+    *
+    * If the variable is not a uniform, \c num_state_slots will be zero and
+    * \c state_slots will be \c NULL.
+    */
+   /*@{*/
+   unsigned num_state_slots;    /**< Number of state slots used */
+   nir_state_slot *state_slots;  /**< State descriptors. */
+   /*@}*/
+
+   /**
+    * Constant expression assigned in the initializer of the variable
+    */
+   nir_constant *constant_initializer;
+
+   /**
+    * For variables that are in an interface block or are an instance of an
+    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+    *
+    * \sa ir_variable::location
+    */
+   const struct glsl_type *interface_type;
+} nir_variable;
+
+typedef struct {
+   struct exec_node node;
+
+   unsigned num_components; /**< number of vector components */
+   unsigned num_array_elems; /**< size of array (0 for no array) */
+
+   /** generic register index. */
+   unsigned index;
+
+   /** only for debug purposes, can be NULL */
+   const char *name;
+
+   /** whether this register is local (per-function) or global (per-shader) */
+   bool is_global;
+
+   /**
+    * If this flag is set to true, then accessing channels >= num_components
+    * is well-defined, and simply spills over to the next array element. This
+    * is useful for backends that can do per-component accessing, in
+    * particular scalar backends. By setting this flag and making
+    * num_components equal to 1, structures can be packed tightly into
+    * registers and then registers can be accessed per-component to get to
+    * each structure member, even if it crosses vec4 boundaries.
+    */
+   bool is_packed;
+
+   /** set of nir_instr's where this register is used (read from) */
+   struct set *uses;
+
+   /** set of nir_instr's where this register is defined (written to) */
+   struct set *defs;
+
+   /** set of nir_if's where this register is used as a condition */
+   struct set *if_uses;
+} nir_register;
+
+typedef enum {
+   nir_instr_type_alu,
+   nir_instr_type_call,
+   nir_instr_type_tex,
+   nir_instr_type_intrinsic,
+   nir_instr_type_load_const,
+   nir_instr_type_jump,
+   nir_instr_type_ssa_undef,
+   nir_instr_type_phi,
+   nir_instr_type_parallel_copy,
+} nir_instr_type;
+
+typedef struct {
+   struct exec_node node;
+   nir_instr_type type;
+   struct nir_block *block;
+
+   /* A temporary for optimization and analysis passes to use for storing
+    * flags.  For instance, DCE uses this to store the "dead/live" info.
+    */
+   uint8_t pass_flags;
+} nir_instr;
+
+static inline nir_instr *
+nir_instr_next(nir_instr *instr)
+{
+   /* Next instruction in the list; NULL when the tail sentinel follows. */
+   struct exec_node *succ = exec_node_get_next(&instr->node);
+
+   return exec_node_is_tail_sentinel(succ)
+      ? NULL : exec_node_data(nir_instr, succ, node);
+}
+
+static inline nir_instr *
+nir_instr_prev(nir_instr *instr)
+{
+   /* Previous instruction in the list; NULL when the head sentinel precedes. */
+   struct exec_node *pred = exec_node_get_prev(&instr->node);
+
+   return exec_node_is_head_sentinel(pred)
+      ? NULL : exec_node_data(nir_instr, pred, node);
+}
+
+typedef struct {
+   /** for debugging only, can be NULL */
+   const char* name;
+
+   /** generic SSA definition index. */
+   unsigned index;
+
+   /** Index into the live_in and live_out bitfields */
+   unsigned live_index;
+
+   nir_instr *parent_instr;
+
+   /** set of nir_instr's where this SSA value is used (read from) */
+   struct set *uses;
+
+   /** set of nir_if's where this SSA value is used as a condition */
+   struct set *if_uses;
+
+   uint8_t num_components;
+} nir_ssa_def;
+
+struct nir_src;
+
+typedef struct {
+   nir_register *reg;
+   struct nir_src *indirect; /**< NULL for no indirect offset */
+   unsigned base_offset;
+
+   /* TODO use-def chain goes here */
+} nir_reg_src;
+
+typedef struct {
+   nir_register *reg;
+   struct nir_src *indirect; /**< NULL for no indirect offset */
+   unsigned base_offset;
+
+   /* TODO def-use chain goes here */
+} nir_reg_dest;
+
+typedef struct nir_src {
+   union {
+      nir_reg_src reg;  /* filled in when is_ssa is false */
+      nir_ssa_def *ssa; /* filled in when is_ssa is true */
+   };
+   /* Tells which member of the union above is in use. */
+   bool is_ssa;
+} nir_src;
+
+typedef struct {
+   union {
+      nir_reg_dest reg; /* filled in when is_ssa is false */
+      nir_ssa_def ssa;  /* stored inline (not a pointer); read when is_ssa */
+   };
+   /* Tells which member of the union above is in use. */
+   bool is_ssa;
+} nir_dest;
+
+static inline nir_src
+nir_src_for_ssa(nir_ssa_def *def)
+{
+   nir_src ssa_src; /* only the SSA member and the flag are filled in */
+
+   ssa_src.ssa = def;
+   ssa_src.is_ssa = true;
+
+   return ssa_src;
+}
+
+static inline nir_src
+nir_src_for_reg(nir_register *reg)
+{
+   nir_src reg_src;
+
+   /* Direct register reference: no indirect source, zero base offset. */
+   reg_src.reg.base_offset = 0;
+   reg_src.reg.indirect = NULL;
+   reg_src.reg.reg = reg;
+   reg_src.is_ssa = false;
+   return reg_src;
+}
+
+static inline nir_dest
+nir_dest_for_reg(nir_register *reg)
+{
+   nir_dest reg_dest;
+
+   /* Direct register write: no indirect source, zero base offset. */
+   reg_dest.reg.base_offset = 0;
+   reg_dest.reg.indirect = NULL;
+   reg_dest.reg.reg = reg;
+   reg_dest.is_ssa = false;
+   return reg_dest;
+}
+
+void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+
+typedef struct {
+   nir_src src;
+
+   /**
+    * \name input modifiers
+    */
+   /*@{*/
+   /**
+    * For inputs interpreted as floating point, flips the sign bit. For
+    * inputs interpreted as integers, performs the two's complement negation.
+    */
+   bool negate;
+
+   /**
+    * Clears the sign bit for floating point values, and computes the integer
+    * absolute value for integers. Note that the negate modifier acts after
+    * the absolute value modifier, therefore if both are set then all inputs
+    * will become negative.
+    */
+   bool abs;
+   /*@}*/
+
+   /**
+    * For each input component, says which component of the register it is
+    * chosen from. Note that which elements of the swizzle are used and which
+    * are ignored are based on the write mask for most opcodes - for example,
+    * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
+    * a swizzle of {2, x, 1, 0} where x means "don't care."
+    */
+   uint8_t swizzle[4];
+} nir_alu_src;
+
+typedef struct {
+   nir_dest dest;
+
+   /**
+    * \name saturate output modifier
+    *
+    * Only valid for opcodes that output floating-point numbers. Clamps the
+    * output to between 0.0 and 1.0 inclusive.
+    */
+
+   bool saturate;
+
+   unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+} nir_alu_dest;
+
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                       void *mem_ctx);
+
+typedef enum {
+   nir_type_invalid = 0, /* Not a valid type */
+   nir_type_float,
+   nir_type_int,
+   nir_type_unsigned,
+   nir_type_bool
+} nir_alu_type;
+
+typedef enum {
+   NIR_OP_IS_COMMUTATIVE = (1 << 0),
+   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
+} nir_op_algebraic_property;
+
+typedef struct {
+   const char *name;
+
+   unsigned num_inputs;
+
+   /**
+    * The number of components in the output
+    *
+    * If non-zero, this is the size of the output and input sizes are
+    * explicitly given; swizzle and writemask are still in effect, but if
+    * the output component is masked out, then the input component may
+    * still be in use.
+    *
+    * If zero, the opcode acts in the standard, per-component manner; the
+    * operation is performed on each component (except the ones that are
+    * masked out) with the input being taken from the input swizzle for
+    * that component.
+    *
+    * The size of some of the inputs may be given (i.e. non-zero) even
+    * though output_size is zero; in that case, the inputs with a zero
+    * size act per-component, while the inputs with non-zero size don't.
+    */
+   unsigned output_size;
+
+   /**
+    * The type of vector that the instruction outputs. Note that the
+    * saturate modifier is only allowed on outputs with the float type.
+    */
+
+   nir_alu_type output_type;
+
+   /**
+    * The number of components in each input
+    */
+   unsigned input_sizes[4];
+
+   /**
+    * The type of vector that each input takes. Note that negate and
+    * absolute value are only allowed on inputs with int or float type and
+    * behave differently on the two.
+    */
+   nir_alu_type input_types[4];
+
+   nir_op_algebraic_property algebraic_properties;
+} nir_op_info;
+
+extern const nir_op_info nir_op_infos[nir_num_opcodes];
+
+typedef struct nir_alu_instr {
+   nir_instr instr;
+   nir_op op;
+   nir_alu_dest dest;
+   nir_alu_src src[];
+} nir_alu_instr;
+
+/* Tells whether `channel` of source `src` is read by the ALU instruction. */
+static inline bool
+nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
+{
+   const unsigned fixed_size = nir_op_infos[instr->op].input_sizes[src];
+   /* Sized input: the first fixed_size channels; else follow the write mask. */
+   return fixed_size > 0 ? channel < fixed_size
+                         : (instr->dest.write_mask >> channel) & 1;
+}
+
+/*
+ * Number of channels the ALU instruction reads from source `src`.  Only
+ * meaningful when the destination is SSA, which is asserted.
+ */
+static inline unsigned
+nir_ssa_alu_instr_src_components(nir_alu_instr *instr, unsigned src)
+{
+   assert(instr->dest.dest.is_ssa);
+
+   /* A non-zero table entry wins; otherwise use the SSA dest's width. */
+   const unsigned fixed_size = nir_op_infos[instr->op].input_sizes[src];
+   return fixed_size > 0 ? fixed_size
+                         : instr->dest.dest.ssa.num_components;
+}
+
+typedef enum {
+   nir_deref_type_var,
+   nir_deref_type_array,
+   nir_deref_type_struct
+} nir_deref_type;
+
+typedef struct nir_deref {
+   nir_deref_type deref_type;
+   struct nir_deref *child;
+   const struct glsl_type *type;
+} nir_deref;
+
+typedef struct {
+   nir_deref deref;
+
+   nir_variable *var;
+} nir_deref_var;
+
+/* This enum describes how the array is referenced.  If the deref is
+ * direct then the base_offset is used.  If the deref is indirect then the
+ * offset is given by base_offset + indirect.  If the deref is a wildcard
+ * then the deref refers to all of the elements of the array at the same
+ * time.  Wildcard dereferences are only ever allowed in copy_var
+ * intrinsics and the source and destination derefs must have matching
+ * wildcards.
+ */
+typedef enum {
+   nir_deref_array_type_direct,
+   nir_deref_array_type_indirect,
+   nir_deref_array_type_wildcard,
+} nir_deref_array_type;
+
+typedef struct {
+   nir_deref deref;
+
+   nir_deref_array_type deref_array_type;
+   unsigned base_offset;
+   nir_src indirect;
+} nir_deref_array;
+
+typedef struct {
+   nir_deref deref;
+
+   unsigned index;
+} nir_deref_struct;
+
+NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
+NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
+NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+
+typedef struct {
+   nir_instr instr;
+
+   unsigned num_params;
+   nir_deref_var **params;
+   nir_deref_var *return_deref;
+
+   struct nir_function_overload *callee;
+} nir_call_instr;
+
+#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
+                  num_variables, num_indices, flags) \
+   nir_intrinsic_##name,
+
+#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
+
+typedef enum {
+#include "nir_intrinsics.h"
+   nir_num_intrinsics = nir_last_intrinsic + 1
+} nir_intrinsic_op;
+
+#undef INTRINSIC
+#undef LAST_INTRINSIC
+
+/** Represents an intrinsic
+ *
+ * An intrinsic is an instruction type for handling things that are
+ * more-or-less regular operations but don't just consume and produce SSA
+ * values like ALU operations do.  Intrinsics are not for things that have
+ * special semantic meaning such as phi nodes and parallel copies.
+ * Examples of intrinsics include variable load/store operations, system
+ * value loads, and the like.  Even though texturing more-or-less falls
+ * under this category, texturing is its own instruction type because
+ * trying to represent texturing with intrinsics would lead to a
+ * combinatorial explosion of intrinsic opcodes.
+ *
+ * By having a single instruction type for handling a lot of different
+ * cases, optimization passes can look for intrinsics and, for the most
+ * part, completely ignore them.  Each intrinsic type also has a few
+ * possible flags that govern whether or not they can be reordered or
+ * eliminated.  That way passes like dead code elimination can still work
+ * on intrinsics without understanding the meaning of each.
+ *
+ * Each intrinsic has some number of constant indices, some number of
+ * variables, and some number of sources.  What these sources, variables,
+ * and indices mean depends on the intrinsic and is documented with the
+ * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
+ * instructions are the only types of instruction that can operate on
+ * variables.
+ */
+typedef struct {
+   nir_instr instr;
+
+   nir_intrinsic_op intrinsic;
+
+   nir_dest dest;
+
+   /** number of components if this is a vectorized intrinsic
+    *
+    * Similarly to ALU operations, some intrinsics are vectorized.
+    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
+    * For vectorized intrinsics, the num_components field specifies the
+    * number of destination components and the number of source components
+    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
+    */
+   uint8_t num_components;
+
+   int const_index[3];
+
+   nir_deref_var *variables[2];
+
+   nir_src src[];
+} nir_intrinsic_instr;
+
+/**
+ * \name NIR intrinsics semantic flags
+ *
+ * information about what the compiler can do with the intrinsics.
+ *
+ * \sa nir_intrinsic_info::flags
+ */
+typedef enum {
+   /**
+    * whether the intrinsic can be safely eliminated if none of its output
+    * values are being used.
+    */
+   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
+
+   /**
+    * Whether the intrinsic can be reordered with respect to any other
+    * intrinsic, i.e. whether the only reordering dependencies of the
+    * intrinsic are due to the register reads/writes.
+    */
+   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+} nir_intrinsic_semantic_flag;
+
+#define NIR_INTRINSIC_MAX_INPUTS 4
+
+typedef struct {
+   const char *name;
+
+   unsigned num_srcs; /**< number of register/SSA inputs */
+
+   /** number of components of each input register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
+   unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
+
+   bool has_dest;
+
+   /** number of components of the output register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
+   unsigned dest_components;
+
+   /** the number of inputs/outputs that are variables */
+   unsigned num_variables;
+
+   /** the number of constant indices used by the intrinsic */
+   unsigned num_indices;
+
+   /** semantic flags for calls to this intrinsic */
+   nir_intrinsic_semantic_flag flags;
+} nir_intrinsic_info;
+
+extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
+
+/**
+ * \group texture information
+ *
+ * This gives semantic information about textures which is useful to the
+ * frontend, the backend, and lowering passes, but not the optimizer.
+ */
+
+typedef enum {
+   nir_tex_src_coord,
+   nir_tex_src_projector,
+   nir_tex_src_comparitor, /**< shadow comparator */
+   nir_tex_src_offset,
+   nir_tex_src_bias,
+   nir_tex_src_lod,
+   nir_tex_src_ms_index, /**< MSAA sample index */
+   nir_tex_src_ddx,
+   nir_tex_src_ddy,
+   nir_tex_src_sampler_offset, /**< dynamically uniform indirect offset */
+   nir_num_tex_src_types
+} nir_tex_src_type;
+
+typedef struct {
+   nir_src src;
+   nir_tex_src_type src_type;
+} nir_tex_src;
+
+typedef enum {
+   nir_texop_tex,                /**< Regular texture look-up */
+   nir_texop_txb,                /**< Texture look-up with LOD bias */
+   nir_texop_txl,                /**< Texture look-up with explicit LOD */
+   nir_texop_txd,                /**< Texture look-up with partial derivatives */
+   nir_texop_txf,                /**< Texel fetch with explicit LOD */
+   nir_texop_txf_ms,                /**< Multisample texture fetch */
+   nir_texop_txs,                /**< Texture size */
+   nir_texop_lod,                /**< Texture lod query */
+   nir_texop_tg4,                /**< Texture gather */
+   nir_texop_query_levels       /**< Texture levels query */
+} nir_texop;
+
+typedef struct {
+   nir_instr instr;
+
+   enum glsl_sampler_dim sampler_dim;
+   nir_alu_type dest_type;
+
+   nir_texop op;
+   nir_dest dest;
+   nir_tex_src *src;
+   unsigned num_srcs, coord_components;
+   bool is_array, is_shadow;
+
+   /**
+    * If is_shadow is true, whether this is the old-style shadow that outputs 4
+    * components or the new-style shadow that outputs 1 component.
+    */
+   bool is_new_style_shadow;
+
+   /* constant offset - must be 0 if the offset source is used */
+   int const_offset[4];
+
+   /* gather component selector */
+   unsigned component : 2;
+
+   /** The sampler index
+    *
+    * If this texture instruction has a nir_tex_src_sampler_offset source,
+    * then the sampler index is given by sampler_index + sampler_offset.
+    */
+   unsigned sampler_index;
+
+   /** The size of the sampler array or 0 if it's not an array */
+   unsigned sampler_array_size;
+
+   nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */
+} nir_tex_instr;
+
+static inline unsigned
+nir_tex_instr_dest_size(nir_tex_instr *instr)
+{
+   if (instr->op == nir_texop_txs) { /* size query: width follows the dim */
+      unsigned ret;
+      switch (instr->sampler_dim) {
+         case GLSL_SAMPLER_DIM_1D:
+         case GLSL_SAMPLER_DIM_BUF:
+            ret = 1;
+            break;
+         case GLSL_SAMPLER_DIM_2D:
+         case GLSL_SAMPLER_DIM_CUBE:
+         case GLSL_SAMPLER_DIM_MS:
+         case GLSL_SAMPLER_DIM_RECT:
+         case GLSL_SAMPLER_DIM_EXTERNAL:
+            ret = 2;
+            break;
+         case GLSL_SAMPLER_DIM_3D:
+            ret = 3;
+            break;
+         default:
+            unreachable("not reached");
+      }
+      if (instr->is_array) /* array textures get one extra component */
+         ret++;
+      return ret;
+   }
+   /* NOTE(review): 2 components for a levels query looks odd - confirm. */
+   if (instr->op == nir_texop_query_levels)
+      return 2;
+   /* Shadow look-ups flagged as new-style produce a single component. */
+   if (instr->is_shadow && instr->is_new_style_shadow)
+      return 1;
+   /* Every remaining case reports four components. */
+   return 4;
+}
+
+static inline unsigned
+nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+{
+   switch (instr->src[src].src_type) {
+   case nir_tex_src_coord:
+      return instr->coord_components;
+
+   /* Offsets and derivatives drop one component when is_array is set. */
+   case nir_tex_src_offset:
+   case nir_tex_src_ddx:
+   case nir_tex_src_ddy:
+      return instr->is_array ? instr->coord_components - 1
+                             : instr->coord_components;
+
+   default: /* every other source type is one component wide */
+      return 1;
+   }
+}
+
+static inline int
+nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+{
+   /* Linear search; the first source of the requested type wins. */
+   unsigned i = 0;
+   while (i < instr->num_srcs && instr->src[i].src_type != type)
+      i++;
+   return i < instr->num_srcs ? (int) i : -1;
+}
+
+typedef struct {
+   union {
+      float f[4];
+      int32_t i[4];
+      uint32_t u[4];
+   };
+} nir_const_value;
+
+typedef struct {
+   nir_instr instr;
+
+   nir_const_value value;
+
+   nir_ssa_def def;
+} nir_load_const_instr;
+
+typedef enum {
+   nir_jump_return,
+   nir_jump_break,
+   nir_jump_continue,
+} nir_jump_type;
+
+typedef struct {
+   nir_instr instr;
+   nir_jump_type type;
+} nir_jump_instr;
+
+/* creates a new SSA variable in an undefined state */
+
+typedef struct {
+   nir_instr instr;
+   nir_ssa_def def;
+} nir_ssa_undef_instr;
+
+typedef struct {
+   struct exec_node node;
+
+   /* The predecessor block corresponding to this source */
+   struct nir_block *pred;
+
+   nir_src src;
+} nir_phi_src;
+
+#define nir_foreach_phi_src(phi, entry) \
+   foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+
+typedef struct {
+   nir_instr instr;
+
+   struct exec_list srcs; /**< list of nir_phi_src */
+
+   nir_dest dest;
+} nir_phi_instr;
+
+typedef struct {
+   struct exec_node node;
+   nir_src src;
+   nir_dest dest;
+} nir_parallel_copy_entry;
+
+#define nir_foreach_parallel_copy_entry(pcopy, entry) \
+   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+typedef struct {
+   nir_instr instr;
+
+   /* A list of nir_parallel_copy_entry's.  The sources of all of the
+    * entries are copied to the corresponding destinations "in parallel".
+    * In other words, if we have two entries: a -> b and b -> a, the values
+    * get swapped.
+    */
+   struct exec_list entries;
+} nir_parallel_copy_instr;
+
+NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+                nir_parallel_copy_instr, instr)
+
+/*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops. The leaves of the tree are basic blocks, lists of
+ * instructions that always run start-to-finish. Each basic block also keeps
+ * track of its successors (blocks which may run immediately after the current
+ * block) and predecessors (blocks which could have run immediately before the
+ * current block). Each function also has a start block and an end block which
+ * all return statements point to (which is always empty). Together, all the
+ * blocks with their predecessors and successors make up the control flow
+ * graph (CFG) of the function. There are helpers that modify the tree of
+ * control flow nodes while modifying the CFG appropriately; these should be
+ * used instead of modifying the tree directly.
+ */
+
+typedef enum {
+   nir_cf_node_block, /* basic block: a leaf of the control flow tree */
+   nir_cf_node_if, /* if-statement */
+   nir_cf_node_loop, /* loop */
+   nir_cf_node_function /* presumably a nir_function_impl (see nir_cf_node_as_function) */
+} nir_cf_node_type;
+
+typedef struct nir_cf_node {
+   struct exec_node node; /* link in the exec_list this node belongs to */
+   nir_cf_node_type type; /* kind of control flow node */
+   struct nir_cf_node *parent; /* presumably the enclosing node in the CF tree -- confirm */
+} nir_cf_node;
+
+typedef struct nir_block {
+   nir_cf_node cf_node; /* embedded base node; see nir_cf_node_as_block() */
+
+   struct exec_list instr_list; /** < list of nir_instr */
+
+   /** generic block index; generated by nir_index_blocks */
+   unsigned index;
+
+   /*
+    * Each block can only have up to 2 successors, so we put them in a simple
+    * array - no need for anything more complicated.
+    */
+   struct nir_block *successors[2];
+
+   /* Set of nir_block predecessors in the CFG */
+   struct set *predecessors;
+
+   /*
+    * This node's immediate dominator in the dominance tree - set to NULL for
+    * the start block.
+    */
+   struct nir_block *imm_dom;
+
+   /* This node's children in the dominance tree */
+   unsigned num_dom_children;
+   struct nir_block **dom_children;
+
+   /* Set of nir_block's on the dominance frontier of this block */
+   struct set *dom_frontier;
+
+   /*
+    * These two indices have the property that dom_{pre,post}_index for each
+    * child of this block in the dominance tree will always be between
+    * dom_pre_index and dom_post_index for this block, which makes testing if
+    * a given block is dominated by another block an O(1) operation.
+    */
+   unsigned dom_pre_index, dom_post_index;
+
+   /* live in and out for this block; used for liveness analysis */
+   BITSET_WORD *live_in;
+   BITSET_WORD *live_out;
+} nir_block;
+
+static inline nir_instr *
+nir_block_first_instr(nir_block *block)
+{
+   struct exec_node *head = exec_list_get_head(&block->instr_list); /* head node of the block's instruction list */
+   return exec_node_data(nir_instr, head, node); /* nir_instr for that node; NOTE(review): empty-list result not established here */
+}
+
+static inline nir_instr *
+nir_block_last_instr(nir_block *block)
+{
+   struct exec_node *tail = exec_list_get_tail(&block->instr_list); /* tail node of the block's instruction list */
+   return exec_node_data(nir_instr, tail, node); /* nir_instr for that node; NOTE(review): empty-list result not established here */
+}
+
+#define nir_foreach_instr(block, instr) \
+   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse(block, instr) \
+   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_safe(block, instr) \
+   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+
+typedef struct {
+   nir_cf_node cf_node; /* embedded base node; see nir_cf_node_as_if() */
+   nir_src condition; /* source holding the if-condition */
+
+   struct exec_list then_list; /** < list of nir_cf_node */
+   struct exec_list else_list; /** < list of nir_cf_node */
+} nir_if;
+
+static inline nir_cf_node *
+nir_if_first_then_node(nir_if *if_stmt)
+{
+   struct exec_node *head = exec_list_get_head(&if_stmt->then_list); /* head node of then_list */
+   return exec_node_data(nir_cf_node, head, node); /* nir_cf_node for that node */
+}
+
+static inline nir_cf_node *
+nir_if_last_then_node(nir_if *if_stmt)
+{
+   struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); /* tail node of then_list */
+   return exec_node_data(nir_cf_node, tail, node); /* nir_cf_node for that node */
+}
+
+static inline nir_cf_node *
+nir_if_first_else_node(nir_if *if_stmt)
+{
+   struct exec_node *head = exec_list_get_head(&if_stmt->else_list); /* head node of else_list */
+   return exec_node_data(nir_cf_node, head, node); /* nir_cf_node for that node */
+}
+
+static inline nir_cf_node *
+nir_if_last_else_node(nir_if *if_stmt)
+{
+   struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); /* tail node of else_list */
+   return exec_node_data(nir_cf_node, tail, node); /* nir_cf_node for that node */
+}
+
+typedef struct {
+   nir_cf_node cf_node; /* embedded base node; see nir_cf_node_as_loop() */
+
+   struct exec_list body; /** < list of nir_cf_node */
+} nir_loop;
+
+static inline nir_cf_node *
+nir_loop_first_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node); /* nir_cf_node at the head of the loop body */
+}
+
+static inline nir_cf_node *
+nir_loop_last_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node); /* nir_cf_node at the tail of the loop body */
+}
+
+/**
+ * Various bits of metadata that may be created or required by
+ * optimization and analysis passes. Values are bit flags and can be OR'ed.
+ */
+typedef enum {
+   nir_metadata_none = 0x0,
+   nir_metadata_block_index = 0x1,
+   nir_metadata_dominance = 0x2,
+   nir_metadata_live_variables = 0x4,
+} nir_metadata;
+
+typedef struct {
+   nir_cf_node cf_node; /* embedded base node; see nir_cf_node_as_function() */
+
+   /** pointer to the overload of which this is an implementation */
+   struct nir_function_overload *overload;
+
+   struct exec_list body; /** < list of nir_cf_node */
+
+   nir_block *start_block, *end_block;
+
+   /** list for all local variables in the function */
+   struct exec_list locals;
+
+   /** array of variables used as parameters */
+   unsigned num_params;
+   nir_variable **params;
+
+   /** variable used to hold the result of the function */
+   nir_variable *return_var;
+
+   /** list of local registers in the function */
+   struct exec_list registers;
+
+   /** next available local register index */
+   unsigned reg_alloc;
+
+   /** next available SSA value index */
+   unsigned ssa_alloc;
+
+   /* total number of basic blocks; NOTE(review): formerly cited "block_index_dirty", not a field here -- presumably valid while valid_metadata has nir_metadata_block_index, confirm */
+   unsigned num_blocks;
+
+   nir_metadata valid_metadata; /* nir_metadata flags; see nir_metadata_require()/nir_metadata_preserve() */
+} nir_function_impl;
+
+static inline nir_cf_node *
+nir_cf_node_next(nir_cf_node *node)
+{
+   struct exec_node *next = exec_node_get_next(&node->node); /* following node in the same list */
+   if (exec_node_is_tail_sentinel(next)) /* nothing follows `node` */
+      return NULL;
+   else
+      return exec_node_data(nir_cf_node, next, node); /* nir_cf_node for `next` */
+}
+
+static inline nir_cf_node *
+nir_cf_node_prev(nir_cf_node *node)
+{
+   struct exec_node *prev = exec_node_get_prev(&node->node); /* preceding node in the same list */
+   if (exec_node_is_head_sentinel(prev)) /* nothing precedes `node` */
+      return NULL;
+   else
+      return exec_node_data(nir_cf_node, prev, node); /* nir_cf_node for `prev` */
+}
+
+static inline bool
+nir_cf_node_is_first(const nir_cf_node *node)
+{
+   return exec_node_is_head_sentinel(node->node.prev); /* true when the previous link is the list's head sentinel */
+}
+
+static inline bool
+nir_cf_node_is_last(const nir_cf_node *node)
+{
+   return exec_node_is_tail_sentinel(node->node.next); /* true when the next link is the list's tail sentinel */
+}
+
+NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node)
+
+typedef enum {
+   nir_parameter_in, /* presumably mirrors the GLSL `in` parameter qualifier -- confirm */
+   nir_parameter_out, /* presumably GLSL `out` -- confirm */
+   nir_parameter_inout, /* presumably GLSL `inout` -- confirm */
+} nir_parameter_type;
+
+typedef struct {
+   nir_parameter_type param_type; /* in / out / inout */
+   const struct glsl_type *type; /* GLSL type of the parameter */
+} nir_parameter;
+
+typedef struct nir_function_overload {
+   struct exec_node node; /* link in nir_function::overload_list */
+
+   unsigned num_params;
+   nir_parameter *params;
+   const struct glsl_type *return_type;
+
+   nir_function_impl *impl; /** < NULL if the overload has only been declared so far */
+
+   /** pointer to the function of which this is an overload */
+   struct nir_function *function;
+} nir_function_overload;
+
+typedef struct nir_function {
+   struct exec_node node; /* link in nir_shader::functions */
+
+   struct exec_list overload_list; /** < list of nir_function_overload */
+   const char *name;
+   struct nir_shader *shader; /* shader this function belongs to; read by nir_builder_init() */
+} nir_function;
+
+#define nir_function_first_overload(func) \
+   exec_node_data(nir_function_overload, \
+                  exec_list_get_head(&(func)->overload_list), node)
+
+typedef struct nir_shader_compiler_options {
+   bool lower_ffma; /* presumably requests lowering of ffma -- confirm against the algebraic pass */
+   bool lower_fpow; /* presumably requests lowering of fpow -- confirm */
+   bool lower_fsat; /* presumably requests lowering of fsat -- confirm */
+   bool lower_fsqrt; /* presumably requests lowering of fsqrt -- confirm */
+   /** lowers fneg and ineg to fsub and isub. */
+   bool lower_negate;
+} nir_shader_compiler_options;
+
+typedef struct nir_shader {
+   /** hash table of name -> uniform nir_variable */
+   struct hash_table *uniforms;
+
+   /** hash table of name -> input nir_variable */
+   struct hash_table *inputs;
+
+   /** hash table of name -> output nir_variable */
+   struct hash_table *outputs;
+
+   /** Set of driver-specific options for the shader.
+    *
+    * The memory for the options is expected to be kept in a single static
+    * copy by the driver.
+    */
+   const struct nir_shader_compiler_options *options;
+
+   /** list of global variables in the shader */
+   struct exec_list globals;
+
+   /** list of system value variables in the shader */
+   struct exec_list system_values;
+
+   struct exec_list functions; /** < list of nir_function */
+
+   /** list of global registers in the shader */
+   struct exec_list registers;
+
+   /** structures used in this shader */
+   unsigned num_user_structures;
+   struct glsl_type **user_structures;
+
+   /** next available global register index */
+   unsigned reg_alloc;
+
+   /**
+    * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+    * access, plus one
+    */
+   unsigned num_inputs, num_uniforms, num_outputs;
+} nir_shader;
+
+#define nir_foreach_overload(shader, overload)                        \
+   foreach_list_typed(nir_function, func, node, &(shader)->functions) \
+      foreach_list_typed(nir_function_overload, overload, node, \
+                         &(func)->overload_list)
+
+nir_shader *nir_shader_create(void *mem_ctx,
+                              const nir_shader_compiler_options *options);
+
+/** creates a register, including assigning it an index and adding it to the list */
+nir_register *nir_global_reg_create(nir_shader *shader);
+
+nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+void nir_reg_remove(nir_register *reg);
+
+/** creates a function and adds it to the shader's list of functions */
+nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+/** creates a null function returning null */
+nir_function_overload *nir_function_overload_create(nir_function *func);
+
+nir_function_impl *nir_function_impl_create(nir_function_overload *func);
+
+nir_block *nir_block_create(void *mem_ctx);
+nir_if *nir_if_create(void *mem_ctx);
+nir_loop *nir_loop_create(void *mem_ctx);
+
+nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+/** puts a control flow node immediately after another control flow node */
+void nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after);
+
+/** puts a control flow node immediately before another control flow node */
+void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before);
+
+/** puts a control flow node at the beginning of a list from an if, loop, or function */
+void nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node);
+
+/** puts a control flow node at the end of a list from an if, loop, or function */
+void nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node);
+
+/** removes a control flow node, doing any cleanup necessary */
+void nir_cf_node_remove(nir_cf_node *node);
+
+/** requests that the given pieces of metadata be generated */
+void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+/** dirties all but the preserved metadata */
+void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+
+nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+
+nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+                                                  unsigned num_components);
+
+nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+                                                nir_intrinsic_op op);
+
+nir_call_instr *nir_call_instr_create(void *mem_ctx,
+                                      nir_function_overload *callee);
+
+nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+
+nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+                                                unsigned num_components);
+
+nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+nir_deref_array *nir_deref_array_create(void *mem_ctx);
+nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+void nir_instr_insert_before(nir_instr *instr, nir_instr *before);
+void nir_instr_insert_after(nir_instr *instr, nir_instr *after);
+
+void nir_instr_insert_before_block(nir_block *block, nir_instr *before);
+void nir_instr_insert_after_block(nir_block *block, nir_instr *after);
+
+void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before);
+void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after);
+
+void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before);
+void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after);
+
+void nir_instr_remove(nir_instr *instr);
+
+typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
+typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
+bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
+                         void *state);
+bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
+bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+
+nir_const_value *nir_src_as_const_value(nir_src src);
+bool nir_srcs_equal(nir_src src1, nir_src src2);
+void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+
+void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+                       unsigned num_components, const char *name);
+void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+                      unsigned num_components, const char *name);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+
+/* visits basic blocks in source-code order */
+typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
+bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
+                       void *state);
+bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+                               void *state);
+
+/* If the following CF node is an if, this function returns that if.
+ * Otherwise, it returns NULL.
+ */
+nir_if *nir_block_get_following_if(nir_block *block);
+
+void nir_index_local_regs(nir_function_impl *impl);
+void nir_index_global_regs(nir_shader *shader);
+void nir_index_ssa_defs(nir_function_impl *impl);
+
+void nir_index_blocks(nir_function_impl *impl);
+
+void nir_print_shader(nir_shader *shader, FILE *fp);
+void nir_print_instr(const nir_instr *instr, FILE *fp);
+
+#ifdef DEBUG
+void nir_validate_shader(nir_shader *shader);
+#else
+static inline void nir_validate_shader(nir_shader *shader) { }
+#endif /* DEBUG */
+
+void nir_calc_dominance_impl(nir_function_impl *impl);
+void nir_calc_dominance(nir_shader *shader);
+
+nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+void nir_split_var_copies(nir_shader *shader);
+
+void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+void nir_lower_var_copies(nir_shader *shader);
+
+void nir_lower_global_vars_to_local(nir_shader *shader);
+
+void nir_lower_locals_to_regs(nir_shader *shader);
+
+void nir_lower_io(nir_shader *shader);
+
+void nir_lower_vars_to_ssa(nir_shader *shader);
+
+void nir_remove_dead_variables(nir_shader *shader);
+
+void nir_lower_vec_to_movs(nir_shader *shader);
+void nir_lower_alu_to_scalar(nir_shader *shader);
+
+void nir_lower_phis_to_scalar(nir_shader *shader);
+
+void nir_lower_samplers(nir_shader *shader,
+                        struct gl_shader_program *shader_program,
+                        struct gl_program *prog);
+
+void nir_lower_system_values(nir_shader *shader);
+
+void nir_lower_atomics(nir_shader *shader);
+void nir_lower_to_source_mods(nir_shader *shader);
+
+void nir_live_variables_impl(nir_function_impl *impl);
+bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+
+void nir_convert_to_ssa_impl(nir_function_impl *impl);
+void nir_convert_to_ssa(nir_shader *shader);
+void nir_convert_from_ssa(nir_shader *shader);
+
+bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_constant_folding(nir_shader *shader);
+
+bool nir_opt_global_to_local(nir_shader *shader);
+
+bool nir_copy_prop_impl(nir_function_impl *impl);
+bool nir_copy_prop(nir_shader *shader);
+
+bool nir_opt_cse(nir_shader *shader);
+
+bool nir_opt_dce_impl(nir_function_impl *impl);
+bool nir_opt_dce(nir_shader *shader);
+
+void nir_opt_gcm(nir_shader *shader);
+
+bool nir_opt_peephole_select(nir_shader *shader);
+bool nir_opt_peephole_ffma(nir_shader *shader);
+
+bool nir_opt_remove_phis(nir_shader *shader);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
new file mode 100644
index 000000000..afab1a008
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -0,0 +1,300 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Jason Ekstrand (jason@jlekstrand.net)
+
+import itertools
+import struct
+import sys
+import mako.template
+import re
+
+# Maps variable names to unique integer ids, allocated on first lookup
+class VarSet(object):
+   def __init__(self):
+      self.names = {} # variable name -> integer id
+      self.ids = itertools.count() # yields fresh ids: 0, 1, 2, ...
+      self.immutable = False; # set by lock(); once True, unknown names fail the assert below
+
+   def __getitem__(self, name):
+      if name not in self.names:
+         assert not self.immutable, "Unknown replacement variable: " + name
+         self.names[name] = self.ids.next() # Python 2 iterator method
+
+      return self.names[name]
+
+   def lock(self):
+      self.immutable = True # no new names may be registered after this
+
+class Value(object):
+   @staticmethod
+   def create(val, name_base, varset): # wraps val in a Value subclass; falls through (returns None) for any other type
+      if isinstance(val, tuple): # tuple -> nested expression
+         return Expression(val, name_base, varset)
+      elif isinstance(val, Expression): # already built: returned as-is
+         return val
+      elif isinstance(val, (str, unicode)): # string -> variable reference (Python 2 `unicode`)
+         return Variable(val, name_base, varset)
+      elif isinstance(val, (bool, int, long, float)): # numeric/boolean literal (Python 2 `long`)
+         return Constant(val, name_base)
+
+   __template = mako.template.Template("""
+static const ${val.c_type} ${val.name} = {
+   { ${val.type_enum} },
+% if isinstance(val, Constant):
+   { ${hex(val)} /* ${val.value} */ },
+% elif isinstance(val, Variable):
+   ${val.index}, /* ${val.var_name} */
+   ${'true' if val.is_constant else 'false'},
+   nir_type_${ val.required_type or 'invalid' },
+% elif isinstance(val, Expression):
+   nir_op_${val.opcode},
+   { ${', '.join(src.c_ptr for src in val.sources)} },
+% endif
+};""")
+
+   def __init__(self, name, type_str):
+      self.name = name # C identifier of the emitted static object
+      self.type_str = type_str # "constant", "variable" or "expression" as passed by the subclasses
+
+   @property
+   def type_enum(self):
+      return "nir_search_value_" + self.type_str # C enum tag name
+
+   @property
+   def c_type(self):
+      return "nir_search_" + self.type_str # C struct type name
+
+   @property
+   def c_ptr(self):
+      return "&{0}.value".format(self.name) # C expression: address of the object's `value` member
+
+   def render(self):
+      return self.__template.render(val=self, # the classes are passed in so the template's isinstance() checks can see them
+                                    Constant=Constant,
+                                    Variable=Variable,
+                                    Expression=Expression)
+
+class Constant(Value):
+   def __init__(self, val, name):
+      Value.__init__(self, name, "constant")
+      self.value = val # the Python literal (bool, int, long or float)
+
+   def __hex__(self): # Python 2 hook for hex(); used by the Value template as hex(val)
+      # Even if it's an integer, we still need to unpack as an unsigned
+      # int.  This is because, without C99, we can only assign to the first
+      # element of a union in an initializer.
+      if isinstance(self.value, (bool)): # checked first: bool is a subclass of int in Python
+         return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+      if isinstance(self.value, (int, long)):
+         return hex(struct.unpack('I', struct.pack('i', self.value))[0]) # signed int bits reinterpreted as unsigned
+      elif isinstance(self.value, float):
+         return hex(struct.unpack('I', struct.pack('f', self.value))[0]) # single-precision float bits as unsigned
+      else:
+         assert False
+
+_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+class Variable(Value):
+   def __init__(self, val, name, varset): # val is a string of the form [#]name[@type], per _var_name_re
+      Value.__init__(self, name, "variable")
+
+      m = _var_name_re.match(val) # re.match anchors only at the start; trailing characters are not rejected
+      assert m and m.group('name') is not None
+
+      self.var_name = m.group('name')
+      self.is_constant = m.group('const') is not None # True when val starts with '#'
+      self.required_type = m.group('type') # text after '@', or None when absent
+
+      if self.required_type is not None:
+         assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+      self.index = varset[self.var_name] # id shared by every use of this name in the same VarSet
+
+class Expression(Value):
+   def __init__(self, expr, name_base, varset): # expr is a tuple: (opcode, src0, src1, ...)
+      Value.__init__(self, name_base, "expression")
+      assert isinstance(expr, tuple)
+
+      self.opcode = expr[0]
+      self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset) # source i is named <name_base>_<i>
+                       for (i, src) in enumerate(expr[1:]) ]
+
+   def render(self):
+      srcs = "\n".join(src.render() for src in self.sources) # sources are emitted first so c_ptr references resolve
+      return srcs + super(Expression, self).render()
+
+_optimization_ids = itertools.count()
+
+condition_list = ['true']
+
+class SearchAndReplace(object):
+   def __init__(self, transform): # transform is a sequence: (search, replace[, condition])
+      self.id = _optimization_ids.next() # unique per instance; used in the generated C names
+
+      search = transform[0]
+      replace = transform[1]
+      if len(transform) > 2:
+         self.condition = transform[2] # C expression string, emitted verbatim by the pass template
+      else:
+         self.condition = 'true'
+
+      if self.condition not in condition_list: # condition_list is module-global, shared by all instances
+         condition_list.append(self.condition)
+      self.condition_index = condition_list.index(self.condition)
+
+      varset = VarSet()
+      if isinstance(search, Expression):
+         self.search = search # pre-built: its variables are not registered in this varset
+      else:
+         self.search = Expression(search, "search{0}".format(self.id), varset)
+
+      varset.lock() # the replacement may only use variables seen while building the search
+
+      if isinstance(replace, Value):
+         self.replace = replace
+      else:
+         self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+_algebraic_pass_template = mako.template.Template("""
+#include "nir.h"
+#include "nir_search.h"
+
+struct transform {
+   const nir_search_expression *search;
+   const nir_search_value *replace;
+   unsigned condition_offset;
+};
+
+% for (opcode, xform_list) in xform_dict.iteritems():
+% for xform in xform_list:
+   ${xform.search.render()}
+   ${xform.replace.render()}
+% endfor
+
+static const struct transform ${pass_name}_${opcode}_xforms[] = {
+% for xform in xform_list:
+   { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+% endfor
+};
+% endfor
+
+struct opt_state {
+   void *mem_ctx;
+   bool progress;
+   const bool *condition_flags;
+};
+
+static bool
+${pass_name}_block(nir_block *block, void *void_state)
+{
+   struct opt_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (!alu->dest.dest.is_ssa)
+         continue;
+
+      switch (alu->op) {
+      % for opcode in xform_dict.keys():
+      case nir_op_${opcode}:
+         for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+            const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+            if (state->condition_flags[xform->condition_offset] &&
+                nir_replace_instr(alu, xform->search, xform->replace,
+                                  state->mem_ctx)) {
+               state->progress = true;
+               break;
+            }
+         }
+         break;
+      % endfor
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+static bool
+${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+{
+   struct opt_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.progress = false;
+   state.condition_flags = condition_flags;
+
+   nir_foreach_block(impl, ${pass_name}_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return state.progress;
+}
+
+
+bool
+${pass_name}(nir_shader *shader)
+{
+   bool progress = false;
+   bool condition_flags[${len(condition_list)}];
+   const nir_shader_compiler_options *options = shader->options;
+
+   % for index, condition in enumerate(condition_list):
+   condition_flags[${index}] = ${condition};
+   % endfor
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         progress |= ${pass_name}_impl(overload->impl, condition_flags);
+   }
+
+   return progress;
+}
+""")
+
+class AlgebraicPass(object):
+   def __init__(self, pass_name, transforms): # transforms: iterable of SearchAndReplace objects or raw tuples
+      self.xform_dict = {} # search opcode -> list of SearchAndReplace, in input order
+      self.pass_name = pass_name # prefix for the generated C function and array names
+
+      for xform in transforms:
+         if not isinstance(xform, SearchAndReplace):
+            xform = SearchAndReplace(xform)
+
+         if xform.search.opcode not in self.xform_dict:
+            self.xform_dict[xform.search.opcode] = []
+
+         self.xform_dict[xform.search.opcode].append(xform)
+
+   def render(self): # returns the generated C source text of the pass
+      return _algebraic_pass_template.render(pass_name=self.pass_name,
+                                             xform_dict=self.xform_dict,
+                                             condition_list=condition_list)
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
new file mode 100644
index 000000000..7c4f7fd96
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NIR_BUILDER_H
+#define NIR_BUILDER_H
+
+struct exec_list;
+
+typedef struct nir_builder {
+   struct exec_list *cf_node_list; /* list new instructions go after; set by nir_builder_insert_after_cf_list() */
+   nir_shader *shader; /* passed to nir_alu_instr_create() as mem_ctx */
+   nir_function_impl *impl; /* function implementation given to nir_builder_init() */
+} nir_builder;
+
+static inline void
+nir_builder_init(nir_builder *build, nir_function_impl *impl)
+{
+   memset(build, 0, sizeof(*build)); /* leaves cf_node_list NULL until an insertion point is set */
+   build->impl = impl;
+   build->shader = impl->overload->function->shader; /* shader that owns impl's function */
+}
+
+static inline void
+nir_builder_insert_after_cf_list(nir_builder *build,
+                                 struct exec_list *cf_node_list)
+{
+   build->cf_node_list = cf_node_list; /* nir_build_alu() will insert after this list */
+}
+
+
+static inline nir_ssa_def *
+nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
+              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
+{
+   const nir_op_info *op_info = &nir_op_infos[op];
+   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
+   if (!instr) /* creation failed: nothing is inserted */
+      return NULL;
+
+   instr->src[0].src = nir_src_for_ssa(src0); /* src0 is used unconditionally; src1..src3 only when non-NULL */
+   if (src1)
+      instr->src[1].src = nir_src_for_ssa(src1);
+   if (src2)
+      instr->src[2].src = nir_src_for_ssa(src2);
+   if (src3)
+      instr->src[3].src = nir_src_for_ssa(src3);
+
+   /* Guess the number of components the destination temporary should have
+    * based on our input sizes, if it's not fixed for the op.
+    */
+   unsigned num_components = op_info->output_size;
+   if (num_components == 0) {
+      for (unsigned i = 0; i < op_info->num_inputs; i++) {
+         if (op_info->input_sizes[i] == 0) /* only inputs whose size is not fixed contribute */
+            num_components = MAX2(num_components,
+                                  instr->src[i].src.ssa->num_components);
+      }
+   }
+   assert(num_components != 0);
+
+   /* Make sure we don't swizzle from outside of our source vector (like if a
+    * scalar value was passed into a multiply with a vector).
+    */
+   for (unsigned i = 0; i < op_info->num_inputs; i++) {
+      for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) {
+         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1; /* point unused channels at the last valid component */
+      }
+   }
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+   instr->dest.write_mask = (1 << num_components) - 1; /* low num_components bits set */
+
+   nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr); /* needs nir_builder_insert_after_cf_list() to have been called */
+
+   return &instr->dest.dest.ssa; /* SSA def of the new instruction's result */
+}
+
+#define ALU1(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0)                           \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL);      \
+}
+
+#define ALU2(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)        \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL);      \
+}
+
+#define ALU3(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
+         nir_ssa_def *src1, nir_ssa_def *src2)                            \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL);      \
+}
+
+#define ALU4(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
+         nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)         \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3);      \
+}
+
+#include "nir_builder_opcodes.h"
+
+#endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_builder_opcodes_h.py b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py
new file mode 100644
index 000000000..e27206ea8
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py
@@ -0,0 +1,38 @@
+#! /usr/bin/env python
+
+template = """\
+/* Copyright (C) 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _NIR_BUILDER_OPCODES_
+#define _NIR_BUILDER_OPCODES_
+
+% for name, opcode in sorted(opcodes.iteritems()):
+ALU${opcode.num_inputs}(${name});
+% endfor
+
+#endif /* _NIR_BUILDER_OPCODES_ */"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print Template(template).render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.h b/mesalib/src/glsl/nir/nir_constant_expressions.h
new file mode 100644
index 000000000..97997f2e5
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_constant_expressions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#ifndef NIR_CONSTANT_EXPRESSIONS_H
+#define NIR_CONSTANT_EXPRESSIONS_H
+
+#include "nir.h"
+
+/**
+ * Evaluates the ALU opcode \p op on constant sources.
+ *
+ * \param op              opcode to evaluate
+ * \param num_components  number of components the generated evaluator loops
+ *                        over for per-component opcodes
+ * \param src             array of constant source values, indexed by source
+ *                        number
+ * \return the constant result of the operation
+ */
+nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
+                                      nir_const_value *src);
+
+#endif /* NIR_CONSTANT_EXPRESSIONS_H */
diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.py b/mesalib/src/glsl/nir/nir_constant_expressions.py
new file mode 100644
index 000000000..22bc4f095
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_constant_expressions.py
@@ -0,0 +1,352 @@
+#! /usr/bin/python2
+template = """\
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include <math.h>
+#include "main/core.h"
+#include "nir_constant_expressions.h"
+
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+static int isnormal(double x)
+{
+   return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
+}
+#elif defined(__SUNPRO_CC)
+#include <ieeefp.h>
+static int isnormal(double x)
+{
+   return fpclass(x) == FP_NORMAL;
+}
+#endif
+
+#if defined(_MSC_VER)
+static double copysign(double x, double y)
+{
+   return _copysign(x, y);
+}
+#endif
+
+/**
+ * Evaluate one component of packSnorm4x8.
+ */
+static uint8_t
+pack_snorm_1x8(float x)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    packSnorm4x8
+     *    ------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+     *
+     * We must first cast the float to an int, because casting a negative
+     * float to a uint is undefined.
+     */
+   return (uint8_t) (int8_t)
+          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    packSnorm2x16
+     *    -------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+     *
+     * We must first cast the float to an int, because casting a negative
+     * float to a uint is undefined.
+     */
+   return (uint16_t) (int16_t)
+          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm4x8.
+ */
+static float
+unpack_snorm_1x8(uint8_t u)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    unpackSnorm4x8
+     *    --------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+     */
+   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static float
+unpack_snorm_1x16(uint16_t u)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    unpackSnorm2x16
+     *    ---------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+     */
+   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component packUnorm4x8.
+ */
+static uint8_t
+pack_unorm_1x8(float x)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    packUnorm4x8
+     *    ------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+     */
+   return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f);
+}
+
+/**
+ * Evaluate one component packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    packUnorm2x16
+     *    -------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+     */
+   return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+/**
+ * Evaluate one component of unpackUnorm4x8.
+ */
+static float
+unpack_unorm_1x8(uint8_t u)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    unpackUnorm4x8
+     *    --------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackUnorm4x8: f / 255.0
+     */
+   return (float) u / 255.0f;
+}
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static float
+unpack_unorm_1x16(uint16_t u)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    unpackUnorm2x16
+     *    ---------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackUnorm2x16: f / 65535.0
+     */
+   return (float) u / 65535.0f;
+}
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   return _mesa_float_to_half(x);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ */
+static float
+unpack_half_1x16(uint16_t u)
+{
+   return _mesa_half_to_float(u);
+}
+
+/* Some typed vector structures to make things like src0.y work */
+% for type in ["float", "int", "unsigned", "bool"]:
+struct ${type}_vec {
+   ${type} x;
+   ${type} y;
+   ${type} z;
+   ${type} w;
+};
+% endfor
+
+% for name, op in sorted(opcodes.iteritems()):
+static nir_const_value
+evaluate_${name}(unsigned num_components, nir_const_value *_src)
+{
+   nir_const_value _dst_val = { { {0, 0, 0, 0} } };
+
+   ## For each non-per-component input, create a variable srcN that
+   ## contains x, y, z, and w elements which are filled in with the
+   ## appropriately-typed values.
+   % for j in range(op.num_inputs):
+      % if op.input_sizes[j] == 0:
+         <% continue %>
+      % elif "src" + str(j) not in op.const_expr:
+         ## Avoid unused variable warnings
+         <% continue %>
+      %endif
+
+      struct ${op.input_types[j]}_vec src${j} = {
+      % for k in range(op.input_sizes[j]):
+         % if op.input_types[j] == "bool":
+            _src[${j}].u[${k}] != 0,
+         % else:
+            _src[${j}].${op.input_types[j][:1]}[${k}],
+         % endif
+      % endfor
+      };
+   % endfor
+
+   % if op.output_size == 0:
+      ## For per-component instructions, we need to iterate over the
+      ## components and apply the constant expression one component
+      ## at a time.
+      for (unsigned _i = 0; _i < num_components; _i++) {
+         ## For each per-component input, create a variable srcN that
+         ## contains the value of the current (_i'th) component.
+         % for j in range(op.num_inputs):
+            % if op.input_sizes[j] != 0:
+               <% continue %>
+            % elif "src" + str(j) not in op.const_expr:
+               ## Avoid unused variable warnings
+               <% continue %>
+            % elif op.input_types[j] == "bool":
+               bool src${j} = _src[${j}].u[_i] != 0;
+            % else:
+               ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
+            % endif
+         % endfor
+
+         ## Create an appropriately-typed variable dst and assign the
+         ## result of the const_expr to it.  If const_expr already contains
+         ## writes to dst, just include const_expr directly.
+         % if "dst" in op.const_expr:
+            ${op.output_type} dst;
+            ${op.const_expr}
+         % else:
+            ${op.output_type} dst = ${op.const_expr};
+         % endif
+
+         ## Store the current component of the actual destination to the
+         ## value of dst.
+         % if op.output_type == "bool":
+            ## Sanitize the C value to a proper NIR bool
+            _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
+         % else:
+            _dst_val.${op.output_type[:1]}[_i] = dst;
+         % endif
+      }
+   % else:
+      ## In the non-per-component case, create a struct dst with
+      ## appropriately-typed elements x, y, z, and w and assign the result
+      ## of the const_expr to all components of dst, or include the
+      ## const_expr directly if it writes to dst already.
+      struct ${op.output_type}_vec dst;
+
+      % if "dst" in op.const_expr:
+         ${op.const_expr}
+      % else:
+         ## Splat the value to all components.  This way expressions which
+         ## write the same value to all components don't need to explicitly
+         ## write to dest.  One such example is fnoise which has a
+         ## const_expr of 0.0f.
+         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+      % endif
+
+      ## For each component in the destination, copy the value of dst to
+      ## the actual destination.
+      % for k in range(op.output_size):
+         % if op.output_type == "bool":
+            ## Sanitize the C value to a proper NIR bool
+            _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+         % else:
+            _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
+         % endif
+      % endfor
+   % endif
+
+   return _dst_val;
+}
+% endfor
+
+nir_const_value
+nir_eval_const_opcode(nir_op op, unsigned num_components,
+                      nir_const_value *src)
+{
+   switch (op) {
+% for name in sorted(opcodes.iterkeys()):
+   case nir_op_${name}: {
+      return evaluate_${name}(num_components, src);
+      break;
+   }
+% endfor
+   default:
+      unreachable("shouldn't get here");
+   }
+}"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print Template(template).render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_dominance.c b/mesalib/src/glsl/nir/nir_dominance.c
new file mode 100644
index 000000000..2f50db1c1
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_dominance.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements the algorithms for computing the dominance tree and the
+ * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
+ * Harvey, and Kennedy.
+ */
+
+/* State handed to the per-block callbacks of the dominance computation. */
+typedef struct {
+   /* Function whose blocks are being processed; its start_block is treated
+    * specially by init_block_cb and calc_dominance_cb.
+    */
+   nir_function_impl *impl;
+   /* Set by calc_dominance_cb whenever a block's imm_dom changed, so the
+    * driver knows another fixed-point iteration is needed.
+    */
+   bool progress;
+} dom_state;
+
+/* Per-block reset before the dominance computation: the start block becomes
+ * its own immediate dominator, every other block gets none; the child count
+ * is zeroed and the dominance frontier set is emptied.
+ */
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   const dom_state *dom = _state;
+   const bool is_start = (block == dom->impl->start_block);
+
+   block->imm_dom = is_start ? block : NULL;
+   block->num_dom_children = 0;
+
+   /* Drop whatever frontier entries are currently in the set. */
+   struct set_entry *stale;
+   set_foreach(block->dom_frontier, stale) {
+      _mesa_set_remove(block->dom_frontier, stale);
+   }
+
+   return true;
+}
+
+/* Walks both blocks up their imm_dom chains until they meet and returns the
+ * meeting block.
+ */
+static nir_block *
+intersect(nir_block *b1, nir_block *b2)
+{
+   for (;;) {
+      if (b1 == b2)
+         return b1;
+
+      /*
+       * The comparisons are flipped relative to the paper: blocks here are
+       * indexed from beginning -> end (i.e. reverse post-order) rather than
+       * in the post-order the paper assumes.
+       */
+      while (b1->index > b2->index)
+         b1 = b1->imm_dom;
+      while (b2->index > b1->index)
+         b2 = b2->imm_dom;
+   }
+}
+
+/* One step of the iterative dominator computation for a single block: the
+ * new immediate dominator is the intersection of all predecessors that
+ * already have one.  Flags state->progress when the block's imm_dom changed.
+ */
+static bool
+calc_dominance_cb(nir_block *block, void *_state)
+{
+   dom_state *dom = _state;
+
+   /* The start block's immediate dominator is never recomputed. */
+   if (dom->impl->start_block == block)
+      return true;
+
+   nir_block *new_idom = NULL;
+   struct set_entry *pred_entry;
+   set_foreach(block->predecessors, pred_entry) {
+      nir_block *pred = (nir_block *) pred_entry->key;
+
+      /* Predecessors without an imm_dom yet do not take part. */
+      if (pred->imm_dom != NULL)
+         new_idom = new_idom ? intersect(pred, new_idom) : pred;
+   }
+
+   assert(new_idom);
+   if (new_idom != block->imm_dom) {
+      block->imm_dom = new_idom;
+      dom->progress = true;
+   }
+
+   return true;
+}
+
+/* Adds block to the dominance frontier of every block on the imm_dom chain
+ * from each of its predecessors up to (but excluding) block's own immediate
+ * dominator.  Only blocks with more than one predecessor contribute.
+ *
+ * Always returns true so that nir_foreach_block keeps iterating.
+ */
+static bool
+calc_dom_frontier_cb(nir_block *block, void *state)
+{
+   (void) state;
+
+   /* Blocks with at most one predecessor add nothing to any frontier. */
+   if (block->predecessors->entries <= 1)
+      return true;
+
+   struct set_entry *entry;
+   set_foreach(block->predecessors, entry) {
+      nir_block *runner = (nir_block *) entry->key;
+
+      /* While this callback runs the start block is still its own imm_dom,
+       * so a predecessor whose imm_dom is NULL was never reached by the
+       * dominance computation.  Walking its chain would dereference NULL,
+       * so such predecessors are skipped.
+       */
+      if (runner->imm_dom != NULL) {
+         while (runner != block->imm_dom) {
+            _mesa_set_add(runner->dom_frontier, block);
+            runner = runner->imm_dom;
+         }
+      }
+   }
+
+   return true;
+}
+
+/*
+ * Compute each node's children in the dominance tree from the immediate
+ * dominator information. We do this in three stages:
+ *
+ * 1. Calculate the number of children each node has
+ * 2. Allocate arrays, setting the number of children to 0 again
+ * 3. For each node, add itself to its parent's list of children, using
+ *    num_dom_children as an index - at the end of this step, num_dom_children
+ *    for each node will be the same as it was at the end of step #1.
+ */
+
+/* Stage 1: bump the child counter of this block's immediate dominator. */
+static bool
+block_count_children(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_block *parent = block->imm_dom;
+   if (parent != NULL)
+      parent->num_dom_children++;
+
+   return true;
+}
+
+/* Stage 2: allocate the dom_children array sized by the counted number of
+ * children, then reset the counter so stage 3 can reuse it as an insertion
+ * index.  `state` is the ralloc context the array is allocated from.
+ */
+static bool
+block_alloc_children(nir_block *block, void *state)
+{
+   const unsigned child_count = block->num_dom_children;
+
+   block->dom_children = ralloc_array(state, nir_block *, child_count);
+   block->num_dom_children = 0;
+
+   return true;
+}
+
+/* Stage 3: append this block to its immediate dominator's dom_children
+ * array, using num_dom_children as the next free slot.
+ */
+static bool
+block_add_child(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_block *parent = block->imm_dom;
+   if (parent == NULL)
+      return true;
+
+   parent->dom_children[parent->num_dom_children] = block;
+   parent->num_dom_children++;
+
+   return true;
+}
+
+/* Builds every block's dom_children array in three passes over the blocks:
+ * count, allocate, fill.  The arrays are allocated out of the ralloc parent
+ * of impl.
+ */
+static void
+calc_dom_children(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, block_count_children, NULL);
+   nir_foreach_block(impl, block_alloc_children, ralloc_parent(impl));
+   nir_foreach_block(impl, block_add_child, NULL);
+}
+
+/* Depth-first walk of the dominance tree rooted at block.  Each block gets
+ * dom_pre_index when it is entered and dom_post_index when it is left; both
+ * are drawn from the same running counter *index.
+ */
+static void
+calc_dfs_indicies(nir_block *block, unsigned *index)
+{
+   block->dom_pre_index = *index;
+   *index += 1;
+
+   unsigned child = 0;
+   while (child < block->num_dom_children) {
+      calc_dfs_indicies(block->dom_children[child], index);
+      child++;
+   }
+
+   block->dom_post_index = *index;
+   *index += 1;
+}
+
+/* Computes immediate dominators, dominance frontiers, dominance-tree
+ * children and DFS pre/post indices for every block of impl.  Does nothing
+ * when the dominance metadata is already flagged valid.
+ */
+void
+nir_calc_dominance_impl(nir_function_impl *impl)
+{
+   if (impl->valid_metadata & nir_metadata_dominance)
+      return;
+
+   /* intersect() compares block indices, so they must be up to date. */
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   dom_state state;
+   state.progress = true;
+   state.impl = impl;
+
+   nir_foreach_block(impl, init_block_cb, &state);
+
+   /* Iterate to a fixed point: stop once a full sweep changes nothing. */
+   do {
+      state.progress = false;
+      nir_foreach_block(impl, calc_dominance_cb, &state);
+   } while (state.progress);
+
+   nir_foreach_block(impl, calc_dom_frontier_cb, &state);
+
+   /* The start block was its own imm_dom during the computation; clear it
+    * before the dominance tree is built.
+    */
+   impl->start_block->imm_dom = NULL;
+
+   calc_dom_children(impl);
+
+   unsigned next_dfs_index = 0;
+   calc_dfs_indicies(impl->start_block, &next_dfs_index);
+}
+
+/* Runs nir_calc_dominance_impl on every overload in the shader that has an
+ * implementation.
+ */
+void
+nir_calc_dominance(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      if (impl != NULL)
+         nir_calc_dominance_impl(impl);
+   }
+}
+
+/**
+ * Computes the least common ancestor of two blocks by walking their
+ * immediate-dominator chains.  A NULL block acts as the identity: the other
+ * block is returned as-is.
+ */
+nir_block *
+nir_dominance_lca(nir_block *b1, nir_block *b2)
+{
+   if (b1 == NULL || b2 == NULL)
+      return (b1 == NULL) ? b2 : b1;
+
+   /* Both blocks must live in the same function ... */
+   assert(nir_cf_node_get_function(&b1->cf_node) ==
+          nir_cf_node_get_function(&b2->cf_node));
+
+   /* ... and that function's dominance metadata must be valid. */
+   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+          nir_metadata_dominance);
+
+   return intersect(b1, b2);
+}
+
+/**
+ * Returns true if parent dominates child, i.e. if child's DFS pre/post
+ * index interval lies within parent's.  A block dominates itself.
+ */
+bool
+nir_block_dominates(nir_block *parent, nir_block *child)
+{
+   assert(nir_cf_node_get_function(&parent->cf_node) ==
+          nir_cf_node_get_function(&child->cf_node));
+
+   assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+          nir_metadata_dominance);
+
+   if (child->dom_pre_index < parent->dom_pre_index)
+      return false;
+
+   return child->dom_post_index <= parent->dom_post_index;
+}
+
+/* Emits one "idom -> block" edge line for a block that has an immediate
+ * dominator.  `state` is the FILE to write to.
+ */
+static bool
+dump_block_dom(nir_block *block, void *state)
+{
+   FILE *out = state;
+   const nir_block *idom = block->imm_dom;
+
+   if (idom != NULL)
+      fprintf(out, "\t%u -> %u\n", idom->index, block->index);
+
+   return true;
+}
+
+/* Writes the dominance tree of impl to fp as a "digraph doms_<name>" block
+ * with one edge per immediate-dominator relation.
+ */
+void
+nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
+{
+   const char *func_name = impl->overload->function->name;
+
+   fprintf(fp, "digraph doms_%s {\n", func_name);
+   nir_foreach_block(impl, dump_block_dom, fp);
+   fputs("}\n\n", fp);
+}
+
+/* Dumps the dominance tree of every implemented overload in the shader. */
+void
+nir_dump_dom_tree(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      if (impl != NULL)
+         nir_dump_dom_tree_impl(impl, fp);
+   }
+}
+
+/* Prints "DF(<block index>) = {<index>, <index>, }" for one block, listing
+ * the indices of all blocks in its dominance frontier.  `state` is the FILE
+ * to write to.
+ */
+static bool
+dump_block_dom_frontier(nir_block *block, void *state)
+{
+   FILE *out = state;
+   struct set_entry *member;
+
+   fprintf(out, "DF(%u) = {", block->index);
+   set_foreach(block->dom_frontier, member) {
+      const nir_block *frontier_block = (const nir_block *) member->key;
+      fprintf(out, "%u, ", frontier_block->index);
+   }
+   fputs("}\n", out);
+
+   return true;
+}
+
+/* Writes one "DF(<index>) = {...}" line per block of impl to fp by running
+ * dump_block_dom_frontier over all of its blocks.
+ */
+void
+nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
+{
+   nir_foreach_block(impl, dump_block_dom_frontier, fp);
+}
+
+/* Dumps the dominance frontiers of every implemented overload in the
+ * shader.
+ */
+void
+nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      if (impl != NULL)
+         nir_dump_dom_frontier_impl(impl, fp);
+   }
+}
+
+/* Emits one "block -> successor" edge line for each of the block's two
+ * successor slots that is populated.  `state` is the FILE to write to.
+ */
+static bool
+dump_block_succs(nir_block *block, void *state)
+{
+   FILE *out = state;
+
+   for (unsigned slot = 0; slot < 2; slot++) {
+      const nir_block *succ = block->successors[slot];
+      if (succ != NULL)
+         fprintf(out, "\t%u -> %u\n", block->index, succ->index);
+   }
+
+   return true;
+}
+
+/* Writes the control-flow graph of impl to fp as a "digraph cfg_<name>"
+ * block with one edge per block successor.
+ */
+void
+nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
+{
+   const char *func_name = impl->overload->function->name;
+
+   fprintf(fp, "digraph cfg_%s {\n", func_name);
+   nir_foreach_block(impl, dump_block_succs, fp);
+   fputs("}\n\n", fp);
+}
+
+/* Dumps the control-flow graph of every implemented overload in the
+ * shader.
+ */
+void
+nir_dump_cfg(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      if (impl != NULL)
+         nir_dump_cfg_impl(impl, fp);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
new file mode 100644
index 000000000..7c5009577
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This file implements an out-of-SSA pass as described in "Revisiting
+ * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
+ * Boissinot et. al.
+ */
+
+/* Shared state of the out-of-SSA pass, handed to the per-block callbacks. */
+struct from_ssa_state {
+   /* NOTE(review): presumably the context for allocations that outlive the
+    * pass - not used in the visible part of the file, confirm against
+    * callers.
+    */
+   void *mem_ctx;
+   /* ralloc context for merge sets, merge nodes and the parallel copy
+    * instructions created by this pass.
+    */
+   void *dead_ctx;
+   /* NOTE(review): not used in the visible part of the file. */
+   struct hash_table *ssa_table;
+   /* Maps a nir_ssa_def to its merge_node (see get_merge_node). */
+   struct hash_table *merge_node_table;
+   /* NOTE(review): not used in the visible part of the file. */
+   nir_instr *instr;
+   /* NOTE(review): not used in the visible part of the file. */
+   nir_function_impl *impl;
+};
+
+/* Returns true if the definition of a dominates the definition of b.
+ *
+ * The live_index values are used as a cheap pre-filter; only definitions in
+ * different blocks need the block-level dominance query.
+ */
+static bool
+ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
+{
+   /* SSA undefs (live_index 0) always dominate */
+   if (a->live_index == 0)
+      return true;
+
+   /* A definition indexed before a cannot be dominated by it. */
+   if (b->live_index < a->live_index)
+      return false;
+
+   nir_block *a_block = a->parent_instr->block;
+   nir_block *b_block = b->parent_instr->block;
+
+   if (a_block == b_block)
+      return a->live_index <= b->live_index;
+
+   return nir_block_dominates(a_block, b_block);
+}
+
+
+/* The following data structure, which I have named merge_set, is a way of
+ * representing a set of non-interfering registers.  This is
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et al. but the
+ * implementation concept is taken from "Revisiting Out-of-SSA Translation
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
+ *
+ * Each SSA definition is associated with a merge_node and the association
+ * is represented by a combination of a hash table and the "def" parameter
+ * in the merge_node structure.  The merge_set stores a linked list of
+ * merge_node's in dominance order of the ssa definitions.  (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
+ * this is an easy thing to keep up.)  It is assumed that no pair of the
+ * nodes in a given set interfere.  Merging two sets or checking for
+ * interference can be done in a single linear-time merge-sort walk of the
+ * two lists of nodes.
+ */
+struct merge_set;
+
+/* One SSA definition's membership record in a merge_set. */
+typedef struct {
+   struct exec_node node;   /* link in the owning set's `nodes` list */
+   struct merge_set *set;   /* set this node currently belongs to */
+   nir_ssa_def *def;        /* the SSA definition this node stands for */
+} merge_node;
+
+/* A group of merge_nodes that are assumed not to interfere with each other. */
+typedef struct merge_set {
+   struct exec_list nodes;   /* merge_node list, ordered by def->live_index */
+   unsigned size;            /* number of nodes in `nodes` */
+   /* Starts out NULL; NOTE(review): presumably the register eventually
+    * assigned to the whole set - confirm against the rest of the pass.
+    */
+   nir_register *reg;
+} merge_set;
+
+/* Debugging aid, compiled out by default: prints the definitions of a merge
+ * set one per line, indenting each by the number of earlier entries that are
+ * still on the dominance stack when it is reached.
+ */
+#if 0
+static void
+merge_set_dump(merge_set *set, FILE *fp)
+{
+   nir_ssa_def *dom[set->size];
+   int dom_idx = -1;
+
+   foreach_list_typed(merge_node, node, node, &set->nodes) {
+      /* Pop stack entries that do not dominate the current definition. */
+      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
+         dom_idx--;
+
+      for (int i = 0; i <= dom_idx; i++)
+         fprintf(fp, "  ");
+
+      if (node->def->name)
+         fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name);
+      else
+         fprintf(fp, "ssa_%d\n", node->def->index);
+
+      dom[++dom_idx] = node->def;
+   }
+}
+#endif
+
+/* Returns the merge_node associated with def.  On first request a node is
+ * created inside a brand-new single-element merge_set (both allocated from
+ * state->dead_ctx) and recorded in state->merge_node_table.
+ */
+static merge_node *
+get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
+{
+   struct hash_entry *hit =
+      _mesa_hash_table_search(state->merge_node_table, def);
+   if (hit != NULL)
+      return hit->data;
+
+   /* No node yet: start a singleton set for this definition. */
+   merge_set *singleton = ralloc(state->dead_ctx, merge_set);
+   singleton->reg = NULL;
+   singleton->size = 1;
+   exec_list_make_empty(&singleton->nodes);
+
+   merge_node *fresh = ralloc(state->dead_ctx, merge_node);
+   fresh->def = def;
+   fresh->set = singleton;
+   exec_list_push_head(&singleton->nodes, &fresh->node);
+
+   _mesa_hash_table_insert(state->merge_node_table, def, fresh);
+
+   return fresh;
+}
+
+/* Returns whether the SSA definitions behind two merge nodes interfere. */
+static bool
+merge_nodes_interfere(merge_node *a, merge_node *b)
+{
+   nir_ssa_def *def_a = a->def;
+   nir_ssa_def *def_b = b->def;
+
+   return nir_ssa_defs_interfere(def_a, def_b);
+}
+
+/* Merges b into a and returns a.
+ *
+ * Both node lists are walked in lock-step.  Every node of b is unlinked and
+ * re-inserted into a's list in front of the first a-node whose live_index
+ * is greater (or at the tail of a's list), and its `set` pointer is
+ * redirected to a.  Afterwards a->size accounts for all moved nodes and
+ * b->size is 0.
+ */
+static merge_set *
+merge_merge_sets(merge_set *a, merge_set *b)
+{
+   struct exec_node *an = exec_list_get_head(&a->nodes);
+   struct exec_node *bn = exec_list_get_head(&b->nodes);
+   while (!exec_node_is_tail_sentinel(bn)) {
+      /* a_node is computed even when `an` is the tail sentinel, but the
+       * `||` below guarantees it is only dereferenced for a real node.
+       */
+      merge_node *a_node = exec_node_data(merge_node, an, node);
+      merge_node *b_node = exec_node_data(merge_node, bn, node);
+
+      if (exec_node_is_tail_sentinel(an) ||
+          a_node->def->live_index > b_node->def->live_index) {
+         /* Remember b's successor before bn is unlinked from b's list. */
+         struct exec_node *next = bn->next;
+         exec_node_remove(bn);
+         exec_node_insert_node_before(an, bn);
+         exec_node_data(merge_node, bn, node)->set = a;
+         bn = next;
+      } else {
+         /* The current a-node sorts first (or ties): advance in a. */
+         an = an->next;
+      }
+   }
+
+   a->size += b->size;
+   b->size = 0;
+
+   return a;
+}
+
+/* Checks for any interference between two merge sets
+ *
+ * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
+ * Translation for Correctness, Code Quality, and Efficiency" by
+ * Boissinot et. al.
+ *
+ * The two node lists are visited together in increasing live_index order
+ * while a stack (`dom`) holds the visited nodes that dominate the current
+ * one.  Each node is tested for interference only against the top of that
+ * stack.  Returns true as soon as an interference is found, false if the
+ * walk completes without one.
+ */
+static bool
+merge_sets_interfere(merge_set *a, merge_set *b)
+{
+   /* Stack of dominating nodes; it can never hold more than every node of
+    * both sets.
+    */
+   merge_node *dom[a->size + b->size];
+   int dom_idx = -1;
+
+   struct exec_node *an = exec_list_get_head(&a->nodes);
+   struct exec_node *bn = exec_list_get_head(&b->nodes);
+   while (!exec_node_is_tail_sentinel(an) ||
+          !exec_node_is_tail_sentinel(bn)) {
+
+      /* Pick the next node in merged live_index order; ties go to a. */
+      merge_node *current;
+      if (exec_node_is_tail_sentinel(an)) {
+         current = exec_node_data(merge_node, bn, node);
+         bn = bn->next;
+      } else if (exec_node_is_tail_sentinel(bn)) {
+         current = exec_node_data(merge_node, an, node);
+         an = an->next;
+      } else {
+         merge_node *a_node = exec_node_data(merge_node, an, node);
+         merge_node *b_node = exec_node_data(merge_node, bn, node);
+
+         if (a_node->def->live_index <= b_node->def->live_index) {
+            current = a_node;
+            an = an->next;
+         } else {
+            current = b_node;
+            bn = bn->next;
+         }
+      }
+
+      /* Pop entries that do not dominate the current definition. */
+      while (dom_idx >= 0 &&
+             !ssa_def_dominates(dom[dom_idx]->def, current->def))
+         dom_idx--;
+
+      /* Only the nearest remaining dominator is checked. */
+      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
+         return true;
+
+      dom[++dom_idx] = current;
+   }
+
+   return false;
+}
+
+/* Per-block callback: if either successor of block begins with a phi
+ * instruction, a fresh (empty) parallel copy is placed at the end of block,
+ * or in front of the final jump when the block ends in one.  The copy is
+ * allocated from state->dead_ctx.
+ */
+static bool
+add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   bool need_end_copy = false;
+   for (unsigned slot = 0; slot < 2; slot++) {
+      nir_block *succ = block->successors[slot];
+      if (succ == NULL)
+         continue;
+
+      nir_instr *first = nir_block_first_instr(succ);
+      if (first != NULL && first->type == nir_instr_type_phi)
+         need_end_copy = true;
+   }
+
+   if (!need_end_copy)
+      return true;
+
+   /* At least one successor has a phi node, so a parallel copy is needed
+    * at the end of the block but before the jump (if there is one).
+    */
+   nir_parallel_copy_instr *pcopy =
+      nir_parallel_copy_instr_create(state->dead_ctx);
+
+   nir_instr *tail = nir_block_last_instr(block);
+   if (tail != NULL && tail->type == nir_instr_type_jump)
+      nir_instr_insert_before(tail, &pcopy->instr);
+   else
+      nir_instr_insert_after_block(block, &pcopy->instr);
+
+   return true;
+}
+
+/* Returns the parallel copy sitting at the end of block - directly before
+ * the trailing jump if the block ends in one - or NULL if there is none.
+ */
+static nir_parallel_copy_instr *
+get_parallel_copy_at_end_of_block(nir_block *block)
+{
+   nir_instr *candidate = nir_block_last_instr(block);
+   if (candidate == NULL)
+      return NULL;
+
+   /* A trailing jump is skipped: the parallel copy, if present, is the
+    * instruction right before it.
+    */
+   if (candidate->type == nir_instr_type_jump)
+      candidate = nir_instr_prev(candidate);
+
+   if (candidate == NULL || candidate->type != nir_instr_type_parallel_copy)
+      return NULL;
+
+   return nir_instr_as_parallel_copy(candidate);
+}
+
+/** Isolate phi nodes with parallel copies
+ *
+ * In order to solve the dependency problems with the sources and
+ * destinations of phi nodes, we first isolate them by adding parallel
+ * copies to the beginnings and ends of basic blocks.  For every block with
+ * phi nodes, we add a parallel copy immediately following the last phi
+ * node that copies the destinations of all of the phi nodes to new SSA
+ * values.  We also add a parallel copy to the end of every block that has
+ * a successor with phi nodes that, for each phi node in each successor,
+ * copies the corresponding source of the phi node and adjusts the phi to
+ * use the destination of the parallel copy.
+ *
+ * In SSA form, each value has exactly one definition.  What this does is
+ * ensure that each value used in a phi also has exactly one use.  The
+ * destinations of phis are only used by the parallel copy immediately
+ * following the phi nodes.  Thanks to the parallel copy at the end of
+ * the predecessor block, the sources of phi nodes are the only use of
+ * that value.  This allows us to immediately assign all the sources and
+ * destinations of any given phi node to the same register without worrying
+ * about interference at all.  We do coalescing to get rid of the parallel
+ * copies where possible.
+ *
+ * Before this pass can be run, we have to iterate over the blocks with
+ * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
+ * the ends of blocks exist.  We can create the ones at the beginnings as
+ * we go, but the ones at the ends of blocks need to be created ahead of
+ * time because of potential back-edges in the CFG.
+ */
+static bool
+isolate_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_instr *last_phi_instr = NULL;
+   nir_foreach_instr(block, instr) {
+      /* Phi nodes only ever come at the start of a block */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      last_phi_instr = instr;
+   }
+
+   /* If we don't have any phi's, then there's nothing for us to do. */
+   if (last_phi_instr == NULL)
+      return true;
+
+   /* If we have phi nodes, we need to create a parallel copy at the
+    * start of this block but after the phi nodes.
+    */
+   nir_parallel_copy_instr *block_pcopy =
+      nir_parallel_copy_instr_create(state->dead_ctx);
+   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);
+
+   nir_foreach_instr(block, instr) {
+      /* Phi nodes only ever come at the start of a block */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      nir_foreach_phi_src(phi, src) {
+         nir_parallel_copy_instr *pcopy =
+            get_parallel_copy_at_end_of_block(src->pred);
+         assert(pcopy);
+
+         nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
+                                                 nir_parallel_copy_entry);
+         exec_list_push_tail(&pcopy->entries, &entry->node);
+
+         nir_src_copy(&entry->src, &src->src, state->dead_ctx);
+         _mesa_set_add(src->src.ssa->uses, &pcopy->instr);
+
+         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
+                           phi->dest.ssa.num_components, src->src.ssa->name);
+
+         struct set_entry *use_entry =
+            _mesa_set_search(src->src.ssa->uses, instr);
+         if (use_entry)
+            /* It is possible that a phi node can use the same source twice
+             * but for different basic blocks.  If that happens, use_entry
+             * will be NULL because we already removed it.  This is safe
+             * because, by the time the loop is done, we will have deleted
+             * all of the sources of the phi from their respective use sets
+             * and moved them to the parallel copy definitions.
+             */
+            _mesa_set_remove(src->src.ssa->uses, use_entry);
+
+         src->src.ssa = &entry->dest.ssa;
+         _mesa_set_add(entry->dest.ssa.uses, instr);
+      }
+
+      nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
+                                              nir_parallel_copy_entry);
+      exec_list_push_tail(&block_pcopy->entries, &entry->node);
+
+      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
+                        phi->dest.ssa.num_components, phi->dest.ssa.name);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&entry->dest.ssa),
+                               state->mem_ctx);
+
+      entry->src.is_ssa = true;
+      entry->src.ssa = &phi->dest.ssa;
+      _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr);
+   }
+
+   return true;
+}
+
+static bool
+coalesce_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      /* Phis only come at the start of a block; stop at the first non-phi */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      assert(phi->dest.is_ssa);
+      merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+         merge_node *src_node = get_merge_node(src->src.ssa, state);
+         if (src_node->set != dest_node->set) /* not yet in one set */
+            merge_merge_sets(dest_node->set, src_node->set);
+      }
+   }
+
+   return true;
+}
+
+static void
+agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
+                                 struct from_ssa_state *state)
+{
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      if (!entry->src.is_ssa)
+         continue; /* only SSA sources are considered */
+
+      /* Since load_const instructions are SSA only, we can't replace their
+       * destinations with registers and, therefore, can't coalesce them.
+       */
+      if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const)
+         continue;
+
+      /* Don't try to coalesce copies that change the component count */
+      if (entry->dest.ssa.num_components != entry->src.ssa->num_components)
+         continue;
+
+      merge_node *src_node = get_merge_node(entry->src.ssa, state);
+      merge_node *dest_node = get_merge_node(&entry->dest.ssa, state);
+
+      if (src_node->set == dest_node->set)
+         continue; /* already in the same merge set */
+
+      if (!merge_sets_interfere(src_node->set, dest_node->set))
+         merge_merge_sets(src_node->set, dest_node->set);
+   }
+}
+
+static bool
+agressive_coalesce_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_parallel_copy_instr *start_pcopy = NULL;
+   nir_foreach_instr(block, instr) {
+      /* Skip the phis; they only ever come at the start of a block */
+      if (instr->type != nir_instr_type_phi) {
+         if (instr->type != nir_instr_type_parallel_copy)
+            break; /* The parallel copy must be right after the phis */
+
+         start_pcopy = nir_instr_as_parallel_copy(instr);
+
+         agressive_coalesce_parallel_copy(start_pcopy, state);
+
+         break; /* only the first non-phi instruction is examined */
+      }
+   }
+
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+
+   if (end_pcopy && end_pcopy != start_pcopy) /* don't coalesce it twice */
+      agressive_coalesce_parallel_copy(end_pcopy, state);
+
+   return true;
+}
+
+static nir_register *
+get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search(state->merge_node_table, def);
+   if (entry) { /* def has a merge node: use its set's register */
+      merge_node *node = (merge_node *)entry->data;
+
+      /* If it doesn't have a register yet, create one.  Note that all of
+       * the things in the merge set should be the same so it doesn't
+       * matter which node's definition we use.
+       */
+      if (node->set->reg == NULL) {
+         node->set->reg = nir_local_reg_create(state->impl);
+         node->set->reg->name = def->name;
+         node->set->reg->num_components = def->num_components;
+         node->set->reg->num_array_elems = 0;
+      }
+
+      return node->set->reg;
+   }
+
+   entry = _mesa_hash_table_search(state->ssa_table, def);
+   if (entry) { /* a register was already created for this def */
+      return (nir_register *)entry->data;
+   } else {
+      /* We leave load_const SSA values alone.  They act as immediates to
+       * the backend.  If it got coalesced into a phi, that's ok.
+       */
+      if (def->parent_instr->type == nir_instr_type_load_const)
+         return NULL;
+
+      nir_register *reg = nir_local_reg_create(state->impl);
+      reg->name = def->name;
+      reg->num_components = def->num_components;
+      reg->num_array_elems = 0;
+
+      _mesa_hash_table_insert(state->ssa_table, def, reg);
+      return reg;
+   }
+}
+
+static bool
+rewrite_ssa_src(nir_src *src, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   if (src->is_ssa) {
+      nir_register *reg = get_register_for_ssa_def(src->ssa, state);
+
+      if (reg == NULL) {
+         assert(src->ssa->parent_instr->type == nir_instr_type_load_const);
+         return true; /* load_const sources are left as SSA */
+      }
+
+      memset(src, 0, sizeof *src); /* also clears is_ssa */
+      src->reg.reg = reg;
+
+      /* We don't need to remove it from the uses set because that is going
+       * away.  We just need to add it to the one for the register. */
+      _mesa_set_add(reg->uses, state->instr);
+   }
+
+   return true;
+}
+
+static bool
+rewrite_ssa_dest(nir_dest *dest, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   if (dest->is_ssa) {
+      nir_register *reg = get_register_for_ssa_def(&dest->ssa, state);
+
+      if (reg == NULL) {
+         assert(dest->ssa.parent_instr->type == nir_instr_type_load_const);
+         return true; /* no register was assigned; leave the dest as SSA */
+      }
+
+      _mesa_set_destroy(dest->ssa.uses, NULL);
+      _mesa_set_destroy(dest->ssa.if_uses, NULL);
+
+      memset(dest, 0, sizeof *dest); /* also clears is_ssa */
+      dest->reg.reg = reg;
+
+      _mesa_set_add(reg->defs, state->instr);
+   }
+
+   return true;
+}
+
+/* Resolves ssa definitions to registers.  While we're at it, we also
+ * remove phi nodes and ssa_undef instructions.
+ */
+static bool
+resolve_registers_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      state->instr = instr; /* read by the rewrite callbacks below */
+      nir_foreach_src(instr, rewrite_ssa_src, state);
+      nir_foreach_dest(instr, rewrite_ssa_dest, state);
+
+      if (instr->type == nir_instr_type_ssa_undef ||
+          instr->type == nir_instr_type_phi) {
+         nir_instr_remove(instr);
+         ralloc_steal(state->dead_ctx, instr); /* freed with dead_ctx */
+      }
+   }
+   state->instr = NULL;
+
+   nir_if *following_if = nir_block_get_following_if(block);
+   if (following_if && following_if->condition.is_ssa) {
+      nir_register *reg = get_register_for_ssa_def(following_if->condition.ssa,
+                                                   state);
+      if (reg) {
+         memset(&following_if->condition, 0, sizeof following_if->condition);
+         following_if->condition.reg.reg = reg;
+
+         _mesa_set_add(reg->if_uses, following_if);
+      } else {
+         /* FIXME: We really shouldn't hit this.  We should be doing
+          * constant control flow propagation.
+          */
+         assert(following_if->condition.ssa->parent_instr->type == nir_instr_type_load_const);
+      }
+   }
+
+   return true;
+}
+
+static void
+emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
+          void *mem_ctx)
+{
+   assert(!dest_src.is_ssa &&
+          dest_src.reg.indirect == NULL &&
+          dest_src.reg.base_offset == 0);
+
+   if (src.is_ssa) /* the source must be at least as wide as the dest */
+      assert(src.ssa->num_components >= dest_src.reg.reg->num_components);
+   else
+      assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
+
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+   mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
+   mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
+
+   nir_instr_insert_before(&pcopy->instr, &mov->instr);
+}
+
+/* Resolves a single parallel copy operation into a sequence of mov's
+ *
+ * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
+ * However, I never got the algorithm to work as written, so this version
+ * is slightly modified.
+ *
+ * The algorithm works by playing this little shell game with the values.
+ * We start by recording where every source value is and which source value
+ * each destination value should receive.  We then grab any copy whose
+ * destination is "empty", i.e. not used as a source, and do the following:
+ *  - Find where its source value currently lives
+ *  - Emit the move instruction
+ *  - Set the location of the source value to the destination
+ *  - Mark the location containing the source value
+ *  - Mark the destination as no longer needing to be copied
+ *
+ * When we run out of "empty" destinations, we have a cycle and so we
+ * create a temporary register, copy to that register, and mark the value
+ * we copied as living in that temporary.  Now, the cycle is broken, so we
+ * can continue with the above steps.
+ */
+static void
+resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
+                      struct from_ssa_state *state)
+{
+   unsigned num_copies = 0;
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* Sources may be SSA */
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+         continue;
+
+      num_copies++;
+   }
+
+   if (num_copies == 0) {
+      /* Hooray, we don't need any copies! */
+      nir_instr_remove(&pcopy->instr);
+      return;
+   }
+
+   /* The register/source corresponding to the given index */
+   nir_src values[num_copies * 2];
+   memset(values, 0, sizeof values);
+
+   /* The current location of a given piece of data */
+   int loc[num_copies * 2];
+
+   /* The piece of data that the given piece of data is to be copied from */
+   int pred[num_copies * 2];
+
+   /* Initialize loc and pred.  We will use -1 for "null" */
+   memset(loc, -1, sizeof loc);
+   memset(pred, -1, sizeof pred);
+
+   /* The destinations we have yet to properly fill */
+   int to_do[num_copies * 2];
+   int to_do_idx = -1; /* top of the to_do stack; -1 when empty */
+
+   /* Now we set everything up:
+    *  - All values get assigned a temporary index
+    *  - Current locations are set from sources
+    *  - Predecessors are recorded from sources and destinations
+    */
+   int num_vals = 0;
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* Sources may be SSA */
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+         continue;
+
+      int src_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (nir_srcs_equal(values[i], entry->src))
+            src_idx = i;
+      }
+      if (src_idx < 0) {
+         src_idx = num_vals++;
+         values[src_idx] = entry->src;
+      }
+
+      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
+
+      int dest_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (nir_srcs_equal(values[i], dest_src)) {
+            /* Each destination of a parallel copy instruction should be
+             * unique.  A destination may get used as a source, so we still
+             * have to walk the list.  However, the predecessor should not,
+             * at this point, be set yet, so we should have -1 here.
+             */
+            assert(pred[i] == -1);
+            dest_idx = i;
+         }
+      }
+      if (dest_idx < 0) {
+         dest_idx = num_vals++;
+         values[dest_idx] = dest_src;
+      }
+
+      loc[src_idx] = src_idx;
+      pred[dest_idx] = src_idx;
+
+      to_do[++to_do_idx] = dest_idx;
+   }
+
+   /* Currently empty destinations we can go ahead and fill */
+   int ready[num_copies * 2];
+   int ready_idx = -1; /* top of the ready stack; -1 when empty */
+
+   /* Mark the ones that are ready for copying.  We know an index is a
+    * destination if it has a predecessor and it's ready for copying if
+    * it's not marked as containing data.
+    */
+   for (int i = 0; i < num_vals; i++) {
+      if (pred[i] != -1 && loc[i] == -1)
+         ready[++ready_idx] = i;
+   }
+
+   while (to_do_idx >= 0) {
+      while (ready_idx >= 0) {
+         int b = ready[ready_idx--];
+         int a = pred[b];
+         emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);
+
+         /* If any other copies want a they can find it at b */
+         loc[a] = b;
+
+         /* b has been filled, mark it as not needing to be copied */
+         pred[b] = -1;
+
+         /* If a needs to be filled, it's ready for copying now */
+         if (pred[a] != -1)
+            ready[++ready_idx] = a;
+      }
+      int b = to_do[to_do_idx--];
+      if (pred[b] == -1)
+         continue;
+
+      /* If we got here, then we don't have any more trivial copies that we
+       * can do.  We have to break a cycle, so we create a new temporary
+       * register for that purpose.  Normally, if going out of SSA after
+       * register allocation, you would want to avoid creating temporary
+       * registers.  However, we are going out of SSA before register
+       * allocation, so we would rather not create extra register
+       * dependencies for the backend to deal with.  If it wants, the
+       * backend can coalesce the (possibly multiple) temporaries.
+       */
+      assert(num_vals < num_copies * 2);
+      nir_register *reg = nir_local_reg_create(state->impl);
+      reg->name = "copy_temp";
+      reg->num_array_elems = 0;
+      if (values[b].is_ssa)
+         reg->num_components = values[b].ssa->num_components;
+      else
+         reg->num_components = values[b].reg.reg->num_components;
+      values[num_vals].is_ssa = false;
+      values[num_vals].reg.reg = reg;
+
+      emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
+      loc[b] = num_vals;
+      ready[++ready_idx] = b;
+      num_vals++;
+   }
+
+   nir_instr_remove(&pcopy->instr);
+}
+
+/* Resolves the parallel copies in a block.  Each block can have at most
+ * two:  One at the beginning, right after all the phi nodes, and one at
+ * the end (or right before the final jump if it exists).
+ */
+static bool
+resolve_parallel_copies_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   /* At this point, we have removed all of the phi nodes.  If a parallel
+    * copy existed right after the phi nodes in this block, it is now the
+    * first instruction.
+    */
+   nir_instr *first_instr = nir_block_first_instr(block);
+   if (first_instr == NULL)
+      return true; /* Empty, nothing to do. */
+
+   if (first_instr->type == nir_instr_type_parallel_copy) {
+      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
+
+      resolve_parallel_copy(pcopy, state);
+   }
+
+   /* It's possible that the above code already cleaned up the end parallel
+    * copy.  However, doing so removed it from the instructions list so we
+    * won't find it here.  Therefore, it's safe to go ahead and just look
+    * for one and clean it up if it exists.
+    */
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+   if (end_pcopy)
+      resolve_parallel_copy(end_pcopy, state);
+
+   return true;
+}
+
+static void
+nir_convert_from_ssa_impl(nir_function_impl *impl)
+{
+   struct from_ssa_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = ralloc_context(NULL); /* freed wholesale at the end */
+   state.impl = impl;
+   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                    _mesa_key_pointer_equal);
+
+   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
+   nir_foreach_block(impl, isolate_phi_nodes_block, &state);
+
+   /* Mark metadata as dirty before we ask for liveness analysis */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
+   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
+   nir_foreach_block(impl, agressive_coalesce_block, &state);
+
+   state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   nir_foreach_block(impl, resolve_registers_block, &state);
+
+   nir_foreach_block(impl, resolve_parallel_copies_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   /* Clean up dead instructions and the hash tables */
+   _mesa_hash_table_destroy(state.ssa_table, NULL);
+   _mesa_hash_table_destroy(state.merge_node_table, NULL);
+   ralloc_free(state.dead_ctx);
+}
+
+void
+nir_convert_from_ssa(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) /* skip overloads with no implementation */
+         nir_convert_from_ssa_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_intrinsics.c b/mesalib/src/glsl/nir/nir_intrinsics.c
new file mode 100644
index 000000000..a7c868c39
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_intrinsics.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+#define OPCODE(name) nir_intrinsic_##name
+
+#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
+                  _dest_components, _num_variables, _num_indices, _flags) \
+{ \
+   .name = #_name, \
+   .num_srcs = _num_srcs, \
+   .src_components = _src_components, \
+   .has_dest = _has_dest, \
+   .dest_components = _dest_components, \
+   .num_variables = _num_variables, \
+   .num_indices = _num_indices, \
+   .flags = _flags \
+},
+
+#define LAST_INTRINSIC(name) /* expands to nothing for this table */
+
+const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
+#include "nir_intrinsics.h" /* one INTRINSIC() initializer each */
+};
+\ No newline at end of file
diff --git a/mesalib/src/glsl/nir/nir_intrinsics.h b/mesalib/src/glsl/nir/nir_intrinsics.h
new file mode 100644
index 000000000..d94866c85
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_intrinsics.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+/**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ *              num_variables, num_indices, flags)
+ *
+ * Which should correspond one-to-one with the nir_intrinsic_info structure. It
+ * is included both in nir.h to create the nir_intrinsic enum (with members of
+ * the form nir_intrinsic_(name)) and in nir_intrinsics.c to create
+ * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
+ * for each intrinsic.
+ */
+
+#define ARR(...) { __VA_ARGS__ } /* brace-wrapped initializer list */
+
+
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+/*
+ * Interpolation of input.  The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently.  The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * a barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general
+ */
+#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+BARRIER(discard)
+
+INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+/*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ */
+
+#define ATOMIC(name, flags) \
+   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ATOMIC(inc, 0)
+ATOMIC(dec, 0)
+ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+#define SYSTEM_VALUE(name, components) \
+   INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+SYSTEM_VALUE(front_face, 1)
+SYSTEM_VALUE(vertex_id, 1)
+SYSTEM_VALUE(instance_id, 1)
+SYSTEM_VALUE(sample_id, 1)
+SYSTEM_VALUE(sample_pos, 2)
+SYSTEM_VALUE(sample_mask_in, 1)
+SYSTEM_VALUE(invocation_id, 1)
+
+/*
+ * The first index is the address to load from, and the second index is the
+ * number of array elements to load.  Indirect loads have an additional
+ * register input, which is added to the constant address to compute the
+ * final address to load from.  For UBO's (and SSBO's), the first source is
+ * the (possibly constant) UBO buffer index and the indirect (if it exists)
+ * is the second source.
+ *
+ * For vector backends, the address is in terms of one vec4, and so each array
+ * element is +4 scalar components from the previous array element. For scalar
+ * backends, the address is in terms of a single 4-byte float/int and array
+ * elements begin immediately after the previous array element.
+ */
+
+#define LOAD(name, extra_srcs, flags) \
+   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+   INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
+             true, 0, 0, 2, flags)
+
+LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* LOAD(ssbo, 1, 0) */
+
+/*
+ * Stores work the same way as loads, except now the first register input is
+ * the value or array to store and the optional second input is the indirect
+ * offset.  STORE(name, ...) declares store_<name> and store_<name>_indirect.
+ */
+
+#define STORE(name, num_indices, flags) \
+   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
+             num_indices, flags)
+
+STORE(output, 2, 0)
+/* STORE(ssbo, 3, 0) */
+
+LAST_INTRINSIC(store_output_indirect)
diff --git a/mesalib/src/glsl/nir/nir_live_variables.c b/mesalib/src/glsl/nir/nir_live_variables.c
new file mode 100644
index 000000000..7402dc087
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_live_variables.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include "nir.h"
+#include "nir_worklist.h"
+
+/*
+ * Basic liveness analysis.  This works only in SSA form.
+ *
+ * This liveness pass treats phi nodes as being melded to the space between
+ * blocks so that the destinations of a phi are in the livein of the block
+ * in which it resides and the sources are in the liveout of the
+ * corresponding block.  By formulating the liveness information in this
+ * way, we ensure that the definition of any variable dominates its entire
+ * live range.  This is true because the only way that the definition of an
+ * SSA value may not dominate a use is if the use is in a phi node and the
+ * uses in phi nodes are in the live-out of the corresponding predecessor
+ * block but not in the live-in of the block containing the phi node.
+ */
+
+struct live_variables_state {
+   unsigned num_ssa_defs;   /* next live_index to hand out (0 = ssa_undef) */
+   unsigned bitset_words;   /* BITSET_WORDs in each live_in/live_out set */
+
+   nir_block_worklist worklist;   /* blocks still waiting to be processed */
+};
+
+/* nir_foreach_ssa_def callback that assigns def->live_index. */
+static bool
+index_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   struct live_variables_state *state = void_state;
+   const bool is_undef =
+      def->parent_instr->type == nir_instr_type_ssa_undef;
+
+   /* Every undef shares the reserved index 0; real defs get fresh ones. */
+   def->live_index = is_undef ? 0 : state->num_ssa_defs++;
+   return true;
+}
+
+/* Block callback: assigns a live_index to each SSA def in the block. */
+static bool
+index_ssa_definitions_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, index_ssa_def, state);
+   return true;
+}
+
+/* Initialize the liveness data to zero and add the given block to the
+ * worklist.
+ */
+static bool
+init_liveness_block(nir_block *block, void *void_state)
+{
+   struct live_variables_state *state = void_state;
+   const unsigned words = state->bitset_words;
+   const size_t set_bytes = words * sizeof(BITSET_WORD);
+
+   /* (Re)size both sets for the current number of SSA defs, then clear. */
+   block->live_in = reralloc(block, block->live_in, BITSET_WORD, words);
+   block->live_out = reralloc(block, block->live_out, BITSET_WORD, words);
+   memset(block->live_in, 0, set_bytes);
+   memset(block->live_out, 0, set_bytes);
+
+   nir_block_worklist_push_head(&state->worklist, block);
+
+   return true;
+}
+
+/* Marks the SSA def read by src as live; usable as a nir_foreach_src cb. */
+static bool
+set_src_live(nir_src *src, void *void_live)
+{
+   BITSET_WORD *live = void_live;
+
+   /* Non-SSA sources are not tracked, and undefined variables (which all
+    * carry live_index 0) are never live.
+    */
+   if (src->is_ssa && src->ssa->live_index != 0) {
+      BITSET_SET(live, src->ssa->live_index);
+   }
+
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: clears def's bit in the given live set. */
+static bool
+set_ssa_def_dead(nir_ssa_def *def, void *void_live)
+{
+   BITSET_WORD *live_set = void_live;
+
+   BITSET_CLEAR(live_set, def->live_index);
+   return true;
+}
+
+/** Propagates the live in of succ across the edge to the live out of pred
+ *
+ * Phi nodes exist "between" blocks and all the phi nodes at the start of a
+ * block act "in parallel".  When we propagate from the live_in of one
+ * block to the live out of the other, we have to kill any writes from phis
+ * and make live any sources.
+ *
+ * Returns true if updating live out of pred added anything
+ */
+static bool
+propagate_across_edge(nir_block *pred, nir_block *succ,
+                      struct live_variables_state *state)
+{
+   BITSET_WORD live[state->bitset_words];   /* scratch copy of succ->live_in */
+   memcpy(live, succ->live_in, sizeof live);
+
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;   /* phis only appear at the start of a block */
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      assert(phi->dest.is_ssa);
+      set_ssa_def_dead(&phi->dest.ssa, live);
+   }
+
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_foreach_phi_src(phi, src) {
+         if (src->pred == pred) {
+            set_src_live(&src->src, live);   /* only this edge's source */
+            break;
+         }
+      }
+   }
+
+   BITSET_WORD progress = 0;   /* collects bits that are new to pred->live_out */
+   for (unsigned i = 0; i < state->bitset_words; ++i) {
+      progress |= live[i] & ~pred->live_out[i];
+      pred->live_out[i] |= live[i];
+   }
+   return progress != 0;
+}
+
+/* Computes block->live_in and block->live_out for every block in impl. */
+void
+nir_live_variables_impl(nir_function_impl *impl)
+{
+   struct live_variables_state state;
+   /* We start at 1 because we reserve the index value of 0 for ssa_undef
+    * instructions.  Those are never live, so their liveness information
+    * can be compacted into a single bit.
+    */
+   state.num_ssa_defs = 1;
+   nir_foreach_block(impl, index_ssa_definitions_block, &state);
+
+   nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
+
+   /* We now know how many unique ssa definitions we have and we can go
+    * ahead and allocate live_in and live_out sets and add all of the
+    * blocks to the worklist.
+    */
+   state.bitset_words = BITSET_WORDS(state.num_ssa_defs);
+   nir_foreach_block(impl, init_liveness_block, &state);
+
+   /* We're now ready to work through the worklist and update the liveness
+    * sets of each of the blocks.  By the time we get to this point, every
+    * block in the function implementation has been pushed onto the
+    * worklist in reverse order.  As long as we keep the worklist
+    * up-to-date as we go, everything will get covered.
+    */
+   while (!nir_block_worklist_is_empty(&state.worklist)) {
+      /* We pop them off in the reverse order we pushed them on.  This way
+       * the first walk of the instructions is backwards so we only walk
+       * once in the case of no control flow.
+       */
+      nir_block *block = nir_block_worklist_pop_head(&state.worklist);
+
+      memcpy(block->live_in, block->live_out,
+             state.bitset_words * sizeof(BITSET_WORD));
+
+      nir_if *following_if = nir_block_get_following_if(block);
+      if (following_if)
+         set_src_live(&following_if->condition, block->live_in);
+
+      nir_foreach_instr_reverse(block, instr) {
+         /* Phi nodes are handled separately so we want to skip them.  Since
+          * we are going backwards and they are at the beginning, we can just
+          * break as soon as we see one.
+          */
+         if (instr->type == nir_instr_type_phi)
+            break;
+
+         nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in);
+         nir_foreach_src(instr, set_src_live, block->live_in);
+      }
+
+      /* Walk over all of the predecessors of the current block updating
+       * their live out with the live in of this one.  If anything has
+       * changed, add the predecessor to the work list so that we ensure
+       * that the new information is used.
+       */
+      struct set_entry *entry;
+      set_foreach(block->predecessors, entry) {
+         nir_block *pred = (nir_block *)entry->key;
+         if (propagate_across_edge(pred, block, &state))
+            nir_block_worklist_push_tail(&state.worklist, pred);
+      }
+   }
+
+   nir_block_worklist_fini(&state.worklist);
+}
+
+/* nir_foreach_src callback: returns false for a source that reads def. */
+static bool
+src_does_not_use_def(nir_src *src, void *def)
+{
+   return !src->is_ssa || src->ssa != (nir_ssa_def *)def;
+}
+
+/* Returns true if def is used strictly after start in start's block.  The
+ * condition of an if that follows the block counts as such a use.
+ */
+static bool
+search_for_use_after_instr(nir_instr *start, nir_ssa_def *def)
+{
+   for (struct exec_node *node = start->node.next;
+        !exec_node_is_tail_sentinel(node); node = node->next) {
+      nir_instr *instr = exec_node_data(nir_instr, node, node);
+      if (!nir_foreach_src(instr, src_does_not_use_def, def))
+         return true;
+   }
+   /* The liveness pass adds a following if's condition to this block's
+    * live_in but not to its live_out, so that use has to be found here.
+    */
+   nir_if *following_if = nir_block_get_following_if(start->block);
+   return following_if && following_if->condition.is_ssa &&
+          following_if->condition.ssa == def;
+}
+
+/* Returns true if def is live at instr assuming that def comes before
+ * instr in a pre DFS search of the dominance tree.
+ */
+static bool
+nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr)
+{
+   /* def dominates instr, so live-out of the block implies live at instr */
+   if (BITSET_TEST(instr->block->live_out, def->live_index))
+      return true;
+
+   /* Otherwise def has to be live coming into instr's block or be defined
+    * in that block, and it has to be used after instr.
+    */
+   return (BITSET_TEST(instr->block->live_in, def->live_index) ||
+           def->parent_instr->block == instr->block) &&
+          search_for_use_after_instr(instr, def);
+}
+
+bool
+nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b)
+{
+   /* Two variables defined at the same time interfere assuming at
+    * least one isn't dead.
+    */
+   if (a->parent_instr == b->parent_instr)
+      return true;
+
+   /* If either variable is an ssa_undef, then there's no interference */
+   if (a->live_index == 0 || b->live_index == 0)
+      return false;
+   /* Otherwise test the lower-indexed def at the other one's definition */
+   if (a->live_index < b->live_index)
+      return nir_ssa_def_is_live_at(a, b->parent_instr);
+   return nir_ssa_def_is_live_at(b, a->parent_instr);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c
new file mode 100644
index 000000000..25bba4ef0
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/** @file nir_lower_alu_to_scalar.c
+ *
+ * Replaces nir_alu_instr operations with more than one channel used in the
+ * arguments with individual per-channel operations.
+ */
+
+static void
+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+{
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+   instr->dest.write_mask = (1 << num_components) - 1;   /* low N bits set */
+}
+
+/* Expands a reduction into one scalar chan_op per input channel and folds
+ * the per-channel results together, in channel order, with merge_op.
+ */
+static void
+lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
+                void *mem_ctx)
+{
+   const unsigned channels = nir_op_infos[instr->op].input_sizes[0];
+   nir_ssa_def *acc = NULL;
+   for (unsigned c = 0; c < channels; c++) {
+      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
+      nir_alu_ssa_dest_init(chan, 1);
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+      chan->src[0].swizzle[0] = chan->src[0].swizzle[c];
+      if (nir_op_infos[chan_op].num_inputs > 1) {
+         assert(nir_op_infos[chan_op].num_inputs == 2);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+         chan->src[1].swizzle[0] = chan->src[1].swizzle[c];
+      }
+      nir_instr_insert_before(&instr->instr, &chan->instr);
+      /* The first channel seeds the accumulator; later ones are merged in */
+      nir_ssa_def *result = &chan->dest.dest.ssa;
+      if (c > 0) {
+         nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
+         nir_alu_ssa_dest_init(merge, 1);
+         merge->src[0].src = nir_src_for_ssa(acc);
+         merge->src[1].src = nir_src_for_ssa(result);
+         nir_instr_insert_before(&instr->instr, &merge->instr);
+         result = &merge->dest.dest.ssa;
+      }
+      acc = result;
+   }
+
+   assert(instr->dest.write_mask == 1);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(acc),
+                            mem_ctx);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Splits a vector ALU instruction into one scalar instruction per written
+ * channel and regroups the results with a vecN.  Reductions are expanded
+ * by lower_reduction() instead.
+ */
+static void
+lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
+{
+   unsigned num_src = nir_op_infos[instr->op].num_inputs;
+
+   assert(instr->dest.dest.is_ssa);
+   assert(instr->dest.write_mask != 0);
+
+   /* lower_reduction() removes instr, so we must not touch it afterwards. */
+#define LOWER_REDUCTION(name, chan, merge) \
+   case name##2: \
+   case name##3: \
+   case name##4: \
+      lower_reduction(instr, chan, merge, mem_ctx); \
+      return;
+
+   switch (instr->op) {
+   case nir_op_vec4:
+   case nir_op_vec3:
+   case nir_op_vec2:
+      /* We don't need to scalarize these ops, they're the ones generated to
+       * group up outputs into a value that can be SSAed.
+       */
+      return;
+
+      LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+      LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+      LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+      LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+      LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+      LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
+      LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
+      LOWER_REDUCTION(nir_op_ball, nir_op_imov, nir_op_iand);
+      LOWER_REDUCTION(nir_op_bany, nir_op_imov, nir_op_ior);
+      LOWER_REDUCTION(nir_op_fall, nir_op_fmov, nir_op_fand);
+      LOWER_REDUCTION(nir_op_fany, nir_op_fmov, nir_op_for);
+
+   default:
+      break;
+   }
+
+   if (instr->dest.dest.ssa.num_components == 1)
+      return;
+
+   unsigned num_components = instr->dest.dest.ssa.num_components;
+   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
+   nir_alu_instr *vec_instr =
+      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
+   nir_alu_ssa_dest_init(vec_instr, num_components);
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if (!(instr->dest.write_mask & (1 << chan)))
+         continue;
+
+      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
+      for (unsigned i = 0; i < num_src; i++) {
+         /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
+          * args (input_sizes[] == 1).
+          */
+         assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+         unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
+                              0 : chan);
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+         for (int j = 0; j < 4; j++)
+            lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
+      }
+
+      nir_alu_ssa_dest_init(lower, 1);
+      lower->dest.saturate = instr->dest.saturate;
+      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);
+      nir_instr_insert_before(&instr->instr, &lower->instr);
+   }
+
+   nir_instr_insert_before(&instr->instr, &vec_instr->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),
+                            mem_ctx);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Block callback: scalarizes every ALU instruction; data is the mem_ctx. */
+static bool
+lower_alu_to_scalar_block(nir_block *block, void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_alu)
+         lower_alu_instr_scalar(nir_instr_as_alu(instr), data);
+   }
+   return true;
+}
+
+/* Replaces the vector ALU instructions of every function implementation in
+ * the shader with per-channel scalar ones.
+ */
+void
+nir_lower_alu_to_scalar(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      /* New instructions are allocated out of the impl's ralloc parent */
+      if (impl != NULL)
+         nir_foreach_block(impl, lower_alu_to_scalar_block,
+                           ralloc_parent(impl));
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_atomics.c b/mesalib/src/glsl/nir/nir_lower_atomics.c
new file mode 100644
index 000000000..e82df0169
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_atomics.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "main/config.h"
+#include <assert.h>
+
+/*
+ * replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset
+ */
+
+/* Rewrites an atomic_counter_*_var intrinsic into the form that takes the
+ * buffer index as const_index[0] and a byte offset as its SSA source.
+ */
+static void
+lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
+{
+   nir_intrinsic_op op;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_atomic_counter_read_var:
+      op = nir_intrinsic_atomic_counter_read;
+      break;
+   case nir_intrinsic_atomic_counter_inc_var:
+      op = nir_intrinsic_atomic_counter_inc;
+      break;
+   case nir_intrinsic_atomic_counter_dec_var:
+      op = nir_intrinsic_atomic_counter_dec;
+      break;
+   default:
+      return;
+   }
+
+   if (instr->variables[0]->var->data.mode != nir_var_uniform)
+      return; /* atomics passed as function arguments can't be lowered */
+
+   void *mem_ctx = ralloc_parent(instr);
+
+   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+   new_instr->const_index[0] =
+      (int) instr->variables[0]->var->data.atomic.buffer_index;
+
+   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
+   nir_instr_insert_before(&instr->instr, &offset_const->instr);
+   nir_ssa_def *offset_def = &offset_const->def;
+
+   if (instr->variables[0]->deref.child != NULL) {
+      assert(instr->variables[0]->deref.child->deref_type ==
+             nir_deref_type_array);
+      nir_deref_array *deref_array =
+         nir_deref_as_array(instr->variables[0]->deref.child);
+      assert(deref_array->deref.child == NULL);
+
+      /* Scale the constant array index to bytes, like the indirect below */
+      offset_const->value.u[0] +=
+         deref_array->base_offset * ATOMIC_COUNTER_SIZE;
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         nir_load_const_instr *atomic_counter_size =
+               nir_load_const_instr_create(mem_ctx, 1);
+         atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
+         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
+
+         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
+         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+         mul->dest.write_mask = 0x1;
+         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+         mul->src[1].src.is_ssa = true;
+         mul->src[1].src.ssa = &atomic_counter_size->def;
+         nir_instr_insert_before(&instr->instr, &mul->instr);
+
+         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
+         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+         add->dest.write_mask = 0x1;
+         add->src[0].src.is_ssa = true;
+         add->src[0].src.ssa = &mul->dest.dest.ssa;
+         add->src[1].src.is_ssa = true;
+         add->src[1].src.ssa = &offset_const->def;
+         nir_instr_insert_before(&instr->instr, &add->instr);
+
+         offset_def = &add->dest.dest.ssa;
+      }
+   }
+
+   new_instr->src[0].is_ssa = true;
+   new_instr->src[0].ssa = offset_def;
+
+   if (instr->dest.is_ssa) {
+      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+                        instr->dest.ssa.num_components, NULL);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+                               nir_src_for_ssa(&new_instr->dest.ssa),
+                               mem_ctx);
+   } else {
+      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+   }
+
+   nir_instr_insert_before(&instr->instr, &new_instr->instr);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Block callback: lowers each intrinsic in the block; state is the impl. */
+static bool
+lower_block(nir_block *block, void *state)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic)
+         lower_instr(nir_instr_as_intrinsic(instr), state);
+   }
+   return true;
+}
+
+void
+nir_lower_atomics(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {   /* overloads without an impl are skipped */
+         nir_foreach_block(overload->impl, lower_block, overload->impl);
+         nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c
new file mode 100644
index 000000000..0cd8740cc
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass detects when a global variable is only being used by
+ * one function and makes it local to that function
+ */
+
+#include "nir.h"
+
+struct global_to_local_state {
+   nir_function_impl *impl;   /* impl whose blocks are being walked */
+   /* A hash table keyed on variable pointers that stores the unique
+    * nir_function_impl that uses the given variable.  If a variable is
+    * used in multiple functions, the data for the given key will be NULL.
+    */
+   struct hash_table *var_func_table;
+};
+
+/* Block callback: records which impl(s) use each global variable. */
+static bool
+mark_global_var_uses_block(nir_block *block, void *void_state)
+{
+   struct global_to_local_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables;
+
+      for (unsigned v = 0; v < num_vars; v++) {
+         nir_variable *var = intrin->variables[v]->var;
+         if (var->data.mode != nir_var_global)
+            continue;
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->var_func_table, var);
+         if (entry == NULL) {
+            /* First use seen: remember the impl that uses the variable */
+            _mesa_hash_table_insert(state->var_func_table, var, state->impl);
+         } else if (entry->data != state->impl) {
+            /* Also used by another impl: NULL marks it as shared */
+            entry->data = NULL;
+         }
+      }
+   }
+   return true;
+}
+
+/* Makes each global variable that only one impl uses a local of that impl */
+void
+nir_lower_global_vars_to_local(nir_shader *shader)
+{
+   struct global_to_local_state state;
+   state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         state.impl = overload->impl;
+         nir_foreach_block(overload->impl, mark_global_var_uses_block, &state);
+      }
+   }
+
+   struct hash_entry *entry;
+   hash_table_foreach(state.var_func_table, entry) {
+      nir_variable *var = (void *)entry->key;
+      nir_function_impl *sole_user = entry->data;
+
+      assert(var->data.mode == nir_var_global);
+      if (sole_user == NULL)
+         continue;   /* NULL data: used by more than one impl */
+
+      exec_node_remove(&var->node);
+      var->data.mode = nir_var_local;
+      exec_list_push_tail(&sole_user->locals, &var->node);
+   }
+
+   _mesa_hash_table_destroy(state.var_func_table, NULL);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_io.c b/mesalib/src/glsl/nir/nir_lower_io.c
new file mode 100644
index 000000000..207f8daa1
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_io.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts references to input/output variables with
+ * loads/stores to actual input/output intrinsics.
+ *
+ * NOTE: This pass really only works for scalar backends at the moment due
+ * to the way it packs the input/output data.
+ */
+
+#include "nir.h"
+
+struct lower_io_state {
+   void *mem_ctx; /* memory context handed to the nir_*_create/copy helpers */
+};
+
+/* Size of a type in components, as used to pack driver_location offsets.
+ * Arrays and structs add up their members; samplers, atomic counters,
+ * interfaces and images take no space. */
+static unsigned
+type_size(const struct glsl_type *type)
+{
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      return glsl_get_components(type);
+   case GLSL_TYPE_ARRAY: {
+      unsigned elem_size = type_size(glsl_get_array_element(type));
+      return elem_size * glsl_get_length(type);
+   }
+   case GLSL_TYPE_STRUCT: {
+      unsigned total = 0;
+      for (unsigned field = 0; field < glsl_get_length(type); field++)
+         total += type_size(glsl_get_struct_field(type, field));
+      return total;
+   }
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_IMAGE:
+      return 0;
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_DOUBLE:
+      unreachable("not reached");
+   }
+
+   return 0;
+}
+
+/* Packs the variables in ht back to back; *size receives the total size. */
+static void
+assign_var_locations(struct hash_table *ht, unsigned *size)
+{
+   struct hash_entry *entry;
+   unsigned next_free = 0;
+
+   hash_table_foreach(ht, entry) {
+      nir_variable *var = (nir_variable *) entry->data;
+      bool in_ubo = var->data.mode == nir_var_uniform &&
+                    var->interface_type != NULL;
+      /* UBOs have their own address spaces, so they are not counted towards
+       * the number of global uniforms.
+       */
+      if (!in_ubo) {
+         var->data.driver_location = next_free;
+         next_free += type_size(var->type);
+      }
+   }
+
+   *size = next_free;
+}
+
+static void
+assign_var_locations_shader(nir_shader *shader)
+{ /* Each variable class is laid out independently, starting at offset 0. */
+   assign_var_locations(shader->inputs, &shader->num_inputs);
+   assign_var_locations(shader->outputs, &shader->num_outputs);
+   assign_var_locations(shader->uniforms, &shader->num_uniforms);
+}
+
+/* True if any array step in the deref chain uses an indirect index. */
+static bool
+deref_has_indirect(nir_deref_var *deref)
+{
+   for (nir_deref *d = deref->deref.child; d != NULL; d = d->child) {
+      if (d->deref_type != nir_deref_type_array)
+         continue;
+      if (nir_deref_as_array(d)->deref_array_type ==
+          nir_deref_array_type_indirect)
+         return true;
+   }
+   return false;
+}
+
+static unsigned
+get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
+              struct lower_io_state *state)
+{ /* Returns the constant offset of deref; indirect parts go to *indirect. */
+   bool found_indirect = false;
+   unsigned base_offset = 0;
+
+   nir_deref *tail = &deref->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type == nir_deref_type_array) {
+         nir_deref_array *deref_array = nir_deref_as_array(tail);
+         unsigned size = type_size(tail->type); /* size of one array element */
+
+         base_offset += size * deref_array->base_offset;
+
+         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+            nir_load_const_instr *load_const =
+               nir_load_const_instr_create(state->mem_ctx, 1);
+            load_const->value.u[0] = size; /* element size as a constant */
+            nir_instr_insert_before(instr, &load_const->instr);
+
+            nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imul);
+            mul->src[0].src.is_ssa = true;
+            mul->src[0].src.ssa = &load_const->def;
+            nir_src_copy(&mul->src[1].src, &deref_array->indirect,
+                         state->mem_ctx);
+            mul->dest.write_mask = 1;
+            nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+            nir_instr_insert_before(instr, &mul->instr); /* size * index */
+
+            if (found_indirect) { /* add to the indirect offset built so far */
+               nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,
+                                                         nir_op_iadd);
+               add->src[0].src = *indirect; /* read before it is overwritten */
+               add->src[1].src.is_ssa = true;
+               add->src[1].src.ssa = &mul->dest.dest.ssa;
+               add->dest.write_mask = 1;
+               nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+               nir_instr_insert_before(instr, &add->instr);
+
+               indirect->is_ssa = true;
+               indirect->ssa = &add->dest.dest.ssa;
+            } else { /* first indirect index in the chain */
+               indirect->is_ssa = true;
+               indirect->ssa = &mul->dest.dest.ssa;
+               found_indirect = true;
+            }
+         }
+      } else if (tail->deref_type == nir_deref_type_struct) {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+         for (unsigned i = 0; i < deref_struct->index; i++) /* earlier fields */
+            base_offset += type_size(glsl_get_struct_field(parent_type, i));
+      }
+   }
+
+   return base_offset;
+}
+
+static bool
+nir_lower_io_block(nir_block *block, void *void_state)
+{ /* Block callback: lowers load_var/store_var on inputs, uniforms, outputs. */
+   struct lower_io_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+         if (mode != nir_var_shader_in && mode != nir_var_uniform)
+            continue; /* only inputs and uniforms are lowered here */
+
+         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+
+         /* Figure out the opcode */
+         nir_intrinsic_op load_op;
+         switch (mode) {
+         case nir_var_shader_in:
+            load_op = has_indirect ? nir_intrinsic_load_input_indirect :
+                                     nir_intrinsic_load_input;
+            break;
+         case nir_var_uniform:
+            load_op = has_indirect ? nir_intrinsic_load_uniform_indirect :
+                                     nir_intrinsic_load_uniform;
+            break;
+         default:
+            unreachable("Unknown variable mode");
+         }
+
+         nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
+                                                                load_op);
+         load->num_components = intrin->num_components;
+
+         nir_src indirect; /* written by get_io_offset only for indirect derefs */
+         unsigned offset = get_io_offset(intrin->variables[0],
+                                         &intrin->instr, &indirect, state);
+         offset += intrin->variables[0]->var->data.driver_location;
+
+         load->const_index[0] = offset;
+         load->const_index[1] = 1;
+
+         if (has_indirect)
+            load->src[0] = indirect; /* dynamic part of the offset */
+
+         if (intrin->dest.is_ssa) {
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->num_components, NULL);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa),
+                                     state->mem_ctx);
+         } else {
+            nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+         }
+
+         nir_instr_insert_before(&intrin->instr, &load->instr);
+         nir_instr_remove(&intrin->instr); /* drop the original load_var */
+         break;
+      }
+
+      case nir_intrinsic_store_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
+            continue; /* only shader outputs are lowered here */
+
+         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+
+         nir_intrinsic_op store_op;
+         if (has_indirect) {
+            store_op = nir_intrinsic_store_output_indirect;
+         } else {
+            store_op = nir_intrinsic_store_output;
+         }
+
+         nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
+                                                                 store_op);
+         store->num_components = intrin->num_components;
+
+         nir_src indirect; /* written by get_io_offset only for indirect derefs */
+         unsigned offset = get_io_offset(intrin->variables[0],
+                                         &intrin->instr, &indirect, state);
+         offset += intrin->variables[0]->var->data.driver_location;
+
+         store->const_index[0] = offset;
+         store->const_index[1] = 1;
+
+         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+
+         if (has_indirect)
+            store->src[1] = indirect; /* dynamic part of the offset */
+
+         nir_instr_insert_before(&intrin->instr, &store->instr);
+         nir_instr_remove(&intrin->instr); /* drop the original store_var */
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+/* Runs the I/O lowering over every block of a single function impl. */
+static void
+nir_lower_io_impl(nir_function_impl *impl)
+{
+   struct lower_io_state state = { .mem_ctx = ralloc_parent(impl) };
+
+   nir_foreach_block(impl, nir_lower_io_block, &state);
+
+   /* Only instructions are replaced, so block-level metadata stays valid. */
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+/* Entry point: lays out all I/O variables, then lowers each function impl. */
+void
+nir_lower_io(nir_shader *shader)
+{
+   assign_var_locations_shader(shader);
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)
+         nir_lower_io_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c
new file mode 100644
index 000000000..8c5df7be6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+struct locals_to_regs_state {
+   void *mem_ctx;           /* context for newly created instructions/sources */
+   nir_function_impl *impl; /* impl in which the registers are created */
+
+   /* A hash table mapping derefs to registers */
+   struct hash_table *regs_table;
+};
+
+/* The following two functions implement a hash and equality check for
+ * variable dereferences.  When the hash or equality function encounters an
+ * array, it ignores the offset and whether it is direct or indirect
+ * entirely.
+ */
+/* Hashes the variable plus every struct field index along the chain. */
+static uint32_t
+hash_deref(const void *void_deref)
+{
+   const nir_deref_var *head = void_deref;
+   const nir_deref *link = head->deref.child;
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+   hash = _mesa_fnv32_1a_accumulate(hash, head->var);
+   for (; link != NULL; link = link->child) {
+      if (link->deref_type != nir_deref_type_struct)
+         continue;
+      const nir_deref_struct *field = nir_deref_as_struct(link);
+      hash = _mesa_fnv32_1a_accumulate(hash, field->index);
+   }
+
+   return hash;
+}
+
+/* Equality matching hash_deref(): array steps always compare equal. */
+static bool
+derefs_equal(const void *void_a, const void *void_b)
+{
+   const nir_deref_var *lhs_var = void_a;
+   const nir_deref_var *rhs_var = void_b;
+   const nir_deref *lhs = lhs_var->deref.child;
+   const nir_deref *rhs = rhs_var->deref.child;
+
+   if (lhs_var->var != rhs_var->var)
+      return false;
+
+   while (lhs != NULL) {
+      if (lhs->deref_type != rhs->deref_type)
+         return false;
+      /* Only struct steps carry data to compare; all array steps match. */
+      if (lhs->deref_type == nir_deref_type_struct &&
+          nir_deref_as_struct(lhs)->index != nir_deref_as_struct(rhs)->index)
+         return false;
+      assert((lhs->child == NULL) == (rhs->child == NULL));
+      if ((lhs->child == NULL) != (rhs->child == NULL))
+         return false;
+      lhs = lhs->child;
+      rhs = rhs->child;
+   }
+   return true;
+}
+
+/* Looks up, or creates on first use, the register backing a local deref.
+ * Derefs that differ only in array indices map to the same register.
+ */
+static nir_register *
+get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
+{
+   const uint32_t hash = hash_deref(deref);
+   struct hash_entry *cached =
+      _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
+   if (cached != NULL)
+      return cached->data;
+
+   /* Multiply together the lengths of every array (or matrix) indexed. */
+   unsigned num_elems = 1;
+   nir_deref *leaf;
+   for (leaf = &deref->deref; leaf->child != NULL; leaf = leaf->child) {
+      if (leaf->child->deref_type != nir_deref_type_array)
+         continue;
+      if (glsl_type_is_matrix(leaf->type)) {
+         num_elems *= glsl_get_matrix_columns(leaf->type);
+      } else {
+         assert(glsl_get_length(leaf->type) > 0);
+         num_elems *= glsl_get_length(leaf->type);
+      }
+   }
+
+   assert(glsl_type_is_vector(leaf->type) || glsl_type_is_scalar(leaf->type));
+
+   nir_register *reg = nir_local_reg_create(state->impl);
+   reg->num_components = glsl_get_vector_elements(leaf->type);
+   reg->num_array_elems = (num_elems > 1) ? num_elems : 0;
+   _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
+
+   return reg;
+}
+
+static nir_src
+get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
+                  struct locals_to_regs_state *state)
+{ /* Builds a register source for deref: constant base + optional indirect. */
+   nir_src src;
+
+   src.is_ssa = false;
+   src.reg.reg = get_reg_for_deref(deref, state);
+   src.reg.base_offset = 0;
+   src.reg.indirect = NULL;
+
+   nir_deref *tail = &deref->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type != nir_deref_type_array)
+         continue; /* struct steps were resolved by the choice of register */
+
+      nir_deref_array *deref_array = nir_deref_as_array(tail);
+
+      src.reg.base_offset *= glsl_get_length(parent_type); /* scale by length */
+      src.reg.base_offset += deref_array->base_offset;
+
+      if (src.reg.indirect) { /* scale the accumulated indirect the same way */
+         nir_load_const_instr *load_const =
+            nir_load_const_instr_create(state->mem_ctx, 1);
+         load_const->value.u[0] = glsl_get_length(parent_type);
+         nir_instr_insert_before(instr, &load_const->instr);
+
+         nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul);
+         mul->src[0].src = *src.reg.indirect;
+         mul->src[1].src.is_ssa = true;
+         mul->src[1].src.ssa = &load_const->def;
+         mul->dest.write_mask = 1;
+         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+         nir_instr_insert_before(instr, &mul->instr);
+
+         src.reg.indirect->is_ssa = true;
+         src.reg.indirect->ssa = &mul->dest.dest.ssa;
+      }
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         if (src.reg.indirect == NULL) { /* first indirect index in the chain */
+            src.reg.indirect = ralloc(state->mem_ctx, nir_src);
+            nir_src_copy(src.reg.indirect, &deref_array->indirect,
+                         state->mem_ctx);
+         } else { /* add this index to the accumulated indirect */
+            nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_iadd);
+            add->src[0].src = *src.reg.indirect;
+            nir_src_copy(&add->src[1].src, &deref_array->indirect,
+                         state->mem_ctx);
+            add->dest.write_mask = 1;
+            nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+            nir_instr_insert_before(instr, &add->instr);
+
+            src.reg.indirect->is_ssa = true;
+            src.reg.indirect->ssa = &add->dest.dest.ssa;
+         }
+      }
+   }
+
+   return src;
+}
+
+static bool
+lower_locals_to_regs_block(nir_block *block, void *void_state)
+{ /* Block callback: replaces load_var/store_var on locals by register movs. */
+   struct locals_to_regs_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_local)
+            continue; /* only local variables are lowered */
+
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
+         mov->src[0].src = get_deref_reg_src(intrin->variables[0],
+                                             &intrin->instr, state);
+         mov->dest.write_mask = (1 << intrin->num_components) - 1; /* low bits */
+         if (intrin->dest.is_ssa) {
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa),
+                                     state->mem_ctx);
+         } else {
+            nir_dest_copy(&mov->dest.dest, &intrin->dest, state->mem_ctx);
+         }
+         nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+         nir_instr_remove(&intrin->instr); /* the mov replaces the load_var */
+         break;
+      }
+
+      case nir_intrinsic_store_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_local)
+            continue; /* only local variables are lowered */
+
+         nir_src reg_src = get_deref_reg_src(intrin->variables[0],
+                                             &intrin->instr, state);
+
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
+         nir_src_copy(&mov->src[0].src, &intrin->src[0], state->mem_ctx);
+         mov->dest.write_mask = (1 << intrin->num_components) - 1; /* low bits */
+         mov->dest.dest.is_ssa = false; /* write the register chosen above */
+         mov->dest.dest.reg.reg = reg_src.reg.reg;
+         mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
+         mov->dest.dest.reg.indirect = reg_src.reg.indirect;
+
+         nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+         nir_instr_remove(&intrin->instr); /* the mov replaces the store_var */
+         break;
+      }
+
+      case nir_intrinsic_copy_var:
+         unreachable("There should be no copies whatsoever at this point");
+         break;
+
+      default:
+         continue;
+      }
+   }
+
+   return true;
+}
+
+/* Lowers one impl; the deref-to-register table is private to this call. */
+static void
+nir_lower_locals_to_regs_impl(nir_function_impl *impl)
+{
+   struct locals_to_regs_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal),
+   };
+
+   nir_foreach_block(impl, lower_locals_to_regs_block, &state);
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+
+   _mesa_hash_table_destroy(state.regs_table, NULL);
+}
+
+void
+nir_lower_locals_to_regs(nir_shader *shader)
+{ /* Entry point: runs the pass on every overload that has an impl. */
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_locals_to_regs_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
new file mode 100644
index 000000000..7cd93ea0a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that lowers vector phi nodes to scalar phi nodes when
+ * we don't think it will hurt anything.
+ */
+
+struct lower_phis_to_scalar_state {
+   void *mem_ctx;  /* context used to allocate the new instructions */
+   void *dead_ctx; /* replaced phis are moved here; freed once the impl is done */
+
+   /* Hash table marking which phi nodes are scalarizable.  The key is
+    * pointers to phi instructions and the entry is either NULL for not
+    * scalarizable or non-null for scalarizable.
+    */
+   struct hash_table *phi_table;
+};
+
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state);
+
+/* Decides whether the value feeding one phi source can be scalarized. */
+static bool
+is_phi_src_scalarizable(nir_phi_src *src,
+                        struct lower_phis_to_scalar_state *state)
+{
+   /* Don't know what to do with non-ssa sources */
+   if (!src->src.is_ssa)
+      return false;
+
+   nir_instr *producer = src->src.ssa->parent_instr;
+
+   if (producer->type == nir_instr_type_load_const) {
+      /* These are trivially scalarizable */
+      return true;
+   }
+
+   if (producer->type == nir_instr_type_phi) {
+      /* A phi is scalarizable if we're going to lower it */
+      return should_lower_phi(nir_instr_as_phi(producer), state);
+   }
+
+   if (producer->type == nir_instr_type_alu) {
+      /* ALU operations with output_size == 0 should be scalarized.  We
+       * will also see a bunch of vecN operations from scalarizing ALU
+       * operations and, since they can easily be copy-propagated, they
+       * are ok too.
+       */
+      nir_op op = nir_instr_as_alu(producer)->op;
+      return nir_op_infos[op].output_size == 0 || op == nir_op_vec2 ||
+             op == nir_op_vec3 || op == nir_op_vec4;
+   }
+
+   if (producer->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(producer);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+         return mode == nir_var_shader_in || mode == nir_var_uniform;
+      }
+
+      case nir_intrinsic_interp_var_at_centroid:
+      case nir_intrinsic_interp_var_at_sample:
+      case nir_intrinsic_interp_var_at_offset:
+      case nir_intrinsic_load_uniform:
+      case nir_intrinsic_load_uniform_indirect:
+      case nir_intrinsic_load_ubo:
+      case nir_intrinsic_load_ubo_indirect:
+      case nir_intrinsic_load_input:
+      case nir_intrinsic_load_input_indirect:
+         return true;
+
+      default:
+         return false;
+      }
+   }
+
+   /* We can't scalarize this type of instruction */
+   return false;
+}
+
+/**
+ * Determines if the given phi node should be lowered.  The only phi nodes
+ * we will scalarize at the moment are those where all of the sources are
+ * scalarizable.
+ *
+ * The reason for this comes down to coalescing.  Since phi sources can't
+ * swizzle, swizzles on phis have to be resolved by inserting a mov right
+ * before the phi.  The choice then becomes between movs to pick off
+ * components for a scalar phi or potentially movs to recombine components
+ * for a vector phi.  The problem is that the movs generated to pick off
+ * the components are almost uncoalescable.  We can't coalesce them in NIR
+ * because we need them to pick off components and we can't coalesce them
+ * in the backend because the source register is a vector and the
+ * destination is a scalar that may be used at other places in the program.
+ * On the other hand, if we have a bunch of scalars going into a vector
+ * phi, the situation is much better.  In this case, if the SSA def is
+ * generated in the predecessor block to the corresponding phi source, the
+ * backend code will be an ALU op into a temporary and then a mov into the
+ * given vector component;  this move can almost certainly be coalesced
+ * away.
+ */
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
+{
+   /* Already scalar */
+   if (phi->dest.ssa.num_components == 1)
+      return false;
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
+   if (entry)
+      return entry->data != NULL;
+
+   /* Insert an entry and mark it as scalarizable for now. That way
+    * we don't recurse forever and a cycle in the dependence graph
+    * won't automatically make us fail to scalarize. */
+   _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
+   bool scalarizable = true;
+   nir_foreach_phi_src(phi, src) {
+      scalarizable = is_phi_src_scalarizable(src, state);
+      if (!scalarizable)
+         break;
+   }
+
+   /* The recursion above may insert more phis and rehash the table, which
+    * invalidates older entry pointers; look ours up again before writing. */
+   entry = _mesa_hash_table_search(state->phi_table, phi);
+   assert(entry);
+   entry->data = (void *)(intptr_t)scalarizable;
+   return scalarizable;
+}
+
+static bool
+lower_phis_to_scalar_block(nir_block *block, void *void_state)
+{ /* Block callback: splits lowerable vector phis into scalar phis + a vecN. */
+   struct lower_phis_to_scalar_state *state = void_state;
+
+   /* Find the last phi node in the block */
+   nir_phi_instr *last_phi = NULL;
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      last_phi = nir_instr_as_phi(instr);
+   }
+
+   /* We have to handle the phi nodes in their own pass due to the way
+    * we're modifying the linked list of instructions.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      if (!should_lower_phi(phi, state))
+         continue; /* already scalar, or has a non-scalarizable source */
+
+      /* Create a vecN operation to combine the results.  Most of these
+       * will be redundant, but copy propagation should clean them up for
+       * us.  No need to add the complexity here.
+       */
+      nir_op vec_op;
+      switch (phi->dest.ssa.num_components) {
+      case 2: vec_op = nir_op_vec2; break;
+      case 3: vec_op = nir_op_vec3; break;
+      case 4: vec_op = nir_op_vec4; break;
+      default: unreachable("Invalid number of components");
+      }
+
+      nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
+      nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
+                        phi->dest.ssa.num_components, NULL);
+      vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+
+      for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
+         nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
+         nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
+
+         vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
+
+         nir_foreach_phi_src(phi, src) {
+            /* We need to insert a mov to grab the i'th component of src */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+            mov->dest.write_mask = 1;
+            nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
+            mov->src[0].swizzle[0] = i; /* read component i of the source */
+
+            /* Insert at the end of the predecessor but before the jump */
+            nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
+            if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
+               nir_instr_insert_before(pred_last_instr, &mov->instr);
+            else
+               nir_instr_insert_after_block(src->pred, &mov->instr);
+
+            nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+            new_src->pred = src->pred;
+            new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
+
+            exec_list_push_tail(&new_phi->srcs, &new_src->node);
+         }
+
+         nir_instr_insert_before(&phi->instr, &new_phi->instr);
+      }
+
+      nir_instr_insert_after(&last_phi->instr, &vec->instr);
+
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&vec->dest.dest.ssa),
+                               state->mem_ctx);
+
+      ralloc_steal(state->dead_ctx, phi); /* reparent the old phi to dead_ctx */
+      nir_instr_remove(&phi->instr);
+
+      /* We're using the safe iterator and inserting all the newly
+       * scalarized phi nodes before their non-scalarized version so that's
+       * ok.  However, we are also inserting vec operations after all of
+       * the last phi node so once we get here, we can't trust even the
+       * safe iterator to stop properly.  We have to break manually.
+       */
+      if (instr == &last_phi->instr)
+         break;
+   }
+
+   return true;
+}
+
+/* Lowers one impl; dead phis and the phi table share a scratch context. */
+static void
+lower_phis_to_scalar_impl(nir_function_impl *impl)
+{
+   void *scratch = ralloc_context(NULL);
+   struct lower_phis_to_scalar_state state;
+
+   state.dead_ctx = scratch;
+   state.mem_ctx = ralloc_parent(impl);
+   state.phi_table = _mesa_hash_table_create(scratch, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+   ralloc_free(scratch);
+}
+
+/** A pass that lowers vector phi nodes to scalar
+ *
+ * This pass loops through the blocks and looks for vector phi nodes it can
+ * lower to scalar phi nodes.  Not all phi nodes are lowered.  For instance,
+ * if one of the sources is a non-scalarizable vector, then we don't bother
+ * lowering because that would generate hard-to-coalesce movs.
+ */
+void
+nir_lower_phis_to_scalar(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         lower_phis_to_scalar_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
new file mode 100644
index 000000000..3015dbd09
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "../program.h"
+#include "program/hash_table.h"
+#include "ir_uniform.h"
+
+extern "C" {
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+}
+
+static unsigned
+get_sampler_index(struct gl_shader_program *shader_program, const char *name,
+                  const struct gl_program *prog)
+{
+   const GLuint stage = _mesa_program_enum_to_shader_stage(prog->Target);
+   unsigned slot;
+   /* Resolve the uniform name to its slot in UniformStorage. */
+   const bool found = shader_program->UniformHash->get(slot, name);
+   if (!found) {
+      linker_error(shader_program,
+                   "failed to find sampler named %s.\n", name);
+      return 0;
+   }
+
+   if (shader_program->UniformStorage[slot].sampler[stage].active)
+      return shader_program->UniformStorage[slot].sampler[stage].index;
+
+   assert(0 && "cannot return a sampler");
+   linker_error(shader_program,
+                "cannot return a sampler named %s, because it is not "
+                "used in this shader stage. This is a driver bug.\n",
+                name);
+   return 0;
+}
+
+static void
+lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
+              const struct gl_program *prog, void *mem_ctx)
+{
+   if (instr->sampler == NULL)
+      return;
+   /* Fold the sampler deref chain into a uniform name (looked up below) plus
+    * a constant sampler_index; indirect array indices become a tex source. */
+   instr->sampler_index = 0;
+   char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name);
+
+   for (nir_deref *deref = &instr->sampler->deref;
+        deref->child; deref = deref->child) {
+      switch (deref->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *deref_array = nir_deref_as_array(deref->child);
+
+         /* XXX: We're assuming here that the indirect is the last array
+          * thing we have.  This should be ok for now as we don't support
+          * arrays_of_arrays yet.
+          */
+
+         instr->sampler_index *= glsl_get_length(deref->type);
+         switch (deref_array->deref_array_type) {
+         case nir_deref_array_type_direct:
+            instr->sampler_index += deref_array->base_offset;
+            if (deref_array->deref.child)
+               ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
+            break;
+         case nir_deref_array_type_indirect: {
+            instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+                                  instr->num_srcs + 1);
+            memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
+            instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+            instr->num_srcs++;
+            /* Move the indirect index from the deref onto the new tex source */
+            nir_instr_rewrite_src(&instr->instr,
+                                  &instr->src[instr->num_srcs - 1].src,
+                                  deref_array->indirect);
+
+            instr->sampler_array_size = glsl_get_length(deref->type);
+            /* Clear the deref's indirect by rewriting it to an empty src */
+            nir_src empty;
+            memset(&empty, 0, sizeof empty);
+            nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, empty);
+
+            if (deref_array->deref.child)
+               ralloc_strcat(&name, "[0]");
+            break;
+         }
+
+         case nir_deref_array_type_wildcard:
+            unreachable("Cannot copy samplers");
+         default:
+            unreachable("Invalid deref array type");
+         }
+         break;
+      }
+
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child);
+         const char *field = glsl_get_struct_elem_name(deref->type,
+                                                       deref_struct->index);
+         ralloc_asprintf_append(&name, ".%s", field);
+         break;
+      }
+
+      default:
+         unreachable("Invalid deref type");
+         break;
+      }
+   }
+
+   instr->sampler_index += get_sampler_index(shader_program, name, prog);
+   /* The deref is now fully folded into sampler_index; drop it */
+   instr->sampler = NULL;
+}
+
+typedef struct {
+   void *mem_ctx;                            /* ralloc parent of the impl */
+   struct gl_shader_program *shader_program; /* uniform lookup + linker errors */
+   struct gl_program *prog;                  /* its Target picks the stage */
+} lower_state;
+
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *ctx = (lower_state *) _state;
+
+   /* Only texture instructions carry a sampler deref to lower. */
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_tex)
+         continue;
+      lower_sampler(nir_instr_as_tex(instr), ctx->shader_program,
+                    ctx->prog, ctx->mem_ctx);
+   }
+
+   return true;
+}
+
+static void
+lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
+           struct gl_program *prog)
+{
+   /* Bundle everything the per-block callback needs into one struct. */
+   lower_state cb_data;
+   cb_data.prog = prog;
+   cb_data.shader_program = shader_program;
+   cb_data.mem_ctx = ralloc_parent(impl);
+
+   nir_foreach_block(impl, lower_block_cb, &cb_data);
+}
+
+extern "C" void
+nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
+                   struct gl_program *prog)
+{
+   nir_foreach_overload(shader, ovl) {
+      if (ovl->impl != NULL)
+         lower_impl(ovl->impl, shader_program, prog);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_system_values.c b/mesalib/src/glsl/nir/nir_lower_system_values.c
new file mode 100644
index 000000000..328d4f1ab
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_system_values.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "main/mtypes.h"
+
+static void
+convert_instr(nir_intrinsic_instr *instr)
+{
+   /* Only load_var intrinsics reading a system-value variable are rewritten. */
+   if (instr->intrinsic != nir_intrinsic_load_var)
+      return;
+   nir_variable *sysval = instr->variables[0]->var;
+   if (sysval->data.mode != nir_var_system_value)
+      return;
+
+   /* Pick the dedicated intrinsic matching the variable's location. */
+   nir_intrinsic_op op;
+
+   switch (sysval->data.location) {
+   case SYSTEM_VALUE_FRONT_FACE:
+      op = nir_intrinsic_load_front_face;
+      break;
+   case SYSTEM_VALUE_VERTEX_ID:
+      op = nir_intrinsic_load_vertex_id;
+      break;
+   case SYSTEM_VALUE_INSTANCE_ID:
+      op = nir_intrinsic_load_instance_id;
+      break;
+   case SYSTEM_VALUE_SAMPLE_ID:
+      op = nir_intrinsic_load_sample_id;
+      break;
+   case SYSTEM_VALUE_SAMPLE_POS:
+      op = nir_intrinsic_load_sample_pos;
+      break;
+   case SYSTEM_VALUE_SAMPLE_MASK_IN:
+      op = nir_intrinsic_load_sample_mask_in;
+      break;
+   case SYSTEM_VALUE_INVOCATION_ID:
+      op = nir_intrinsic_load_invocation_id;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   void *mem_ctx = ralloc_parent(instr);
+   nir_intrinsic_instr *lowered = nir_intrinsic_instr_create(mem_ctx, op);
+
+   if (!instr->dest.is_ssa) {
+      nir_dest_copy(&lowered->dest, &instr->dest, mem_ctx);
+   } else {
+      nir_ssa_dest_init(&lowered->instr, &lowered->dest,
+                        instr->dest.ssa.num_components, NULL);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+                               nir_src_for_ssa(&lowered->dest.ssa),
+                               mem_ctx);
+   }
+
+   nir_instr_insert_before(&instr->instr, &lowered->instr);
+   nir_instr_remove(&instr->instr);
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+   (void) state;
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+      convert_instr(nir_instr_as_intrinsic(instr));
+   }
+
+   return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+   const nir_metadata kept = nir_metadata_block_index | nir_metadata_dominance;
+
+   nir_foreach_block(impl, convert_block, NULL);
+   nir_metadata_preserve(impl, kept);
+}
+
+void
+nir_lower_system_values(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl) {
+      if (ovl->impl != NULL)
+         convert_impl(ovl->impl);
+   }
+   /* Every overload has been visited; empty the shader's system_values list. */
+   exec_list_make_empty(&shader->system_values);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c
new file mode 100644
index 000000000..d6bf77f17
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This pass folds the neg, abs, and sat operations into source (negate/abs)
+ * and destination (saturate) modifiers on ALU instructions to make things
+ * nicer for the backend.  Optimizations are easier without the modifiers.
+ */
+
+static bool
+nir_lower_to_source_mods_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (!alu->src[i].src.is_ssa)
+            continue;
+
+         if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu)
+            continue;
+
+         nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
+
+         if (parent->dest.saturate)
+            continue;
+
+         switch (nir_op_infos[alu->op].input_types[i]) {
+         case nir_type_float:
+            if (parent->op != nir_op_fmov)
+               continue;
+            break;
+         case nir_type_int:
+            if (parent->op != nir_op_imov)
+               continue;
+            break;
+         default:
+            continue;
+         }
+         /* Read the mov's source directly and merge its modifiers into ours */
+         nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src);
+         if (alu->src[i].abs) {
+            /* abs here trumps the parent's neg and abs, do nothing */
+         } else {
+            alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate);
+            alu->src[i].abs |= parent->src[0].abs;
+         }
+         /* Compose our swizzle with the mov's on every channel we read */
+         for (int j = 0; j < 4; ++j) {
+            if (!nir_alu_instr_channel_used(alu, i, j))
+               continue;
+            alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]];
+         }
+         /* Drop the mov once nothing uses its result any more */
+         if (parent->dest.dest.ssa.uses->entries == 0 &&
+             parent->dest.dest.ssa.if_uses->entries == 0)
+            nir_instr_remove(&parent->instr);
+      }
+      /* Now turn this instruction itself into a mov with modifiers */
+      switch (alu->op) {
+      case nir_op_fsat:
+         alu->op = nir_op_fmov;
+         alu->dest.saturate = true;
+         break;
+      case nir_op_ineg:
+         alu->op = nir_op_imov;
+         alu->src[0].negate = !alu->src[0].negate;
+         break;
+      case nir_op_fneg:
+         alu->op = nir_op_fmov;
+         alu->src[0].negate = !alu->src[0].negate;
+         break;
+      case nir_op_iabs:
+         alu->op = nir_op_imov;
+         alu->src[0].abs = true;
+         alu->src[0].negate = false;
+         break;
+      case nir_op_fabs:
+         alu->op = nir_op_fmov;
+         alu->src[0].abs = true;
+         alu->src[0].negate = false;
+         break;
+      default:
+         break;
+      }
+
+      /* We've covered sources.  Now we're going to try and saturate the
+       * destination if we can.
+       */
+
+      if (!alu->dest.dest.is_ssa)
+         continue;
+
+      /* We can only saturate float destinations */
+      if (nir_op_infos[alu->op].output_type != nir_type_float)
+         continue;
+
+      if (alu->dest.dest.ssa.if_uses->entries != 0)
+         continue;
+
+      bool all_children_are_sat = true;
+      struct set_entry *entry;
+      set_foreach(alu->dest.dest.ssa.uses, entry) {
+         const nir_instr *child = entry->key;
+         if (child->type != nir_instr_type_alu) {
+            all_children_are_sat = false;
+            continue;
+         }
+
+         nir_alu_instr *child_alu = nir_instr_as_alu(child);
+         if (child_alu->src[0].negate || child_alu->src[0].abs) {
+            all_children_are_sat = false;
+            continue;
+         }
+
+         if (child_alu->op != nir_op_fsat &&
+             !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) {
+            all_children_are_sat = false;
+            continue;
+         }
+      }
+
+      if (!all_children_are_sat)
+         continue;
+
+      alu->dest.saturate = true;
+      /* We saturate now, so every child degrades to a plain mov */
+      set_foreach(alu->dest.dest.ssa.uses, entry) {
+         nir_alu_instr *child_alu = nir_instr_as_alu((nir_instr *)entry->key);
+         child_alu->op = nir_op_fmov;
+         child_alu->dest.saturate = false;
+         /* We could propagate the dest of our instruction to the
+          * destinations of the uses here.  However, one quick round of
+          * copy propagation will clean that all up and then we don't have
+          * the complexity.
+          */
+      }
+   }
+
+   return true;
+}
+
+/* Entry point: run the source-modifier lowering over every function
+ * overload of the shader that actually has an implementation.
+ */
+void
+nir_lower_to_source_mods(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl) {
+      if (ovl->impl == NULL)
+         continue;
+
+      /* The block callback keeps no state, hence the NULL. */
+      nir_foreach_block(ovl->impl, nir_lower_to_source_mods_block, NULL);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
new file mode 100644
index 000000000..85ebb281c
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+#include "nir_types.h"
+
+/*
+ * Lowers all copy intrinsics to sequences of load/store intrinsics.
+ */
+
+/* Scans the chain starting at deref for the first link whose child is a
+ * wildcard array deref and returns that link (the wildcard's parent).
+ * For the chain a[1].foo[*].bar this yields the deref of foo.  When it is
+ * called again starting from the [*] link, which has no further wildcard
+ * below it, the result is NULL.
+ */
+static nir_deref *
+deref_next_wildcard_parent(nir_deref *deref)
+{
+   for (nir_deref *link = deref; link->child != NULL; link = link->child) {
+      nir_deref *next = link->child;
+
+      if (next->deref_type == nir_deref_type_array &&
+          nir_deref_as_array(next)->deref_array_type ==
+             nir_deref_array_type_wildcard)
+         return link;
+   }
+
+   return NULL;
+}
+
+/* Follows child links from deref and returns the final (childless) deref.
+ */
+static nir_deref *
+get_deref_tail(nir_deref *deref)
+{
+   nir_deref *tail = deref;
+   while (tail->child != NULL)
+      tail = tail->child;
+   return tail;
+}
+
+static int
+type_get_length(const struct glsl_type *type)
+{
+   /* Members/elements for aggregates, columns or components otherwise. */
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_STRUCT:
+      return glsl_get_length(type);
+
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+      return glsl_type_is_matrix(type) ? glsl_get_matrix_columns(type)
+                                       : glsl_get_vector_elements(type);
+   default:
+      unreachable("Invalid deref base type");
+   }
+}
+
+/* This function recursively walks the given deref chains and emits, before
+ * the given copy instruction, an equivalent sequence of load/store
+ * operations.  The copy itself is left in place for the caller to remove.
+ *
+ * @copy_instr    The copy instruction to lower; new instructions will be
+ *                inserted before this one
+ *
+ * @dest_head     The head of the destination variable deref chain
+ *
+ * @src_head      The head of the source variable deref chain
+ *
+ * @dest_tail     The current tail of the destination variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @src_tail      The current tail of the source variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @mem_ctx       The ralloc context new derefs and instructions come from
+ */
+static void
+emit_copy_load_store(nir_intrinsic_instr *copy_instr,
+                     nir_deref_var *dest_head, nir_deref_var *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
+{
+   /* Find the next pair of wildcards */
+   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
+   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
+
+   if (src_arr_parent || dest_arr_parent) {
+      /* Wildcards had better come in matched pairs */
+      assert(src_arr_parent && dest_arr_parent);
+
+      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
+      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);
+
+      unsigned length = type_get_length(src_arr_parent->type);
+      /* The wildcards should represent the same number of elements */
+      assert(length == type_get_length(dest_arr_parent->type));
+      assert(length > 0);
+
+      /* Walk over all of the elements that this wildcard refers to and
+       * call emit_copy_load_store on each one of them */
+      src_arr->deref_array_type = nir_deref_array_type_direct;
+      dest_arr->deref_array_type = nir_deref_array_type_direct;
+      for (unsigned i = 0; i < length; i++) {
+         src_arr->base_offset = i;
+         dest_arr->base_offset = i;
+         emit_copy_load_store(copy_instr, dest_head, src_head,
+                              &dest_arr->deref, &src_arr->deref, mem_ctx);
+      }
+      src_arr->deref_array_type = nir_deref_array_type_wildcard;
+      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
+   } else {
+      /* In this case, we have no wildcards anymore, so all we have to do
+       * is just emit the load and store operations. */
+      src_tail = get_deref_tail(src_tail);
+      dest_tail = get_deref_tail(dest_tail);
+
+      assert(src_tail->type == dest_tail->type);
+
+      unsigned num_components = glsl_get_vector_elements(src_tail->type);
+
+      nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
+      nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
+      load->num_components = num_components;
+      load->variables[0] = nir_deref_as_var(src_deref);
+      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+
+      nir_instr_insert_before(&copy_instr->instr, &load->instr);
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
+      store->num_components = num_components;
+      store->variables[0] = nir_deref_as_var(dest_deref);
+      store->src[0].is_ssa = true;
+      store->src[0].ssa = &load->dest.ssa;
+
+      nir_instr_insert_before(&copy_instr->instr, &store->instr);
+   }
+}
+
+/* Expands one copy_var intrinsic into load/store instructions, which are
+ * inserted immediately before it.  The copy instruction is not removed.
+ * variables[0] is the destination chain and variables[1] the source.
+ */
+void
+nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx)
+{
+   assert(copy->intrinsic == nir_intrinsic_copy_var);
+   nir_deref_var *dst = copy->variables[0];
+   nir_deref_var *src = copy->variables[1];
+   emit_copy_load_store(copy, dst, src, &dst->deref, &src->deref, mem_ctx);
+}
+
+static bool
+lower_var_copies_block(nir_block *block, void *mem_ctx)
+{
+   /* The safe iterator is needed because each copy is unlinked and freed
+    * from inside the loop body.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         if (intrin->intrinsic == nir_intrinsic_copy_var) {
+            nir_lower_var_copy_instr(intrin, mem_ctx);
+            nir_instr_remove(&intrin->instr);
+            ralloc_free(intrin);
+         }
+      }
+   }
+   return true;
+}
+
+/* Replaces each copy_var intrinsic in every implemented overload of the
+ * shader with equivalent load_var/store_var pairs.
+ */
+void
+nir_lower_var_copies(nir_shader *shader)
+{
+   nir_foreach_overload(shader, ovl) {
+      nir_function_impl *impl = ovl->impl;
+      if (impl == NULL)
+         continue;
+
+      /* New instructions are allocated out of the impl's ralloc parent;
+       * it reaches the block callback as its opaque state pointer.
+       */
+      nir_foreach_block(impl, lower_var_copies_block, ralloc_parent(impl));
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
new file mode 100644
index 000000000..8af753029
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -0,0 +1,1102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+struct deref_node {
+   struct deref_node *parent;      /* NULL for the root node of a variable */
+   const struct glsl_type *type;   /* type at this point of the deref chain */
+
+   bool lower_to_ssa;
+   /* NOTE(review): presumably the instructions using this deref -- confirm */
+   struct set *loads;
+   struct set *stores;
+   struct set *copies;
+   /* NOTE(review): looks like a stack of SSA defs for renaming -- confirm */
+   nir_ssa_def **def_stack;
+   nir_ssa_def **def_stack_tail;
+
+   struct deref_node *wildcard;    /* presumably the [*] child -- confirm */
+   struct deref_node *indirect;    /* child shared by all indirect derefs */
+   struct deref_node *children[0]; /* type_get_length(type) direct children */
+};
+
+struct lower_variables_state {
+   void *mem_ctx;
+   void *dead_ctx;   /* context the deref_node trees are allocated from */
+   nir_function_impl *impl;
+
+   /* A hash table mapping variables to deref_node data */
+   struct hash_table *deref_var_nodes;
+
+   /* A hash table mapping fully-qualified direct dereferences, i.e.
+    * dereferences with no indirect or wildcard array dereferences, to
+    * deref_node data.
+    *
+    * At the moment, we only lower loads, stores, and copies that can be
+    * trivially lowered to loads and stores, i.e. copies with no indirects
+    * and no wildcards.  If a part of a variable that is being loaded from
+    * and/or stored into is also involved in a copy operation with
+    * wildcards, then we lower that copy operation to loads and stores, but
+    * otherwise we leave copies with wildcards alone. Since the only derefs
+    * used in these loads, stores, and trivial copies are ones with no
+    * wildcards and no indirects, these are precisely the derefs that we
+    * can actually consider lowering.
+    */
+   struct hash_table *direct_deref_nodes;
+
+   /* Controls whether get_deref_node will add variables to the
+    * direct_deref_nodes table.  This is turned on when we are initially
+    * scanning for load/store instructions.  It is then turned off so we
+    * don't accidentally change the direct_deref_nodes table while we're
+    * iterating through it.
+    */
+   bool add_to_direct_deref_nodes;
+
+   /* A hash table mapping phi nodes to deref_state data */
+   struct hash_table *phi_table;
+};
+
+/* The following two functions implement a hash and equality check for
+ * variable dereferences.  When the hash or equality function encounters an
+ * array, all indirects are treated as equal and are never equal to a
+ * direct dereference or a wildcard.
+ */
+static uint32_t
+hash_deref(const void *void_deref)
+{
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+   const nir_deref_var *deref_var = void_deref;
+   hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var);
+
+   for (const nir_deref *deref = deref_var->deref.child;
+        deref; deref = deref->child) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *deref_array = nir_deref_as_array(deref);
+
+         hash = _mesa_fnv32_1a_accumulate(hash, deref_array->deref_array_type);
+
+         if (deref_array->deref_array_type == nir_deref_array_type_direct)
+            hash = _mesa_fnv32_1a_accumulate(hash, deref_array->base_offset);
+         break;
+      }
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(deref);
+         hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index);
+         break;
+      }
+      default:
+         assert(!"Invalid deref chain");
+      }
+   }
+
+   return hash;
+}
+
+static bool
+derefs_equal(const void *void_a, const void *void_b)
+{
+   const nir_deref_var *a_var = void_a;
+   const nir_deref_var *b_var = void_b;
+
+   if (a_var->var != b_var->var)
+      return false;
+
+   const nir_deref *a = a_var->deref.child;
+   const nir_deref *b = b_var->deref.child;
+   for (; a != NULL && b != NULL; a = a->child, b = b->child) {
+      if (a->deref_type != b->deref_type)
+         return false;
+
+      switch (a->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *a_arr = nir_deref_as_array(a);
+         nir_deref_array *b_arr = nir_deref_as_array(b);
+
+         if (a_arr->deref_array_type != b_arr->deref_array_type)
+            return false;
+
+         if (a_arr->deref_array_type == nir_deref_array_type_direct &&
+             a_arr->base_offset != b_arr->base_offset)
+            return false;
+         break;
+      }
+      case nir_deref_type_struct:
+         if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)
+            return false;
+         break;
+      default:
+         assert(!"Invalid deref chain");
+         return false;
+      }
+   }
+
+   /* Equal only if both chains ended together; a chain that is a strict
+    * prefix of the other names a different (larger) piece of the variable.
+    */
+   return a == NULL && b == NULL;
+}
+
+static int
+type_get_length(const struct glsl_type *type)
+{
+   /* Members/elements for aggregates, columns or components otherwise. */
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_STRUCT:
+      return glsl_get_length(type);
+
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+      return glsl_type_is_matrix(type) ? glsl_get_matrix_columns(type)
+                                       : glsl_get_vector_elements(type);
+   default:
+      unreachable("Invalid deref base type");
+   }
+}
+
+static struct deref_node *
+deref_node_create(struct deref_node *parent,
+                  const struct glsl_type *type, void *mem_ctx)
+{
+   /* Header plus one child slot per element of the type. */
+   struct deref_node *node =
+      rzalloc_size(mem_ctx, sizeof(struct deref_node) +
+                   type_get_length(type) * sizeof(struct deref_node *));
+
+   node->parent = parent;
+   node->type = type;
+   return node;
+}
+
+/* Looks up, creating it on first use, the root deref_node of var: the top
+ * of the tree that represents every deref of that variable.
+ */
+static struct deref_node *
+get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
+{
+   struct hash_table *roots = state->deref_var_nodes;
+   struct hash_entry *hit = _mesa_hash_table_search(roots, var);
+
+   if (hit != NULL)
+      return hit->data;
+
+   /* First use of this variable: create its parentless root node out of
+    * dead_ctx and remember it in the table. */
+   struct deref_node *root =
+      deref_node_create(NULL, var->type, state->dead_ctx);
+   _mesa_hash_table_insert(roots, var, root);
+   return root;
+}
+
+/* Gets (creating on demand) the deref_node for the given deref chain, or
+ * returns NULL if the chain has an out-of-bounds direct array index.  If
+ * the chain is fully direct (no indirects/wildcards) and
+ * state->add_to_direct_deref_nodes is set, it is added to direct_deref_nodes.
+ */
+static struct deref_node *
+get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
+{
+   bool is_direct = true;   /* cleared on any indirect or wildcard index */
+
+   /* Start at the base of the chain. */
+   struct deref_node *node = get_deref_node_for_var(deref->var, state);
+   assert(deref->deref.type == node->type);
+
+   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+      switch (tail->deref_type) {
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+         assert(deref_struct->index < type_get_length(node->type));
+
+         if (node->children[deref_struct->index] == NULL)
+            node->children[deref_struct->index] =
+               deref_node_create(node, tail->type, state->dead_ctx);
+
+         node = node->children[deref_struct->index];
+         break;
+      }
+
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(tail);
+
+         switch (arr->deref_array_type) {
+         case nir_deref_array_type_direct:
+            /* This is possible if a loop unrolls and generates an
+             * out-of-bounds offset.  We need to handle this at least
+             * somewhat gracefully.
+             */
+            if (arr->base_offset >= type_get_length(node->type))
+               return NULL;
+
+            if (node->children[arr->base_offset] == NULL)
+               node->children[arr->base_offset] =
+                  deref_node_create(node, tail->type, state->dead_ctx);
+
+            node = node->children[arr->base_offset];
+            break;
+
+         case nir_deref_array_type_indirect:
+            /* All indirect indices at this level share a single node. */
+            if (node->indirect == NULL)
+               node->indirect = deref_node_create(node, tail->type,
+                                                  state->dead_ctx);
+
+            node = node->indirect;
+            is_direct = false;
+            break;
+
+         case nir_deref_array_type_wildcard:
+            /* Likewise, wildcards at this level share a single node. */
+            if (node->wildcard == NULL)
+               node->wildcard = deref_node_create(node, tail->type,
+                                                  state->dead_ctx);
+
+            node = node->wildcard;
+            is_direct = false;
+            break;
+
+         default:
+            unreachable("Invalid array deref type");
+         }
+         break;
+      }
+      default:
+         unreachable("Invalid deref type");
+      }
+   }
+
+   assert(node);
+
+   if (is_direct && state->add_to_direct_deref_nodes)
+      _mesa_hash_table_insert(state->direct_deref_nodes, deref, node);
+
+   return node;
+}
+
+/* Recursive helper for foreach_deref_node_match: calls cb on every node
+ * under `node` matching the rest of `deref`; stops once cb returns false.
+ */
+static bool
+foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
+                          bool (* cb)(struct deref_node *node,
+                                      struct lower_variables_state *state),
+                          struct lower_variables_state *state)
+{
+   if (deref->child == NULL)
+      return cb(node, state);
+
+   switch (deref->child->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *arr = nir_deref_as_array(deref->child);
+      assert(arr->deref_array_type == nir_deref_array_type_direct);
+      /* Visit both the exact element and the wildcard node, if present. */
+      if (node->children[arr->base_offset] &&
+          !foreach_deref_node_worker(node->children[arr->base_offset],
+                                     deref->child, cb, state))
+         return false;
+
+      return node->wildcard == NULL ||
+             foreach_deref_node_worker(node->wildcard, deref->child, cb, state);
+   }
+   case nir_deref_type_struct: {
+      /* The member may never have been referenced, so it can be NULL;
+       * treat that as "nothing to visit" (as deref_may_be_aliased_node does).
+       */
+      struct deref_node *member =
+         node->children[nir_deref_as_struct(deref->child)->index];
+      return member == NULL ||
+             foreach_deref_node_worker(member, deref->child, cb, state);
+   }
+   default:
+      unreachable("Invalid deref child type");
+   }
+}
+
+/* Walks over every "matching" deref_node and calls the callback.  A node
+ * is considered to "match" if it either refers to that exact deref or
+ * matches it up to a wildcard.  The following all match a[6].foo[3].bar:
+ *
+ * a[6].foo[3].bar
+ * a[*].foo[3].bar
+ * a[6].foo[*].bar
+ * a[*].foo[*].bar
+ *
+ * The given deref must be a full-length and fully qualified (no wildcards
+ * or indirects) deref chain.  Returns false as soon as cb returns false.
+ */
+static bool
+foreach_deref_node_match(nir_deref_var *deref,
+                         bool (* cb)(struct deref_node *node,
+                                     struct lower_variables_state *state),
+                         struct lower_variables_state *state)
+{
+   /* Start from the variable's root node.  Looking it up directly (rather
+    * than via get_deref_node() on a stack copy of the deref) guarantees a
+    * pointer to a local can never be inserted into direct_deref_nodes.
+    */
+   struct deref_node *root = get_deref_node_for_var(deref->var, state);
+   assert(deref->deref.type == root->type);
+
+   return foreach_deref_node_worker(root, &deref->deref, cb, state);
+}
+
+/* Recursive helper for deref_may_be_aliased: walks the node tree along the
+ * remainder of `deref`, following wildcard nodes as well as exact matches.
+ */
+static bool
+deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
+                          struct lower_variables_state *state)
+{
+   nir_deref *next = deref->child;
+
+   /* End of the chain without meeting an indirect. */
+   if (next == NULL)
+      return false;
+
+   switch (next->deref_type) {
+   case nir_deref_type_array: {
+      nir_deref_array *arr = nir_deref_as_array(next);
+      if (arr->deref_array_type == nir_deref_array_type_indirect)
+         return true;
+
+      /* Some other deref indexes this array indirectly and may hit the
+       * same element.  (Conservative: the tail of that deref is ignored.)
+       */
+      if (node->indirect != NULL)
+         return true;
+      assert(arr->deref_array_type == nir_deref_array_type_direct);
+
+      struct deref_node *exact = node->children[arr->base_offset];
+      if (exact != NULL && deref_may_be_aliased_node(exact, next, state))
+         return true;
+
+      return node->wildcard != NULL &&
+             deref_may_be_aliased_node(node->wildcard, next, state);
+   }
+   case nir_deref_type_struct: {
+      struct deref_node *member =
+         node->children[nir_deref_as_struct(next)->index];
+      return member != NULL &&
+             deref_may_be_aliased_node(member, next, state);
+   }
+   default:
+      unreachable("Invalid nir_deref child type");
+   }
+}
+
+/* Returns true if an indirect array access may touch the given deref, in
+ * which case the deref cannot safely be lowered to an SSA value.
+ *
+ * For example, if the given deref is a[6].foo, then a use of a[i].foo
+ * makes this return true.  The check is conservative: any indirect index
+ * into the same array counts, so a use of a[i].bar also returns true even
+ * though it names a different structure member.  A var_copy involving
+ * a[*].foo doesn't affect it because that can be lowered to entirely
+ * direct load/stores.
+ *
+ * The given deref should be fully-qualified.  An indirect in the deref
+ * itself also returns true; wildcards are not supported (and assert),
+ * since handling them would mean checking every array index.
+ */
+static bool
+deref_may_be_aliased(nir_deref_var *deref,
+                     struct lower_variables_state *state)
+{
+   return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state),
+                                    &deref->deref, state);
+}
+
+/* Records load_instr in the `loads` set of the node it reads from. */
+static void
+register_load_instr(nir_intrinsic_instr *load_instr,
+                    struct lower_variables_state *state)
+{
+   struct deref_node *src = get_deref_node(load_instr->variables[0], state);
+   /* get_deref_node() found no node for this deref: nothing to record. */
+   if (!src)
+      return;
+   if (!src->loads)
+      src->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   _mesa_set_add(src->loads, load_instr);
+}
+
+/* Records store_instr in the `stores` set of the node it writes to. */
+static void
+register_store_instr(nir_intrinsic_instr *store_instr,
+                     struct lower_variables_state *state)
+{
+   struct deref_node *dst = get_deref_node(store_instr->variables[0], state);
+   /* get_deref_node() found no node for this deref: nothing to record. */
+   if (!dst)
+      return;
+   if (!dst->stores)
+      dst->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+                                     _mesa_key_pointer_equal);
+   _mesa_set_add(dst->stores, store_instr);
+}
+
+/* Records copy_instr in the `copies` set of both its variable derefs. */
+static void
+register_copy_instr(nir_intrinsic_instr *copy_instr,
+                    struct lower_variables_state *state)
+{
+   /* variables[0] and variables[1] are the two ends of the copy. */
+   for (unsigned side = 0; side < 2; side++) {
+      struct deref_node *end =
+         get_deref_node(copy_instr->variables[side], state);
+      if (!end)
+         continue;
+
+      if (!end->copies)
+         end->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+      _mesa_set_add(end->copies, copy_instr);
+   }
+}
+
+/* Block callback: records every load_var, store_var and copy_var intrinsic
+ * in `block` on the deref_node(s) it refers to.  `void_state` is the pass's
+ * struct lower_variables_state.  Always returns true.
+ */
+static bool
+register_variable_uses_block(nir_block *block, void *void_state)
+{
+   struct lower_variables_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      nir_intrinsic_instr *intrin;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+      intrin = nir_instr_as_intrinsic(instr);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var:
+         register_load_instr(intrin, state);
+         break;
+      case nir_intrinsic_store_var:
+         register_store_instr(intrin, state);
+         break;
+      case nir_intrinsic_copy_var:
+         register_copy_instr(intrin, state);
+         break;
+      default:
+         break;
+      }
+   }
+   return true;
+}
+
+/* Lowers every copy instruction to or from the given deref_node to
+ * load/store intrinsics and drops it from both end nodes' copies sets.
+ */
+static bool
+lower_copies_to_load_store(struct deref_node *node,
+                           struct lower_variables_state *state)
+{
+   if (!node->copies)
+      return true;
+
+   struct set_entry *copy_entry;
+   set_foreach(node->copies, copy_entry) {
+      nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+      nir_lower_var_copy_instr(copy, state->mem_ctx);
+
+      for (unsigned i = 0; i < 2; ++i) {
+         struct deref_node *arg_node =
+            get_deref_node(copy->variables[i], state);
+         if (arg_node == NULL)
+            continue;
+
+         /* The entry lives in arg_node's set, so remove it from that set. */
+         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+         assert(arg_entry);
+         _mesa_set_remove(arg_node->copies, arg_entry);
+      }
+
+      nir_instr_remove(&copy->instr);
+   }
+
+   return true;
+}
+
+/* Creates (but does not insert) a load_const instruction holding the
+ * constant initializer for the given deref chain.  The caller must ensure
+ * that the variable actually has a constant initializer.
+ */
+static nir_load_const_instr *
+get_const_initializer_load(const nir_deref_var *deref,
+                           struct lower_variables_state *state)
+{
+   nir_constant *constant = deref->var->constant_initializer;
+   const nir_deref *tail = &deref->deref;
+   unsigned matrix_offset = 0;   /* column picked by a trailing matrix deref */
+   while (tail->child) {
+      switch (tail->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(tail->child);
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+         if (glsl_type_is_matrix(tail->type)) {
+            assert(arr->deref.child == NULL);
+            matrix_offset = arr->base_offset;
+         } else {
+            constant = constant->elements[arr->base_offset];
+         }
+         break;
+      }
+
+      case nir_deref_type_struct: {
+         constant = constant->elements[nir_deref_as_struct(tail->child)->index];
+         break;
+      }
+
+      default:
+         unreachable("Invalid deref child type");
+      }
+
+      tail = tail->child;
+   }
+
+   nir_load_const_instr *load =
+      nir_load_const_instr_create(state->mem_ctx,
+                                  glsl_get_vector_elements(tail->type));
+
+   matrix_offset *= load->def.num_components;   /* column -> value index */
+   for (unsigned i = 0; i < load->def.num_components; i++) {
+      switch (glsl_get_base_type(tail->type)) {
+      case GLSL_TYPE_FLOAT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_UINT:
+         load->value.u[i] = constant->value.u[matrix_offset + i];
+         break;
+      case GLSL_TYPE_BOOL:
+         load->value.u[i] = constant->value.b[matrix_offset + i] ?
+                             NIR_TRUE : NIR_FALSE;
+         break;
+      default:
+         unreachable("Invalid immediate type");
+      }
+   }
+
+   return load;
+}
+
+/** Pushes an SSA def onto the def stack for the given node
+ *
+ * Each node is potentially associated with a stack of SSA definitions.
+ * This stack is used for determining what SSA definition reaches a given
+ * point in the program for variable renaming.  The stack is always kept in
+ * dominance-order with at most one SSA def per block.  If the SSA
+ * definition on the top of the stack is in the same block as the one being
+ * pushed, the top element is replaced.
+ */
+static void
+def_stack_push(struct deref_node *node, nir_ssa_def *def,
+               struct lower_variables_state *state)
+{
+   if (node->def_stack == NULL) {   /* first push: allocate lazily */
+      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
+                                     state->impl->num_blocks);
+      node->def_stack_tail = node->def_stack - 1;   /* marks the stack empty */
+   }
+
+   if (node->def_stack_tail >= node->def_stack) {
+      nir_ssa_def *top_def = *node->def_stack_tail;
+
+      if (def->parent_instr->block == top_def->parent_instr->block) {
+         /* They're in the same block, just replace the top */
+         *node->def_stack_tail = def;
+         return;
+      }
+   }
+
+   *(++node->def_stack_tail) = def;   /* empty stack or different block */
+}
+
+/* Drops the top entry of node's def stack, but only when that entry was
+ * defined in `block`.  An empty stack is left untouched.
+ */
+static void
+def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
+{
+   /* A pop implies an earlier push, which allocates the stack. */
+   assert(node->def_stack != NULL);
+
+   const bool has_entries = node->def_stack_tail >= node->def_stack;
+   if (!has_entries)
+      return;
+
+   nir_ssa_def *top = *node->def_stack_tail;
+   if (top->parent_instr->block == block)
+      --node->def_stack_tail;
+}
+
+/** Retrieves the SSA definition on the top of the stack for the given
+ * node, if one exists.  If the stack is empty, a new SSA undef is inserted
+ * before the function body's cf_list, pushed on the stack and returned.
+ */
+static nir_ssa_def *
+get_ssa_def_for_block(struct deref_node *node, nir_block *block,
+                      struct lower_variables_state *state)
+{
+   /* If we have something on the stack, go ahead and return it.  We're
+    * assuming that the top of the stack dominates the given block.
+    */
+   if (node->def_stack && node->def_stack_tail >= node->def_stack)
+      return *node->def_stack_tail;
+
+   /* If we got here then we don't have a definition that dominates the
+    * given block.  This means that we need to add an undef and use that.
+    */
+   nir_ssa_undef_instr *undef =
+      nir_ssa_undef_instr_create(state->mem_ctx,
+                                 glsl_get_vector_elements(node->type));
+   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
+   def_stack_push(node, &undef->def, state);
+   return &undef->def;
+}
+
+/* Given a block and one of its predecessors, this function fills in the
+ * sources of the phi nodes to take SSA defs from the given predecessor.
+ * This function must be called exactly once per block/predecessor pair.
+ */
+static void
+add_phi_sources(nir_block *block, nir_block *pred,
+                struct lower_variables_state *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;   /* stop at the first non-phi; assumes phis come first */
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      struct hash_entry *entry =
+            _mesa_hash_table_search(state->phi_table, phi);
+      if (!entry)
+         continue;   /* phi is not in phi_table, so this pass didn't add it */
+
+      struct deref_node *node = entry->data;
+
+      nir_phi_src *src = ralloc(state->mem_ctx, nir_phi_src);
+      src->pred = pred;
+      src->src.is_ssa = true;
+      src->src.ssa = get_ssa_def_for_block(node, pred, state);
+
+      _mesa_set_add(src->src.ssa->uses, instr);
+
+      exec_list_push_tail(&phi->srcs, &src->node);
+   }
+}
+
+/* Performs variable renaming by doing a DFS of the dominance tree
+ *
+ * This algorithm is very similar to the one outlined in "Efficiently
+ * Computing Static Single Assignment Form and the Control Dependence
+ * Graph" by Cytron et al.  The primary difference is that we only put one
+ * SSA def on the stack per block.
+ */
+static bool
+rename_variables_block(nir_block *block, struct lower_variables_state *state)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->phi_table, phi);
+
+         /* This can happen if we already have phi nodes in the program
+          * that were not created in this pass.
+          */
+         if (!entry)
+            continue;
+
+         struct deref_node *node = entry->data;
+
+         def_stack_push(node, &phi->dest.ssa, state);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* If we hit this path then we are referencing an invalid
+                * value.  Most likely, we unrolled something and are
+                * reading past the end of some array.  In any case, this
+                * should result in an undefined value.
+                */
+               nir_ssa_undef_instr *undef =
+                  nir_ssa_undef_instr_create(state->mem_ctx,
+                                             intrin->num_components);
+
+               nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_remove(&intrin->instr);
+
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                        nir_src_for_ssa(&undef->def),
+                                        state->mem_ctx);
+               continue;
+            }
+
+            if (!node->lower_to_ssa)
+               continue;
+
+            /* Replace the load with a move from the reaching definition. */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            mov->src[0].src.is_ssa = true;
+            mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
+
+            assert(intrin->dest.is_ssa);
+
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
+            nir_instr_remove(&intrin->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa),
+                                     state->mem_ctx);
+            break;
+         }
+
+         case nir_intrinsic_store_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* Probably an out-of-bounds array store.  That should be a
+                * no-op. */
+               nir_instr_remove(&intrin->instr);
+               continue;
+            }
+
+            if (!node->lower_to_ssa)
+               continue;
+
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
+
+            assert(intrin->src[0].is_ssa);
+
+            /* Copy the stored value into a fresh SSA def via a move. */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            mov->src[0].src.is_ssa = true;
+            mov->src[0].src.ssa = intrin->src[0].ssa;
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
+
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+            def_stack_push(node, &mov->dest.dest.ssa, state);
+
+            /* We'll wait to remove the instruction until the next pass
+             * where we pop the node we just pushed back off the stack.
+             */
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+   /* Feed the defs on top of the stacks into the successors' phi nodes. */
+   if (block->successors[0])
+      add_phi_sources(block->successors[0], block, state);
+   if (block->successors[1])
+      add_phi_sources(block->successors[1], block, state);
+   /* Recurse into this block's children in the dominance tree. */
+   for (unsigned i = 0; i < block->num_dom_children; ++i)
+      rename_variables_block(block->dom_children[i], state);
+
+   /* Now we iterate over the instructions and pop off any SSA defs that we
+    * pushed in the first loop.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->phi_table, phi);
+
+         /* This can happen if we already have phi nodes in the program
+          * that were not created in this pass.
+          */
+         if (!entry)
+            continue;
+
+         struct deref_node *node = entry->data;
+
+         def_stack_pop_if_in_block(node, block);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         if (intrin->intrinsic != nir_intrinsic_store_var)
+            continue;
+
+         struct deref_node *node = get_deref_node(intrin->variables[0], state);
+         if (!node)
+            continue;
+
+         if (!node->lower_to_ssa)
+            continue;
+
+         def_stack_pop_if_in_block(node, block);
+         nir_instr_remove(&intrin->instr);
+      }
+   }
+
+   return true;
+}
+
+/* Inserts phi nodes for all variables marked lower_to_ssa
+ *
+ * This is the same algorithm as presented in "Efficiently Computing Static
+ * Single Assignment Form and the Control Dependence Graph" by Cytron et
+ * al.
+ */
+static void
+insert_phi_nodes(struct lower_variables_state *state)
+{
+   unsigned work[state->impl->num_blocks];   /* iter_count when last queued */
+   unsigned has_already[state->impl->num_blocks];   /* ... when phi added */
+
+   /*
+    * Since the work flags already prevent us from inserting a node that has
+    * ever been inserted into W, we don't need to use a set to represent W.
+    * Also, since no block can ever be inserted into W more than once, we know
+    * that the maximum size of W is the number of basic blocks in the
+    * function. So all we need to handle W is an array and a pointer to the
+    * next element to be inserted and the next element to be removed.
+    */
+   nir_block *W[state->impl->num_blocks];
+
+   memset(work, 0, sizeof work);
+   memset(has_already, 0, sizeof has_already);
+
+   unsigned w_start, w_end;
+   unsigned iter_count = 0;   /* bumped once per lowered node with stores */
+
+   struct hash_entry *deref_entry;
+   hash_table_foreach(state->direct_deref_nodes, deref_entry) {
+      struct deref_node *node = deref_entry->data;
+
+      if (node->stores == NULL)
+         continue;
+
+      if (!node->lower_to_ssa)
+         continue;
+
+      w_start = w_end = 0;
+      iter_count++;
+
+      /* Seed the worklist with every block that stores to this node. */
+      struct set_entry *store_entry;
+      set_foreach(node->stores, store_entry) {
+         nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
+         if (work[store->instr.block->index] < iter_count)
+            W[w_end++] = store->instr.block;
+         work[store->instr.block->index] = iter_count;
+      }
+
+      while (w_start != w_end) {
+         nir_block *cur = W[w_start++];
+         struct set_entry *dom_entry;
+         set_foreach(cur->dom_frontier, dom_entry) {
+            nir_block *next = (nir_block *) dom_entry->key;
+
+            /*
+             * If there's more than one return statement, then the end block
+             * can be a join point for some definitions. However, there are
+             * no instructions in the end block, so nothing would use those
+             * phi nodes. Of course, we couldn't place those phi nodes
+             * anyways due to the restriction of having no instructions in the
+             * end block...
+             */
+            if (next == state->impl->end_block)
+               continue;
+
+            if (has_already[next->index] < iter_count) {
+               nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx);
+               nir_ssa_dest_init(&phi->instr, &phi->dest,
+                                 glsl_get_vector_elements(node->type), NULL);
+               nir_instr_insert_before_block(next, &phi->instr);
+
+               _mesa_hash_table_insert(state->phi_table, phi, node);
+
+               has_already[next->index] = iter_count;
+               if (work[next->index] < iter_count) {
+                  work[next->index] = iter_count;
+                  W[w_end++] = next;
+               }
+            }
+         }
+      }
+   }
+}
+
+
+/** Implements a pass to lower variable uses to SSA values
+ *
+ * This pass walks the list of instructions and tries to lower as many
+ * local variable load/store operations to SSA defs and uses as it can.
+ * The process involves four passes:
+ *
+ *  1) Iterate over all of the instructions and mark where each local
+ *     variable deref is used in a load, store, or copy.  While we're at
+ *     it, we keep track of all of the fully-qualified (no wildcards) and
+ *     fully-direct references we see and store them in the
+ *     direct_deref_nodes hash table.
+ *
+ *  2) Walk over the list of fully-qualified direct derefs generated in
+ *     the previous pass.  For each deref, we determine if it can ever be
+ *     aliased, i.e. if there is an indirect reference anywhere that may
+ *     refer to it.  If it cannot be aliased, we mark it for lowering to an
+ *     SSA value.  At this point, we lower any var_copy instructions that
+ *     use the given deref to load/store operations and, if the deref has a
+ *     constant initializer, we go ahead and add a load_const value at the
+ *     beginning of the function with the initialized value.
+ *
+ *  3) Walk over the list of derefs we plan to lower to SSA values and
+ *     insert phi nodes as needed.
+ *
+ *  4) Perform "variable renaming" by replacing the load/store instructions
+ *     with SSA definitions and SSA uses.
+ */
+static bool
+nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
+{
+   struct lower_variables_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = ralloc_context(state.mem_ctx);
+   state.impl = impl;
+
+   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
+                                                   _mesa_hash_pointer,
+                                                   _mesa_key_pointer_equal);
+   state.direct_deref_nodes = _mesa_hash_table_create(state.dead_ctx,
+                                                      hash_deref, derefs_equal);
+   state.phi_table = _mesa_hash_table_create(state.dead_ctx,
+                                             _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   /* Build the initial deref structures and direct_deref_nodes table */
+   state.add_to_direct_deref_nodes = true;
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   bool progress = false;
+
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   /* We're about to iterate through direct_deref_nodes.  Don't modify it. */
+   state.add_to_direct_deref_nodes = false;
+
+   struct hash_entry *entry;
+   hash_table_foreach(state.direct_deref_nodes, entry) {
+      nir_deref_var *deref = (void *)entry->key;
+      struct deref_node *node = entry->data;
+
+      /* Only nir_var_local variables are candidates for lowering.  Every
+       * other mode is dropped from the table here, so nothing below needs
+       * to special-case shader outputs.
+       */
+      if (deref->var->data.mode != nir_var_local) {
+         _mesa_hash_table_remove(state.direct_deref_nodes, entry);
+         continue;
+      }
+
+      if (deref_may_be_aliased(deref, &state)) {
+         _mesa_hash_table_remove(state.direct_deref_nodes, entry);
+         continue;
+      }
+
+      node->lower_to_ssa = true;
+      progress = true;
+
+      if (deref->var->constant_initializer) {
+         nir_load_const_instr *load = get_const_initializer_load(deref, &state);
+         nir_ssa_def_init(&load->instr, &load->def,
+                          glsl_get_vector_elements(node->type), NULL);
+         nir_instr_insert_before_cf_list(&impl->body, &load->instr);
+         def_stack_push(node, &load->def, &state);
+      }
+
+      foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
+   }
+
+   if (!progress) {
+      /* Nothing to lower: release the scratch allocations before bailing. */
+      ralloc_free(state.dead_ctx);
+      return false;
+   }
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   /* We may have lowered some copy instructions to load/store
+    * instructions.  The uses from the copy instructions have already been
+    * removed but we need to rescan to ensure that the uses from the newly
+    * added load/store instructions are registered.  We need this
+    * information for phi node insertion below.
+    */
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   insert_phi_nodes(&state);
+   rename_variables_block(impl->start_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   ralloc_free(state.dead_ctx);
+
+   return progress;
+}
+
+void
+nir_lower_vars_to_ssa(nir_shader *shader)   /* non-static entry point */
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* skip overloads that have no implementation */
+         nir_lower_vars_to_ssa_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c
new file mode 100644
index 000000000..602853ea6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a simple pass that lowers vecN instructions to a series of
+ * moves with partial writes.
+ */
+
+static bool
+src_matches_dest_reg(nir_dest *dest, nir_src *src)
+{
+   if (dest->is_ssa || src->is_ssa)
+      return false; /* only register operands are compared below */
+   /* Match = same register, same base offset, and no indirect on either. */
+   return (dest->reg.reg == src->reg.reg &&
+           dest->reg.base_offset == src->reg.base_offset &&
+           !dest->reg.indirect &&
+           !src->reg.indirect);
+}
+
+/**
+ * For a given starting writemask channel and corresponding source index in
+ * the vec instruction, insert a MOV to the vec instruction's dest of all the
+ * writemask channels that get read from the same src reg.
+ *
+ * Returns the writemask of our MOV, so the parent loop calling this knows
+ * which ones have been processed.
+ */
+static unsigned
+insert_mov(nir_alu_instr *vec, unsigned start_channel,
+            unsigned start_src_idx, void *mem_ctx)
+{
+   unsigned src_idx = start_src_idx;
+   assert(src_idx < nir_op_infos[vec->op].num_inputs);
+   /* The MOV starts as a copy of the chosen source and of the vec's dest. */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+   /* Initially it writes only start_channel, read via that source's swizzle[0]. */
+   mov->dest.write_mask = (1u << start_channel);
+   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
+   src_idx++;
+   /* Fold in every later enabled channel whose source equals the first one. */
+   for (unsigned i = start_channel + 1; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
+         mov->dest.write_mask |= (1 << i);
+         mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
+      }
+      src_idx++; /* sources advance only on enabled channels */
+   }
+
+   nir_instr_insert_before(&vec->instr, &mov->instr);
+
+   return mov->dest.write_mask;
+}
+
+static bool
+lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+      /* Only vec2/vec3/vec4 ALU instructions are lowered; others are skipped. */
+      nir_alu_instr *vec = (nir_alu_instr *)instr;
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Since we insert multiple MOVs, we have to be non-SSA. */
+      assert(!vec->dest.dest.is_ssa);
+      /* Destination channels already covered by an emitted MOV. */
+      unsigned finished_write_mask = 0;
+
+      /* First, emit a MOV for all the src channels that are in the
+       * destination reg, in case other values we're populating in the dest
+       * might overwrite them.
+       */
+      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+         if (!(vec->dest.write_mask & (1 << i)))
+            continue;
+
+         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
+            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+            break;
+         }
+         src_idx++;
+      }
+
+      /* Now, emit MOVs for all the other src channels. */
+      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+         if (!(vec->dest.write_mask & (1 << i)))
+            continue;
+
+         if (!(finished_write_mask & (1 << i)))
+            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+
+         src_idx++;
+      }
+      /* The MOVs were inserted before the vec; now remove and free the vec. */
+      nir_instr_remove(&vec->instr);
+      ralloc_free(vec);
+   }
+
+   return true;
+}
+
+static void
+nir_lower_vec_to_movs_impl(nir_function_impl *impl)
+{  /* New MOVs are allocated from the ralloc parent of impl. */
+   nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+}
+
+void
+nir_lower_vec_to_movs(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) /* overloads without an implementation are skipped */
+         nir_lower_vec_to_movs_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_metadata.c b/mesalib/src/glsl/nir/nir_metadata.c
new file mode 100644
index 000000000..a03e12456
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_metadata.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include "nir.h"
+
+/*
+ * Handles management of the metadata.
+ */
+
+void
+nir_metadata_require(nir_function_impl *impl, nir_metadata required)
+{
+#define NEEDS_UPDATE(X) ((required & ~impl->valid_metadata) & (X))
+   /* Recompute each requested piece that is not currently marked valid. */
+   if (NEEDS_UPDATE(nir_metadata_block_index))
+      nir_index_blocks(impl);
+   if (NEEDS_UPDATE(nir_metadata_dominance))
+      nir_calc_dominance_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_live_variables))
+      nir_live_variables_impl(impl);
+
+#undef NEEDS_UPDATE
+   /* Everything that was requested is now recorded as valid. */
+   impl->valid_metadata |= required;
+}
+
+void
+nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
+{
+   impl->valid_metadata &= preserved; /* every bit not in preserved is cleared */
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
new file mode 100644
index 000000000..77f3bb826
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -0,0 +1,591 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Connor Abbott (cwabbott0@gmail.com)
+
+
+# Class that represents all the information we have about the opcode
+# NOTE: this must be kept in sync with nir_op_info
+
+class Opcode(object):
+   """Class that represents all the information we have about the opcode
+   NOTE: this must be kept in sync with nir_op_info
+   """
+   def __init__(self, name, output_size, output_type, input_sizes,
+                input_types, algebraic_properties, const_expr):
+      """Parameters:
+
+      - name is the name of the opcode (prepend nir_op_ for the enum name)
+      - all types are strings that get nir_type_ prepended to them
+      - input_types is a list of types
+      - algebraic_properties is a space-separated string, where nir_op_is_ is
+        prepended before each entry
+      - const_expr is an expression or series of statements that computes the
+        constant value of the opcode given the constant values of its inputs.
+
+      Constant expressions are formed from the variables src0, src1, ...,
+      src(N-1), where N is the number of arguments.  The output of the
+      expression should be stored in the dst variable.  Per-component input
+      and output variables will be scalars and non-per-component input and
+      output variables will be a struct with fields named x, y, z, and w
+      all of the correct type.  Input and output variables can be assumed
+      to already be of the correct type and need no conversion.  In
+      particular, the conversion from the C bool type to/from  NIR_TRUE and
+      NIR_FALSE happens automatically.
+
+      For per-component instructions, the entire expression will be
+      executed once for each component.  For non-per-component
+      instructions, the expression is expected to store the correct values
+      in dst.x, dst.y, etc.  If "dst" does not exist anywhere in the
+      constant expression, an assignment to dst will happen automatically
+      and the result will be equivalent to "dst = <expression>" for
+      per-component instructions and "dst.x = dst.y = ... = <expression>"
+      for non-per-component instructions.
+      """
+      assert isinstance(name, str)
+      assert isinstance(output_size, int)
+      assert isinstance(output_type, str)
+      assert isinstance(input_sizes, list)
+      assert isinstance(input_sizes[0], int)
+      assert isinstance(input_types, list)
+      assert isinstance(input_types[0], str)
+      assert isinstance(algebraic_properties, str)
+      assert isinstance(const_expr, str)
+      assert len(input_sizes) == len(input_types)
+      assert 0 <= output_size <= 4
+      for size in input_sizes:
+         assert 0 <= size <= 4
+         if output_size != 0:
+            assert size != 0
+      self.name = name
+      self.num_inputs = len(input_sizes)
+      self.output_size = output_size
+      self.output_type = output_type
+      self.input_sizes = input_sizes
+      self.input_types = input_types
+      self.algebraic_properties = algebraic_properties
+      self.const_expr = const_expr
+
+# helper variables for strings
+tfloat = "float"
+tint = "int"
+tbool = "bool"
+tunsigned = "unsigned"
+
+commutative = "commutative "
+associative = "associative "
+
+# global dictionary of opcodes
+opcodes = {}
+
+def opcode(name, output_size, output_type, input_sizes, input_types,
+           algebraic_properties, const_expr):
+   assert name not in opcodes
+   opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
+                          input_types, algebraic_properties, const_expr)
+
+def unop_convert(name, in_type, out_type, const_expr):
+   opcode(name, 0, out_type, [0], [in_type], "", const_expr)
+
+def unop(name, ty, const_expr):
+   opcode(name, 0, ty, [0], [ty], "", const_expr)
+
+def unop_horiz(name, output_size, output_type, input_size, input_type,
+               const_expr):
+   opcode(name, output_size, output_type, [input_size], [input_type], "",
+          const_expr)
+
+def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
+                reduce_expr, final_expr):
+   def prereduce(src):
+      return "(" + prereduce_expr.format(src=src) + ")"
+   def final(src):
+      return final_expr.format(src="(" + src + ")")
+   def reduce_(src0, src1):
+      return reduce_expr.format(src0=src0, src1=src1)
+   src0 = prereduce("src0.x")
+   src1 = prereduce("src0.y")
+   src2 = prereduce("src0.z")
+   src3 = prereduce("src0.w")
+   unop_horiz(name + "2", output_size, output_type, 2, input_type,
+              final(reduce_(src0, src1)))
+   unop_horiz(name + "3", output_size, output_type, 3, input_type,
+              final(reduce_(reduce_(src0, src1), src2)))
+   unop_horiz(name + "4", output_size, output_type, 4, input_type,
+              final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+
+# These two move instructions differ in what modifiers they support and what
+# the negate modifier means. Otherwise, they are identical.
+unop("fmov", tfloat, "src0")
+unop("imov", tint, "src0")
+
+unop("ineg", tint, "-src0")
+unop("fneg", tfloat, "-src0")
+unop("inot", tint, "~src0") # invert every bit of the integer
+unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
+unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
+unop("fabs", tfloat, "fabsf(src0)")
+unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+unop("frcp", tfloat, "1.0f / src0")
+unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+unop("fsqrt", tfloat, "sqrtf(src0)")
+unop("fexp", tfloat, "expf(src0)") # < e^x
+unop("flog", tfloat, "logf(src0)") # log base e
+unop("fexp2", tfloat, "exp2f(src0)")
+unop("flog2", tfloat, "log2f(src0)")
+unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
+# Float-to-boolean conversion: any non-zero value is true
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
+# Boolean-to-float conversion
+unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+# Int-to-boolean conversion: any non-zero value is true
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int: true -> 1, false -> 0
+unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+
+unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
+unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
+unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
+            "{src} ? 1.0f : 0.0f")
+unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
+            "{src} ? 1.0f : 0.0f")
+
+# Unary floating-point rounding operations.
+
+
+unop("ftrunc", tfloat, "truncf(src0)")
+unop("fceil", tfloat, "ceilf(src0)")
+unop("ffloor", tfloat, "floorf(src0)")
+unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("fround_even", tfloat, "_mesa_round_to_even(src0)")
+
+
+# Trigonometric operations.
+
+
+unop("fsin", tfloat, "sinf(src0)")
+unop("fcos", tfloat, "cosf(src0)")
+unop("fsin_reduced", tfloat, "sinf(src0)")
+unop("fcos_reduced", tfloat, "cosf(src0)")
+
+
+# Partial derivatives.
+
+
+unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0f")
+unop("fddx_fine", tfloat, "0.0f")
+unop("fddy_fine", tfloat, "0.0f")
+unop("fddx_coarse", tfloat, "0.0f")
+unop("fddy_coarse", tfloat, "0.0f")
+
+
+# Floating point pack and unpack operations.
+
+def pack_2x16(fmt):
+   unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+""".replace("fmt", fmt))
+
+def pack_4x8(fmt):
+   unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+""".replace("fmt", fmt))
+
+def unpack_2x16(fmt): # dst.x <- low 16 bits of src0.x, dst.y <- high 16 bits
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
+dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
+""".replace("fmt", fmt))
+
+def unpack_4x8(fmt):
+   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
+dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+""".replace("fmt", fmt))
+
+
+pack_2x16("snorm")
+pack_4x8("snorm")
+pack_2x16("unorm")
+pack_4x8("unorm")
+pack_2x16("half")
+unpack_2x16("snorm")
+unpack_4x8("snorm")
+unpack_2x16("unorm")
+unpack_4x8("unorm")
+unpack_2x16("half")
+
+
+# Lowered floating point unpacking operations.
+
+
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
+           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
+           "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+# Bit operations, part of ARB_gpu_shader5.
+
+
+unop("bitfield_reverse", tunsigned, """
+/* we're not winning any awards for speed here, but that's ok */
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++)
+   dst |= ((src0 >> bit) & 1) << (31 - bit);
+""")
+unop("bit_count", tunsigned, """
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++) {
+   if ((src0 >> bit) & 1)
+      dst++;
+}
+""")
+
+unop_convert("ufind_msb", tunsigned, tint, """
+dst = -1; /* result when no bit is set */
+for (int bit = 31; bit >= 0; bit--) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("ifind_msb", tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+   /* If src0 < 0, we're looking for the first 0 bit.
+    * if src0 >= 0, we're looking for the first 1 bit.
+    */
+   if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
+      (!((src0 >> bit) & 1) && (src0 < 0))) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("find_lsb", tint, """
+dst = -1;
+for (unsigned bit = 0; bit < 32; bit++) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+
+for i in xrange(1, 5):
+   for j in xrange(1, 5):
+      unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+
+def binop_convert(name, out_type, in_type, alg_props, const_expr):
+   opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
+
+def binop(name, ty, alg_props, const_expr):
+   binop_convert(name, ty, ty, alg_props, const_expr)
+
+def binop_compare(name, ty, alg_props, const_expr):
+   binop_convert(name, tbool, ty, alg_props, const_expr)
+
+def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
+                src2_type, const_expr):
+   opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
+          "", const_expr)
+
+def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
+                 reduce_expr, final_expr):
+   def final(src):
+      return final_expr.format(src= "(" + src + ")")
+   def reduce_(src0, src1):
+      return reduce_expr.format(src0=src0, src1=src1)
+   def prereduce(src0, src1):
+      return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
+   src0 = prereduce("src0.x", "src1.x")
+   src1 = prereduce("src0.y", "src1.y")
+   src2 = prereduce("src0.z", "src1.z")
+   src3 = prereduce("src0.w", "src1.w")
+   opcode(name + "2", output_size, output_type,
+          [2, 2], [src_type, src_type], commutative,
+          final(reduce_(src0, src1)))
+   opcode(name + "3", output_size, output_type,
+          [3, 3], [src_type, src_type], commutative,
+          final(reduce_(reduce_(src0, src1), src2)))
+   opcode(name + "4", output_size, output_type,
+          [4, 4], [src_type, src_type], commutative,
+          final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+binop("fadd", tfloat, commutative + associative, "src0 + src1")
+binop("iadd", tint, commutative + associative, "src0 + src1")
+binop("fsub", tfloat, "", "src0 - src1")
+binop("isub", tint, "", "src0 - src1")
+
+binop("fmul", tfloat, commutative + associative, "src0 * src1")
+# low 32-bits of signed/unsigned integer multiply
+binop("imul", tint, commutative + associative, "src0 * src1")
+# high 32-bits of signed integer multiply
+binop("imul_high", tint, commutative,
+      "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
+# high 32-bits of unsigned integer multiply
+binop("umul_high", tunsigned, commutative,
+      "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
+
+binop("fdiv", tfloat, "", "src0 / src1")
+binop("idiv", tint, "", "src1 == 0 ? 0 : src0 / src1") # x / 0 must not trap while folding
+binop("udiv", tunsigned, "", "src1 == 0 ? 0 : src0 / src1") # same guard as umod
+
+# returns a boolean representing the carry resulting from the addition of
+# the two unsigned arguments.
+
+binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
+
+# returns a boolean representing the borrow resulting from the subtraction
+# of the two unsigned arguments: src0 - src1 borrows exactly when src0 < src1.
+
+binop_convert("usub_borrow", tbool, tunsigned, "", "src0 < src1")
+
+binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
+binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
+
+#
+# Comparisons
+#
+
+
+# these integer-aware comparisons return a boolean (0 or ~0)
+
+binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("ilt", tint, "", "src0 < src1")
+binop_compare("ige", tint, "", "src0 >= src1")
+binop_compare("ieq", tint, commutative, "src0 == src1")
+binop_compare("ine", tint, commutative, "src0 != src1")
+binop_compare("ult", tunsigned, "", "src0 < src1")
+binop_compare("uge", tunsigned, "", "src0 >= src1")
+
+# integer-aware GLSL-style comparisons that compare floats and ints
+
+binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
+             "{src0} && {src1}", "{src}")
+binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+             "{src0} || {src1}", "{src}")
+binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
+             "{src0} && {src1}", "{src}")
+binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+             "{src0} || {src1}", "{src}")
+
+# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
+
+binop_reduce("fall_equal",  1, tfloat, tfloat, "{src0} == {src1}",
+             "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+             "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+
+# These comparisons for integer-less hardware return 1.0 and 0.0 for true
+# and false respectively
+
+binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+
+
+binop("ishl", tint, "", "src0 << src1")
+binop("ishr", tint, "", "src0 >> src1")
+binop("ushr", tunsigned, "", "src0 >> src1")
+
+# bitwise logic operators
+#
+# These are also used as boolean and, or, xor for hardware supporting
+# integers.
+
+
+binop("iand", tunsigned, commutative + associative, "src0 & src1")
+binop("ior", tunsigned, commutative + associative, "src0 | src1")
+binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
+
+
+# floating point logic operators
+#
+# These use (src != 0.0) for testing the truth of the input, and output 1.0
+# for true and 0.0 for false
+
+binop("fand", tfloat, commutative,
+      "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("for", tfloat, commutative,
+      "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("fxor", tfloat, commutative,
+      "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+
+binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
+             "{src}")
+
+binop("fmin", tfloat, "", "fminf(src0, src1)")
+binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+
+binop("fpow", tfloat, "", "powf(src0, src1)")
+
+binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
+            "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+
+binop_convert("bfm", tunsigned, tint, "", """
+int bits = src0, offset = src1; /* operand order is bfm(bits, offset) */
+if (offset < 0 || bits < 0 || offset + bits > 32)
+   dst = 0; /* undefined per the spec */
+else
+   dst = (uint32_t)(((1ull << bits) - 1) << offset); /* 64-bit: shift by 32 is defined */
+""")
+
+opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
+dst = ldexp(src0, src1);
+/* flush denormals to zero. */
+if (!isnormal(dst))
+   dst = copysign(0.0f, src0);
+""")
+
+# Combines the first component of each input to make a 2-component vector.
+
+binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
+dst.x = src0.x;
+dst.y = src1.x;
+""")
+
+def triop(name, ty, const_expr):
+   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
+def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
+   opcode(name, output_size, tunsigned,
+   [src1_size, src2_size, src3_size],
+   [tunsigned, tunsigned, tunsigned], "", const_expr)
+
+triop("ffma", tfloat, "src0 * src1 + src2")
+
+triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+
+# Conditional Select
+#
+# A vector conditional select instruction (like ?:, but operating per-
+# component on vectors). There are two versions, one for floating point
+# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
+
+
+triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+opcode("bcsel", 0, tunsigned, [0, 0, 0],
+      [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
+
+triop("bfi", tunsigned, """
+unsigned mask = src0, insert = src1 & mask, base = src2;
+if (mask == 0) {
+   dst = base;
+} else {
+   unsigned tmp = mask;
+   while (!(tmp & 1)) {
+      tmp >>= 1;
+      insert <<= 1;
+   }
+   dst = (base & ~mask) | insert;
+}
+""")
+
+opcode("ubitfield_extract", 0, tunsigned,
+       [0, 1, 1], [tunsigned, tint, tint], "", """
+unsigned base = src0;
+int offset = src1.x, bits = src2.x;
+if (bits == 0) {
+   dst = 0;
+} else if (bits < 0 || offset < 0 || offset + bits > 32) {
+   dst = 0; /* undefined per the spec */
+} else {
+   dst = (base >> offset) & (unsigned)((1ull << bits) - 1); /* 64-bit mask: bits may be 32 */
+}
+""")
+opcode("ibitfield_extract", 0, tint,
+       [0, 1, 1], [tint, tint, tint], "", """
+int base = src0;
+int offset = src1.x, bits = src2.x;
+if (bits == 0) {
+   dst = 0;
+} else if (offset < 0 || bits < 0 || offset + bits > 32) {
+   dst = 0;
+} else {
+   dst = (int32_t)((uint32_t)base << (32 - offset - bits)) >> (32 - bits); /* field's top bit to bit 31, then sign-extending shift */
+}
+""")
+
+# Combines the first component of each input to make a 3-component vector.
+
+triop_horiz("vec3", 3, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+""")
+
+def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
+                 src4_size, const_expr):
+   opcode(name, output_size, tunsigned,
+          [src1_size, src2_size, src3_size, src4_size],
+          [tunsigned, tunsigned, tunsigned, tunsigned],
+          "", const_expr)
+
+opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
+       [tunsigned, tunsigned, tint, tint], "", """
+unsigned base = src0, insert = src1;
+int offset = src2.x, bits = src3.x;
+if (bits == 0) {
+   dst = base; /* nothing to insert: the result is the unmodified base */
+} else if (offset < 0 || bits < 0 || bits + offset > 32) {
+   dst = 0;
+} else {
+   unsigned mask = (unsigned)((1ull << bits) - 1) << offset; /* 64-bit: bits may be 32 */
+   dst = (base & ~mask) | ((insert << offset) & mask);
+}
+""")
+
+quadop_horiz("vec4", 4, 1, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+dst.w = src3.x;
+""")
+
+
diff --git a/mesalib/src/glsl/nir/nir_opcodes_c.py b/mesalib/src/glsl/nir/nir_opcodes_c.py
new file mode 100644
index 000000000..7049c5be6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes_c.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Connor Abbott (cwabbott0@gmail.com)
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+template = Template("""
+#include "nir.h"
+
+const nir_op_info nir_op_infos[nir_num_opcodes] = {
+% for name, opcode in sorted(opcodes.iteritems()):
+{
+   .name = "${name}",
+   .num_inputs = ${opcode.num_inputs},
+   .output_size = ${opcode.output_size},
+   .output_type = ${"nir_type_" + opcode.output_type},
+   .input_sizes = {
+      ${ ", ".join(str(size) for size in opcode.input_sizes) }
+   },
+   .input_types = {
+      ${ ", ".join("nir_type_" + type for type in opcode.input_types) }
+   },
+   .algebraic_properties =
+      ${ "0" if opcode.algebraic_properties == "" else " | ".join(
+            "NIR_OP_IS_" + prop.upper() for prop in
+               opcode.algebraic_properties.strip().split(" ")) }
+},
+% endfor
+};
+""")
+
+print template.render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_opcodes_h.py b/mesalib/src/glsl/nir/nir_opcodes_h.py
new file mode 100644
index 000000000..be15a96d2
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes_h.py
@@ -0,0 +1,47 @@
+#! /usr/bin/env python
+
+template = """\
+/* Copyright (C) 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ */
+
+#ifndef _NIR_OPCODES_
+#define _NIR_OPCODES_
+
+<% opcode_names = sorted(opcodes.iterkeys()) %>
+
+typedef enum {
+% for name in opcode_names:
+   nir_op_${name},
+% endfor
+   nir_last_opcode = nir_op_${opcode_names[-1]},
+   nir_num_opcodes = nir_last_opcode + 1
+} nir_op;
+
+#endif /* _NIR_OPCODES_ */"""
+# The opcode table comes from nir_opcodes; mako expands the template above.
+from nir_opcodes import opcodes
+from mako.template import Template
+# Write the rendered header text (the nir_op enum) to stdout.
+print Template(template).render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
new file mode 100644
index 000000000..7bf643134
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -0,0 +1,188 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Jason Ekstrand (jason@jlekstrand.net)
+
+import nir_algebraic
+
+# Convenience variables so the rules below can name pattern variables unquoted
+a = 'a'
+b = 'b'
+c = 'c'
+d = 'd'
+
+# Written in the form (<search>, <replace>) where <search> is an expression
+# and <replace> is either an expression or a value.  An expression is
+# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# where each source is either an expression or a value.  A value can be
+# either a numeric constant or a string representing a variable name.
+#
+# Variable names are specified as "[#]name[@type]" where "#" indicates that
+# the given variable will only match constants and the type indicates that
+# the given variable will only match values from ALU instructions with the
+# given output type.
+#
+# For constants, you have to be careful to make sure that it is the right
+# type because python is unaware of the source and destination types of the
+# opcodes.
+
+optimizations = [
+   (('fneg', ('fneg', a)), a),
+   (('ineg', ('ineg', a)), a),
+   (('fabs', ('fabs', a)), ('fabs', a)),
+   (('fabs', ('fneg', a)), ('fabs', a)),
+   (('iabs', ('iabs', a)), ('iabs', a)),
+   (('iabs', ('ineg', a)), ('iabs', a)),
+   (('fadd', a, 0.0), a),
+   (('iadd', a, 0), a),
+   (('fmul', a, 0.0), 0.0),
+   (('imul', a, 0), 0),
+   (('fmul', a, 1.0), a),
+   (('imul', a, 1), a),
+   (('fmul', a, -1.0), ('fneg', a)),
+   (('imul', a, -1), ('ineg', a)),
+   (('ffma', 0.0, a, b), b),
+   (('ffma', a, 0.0, b), b),
+   (('ffma', a, b, 0.0), ('fmul', a, b)),
+   (('ffma', a, 1.0, b), ('fadd', a, b)),
+   (('ffma', 1.0, a, b), ('fadd', a, b)),
+   (('flrp', a, b, 0.0), a),
+   (('flrp', a, b, 1.0), b),
+   (('flrp', a, a, b), a),
+   (('flrp', 0.0, a, b), ('fmul', a, b)),
+   (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+   (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+   # Comparison simplifications
+   (('inot', ('flt', a, b)), ('fge', a, b)),
+   (('inot', ('fge', a, b)), ('flt', a, b)),
+   (('inot', ('ilt', a, b)), ('ige', a, b)),
+   (('inot', ('ige', a, b)), ('ilt', a, b)),
+   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
+   (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+   (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+   (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
+   (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+   (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+   (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
+   (('fsat', ('fsat', a)), ('fsat', a)),
+   (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+   # Comparison with the same args.  Note that these are not done for
+   # the float versions because NaN always returns false on float
+   # inequalities.
+   (('ilt', a, a), False),
+   (('ige', a, a), True),
+   (('ieq', a, a), True),
+   (('ine', a, a), False),
+   (('ult', a, a), False),
+   (('uge', a, a), True),
+   # Logical and bit operations
+   (('fand', a, 0.0), 0.0),
+   (('iand', a, a), a),
+   (('iand', a, 0), 0),
+   (('ior', a, a), a),
+   (('ior', a, 0), a),
+   (('fxor', a, a), 0.0),
+   (('ixor', a, a), 0),
+   (('inot', ('inot', a)), a),
+   # DeMorgan's Laws
+   (('iand', ('inot', a), ('inot', b)), ('inot', ('ior',  a, b))),
+   (('ior',  ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
+   # Shift optimizations
+   (('ishl', 0, a), 0),
+   (('ishl', a, 0), a),
+   (('ishr', 0, a), 0),
+   (('ishr', a, 0), a),
+   (('ushr', 0, a), 0),
+   (('ushr', a, 0), 0),
+   # Exponential/logarithmic identities
+   (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+   (('fexp',  ('flog',  a)), a), # e^ln(a)  = a
+   (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+   (('flog',  ('fexp',  a)), a), # ln(e^a)  = a
+   (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
+   (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+   (('fexp',  ('fmul', ('flog', a), b)),  ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b
+   (('fpow', a, 1.0), a),
+   (('fpow', a, 2.0), ('fmul', a, a)),
+   (('fpow', 2.0, a), ('fexp2', a)),
+   # Division and reciprocal
+   (('fdiv', 1.0, a), ('frcp', a)),
+   (('frcp', ('frcp', a)), a),
+   (('frcp', ('fsqrt', a)), ('frsq', a)),
+   (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
+   (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+   # Boolean simplifications
+   (('ine', 'a@bool', 0), 'a'),
+   (('ieq', 'a@bool', 0), ('inot', 'a')),
+   (('bcsel', a, True, False), ('ine', a, 0)),
+   (('bcsel', a, False, True), ('ieq', a, 0)),
+   (('bcsel', True, b, c), b),
+   (('bcsel', False, b, c), c),
+   # The result of this should be hit by constant propagation and, in the
+   # next round of opt_algebraic, get picked up by one of the above two.
+   (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
+
+   (('bcsel', a, b, b), b),
+   (('fcsel', a, b, b), b),
+
+   # Subtracts
+   (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+   (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+   (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
+   (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+   (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+   (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
+   (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
+   (('iabs', ('isub', 0, a)), ('iabs', a)),
+
+# This one may not be exact
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+]
+
+# Add optimizations to handle the case where the result of a ternary is
+# compared to a constant.  This way we can take things like
+#
+# (a ? 0 : 1) > 0
+#
+# and turn it into
+#
+# a ? (0 > 0) : (1 > 0)
+#
+# which constant folding will eat for lunch.  The resulting ternary will
+# further get cleaned up by the boolean reductions above and we will be
+# left with just the original variable "a".
+for op in ['flt', 'fge', 'feq', 'fne',
+           'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
+   optimizations += [
+      ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
+       ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
+      ((op, '#d', ('bcsel', a, '#b', '#c')),
+       ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
+   ]
+
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_constant_folding.c b/mesalib/src/glsl/nir/nir_opt_constant_folding.c
new file mode 100644
index 000000000..85c09fc48
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_constant_folding.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_constant_expressions.h"
+#include <math.h>
+
+/*
+ * Implements SSA-based constant folding.
+ */
+
+struct constant_fold_state {
+   void *mem_ctx;            /* ralloc context used for newly created instructions */
+   nir_function_impl *impl;  /* function implementation being processed */
+   bool progress;            /* set once any instruction has been folded */
+};
+
+/* Folds an ALU instruction whose sources are all load_const results into a
+ * single new load_const and removes the original.  Returns true on success,
+ * false when the destination or any source is not an SSA constant.
+ */
+static bool
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+{
+   if (!instr->dest.dest.is_ssa)
+      return false;
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   nir_const_value src[4];
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      nir_alu_src *alu_src = &instr->src[s];
+      if (!alu_src->src.is_ssa)
+         return false;
+
+      nir_instr *producer = alu_src->src.ssa->parent_instr;
+      if (producer->type != nir_instr_type_load_const)
+         return false;
+
+      /* Gather the constant's components through this source's swizzle. */
+      nir_load_const_instr *load_const = nir_instr_as_load_const(producer);
+      const unsigned num_comps = nir_ssa_alu_instr_src_components(instr, s);
+      for (unsigned c = 0; c < num_comps; c++)
+         src[s].u[c] = load_const->value.u[alu_src->swizzle[c]];
+
+      /* We shouldn't have any source modifiers in the optimization loop. */
+      assert(!alu_src->abs && !alu_src->negate);
+   }
+
+   /* We shouldn't have any saturate modifiers in the optimization loop. */
+   assert(!instr->dest.saturate);
+
+   const unsigned dest_comps = instr->dest.dest.ssa.num_components;
+   nir_const_value result = nir_eval_const_opcode(instr->op, dest_comps, src);
+
+   nir_load_const_instr *imm = nir_load_const_instr_create(mem_ctx, dest_comps);
+   imm->value = result;
+
+   /* Put the constant in front of the ALU op, redirect users, drop the op. */
+   nir_instr_insert_before(&instr->instr, &imm->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&imm->def), mem_ctx);
+   nir_instr_remove(&instr->instr);
+   ralloc_free(instr);
+
+   return true;
+}
+
+/* Turns indirect array derefs whose index is produced by a load_const into
+ * direct derefs by adding the constant to base_offset.  Walks every link
+ * below `deref` and returns true if any of them was changed. */
+static bool
+constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
+{
+   bool progress = false;
+
+   for (nir_deref *d = deref->deref.child; d != NULL; d = d->child) {
+      if (d->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(d);
+      if (arr->deref_array_type != nir_deref_array_type_indirect ||
+          !arr->indirect.is_ssa)
+         continue;
+
+      nir_instr *index_instr = arr->indirect.ssa->parent_instr;
+      if (index_instr->type != nir_instr_type_load_const)
+         continue;
+
+      arr->base_offset += nir_instr_as_load_const(index_instr)->value.u[0];
+      /* Clear out the source */
+      nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
+      arr->deref_array_type = nir_deref_array_type_direct;
+      progress = true;
+   }
+
+   return progress;
+}
+
+/* Applies constant_fold_deref() to every variable deref the intrinsic uses. */
+static bool
+constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+   bool progress = false;
+
+   for (unsigned v = 0; v < info->num_variables; v++)
+      progress |= constant_fold_deref(&instr->instr, instr->variables[v]);
+
+   return progress;
+}
+
+/* Folds constant indirects in the sampler deref, when the tex has one. */
+static bool
+constant_fold_tex_instr(nir_tex_instr *instr)
+{
+   if (instr->sampler == NULL)
+      return false;
+   return constant_fold_deref(&instr->instr, instr->sampler);
+}
+
+/* nir_foreach_block callback: tries to fold every ALU, intrinsic and texture
+ * instruction in `block`, accumulating progress in the constant_fold_state
+ * passed as `void_state`.  Always returns true.
+ */
+static bool
+constant_fold_block(nir_block *block, void *void_state)
+{
+   struct constant_fold_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      bool folded = false;
+      if (instr->type == nir_instr_type_alu) {
+         folded = constant_fold_alu_instr(nir_instr_as_alu(instr),
+                                          state->mem_ctx);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         folded = constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
+      } else if (instr->type == nir_instr_type_tex) {
+         folded = constant_fold_tex_instr(nir_instr_as_tex(instr));
+      }
+      /* Any other instruction type: don't know how to constant fold. */
+
+      state->progress |= folded;
+   }
+
+   return true;
+}
+
+/* Runs constant folding over every block of `impl`; returns true on progress. */
+static bool
+nir_opt_constant_folding_impl(nir_function_impl *impl)
+{
+   struct constant_fold_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .progress = false,
+   };
+
+   nir_foreach_block(impl, constant_fold_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   return state.progress;
+}
+
+/* Constant-folds every overload of `shader` that has an implementation. */
+bool
+nir_opt_constant_folding(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl && nir_opt_constant_folding_impl(overload->impl))
+         any_progress = true;
+   }
+   return any_progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c
new file mode 100644
index 000000000..dd0ec01ef
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <main/imports.h>
+
+/**
+ * SSA-based copy propagation
+ */
+
+/* Returns true for an fmov/imov that has no saturate or source modifiers and
+ * reads an SSA value, i.e. a plain (possibly swizzled) copy.
+ */
+static bool is_move(nir_alu_instr *instr)
+{
+   const bool is_mov_op = instr->op == nir_op_fmov ||
+                          instr->op == nir_op_imov;
+   if (!is_mov_op)
+      return false;
+
+   if (instr->dest.saturate)
+      return false;
+
+   /* we handle modifiers in a separate pass */
+   const nir_alu_src *src0 = &instr->src[0];
+   if (src0->abs || src0->negate)
+      return false;
+
+   return src0->src.is_ssa;
+}
+
+/* A move is swizzleless when every component in the leading run of set
+ * write-mask bits reads the same-numbered source component. */
+static bool
+is_swizzleless_move(nir_alu_instr *instr)
+{
+   if (!is_move(instr))
+      return false;
+
+   for (unsigned c = 0; c < 4 && (instr->dest.write_mask & (1u << c)); c++) {
+      if (instr->src[0].swizzle[c] != c)
+         return false;
+   }
+
+   return true;
+}
+
+/* True for an all-SSA vecN; modifiers are rejected because copy_prop_alu_src
+ * forwards only the def and swizzle and would silently drop them. */
+static bool is_vec(nir_alu_instr *instr)
+{
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+      if (!instr->src[i].src.is_ssa || instr->src[i].abs || instr->src[i].negate)
+         return false;
+   return !instr->dest.saturate && (instr->op == nir_op_vec2 ||
+          instr->op == nir_op_vec3 || instr->op == nir_op_vec4);
+}
+
+typedef struct {
+   nir_ssa_def *def;   /* SSA definition being searched for */
+   bool found;         /* set by search_def() when a source reads def */
+} search_def_state;
+
+/* nir_foreach_src callback: flags the search_def_state passed as `_state`
+ * when `src` reads the definition it is looking for.  Always returns true. */
+static bool
+search_def(nir_src *src, void *_state)
+{
+   search_def_state *search = _state;
+   if (src->is_ssa && src->ssa == search->def)
+      search->found = true;
+   return true;
+}
+
+/* Points `src` (a source of parent_instr) at new_def and keeps the use sets
+ * in sync: parent_instr is dropped from the old def's uses only when no other
+ * source of the instruction still reads it, then added to new_def's uses.
+ */
+static void
+rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr)
+{
+   nir_ssa_def *const prev_def = src->ssa;
+   search_def_state lookup = { .def = prev_def, .found = false };
+
+   src->ssa = new_def;
+
+   /* The instruction could still use the old definition in one of its other
+    * sources, so only remove the instruction from the uses if there are no
+    * more uses left.
+    */
+   nir_foreach_src(parent_instr, search_def, &lookup);
+   if (!lookup.found) {
+      struct set_entry *use = _mesa_set_search(prev_def->uses, parent_instr);
+      assert(use);
+      _mesa_set_remove(prev_def->uses, use);
+   }
+
+   _mesa_set_add(new_def->uses, parent_instr);
+}
+
+/* Makes new_def the condition of if_stmt and moves the statement from the old
+ * condition's if_uses set to new_def's. */
+static void
+rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def)
+{
+   nir_ssa_def *const prev_def = if_stmt->condition.ssa;
+   if_stmt->condition.ssa = new_def;
+
+   struct set_entry *use = _mesa_set_search(prev_def->if_uses, if_stmt);
+   assert(use);
+   _mesa_set_remove(prev_def->if_uses, use);
+   _mesa_set_add(new_def->if_uses, if_stmt);
+}
+
+/* Makes `src` (used by parent_instr, or by parent_if when that is NULL) bypass
+ * a swizzle-free fmov/imov.  Returns true if `src` was rewritten. */
+static bool
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+{
+   if (!src->is_ssa) {
+      /* Look through the register's indirect source (recursing on `src` itself
+       * never terminates); rewrite_src_if() cannot rewrite an indirect. */
+      if (src->reg.indirect && parent_instr)
+         return copy_prop_src(src->reg.indirect, parent_instr, NULL);
+      return false;
+   }
+
+   nir_instr *src_instr = src->ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return false;
+   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+   if (!is_swizzleless_move(alu_instr))
+      return false;
+
+   /* Don't let copy propagation land us with a phi that has more components
+    * in its source than in its destination; that messes up out-of-ssa. */
+   if (parent_instr && parent_instr->type == nir_instr_type_phi) {
+      nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
+      assert(phi->dest.is_ssa);
+      if (phi->dest.ssa.num_components !=
+          alu_instr->src[0].src.ssa->num_components)
+         return false;
+   }
+
+   if (parent_instr)
+      rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr);
+   else
+      rewrite_src_if(parent_if, alu_instr->src[0].src.ssa);
+   return true;
+}
+
+/* Looks through a move or vecN that produces source `index` of the ALU
+ * instruction, composing swizzles so the source reads the underlying SSA def
+ * directly.  Returns true when the source was rewritten.
+ */
+static bool
+copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
+{
+   nir_alu_src *src = &parent_alu_instr->src[index];
+   if (!src->src.is_ssa) {
+      if (src->src.reg.indirect)
+         return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
+                              NULL);
+      return false;
+   }
+
+   nir_instr *producer = src->src.ssa->parent_instr;
+   if (producer->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *copy = nir_instr_as_alu(producer);
+   if (!is_move(copy) && !is_vec(copy))
+      return false;
+
+   const bool through_move = copy->op == nir_op_fmov ||
+                             copy->op == nir_op_imov;
+   nir_ssa_def *def = through_move ? copy->src[0].src.ssa : NULL;
+   unsigned new_swizzle[4] = {0, 0, 0, 0};
+
+   for (unsigned c = 0; c < 4; c++) {
+      if (through_move) {
+         new_swizzle[c] = copy->src[0].swizzle[src->swizzle[c]];
+         continue;
+      }
+
+      /* vecN: every channel we read must come from the same SSA def. */
+      if (!nir_alu_instr_channel_used(parent_alu_instr, index, c))
+         continue;
+
+      const nir_alu_src *elem = &copy->src[src->swizzle[c]];
+      if (def != NULL && def != elem->src.ssa)
+         return false;
+      def = elem->src.ssa;
+      new_swizzle[c] = elem->swizzle[0];
+   }
+
+   for (unsigned c = 0; c < 4; c++)
+      src->swizzle[c] = new_swizzle[c];
+
+   rewrite_src_instr(&src->src, def, &parent_alu_instr->instr);
+
+   return true;
+}
+
+typedef struct {
+   nir_instr *parent_instr;   /* instruction whose sources are being visited */
+   bool progress;             /* set when any source has been rewritten */
+} copy_prop_state;
+
+/* nir_foreach_src callback: propagates copies into `src` until it settles. */
+static bool
+copy_prop_src_cb(nir_src *src, void *_state)
+{
+   copy_prop_state *cb_state = _state;
+   while (copy_prop_src(src, cb_state->parent_instr, NULL))
+      cb_state->progress = true;
+   return true;
+}
+
+/* Propagates copies into every source of `instr` (swizzle-aware for ALU
+ * instructions).  Returns true if any source was rewritten. */
+static bool
+copy_prop_instr(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_alu) {
+      copy_prop_state state = { .parent_instr = instr, .progress = false };
+      nir_foreach_src(instr, copy_prop_src_cb, &state);
+      return state.progress;
+   }
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   const unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
+   bool progress = false;
+   for (unsigned s = 0; s < num_inputs; s++)
+      while (copy_prop_alu_src(alu, s))
+         progress = true;
+
+   /* A register destination may be addressed through an indirect source. */
+   if (!alu->dest.dest.is_ssa && alu->dest.dest.reg.indirect)
+      while (copy_prop_src(alu->dest.dest.reg.indirect, instr, NULL))
+         progress = true;
+
+   return progress;
+}
+
+/* Makes one copy-propagation attempt on the condition of an if statement. */
+static bool copy_prop_if(nir_if *if_stmt)
+{
+   return copy_prop_src(&if_stmt->condition, NULL, if_stmt);
+}
+
+/* nir_foreach_block callback: copy-propagates each instruction of `block` and
+ * the condition of an if statement that directly follows it. */
+static bool
+copy_prop_block(nir_block *block, void *_state)
+{
+   bool *progress = _state;
+   nir_cf_node *node = &block->cf_node;
+
+   nir_foreach_instr(block, instr) {
+      if (copy_prop_instr(instr))
+         *progress = true;
+   }
+
+   if (node->node.next != NULL && /* check that we aren't the end node */
+       !nir_cf_node_is_last(node) &&
+       nir_cf_node_next(node)->type == nir_cf_node_if &&
+       copy_prop_if(nir_cf_node_as_if(nir_cf_node_next(node))))
+      *progress = true;
+   return true;
+}
+
+/* Runs copy propagation over every block of `impl`; true on any progress. */
+bool
+nir_copy_prop_impl(nir_function_impl *impl)
+{
+   bool made_progress = false;
+   nir_foreach_block(impl, copy_prop_block, &made_progress);
+   return made_progress;
+}
+
+/* Copy-propagates every overload of `shader` that has an implementation. */
+bool
+nir_copy_prop(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         any_progress |= nir_copy_prop_impl(overload->impl);
+   }
+   return any_progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
new file mode 100644
index 000000000..9b383202d
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements common subexpression elimination
+ */
+
+struct cse_state {
+   void *mem_ctx;   /* passed to nir_ssa_def_rewrite_uses() when uses are redirected */
+   bool progress;   /* set once any instruction has been eliminated */
+};
+
+/* Two ALU sources match when their modifiers agree, their swizzles agree on
+ * every component selected by read_mask, and nir_srcs_equal() holds. */
+static bool
+nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+{
+   if (src1.abs != src2.abs || src1.negate != src2.negate)
+      return false;
+
+   for (unsigned comp = 0; comp < 4; comp++) {
+      const bool is_read = (read_mask >> comp) & 1;
+      if (is_read && src1.swizzle[comp] != src2.swizzle[comp])
+         return false;
+   }
+
+   return nir_srcs_equal(src1.src, src2.src);
+}
+
+/* Structural equality test used by CSE; texture instructions never match. */
+static bool nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
+{
+   if (instr1->type != instr2->type)
+      return false;
+   /* Same type from here on: compare the type-specific fields. */
+   switch (instr1->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
+      nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
+
+      if (alu1->op != alu2->op)
+         return false;
+
+      /* TODO: We can probably actually do something more intelligent such
+       * as allowing different numbers and taking a maximum or something
+       * here */
+      if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
+         return false;
+      /* Compare only the components the destination actually has. */
+      for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
+         if (!nir_alu_srcs_equal(alu1->src[i], alu2->src[i],
+                                 (1 << alu1->dest.dest.ssa.num_components) - 1))
+            return false;
+      }
+      return true;
+   }
+   case nir_instr_type_tex:
+      return false;
+   case nir_instr_type_load_const: {
+      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
+      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
+
+      if (load1->def.num_components != load2->def.num_components)
+         return false;
+      /* Bitwise comparison of the components in use. */
+      return memcmp(load1->value.f, load2->value.f,
+                    load1->def.num_components * sizeof(*load2->value.f)) == 0;
+   }
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+      /* Phis from different blocks never compare equal. */
+      if (phi1->instr.block != phi2->instr.block)
+         return false;
+      /* Sources coming from the same predecessor must be equal. */
+      nir_foreach_phi_src(phi1, src1) {
+         nir_foreach_phi_src(phi2, src2) {
+            if (src1->pred == src2->pred) {
+               if (!nir_srcs_equal(src1->src, src2->src))
+                  return false;
+
+               break;
+            }
+         }
+      }
+
+      return true;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
+      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[intrinsic1->intrinsic];
+
+      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
+          intrinsic1->num_components != intrinsic2->num_components)
+         return false;
+
+      if (info->has_dest && intrinsic1->dest.ssa.num_components !=
+                            intrinsic2->dest.ssa.num_components)
+         return false;
+
+      for (unsigned i = 0; i < info->num_srcs; i++) {
+         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
+            return false;
+      }
+      /* nir_instr_can_cse() rejects intrinsics that take variables. */
+      assert(info->num_variables == 0);
+
+      for (unsigned i = 0; i < info->num_indices; i++) {
+         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
+            return false;
+      }
+
+      return true;
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/* nir_foreach_src callback: reports whether `src` is an SSA value. */
+static bool src_is_ssa(nir_src *src, void *data)
+{
+   return src->is_ssa;
+}
+
+/* nir_foreach_dest callback: reports whether `dest` is an SSA value. */
+static bool dest_is_ssa(nir_dest *dest, void *data)
+{
+   return dest->is_ssa;
+}
+
+/* True if `instr` is all-SSA and of a type this pass knows how to compare. */
+static bool
+nir_instr_can_cse(nir_instr *instr)
+{
+   /* We only handle SSA. */
+   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+       !nir_foreach_src(instr, src_is_ssa, NULL))
+      return false;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_load_const:
+   case nir_instr_type_phi:
+      return true;
+   case nir_instr_type_intrinsic: {
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+             info->num_variables == 0; /* not implemented yet */
+   }
+   case nir_instr_type_tex: /* TODO */
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return false;
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/* Returns the SSA def written by an ALU, load_const, phi or intrinsic instr. */
+static nir_ssa_def *nir_instr_get_dest_ssa_def(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
+      return &nir_instr_as_alu(instr)->dest.dest.ssa;
+   case nir_instr_type_load_const:
+      return &nir_instr_as_load_const(instr)->def;
+   case nir_instr_type_phi:
+      assert(nir_instr_as_phi(instr)->dest.is_ssa);
+      return &nir_instr_as_phi(instr)->dest.ssa;
+   case nir_instr_type_intrinsic:
+      assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
+      return &nir_instr_as_intrinsic(instr)->dest.ssa;
+   default:
+      unreachable("We never ask for any of these");
+   }
+}
+
+/* Replaces `instr` with an equal earlier instruction found in its own block
+ * or in a dominating block: uses are redirected and `instr` is removed. */
+static void
+nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
+{
+   if (!nir_instr_can_cse(instr))
+      return;
+
+   nir_instr *match = NULL;
+   for (struct exec_node *node = instr->node.prev;
+        match == NULL && !exec_node_is_head_sentinel(node); node = node->prev) {
+      nir_instr *other = exec_node_data(nir_instr, node, node);
+      if (nir_instrs_equal(instr, other))
+         match = other;
+   }
+
+   for (nir_block *block = instr->block->imm_dom;
+        match == NULL && block != NULL; block = block->imm_dom) {
+      nir_foreach_instr_reverse(block, other) {
+         if (nir_instrs_equal(instr, other)) {
+            match = other;
+            break;
+         }
+      }
+   }
+
+   if (match == NULL)
+      return;
+
+   nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
+                            nir_src_for_ssa(nir_instr_get_dest_ssa_def(match)),
+                            state->mem_ctx);
+   nir_instr_remove(instr);
+   state->progress = true;
+}
+
+/* nir_foreach_block callback: runs CSE on each instruction of `block`. */
+static bool
+nir_opt_cse_block(nir_block *block, void *void_state)
+{
+   struct cse_state *cse = void_state;
+
+   nir_foreach_instr_safe(block, instr)
+      nir_opt_cse_instr(instr, cse);
+   return true;
+}
+
+/* Runs CSE over `impl`.  Dominance information is required up front because
+ * the search walks immediate dominators.  Returns true on progress. */
+static bool
+nir_opt_cse_impl(nir_function_impl *impl)
+{
+   struct cse_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .progress = false,
+   };
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+   nir_foreach_block(impl, nir_opt_cse_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   return state.progress;
+}
+
+/* Runs CSE on every overload of `shader` that has an implementation. */
+bool
+nir_opt_cse(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl && nir_opt_cse_impl(overload->impl))
+         any_progress = true;
+   }
+   return any_progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_dce.c b/mesalib/src/glsl/nir/nir_opt_dce.c
new file mode 100644
index 000000000..e0ebdc61c
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_dce.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* SSA-based mark-and-sweep dead code elimination */
+
+typedef struct {
+   struct exec_node node;   /* link in the worklist exec_list */
+   nir_instr *instr;        /* instruction queued for source marking */
+} worklist_elem;
+
+static void
+worklist_push(struct exec_list *worklist, nir_instr *instr)
+{
+   worklist_elem *entry = ralloc(worklist, worklist_elem); /* ctx: list */
+   instr->pass_flags = 1;   /* non-zero pass_flags means "live" */
+   entry->instr = instr;
+   exec_list_push_tail(worklist, &entry->node);
+}
+
+static nir_instr *
+worklist_pop(struct exec_list *worklist)
+{
+   struct exec_node *head = exec_list_pop_head(worklist);
+   /* Recover the enclosing worklist_elem from its embedded list node. */
+   return exec_node_data(worklist_elem, head, node)->instr;
+}
+
+/* Source callback: queues the producer of an SSA source if not yet live. */
+static bool
+mark_live_cb(nir_src *src, void *_state)
+{
+   if (!src->is_ssa)
+      return true;
+   nir_instr *producer = src->ssa->parent_instr;
+   if (producer->pass_flags == 0)
+      worklist_push((struct exec_list *) _state, producer);
+   return true;
+}
+
+/* Resets the liveness flag of instr and seeds the worklist with it when it
+ * has to be kept regardless of whether its result is used.
+ */
+static void
+init_instr(nir_instr *instr, struct exec_list *worklist)
+{
+   bool is_root = false;
+
+   /* We use the pass_flags to store the live/dead information.  In DCE, we
+    * just treat it as a zero/non-zero boolean for whether or not the
+    * instruction is live.
+    */
+   instr->pass_flags = 0;
+
+   switch (instr->type) {
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      is_root = true;
+      break;
+
+   case nir_instr_type_alu:
+      /* A write to a non-SSA destination is always kept. */
+      is_root = !nir_instr_as_alu(instr)->dest.dest.is_ssa;
+      break;
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+
+      /* Intrinsics not flagged CAN_ELIMINATE are unconditionally kept. */
+      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE))
+         is_root = true;
+      else
+         is_root = info->has_dest && !intrin->dest.is_ssa;
+      break;
+   }
+
+   case nir_instr_type_tex:
+      is_root = !nir_instr_as_tex(instr)->dest.is_ssa;
+      break;
+
+   default:
+      break;
+   }
+
+   if (is_root)
+      worklist_push(worklist, instr);
+}
+
+/* Block callback: seeds the worklist from one block and its following if. */
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   struct exec_list *worklist = _state;
+   nir_foreach_instr(block, instr)
+      init_instr(instr, worklist);
+
+   nir_if *next_if = nir_block_get_following_if(block);
+   if (next_if == NULL || !next_if->condition.is_ssa)
+      return true;
+
+   nir_instr *cond_def = next_if->condition.ssa->parent_instr;
+   if (!cond_def->pass_flags)
+      worklist_push(worklist, cond_def);
+   return true;
+}
+
+/* Sweep callback: removes every instruction whose pass_flags is still 0. */
+static bool
+delete_block_cb(nir_block *block, void *_state)
+{
+   bool *changed = _state;
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->pass_flags)
+         continue;
+
+      nir_instr_remove(instr);
+      *changed = true;
+   }
+   return true;
+}
+
+/* Mark-and-sweep DCE on one impl; returns true if anything was removed. */
+bool
+nir_opt_dce_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+   struct exec_list *live_queue = ralloc(NULL, struct exec_list);
+   /* Mark: seed the queue, then propagate liveness through sources. */
+   exec_list_make_empty(live_queue);
+   nir_foreach_block(impl, init_block_cb, live_queue);
+   while (!exec_list_is_empty(live_queue)) {
+      nir_instr *live = worklist_pop(live_queue);
+      nir_foreach_src(live, mark_live_cb, live_queue);
+   }
+
+   /* Queue elements were ralloc'ed with the list as their context. */
+   ralloc_free(live_queue);
+
+   /* Sweep: drop everything left unmarked. */
+   nir_foreach_block(impl, delete_block_cb, &progress);
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   return progress;
+}
+
+/* Entry point: runs DCE on every overload that has an implementation. */
+bool
+nir_opt_dce(nir_shader *shader)
+{
+   bool any_removed = false;
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         any_removed |= nir_opt_dce_impl(overload->impl);
+   }
+   return any_removed;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_gcm.c b/mesalib/src/glsl/nir/nir_opt_gcm.c
new file mode 100644
index 000000000..bf565b969
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_gcm.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements Global Code Motion.  A description of GCM can be found in
+ * "Global Code Motion; Global Value Numbering" by Cliff Click.
+ * Unfortunately, the algorithm presented in the paper is broken in a
+ * number of ways.  The algorithm used here differs substantially from the
+ * one in the paper but it is, in my opinion, much easier to read and
+ * verify correctness.
+ */
+
+struct gcm_block_info {
+   /* Loop nesting depth of this block; set by gcm_build_block_info */
+   unsigned loop_depth;
+
+   /* The most recently placed instruction of this block.  gcm_place_instr
+    * inserts each unpinned instruction it places in front of this one, so
+    * newly placed instructions land before previously placed ones.
+    */
+   nir_instr *last_instr;
+};
+
+/* Flags used in the instr->pass_flags field for various instruction states */
+enum {
+   GCM_INSTR_PINNED =            (1 << 0), /* must stay in its block */
+   GCM_INSTR_SCHEDULED_EARLY =   (1 << 1), /* visited by early scheduling */
+   GCM_INSTR_SCHEDULED_LATE =    (1 << 2), /* visited by late scheduling */
+   GCM_INSTR_PLACED =            (1 << 3), /* visited by gcm_place_instr */
+};
+
+struct gcm_state {
+   nir_function_impl *impl;   /* function implementation being processed */
+   nir_instr *instr;          /* instruction whose sources are being walked */
+
+   /* The list of non-pinned instructions.  While pinning, we pull
+    * non-pinned instructions out of their blocks and place them in this
+    * list.  This saves us from having linked-list problems when we go to
+    * put instructions back in their blocks.
+    */
+   struct exec_list instrs;
+
+   struct gcm_block_info *blocks;   /* indexed by block->index */
+};
+
+/* Records the loop nesting depth of every block reachable from cf_list.
+ *
+ * Both branches of an if are visited at the current depth; a loop body is
+ * visited at loop_depth + 1.
+ */
+static void
+gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
+                     unsigned loop_depth)
+{
+   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
+      if (cf->type == nir_cf_node_block) {
+         /* Leaf: remember how deeply this block is nested. */
+         nir_block *blk = nir_cf_node_as_block(cf);
+         state->blocks[blk->index].loop_depth = loop_depth;
+      } else if (cf->type == nir_cf_node_if) {
+         /* An if does not change the loop depth. */
+         nir_if *branch = nir_cf_node_as_if(cf);
+         gcm_build_block_info(&branch->then_list, state, loop_depth);
+         gcm_build_block_info(&branch->else_list, state, loop_depth);
+      } else if (cf->type == nir_cf_node_loop) {
+         /* Everything inside the loop body is one level deeper. */
+         nir_loop *loop = nir_cf_node_as_loop(cf);
+         gcm_build_block_info(&loop->body, state, loop_depth + 1);
+      } else {
+         unreachable("Invalid CF node type");
+      }
+   }
+}
+
+/* Walks the instruction list and marks immovable instructions as pinned
+ *
+ * This also initializes instr->pass_flags: afterwards every instruction's
+ * pass_flags is either GCM_INSTR_PINNED or 0.  Unpinned instructions are
+ * unlinked from their block and appended to state->instrs.
+ */
+static bool
+gcm_pin_instructions_block(nir_block *block, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      switch (instr->type) {
+      case nir_instr_type_alu:
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fddx:
+         case nir_op_fddy:
+         case nir_op_fddx_fine:
+         case nir_op_fddy_fine:
+         case nir_op_fddx_coarse:
+         case nir_op_fddy_coarse:
+            /* These can only go in uniform control flow; pin them for now */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+         default:
+            instr->pass_flags = 0;
+         }
+         break;
+
+      case nir_instr_type_tex:
+         switch (nir_instr_as_tex(instr)->op) {
+         case nir_texop_tex:
+         case nir_texop_txb:
+         case nir_texop_lod:
+            /* These take implicit derivatives so they need to be pinned */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+         default:
+            instr->pass_flags = 0;
+         }
+         break;
+
+      case nir_instr_type_load_const:
+         instr->pass_flags = 0;
+         break;
+
+      case nir_instr_type_intrinsic: {
+         const nir_intrinsic_info *info =
+            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+            instr->pass_flags = 0;
+         } else {
+            instr->pass_flags = GCM_INSTR_PINNED;
+         }
+         break;
+      }
+
+      case nir_instr_type_jump:
+      case nir_instr_type_ssa_undef:
+      case nir_instr_type_phi:
+         instr->pass_flags = GCM_INSTR_PINNED;
+         break;
+
+      default:
+         unreachable("Invalid instruction type in GCM");
+      }
+
+      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+         /* If this is an unpinned instruction, go ahead and pull it out of
+          * the program and put it on the instrs list.  This has a couple
+          * of benefits.  First, it makes the scheduling algorithm more
+          * efficient because we can avoid walking over basic blocks and
+          * pinned instructions.  Second, it keeps us from causing linked
+          * list confusion when we're trying to put everything in its
+          * proper place at the end of the pass.
+          *
+          * Note that we don't use nir_instr_remove here because that also
+          * cleans up uses and defs and we want to keep that information.
+          */
+         exec_node_remove(&instr->node);
+         exec_list_push_tail(&state->instrs, &instr->node);
+      }
+   }
+
+   return true;
+}
+
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Updates an instruction's early schedule for one of its sources
+ *
+ * Called once per source of state->instr.  It first makes sure that the
+ * instruction producing the source has been scheduled early and then
+ * moves state->instr into the producer's block if that block has the
+ * larger index.
+ */
+static bool
+gcm_schedule_early_src(nir_src *src, void *void_state)
+{
+   struct gcm_state *state = void_state;
+   nir_instr *user = state->instr;
+   nir_instr *producer;
+
+   assert(src->is_ssa);
+   producer = src->ssa->parent_instr;
+
+   /* The recursion below may overwrite state->instr; `user` keeps our
+    * own instruction so that it can be restored afterwards.
+    */
+   gcm_schedule_early_instr(producer, state);
+
+   /* While the index isn't a proper dominance depth, it does have the
+    * property that if A dominates B then A->index <= B->index.  Since we
+    * know that this instruction must have been dominated by all of its
+    * sources at some point (even if it's gone through value-numbering),
+    * all of the sources must lie on the same branch of the dominance tree.
+    * Therefore, comparing indices is enough to find the lower block.
+    */
+   if (producer->block->index > user->block->index)
+      user->block = producer->block;
+
+   /* Later sources of the same instruction still need state->instr. */
+   state->instr = user;
+   return true;
+}
+
+/** Schedules an instruction early
+ *
+ * Depth-first recursion through the sources of instr that moves each
+ * unpinned instruction as high in the dominance tree as its sources
+ * allow.  "Scheduling" here only means updating instr->block; nothing is
+ * relinked yet.
+ */
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
+{
+   /* Each instruction is visited at most once. */
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
+
+   /* Pinned instructions are already scheduled, so there is nothing to
+    * do for them.  Not recursing here also keeps us from ever following
+    * the sources of phi nodes, which can be back-edges.
+    */
+   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+      /* Start at the top of the function; gcm_schedule_early_src moves
+       * the instruction down as required by each source.
+       */
+      state->instr = instr;
+      instr->block = state->impl->start_block;
+
+      nir_foreach_src(instr, gcm_schedule_early_src, state);
+   }
+}
+
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Schedules the instruction associated with the given SSA def late
+ *
+ * This function works by first walking all of the uses of the given SSA
+ * definition, ensuring that they are scheduled, and then computing the LCA
+ * (least common ancestor) of its uses.  It then schedules this instruction
+ * as close to the LCA as possible while trying to stay out of loops.
+ */
+static bool
+gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_block *lca = NULL;
+   struct set_entry *entry;
+   set_foreach(def->uses, entry) {
+      nir_instr *use_instr = (nir_instr *)entry->key;
+
+      gcm_schedule_late_instr(use_instr, state);
+
+      /* Phi instructions are a bit special.  SSA definitions don't have to
+       * dominate the sources of the phi nodes that use them; instead, they
+       * have to dominate the predecessor block corresponding to the phi
+       * source.  We handle this by looking through the sources, finding
+       * any that are using this SSA def, and using those blocks instead
+       * of the one the phi lives in.
+       */
+      if (use_instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
+
+         nir_foreach_phi_src(phi, phi_src) {
+            if (phi_src->src.ssa == def)
+               lca = nir_dominance_lca(lca, phi_src->pred);
+         }
+      } else {
+         lca = nir_dominance_lca(lca, use_instr->block);
+      }
+   }
+
+   set_foreach(def->if_uses, entry) {
+      nir_if *if_stmt = (nir_if *)entry->key;
+
+      /* For if statements, we consider the block to be the one immediately
+       * preceding the if CF node.
+       */
+      nir_block *pred_block =
+         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
+
+      lca = nir_dominance_lca(lca, pred_block);
+   }
+
+   /* Some instructions may never be used.  We'll just leave them scheduled
+    * early and let dead code clean them up.
+    */
+   if (lca == NULL)
+      return true;
+
+   /* We now have the LCA of all of the uses.  If our invariants hold,
+    * this is dominated by the block that we chose when scheduling early.
+    * We now walk up the dominance tree and pick the lowest block that is
+    * as far outside loops as we can get.  NOTE(review): the walk stops
+    * before examining the early block itself - confirm that is intended.
+    */
+   nir_block *best = lca;
+   while (lca != def->parent_instr->block) {
+      assert(lca);
+      if (state->blocks[lca->index].loop_depth <
+          state->blocks[best->index].loop_depth)
+         best = lca;
+      lca = lca->imm_dom;
+   }
+   def->parent_instr->block = best;
+
+   return true;
+}
+
+/** Schedules an instruction late
+ *
+ * Depth-first search from the given instruction through its uses that
+ * schedules instructions as late as they can reasonably go in the
+ * dominance tree.  The instructions are "scheduled" by updating their
+ * instr->block field.
+ *
+ * The name is a bit of a misnomer: this does not schedule "as late as
+ * possible" as the paper implies.  It first finds the latest possible
+ * block for the instruction and then possibly picks an earlier one; the
+ * result is as far down the tree as we can go while trying to stay out
+ * of loops.
+ */
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
+{
+   /* Each instruction is handled at most once. */
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
+      return;
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
+
+   /* Pinned instructions are already scheduled, so nothing is done for
+    * them.  Stopping here also keeps us from ever following phi nodes,
+    * which can be back-edges.
+    */
+   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+      /* Each SSA def picks its block from the blocks of its uses. */
+      nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
+   }
+}
+
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state);
+
+/* Def callback: places every instruction that uses def; returns false. */
+static bool
+gcm_place_instr_def(nir_ssa_def *def, void *state)
+{
+   struct set_entry *use;
+   set_foreach(def->uses, use)
+      gcm_place_instr((nir_instr *)use->key, state);
+   return false;
+}
+
+/** Places an instruction back into the program
+ *
+ * The earlier passes of GCM simply choose blocks for each instruction and
+ * otherwise leave them alone.  This pass actually places the instructions
+ * into their chosen blocks.
+ *
+ * To do so, we use a standard post-order depth-first search linearization
+ * algorithm.  We walk over the uses of the given instruction and ensure
+ * that they are placed and then place this instruction.  Because we are
+ * working on multiple blocks at a time, we keep track of the last inserted
+ * instruction per-block in the state structure's block_info array.  When
+ * we insert an instruction in a block we insert it before the last
+ * instruction inserted in that block rather than the last instruction
+ * inserted globally.
+ */
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_PLACED)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_PLACED;
+
+   /* Phi nodes are our one source of back-edges.  Since right now we are
+    * only doing scheduling within blocks, we don't need to worry about
+    * them since they are always at the top.  Just skip them completely.
+    */
+   if (instr->type == nir_instr_type_phi) {
+      assert(instr->pass_flags & GCM_INSTR_PINNED);
+      return;
+   }
+
+   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);
+
+   if (instr->pass_flags & GCM_INSTR_PINNED) {
+      /* Pinned instructions have an implicit dependence on the pinned
+       * instructions that come after them in the block.  Since the pinned
+       * instructions will naturally "chain" together, we only need to
+       * explicitly visit one of them.
+       */
+      for (nir_instr *after = nir_instr_next(instr);
+           after;
+           after = nir_instr_next(after)) {
+         if (after->pass_flags & GCM_INSTR_PINNED) {
+            gcm_place_instr(after, state);
+            break;
+         }
+      }
+   }
+
+   struct gcm_block_info *block_info = &state->blocks[instr->block->index];
+   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+      exec_node_remove(&instr->node);
+
+      if (block_info->last_instr) {
+         exec_node_insert_node_before(&block_info->last_instr->node,
+                                      &instr->node);
+      } else {
+         /* Schedule it at the end of the block */
+         nir_instr *jump_instr = nir_block_last_instr(instr->block);
+         if (jump_instr && jump_instr->type == nir_instr_type_jump) {
+            exec_node_insert_node_before(&jump_instr->node, &instr->node);
+         } else {
+            exec_list_push_tail(&instr->block->instr_list, &instr->node);
+         }
+      }
+   }
+
+   block_info->last_instr = instr;
+}
+
+/* Runs global code motion on a single function implementation. */
+static void
+opt_gcm_impl(nir_function_impl *impl)
+{
+   struct gcm_state state;
+   /* blocks[] is sized by num_blocks and indexed by block->index, so the
+    * block-index metadata is required before impl->num_blocks is read;
+    * until then the count may be stale.
+    */
+   nir_metadata_require(impl, nir_metadata_block_index |
+                              nir_metadata_dominance);
+   state.impl = impl;
+   state.instr = NULL;
+   exec_list_make_empty(&state.instrs);
+   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
+
+   gcm_build_block_info(&impl->body, &state, 0);
+   nir_foreach_block(impl, gcm_pin_instructions_block, &state);
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_early_instr(instr, &state);
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_late_instr(instr, &state);
+
+   /* gcm_place_instr unlinks each unpinned instruction from state.instrs. */
+   while (!exec_list_is_empty(&state.instrs)) {
+      nir_instr *instr = exec_node_data(nir_instr,
+                                        state.instrs.tail_pred, node);
+      gcm_place_instr(instr, &state);
+   }
+   ralloc_free(state.blocks);
+}
+
+/* Entry point: runs GCM on every overload that has an implementation. */
+void
+nir_opt_gcm(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload)
+      if (overload->impl != NULL)
+         opt_gcm_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_global_to_local.c b/mesalib/src/glsl/nir/nir_opt_global_to_local.c
new file mode 100644
index 000000000..00db37ba7
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_global_to_local.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* Tries to turn the global register reg into a register local to the one
+ * function implementation that references it.
+ *
+ * Returns false, leaving reg untouched, if its defs, uses and if-uses span
+ * several implementations; returns true if reg was removed or moved.
+ */
+static bool
+global_to_local(nir_register *reg)
+{
+   nir_function_impl *owner = NULL;
+   struct set_entry *entry;
+
+   assert(reg->is_global);
+
+   /* Defining instructions (reg->defs). */
+   set_foreach(reg->defs, entry) {
+      nir_instr *def_instr = (nir_instr *) entry->key;
+      nir_function_impl *def_impl =
+         nir_cf_node_get_function(&def_instr->block->cf_node);
+      if (owner == NULL)
+         owner = def_impl;
+      else if (owner != def_impl)
+         return false;
+   }
+
+   /* Using instructions (reg->uses). */
+   set_foreach(reg->uses, entry) {
+      nir_instr *use_instr = (nir_instr *) entry->key;
+      nir_function_impl *use_impl =
+         nir_cf_node_get_function(&use_instr->block->cf_node);
+      if (owner == NULL)
+         owner = use_impl;
+      else if (owner != use_impl)
+         return false;
+   }
+
+   /* If statements recorded in reg->if_uses. */
+   set_foreach(reg->if_uses, entry) {
+      nir_if *if_stmt = (nir_if *) entry->key;
+      nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node);
+      if (owner == NULL)
+         owner = if_impl;
+      else if (owner != if_impl)
+         return false;
+   }
+
+   if (owner == NULL) {
+      /* this register is never used/defined, delete it */
+      nir_reg_remove(reg);
+      return true;
+   }
+
+   /* Every reference lives in `owner`: relink the register into that
+    * implementation's list and give it a fresh local index.
+    */
+   reg->is_global = false;
+   reg->index = owner->reg_alloc++;
+   exec_node_remove(&reg->node);
+   exec_list_push_tail(&owner->registers, &reg->node);
+   return true;
+}
+
+/* Entry point: tries global_to_local on every shader-level register. */
+bool
+nir_opt_global_to_local(nir_shader *shader)
+{
+   bool changed = false;
+
+   /* _safe variant: global_to_local may unlink reg from this list. */
+   foreach_list_typed_safe(nir_register, reg, node, &shader->registers)
+      changed |= global_to_local(reg);
+
+   return changed;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
new file mode 100644
index 000000000..ab08f286f
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for
+ *
+ * if (cond) {
+ *    <empty>
+ * } else {
+ *    <empty>
+ * }
+ * phi
+ * ...
+ * phi
+ *
+ * and replaces it with a series of selects.  It can also handle the case
+ * where, instead of being empty, the if may contain some move operations
+ * whose only use is one of the following phi nodes.  This happens all the
+ * time when the SSA form comes from a conditional assignment with a
+ * swizzle.
+ */
+
+struct peephole_select_state {
+   void *mem_ctx;   /* memory context passed to instr/src creation */
+   bool progress;   /* set once any if has been replaced */
+};
+
+/* Returns true if every instruction in block is an unsaturated SSA
+ * fmov/imov whose result has no if-uses and is read only by phi
+ * instructions located in block->successors[0].
+ */
+static bool
+are_all_move_to_phi(nir_block *block)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      /* It must be a move operation, and we can't handle saturate */
+      bool is_move = alu->op == nir_op_fmov || alu->op == nir_op_imov;
+      if (!is_move || alu->dest.saturate)
+         return false;
+
+      /* The destination must be SSA and cannot have any if-uses */
+      if (!alu->dest.dest.is_ssa ||
+          alu->dest.dest.ssa.if_uses->entries != 0)
+         return false;
+
+      /* Every user must be a phi in the successor block */
+      struct set_entry *use;
+      set_foreach(alu->dest.dest.ssa.uses, use) {
+         const nir_instr *user = use->key;
+         bool phi_in_succ = user->type == nir_instr_type_phi &&
+                            user->block == block->successors[0];
+         if (!phi_in_succ)
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/* Block callback: when block directly follows an if whose two branches hold
+ * nothing but moves feeding this block's phis, replaces each phi by a bcsel
+ * on the if condition and removes the if.  Always returns true.
+ */
+static bool
+nir_opt_peephole_select_block(nir_block *block, void *void_state)
+{
+   struct peephole_select_state *state = void_state;
+   /* If the block is empty, then it certainly doesn't have any phi nodes,
+    * so we can skip it.  This also ensures that we do an early skip on the
+    * end block of the function which isn't actually attached to the CFG.
+    */
+   if (exec_list_is_empty(&block->instr_list))
+      return true;
+
+   if (nir_cf_node_is_first(&block->cf_node))
+      return true;
+
+   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
+   if (prev_node->type != nir_cf_node_if)
+      return true;
+
+   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
+   nir_cf_node *then_node = nir_if_first_then_node(if_stmt);
+   nir_cf_node *else_node = nir_if_first_else_node(if_stmt);
+
+   /* We can only have one block in each side ... */
+   if (nir_if_last_then_node(if_stmt) != then_node ||
+       nir_if_last_else_node(if_stmt) != else_node)
+      return true;
+
+   nir_block *then_block = nir_cf_node_as_block(then_node);
+   nir_block *else_block = nir_cf_node_as_block(else_node);
+
+   /* ... and those blocks must only contain move-to-phi. */
+   if (!are_all_move_to_phi(then_block) || !are_all_move_to_phi(else_block))
+      return true;
+
+   /* The previous CF node is an if containing only moves to phi nodes in
+    * this block: replace all of those phis with selects, then remove it.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+      /* Splat the condition to all channels */
+      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
+
+      assert(exec_list_length(&phi->srcs) == 2);
+      nir_foreach_phi_src(phi, src) {
+         assert(src->pred == then_block || src->pred == else_block);
+         assert(src->src.is_ssa);
+
+         unsigned idx = src->pred == then_block ? 1 : 2;
+
+         if (src->src.ssa->parent_instr->block == src->pred) {
+            /* We already know that this instruction must be a move with
+             * the phis in this block as its only users.
+             */
+            nir_alu_instr *mov = nir_instr_as_alu(src->src.ssa->parent_instr);
+            assert(mov->instr.type == nir_instr_type_alu);
+            assert(mov->op == nir_op_fmov || mov->op == nir_op_imov);
+
+            nir_alu_src_copy(&sel->src[idx], &mov->src[0], state->mem_ctx);
+         } else {
+            nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+         }
+      }
+
+      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
+                        phi->dest.ssa.num_components, phi->dest.ssa.name);
+      sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&sel->dest.dest.ssa),
+                               state->mem_ctx);
+
+      nir_instr_insert_before(&phi->instr, &sel->instr);
+      nir_instr_remove(&phi->instr);
+   }
+
+   nir_cf_node_remove(&if_stmt->cf_node);
+   state->progress = true;
+
+   return true;
+}
+
+/* Runs the peephole on one implementation; returns true on any change. */
+static bool
+nir_opt_peephole_select_impl(nir_function_impl *impl)
+{
+   struct peephole_select_state sel_state;
+   sel_state.progress = false;
+   sel_state.mem_ctx = ralloc_parent(impl);
+
+   nir_foreach_block(impl, nir_opt_peephole_select_block, &sel_state);
+   if (!sel_state.progress)
+      return false;
+   /* nir_metadata_none: no metadata is declared preserved after a change. */
+   nir_metadata_preserve(impl, nir_metadata_none);
+   return true;
+}
+
+/* Entry point: runs the peephole on every overload with an implementation. */
+bool
+nir_opt_peephole_select(nir_shader *shader)
+{
+   bool any_change = false;
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+      any_change = nir_opt_peephole_select_impl(overload->impl) || any_change;
+   }
+   return any_change;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_remove_phis.c b/mesalib/src/glsl/nir/nir_opt_remove_phis.c
new file mode 100644
index 000000000..7896584b4
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_remove_phis.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2015 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This is a pass for removing phi nodes that look like:
+ * a = phi(b, b, b, ...)
+ *
+ * Note that we can't ignore undef sources here, or else we may create a
+ * situation where the definition of b doesn't dominate all of its uses. The
+ * replacement itself is safe since the definition of b must dominate all of
+ * the phi node's predecessors, which means it must dominate the phi node as
+ * well as all of the phi node's uses. In essence, the phi node acts as a copy
+ * instruction. b can't be another phi node in the same block, since the only
+ * time when phi nodes can source other phi nodes defined in the same block is
+ * at the loop header, and in that case one of the sources of the phi has to
+ * be from before the loop and that source can't be b.
+ */
+
+/*
+ * nir_foreach_block callback: removes every phi in "block" whose sources all
+ * refer to the same SSA def, after rewriting the uses of the phi's
+ * destination to use that def directly.
+ *
+ * "state" points to a bool which is set to true when a phi is removed.
+ * Always returns true so that block iteration continues.
+ */
+static bool
+remove_phis_block(nir_block *block, void *state)
+{
+   bool *progress = state;
+
+   void *mem_ctx = ralloc_parent(block);
+
+   nir_foreach_instr_safe(block, instr) {
+      /* stop scanning at the first instruction that is not a phi */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_ssa_def *def = NULL;
+      bool srcs_same = true;
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+
+         if (def == NULL) {
+            def = src->src.ssa;
+         } else if (src->src.ssa != def) {
+            srcs_same = false;
+            break;
+         }
+      }
+
+      /* A phi without any sources leaves def == NULL.  There is nothing to
+       * replace it with, so leave it alone instead of rewriting its uses to
+       * point at a NULL def.
+       */
+      if (!srcs_same || def == NULL)
+         continue;
+
+      assert(phi->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def),
+                               mem_ctx);
+      nir_instr_remove(instr);
+
+      *progress = true;
+   }
+
+   return true;
+}
+
+/*
+ * Runs remove_phis_block over every block of one function implementation.
+ * Returns true if at least one phi was removed.
+ */
+static bool
+remove_phis_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_foreach_block(impl, remove_phis_block, &progress);
+
+   /* Removing phis does not touch the control flow, so block indices and
+    * dominance information stay valid; any other metadata must be dropped
+    * because instructions were removed and uses were rewritten.
+    */
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return progress;
+}
+
+/*
+ * Removes trivial phis (all sources identical) from every overload of the
+ * shader that has an implementation.  Returns true if anything was removed.
+ */
+bool
+nir_opt_remove_phis(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      if (remove_phis_impl(overload->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
+
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
new file mode 100644
index 000000000..6a3c6a027
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Writes num_tabs tab characters to fp (one per indentation level). */
+static void
+print_tabs(unsigned num_tabs, FILE *fp)
+{
+   while (num_tabs-- > 0)
+      fputc('\t', fp);
+}
+
+/*
+ * Variable-naming state used while printing a whole shader: it records the
+ * name each nir_variable was declared with so later references print the
+ * same (de-duplicated) name.
+ */
+typedef struct {
+   /** map from nir_variable -> printable name */
+   struct hash_table *ht;
+
+   /** set of names used so far for nir_variables */
+   struct set *syms;
+
+   /** an index used to make new non-conflicting names */
+   unsigned index;
+} print_var_state;
+
+/*
+ * Prints a register reference: its name (if any) inside a comment, then
+ * "gr<index>" for global registers or "r<index>" otherwise.
+ */
+static void
+print_register(nir_register *reg, FILE *fp)
+{
+   if (reg->name != NULL)
+      fprintf(fp, "/* %s */ ", reg->name);
+
+   if (!reg->is_global) {
+      fprintf(fp, "r%u", reg->index);
+      return;
+   }
+
+   fprintf(fp, "gr%u", reg->index);
+}
+
+/* printable vector type name, indexed by number of components (0 is invalid) */
+static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+/*
+ * Prints a register declaration line:
+ *    decl_reg <size> [(packed)] <register>[<array length>]
+ */
+static void
+print_register_decl(nir_register *reg, FILE *fp)
+{
+   fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+
+   if (reg->is_packed)
+      fputs("(packed) ", fp);
+
+   print_register(reg, fp);
+
+   /* only array registers get a length suffix */
+   if (reg->num_array_elems != 0)
+      fprintf(fp, "[%u]", reg->num_array_elems);
+
+   fputc('\n', fp);
+}
+
+/*
+ * Prints the defining occurrence of an SSA value: optional name comment,
+ * the vector size, and "ssa_<index>".
+ */
+static void
+print_ssa_def(nir_ssa_def *def, FILE *fp)
+{
+   const char *size_name = sizes[def->num_components];
+
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+
+   fprintf(fp, "%s ssa_%u", size_name, def->index);
+}
+
+/*
+ * Prints a use of an SSA value: optional name comment followed by
+ * "ssa_<index>" (no size, unlike print_ssa_def).
+ */
+static void
+print_ssa_use(nir_ssa_def *def, FILE *fp)
+{
+   const unsigned ssa_index = def->index;
+
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+
+   fprintf(fp, "ssa_%u", ssa_index);
+}
+
+static void print_src(nir_src *src, FILE *fp);
+
+/*
+ * Prints a register source.  For array registers the access is printed as
+ * "[<base_offset>]" or "[<base_offset> + <indirect source>]".
+ */
+static void
+print_reg_src(nir_reg_src *src, FILE *fp)
+{
+   print_register(src->reg, fp);
+
+   /* non-array registers have no index expression */
+   if (src->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", src->base_offset);
+   if (src->indirect != NULL) {
+      fputs(" + ", fp);
+      print_src(src->indirect, fp);
+   }
+   fputc(']', fp);
+}
+
+/*
+ * Prints a register destination.  For array registers the access is printed
+ * as "[<base_offset>]" or "[<base_offset> + <indirect source>]".
+ */
+static void
+print_reg_dest(nir_reg_dest *dest, FILE *fp)
+{
+   print_register(dest->reg, fp);
+
+   /* non-array registers have no index expression */
+   if (dest->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", dest->base_offset);
+   if (dest->indirect != NULL) {
+      fputs(" + ", fp);
+      print_src(dest->indirect, fp);
+   }
+   fputc(']', fp);
+}
+
+/* Prints a source, which is either an SSA use or a register source. */
+static void
+print_src(nir_src *src, FILE *fp)
+{
+   if (!src->is_ssa) {
+      print_reg_src(&src->reg, fp);
+      return;
+   }
+
+   print_ssa_use(src->ssa, fp);
+}
+
+/* Prints a destination, which is either an SSA def or a register dest. */
+static void
+print_dest(nir_dest *dest, FILE *fp)
+{
+   if (!dest->is_ssa) {
+      print_reg_dest(&dest->reg, fp);
+      return;
+   }
+
+   print_ssa_def(&dest->ssa, fp);
+}
+
+/*
+ * Prints an ALU source with its modifiers: a leading "-" for negate, an
+ * "abs(...)" wrapper for abs, and a four-letter swizzle suffix whenever the
+ * swizzle differs from the identity (x, y, z, w).
+ */
+static void
+print_alu_src(nir_alu_src *src, FILE *fp)
+{
+   if (src->negate)
+      fputc('-', fp);
+   if (src->abs)
+      fputs("abs(", fp);
+
+   print_src(&src->src, fp);
+
+   bool identity = true;
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if (src->swizzle[chan] != chan) {
+         identity = false;
+         break;
+      }
+   }
+
+   if (!identity) {
+      fputc('.', fp);
+      for (unsigned chan = 0; chan < 4; chan++)
+         fputc("xyzw"[src->swizzle[chan]], fp);
+   }
+
+   if (src->abs)
+      fputc(')', fp);
+}
+
+/*
+ * Prints an ALU destination.  Register destinations that do not write every
+ * component of the register get a ".xyzw"-style write-mask suffix.
+ */
+static void
+print_alu_dest(nir_alu_dest *dest, FILE *fp)
+{
+   /* we're going to print the saturate modifier later, after the opcode */
+
+   print_dest(&dest->dest, fp);
+
+   if (dest->dest.is_ssa)
+      return;
+
+   const int all_channels = (1 << dest->dest.reg.reg->num_components) - 1;
+   if (dest->write_mask == all_channels)
+      return;
+
+   fputc('.', fp);
+   for (unsigned chan = 0; chan < 4; chan++) {
+      if (dest->write_mask & (1 << chan))
+         fputc("xyzw"[chan], fp);
+   }
+}
+
+/*
+ * Prints an ALU instruction as
+ *    <dest> = <opcode>[.sat] <src0>, <src1>, ...
+ */
+static void
+print_alu_instr(nir_alu_instr *instr, FILE *fp)
+{
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+
+   print_alu_dest(&instr->dest, fp);
+
+   fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+   if (instr->dest.saturate)
+      fputs(".sat", fp);
+   fputc(' ', fp);
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (s > 0)
+         fputs(", ", fp);
+
+      print_alu_src(&instr->src[s], fp);
+   }
+}
+
+/*
+ * Prints a variable declaration line:
+ *    decl_var <qualifiers> <type> <name> [(<driver_location>)]
+ *
+ * When "state" is non-NULL the printed name is made unique: if the
+ * variable's name is already in state->syms, "@<index>" is appended.  The
+ * printed name is then recorded in state->syms and mapped from the variable
+ * in state->ht so that print_var() can look it up.
+ */
+static void
+print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
+{
+   const char *const mode[] = { "shader_in ", "shader_out ", "", "",
+                                "uniform ", "system " };
+   const char *const interp[] = { "", "smooth", "flat", "noperspective" };
+
+   fputs("decl_var ", fp);
+
+   fprintf(fp, "%s%s%s%s%s ",
+           var->data.centroid ? "centroid " : "",
+           var->data.sample ? "sample " : "",
+           var->data.invariant ? "invariant " : "",
+           mode[var->data.mode], interp[var->data.interpolation]);
+
+   glsl_print_type(var->type, fp);
+
+   char *name = var->name;
+   if (state != NULL && _mesa_set_search(state->syms, var->name) != NULL) {
+      /* we have a collision with another name, append an @ + a unique index */
+      name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++);
+   }
+
+   fprintf(fp, " %s", name);
+
+   /* only inputs, outputs and uniforms print their driver location */
+   switch (var->data.mode) {
+   case nir_var_shader_in:
+   case nir_var_shader_out:
+   case nir_var_uniform:
+      fprintf(fp, " (%u)", var->data.driver_location);
+      break;
+   default:
+      break;
+   }
+
+   fputc('\n', fp);
+
+   if (state == NULL)
+      return;
+
+   _mesa_set_add(state->syms, name);
+   _mesa_hash_table_insert(state->ht, var, name);
+}
+
+/*
+ * Prints a reference to a variable.  With a non-NULL "state" the name
+ * recorded by print_var_decl() is used (the variable must have been
+ * declared); without one the variable's own name is printed.
+ */
+static void
+print_var(nir_variable *var, print_var_state *state, FILE *fp)
+{
+   const char *name = var->name;
+
+   if (state != NULL) {
+      struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
+
+      assert(entry != NULL);
+      name = entry->data;
+   }
+
+   fprintf(fp, "%s", name);
+}
+
+/* Prints the head of a deref chain, i.e. the variable being dereferenced. */
+static void
+print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp)
+{
+   print_var(deref->var, state, fp);
+}
+
+/*
+ * Prints an array dereference in brackets: "[<offset>]" for direct access,
+ * "[<offset> + <src>]" (offset omitted when zero) for indirect access and
+ * "[*]" for a wildcard.
+ */
+static void
+print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
+{
+   fputc('[', fp);
+
+   if (deref->deref_array_type == nir_deref_array_type_direct) {
+      fprintf(fp, "%u", deref->base_offset);
+   } else if (deref->deref_array_type == nir_deref_array_type_indirect) {
+      if (deref->base_offset != 0)
+         fprintf(fp, "%u + ", deref->base_offset);
+      print_src(&deref->indirect, fp);
+   } else if (deref->deref_array_type == nir_deref_array_type_wildcard) {
+      fputc('*', fp);
+   }
+
+   fputc(']', fp);
+}
+
+/*
+ * Prints a struct member dereference as ".<member name>".  The member name
+ * is looked up by index in parent_type, the type of the deref being indexed.
+ * "state" is unused here.
+ */
+static void
+print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
+                   print_var_state *state, FILE *fp)
+{
+   fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index));
+}
+
+/*
+ * Prints a whole deref chain by walking the child links from the variable
+ * at the head: the variable name followed by one "[...]" or ".member"
+ * suffix per link.
+ */
+static void
+print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
+{
+   nir_deref *prev = NULL;
+
+   for (nir_deref *cur = &deref->deref; cur != NULL;
+        prev = cur, cur = cur->child) {
+      switch (cur->deref_type) {
+      case nir_deref_type_var:
+         /* the variable can only appear at the head of the chain */
+         assert(prev == NULL);
+         assert(cur == &deref->deref);
+         print_deref_var(deref, state, fp);
+         break;
+
+      case nir_deref_type_array:
+         assert(prev != NULL);
+         print_deref_array(nir_deref_as_array(cur), state, fp);
+         break;
+
+      case nir_deref_type_struct:
+         /* member names are looked up in the type of the previous link */
+         assert(prev != NULL);
+         print_deref_struct(nir_deref_as_struct(cur), prev->type, state, fp);
+         break;
+
+      default:
+         unreachable("Invalid deref type");
+      }
+   }
+}
+
+/*
+ * Prints an intrinsic instruction as
+ *    [<dest> = ]intrinsic <name> (<sources>) (<variables>) (<const indices>)
+ * where each parenthesized list is comma separated and may be empty.
+ */
+static void
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
+                      FILE *fp)
+{
+   const unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   const unsigned num_vars =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+   const unsigned num_indices =
+      nir_intrinsic_infos[instr->intrinsic].num_indices;
+
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      print_dest(&instr->dest, fp);
+      fputs(" = ", fp);
+   }
+
+   fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+
+   for (unsigned s = 0; s < num_srcs; s++) {
+      if (s > 0)
+         fputs(", ", fp);
+      print_src(&instr->src[s], fp);
+   }
+
+   fputs(") (", fp);
+
+   for (unsigned v = 0; v < num_vars; v++) {
+      if (v > 0)
+         fputs(", ", fp);
+      print_deref(instr->variables[v], state, fp);
+   }
+
+   fputs(") (", fp);
+
+   for (unsigned c = 0; c < num_indices; c++) {
+      if (c > 0)
+         fputs(", ", fp);
+      fprintf(fp, "%u", instr->const_index[c]);
+   }
+
+   fputc(')', fp);
+}
+
+/*
+ * Prints a texture instruction as
+ *    <dest> = <op> <src> (<src type>), ... <sampler> (sampler)
+ * A non-zero constant offset and, for tg4, the gather component are printed
+ * between the sources and the sampler.  The sampler is printed as a deref
+ * when one is present and as a plain index otherwise.
+ */
+static void
+print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+{
+   print_dest(&instr->dest, fp);
+
+   fputs(" = ", fp);
+
+   /* empty defaults: nothing is printed if unreachable() is compiled out */
+   const char *op_name = "";
+   switch (instr->op) {
+   case nir_texop_tex:          op_name = "tex ";          break;
+   case nir_texop_txb:          op_name = "txb ";          break;
+   case nir_texop_txl:          op_name = "txl ";          break;
+   case nir_texop_txd:          op_name = "txd ";          break;
+   case nir_texop_txf:          op_name = "txf ";          break;
+   case nir_texop_txf_ms:       op_name = "txf_ms ";       break;
+   case nir_texop_txs:          op_name = "txs ";          break;
+   case nir_texop_lod:          op_name = "lod ";          break;
+   case nir_texop_tg4:          op_name = "tg4 ";          break;
+   case nir_texop_query_levels: op_name = "query_levels "; break;
+
+   default:
+      unreachable("Invalid texture operation");
+      break;
+   }
+   fputs(op_name, fp);
+
+   for (unsigned s = 0; s < instr->num_srcs; s++) {
+      print_src(&instr->src[s].src, fp);
+
+      fputc(' ', fp);
+
+      const char *type_name = "";
+      switch (instr->src[s].src_type) {
+      case nir_tex_src_coord:          type_name = "(coord)";          break;
+      case nir_tex_src_projector:      type_name = "(projector)";      break;
+      case nir_tex_src_comparitor:     type_name = "(comparitor)";     break;
+      case nir_tex_src_offset:         type_name = "(offset)";         break;
+      case nir_tex_src_bias:           type_name = "(bias)";           break;
+      case nir_tex_src_lod:            type_name = "(lod)";            break;
+      case nir_tex_src_ms_index:       type_name = "(ms_index)";       break;
+      case nir_tex_src_ddx:            type_name = "(ddx)";            break;
+      case nir_tex_src_ddy:            type_name = "(ddy)";            break;
+      case nir_tex_src_sampler_offset: type_name = "(sampler_offset)"; break;
+
+      default:
+         unreachable("Invalid texture source type");
+         break;
+      }
+      fputs(type_name, fp);
+
+      fputs(", ", fp);
+   }
+
+   /* the constant offset is only printed when some component is non-zero */
+   bool has_nonzero_offset = false;
+   for (unsigned c = 0; c < 4 && !has_nonzero_offset; c++)
+      has_nonzero_offset = instr->const_offset[c] != 0;
+
+   if (has_nonzero_offset) {
+      fprintf(fp, "[%i %i %i %i] (offset), ",
+              instr->const_offset[0], instr->const_offset[1],
+              instr->const_offset[2], instr->const_offset[3]);
+   }
+
+   if (instr->op == nir_texop_tg4)
+      fprintf(fp, "%u (gather_component), ", instr->component);
+
+   if (instr->sampler)
+      print_deref(instr->sampler, state, fp);
+   else
+      fprintf(fp, "%u", instr->sampler_index);
+
+   fputs(" (sampler)", fp);
+}
+
+/*
+ * Prints a call instruction as
+ *    call <function> <param deref>, ... [, ]returning <return deref>
+ * The "returning" part is only printed when the call has a return deref.
+ */
+static void
+print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "call %s ", instr->callee->function->name);
+
+   for (unsigned p = 0; p < instr->num_params; p++) {
+      if (p > 0)
+         fputs(", ", fp);
+
+      print_deref(instr->params[p], state, fp);
+   }
+
+   if (instr->return_deref == NULL)
+      return;
+
+   if (instr->num_params != 0)
+      fputs(", ", fp);
+   fputs("returning ", fp);
+   print_deref(instr->return_deref, state, fp);
+}
+
+/*
+ * Prints a load_const instruction as
+ *    <def> = load_const (<hex> <float comment>, ...)
+ * with one entry per component of the loaded value.  "tabs" is unused here.
+ */
+static void
+print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+
+   fprintf(fp, " = load_const (");
+
+   for (unsigned i = 0; i < instr->def.num_components; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      /*
+       * we don't really know the type of the constant (if it will be used as a
+       * float or an int), so just print the raw constant in hex for fidelity
+       * and then print the float in a comment for readability.
+       */
+
+      fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+   }
+
+   /* close the parenthesis opened after "load_const" */
+   fprintf(fp, ")");
+}
+
+/* Prints a jump instruction as "break", "continue" or "return". */
+static void
+print_jump_instr(nir_jump_instr *instr, FILE *fp)
+{
+   switch (instr->type) {
+   case nir_jump_break:    fputs("break", fp);    break;
+   case nir_jump_continue: fputs("continue", fp); break;
+   case nir_jump_return:   fputs("return", fp);   break;
+   }
+}
+
+/* Prints an SSA undef instruction as "<def> = undefined". */
+static void
+print_ssa_undef_instr(nir_ssa_undef_instr *instr, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+   fputs(" = undefined", fp);
+}
+
+/*
+ * Prints a phi instruction as
+ *    <dest> = phi block_<pred index>: <src>, block_<pred index>: <src>, ...
+ */
+static void
+print_phi_instr(nir_phi_instr *instr, FILE *fp)
+{
+   bool first = true;
+
+   print_dest(&instr->dest, fp);
+   fputs(" = phi ", fp);
+
+   nir_foreach_phi_src(instr, src) {
+      /* separator before every source except the first one */
+      if (!first)
+         fputs(", ", fp);
+      first = false;
+
+      fprintf(fp, "block_%u: ", src->pred->index);
+      print_src(&src->src, fp);
+   }
+}
+
+/*
+ * Prints a parallel copy as a "; "-separated list of "<dest> = <src>"
+ * entries.
+ */
+static void
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+{
+   bool first = true;
+
+   nir_foreach_parallel_copy_entry(instr, entry) {
+      /* separator before every entry except the first one */
+      if (!first)
+         fputs("; ", fp);
+      first = false;
+
+      print_dest(&entry->dest, fp);
+      fputs(" = ", fp);
+      print_src(&entry->src, fp);
+   }
+}
+
+/*
+ * Prints one instruction, indented by "tabs" tab characters, by dispatching
+ * on the instruction type to the matching print_*_instr() helper.  No
+ * trailing newline is written.  "state" is forwarded to the helpers that
+ * print variable names and may be NULL.
+ */
+static void
+print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      print_alu_instr(nir_instr_as_alu(instr), fp);
+      break;
+
+   case nir_instr_type_call:
+      print_call_instr(nir_instr_as_call(instr), state, fp);
+      break;
+
+   case nir_instr_type_intrinsic:
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+      break;
+
+   case nir_instr_type_tex:
+      print_tex_instr(nir_instr_as_tex(instr), state, fp);
+      break;
+
+   case nir_instr_type_load_const:
+      print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+      break;
+
+   case nir_instr_type_jump:
+      print_jump_instr(nir_instr_as_jump(instr), fp);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+      break;
+
+   case nir_instr_type_phi:
+      print_phi_instr(nir_instr_as_phi(instr), fp);
+      break;
+
+   case nir_instr_type_parallel_copy:
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+      break;
+
+   default:
+      unreachable("Invalid instruction type");
+      break;
+   }
+}
+
+/*
+ * qsort() comparator for an array of nir_block pointers: orders the blocks
+ * by ascending index.
+ */
+static int
+compare_block_index(const void *p1, const void *p2)
+{
+   const nir_block *const *lhs = p1;
+   const nir_block *const *rhs = p2;
+
+   const int lhs_index = (int) (*lhs)->index;
+   const int rhs_index = (int) (*rhs)->index;
+
+   return lhs_index - rhs_index;
+}
+
+static void print_cf_node(nir_cf_node *node, print_var_state *state,
+                          unsigned tabs, FILE *fp);
+
+/*
+ * Prints one basic block: a "block block_<index>:" header, a comment listing
+ * its predecessors, its instructions (one per line) and a comment listing
+ * its successors, all indented by "tabs" tab characters.
+ */
+static void
+print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+   fprintf(fp, "block block_%u:\n", block->index);
+
+   /* sort the predecessors by index so we consistently print the same thing */
+
+   const unsigned num_preds = block->predecessors->entries;
+   nir_block **preds = NULL;
+   struct set_entry *entry;
+
+   /* don't allocate (or qsort) a zero-sized array */
+   if (num_preds != 0)
+      preds = malloc(num_preds * sizeof(*preds));
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* preds: ");
+   if (preds != NULL) {
+      unsigned count = 0;
+      set_foreach(block->predecessors, entry) {
+         preds[count++] = (nir_block *) entry->key;
+      }
+
+      qsort(preds, count, sizeof(*preds), compare_block_index);
+
+      for (unsigned i = 0; i < count; i++) {
+         fprintf(fp, "block_%u ", preds[i]->index);
+      }
+
+      free(preds);
+   } else {
+      /* Either there are no predecessors or the allocation failed: print
+       * them in set iteration order instead of dereferencing NULL.
+       */
+      set_foreach(block->predecessors, entry) {
+         fprintf(fp, "block_%u ", ((nir_block *) entry->key)->index);
+      }
+   }
+   fprintf(fp, "*/\n");
+
+   nir_foreach_instr(block, instr) {
+      print_instr(instr, state, tabs, fp);
+      fprintf(fp, "\n");
+   }
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* succs: ");
+   for (unsigned i = 0; i < 2; i++)
+      if (block->successors[i]) {
+         fprintf(fp, "block_%u ", block->successors[i]->index);
+      }
+   fprintf(fp, "*/\n");
+}
+
+/*
+ * Prints an if statement: "if <condition> {", the then-list, "} else {",
+ * the else-list and "}", with both lists indented one level deeper.
+ */
+static void
+print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   const unsigned inner_tabs = tabs + 1;
+
+   print_tabs(tabs, fp);
+   fputs("if ", fp);
+   print_src(&if_stmt->condition, fp);
+   fputs(" {\n", fp);
+
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) {
+      print_cf_node(node, state, inner_tabs, fp);
+   }
+
+   print_tabs(tabs, fp);
+   fputs("} else {\n", fp);
+
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) {
+      print_cf_node(node, state, inner_tabs, fp);
+   }
+
+   print_tabs(tabs, fp);
+   fputs("}\n", fp);
+}
+
+/* Prints a loop as "loop {", its body indented one level deeper, and "}". */
+static void
+print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   const unsigned inner_tabs = tabs + 1;
+
+   print_tabs(tabs, fp);
+   fputs("loop {\n", fp);
+
+   foreach_list_typed(nir_cf_node, node, node, &loop->body) {
+      print_cf_node(node, state, inner_tabs, fp);
+   }
+
+   print_tabs(tabs, fp);
+   fputs("}\n", fp);
+}
+
+/*
+ * Prints one control-flow node by dispatching on its type to print_block(),
+ * print_if() or print_loop().  "tabs" is the indentation level.
+ */
+static void
+print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
+              FILE *fp)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      print_block(nir_cf_node_as_block(node), state, tabs, fp);
+      break;
+
+   case nir_cf_node_if:
+      print_if(nir_cf_node_as_if(node), state, tabs, fp);
+      break;
+
+   case nir_cf_node_loop:
+      print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+      break;
+
+   default:
+      unreachable("Invalid CFG node type");
+   }
+}
+
+/*
+ * Prints a function implementation: an "impl <name> <params> [returning
+ * <var>]{" header, the local variable and register declarations, the body
+ * and finally the end block followed by "}".
+ *
+ * Note that this calls nir_index_blocks() on the impl before printing the
+ * body.
+ */
+static void
+print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "\nimpl %s ", impl->overload->function->name);
+
+   for (unsigned p = 0; p < impl->num_params; p++) {
+      if (p > 0)
+         fputs(", ", fp);
+
+      print_var(impl->params[p], state, fp);
+   }
+
+   if (impl->return_var != NULL) {
+      if (impl->num_params != 0)
+         fputs(", ", fp);
+      fputs("returning ", fp);
+      print_var(impl->return_var, state, fp);
+   }
+
+   fputs("{\n", fp);
+
+   /* declarations are indented by one tab */
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      print_tabs(1, fp);
+      print_var_decl(var, state, fp);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      print_tabs(1, fp);
+      print_register_decl(reg, fp);
+   }
+
+   nir_index_blocks(impl);
+
+   foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+      print_cf_node(node, state, 1, fp);
+   }
+
+   fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+}
+
+/*
+ * Prints an overload declaration line,
+ *    decl_overload <name> <in|out|inout> <type>, ... [, ]returning <type>
+ * followed by the overload's implementation if it has one.
+ */
+static void
+print_function_overload(nir_function_overload *overload,
+                        print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "decl_overload %s ", overload->function->name);
+
+   for (unsigned p = 0; p < overload->num_params; p++) {
+      if (p > 0)
+         fputs(", ", fp);
+
+      switch (overload->params[p].param_type) {
+      case nir_parameter_in:
+         fputs("in ", fp);
+         break;
+      case nir_parameter_out:
+         fputs("out ", fp);
+         break;
+      case nir_parameter_inout:
+         fputs("inout ", fp);
+         break;
+      default:
+         unreachable("Invalid parameter type");
+      }
+
+      glsl_print_type(overload->params[p].type, fp);
+   }
+
+   if (overload->return_type != NULL) {
+      if (overload->num_params != 0)
+         fputs(", ", fp);
+      fputs("returning ", fp);
+      glsl_print_type(overload->return_type, fp);
+   }
+
+   fputc('\n', fp);
+
+   if (overload->impl != NULL)
+      print_function_impl(overload->impl, state, fp);
+}
+
+/* Prints every overload in the function's overload list, in list order. */
+static void
+print_function(nir_function *func, print_var_state *state, FILE *fp)
+{
+   foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
+      print_function_overload(overload, state, fp);
+   }
+}
+
+/*
+ * Initializes the naming state: an empty pointer-keyed hash table, an empty
+ * string set and a name index of zero.  Pair with destroy_print_state().
+ */
+static void
+init_print_state(print_var_state *state)
+{
+   state->index = 0;
+   state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+                                  _mesa_key_string_equal);
+}
+
+/* Destroys the table and the set created by init_print_state(). */
+static void
+destroy_print_state(print_var_state *state)
+{
+   struct hash_table *names_by_var = state->ht;
+   struct set *used_names = state->syms;
+
+   _mesa_hash_table_destroy(names_by_var, NULL);
+   _mesa_set_destroy(used_names, NULL);
+}
+
+/*
+ * Prints a whole shader to fp: user structures, then the declarations of
+ * uniforms, inputs, outputs, globals and system values, then the global
+ * registers and finally every function.
+ */
+void
+nir_print_shader(nir_shader *shader, FILE *fp)
+{
+   print_var_state state;
+   init_print_state(&state);
+
+   for (unsigned s = 0; s < shader->num_user_structures; s++)
+      glsl_print_struct(shader->user_structures[s], fp);
+
+   /* the hash-table backed variable lists, in the order they are printed */
+   struct hash_table *const var_tables[] = {
+      shader->uniforms,
+      shader->inputs,
+      shader->outputs,
+   };
+
+   for (unsigned t = 0; t < 3; t++) {
+      struct hash_entry *entry;
+
+      hash_table_foreach(var_tables[t], entry) {
+         print_var_decl((nir_variable *) entry->data, &state, fp);
+      }
+   }
+
+   foreach_list_typed(nir_variable, var, node, &shader->globals) {
+      print_var_decl(var, &state, fp);
+   }
+
+   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+      print_var_decl(var, &state, fp);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      print_register_decl(reg, fp);
+   }
+
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      print_function(func, &state, fp);
+   }
+
+   destroy_print_state(&state);
+}
+
+/*
+ * Prints a single instruction to fp with no indentation and no trailing
+ * newline.  No naming state is used, so variables are printed with their
+ * own names.
+ */
+void
+nir_print_instr(const nir_instr *instr, FILE *fp)
+{
+   print_instr(instr, NULL, 0, fp);
+}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
new file mode 100644
index 000000000..e7f8aeacb
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* Adds every variable referenced by an intrinsic to the "live" set. */
+static void
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+{
+   const unsigned count = nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+   for (unsigned v = 0; v < count; v++)
+      _mesa_set_add(live, instr->variables[v]->var);
+}
+
+/*
+ * Adds the variables referenced by a call (the return deref, if any, and
+ * every parameter deref) to the "live" set.
+ */
+static void
+add_var_use_call(nir_call_instr *instr, struct set *live)
+{
+   if (instr->return_deref != NULL)
+      _mesa_set_add(live, instr->return_deref->var);
+
+   for (unsigned p = 0; p < instr->num_params; p++)
+      _mesa_set_add(live, instr->params[p]->var);
+}
+
+/* Adds the sampler variable of a texture instruction, if any, to "live". */
+static void
+add_var_use_tex(nir_tex_instr *instr, struct set *live)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   _mesa_set_add(live, instr->sampler->var);
+}
+
+/*
+ * nir_foreach_block callback: records the variables used by the intrinsic,
+ * call and texture instructions of "block" in the set passed as "state".
+ * Always returns true so that block iteration continues.
+ */
+static bool
+add_var_use_block(nir_block *block, void *state)
+{
+   struct set *live = state;
+
+   nir_foreach_instr(block, instr) {
+      /* only these three instruction types are inspected for variables */
+      if (instr->type == nir_instr_type_intrinsic)
+         add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+      else if (instr->type == nir_instr_type_call)
+         add_var_use_call(nir_instr_as_call(instr), live);
+      else if (instr->type == nir_instr_type_tex)
+         add_var_use_tex(nir_instr_as_tex(instr), live);
+   }
+
+   return true;
+}
+
+/*
+ * Collects into "live" every variable used by any implemented overload of
+ * the shader.
+ */
+static void
+add_var_use_shader(nir_shader *shader, struct set *live)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      nir_foreach_block(overload->impl, add_var_use_block, live);
+   }
+}
+
+/*
+ * Unlinks from the impl's list of locals every variable that is not in the
+ * "live" set.
+ */
+static void
+remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+      if (_mesa_set_search(live, var) != NULL)
+         continue;
+
+      exec_node_remove(&var->node);
+   }
+}
+
+/*
+ * Unlinks from the shader's list of globals every variable that is not in
+ * the "live" set.
+ */
+static void
+remove_dead_global_vars(nir_shader *shader, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
+      if (_mesa_set_search(live, var) != NULL)
+         continue;
+
+      exec_node_remove(&var->node);
+   }
+}
+
+/*
+ * Removes unused variables from the shader: first collects every variable
+ * referenced by an instruction, then unlinks all globals and all locals of
+ * implemented overloads that were not collected.
+ */
+void
+nir_remove_dead_variables(nir_shader *shader)
+{
+   struct set *used_vars =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+   add_var_use_shader(shader, used_vars);
+
+   remove_dead_global_vars(shader, used_vars);
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      remove_dead_local_vars(overload->impl, used_vars);
+   }
+
+   _mesa_set_destroy(used_vars, NULL);
+}
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
new file mode 100644
index 000000000..73a802be7
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_search.h"
+
+struct match_state {
+   unsigned variables_seen;   /* bit i is set once search variable i is bound */
+   nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];   /* binding per index */
+};
+
+static bool   /* declared early: match_value() and this call each other */
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state);
+
+static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };   /* i maps to i */
+
+static bool alu_instr_is_bool(nir_alu_instr *instr);   /* for src_is_bool() */
+
+/* True only for an SSA source whose defining instruction is an ALU
+ * instruction accepted by alu_instr_is_bool(); false for anything else.
+ */
+static bool
+src_is_bool(nir_src src)
+{
+   return src.is_ssa && src.ssa->parent_instr->type == nir_instr_type_alu &&
+          alu_instr_is_bool((nir_alu_instr *)src.ssa->parent_instr);
+}
+
+/* iand/ior/ixor count as boolean when both operands do, inot when its
+ * operand does; any other opcode goes by its declared output type.
+ */
+static bool
+alu_instr_is_bool(nir_alu_instr *instr)
+{
+   const nir_op op = instr->op;
+
+   if (op == nir_op_iand || op == nir_op_ior || op == nir_op_ixor)
+      return src_is_bool(instr->src[0].src) && src_is_bool(instr->src[1].src);
+   if (op == nir_op_inot)
+      return src_is_bool(instr->src[0].src);
+   return nir_op_infos[op].output_type == nir_type_bool;
+}
+
+/* Match source "src" of "instr" against one search value, after composing
+ * "swizzle" with the source's own swizzle.  Expressions recurse into the
+ * producing ALU instruction, variables are bound in "state" on first sight
+ * and compared afterwards, constants are checked against a load_const.
+ */
+static bool
+match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
+            unsigned num_components, const uint8_t *swizzle,
+            struct match_state *state)
+{
+   uint8_t new_swizzle[4];
+
+   for (unsigned i = 0; i < num_components; ++i)
+      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
+
+   /* Producer of the source; NULL for a non-SSA (register) source. */
+   nir_instr *parent = instr->src[src].src.is_ssa ?
+      instr->src[src].src.ssa->parent_instr : NULL;
+
+   switch (value->type) {
+   case nir_search_value_expression:
+      if (parent == NULL || parent->type != nir_instr_type_alu)
+         return false;
+
+      return match_expression(nir_search_value_as_expression(value),
+                              nir_instr_as_alu(parent),
+                              num_components, new_swizzle, state);
+
+   case nir_search_value_variable: {
+      nir_search_variable *var = nir_search_value_as_variable(value);
+
+      if (state->variables_seen & (1 << var->variable)) {
+         if (!nir_srcs_equal(state->variables[var->variable].src,
+                             instr->src[src].src))
+            return false;
+
+         assert(!instr->src[src].abs && !instr->src[src].negate);
+
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
+               return false;
+         }
+
+         return true;
+      } else {
+         /* Constant-only and typed variables need a producer to inspect. */
+         if (var->is_constant &&
+             (parent == NULL || parent->type != nir_instr_type_load_const))
+            return false;
+
+         if (var->type != nir_type_invalid) {
+            if (parent == NULL || parent->type != nir_instr_type_alu)
+               return false;
+            nir_alu_instr *src_alu = nir_instr_as_alu(parent);
+            if (nir_op_infos[src_alu->op].output_type != var->type &&
+                !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+               return false;
+         }
+
+         state->variables_seen |= (1 << var->variable);
+         state->variables[var->variable].src = instr->src[src].src;
+         state->variables[var->variable].abs = false;
+         state->variables[var->variable].negate = false;
+
+         for (unsigned i = 0; i < 4; ++i) {
+            if (i < num_components)
+               state->variables[var->variable].swizzle[i] = new_swizzle[i];
+            else
+               state->variables[var->variable].swizzle[i] = 0;
+         }
+
+         return true;
+      }
+   }
+
+   case nir_search_value_constant: {
+      nir_search_constant *const_val = nir_search_value_as_constant(value);
+
+      if (parent == NULL || parent->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *load = nir_instr_as_load_const(parent);
+
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_float:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.f[new_swizzle[i]] != const_val->data.f)
+               return false;
+         }
+         return true;
+      case nir_type_int:
+      case nir_type_unsigned:
+      case nir_type_bool:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.i[new_swizzle[i]] != const_val->data.i)
+               return false;
+         }
+         return true;
+      default:
+         unreachable("Invalid alu source type");
+      }
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/* Match "instr" against a search expression, binding variables in "state".
+ * Two-source commutative opcodes are retried with their sources exchanged.
+ */
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state)
+{
+   if (instr->op != expr->opcode)
+      return false;
+
+   assert(!instr->dest.saturate);
+   assert(nir_op_infos[instr->op].num_inputs > 0);
+
+   /* If we have an explicitly sized destination, we can only handle the
+    * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid expression,
+    * we don't have the information right now to propagate that swizzle
+    * through.  We can only do that if the instruction is vectorized.
+    */
+   if (nir_op_infos[instr->op].output_size != 0) {
+      for (unsigned i = 0; i < num_components; i++) {
+         if (swizzle[i] != i)
+            return false;
+      }
+   }
+
+   /* Saved so a failed first attempt cannot leak bindings into the retry. */
+   const unsigned seen_on_entry = state->variables_seen;
+   bool matched = true;
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      /* If the source is an explicitly sized source, then we need to reset
+       * both the number of components and the swizzle.
+       */
+      if (nir_op_infos[instr->op].input_sizes[i] != 0) {
+         num_components = nir_op_infos[instr->op].input_sizes[i];
+         swizzle = identity_swizzle;
+      }
+
+      matched = match_value(expr->srcs[i], instr, i, num_components,
+                            swizzle, state);
+      if (!matched)
+         break;
+   }
+
+   if (matched)
+      return true;
+
+   if (nir_op_infos[instr->op].num_inputs != 2 ||
+       !(nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE))
+      return false;
+   state->variables_seen = seen_on_entry;
+   return match_value(expr->srcs[0], instr, 1, num_components,
+                      swizzle, state) &&
+          match_value(expr->srcs[1], instr, 0, num_components,
+                      swizzle, state);
+}
+
+/* Build the instructions for a replacement value, inserting them before
+ * "instr", and return an ALU source referring to the result.  Variables
+ * reuse a copy of the source recorded for them in "state".
+ */
+static nir_alu_src
+construct_value(const nir_search_value *value, nir_alu_type type,
+                unsigned num_components, struct match_state *state,
+                nir_instr *instr, void *mem_ctx)
+{
+   switch (value->type) {
+   case nir_search_value_expression: {
+      const nir_search_expression *expr = nir_search_value_as_expression(value);
+
+      if (nir_op_infos[expr->opcode].output_size != 0)
+         num_components = nir_op_infos[expr->opcode].output_size;
+
+      nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
+      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+      alu->dest.write_mask = (1 << num_components) - 1;
+      alu->dest.saturate = false;
+
+      for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
+         /* If the source is an explicitly sized source, then we need to reset
+          * the number of components to match.
+          */
+         if (nir_op_infos[alu->op].input_sizes[i] != 0)
+            num_components = nir_op_infos[alu->op].input_sizes[i];
+
+         alu->src[i] = construct_value(expr->srcs[i],
+                                       nir_op_infos[alu->op].input_types[i],
+                                       num_components,
+                                       state, instr, mem_ctx);
+      }
+
+      nir_instr_insert_before(instr, &alu->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
+      val.negate = false;
+      val.abs = false;
+      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
+      return val;
+   }
+
+   case nir_search_value_variable: {
+      const nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(state->variables_seen & (1 << var->variable));
+
+      nir_alu_src val;
+      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
+      assert(!var->is_constant);
+      return val;
+   }
+
+   case nir_search_value_constant: {
+      const nir_search_constant *c = nir_search_value_as_constant(value);
+      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+
+      switch (type) {
+      case nir_type_float:
+         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
+         load->value.f[0] = c->data.f;
+         break;
+      case nir_type_int:
+         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
+         load->value.i[0] = c->data.i;
+         break;
+      case nir_type_unsigned:
+      case nir_type_bool:
+         load->value.u[0] = c->data.u;
+         break;
+      default:
+         unreachable("Invalid alu source type");
+      }
+
+      nir_instr_insert_before(instr, &load->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&load->def);
+      val.negate = false;
+      val.abs = false;
+      memset(val.swizzle, 0, sizeof val.swizzle);
+      return val;
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/* Try to match "instr" against "search".  On success, build "replace" in
+ * front of it, point every use of the old result at a new imov of that
+ * value and remove "instr".  Returns the imov, or NULL without a match.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx)
+{
+   assert(instr->dest.dest.is_ssa);   /* must hold before .ssa is read */
+   const unsigned num_comps = instr->dest.dest.ssa.num_components;
+   uint8_t swizzle[4] = { 0, 0, 0, 0 };
+   for (unsigned i = 0; i < num_comps; ++i)
+      swizzle[i] = i;
+
+   struct match_state state;
+   state.variables_seen = 0;
+
+   if (!match_expression(search, instr, num_comps, swizzle, &state))
+      return NULL;
+
+   /* Inserting a mov may be unnecessary.  However, it's much easier to
+    * simply let copy propagation clean this up than to try to go through
+    * and rewrite swizzles ourselves.
+    */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = instr->dest.write_mask;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_comps, NULL);
+
+   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
+                                 num_comps, &state, &instr->instr, mem_ctx);
+   nir_instr_insert_before(&instr->instr, &mov->instr);
+
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+
+   /* We know this one has no more uses because we just rewrote them all,
+    * so we can remove it.  The rest of the matched expression, however, we
+    * don't know so much about.  We'll just let dead code clean them up.
+    */
+   nir_instr_remove(&instr->instr);
+
+   return mov;
+}
diff --git a/mesalib/src/glsl/nir/nir_search.h b/mesalib/src/glsl/nir/nir_search.h
new file mode 100644
index 000000000..7d4779294
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#ifndef _NIR_SEARCH_
+#define _NIR_SEARCH_
+
+#include "nir.h"
+
+#define NIR_SEARCH_MAX_VARIABLES 16
+
+typedef enum {
+   nir_search_value_expression,   /* value is a nir_search_expression */
+   nir_search_value_variable,     /* value is a nir_search_variable */
+   nir_search_value_constant,     /* value is a nir_search_constant */
+} nir_search_value_type;
+
+typedef struct {
+   nir_search_value_type type;   /* tells which of the structs below this is */
+} nir_search_value;
+
+typedef struct {
+   nir_search_value value;
+
+   /** The variable index;  Must be less than NIR_SEARCH_MAX_VARIABLES */
+   unsigned variable;
+
+   /** Indicates that the given variable must be a constant
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match constant values.
+    */
+   bool is_constant;
+
+   /** Indicates that the given variable must have a certain type
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match values that come from an ALU
+    * instruction with the given output type.  A type of nir_type_invalid
+    * means it can match any type.
+    *
+    * Note: A variable that is both constant and has a valid type will
+    * never match anything.
+    */
+   nir_alu_type type;
+} nir_search_variable;
+
+typedef struct {
+   nir_search_value value;
+
+   union {
+      uint32_t u;   /* stored when building unsigned and bool constants */
+      int32_t i;    /* compared for int, unsigned and bool sources */
+      float f;      /* compared for float sources */
+   } data;
+} nir_search_constant;
+
+typedef struct {
+   nir_search_value value;
+
+   nir_op opcode;   /* opcode an ALU instruction must have to match */
+   const nir_search_value *srcs[4];   /* one search value per opcode input */
+} nir_search_expression;
+
+NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
+                nir_search_variable, value)
+NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
+                nir_search_constant, value)
+NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
+                nir_search_expression, value)
+
+nir_alu_instr *   /* the inserted imov, or NULL if "search" did not match */
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx);
+
+#endif /* _NIR_SEARCH_ */
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
new file mode 100644
index 000000000..4d663b51b
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements "copy splitting" which is similar to structure splitting only
+ * it works on copy operations rather than the datatypes themselves.  The
+ * GLSL language allows you to copy an entire structure (which may contain
+ * arrays or other structures) from one variable to another in one go.
+ * Normally, in a language such as C this would be handled by a "structure
+ * splitting" pass that breaks up the structures.  Unfortunately for us,
+ * structures used in inputs or outputs can't be split.  Therefore,
+ * regardless of what we do, we have to be able to copy to/from
+ * structures.
+ *
+ * The primary purpose of structure splitting is to allow you to better
+ * optimize variable access and lower things to registers where you can.
+ * The primary issue here is that, if you lower the copy to a bunch of
+ * loads and stores, you lose a lot of information about the copy
+ * operation that you would like to keep around.  To solve this problem, we
+ * have a "copy splitting" pass that, instead of splitting the structures
+ * or lowering the copy into loads and stores, splits the copy operation
+ * into a bunch of copy operations, one for each leaf of the structure tree.
+ * If an intermediate array is encountered, it is referenced with a
+ * wildcard reference to indicate that the entire array is to be copied.
+ *
+ * As things become direct, array copies may be able to be losslessly
+ * lowered to having fewer and fewer wildcards.  However, until that
+ * happens we want to keep the information about the arrays intact.
+ *
+ * Prior to the copy splitting pass, there are no wildcard references but
+ * there may be incomplete references where the tail of the deref chain is
+ * an array or a structure and not a specific element.  After the copy
+ * splitting pass has completed, every variable deref will be a full-length
+ * dereference pointing to a single leaf in the structure type tree with
+ * possibly a few wildcard array dereferences.
+ */
+
+struct split_var_copies_state {
+   void *mem_ctx;    /* ralloc context for the new copies and their derefs */
+   void *dead_ctx;   /* scratch ralloc context, freed once an impl is done */
+};
+
+/* Walk the child links of a deref chain and return its last element. */
+static nir_deref *
+get_deref_tail(nir_deref *deref)
+{
+   for (; deref->child != NULL; deref = deref->child) {}
+   return deref;
+}
+
+/* Recursively constructs deref chains to split a copy instruction into
+ * multiple (if needed) copy instructions with full-length deref chains.
+ * External callers of this function should pass the tail and head of the
+ * deref chains found as the source and destination of the copy instruction
+ * into this function.
+ *
+ * \param  old_copy  The copy instruction we are splitting
+ * \param  dest_head The head of the destination deref chain we are building
+ * \param  src_head  The head of the source deref chain we are building
+ * \param  dest_tail The tail of the destination deref chain we are building
+ * \param  src_tail  The tail of the source deref chain we are building
+ * \param  state     The current split_var_copies_state object
+ */
+static void
+split_var_copy_instr(nir_intrinsic_instr *old_copy,
+                     nir_deref *dest_head, nir_deref *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail,
+                     struct split_var_copies_state *state)
+{
+   assert(src_tail->type == dest_tail->type);
+
+   /* Make sure these really are the tails of the deref chains */
+   assert(dest_tail->child == NULL);
+   assert(src_tail->child == NULL);
+
+   switch (glsl_get_base_type(src_tail->type)) {
+   case GLSL_TYPE_ARRAY: {
+      /* Make a wildcard dereference */
+      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+      deref->deref.type = glsl_get_array_element(src_tail->type);
+      deref->deref_array_type = nir_deref_array_type_wildcard;
+
+      /* Set the tail of both as the newly created wildcard deref.  It is
+       * safe to use the same wildcard in both places because a) we will be
+       * copying it before we put it in an actual instruction and b)
+       * everything that will potentially add another link in the deref
+       * chain will also add the same thing to both chains.
+       */
+      src_tail->child = &deref->deref;
+      dest_tail->child = &deref->deref;
+
+      split_var_copy_instr(old_copy, dest_head, src_head,
+                           dest_tail->child, src_tail->child, state);
+
+      /* Set it back to the way we found it */
+      src_tail->child = NULL;
+      dest_tail->child = NULL;
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT:
+      /* This is the only part that actually does any interesting
+       * splitting.  For array types, we just use wildcards and resolve
+       * them later.  For structure types, we need to emit one copy
+       * instruction for every structure element.  Because we may have
+       * structs inside structs, we just recurse and let the next level
+       * take care of any additional structures.
+       */
+      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
+         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
+         deref->deref.type = glsl_get_struct_field(src_tail->type, i);
+
+         /* Set the tail of both as the newly created structure deref.  It
+          * is safe to use the same deref in both places because a) we
+          * will be copying it before we put it in an actual instruction
+          * and b) everything that will potentially add another link in the
+          * deref chain will also add the same thing to both chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+      }
+      /* Set it back to the way we found it */
+      src_tail->child = NULL;
+      dest_tail->child = NULL;
+      break;
+
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_matrix(src_tail->type)) {
+         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+         deref->deref.type = glsl_get_column_type(src_tail->type);
+         deref->deref_array_type = nir_deref_array_type_wildcard;
+
+         /* Set the tail of both as the newly created wildcard deref.  It
+          * is safe to use the same wildcard in both places because a) we
+          * will be copying it before we put it in an actual instruction
+          * and b) everything that will potentially add another link in the
+          * deref chain will also add the same thing to both chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+
+         /* Set it back to the way we found it */
+         src_tail->child = NULL;
+         dest_tail->child = NULL;
+      } else {
+         /* At this point, we have fully built our deref chains and can
+          * actually add the new copy instruction.
+          */
+         nir_intrinsic_instr *new_copy =
+            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);
+
+         /* We need to make copies because a) this deref chain actually
+          * belongs to the copy instruction and b) the deref chains may
+          * have some of the same links due to the way we constructed them
+          */
+         nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
+         nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+
+         new_copy->variables[0] = nir_deref_as_var(dest);
+         new_copy->variables[1] = nir_deref_as_var(src);
+
+         /* Emit the new copy directly after the old one (later leaves thus
+          * land in front of earlier ones); the caller removes the old one.
+          */
+         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+      }
+      break;
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_INTERFACE:
+   default:
+      unreachable("Cannot copy these types");
+   }
+}
+
+/* Per-block callback: splits every copy_var whose source tail is an array,
+ * struct or matrix, then unlinks the old copy and parks it in dead_ctx.
+ */
+static bool
+split_var_copies_block(nir_block *block, void *void_state)
+{
+   struct split_var_copies_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+      nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+      if (copy->intrinsic != nir_intrinsic_copy_var)
+         continue;
+
+      nir_deref *dest_head = &copy->variables[0]->deref;
+      nir_deref *src_head = &copy->variables[1]->deref;
+      nir_deref *dest_tail = get_deref_tail(dest_head);
+      nir_deref *src_tail = get_deref_tail(src_head);
+
+      bool needs_split = false;
+      switch (glsl_get_base_type(src_tail->type)) {
+      case GLSL_TYPE_ARRAY:
+      case GLSL_TYPE_STRUCT:
+         needs_split = true;
+         break;
+      case GLSL_TYPE_FLOAT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_UINT:
+      case GLSL_TYPE_BOOL:
+         needs_split = glsl_type_is_matrix(src_tail->type);
+         break;
+      default:
+         unreachable("Invalid type");
+      }
+      if (needs_split) {
+         split_var_copy_instr(copy, dest_head, src_head,
+                              dest_tail, src_tail, state);
+         nir_instr_remove(&copy->instr);
+         ralloc_steal(state->dead_ctx, instr);
+      }
+   }
+
+   return true;
+}
+
+/* Split every copy_var in "impl"; dead_ctx is scratch freed at the end. */
+static void
+split_var_copies_impl(nir_function_impl *impl)
+{
+   struct split_var_copies_state state;
+   state.dead_ctx = ralloc_context(NULL);
+   state.mem_ctx = ralloc_parent(impl);
+
+   nir_foreach_block(impl, split_var_copies_block, &state);
+
+   ralloc_free(state.dead_ctx);
+}
+
+void
+nir_split_var_copies(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)   /* skip overloads lacking an impl */
+         split_var_copies_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
new file mode 100644
index 000000000..47cf45393
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -0,0 +1,535 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Implements the classic to-SSA algorithm described by Cytron et al. in
+ * "Efficiently Computing Static Single Assignment Form and the Control
+ * Dependence Graph."
+ */
+
+/*
+ * Inserts a trivial phi node, reg = phi(reg, reg, reg, ...), at the top of
+ * block.
+ *
+ * One source is created for each predecessor of the block.  Both the
+ * destination and every source still refer to the register itself.
+ */
+
+static void
+insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
+{
+   nir_phi_instr *phi = nir_phi_instr_create(mem_ctx);
+   struct set_entry *pred_entry;
+
+   phi->dest.reg.reg = reg;
+
+   set_foreach(block->predecessors, pred_entry) {
+      nir_phi_src *phi_src = ralloc(mem_ctx, nir_phi_src);
+
+      /* A plain, direct, non-SSA read of the register coming from pred. */
+      phi_src->pred = (nir_block *) pred_entry->key;
+      phi_src->src.is_ssa = false;
+      phi_src->src.reg.reg = reg;
+      phi_src->src.reg.indirect = NULL;
+      phi_src->src.reg.base_offset = 0;
+
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
+   }
+
+   nir_instr_insert_before_block(block, &phi->instr);
+}
+
+/*
+ * Places trivial phi nodes for every non-array register of impl.
+ *
+ * For each register, the blocks holding one of its definitions seed a
+ * worklist W.  Every block found in the dominance frontier of a worklist
+ * block receives a trivial phi (once per register) and is queued in turn.
+ *
+ * work[] and has_already[] store, per block index, the number of the last
+ * register iteration that queued the block / gave it a phi.  Comparing
+ * against iter_count avoids clearing the arrays between registers.
+ */
+static void
+insert_phi_nodes(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_parent(impl);
+
+   /*
+    * Number the blocks before anything is sized: the arrays below are
+    * dimensioned with impl->num_blocks and subscripted with block->index,
+    * so they must not be allocated from a count that predates the indexing
+    * (presumably nir_index_blocks refreshes both - TODO confirm).
+    */
+   nir_index_blocks(impl);
+
+   unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
+   unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
+
+   /*
+    * Since the work flags already prevent us from inserting a node that has
+    * ever been inserted into W, we don't need to use a set to represent W.
+    * Also, since no block can ever be inserted into W more than once, we know
+    * that the maximum size of W is the number of basic blocks in the
+    * function. So all we need to handle W is an array and a pointer to the
+    * next element to be inserted and the next element to be removed.
+    */
+   nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *));
+   unsigned w_start, w_end;
+
+   unsigned iter_count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      /* Array registers are not converted to SSA, so they get no phis. */
+      if (reg->num_array_elems != 0)
+         continue;
+
+      w_start = w_end = 0;
+      iter_count++;
+
+      /* Seed the worklist with every block that defines the register. */
+      struct set_entry *entry;
+      set_foreach(reg->defs, entry) {
+         nir_instr *def = (nir_instr *) entry->key;
+         if (work[def->block->index] < iter_count)
+            W[w_end++] = def->block;
+         work[def->block->index] = iter_count;
+      }
+
+      while (w_start != w_end) {
+         nir_block *cur = W[w_start++];
+         set_foreach(cur->dom_frontier, entry) {
+            nir_block *next = (nir_block *) entry->key;
+
+            /*
+             * If there's more than one return statement, then the end block
+             * can be a join point for some definitions. However, there are
+             * no instructions in the end block, so nothing would use those
+             * phi nodes. Of course, we couldn't place those phi nodes
+             * anyways due to the restriction of having no instructions in the
+             * end block...
+             */
+            if (next == impl->end_block)
+               continue;
+
+            if (has_already[next->index] < iter_count) {
+               insert_trivial_phi(reg, next, mem_ctx);
+               has_already[next->index] = iter_count;
+
+               /* The block now holds a definition, so queue it as well. */
+               if (work[next->index] < iter_count) {
+                  work[next->index] = iter_count;
+                  W[w_end++] = next;
+               }
+            }
+         }
+      }
+   }
+
+   free(work);
+   free(has_already);
+   free(W);
+}
+
+typedef struct {
+   nir_ssa_def **stack; /* SSA values pushed for this register so far;
+                         * NULL when the register is left untouched */
+   int index; /* position of the top of stack, -1 while it is empty */
+   unsigned num_defs; /**< used to add indices to debug names */
+#ifndef NDEBUG
+   unsigned stack_size; /* capacity of stack; only read by an assert */
+#endif
+} reg_state;
+
+typedef struct {
+   reg_state *states; /* per-register state, indexed by nir_register index */
+   void *mem_ctx; /* ralloc context for new instructions and names */
+   nir_instr *parent_instr; /* instruction whose sources/destinations are
+                             * being rewritten; NULL while an if condition
+                             * is rewritten */
+   nir_if *parent_if; /* the if statement used when parent_instr is NULL */
+   nir_function_impl *impl; /* function implementation being converted */
+
+   /* map from SSA value -> original register */
+   struct hash_table *ssa_map;
+} rewrite_state;
+
+/*
+ * Returns the SSA value that currently stands for reg.
+ *
+ * That is the value on top of the register's stack.  When the stack is
+ * empty the register is being read before any definition, so a fresh
+ * undefined SSA value is created and returned instead.
+ */
+static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
+{
+   reg_state *rs = &state->states[reg->index];
+
+   if (rs->index != -1)
+      return rs->stack[rs->index];
+
+   /*
+    * Undefined register: materialize an undefined SSA value so that the
+    * information "this source is undefined" is preserved.
+    */
+   nir_ssa_undef_instr *undef =
+      nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
+
+   /*
+    * Putting the undef right before the instruction being rewritten is not
+    * possible when that instruction is a phi whose source we are rewriting,
+    * so take the next easiest option and put it at the very beginning of
+    * the program.  The exact place does not really matter because undefined
+    * instructions are going to be ignored in the backend.
+    */
+   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
+
+   return &undef->def;
+}
+
+/*
+ * Source callback: turns a register read into a read of the SSA value that
+ * currently represents the register.
+ *
+ * Sources that are already SSA, or that read a register which is not being
+ * converted (no stack), are left alone.  The use is recorded on the SSA
+ * value, either as an instruction use or, when no parent instruction is
+ * set, as an if-condition use.  Always returns true.
+ */
+static bool
+rewrite_use(nir_src *src, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+
+   if (src->is_ssa)
+      return true;
+
+   nir_register *reg = src->reg.reg;
+
+   if (state->states[reg->index].stack == NULL)
+      return true;
+
+   nir_ssa_def *def = get_ssa_src(reg, state);
+
+   src->is_ssa = true;
+   src->ssa = def;
+
+   if (state->parent_instr == NULL)
+      _mesa_set_add(def->if_uses, state->parent_if);
+   else
+      _mesa_set_add(def->uses, state->parent_instr);
+
+   return true;
+}
+
+/*
+ * Destination callback for the forward walk: replaces a register write by
+ * a brand new SSA definition.
+ *
+ * The new value is pushed on the register's stack, making it the current
+ * value of the register, and is remembered in ssa_map so that the backward
+ * walk can pop it again.  Destinations that are already SSA, or that write
+ * a register which is not being converted, are skipped.  Always returns
+ * true.
+ */
+static bool
+rewrite_def_forwards(nir_dest *dest, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+
+   if (dest->is_ssa)
+      return true;
+
+   nir_register *reg = dest->reg.reg;
+   unsigned index = reg->index;
+   reg_state *rs = &state->states[index];
+
+   if (rs->stack == NULL)
+      return true;
+
+   /* Named registers yield "<name>_<n>" debug names for their values. */
+   char *name = reg->name
+      ? ralloc_asprintf(state->mem_ctx, "%s_%u", reg->name, rs->num_defs)
+      : NULL;
+
+   nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
+
+   /* push our SSA destination on the stack */
+   rs->index++;
+   assert(state->states[index].index < state->states[index].stack_size);
+   rs->stack[rs->index] = &dest->ssa;
+   rs->num_defs++;
+
+   _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg);
+
+   return true;
+}
+
+/*
+ * Forward rewrite of one ALU instruction.
+ *
+ * The sources are rewritten first.  If the instruction writes every
+ * channel of its destination register, the destination simply becomes a
+ * new SSA definition.  Otherwise the instruction is narrowed to produce
+ * only the channels it actually computes, and a vecN instruction is
+ * inserted right after it that merges those channels with the previous
+ * value of the register; that vecN becomes the new definition.
+ */
+static void
+rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
+{
+   state->parent_instr = &instr->instr;
+
+   nir_foreach_src(&instr->instr, rewrite_use, state);
+
+   if (instr->dest.dest.is_ssa)
+      return;
+
+   nir_register *reg = instr->dest.dest.reg.reg;
+
+   if (state->states[reg->index].stack == NULL)
+      return;
+
+   const unsigned write_mask = instr->dest.write_mask;
+
+   /* Every channel of the register is written: a plain definition. */
+   if (write_mask == (1 << reg->num_components) - 1) {
+      rewrite_def_forwards(&instr->dest.dest, state);
+      return;
+   }
+
+   /* output_size == 0 marks an operation that works per component. */
+   const bool per_component = nir_op_infos[instr->op].output_size == 0;
+
+   /*
+    * Number of components of the narrowed instruction: the count of
+    * enabled channels in the (4-bit) write mask for per-component
+    * operations, the fixed output size of the operation otherwise.
+    */
+   unsigned num_components = 0;
+   if (per_component) {
+      for (unsigned chan = 0; chan < 4; chan++)
+         num_components += (write_mask >> chan) & 1;
+   } else {
+      num_components = nir_op_infos[instr->op].output_size;
+   }
+
+   char *name = NULL;
+   if (reg->name)
+      name = ralloc_asprintf(state->mem_ctx, "%s_%u", reg->name,
+                             state->states[reg->index].num_defs);
+
+   instr->dest.write_mask = (1 << num_components) - 1;
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
+
+   if (per_component) {
+      /*
+       * The output channels have been compacted, so the swizzles of the
+       * per-component inputs have to be compacted the same way: the
+       * swizzle of the k-th enabled channel moves to position k.
+       */
+      for (unsigned s = 0; s < nir_op_infos[instr->op].num_inputs; s++) {
+         if (nir_op_infos[instr->op].input_sizes[s] != 0)
+            continue;
+
+         unsigned compacted[4] = {0, 0, 0, 0};
+         unsigned out = 0;
+
+         for (unsigned chan = 0; chan < 4; chan++) {
+            if ((write_mask >> chan) & 1)
+               compacted[out++] = instr->src[s].swizzle[chan];
+         }
+
+         for (unsigned chan = 0; chan < 4; chan++)
+            instr->src[s].swizzle[chan] = compacted[chan];
+      }
+   }
+
+   /* Pick the vecN matching the full width of the register. */
+   nir_op vec_op;
+   switch (reg->num_components) {
+   case 2:
+      vec_op = nir_op_vec2;
+      break;
+   case 3:
+      vec_op = nir_op_vec3;
+      break;
+   case 4:
+      vec_op = nir_op_vec4;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
+
+   vec->dest.dest.reg.reg = reg;
+   vec->dest.write_mask = (1 << reg->num_components) - 1;
+
+   nir_ssa_def *old_src = get_ssa_src(reg, state);
+   nir_ssa_def *new_src = &instr->dest.dest.ssa;
+
+   /*
+    * Written channels come from the narrowed instruction, all the others
+    * keep the previous value of the register.
+    */
+   unsigned ssa_index = 0;
+   for (unsigned i = 0; i < reg->num_components; i++) {
+      const bool written = (write_mask >> i) & 1;
+
+      vec->src[i].src.is_ssa = true;
+
+      if (!written) {
+         vec->src[i].src.ssa = old_src;
+         vec->src[i].swizzle[0] = i;
+         continue;
+      }
+
+      vec->src[i].src.ssa = new_src;
+      vec->src[i].swizzle[0] = per_component ? ssa_index : i;
+      ssa_index++;
+   }
+
+   nir_instr_insert_after(&instr->instr, &vec->instr);
+
+   /* The vecN, not the original instruction, defines the register now. */
+   state->parent_instr = &vec->instr;
+   rewrite_def_forwards(&vec->dest.dest, state);
+}
+
+/*
+ * Forward rewrite of a phi: only its destination is turned into an SSA
+ * definition here.  Phi sources are not touched by this function; they are
+ * rewritten by rewrite_phi_sources.
+ */
+static void
+rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state)
+{
+   nir_instr *as_instr = &instr->instr;
+
+   state->parent_instr = as_instr;
+   rewrite_def_forwards(&instr->dest, state);
+}
+
+/*
+ * Forward rewrite of a single instruction.  ALU and phi instructions have
+ * dedicated handlers; for every other kind the sources are rewritten first
+ * and the destinations afterwards.
+ */
+static void
+rewrite_instr_forward(nir_instr *instr, rewrite_state *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      rewrite_alu_instr_forward(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      rewrite_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   default:
+      state->parent_instr = instr;
+
+      nir_foreach_src(instr, rewrite_use, state);
+      nir_foreach_dest(instr, rewrite_def_forwards, state);
+      break;
+   }
+}
+
+/*
+ * Rewrites, in every phi at the top of block, the source that corresponds
+ * to the predecessor pred.  The walk stops at the first instruction that
+ * is not a phi.
+ */
+static void
+rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state)
+{
+   nir_foreach_instr(block, cur) {
+      if (cur->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(cur);
+
+      state->parent_instr = cur;
+
+      /* Only the first source coming from pred is rewritten. */
+      nir_foreach_phi_src(phi, phi_src) {
+         if (phi_src->pred != pred)
+            continue;
+
+         rewrite_use(&phi_src->src, state);
+         break;
+      }
+   }
+}
+
+/*
+ * Destination callback for the backward walk: pops the stack of the
+ * register that an SSA definition was created from.
+ *
+ * Definitions that are not SSA, or that are not recorded in ssa_map, are
+ * ignored.  Always returns true.
+ */
+static bool
+rewrite_def_backwards(nir_dest *dest, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+   struct hash_entry *entry;
+
+   if (!dest->is_ssa)
+      return true;
+
+   entry = _mesa_hash_table_search(state->ssa_map, &dest->ssa);
+   if (entry == NULL)
+      return true;
+
+   nir_register *reg = (nir_register *) entry->data;
+   unsigned index = reg->index;
+
+   state->states[index].index -= 1;
+   assert(state->states[index].index >= -1);
+
+   return true;
+}
+
+/*
+ * Backward step for one instruction: runs rewrite_def_backwards on each of
+ * its destinations.
+ */
+static void
+rewrite_instr_backwards(nir_instr *instr, rewrite_state *state)
+{
+   nir_foreach_dest(instr, rewrite_def_backwards, state);
+}
+
+/*
+ * Renames one block and, recursively, its children in the dominance tree.
+ *
+ * The instructions of the block are rewritten forwards, then the condition
+ * of an if statement directly following the block, then the phi sources of
+ * the successor blocks.  After the dominance-tree children have been
+ * processed, the definitions made by this block are popped again by
+ * walking its instructions in reverse.
+ */
+static void
+rewrite_block(nir_block *block, rewrite_state *state)
+{
+   /* The safe iterator steps over the instructions that get inserted after
+    * the current one, which is what we want because those instructions
+    * (vector gather, conditional select) will already be in SSA form.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      rewrite_instr_forward(instr, state);
+   }
+
+   if (block != state->impl->end_block &&
+       !nir_cf_node_is_last(&block->cf_node)) {
+      nir_cf_node *following = nir_cf_node_next(&block->cf_node);
+
+      if (following->type == nir_cf_node_if) {
+         nir_if *if_stmt = nir_cf_node_as_if(following);
+
+         /* No parent instruction: the use is recorded as an if use. */
+         state->parent_instr = NULL;
+         state->parent_if = if_stmt;
+         rewrite_use(&if_stmt->condition, state);
+      }
+   }
+
+   for (unsigned s = 0; s < 2; s++) {
+      if (block->successors[s])
+         rewrite_phi_sources(block->successors[s], block, state);
+   }
+
+   for (unsigned child = 0; child < block->num_dom_children; child++)
+      rewrite_block(block->dom_children[child], state);
+
+   nir_foreach_instr_reverse(block, instr) {
+      rewrite_instr_backwards(instr, state);
+   }
+}
+
+/*
+ * Unlinks from the register list of impl every register that went through
+ * the conversion, i.e. every register that was given a stack.  Registers
+ * without a stack stay in the list.
+ */
+static void
+remove_unused_regs(nir_function_impl *impl, rewrite_state *state)
+{
+   foreach_list_typed_safe(nir_register, reg, node, &impl->registers) {
+      const reg_state *rs = &state->states[reg->index];
+
+      if (rs->stack != NULL) {
+         exec_node_remove(&reg->node);
+      }
+   }
+}
+
+/*
+ * Prepares the renaming state for impl: the SSA value -> register map and
+ * one reg_state per register index.
+ *
+ * Array registers get a NULL stack, which marks them as "not converted"
+ * for the rest of the pass.  Every other register gets an empty stack
+ * sized after its number of definitions.
+ */
+static void
+init_rewrite_state(nir_function_impl *impl, rewrite_state *state)
+{
+   state->impl = impl;
+   state->mem_ctx = ralloc_parent(impl);
+   state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+   state->states = ralloc_array(NULL, reg_state, impl->reg_alloc);
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      assert(reg->index < impl->reg_alloc);
+
+      reg_state *rs = &state->states[reg->index];
+
+      if (reg->num_array_elems > 0) {
+         rs->stack = NULL;
+         continue;
+      }
+
+      /*
+       * Conservative estimate of the stack depth: the number of
+       * definitions of the register. This function *must* therefore be
+       * called after the phi nodes have been inserted, so that their
+       * definitions are counted too.
+       */
+      unsigned stack_size = reg->defs->entries;
+
+      rs->stack = ralloc_array(state->states, nir_ssa_def *, stack_size);
+#ifndef NDEBUG
+      rs->stack_size = stack_size;
+#endif
+      rs->index = -1;
+      rs->num_defs = 0;
+   }
+}
+
+/*
+ * Releases what init_rewrite_state allocated: the per-register state array
+ * and the SSA value -> register map.  The per-register stacks were
+ * allocated with the state array as their ralloc parent, so presumably
+ * they are released together with it.
+ */
+static void
+destroy_rewrite_state(rewrite_state *state)
+{
+   ralloc_free(state->states);
+   _mesa_hash_table_destroy(state->ssa_map, NULL);
+}
+
+/*
+ * Converts one function implementation to SSA form.
+ *
+ * Steps, in order: require dominance metadata, insert trivial phi nodes,
+ * rename starting from the start block, then drop the registers that were
+ * converted.  Block indices and dominance are declared preserved.
+ */
+void
+nir_convert_to_ssa_impl(nir_function_impl *impl)
+{
+   rewrite_state state;
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   /* Phis must exist before the state is set up: stacks are sized after
+    * the number of definitions, phi definitions included.
+    */
+   insert_phi_nodes(impl);
+   init_rewrite_state(impl, &state);
+
+   rewrite_block(impl->start_block, &state);
+   remove_unused_regs(impl, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   destroy_rewrite_state(&state);
+}
+
+/*
+ * Converts to SSA form every overload of the shader that has an
+ * implementation attached.
+ */
+void
+nir_convert_to_ssa(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL) {
+         nir_convert_to_ssa_impl(overload->impl);
+      }
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
new file mode 100644
index 000000000..a13c3e12a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir_types.h"
+#include "ir.h"
+
+/*
+ * Prints a type to fp.
+ *
+ * Arrays are printed as their element type followed by "[length]".
+ * Structures whose name is not a GL identifier are printed as
+ * "name@address"; everything else is printed by name only.
+ */
+void
+glsl_print_type(const glsl_type *type, FILE *fp)
+{
+   if (type->base_type == GLSL_TYPE_ARRAY) {
+      glsl_print_type(type->fields.array, fp);
+      fprintf(fp, "[%u]", type->length);
+      return;
+   }
+
+   const bool user_struct = (type->base_type == GLSL_TYPE_STRUCT)
+                            && !is_gl_identifier(type->name);
+
+   if (user_struct)
+      fprintf(fp, "%s@%p", type->name, (void *) type);
+   else
+      fprintf(fp, "%s", type->name);
+}
+
+/*
+ * Prints the body of a structure type to fp: one tab-indented
+ * "type name;" line per field, wrapped in "struct { ... }".
+ * The type must be a structure.
+ */
+void
+glsl_print_struct(const glsl_type *type, FILE *fp)
+{
+   assert(type->base_type == GLSL_TYPE_STRUCT);
+
+   fputs("struct {\n", fp);
+
+   const unsigned num_fields = type->length;
+   for (unsigned field = 0; field < num_fields; ++field) {
+      fputs("\t", fp);
+      glsl_print_type(type->fields.structure[field].type, fp);
+      fprintf(fp, " %s;\n", type->fields.structure[field].name);
+   }
+
+   fputs("}\n", fp);
+}
+
+/*
+ * Returns the type obtained by indexing into type once: the column type
+ * for a matrix, the array element type otherwise.
+ */
+const glsl_type *
+glsl_get_array_element(const glsl_type* type)
+{
+   return type->is_matrix() ? type->column_type() : type->fields.array;
+}
+
+/* Returns the type of the field at position index of a structure type. */
+const glsl_type *
+glsl_get_struct_field(const glsl_type *type, unsigned index)
+{
+   const glsl_type *const field_type = type->fields.structure[index].type;
+
+   return field_type;
+}
+
+/* C-callable wrapper around glsl_type::column_type(). */
+const struct glsl_type *
+glsl_get_column_type(const struct glsl_type *type)
+{
+   const struct glsl_type *const column = type->column_type();
+
+   return column;
+}
+
+/* C-callable accessor for the base_type member of a type. */
+enum glsl_base_type
+glsl_get_base_type(const struct glsl_type *type)
+{
+   const enum glsl_base_type base = type->base_type;
+
+   return base;
+}
+
+/* C-callable accessor for the vector_elements member of a type. */
+unsigned
+glsl_get_vector_elements(const struct glsl_type *type)
+{
+   const unsigned elements = type->vector_elements;
+
+   return elements;
+}
+
+/* C-callable wrapper around glsl_type::components(). */
+unsigned
+glsl_get_components(const struct glsl_type *type)
+{
+   const unsigned count = type->components();
+
+   return count;
+}
+
+/* C-callable accessor for the matrix_columns member of a type. */
+unsigned
+glsl_get_matrix_columns(const struct glsl_type *type)
+{
+   const unsigned columns = type->matrix_columns;
+
+   return columns;
+}
+
+/*
+ * C-callable accessor for the length member of a type (the value printed
+ * as the array size and used as the field count elsewhere in this file).
+ */
+unsigned
+glsl_get_length(const struct glsl_type *type)
+{
+   const unsigned length = type->length;
+
+   return length;
+}
+
+/* Returns the name of the field at position index of a structure type. */
+const char *
+glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
+{
+   const char *const field_name = type->fields.structure[index].name;
+
+   return field_name;
+}
+
+/* C-callable wrapper around glsl_type::is_void(). */
+bool
+glsl_type_is_void(const glsl_type *type)
+{
+   const bool is_void = type->is_void();
+
+   return is_void;
+}
+
+/* C-callable wrapper around glsl_type::is_vector(). */
+bool
+glsl_type_is_vector(const struct glsl_type *type)
+{
+   const bool is_vector = type->is_vector();
+
+   return is_vector;
+}
+
+/* C-callable wrapper around glsl_type::is_scalar(). */
+bool
+glsl_type_is_scalar(const struct glsl_type *type)
+{
+   const bool is_scalar = type->is_scalar();
+
+   return is_scalar;
+}
+
+/* C-callable wrapper around glsl_type::is_matrix(). */
+bool
+glsl_type_is_matrix(const struct glsl_type *type)
+{
+   const bool is_matrix = type->is_matrix();
+
+   return is_matrix;
+}
+
+/* Returns glsl_type::void_type. */
+const glsl_type *
+glsl_void_type(void)
+{
+   const glsl_type *const void_type = glsl_type::void_type;
+
+   return void_type;
+}
+
+/* Returns glsl_type::vec4_type. */
+const glsl_type *
+glsl_vec4_type(void)
+{
+   const glsl_type *const vec4_type = glsl_type::vec4_type;
+
+   return vec4_type;
+}
+
+/*
+ * C-callable wrapper around glsl_type::get_array_instance(): returns the
+ * array type made of the given number of elements of type base.
+ */
+const glsl_type *
+glsl_array_type(const glsl_type *base, unsigned elements)
+{
+   const glsl_type *const array = glsl_type::get_array_instance(base,
+                                                                elements);
+
+   return array;
+}
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
new file mode 100644
index 000000000..494051a67
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#pragma once
+
+/* C wrapper around glsl_types.h */
+
+#include "../glsl_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#else
+struct glsl_type;
+#endif
+
+#include <stdio.h>
+
+void glsl_print_type(const struct glsl_type *type, FILE *fp);
+void glsl_print_struct(const struct glsl_type *type, FILE *fp);
+
+const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type,
+                                              unsigned index);
+
+const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
+
+const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
+
+enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
+
+unsigned glsl_get_vector_elements(const struct glsl_type *type);
+
+unsigned glsl_get_components(const struct glsl_type *type);
+
+unsigned glsl_get_matrix_columns(const struct glsl_type *type);
+
+unsigned glsl_get_length(const struct glsl_type *type);
+
+const char *glsl_get_struct_elem_name(const struct glsl_type *type,
+                                      unsigned index);
+
+
+bool glsl_type_is_void(const struct glsl_type *type);
+bool glsl_type_is_vector(const struct glsl_type *type);
+bool glsl_type_is_scalar(const struct glsl_type *type);
+bool glsl_type_is_matrix(const struct glsl_type *type);
+
+const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_vec4_type(void);
+const struct glsl_type *glsl_array_type(const struct glsl_type *base,
+                                        unsigned elements);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
new file mode 100644
index 000000000..a3fe9d620
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -0,0 +1,979 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+/*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+/* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+#ifdef DEBUG
+
+/*
+ * Per-register validation state.
+ */
+
+typedef struct {
+   /*
+    * equivalent to the uses, if_uses and defs sets in nir_register, but
+    * built up by the validator. At the end, we verify that the sets have
+    * the same entries.
+    */
+   struct set *uses, *if_uses, *defs;
+   nir_function_impl *where_defined; /* NULL for global registers */
+} reg_validate_state;
+
+typedef struct {
+   /*
+    * equivalent to the uses and if_uses sets in nir_ssa_def, but built up by
+    * the validator. At the end, we verify that the sets have the same
+    * entries.
+    */
+   struct set *uses, *if_uses;
+   nir_function_impl *where_defined; /* the function implementation that
+                                      * was being validated when the
+                                      * definition was encountered */
+} ssa_def_validate_state;
+
+typedef struct {
+   /* map of register -> validation state (reg_validate_state) */
+   struct hash_table *regs;
+
+   /* the current shader being validated */
+   nir_shader *shader;
+
+   /* the current instruction being validated; NULL when the source being
+    * checked belongs to if_stmt instead
+    */
+   nir_instr *instr;
+
+   /* the current basic block being validated */
+   nir_block *block;
+
+   /* the current if statement being validated */
+   nir_if *if_stmt;
+
+   /* the parent of the current cf node being visited */
+   nir_cf_node *parent_node;
+
+   /* the current function implementation being validated */
+   nir_function_impl *impl;
+
+   /* map of SSA value -> validation state (ssa_def_validate_state), which
+    * records, among other things, the function implementation where the
+    * value is defined
+    */
+   struct hash_table *ssa_defs;
+
+   /* bitset of ssa definitions we have found; used to check uniqueness */
+   BITSET_WORD *ssa_defs_found;
+
+   /* bitset of registers we have currently found; used to check uniqueness */
+   BITSET_WORD *regs_found;
+
+   /* map of local variable -> function implementation where it is defined */
+   struct hash_table *var_defs;
+} validate_state;
+
+static void validate_src(nir_src *src, validate_state *state);
+
+/*
+ * Validates a register source.
+ *
+ * The register must be known to the validator.  The use is recorded in the
+ * validator's own use sets and must also be present in the register's use
+ * sets.  Local registers may only be read in the function that declares
+ * them, constant offsets must stay inside array registers, and an indirect
+ * offset is only allowed on array registers, with at most one level of
+ * indirection.
+ */
+static void
+validate_reg_src(nir_reg_src *src, validate_state *state)
+{
+   assert(src->reg != NULL);
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs, src->reg);
+   assert(entry);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+   if (state->instr == NULL) {
+      /* The source is the condition of an if statement. */
+      assert(state->if_stmt);
+      _mesa_set_add(reg_state->if_uses, state->if_stmt);
+
+      assert(_mesa_set_search(src->reg->if_uses, state->if_stmt));
+   } else {
+      _mesa_set_add(reg_state->uses, state->instr);
+
+      assert(_mesa_set_search(src->reg->uses, state->instr));
+   }
+
+   if (!src->reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "using a register declared in a different function");
+   }
+
+   assert((src->reg->num_array_elems == 0 ||
+          src->base_offset < src->reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (src->indirect == NULL)
+      return;
+
+   assert(src->reg->num_array_elems != 0);
+   assert((src->indirect->is_ssa || src->indirect->reg.indirect == NULL) &&
+          "only one level of indirection allowed");
+   validate_src(src->indirect, state);
+}
+
+/*
+ * Validates a use of an SSA value.
+ *
+ * The value must have been seen as a definition already, in the function
+ * currently being validated.  The use is recorded in the validator's own
+ * sets and must also be present in the use sets of the value itself.
+ */
+static void
+validate_ssa_src(nir_ssa_def *def, validate_state *state)
+{
+   assert(def != NULL);
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+
+   assert(entry);
+
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+   assert(def_state->where_defined == state->impl &&
+          "using an SSA value defined in a different function");
+
+   if (state->instr == NULL) {
+      /* The value is used as the condition of an if statement. */
+      assert(state->if_stmt);
+      _mesa_set_add(def_state->if_uses, state->if_stmt);
+
+      assert(_mesa_set_search(def->if_uses, state->if_stmt));
+   } else {
+      _mesa_set_add(def_state->uses, state->instr);
+
+      assert(_mesa_set_search(def->uses, state->instr));
+   }
+
+   /* TODO validate that the use is dominated by the definition */
+}
+
+/* Validates a source with the checker matching its kind (register or SSA). */
+static void
+validate_src(nir_src *src, validate_state *state)
+{
+   if (!src->is_ssa) {
+      validate_reg_src(&src->reg, state);
+      return;
+   }
+
+   validate_ssa_src(src->ssa, state);
+}
+
+/*
+ * Validates source number index of an ALU instruction: every swizzle entry
+ * must be below 4, the swizzles of the channels the instruction really
+ * reads must stay inside the source value, and the underlying source must
+ * itself be valid.
+ */
+static void
+validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
+{
+   nir_alu_src *src = &instr->src[index];
+
+   unsigned num_components;
+   if (src->src.is_ssa)
+      num_components = src->src.ssa->num_components;
+   else if (src->src.reg.reg->is_packed)
+      num_components = 4; /* can't check anything */
+   else
+      num_components = src->src.reg.reg->num_components;
+
+   for (unsigned i = 0; i < 4; i++) {
+      assert(src->swizzle[i] < 4);
+
+      if (nir_alu_instr_channel_used(instr, index, i))
+         assert(src->swizzle[i] < num_components);
+   }
+
+   validate_src(&src->src, state);
+}
+
+/*
+ * Validates a register destination.
+ *
+ * The current instruction must be listed in the register's defs set and
+ * the register must be known to the validator, which records the
+ * definition in its own defs set.  The locality, bounds and indirection
+ * rules are the same as for register sources.
+ */
+static void
+validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+{
+   assert(dest->reg != NULL);
+
+   struct set_entry *entry = _mesa_set_search(dest->reg->defs, state->instr);
+   assert(entry && "definition not in nir_register.defs");
+
+   struct hash_entry *entry2 = _mesa_hash_table_search(state->regs,
+                                                       dest->reg);
+
+   assert(entry2);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
+   _mesa_set_add(reg_state->defs, state->instr);
+
+   if (!dest->reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "writing to a register declared in a different function");
+   }
+
+   assert((dest->reg->num_array_elems == 0 ||
+          dest->base_offset < dest->reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (dest->indirect == NULL)
+      return;
+
+   assert(dest->reg->num_array_elems != 0);
+   assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) &&
+          "only one level of indirection allowed");
+   validate_src(dest->indirect, state);
+}
+
+/*
+ * Validates an SSA definition and starts tracking it.
+ *
+ * Its index must be below the ssa_alloc of the current function and must
+ * not have been seen before, and the value may have at most 4 components.
+ * A tracking record with empty use sets is then registered in ssa_defs so
+ * that later uses can be checked against it.
+ */
+static void
+validate_ssa_def(nir_ssa_def *def, validate_state *state)
+{
+   assert(def->index < state->impl->ssa_alloc);
+   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+   BITSET_SET(state->ssa_defs_found, def->index);
+
+   assert(def->num_components <= 4);
+
+   /* The record is a ralloc child of the table, its sets of the record. */
+   ssa_def_validate_state *tracked = ralloc(state->ssa_defs,
+                                            ssa_def_validate_state);
+
+   tracked->where_defined = state->impl;
+   tracked->uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   tracked->if_uses = _mesa_set_create(tracked, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+
+   _mesa_hash_table_insert(state->ssa_defs, def, tracked);
+}
+
+static void /* Dispatches to the SSA or register destination validator. */
+validate_dest(nir_dest *dest, validate_state *state)
+{
+   if (dest->is_ssa)
+      validate_ssa_def(&dest->ssa, state);
+   else
+      validate_reg_dest(&dest->reg, state);
+}
+
+static void /* Validates an ALU destination: write mask, saturate, dest. */
+validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+{
+   unsigned dest_size =
+      dest->dest.is_ssa ? dest->dest.ssa.num_components
+                        : dest->dest.reg.reg->num_components;
+   bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed;
+   /*
+    * validate that the instruction doesn't write to components not in the
+    * register/SSA value (packed registers are exempt from this check)
+    */
+   assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+   /* validate that saturate is only ever used on instructions with
+    * destinations of type float
+    */
+   nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+   assert(nir_op_infos[alu->op].output_type == nir_type_float ||
+          !dest->saturate);
+
+   validate_dest(&dest->dest, state);
+}
+
+static void /* Validates an ALU instruction: opcode, destination, sources. */
+validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+{
+   assert(instr->op < nir_num_opcodes);
+
+   validate_alu_dest(&instr->dest, state);
+   /* The opcode's info entry says how many sources to check. */
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      validate_alu_src(instr, i, state);
+   }
+}
+
+static void /* Type-checks every link of a deref chain against its parent. */
+validate_deref_chain(nir_deref *deref, validate_state *state)
+{
+   nir_deref *parent = NULL;
+   while (deref != NULL) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array: /* NOTE(review): parent is NULL at chain head */
+         assert(deref->type == glsl_get_array_element(parent->type));
+         if (nir_deref_as_array(deref)->deref_array_type ==
+             nir_deref_array_type_indirect)
+            validate_src(&nir_deref_as_array(deref)->indirect, state);
+         break;
+
+      case nir_deref_type_struct: /* also reads parent->type */
+         assert(deref->type ==
+                glsl_get_struct_field(parent->type,
+                                      nir_deref_as_struct(deref)->index));
+         break;
+
+      case nir_deref_type_var:
+         break;
+
+      default:
+         assert(!"Invalid deref type");
+         break;
+      }
+      /* The link just checked is the parent of the next one. */
+      parent = deref;
+      deref = deref->child;
+   }
+}
+
+static void /* Checks that a local variable belongs to the current impl. */
+validate_var_use(nir_variable *var, validate_state *state)
+{
+   if (var->data.mode == nir_var_local) {
+      struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+      /* The entry's data is the impl recorded for this variable. */
+      assert(entry);
+      assert((nir_function_impl *) entry->data == state->impl);
+   }
+}
+
+static void /* Validates a variable deref and the chain hanging off it. */
+validate_deref_var(nir_deref_var *deref, validate_state *state)
+{
+   assert(deref != NULL);
+   assert(deref->deref.type == deref->var->type);
+
+   validate_var_use(deref->var, state);
+
+   validate_deref_chain(&deref->deref, state);
+}
+
+static void /* Validates an intrinsic's sources, destination and variables. */
+validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+{
+   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      unsigned components_read =
+         nir_intrinsic_infos[instr->intrinsic].src_components[i];
+      if (components_read == 0) /* 0 in the table: use the instr's count */
+         components_read = instr->num_components;
+
+      assert(components_read > 0);
+      /* Packed registers are exempt from the component-count check. */
+      if (instr->src[i].is_ssa) {
+         assert(components_read <= instr->src[i].ssa->num_components);
+      } else if (!instr->src[i].reg.reg->is_packed) {
+         assert(components_read <= instr->src[i].reg.reg->num_components);
+      }
+
+      validate_src(&instr->src[i], state);
+   }
+
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      unsigned components_written =
+         nir_intrinsic_infos[instr->intrinsic].dest_components;
+      if (components_written == 0) /* same fallback as for the sources */
+         components_written = instr->num_components;
+
+      assert(components_written > 0);
+
+      if (instr->dest.is_ssa) {
+         assert(components_written <= instr->dest.ssa.num_components);
+      } else if (!instr->dest.reg.reg->is_packed) {
+         assert(components_written <= instr->dest.reg.reg->num_components);
+      }
+
+      validate_dest(&instr->dest, state);
+   }
+
+   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned i = 0; i < num_vars; i++) {
+      validate_deref_var(instr->variables[i], state);
+   }
+   /* Mode checks for the variable load/store/copy intrinsics. */
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
+      break;
+   case nir_intrinsic_store_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+             instr->variables[0]->var->data.mode != nir_var_uniform);
+      break;
+   case nir_intrinsic_copy_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+             instr->variables[0]->var->data.mode != nir_var_uniform);
+      assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
+      break;
+   default:
+      break;
+   }
+}
+
+static void /* Validates a texture instruction's dest, sources and sampler. */
+validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+{
+   validate_dest(&instr->dest, state);
+   /* Each source type may appear at most once per instruction. */
+   bool src_type_seen[nir_num_tex_src_types];
+   for (unsigned i = 0; i < nir_num_tex_src_types; i++)
+      src_type_seen[i] = false;
+   /* NOTE(review): src_type is used as an index without a range check. */
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      assert(!src_type_seen[instr->src[i].src_type]);
+      src_type_seen[instr->src[i].src_type] = true;
+      validate_src(&instr->src[i].src, state);
+   }
+
+   if (instr->sampler != NULL)
+      validate_deref_var(instr->sampler, state);
+}
+
+static void /* Checks a call against the signature of its callee. */
+validate_call_instr(nir_call_instr *instr, validate_state *state)
+{
+   if (instr->return_deref == NULL)
+      assert(glsl_type_is_void(instr->callee->return_type));
+   else
+      assert(instr->return_deref->deref.type == instr->callee->return_type);
+
+   assert(instr->num_params == instr->callee->num_params);
+
+   for (unsigned i = 0; i < instr->num_params; i++) {
+      assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+      validate_deref_var(instr->params[i], state);
+   }
+   /* Void calls have no return deref and validate_deref_var rejects NULL. */
+   if (instr->return_deref != NULL)
+      validate_deref_var(instr->return_deref, state);
+}
+
+static void /* Validates the SSA value defined by a load_const. */
+validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+static void /* Validates the SSA value defined by an ssa_undef. */
+validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+static void /* Validates a phi's destination and its number of sources. */
+validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+{
+   /*
+    * don't validate the sources until we get to them from their predecessor
+    * basic blocks, to avoid validating an SSA use before its definition.
+    */
+
+   validate_dest(&instr->dest, state);
+   /* The source count must equal the current block's predecessor count. */
+   exec_list_validate(&instr->srcs);
+   assert(exec_list_length(&instr->srcs) ==
+          state->block->predecessors->entries);
+}
+
+static void /* Validates one instruction by dispatching on its type. */
+validate_instr(nir_instr *instr, validate_state *state)
+{
+   assert(instr->block == state->block);
+   /* Make the instruction current for the per-type validators. */
+   state->instr = instr;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      validate_alu_instr(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_call:
+      validate_call_instr(nir_instr_as_call(instr), state);
+      break;
+
+   case nir_instr_type_intrinsic:
+      validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+      break;
+
+   case nir_instr_type_tex:
+      validate_tex_instr(nir_instr_as_tex(instr), state);
+      break;
+
+   case nir_instr_type_load_const:
+      validate_load_const_instr(nir_instr_as_load_const(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      validate_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+      break;
+
+   case nir_instr_type_jump: /* nothing to check on the jump itself */
+      break;
+
+   default: /* not ALU-specific: any unknown instruction type lands here */
+      assert(!"Invalid instruction type");
+      break;
+   }
+
+   state->instr = NULL;
+}
+
+static void /* Validates the source of phi `instr` that comes from `pred`. */
+validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+{
+   state->instr = &instr->instr;
+
+   assert(instr->dest.is_ssa);
+
+   exec_list_validate(&instr->srcs);
+   nir_foreach_phi_src(instr, src) {
+      if (src->pred == pred) {
+         assert(src->src.is_ssa);
+         assert(src->src.ssa->num_components ==
+                instr->dest.ssa.num_components);
+
+         validate_src(&src->src, state);
+         state->instr = NULL;
+         return;
+      }
+   }
+   /* No source names `pred` as its predecessor. */
+   abort();
+}
+
+static void /* Validates the sources that `block` feeds into succ's phis. */
+validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+{
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      /* Only the leading run of phi instructions is visited. */
+      validate_phi_src(nir_instr_as_phi(instr), block, state);
+   }
+}
+
+static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+static void /* Validates a basic block: its instructions and CFG edges. */
+validate_block(nir_block *block, validate_state *state)
+{
+   assert(block->cf_node.parent == state->parent_node);
+
+   state->block = block;
+
+   exec_list_validate(&block->instr_list);
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_phi) { /* phis must lead the block */
+         assert(instr == nir_block_first_instr(block) ||
+                nir_instr_prev(instr)->type == nir_instr_type_phi);
+      }
+
+      if (instr->type == nir_instr_type_jump) { /* a jump must come last */
+         assert(instr == nir_block_last_instr(block));
+      }
+
+      validate_instr(instr, state);
+   }
+
+   assert(block->successors[0] != NULL);
+   /* Each successor must list this block as a predecessor. */
+   for (unsigned i = 0; i < 2; i++) {
+      if (block->successors[i] != NULL) {
+         struct set_entry *entry =
+            _mesa_set_search(block->successors[i]->predecessors, block);
+         assert(entry);
+
+         validate_phi_srcs(block, block->successors[i], state);
+      }
+   }
+   /* A block that ends in a jump may have only one successor. */
+   if (!exec_list_is_empty(&block->instr_list) &&
+       nir_block_last_instr(block)->type == nir_instr_type_jump)
+      assert(block->successors[1] == NULL);
+}
+
+static void /* Validates an if: neighbours, condition and both branches. */
+validate_if(nir_if *if_stmt, validate_state *state)
+{
+   state->if_stmt = if_stmt;
+   /* The node right before the if must be a block. */
+   assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+   /* That block's two successors must start the then and else lists. */
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node ==
+          nir_if_first_then_node(if_stmt));
+   assert(&prev_block->successors[1]->cf_node ==
+          nir_if_first_else_node(if_stmt));
+   /* The node right after the if must be a block as well. */
+   assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   validate_src(&if_stmt->condition, state);
+
+   assert(!exec_list_is_empty(&if_stmt->then_list));
+   assert(!exec_list_is_empty(&if_stmt->else_list));
+   /* Children must name the if as their parent; restore it afterwards. */
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &if_stmt->cf_node;
+
+   exec_list_validate(&if_stmt->then_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) {
+      validate_cf_node(cf_node, state);
+   }
+
+   exec_list_validate(&if_stmt->else_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+   state->if_stmt = NULL;
+}
+
+static void /* Validates a loop: its neighbouring blocks and its body. */
+validate_loop(nir_loop *loop, validate_state *state)
+{
+   assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+   /* The preceding block has the loop's first node as its only successor. */
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node == nir_loop_first_cf_node(loop));
+   assert(prev_block->successors[1] == NULL);
+   /* The node right after the loop must be a block. */
+   assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   assert(!exec_list_is_empty(&loop->body));
+   /* Body nodes must name the loop as their parent; restore it afterwards. */
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &loop->cf_node;
+
+   exec_list_validate(&loop->body);
+   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+}
+
+static void /* Validates one control-flow node by dispatching on its type. */
+validate_cf_node(nir_cf_node *node, validate_state *state)
+{
+   assert(node->parent == state->parent_node);
+
+   switch (node->type) {
+   case nir_cf_node_block:
+      validate_block(nir_cf_node_as_block(node), state);
+      break;
+
+   case nir_cf_node_if:
+      validate_if(nir_cf_node_as_if(node), state);
+      break;
+
+   case nir_cf_node_loop:
+      validate_loop(nir_cf_node_as_loop(node), state);
+      break;
+
+   default: /* the node is neither a block, an if nor a loop */
+      assert(!"Invalid CF node type");
+      break;
+   }
+}
+
+static void /* Checks a register declaration and creates its tracking record. */
+prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+{
+   assert(reg->is_global == is_global);
+   /* The index is bounded by the shader or impl and must be unique. */
+   if (is_global)
+      assert(reg->index < state->shader->reg_alloc);
+   else
+      assert(reg->index < state->impl->reg_alloc);
+   assert(!BITSET_TEST(state->regs_found, reg->index));
+   BITSET_SET(state->regs_found, reg->index);
+   /* Start with empty sets; they are compared with the register's later. */
+   reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
+   reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+
+   reg_state->where_defined = is_global ? NULL : state->impl;
+
+   _mesa_hash_table_insert(state->regs, reg, reg_state);
+}
+
+static void /* Compares a register's use/def sets with the validator's own. */
+postvalidate_reg_decl(nir_register *reg, validate_state *state)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg);
+   /* NOTE(review): the lookup result is used without a NULL check. */
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+   /* On a size mismatch, print the keys the validator's set lacks. */
+   if (reg_state->uses->entries != reg->uses->entries) {
+      printf("extra entries in register uses:\n");
+      struct set_entry *entry; /* shadows the hash entry declared above */
+      set_foreach(reg->uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+   /* Same comparison for the if_uses sets. */
+   if (reg_state->if_uses->entries != reg->if_uses->entries) {
+      printf("extra entries in register if_uses:\n");
+      struct set_entry *entry;
+      set_foreach(reg->if_uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->if_uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+   /* Same comparison for the defs sets. */
+   if (reg_state->defs->entries != reg->defs->entries) {
+      printf("extra entries in register defs:\n");
+      struct set_entry *entry;
+      set_foreach(reg->defs, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->defs, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+}
+
+static void /* Checks a variable declaration; records locals in var_defs. */
+validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+{
+   assert(is_global != (var->data.mode == nir_var_local));
+
+   /*
+    * TODO validate some things ir_validate.cpp does (requires more GLSL type
+    * support)
+    */
+   /* Remember which impl declared this local variable. */
+   if (!is_global) {
+      _mesa_hash_table_insert(state->var_defs, var, state->impl);
+   }
+}
+
+static bool /* Per-def callback: compares a def's use sets with the validator's. */
+postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   validate_state *state = void_state;
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+   /* On a size mismatch, print the keys the validator's set lacks. */
+   if (def_state->uses->entries != def->uses->entries) {
+      printf("extra entries in SSA def uses:\n");
+      struct set_entry *use;
+      set_foreach(def->uses, use) {
+         struct set_entry *seen =
+            _mesa_set_search(def_state->uses, use->key);
+
+         if (seen == NULL) {
+            printf("%p\n", use->key);
+         }
+      }
+
+      abort();
+   }
+   /* Same comparison for if_uses; the message names the right set. */
+   if (def_state->if_uses->entries != def->if_uses->entries) {
+      printf("extra entries in SSA def if_uses:\n");
+      struct set_entry *use;
+      set_foreach(def->if_uses, use) {
+         struct set_entry *seen =
+            _mesa_set_search(def_state->if_uses, use->key);
+
+         if (seen == NULL) {
+            printf("%p\n", use->key);
+         }
+      }
+
+      abort();
+   }
+
+   return true; /* always continue the iteration */
+}
+
+static bool /* Per-block callback: post-validates every SSA def in the block. */
+postvalidate_ssa_defs_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+   return true;
+}
+
+static void /* Validates one function implementation against its overload. */
+validate_function_impl(nir_function_impl *impl, validate_state *state)
+{
+   assert(impl->overload->impl == impl);
+   assert(impl->cf_node.parent == NULL);
+   /* Parameter and return types must agree with the overload. */
+   assert(impl->num_params == impl->overload->num_params);
+   for (unsigned i = 0; i < impl->num_params; i++)
+      assert(impl->params[i]->type == impl->overload->params[i].type);
+
+   if (glsl_type_is_void(impl->overload->return_type))
+      assert(impl->return_var == NULL);
+   else
+      assert(impl->return_var->type == impl->overload->return_type);
+   /* The end block is empty and has no successors. */
+   assert(exec_list_is_empty(&impl->end_block->instr_list));
+   assert(impl->end_block->successors[0] == NULL);
+   assert(impl->end_block->successors[1] == NULL);
+
+   state->impl = impl;
+   state->parent_node = &impl->cf_node;
+
+   exec_list_validate(&impl->locals);
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      validate_var_decl(var, false, state);
+   }
+
+   /* Old bits are never reused: take a fresh zeroed bitset, no realloc(p, 0). */
+   free(state->regs_found);
+   state->regs_found = calloc(BITSET_WORDS(impl->reg_alloc),
+                              sizeof(BITSET_WORD));
+   assert(state->regs_found != NULL || impl->reg_alloc == 0);
+   exec_list_validate(&impl->registers);
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      prevalidate_reg_decl(reg, false, state);
+   }
+
+   /* Same for the SSA bitset, which is sized by this impl's ssa_alloc. */
+   free(state->ssa_defs_found);
+   state->ssa_defs_found = calloc(BITSET_WORDS(impl->ssa_alloc),
+                                  sizeof(BITSET_WORD));
+   assert(state->ssa_defs_found != NULL || impl->ssa_alloc == 0);
+   exec_list_validate(&impl->body);
+   foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+      validate_cf_node(node, state);
+   }
+   /* With the body walked, compare the recorded sets with the IR's. */
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      postvalidate_reg_decl(reg, state);
+   }
+
+   nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+}
+
+static void /* Validates an overload's implementation, if it has one. */
+validate_function_overload(nir_function_overload *overload,
+                           validate_state *state)
+{
+   if (overload->impl != NULL)
+      validate_function_impl(overload->impl, state);
+}
+
+static void /* Validates every overload of a function. */
+validate_function(nir_function *func, validate_state *state)
+{
+   exec_list_validate(&func->overload_list);
+   foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
+      assert(overload->function == func); /* back-pointer must match */
+      validate_function_overload(overload, state);
+   }
+}
+
+static void /* Creates the lookup tables and clears the bitset pointers. */
+init_validate_state(validate_state *state)
+{ /* NOTE(review): shader, impl, block, instr, if_stmt, parent_node stay unset */
+   state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   state->ssa_defs_found = NULL; /* allocated later, once sizes are known */
+   state->regs_found = NULL;
+   state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+}
+
+static void /* Releases the tables and bitsets owned by the validate state. */
+destroy_validate_state(validate_state *state)
+{
+   _mesa_hash_table_destroy(state->regs, NULL);
+   _mesa_hash_table_destroy(state->ssa_defs, NULL);
+   free(state->ssa_defs_found); /* the bitsets come from the C heap */
+   free(state->regs_found);
+   _mesa_hash_table_destroy(state->var_defs, NULL);
+}
+
+void /* Entry point: validates a whole shader, asserting on inconsistencies. */
+nir_validate_shader(nir_shader *shader)
+{
+   validate_state state;
+   init_validate_state(&state);
+
+   state.shader = shader;
+   /* Uniforms, inputs, outputs, globals and system values are all global. */
+   struct hash_entry *entry;
+   hash_table_foreach(shader->uniforms, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->inputs, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->outputs, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   exec_list_validate(&shader->globals);
+   foreach_list_typed(nir_variable, var, node, &shader->globals) {
+      validate_var_decl(var, true, &state);
+   }
+
+   exec_list_validate(&shader->system_values);
+   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+      validate_var_decl(var, true, &state);
+   }
+
+   /* Fresh zeroed bitset; avoids realloc(p, 0) when no registers exist. */
+   free(state.regs_found);
+   state.regs_found = calloc(BITSET_WORDS(shader->reg_alloc),
+                             sizeof(BITSET_WORD));
+   assert(state.regs_found != NULL || shader->reg_alloc == 0);
+   exec_list_validate(&shader->registers);
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      prevalidate_reg_decl(reg, true, &state);
+   }
+
+   exec_list_validate(&shader->functions);
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      validate_function(func, &state);
+   }
+   /* Global registers are compared only after every function was walked. */
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      postvalidate_reg_decl(reg, &state);
+   }
+
+   destroy_validate_state(&state);
+}
+
+#endif /* NDEBUG */
diff --git a/mesalib/src/glsl/nir/nir_worklist.c b/mesalib/src/glsl/nir/nir_worklist.c
new file mode 100644
index 000000000..a8baae937
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_worklist.h"
+
+void /* Sets up an empty worklist with room for num_blocks blocks. */
+nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                        void *mem_ctx)
+{
+   w->size = num_blocks;
+   w->count = 0;
+   w->start = 0;
+   /* Both arrays are allocated out of mem_ctx. */
+   w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD,
+                                     BITSET_WORDS(num_blocks));
+   w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks);
+}
+
+void /* Frees the two arrays allocated by nir_block_worklist_init(). */
+nir_block_worklist_fini(nir_block_worklist *w)
+{
+   ralloc_free(w->blocks_present);
+   ralloc_free(w->blocks);
+}
+
+static bool /* Per-block callback; `w` is the worklist to append to. */
+worklist_add_block(nir_block *block, void *w)
+{
+   nir_block_worklist_push_tail(w, block);
+
+   return true;
+}
+
+void /* Pushes every block visited by nir_foreach_block() onto the tail. */
+nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl)
+{
+   nir_foreach_block(impl, worklist_add_block, w);
+}
+
+void /* Inserts a block at the head; no-op if it is already queued. */
+nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+   /* Step start back by one slot, wrapping from 0 to size - 1. */
+   if (w->start == 0)
+      w->start = w->size - 1;
+   else
+      w->start--;
+
+   w->count++;
+
+   w->blocks[w->start] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+nir_block * /* Returns the block at the head without removing it. */
+nir_block_worklist_peek_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   return w->blocks[w->start];
+}
+
+nir_block * /* Removes and returns the block at the head. */
+nir_block_worklist_pop_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned head = w->start;
+   /* Advance start by one slot, wrapping at size. */
+   w->start = (w->start + 1) % w->size;
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[head]->index);
+   return w->blocks[head];
+}
+
+void /* Appends a block at the tail; no-op if it is already queued. */
+nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   w->count++;
+   /* Slot of the new last element; the head index must stay untouched. */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->blocks[tail] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+nir_block * /* Returns the block at the tail without removing it. */
+nir_block_worklist_peek_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+   /* Pure lookup: compute the tail slot without moving w->start. */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   return w->blocks[tail];
+}
+
+nir_block * /* Removes and returns the block at the tail. */
+nir_block_worklist_pop_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+   /* Only count shrinks when popping the tail; w->start stays put. */
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[tail]->index);
+   return w->blocks[tail];
+}
diff --git a/mesalib/src/glsl/nir/nir_worklist.h b/mesalib/src/glsl/nir/nir_worklist.h
new file mode 100644
index 000000000..d5a8568e4
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_WORKLIST_
+#define _NIR_WORKLIST_
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Represents a double-ended queue of unique blocks
+ *
+ * The worklist data structure guarantees that each block is in the queue at
+ * most once.  Pushing a block onto either end of the queue is a no-op if
+ * the block is already in the queue.  In order for this to work, the
+ * caller must ensure that the blocks are properly indexed.
+ */
+typedef struct {
+   /* The total size of the worklist */
+   unsigned size;
+
+   /* The number of blocks currently in the worklist */
+   unsigned count;
+
+   /* The offset in the array of blocks at which the list starts */
+   unsigned start;
+
+   /* A bitset of all of the blocks currently present in the worklist */
+   BITSET_WORD *blocks_present;
+
+   /* The actual worklist */
+   nir_block **blocks;
+} nir_block_worklist;
+
+void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                             void *mem_ctx);
+void nir_block_worklist_fini(nir_block_worklist *w);
+
+void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl);
+
+static inline bool /* True when no blocks are queued (count == 0). */
+nir_block_worklist_is_empty(const nir_block_worklist *w)
+{
+   return w->count == 0;
+}
+
+void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_head(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
+
+void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_tail(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _NIR_WORKLIST_ */
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index 430f5cb97..6784242ff 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -119,6 +119,8 @@ is_valid_vec_const(ir_constant *ir)
 static inline bool
 is_less_than_one(ir_constant *ir)
 {
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
    if (!is_valid_vec_const(ir))
       return false;
 
@@ -134,6 +136,8 @@ is_less_than_one(ir_constant *ir)
 static inline bool
 is_greater_than_zero(ir_constant *ir)
 {
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
    if (!is_valid_vec_const(ir))
       return false;
 
@@ -376,6 +380,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       }
       break;
 
+   case ir_unop_f2i:
+   case ir_unop_f2u:
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
+         return new(mem_ctx) ir_expression(ir->operation,
+                                           ir->type,
+                                           op_expr[0]->operands[0]);
+      }
+      break;
+
    case ir_unop_logic_not: {
       enum ir_expression_operation new_op = ir_unop_logic_not;
 
@@ -514,10 +527,45 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (op_const[1] && !op_const[0])
 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
 
+      /* Optimizes
+       *
+       *    (mul (floor (add (abs x) 0.5)) (sign x))
+       *
+       * into
+       *
+       *    (trunc (add x (mul (sign x) 0.5)))
+       */
+      for (int i = 0; i < 2; i++) {
+         ir_expression *sign_expr = ir->operands[i]->as_expression();
+         ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
+
+         if (!sign_expr || sign_expr->operation != ir_unop_sign ||
+             !floor_expr || floor_expr->operation != ir_unop_floor)
+            continue;
+
+         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
+
+         for (int j = 0; j < 2; j++) {
+            ir_expression *abs_expr = add_expr->operands[j]->as_expression();
+            if (!abs_expr || abs_expr->operation != ir_unop_abs)
+               continue;
+
+            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
+            if (!point_five->is_value(0.5, 0))
+               continue;
+
+            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
+               return trunc(add(abs_expr->operands[0],
+                                mul(sign_expr, point_five)));
+            }
+         }
+      }
       break;
 
    case ir_binop_div:
-      if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) {
+      if (is_vec_one(op_const[0]) && (
+                ir->type->base_type == GLSL_TYPE_FLOAT ||
+                ir->type->base_type == GLSL_TYPE_DOUBLE)) {
 	 return new(mem_ctx) ir_expression(ir_unop_rcp,
 					   ir->operands[1]->type,
 					   ir->operands[1],
@@ -538,7 +586,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
          unsigned components[4] = { 0 }, count = 0;
 
          for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
-            if (op_const[i]->value.f[c] == 0.0)
+            if (op_const[i]->is_zero())
                continue;
 
             components[count] = c;
@@ -554,7 +602,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
          /* Swizzle both operands to remove the channels that were zero. */
          return new(mem_ctx)
-            ir_expression(op, glsl_type::float_type,
+            ir_expression(op, ir->type,
                           new(mem_ctx) ir_swizzle(ir->operands[0],
                                                   components, count),
                           new(mem_ctx) ir_swizzle(ir->operands[1],
@@ -584,6 +632,16 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       }
       break;
 
+   case ir_binop_all_equal:
+   case ir_binop_any_nequal:
+      if (ir->operands[0]->type->is_scalar() &&
+          ir->operands[1]->type->is_scalar())
+         return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
+                                           ? ir_binop_equal : ir_binop_nequal,
+                                           ir->operands[0],
+                                           ir->operands[1]);
+      break;
+
    case ir_binop_rshift:
    case ir_binop_lshift:
       /* 0 >> x == 0 */
@@ -679,7 +737,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
    case ir_binop_min:
    case ir_binop_max:
-      if (ir->type->base_type != GLSL_TYPE_FLOAT)
+      if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat)
          break;
 
       /* Replace min(max) operations and its commutative combinations with
@@ -737,6 +795,12 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
 	 return op_expr[0]->operands[0];
 
+      if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
+                         op_expr[0]->operation == ir_unop_exp)) {
+         return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
+                                           neg(op_expr[0]->operands[0]));
+      }
+
       /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
        * its IR level, so we can always apply this transformation.
        */
@@ -775,7 +839,19 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
          return mul(ir->operands[1], ir->operands[2]);
       } else if (is_vec_zero(op_const[1])) {
          unsigned op2_components = ir->operands[2]->type->vector_elements;
-         ir_constant *one = new(mem_ctx) ir_constant(1.0f, op2_components);
+         ir_constant *one;
+
+         switch (ir->type->base_type) {
+         case GLSL_TYPE_FLOAT:
+            one = new(mem_ctx) ir_constant(1.0f, op2_components);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            one = new(mem_ctx) ir_constant(1.0, op2_components);
+            break;
+         default:
+            unreachable("unexpected type");
+         }
+
          return mul(ir->operands[0], add(one, neg(ir->operands[2])));
       }
       break;
diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp
index c334e1276..90cc0c89b 100644
--- a/mesalib/src/glsl/opt_constant_propagation.cpp
+++ b/mesalib/src/glsl/opt_constant_propagation.cpp
@@ -194,6 +194,9 @@ ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
       case GLSL_TYPE_FLOAT:
 	 data.f[i] = found->constant->value.f[rhs_channel];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 data.d[i] = found->constant->value.d[rhs_channel];
+	 break;
       case GLSL_TYPE_INT:
 	 data.i[i] = found->constant->value.i[rhs_channel];
 	 break;
diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp
index 5c65af66b..806027b28 100644
--- a/mesalib/src/glsl/opt_copy_propagation.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation.cpp
@@ -128,6 +128,9 @@ ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir)
 
    visit_list_elements(this, &ir->body);
 
+   ralloc_free(this->acp);
+   ralloc_free(this->kills);
+
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = orig_killed_all;
@@ -215,7 +218,7 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 
    /* Populate the initial acp with a copy of the original */
    foreach_in_list(acp_entry, a, orig_acp) {
-      this->acp->push_tail(new(this->mem_ctx) acp_entry(a->lhs, a->rhs));
+      this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs));
    }
 
    visit_list_elements(this, instructions);
@@ -226,12 +229,15 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
    foreach_in_list(kill_entry, k, new_kills) {
       kill(k->var);
    }
+
+   ralloc_free(new_kills);
 }
 
 ir_visitor_status
@@ -269,6 +275,7 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -276,6 +283,8 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
       kill(k->var);
    }
 
+   ralloc_free(new_kills);
+
    /* already descended into the children. */
    return visit_continue_with_parent;
 }
@@ -294,7 +303,7 @@ ir_copy_propagation_visitor::kill(ir_variable *var)
 
    /* Add the LHS variable to the list of killed variables in this block.
     */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var));
+   this->kills->push_tail(new(this->kills) kill_entry(var));
 }
 
 /**
@@ -322,7 +331,7 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
 	 ir->condition = new(ralloc_parent(ir)) ir_constant(false);
 	 this->progress = true;
       } else {
-	 entry = new(this->mem_ctx) acp_entry(lhs_var, rhs_var);
+	 entry = new(this->acp) acp_entry(lhs_var, rhs_var);
 	 this->acp->push_tail(entry);
       }
    }
diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
index c3e55bcd1..353a5c668 100644
--- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
@@ -156,6 +156,9 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
 
    visit_list_elements(this, &ir->body);
 
+   ralloc_free(this->acp);
+   ralloc_free(this->kills);
+
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = orig_killed_all;
@@ -173,9 +176,9 @@ ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
       kill_entry *k;
 
       if (lhs)
-	 k = new(mem_ctx) kill_entry(var, ir->write_mask);
+	 k = new(this->kills) kill_entry(var, ir->write_mask);
       else
-	 k = new(mem_ctx) kill_entry(var, ~0);
+	 k = new(this->kills) kill_entry(var, ~0);
 
       kill(k);
    }
@@ -334,7 +337,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
 
    /* Populate the initial acp with a copy of the original */
    foreach_in_list(acp_entry, a, orig_acp) {
-      this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
+      this->acp->push_tail(new(this->acp) acp_entry(a));
    }
 
    visit_list_elements(this, instructions);
@@ -345,6 +348,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -354,6 +358,8 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
    foreach_in_list_safe(kill_entry, k, new_kills) {
       kill(k);
    }
+
+   ralloc_free(new_kills);
 }
 
 ir_visitor_status
@@ -391,6 +397,7 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -398,6 +405,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
       kill(k);
    }
 
+   ralloc_free(new_kills);
+
    /* already descended into the children. */
    return visit_continue_with_parent;
 }
@@ -423,6 +432,7 @@ ir_copy_propagation_elements_visitor::kill(kill_entry *k)
    if (k->next)
       k->remove();
 
+   ralloc_steal(this->kills, k);
    this->kills->push_tail(k);
 }
 
diff --git a/mesalib/src/glsl/opt_dead_builtin_variables.cpp b/mesalib/src/glsl/opt_dead_builtin_variables.cpp
index 85c75d6f2..0d4e3a8f0 100644
--- a/mesalib/src/glsl/opt_dead_builtin_variables.cpp
+++ b/mesalib/src/glsl/opt_dead_builtin_variables.cpp
@@ -52,7 +52,7 @@ optimize_dead_builtin_variables(exec_list *instructions,
           && var->data.how_declared != ir_var_declared_implicitly)
          continue;
 
-      if (strncmp(var->name, "gl_", 3) != 0)
+      if (!is_gl_identifier(var->name))
          continue;
 
       /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they
diff --git a/mesalib/src/glsl/opt_minmax.cpp b/mesalib/src/glsl/opt_minmax.cpp
index 32fb2d7ea..23d0b109d 100644
--- a/mesalib/src/glsl/opt_minmax.cpp
+++ b/mesalib/src/glsl/opt_minmax.cpp
@@ -133,6 +133,14 @@ compare_components(ir_constant *a, ir_constant *b)
          else
             foundequal = true;
          break;
+      case GLSL_TYPE_DOUBLE:
+         if (a->value.d[c0] < b->value.d[c1])
+            foundless = true;
+         else if (a->value.d[c0] > b->value.d[c1])
+            foundgreater = true;
+         else
+            foundequal = true;
+         break;
       default:
          unreachable("not reached");
       }
@@ -178,6 +186,11 @@ combine_constant(bool ismin, ir_constant *a, ir_constant *b)
              (!ismin && b->value.f[i] > c->value.f[i]))
             c->value.f[i] = b->value.f[i];
          break;
+      case GLSL_TYPE_DOUBLE:
+         if ((ismin && b->value.d[i] < c->value.d[i]) ||
+             (!ismin && b->value.d[i] > c->value.d[i]))
+            c->value.d[i] = b->value.d[i];
+         break;
       default:
          assert(!"not reached");
       }
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 67b0d0c82..ad0d75bf8 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -127,6 +127,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
    ctx->Extensions.ARB_fragment_coord_conventions = true;
    ctx->Extensions.ARB_fragment_layer_viewport = true;
    ctx->Extensions.ARB_gpu_shader5 = true;
+   ctx->Extensions.ARB_gpu_shader_fp64 = true;
    ctx->Extensions.ARB_sample_shading = true;
    ctx->Extensions.ARB_shader_bit_encoding = true;
    ctx->Extensions.ARB_shader_stencil_export = true;
author	marha <marha@users.sourceforge.net>	2015-02-22 21:39:56 +0100
committer	marha <marha@users.sourceforge.net>	2015-02-22 21:39:56 +0100
commit	462f18c7b25fe3e467f837647d07ab0a78aa8d2b (patch)
tree	fc8013c0a1bac05a1945846c1697e973f4c35013 /mesalib/src/glsl
parent	36f711ee12b6dd5184198abed3aa551efb585587 (diff)
download	vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.tar.gz vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.tar.bz2 vcxsrv-462f18c7b25fe3e467f837647d07ab0a78aa8d2b.zip