xwininfo fontconfig libX11 libXdmcp libfontenc libxcb libxcb/xcb-proto mesalib xserver xkeyboard-config mkfontscale git update 22 Feb 2015

xserver commit 3a06faf3fcdb7451125a46181f9152e8e59e9770 libxcb commit e3ec1f74637237ce500dfd0ca59f2e422da4e019 libxcb/xcb-proto commit 4c550465934164aab2449a125f75f4ca07816233 xkeyboard-config commit 26f344c93f8c6141e9233eb68088ba4fd56bc9ef libX11 commit c8e19b393defd53f046ddc2da3a16881221b3c34 libXdmcp commit 9f4cac7656b221ce2a8f97e7bd31e5e23126d001 libfontenc commit de1843aaf76015c9d99416f3122d169fe331b849 mkfontscale commit 87d628f8eec170ec13bb9feefb1ce05aed07d1d6 xwininfo commit 0c49f8f2bd56b1e77721e81030ea948386dcdf4e fontconfig commit d6d5adeb7940c0d0beb86489c2a1c2ce59e5c044 mesa commit 4359954d842caa2a9f8d4b50d70ecc789884b68b
author: marha <marha@users.sourceforge.net> 2015-02-22 14:31:16 +0100
committer: marha <marha@users.sourceforge.net> 2015-02-22 14:31:16 +0100
commit: f1c2db43dcf35d2cf4715390bd2391c28e42a8c2 (patch)
tree: 46b537271afe0f6534231b1bd4cc4f91ae1fb446 /mesalib/src
parent: 5e5a48ff8cd08f123601cd0625ca62a86675aac9 (diff)
download: vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.tar.gz
vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.tar.bz2
vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.zip
318 files changed, 34675 insertions, 19250 deletions
diff --git a/mesalib/src/Makefile.am b/mesalib/src/Makefile.am
index 85b75a93b..8edf33373 100644
--- a/mesalib/src/Makefile.am
+++ b/mesalib/src/Makefile.am
@@ -19,7 +19,9 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
-SUBDIRS = gtest util mapi/glapi/gen mapi
+AUTOMAKE_OPTIONS = subdir-objects
+
+SUBDIRS = . gtest util mapi/glapi/gen mapi
 
 if NEED_OPENGL_COMMON
 SUBDIRS += glsl mesa
@@ -52,3 +54,16 @@ SUBDIRS += gallium
 endif
 
 EXTRA_DIST = egl/docs getopt hgl SConscript
+
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/include/ \
+	-I$(top_srcdir)/src/mapi/ \
+	-I$(top_srcdir)/src/mesa/ \
+	$(DEFINES)
+
+noinst_LTLIBRARIES = libglsl_util.la
+
+libglsl_util_la_SOURCES = \
+	mesa/main/imports.c \
+	mesa/program/prog_hash_table.c \
+	mesa/program/symbol_table.c
diff --git a/mesalib/src/gallium/auxiliary/Makefile.am b/mesalib/src/gallium/auxiliary/Makefile.am
index 1053ce4ee..4b6205797 100644
--- a/mesalib/src/gallium/auxiliary/Makefile.am
+++ b/mesalib/src/gallium/auxiliary/Makefile.am
@@ -53,24 +53,40 @@ libgalliumvl_stub_la_SOURCES = \
 
 if NEED_GALLIUM_VL
 
-noinst_LTLIBRARIES += libgalliumvl.la
-
-libgalliumvl_la_CFLAGS = \
+COMMON_VL_CFLAGS = \
 	$(AM_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(DRI2PROTO_CFLAGS) \
 	$(LIBDRM_CFLAGS) \
 	$(GALLIUM_PIPE_LOADER_DEFINES) \
 	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
 
 if HAVE_GALLIUM_STATIC_TARGETS
-libgalliumvl_la_CFLAGS += \
+COMMON_VL_CFLAGS += \
 	-DGALLIUM_STATIC_TARGETS=1
 
 endif # HAVE_GALLIUM_STATIC_TARGETS
 
+noinst_LTLIBRARIES += libgalliumvl.la
+
+libgalliumvl_la_CFLAGS = \
+	$(COMMON_VL_CFLAGS)
+
 libgalliumvl_la_SOURCES = \
 	$(VL_SOURCES)
 
+if NEED_GALLIUM_VL_WINSYS
+
+noinst_LTLIBRARIES += libgalliumvlwinsys.la
+
+libgalliumvlwinsys_la_CFLAGS = \
+	$(COMMON_VL_CFLAGS)
+
+libgalliumvlwinsys_la_SOURCES = \
+	$(VL_WINSYS_SOURCES)
+
+endif
+
 endif
 
 EXTRA_DIST = \
diff --git a/mesalib/src/gallium/auxiliary/Makefile.sources b/mesalib/src/gallium/auxiliary/Makefile.sources
index 3460482c1..b7174d6e7 100644
--- a/mesalib/src/gallium/auxiliary/Makefile.sources
+++ b/mesalib/src/gallium/auxiliary/Makefile.sources
@@ -208,12 +208,12 @@ C_SOURCES := \
 	util/u_dump_state.c \
 	util/u_dynarray.h \
 	util/u_fifo.h \
-	util/u_format_bptc.c \
-	util/u_format_bptc.h \
 	util/u_format.c \
+	util/u_format.h \
 	util/u_format_etc.c \
 	util/u_format_etc.h \
-	util/u_format.h \
+	util/u_format_fake.c \
+	util/u_format_fake.h \
 	util/u_format_latc.c \
 	util/u_format_latc.h \
 	util/u_format_other.c \
@@ -273,7 +273,6 @@ C_SOURCES := \
 	util/u_ringbuffer.h \
 	util/u_sampler.c \
 	util/u_sampler.h \
-	util/u_simple_list.h \
 	util/u_simple_shaders.c \
 	util/u_simple_shaders.h \
 	util/u_slab.c \
@@ -335,10 +334,13 @@ VL_SOURCES := \
 	vl/vl_video_buffer.h \
 	vl/vl_vlc.h \
 	vl/vl_winsys.h \
-	vl/vl_winsys_dri.c \
 	vl/vl_zscan.c \
 	vl/vl_zscan.h
 
+# XXX: Nuke this as our dri targets no longer depend on VL.
+VL_WINSYS_SOURCES := \
+	vl/vl_winsys_dri.c
+
 VL_STUB_SOURCES := \
 	vl/vl_stubs.c
 
diff --git a/mesalib/src/gallium/auxiliary/hud/font.c b/mesalib/src/gallium/auxiliary/hud/font.c
index 03e35d945..60e8ae514 100644
--- a/mesalib/src/gallium/auxiliary/hud/font.c
+++ b/mesalib/src/gallium/auxiliary/hud/font.c
@@ -57,6 +57,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "util/u_inlines.h"
+#include "util/u_memory.h"
 
 typedef unsigned char	GLubyte;	/* 1-byte unsigned */
 typedef struct tagSFG_Font SFG_Font;
@@ -373,24 +374,29 @@ static boolean
 util_font_create_fixed_8x13(struct pipe_context *pipe,
                             struct util_font *out_font)
 {
+   static const enum pipe_format formats[] = {
+      PIPE_FORMAT_I8_UNORM,
+      PIPE_FORMAT_L8_UNORM,
+      PIPE_FORMAT_R8_UNORM
+   };
    struct pipe_screen *screen = pipe->screen;
    struct pipe_resource tex_templ, *tex;
    struct pipe_transfer *transfer = NULL;
    char *map;
-   enum pipe_format tex_format;
+   enum pipe_format tex_format = PIPE_FORMAT_NONE;
    int i;
 
-   if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM,
+   for (i = 0; i < Elements(formats); i++) {
+      if (screen->is_format_supported(screen, formats[i],
                                    PIPE_TEXTURE_RECT, 0,
                                    PIPE_BIND_SAMPLER_VIEW)) {
-      tex_format = PIPE_FORMAT_I8_UNORM;
-   }
-   else if (screen->is_format_supported(screen, PIPE_FORMAT_L8_UNORM,
-                                   PIPE_TEXTURE_RECT, 0,
-                                   PIPE_BIND_SAMPLER_VIEW)) {
-      tex_format = PIPE_FORMAT_L8_UNORM;
+         tex_format = formats[i];
+         break;
+      }
    }
-   else {
+
+   if (tex_format == PIPE_FORMAT_NONE) {
+      debug_printf("Unable to find texture format for font.\n");
       return FALSE;
    }
 
diff --git a/mesalib/src/gallium/auxiliary/hud/hud_context.c b/mesalib/src/gallium/auxiliary/hud/hud_context.c
index 98678fc9f..e46c68cdd 100644
--- a/mesalib/src/gallium/auxiliary/hud/hud_context.c
+++ b/mesalib/src/gallium/auxiliary/hud/hud_context.c
@@ -915,6 +915,7 @@ print_help(struct pipe_screen *screen)
    }
 
    puts("");
+   fflush(stdout);
 }
 
 struct hud_context *
diff --git a/mesalib/src/gallium/auxiliary/util/u_cache.c b/mesalib/src/gallium/auxiliary/util/u_cache.c
index 26aab2bf1..9395c66f2 100644
--- a/mesalib/src/gallium/auxiliary/util/u_cache.c
+++ b/mesalib/src/gallium/auxiliary/util/u_cache.c
@@ -42,7 +42,7 @@
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_cache.h"
-#include "util/u_simple_list.h"
+#include "util/simple_list.h"
 
 
 struct util_cache_entry
diff --git a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c
index 5d9db59d1..23ab46c54 100644
--- a/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/mesalib/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -272,7 +272,7 @@ static INLINE uint64_t xgetbv(void)
 
 
 #if defined(PIPE_ARCH_X86)
-static INLINE boolean sse2_has_daz(void)
+PIPE_ALIGN_STACK static INLINE boolean sse2_has_daz(void)
 {
    struct {
       uint32_t pad1[7];
@@ -409,8 +409,12 @@ util_cpu_detect(void)
       }
 
       if (regs[0] >= 0x80000006) {
+         /* should we really do this if the clflush size above worked? */
+         unsigned int cacheline;
          cpuid(0x80000006, regs2);
-         util_cpu_caps.cacheline = regs2[2] & 0xFF;
+         cacheline = regs2[2] & 0xFF;
+         if (cacheline > 0)
+            util_cpu_caps.cacheline = cacheline;
       }
 
       if (!util_cpu_caps.has_sse) {
diff --git a/mesalib/src/gallium/auxiliary/util/u_format.csv b/mesalib/src/gallium/auxiliary/util/u_format.csv
index a71aaf15d..d3b77e6b9 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format.csv
+++ b/mesalib/src/gallium/auxiliary/util/u_format.csv
@@ -186,6 +186,17 @@ PIPE_FORMAT_LATC2_SNORM           , rgtc, 4, 4, x128,     ,     ,     , xxxy, rg
 
 PIPE_FORMAT_ETC1_RGB8             ,  etc, 4, 4, x64,      ,     ,     , xyz1, rgb
 
+PIPE_FORMAT_ETC2_RGB8             ,  etc, 4, 4, x64,      ,     ,     , xyz1, rgb
+PIPE_FORMAT_ETC2_SRGB8            ,  etc, 4, 4, x64,      ,     ,     , xyz1, srgb
+PIPE_FORMAT_ETC2_RGB8A1           ,  etc, 4, 4, x64,      ,     ,     , xyzw, rgb
+PIPE_FORMAT_ETC2_SRGB8A1          ,  etc, 4, 4, x64,      ,     ,     , xyzw, srgb
+PIPE_FORMAT_ETC2_RGBA8            ,  etc, 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_ETC2_SRGBA8           ,  etc, 4, 4, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_ETC2_R11_UNORM        ,  etc, 4, 4, x64,      ,     ,     , x001, rgb
+PIPE_FORMAT_ETC2_R11_SNORM        ,  etc, 4, 4, x64,      ,     ,     , x001, rgb
+PIPE_FORMAT_ETC2_RG11_UNORM       ,  etc, 4, 4, x128,     ,     ,     , xy01, rgb
+PIPE_FORMAT_ETC2_RG11_SNORM       ,  etc, 4, 4, x128,     ,     ,     , xy01, rgb
+
 PIPE_FORMAT_BPTC_RGBA_UNORM       , bptc, 4, 4, x128,     ,     ,     , xyzw, rgb
 PIPE_FORMAT_BPTC_SRGBA            , bptc, 4, 4, x128,     ,     ,     , xyzw, srgb
 PIPE_FORMAT_BPTC_RGB_FLOAT        , bptc, 4, 4, x128,     ,     ,     , xyz1, rgb
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_bptc.h b/mesalib/src/gallium/auxiliary/util/u_format_bptc.h
deleted file mode 100644
index f67d071ee..000000000
--- a/mesalib/src/gallium/auxiliary/util/u_format_bptc.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Red Hat Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- **************************************************************************/
-
-#ifndef U_FORMAT_BPTC_H_
-#define U_FORMAT_BPTC_H_
-
-void
-util_format_bptc_rgba_unorm_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
-
-void
-util_format_bptc_rgba_unorm_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgba_unorm_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgba_unorm_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgba_unorm_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgba_unorm_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-
-
-void
-util_format_bptc_srgba_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
-
-void
-util_format_bptc_srgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_srgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_srgba_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_srgba_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_srgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-
-
-void
-util_format_bptc_rgb_float_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
-
-void
-util_format_bptc_rgb_float_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_float_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_float_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_float_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_float_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-
-void
-util_format_bptc_rgb_ufloat_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j);
-
-void
-util_format_bptc_rgb_ufloat_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_ufloat_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_ufloat_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_ufloat_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height);
-
-void
-util_format_bptc_rgb_ufloat_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
-
-
-#endif
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_bptc.c b/mesalib/src/gallium/auxiliary/util/u_format_fake.c
index 196220ee0..77e896d27 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format_bptc.c
+++ b/mesalib/src/gallium/auxiliary/util/u_format_fake.c
@@ -1,5 +1,5 @@
 #include "u_format.h"
-#include "u_format_bptc.h"
+#include "u_format_fake.h"
 
 #define fake(format) \
 void \
@@ -24,3 +24,14 @@ fake(bptc_rgba_unorm)
 fake(bptc_srgba)
 fake(bptc_rgb_float)
 fake(bptc_rgb_ufloat)
+
+fake(etc2_rgb8)
+fake(etc2_srgb8)
+fake(etc2_rgb8a1)
+fake(etc2_srgb8a1)
+fake(etc2_rgba8)
+fake(etc2_srgba8)
+fake(etc2_r11_unorm)
+fake(etc2_r11_snorm)
+fake(etc2_rg11_unorm)
+fake(etc2_rg11_snorm)
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_fake.h b/mesalib/src/gallium/auxiliary/util/u_format_fake.h
new file mode 100644
index 000000000..e6bfd4e15
--- /dev/null
+++ b/mesalib/src/gallium/auxiliary/util/u_format_fake.h
@@ -0,0 +1,66 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Red Hat Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+#ifndef U_FORMAT_FAKE_H_
+#define U_FORMAT_FAKE_H_
+
+#define __format_fake(format) \
+void \
+util_format_##format##_fetch_rgba_8unorm(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j); \
+\
+void \
+util_format_##format##_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
+\
+void \
+util_format_##format##_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
+\
+void \
+util_format_##format##_unpack_rgba_float(float *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); \
+\
+void \
+util_format_##format##_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride, const float *src_row, unsigned src_stride, unsigned width, unsigned height); \
+\
+void \
+util_format_##format##_fetch_rgba_float(float *dst, const uint8_t *src, unsigned i, unsigned j);
+
+__format_fake(bptc_rgba_unorm)
+__format_fake(bptc_srgba)
+__format_fake(bptc_rgb_float)
+__format_fake(bptc_rgb_ufloat)
+
+__format_fake(etc2_rgb8)
+__format_fake(etc2_srgb8)
+__format_fake(etc2_rgb8a1)
+__format_fake(etc2_srgb8a1)
+__format_fake(etc2_rgba8)
+__format_fake(etc2_srgba8)
+__format_fake(etc2_r11_unorm)
+__format_fake(etc2_r11_snorm)
+__format_fake(etc2_rg11_unorm)
+__format_fake(etc2_rg11_snorm)
+
+#endif
diff --git a/mesalib/src/gallium/auxiliary/util/u_format_table.py b/mesalib/src/gallium/auxiliary/util/u_format_table.py
index ad582e4f5..aceb0caf7 100644
--- a/mesalib/src/gallium/auxiliary/util/u_format_table.py
+++ b/mesalib/src/gallium/auxiliary/util/u_format_table.py
@@ -90,7 +90,7 @@ def write_format_table(formats):
     print '#include "u_format_rgtc.h"'
     print '#include "u_format_latc.h"'
     print '#include "u_format_etc.h"'
-    print '#include "u_format_bptc.h"'
+    print '#include "u_format_fake.h"'
     print
     
     u_format_pack.generate(formats)
diff --git a/mesalib/src/gallium/auxiliary/util/u_math.h b/mesalib/src/gallium/auxiliary/util/u_math.h
index 19c7343b1..d6e83f962 100644
--- a/mesalib/src/gallium/auxiliary/util/u_math.h
+++ b/mesalib/src/gallium/auxiliary/util/u_math.h
@@ -40,7 +40,6 @@
 
 
 #include "pipe/p_compiler.h"
-#include "util/u_debug.h"
 
 
 #ifdef __cplusplus
@@ -530,6 +529,7 @@ unsigned ffs( unsigned u )
 }
 #elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
 #define ffs __builtin_ffs
+#define ffsll __builtin_ffsll
 #endif
 
 #endif /* FFS_DEFINED */
@@ -561,14 +561,10 @@ util_last_bit(unsigned u)
 static INLINE unsigned
 util_last_bit_signed(int i)
 {
-#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407) && !defined(__INTEL_COMPILER)
-   return 31 - __builtin_clrsb(i);
-#else
    if (i >= 0)
       return util_last_bit(i);
    else
       return util_last_bit(~(unsigned)i);
-#endif
 }
 
 /* Destructively loop over all of the bits in a mask as in:
@@ -587,6 +583,15 @@ u_bit_scan(unsigned *mask)
    return i;
 }
 
+#ifndef _MSC_VER
+static INLINE int
+u_bit_scan64(uint64_t *mask)
+{
+   int i = ffsll(*mask) - 1;
+   *mask &= ~(1llu << i);
+   return i;
+}
+#endif
 
 /**
  * Return float bits.
@@ -602,9 +607,9 @@ fui( float f )
 static INLINE float
 uif(uint32_t ui)
 {
-        union fi fi;
-        fi.ui = ui;
-        return fi.f;
+   union fi fi;
+   fi.ui = ui;
+   return fi.f;
 }
 
 
diff --git a/mesalib/src/gallium/auxiliary/util/u_pstipple.c b/mesalib/src/gallium/auxiliary/util/u_pstipple.c
index 1e1ec4a98..0a20bdb47 100644
--- a/mesalib/src/gallium/auxiliary/util/u_pstipple.c
+++ b/mesalib/src/gallium/auxiliary/util/u_pstipple.c
@@ -182,6 +182,8 @@ struct pstip_transform_context {
    int freeSampler;  /** an available sampler for the pstipple */
    int numImmed;
    uint coordOrigin;
+   unsigned fixedUnit;
+   bool hasFixedUnit;
 };
 
 
@@ -279,7 +281,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
    }
 
    /* declare new sampler */
-   tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
+   tgsi_transform_sampler_decl(ctx,
+         pctx->hasFixedUnit ? pctx->fixedUnit : pctx->freeSampler);
 
    /* Declare temp[0] reg if not already declared.
     * We can always use temp[0] since this code is before
@@ -318,7 +321,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
    tgsi_transform_tex_2d_inst(ctx,
                               TGSI_FILE_TEMPORARY, texTemp,
                               TGSI_FILE_TEMPORARY, texTemp,
-                              pctx->freeSampler);
+                              pctx->hasFixedUnit ? pctx->fixedUnit
+                                                 : pctx->freeSampler);
 
    /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
    tgsi_transform_kill_inst(ctx,
@@ -330,12 +334,16 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
 /**
  * Given a fragment shader, return a new fragment shader which
  * samples a stipple texture and executes KILL.
+ *
  * \param samplerUnitOut  returns the index of the sampler unit which
- *                        will be used to sample the stipple texture
+ *                        will be used to sample the stipple texture;
+ *                        if NULL, the fixed unit is used
+ * \param fixedUnit       fixed texture unit used for the stipple texture
  */
 struct tgsi_token *
 util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
-                                     unsigned *samplerUnitOut)
+                                     unsigned *samplerUnitOut,
+                                     unsigned fixedUnit)
 {
    struct pstip_transform_context transform;
    const uint newLen = tgsi_num_tokens(tokens) + NUM_NEW_TOKENS;
@@ -352,6 +360,8 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
    transform.wincoordInput = -1;
    transform.maxInput = -1;
    transform.coordOrigin = TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
+   transform.hasFixedUnit = !samplerUnitOut;
+   transform.fixedUnit = fixedUnit;
    transform.base.prolog = pstip_transform_prolog;
    transform.base.transform_declaration = pstip_transform_decl;
    transform.base.transform_immediate = pstip_transform_immed;
@@ -368,9 +378,10 @@ util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
    tgsi_dump(new_fs->tokens, 0);
 #endif
 
-   assert(transform.freeSampler < PIPE_MAX_SAMPLERS);
-   *samplerUnitOut = transform.freeSampler;
+   if (samplerUnitOut) {
+      assert(transform.freeSampler < PIPE_MAX_SAMPLERS);
+      *samplerUnitOut = transform.freeSampler;
+   }
 
    return new_tokens;
 }
-
diff --git a/mesalib/src/gallium/auxiliary/util/u_pstipple.h b/mesalib/src/gallium/auxiliary/util/u_pstipple.h
index 13155e7f5..249c58be9 100644
--- a/mesalib/src/gallium/auxiliary/util/u_pstipple.h
+++ b/mesalib/src/gallium/auxiliary/util/u_pstipple.h
@@ -49,7 +49,8 @@ util_pstipple_create_sampler(struct pipe_context *pipe);
 
 struct tgsi_token *
 util_pstipple_create_fragment_shader(const struct tgsi_token *tokens,
-                                     unsigned *samplerUnitOut);
+                                     unsigned *samplerUnitOut,
+                                     unsigned fixed_unit);
 
 
 #endif
diff --git a/mesalib/src/gallium/auxiliary/util/u_simple_list.h b/mesalib/src/gallium/auxiliary/util/u_simple_list.h
deleted file mode 100644
index 3f7def5fc..000000000
--- a/mesalib/src/gallium/auxiliary/util/u_simple_list.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/**
- * \file simple_list.h
- * Simple macros for type-safe, intrusive lists.
- *
- *  Intended to work with a list sentinal which is created as an empty
- *  list.  Insert & delete are O(1).
- *  
- * \author
- *  (C) 1997, Keith Whitwell
- */
-
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#ifndef _U_SIMPLE_LIST_H_
-#define _U_SIMPLE_LIST_H_
-
-/**
- * Remove an element from list.
- *
- * \param elem element to remove.
- */
-#define remove_from_list(elem)			\
-do {						\
-   (elem)->next->prev = (elem)->prev;		\
-   (elem)->prev->next = (elem)->next;		\
-   (elem)->next = elem;                         \
-   (elem)->prev = elem;                         \
-} while (0)
-
-/**
- * Insert an element to the list head.
- *
- * \param list list.
- * \param elem element to insert.
- */
-#define insert_at_head(list, elem)		\
-do {						\
-   (elem)->prev = list;				\
-   (elem)->next = (list)->next;			\
-   (list)->next->prev = elem;			\
-   (list)->next = elem;				\
-} while(0)
-
-/**
- * Insert an element to the list tail.
- *
- * \param list list.
- * \param elem element to insert.
- */
-#define insert_at_tail(list, elem)		\
-do {						\
-   (elem)->next = list;				\
-   (elem)->prev = (list)->prev;			\
-   (list)->prev->next = elem;			\
-   (list)->prev = elem;				\
-} while(0)
-
-/**
- * Move an element to the list head.
- *
- * \param list list.
- * \param elem element to move.
- */
-#define move_to_head(list, elem)		\
-do {						\
-   remove_from_list(elem);			\
-   insert_at_head(list, elem);			\
-} while (0)
-
-/**
- * Move an element to the list tail.
- *
- * \param list list.
- * \param elem element to move.
- */
-#define move_to_tail(list, elem)		\
-do {						\
-   remove_from_list(elem);			\
-   insert_at_tail(list, elem);			\
-} while (0)
-
-/**
- * Make a empty list empty.
- *
- * \param sentinal list (sentinal element).
- */
-#define make_empty_list(sentinal)		\
-do {						\
-   (sentinal)->next = sentinal;			\
-   (sentinal)->prev = sentinal;			\
-} while (0)
-
-/**
- * Get list first element.
- *
- * \param list list.
- *
- * \return pointer to first element.
- */
-#define first_elem(list)       ((list)->next)
-
-/**
- * Get list last element.
- *
- * \param list list.
- *
- * \return pointer to last element.
- */
-#define last_elem(list)        ((list)->prev)
-
-/**
- * Get next element.
- *
- * \param elem element.
- *
- * \return pointer to next element.
- */
-#define next_elem(elem)        ((elem)->next)
-
-/**
- * Get previous element.
- *
- * \param elem element.
- *
- * \return pointer to previous element.
- */
-#define prev_elem(elem)        ((elem)->prev)
-
-/**
- * Test whether element is at end of the list.
- * 
- * \param list list.
- * \param elem element.
- * 
- * \return non-zero if element is at end of list, or zero otherwise.
- */
-#define at_end(list, elem)     ((elem) == (list))
-
-/**
- * Test if a list is empty.
- * 
- * \param list list.
- * 
- * \return non-zero if list empty, or zero otherwise.
- */
-#define is_empty_list(list)    ((list)->next == (list))
-
-/**
- * Walk through the elements of a list.
- *
- * \param ptr pointer to the current element.
- * \param list list.
- *
- * \note It should be followed by a { } block or a single statement, as in a \c
- * for loop.
- */
-#define foreach(ptr, list)     \
-        for( ptr=(list)->next ;  ptr!=list ;  ptr=(ptr)->next )
-
-/**
- * Walk through the elements of a list.
- *
- * Same as #foreach but lets you unlink the current value during a list
- * traversal.  Useful for freeing a list, element by element.
- * 
- * \param ptr pointer to the current element.
- * \param t temporary pointer.
- * \param list list.
- *
- * \note It should be followed by a { } block or a single statement, as in a \c
- * for loop.
- */
-#define foreach_s(ptr, t, list)   \
-        for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next)
-
-#endif /* _U_SIMPLE_LIST_H_ */
diff --git a/mesalib/src/gallium/auxiliary/util/u_simple_shaders.c b/mesalib/src/gallium/auxiliary/util/u_simple_shaders.c
index edb30379b..c612b67e2 100644
--- a/mesalib/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/mesalib/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -246,9 +246,15 @@ util_make_fragment_tex_shader_writemask(struct pipe_context *pipe,
       ureg_MOV( ureg, out, imm );
    }
 
-   ureg_TEX( ureg, 
-             ureg_writemask(out, writemask),
-             tex_target, tex, sampler );
+   if (tex_target == TGSI_TEXTURE_BUFFER)
+      ureg_TXF(ureg,
+               ureg_writemask(out, writemask),
+               tex_target, tex, sampler);
+   else
+      ureg_TEX(ureg,
+               ureg_writemask(out, writemask),
+               tex_target, tex, sampler);
+
    ureg_END( ureg );
 
    return ureg_create_shader_and_destroy( ureg, pipe );
diff --git a/mesalib/src/gallium/auxiliary/util/u_slab.c b/mesalib/src/gallium/auxiliary/util/u_slab.c
index dbdebc6c9..7e7d43bd8 100644
--- a/mesalib/src/gallium/auxiliary/util/u_slab.c
+++ b/mesalib/src/gallium/auxiliary/util/u_slab.c
@@ -24,7 +24,7 @@
 
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_simple_list.h"
+#include "util/simple_list.h"
 
 #include <stdio.h>
 
diff --git a/mesalib/src/gallium/auxiliary/util/u_tests.c b/mesalib/src/gallium/auxiliary/util/u_tests.c
index b42f5e137..fe549723c 100644
--- a/mesalib/src/gallium/auxiliary/util/u_tests.c
+++ b/mesalib/src/gallium/auxiliary/util/u_tests.c
@@ -30,9 +30,13 @@
 #include "util/u_draw_quad.h"
 #include "util/u_format.h"
 #include "util/u_inlines.h"
+#include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_surface.h"
+#include "util/u_string.h"
 #include "util/u_tile.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_text.h"
 #include "cso_cache/cso_context.h"
 #include <stdio.h>
 
@@ -138,15 +142,70 @@ util_set_interleaved_vertex_elements(struct cso_context *cso,
    free(velem);
 }
 
+static void *
+util_set_passthrough_vertex_shader(struct cso_context *cso,
+                                   struct pipe_context *ctx,
+                                   bool window_space)
+{
+   static const uint vs_attribs[] = {
+      TGSI_SEMANTIC_POSITION,
+      TGSI_SEMANTIC_GENERIC
+   };
+   static const uint vs_indices[] = {0, 0};
+   void *vs;
+
+   vs = util_make_vertex_passthrough_shader(ctx, 2, vs_attribs, vs_indices,
+                                            window_space);
+   cso_set_vertex_shader_handle(cso, vs);
+   return vs;
+}
+
+static void
+util_set_common_states_and_clear(struct cso_context *cso, struct pipe_context *ctx,
+                                 struct pipe_resource *cb)
+{
+   static const float clear_color[] = {0.1, 0.1, 0.1, 0.1};
+
+   util_set_framebuffer_cb0(cso, ctx, cb);
+   util_set_blend_normal(cso);
+   util_set_dsa_disable(cso);
+   util_set_rasterizer_normal(cso);
+   util_set_max_viewport(cso, cb);
+
+   ctx->clear(ctx, PIPE_CLEAR_COLOR0, (void*)clear_color, 0, 0);
+}
+
+static void
+util_draw_fullscreen_quad(struct cso_context *cso)
+{
+   static float vertices[] = {
+     -1, -1, 0, 1,   0, 0, 0, 0,
+     -1,  1, 0, 1,   0, 1, 0, 0,
+      1,  1, 0, 1,   1, 1, 0, 0,
+      1, -1, 0, 1,   1, 0, 0, 0
+   };
+   util_set_interleaved_vertex_elements(cso, 2);
+   util_draw_user_vertex_buffer(cso, vertices, PIPE_PRIM_QUADS, 4, 2);
+}
+
+/**
+ * Probe and test if the rectangle contains the expected color.
+ *
+ * If "num_expected_colors" > 1, at least one expected color must match
+ * the probed color. "expected" should be an array of 4*num_expected_colors
+ * floats.
+ */
 static bool
-util_probe_rect_rgba(struct pipe_context *ctx, struct pipe_resource *tex,
-                     unsigned offx, unsigned offy, unsigned w, unsigned h,
-                     const float *expected)
+util_probe_rect_rgba_multi(struct pipe_context *ctx, struct pipe_resource *tex,
+                           unsigned offx, unsigned offy, unsigned w,
+                           unsigned h,
+                           const float *expected,
+                           unsigned num_expected_colors)
 {
    struct pipe_transfer *transfer;
    void *map;
    float *pixels = malloc(w * h * 4 * sizeof(float));
-   int x,y,c;
+   int x,y,e,c;
    bool pass = true;
 
    map = pipe_transfer_map(ctx, tex, 0, 0, PIPE_TRANSFER_READ,
@@ -154,21 +213,31 @@ util_probe_rect_rgba(struct pipe_context *ctx, struct pipe_resource *tex,
    pipe_get_tile_rgba(transfer, map, 0, 0, w, h, pixels);
    pipe_transfer_unmap(ctx, transfer);
 
-   for (y = 0; y < h; y++) {
-      for (x = 0; x < w; x++) {
-         float *probe = &pixels[(y*w + x)*4];
-
-         for (c = 0; c < 4; c++)
-            if (fabs(probe[c] - expected[c]) >= TOLERANCE) {
-               printf("Probe color at (%i,%i),  ", offx+x, offy+y);
-               printf("Expected: %.3f, %.3f, %.3f, %.3f,  ",
-                      expected[0], expected[1], expected[2], expected[3]);
-               printf("Got: %.3f, %.3f, %.3f, %.3f\n",
-                      probe[0], probe[1], probe[2], probe[2]);
-               pass = false;
-               goto done;
+   for (e = 0; e < num_expected_colors; e++) { /* whole rect must match one color */
+      for (y = 0; y < h; y++) {
+         for (x = 0; x < w; x++) {
+            float *probe = &pixels[(y*w + x)*4];
+
+            for (c = 0; c < 4; c++) {
+               if (fabs(probe[c] - expected[e*4+c]) >= TOLERANCE) {
+                  if (e < num_expected_colors-1)
+                     goto next_color; /* test the next expected color */
+
+                  printf("Probe color at (%i,%i),  ", offx+x, offy+y);
+                  printf("Expected: %.3f, %.3f, %.3f, %.3f,  ",
+                         expected[e*4], expected[e*4+1],
+                         expected[e*4+2], expected[e*4+3]);
+                  printf("Got: %.3f, %.3f, %.3f, %.3f\n",
+                         probe[0], probe[1], probe[2], probe[3]); /* RGBA, alpha last */
+                  pass = false;
+                  goto done;
+               }
             }
+         }
       }
+      break; /* this color was successful */
+
+   next_color:;
    }
 done:
 
@@ -176,6 +245,37 @@ done:
    return pass;
 }
 
+static bool
+util_probe_rect_rgba(struct pipe_context *ctx, struct pipe_resource *tex,
+                     unsigned offx, unsigned offy, unsigned w, unsigned h,
+                     const float *expected)
+{
+   return util_probe_rect_rgba_multi(ctx, tex, offx, offy, w, h, expected, 1);
+}
+
+enum {
+   SKIP = -1,
+   FAIL = 0, /* also "false" */
+   PASS = 1 /* also "true" */
+};
+
+static void
+util_report_result_helper(int status, const char *name, ...)
+{
+   char buf[256];
+   va_list ap;
+
+   va_start(ap, name);
+   util_vsnprintf(buf, sizeof(buf), name, ap);
+   va_end(ap);
+
+   printf("Test(%s) = %s\n", buf,
+          status == SKIP ? "skip" :
+          status == PASS ? "pass" : "fail");
+}
+
+#define util_report_result(status) util_report_result_helper(status, __func__)
+
 /**
  * Test TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION.
  *
@@ -196,38 +296,18 @@ tgsi_vs_window_space_position(struct pipe_context *ctx)
    struct pipe_resource *cb;
    void *fs, *vs;
    bool pass = true;
-
-   static uint vs_attribs[] = {
-      TGSI_SEMANTIC_POSITION,
-      TGSI_SEMANTIC_GENERIC
-   };
-   static uint vs_indices[] = {0, 0};
-   static float vertices[] = {
-       0,   0, 0, 0,   1,  0, 0, 1,
-       0, 256, 0, 0,   1,  0, 0, 1,
-     256, 256, 0, 0,   1,  0, 0, 1,
-     256,   0, 0, 0,   1,  0, 0, 1,
-   };
-   static float red[] = {1, 0, 0, 1};
-   static float clear_color[] = {0.1, 0.1, 0.1, 0.1};
+   static const float red[] = {1, 0, 0, 1};
 
    if (!ctx->screen->get_param(ctx->screen,
                                PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION)) {
-      printf("Test(%s) = skip\n", __func__);
+      util_report_result(SKIP);
       return;
    }
 
    cso = cso_create_context(ctx);
    cb = util_create_texture2d(ctx->screen, 256, 256,
                               PIPE_FORMAT_R8G8B8A8_UNORM);
-
-   /* Set states. */
-   util_set_framebuffer_cb0(cso, ctx, cb);
-   util_set_blend_normal(cso);
-   util_set_dsa_disable(cso);
-   util_set_rasterizer_normal(cso);
-   util_set_max_viewport(cso, cb);
-   util_set_interleaved_vertex_elements(cso, 2);
+   util_set_common_states_and_clear(cso, ctx, cb);
 
    /* Fragment shader. */
    fs = util_make_fragment_passthrough_shader(ctx, TGSI_SEMANTIC_GENERIC,
@@ -235,13 +315,19 @@ tgsi_vs_window_space_position(struct pipe_context *ctx)
    cso_set_fragment_shader_handle(cso, fs);
 
    /* Vertex shader. */
-   vs = util_make_vertex_passthrough_shader(ctx, 2, vs_attribs, vs_indices,
-                                            TRUE);
-   cso_set_vertex_shader_handle(cso, vs);
-
-   /* Clear and draw. */
-   ctx->clear(ctx, PIPE_CLEAR_COLOR0, (void*)clear_color, 0, 0);
-   util_draw_user_vertex_buffer(cso, vertices, PIPE_PRIM_QUADS, 4, 2);
+   vs = util_set_passthrough_vertex_shader(cso, ctx, true);
+
+   /* Draw. */
+   {
+      static float vertices[] = {
+          0,   0, 0, 0,   1,  0, 0, 1,
+          0, 256, 0, 0,   1,  0, 0, 1,
+        256, 256, 0, 0,   1,  0, 0, 1,
+        256,   0, 0, 0,   1,  0, 0, 1,
+      };
+      util_set_interleaved_vertex_elements(cso, 2);
+      util_draw_user_vertex_buffer(cso, vertices, PIPE_PRIM_QUADS, 4, 2);
+   }
 
    /* Probe pixels. */
    pass = pass && util_probe_rect_rgba(ctx, cb, 0, 0,
@@ -253,7 +339,114 @@ tgsi_vs_window_space_position(struct pipe_context *ctx)
    ctx->delete_fs_state(ctx, fs);
    pipe_resource_reference(&cb, NULL);
 
-   printf("Test(%s) = %s\n", __func__, pass ? "pass" : "fail");
+   util_report_result(pass);
+}
+
+static void
+null_sampler_view(struct pipe_context *ctx, unsigned tgsi_tex_target)
+{
+   struct cso_context *cso;
+   struct pipe_resource *cb;
+   void *fs, *vs;
+   bool pass = true;
+   /* 2 expected colors: */
+   static const float expected_tex[] = {0, 0, 0, 1,
+                                        0, 0, 0, 0};
+   static const float expected_buf[] = {0, 0, 0, 0};
+   const float *expected = tgsi_tex_target == TGSI_TEXTURE_BUFFER ?
+                              expected_buf : expected_tex;
+   unsigned num_expected = tgsi_tex_target == TGSI_TEXTURE_BUFFER ? 1 : 2;
+
+   if (tgsi_tex_target == TGSI_TEXTURE_BUFFER &&
+       !ctx->screen->get_param(ctx->screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS)) {
+      util_report_result_helper(SKIP, "%s: %s", __func__,
+                                tgsi_texture_names[tgsi_tex_target]);
+      return;
+   }
+
+   cso = cso_create_context(ctx);
+   cb = util_create_texture2d(ctx->screen, 256, 256,
+                              PIPE_FORMAT_R8G8B8A8_UNORM);
+   util_set_common_states_and_clear(cso, ctx, cb);
+
+   ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, NULL);
+
+   /* Fragment shader. */
+   fs = util_make_fragment_tex_shader(ctx, tgsi_tex_target,
+                                      TGSI_INTERPOLATE_LINEAR);
+   cso_set_fragment_shader_handle(cso, fs);
+
+   /* Vertex shader. */
+   vs = util_set_passthrough_vertex_shader(cso, ctx, false);
+   util_draw_fullscreen_quad(cso);
+
+   /* Probe pixels. */
+   pass = pass && util_probe_rect_rgba_multi(ctx, cb, 0, 0,
+                                  cb->width0, cb->height0, expected,
+                                  num_expected);
+
+   /* Cleanup. */
+   cso_destroy_context(cso);
+   ctx->delete_vs_state(ctx, vs);
+   ctx->delete_fs_state(ctx, fs);
+   pipe_resource_reference(&cb, NULL);
+
+   util_report_result_helper(pass, "%s: %s", __func__,
+                             tgsi_texture_names[tgsi_tex_target]);
+}
+
+static void
+null_constant_buffer(struct pipe_context *ctx)
+{
+   struct cso_context *cso;
+   struct pipe_resource *cb;
+   void *fs, *vs;
+   bool pass = true;
+   static const float zero[] = {0, 0, 0, 0};
+
+   cso = cso_create_context(ctx);
+   cb = util_create_texture2d(ctx->screen, 256, 256,
+                              PIPE_FORMAT_R8G8B8A8_UNORM);
+   util_set_common_states_and_clear(cso, ctx, cb);
+
+   ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, 0, NULL);
+
+   /* Fragment shader. */
+   {
+      static const char *text = /* I don't like ureg... */
+            "FRAG\n"
+            "DCL CONST[0]\n"
+            "DCL OUT[0], COLOR\n"
+
+            "MOV OUT[0], CONST[0]\n"
+            "END\n";
+      struct tgsi_token tokens[1000];
+      struct pipe_shader_state state = {tokens};
+
+      if (!tgsi_text_translate(text, tokens, Elements(tokens))) {
+         puts("Can't compile a fragment shader.");
+         util_report_result(FAIL);
+         return;
+      }
+      fs = ctx->create_fs_state(ctx, &state);
+      cso_set_fragment_shader_handle(cso, fs);
+   }
+
+   /* Vertex shader. */
+   vs = util_set_passthrough_vertex_shader(cso, ctx, false);
+   util_draw_fullscreen_quad(cso);
+
+   /* Probe pixels. */
+   pass = pass && util_probe_rect_rgba(ctx, cb, 0, 0, cb->width0,
+                                       cb->height0, zero);
+
+   /* Cleanup. */
+   cso_destroy_context(cso);
+   ctx->delete_vs_state(ctx, vs);
+   ctx->delete_fs_state(ctx, fs);
+   pipe_resource_reference(&cb, NULL);
+
+   util_report_result(pass);
 }
 
 /**
@@ -261,7 +454,17 @@ tgsi_vs_window_space_position(struct pipe_context *ctx)
  * context_create.
  */
 void
-util_run_tests(struct pipe_context *ctx)
+util_run_tests(struct pipe_screen *screen)
 {
+   struct pipe_context *ctx = screen->context_create(screen, NULL);
+
    tgsi_vs_window_space_position(ctx);
+   null_sampler_view(ctx, TGSI_TEXTURE_2D);
+   null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);
+   null_constant_buffer(ctx);
+
+   ctx->destroy(ctx);
+
+   puts("Done. Exiting..");
+   exit(0);
 }
diff --git a/mesalib/src/gallium/auxiliary/util/u_tests.h b/mesalib/src/gallium/auxiliary/util/u_tests.h
index a1439347d..49ae54f87 100644
--- a/mesalib/src/gallium/auxiliary/util/u_tests.h
+++ b/mesalib/src/gallium/auxiliary/util/u_tests.h
@@ -30,8 +30,8 @@
 
 #include "pipe/p_compiler.h"
 
-struct pipe_context;
+struct pipe_screen;
 
-void util_run_tests(struct pipe_context *ctx);
+void util_run_tests(struct pipe_screen *screen);
 
 #endif
diff --git a/mesalib/src/glsl/Android.mk b/mesalib/src/glsl/Android.mk
index 1cbc5c6d2..38c2087a4 100644
--- a/mesalib/src/glsl/Android.mk
+++ b/mesalib/src/glsl/Android.mk
@@ -27,7 +27,6 @@ LOCAL_PATH := $(call my-dir)
 
 include $(LOCAL_PATH)/Makefile.sources
 
-GLSL_SRCDIR = .
 # ---------------------------------------
 # Build libmesa_glsl
 # ---------------------------------------
diff --git a/mesalib/src/glsl/Makefile.am b/mesalib/src/glsl/Makefile.am
index 9a3131738..5a0a643da 100644
--- a/mesalib/src/glsl/Makefile.am
+++ b/mesalib/src/glsl/Makefile.am
@@ -19,13 +19,19 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+AUTOMAKE_OPTIONS = subdir-objects
+
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/glsl/glcpp \
+	-I$(top_srcdir)/src/glsl/nir \
 	-I$(top_srcdir)/src/gtest/include \
+	-I$(top_builddir)/src/glsl/nir \
 	$(DEFINES)
 AM_CFLAGS = $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
@@ -35,12 +41,19 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README	\
 	glsl_parser.yy					\
 	glcpp/glcpp-lex.l				\
 	glcpp/glcpp-parse.y				\
+	nir/nir_algebraic.py				\
+	nir/nir_constant_expressions.py			\
+	nir/nir_opcodes.py				\
+	nir/nir_opcodes_c.py				\
+	nir/nir_opcodes_h.py				\
+	nir/nir_opt_algebraic.py			\
 	SConscript
 
 include Makefile.sources
 
 TESTS = glcpp/tests/glcpp-test				\
 	glcpp/tests/glcpp-test-cr-lf			\
+	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/optimization-test				\
 	tests/sampler-types-test                        \
@@ -54,17 +67,20 @@ noinst_LTLIBRARIES = libglsl.la libglcpp.la
 check_PROGRAMS =					\
 	glcpp/glcpp					\
 	glsl_test					\
+	tests/blob-test					\
 	tests/general-ir-test				\
 	tests/sampler-types-test			\
 	tests/uniform-initializer-test
 
 noinst_PROGRAMS = glsl_compiler
 
+tests_blob_test_SOURCES =				\
+	tests/blob_test.c
+tests_blob_test_LDADD =					\
+	$(top_builddir)/src/glsl/libglsl.la
+
 tests_general_ir_test_SOURCES =		\
-	$(top_srcdir)/src/mesa/main/imports.c		\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
+	standalone_scaffolding.cpp			\
 	tests/builtin_variable_test.cpp			\
 	tests/invalidate_locations_test.cpp		\
 	tests/general_ir_test.cpp			\
@@ -75,12 +91,10 @@ tests_general_ir_test_CFLAGS =				\
 tests_general_ir_test_LDADD =				\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 tests_uniform_initializer_test_SOURCES =		\
-	$(top_srcdir)/src/mesa/main/imports.c		\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
 	tests/copy_constant_to_storage_tests.cpp	\
 	tests/set_uniform_initializer_tests.cpp		\
 	tests/uniform_initializer_utils.cpp		\
@@ -91,11 +105,10 @@ tests_uniform_initializer_test_CFLAGS =			\
 tests_uniform_initializer_test_LDADD =			\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 tests_sampler_types_test_SOURCES =			\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c\
-	$(top_srcdir)/src/mesa/program/symbol_table.c	\
 	tests/sampler_types_test.cpp			\
 	tests/common.c
 tests_sampler_types_test_CFLAGS =			\
@@ -103,6 +116,7 @@ tests_sampler_types_test_CFLAGS =			\
 tests_sampler_types_test_LDADD =			\
 	$(top_builddir)/src/gtest/libgtest.la		\
 	$(top_builddir)/src/glsl/libglsl.la		\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 libglcpp_la_LIBADD =					\
@@ -115,10 +129,10 @@ libglcpp_la_SOURCES =					\
 
 glcpp_glcpp_SOURCES =					\
 	glcpp/glcpp.c					\
-	tests/common.c					\
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c
+	tests/common.c
 glcpp_glcpp_LDADD =					\
 	libglcpp.la					\
+	$(top_builddir)/src/libglsl_util.la		\
 	-lm
 
 libglsl_la_LIBADD = libglcpp.la
@@ -126,29 +140,28 @@ libglsl_la_SOURCES =					\
 	glsl_lexer.cpp					\
 	glsl_parser.cpp					\
 	glsl_parser.h					\
-	$(LIBGLSL_FILES)
+	$(LIBGLSL_FILES)				\
+	$(NIR_FILES)
 
 glsl_compiler_SOURCES = \
-	$(top_srcdir)/src/mesa/main/imports.c \
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c \
-	$(top_srcdir)/src/mesa/program/symbol_table.c \
 	$(GLSL_COMPILER_CXX_FILES)
 
 glsl_compiler_LDADD =					\
 	libglsl.la					\
+	$(top_builddir)/src/libglsl_util.la		\
 	$(PTHREAD_LIBS)
 
 glsl_test_SOURCES = \
-	$(top_srcdir)/src/mesa/main/imports.c \
-	$(top_srcdir)/src/mesa/program/prog_hash_table.c \
-	$(top_srcdir)/src/mesa/program/symbol_table.c \
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
+	standalone_scaffolding.cpp \
 	tests/common.c \
 	test.cpp \
 	test_optpass.cpp \
 	test_optpass.h
 
-glsl_test_LDADD = libglsl.la
+glsl_test_LDADD =					\
+	libglsl.la					\
+	$(top_builddir)/src/libglsl_util.la		\
+	$(PTHREAD_LIBS)
 
 # We write our own rules for yacc and lex below. We'd rather use automake,
 # but automake makes it especially difficult for a number of reasons:
@@ -181,14 +194,14 @@ am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =
 
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(GLSL_BUILDDIR)/glsl_parser.h $<
+	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
 
 glsl_lexer.cpp: glsl_lexer.ll
 	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
 
 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
 	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(GLSL_BUILDDIR)/glcpp/glcpp-parse.h $<
+	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
 
 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
 	$(AM_V_at)$(MKDIR_P) glcpp
@@ -204,7 +217,12 @@ BUILT_SOURCES =						\
 	glsl_parser.cpp					\
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
-	glcpp/glcpp-lex.c
+	glcpp/glcpp-lex.c				\
+	nir/nir_builder_opcodes.h				\
+	nir/nir_constant_expressions.c			\
+	nir/nir_opcodes.c				\
+	nir/nir_opcodes.h				\
+	nir/nir_opt_algebraic.c
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
@@ -216,3 +234,25 @@ clean-local:
 dist-hook:
 	$(RM) glcpp/tests/*.out
 	$(RM) glcpp/tests/subtest*/*.out
+
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+
+nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+
+nir/nir.h: nir/nir_opcodes.h
+
+nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+
+nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
+	$(MKDIR_P) nir;							\
+	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
diff --git a/mesalib/src/glsl/Makefile.sources b/mesalib/src/glsl/Makefile.sources
index e2acbac25..d0210d170 100644
--- a/mesalib/src/glsl/Makefile.sources
+++ b/mesalib/src/glsl/Makefile.sources
@@ -1,146 +1,197 @@
 # shared source lists for Makefile, SConscript, and Android.mk
 
-GLSL_SRCDIR = $(top_srcdir)/src/glsl
-GLSL_BUILDDIR = $(top_builddir)/src/glsl
-
 # libglcpp
 
 LIBGLCPP_FILES = \
-	$(GLSL_SRCDIR)/glcpp/glcpp.h \
-	$(GLSL_SRCDIR)/glcpp/pp.c
+	glcpp/glcpp.h \
+	glcpp/pp.c
 
 LIBGLCPP_GENERATED_FILES = \
-	$(GLSL_BUILDDIR)/glcpp/glcpp-lex.c \
-	$(GLSL_BUILDDIR)/glcpp/glcpp-parse.c
+	glcpp/glcpp-lex.c \
+	glcpp/glcpp-parse.c
+
+NIR_GENERATED_FILES = \
+	nir/nir_builder_opcodes.h \
+	nir/nir_constant_expressions.c \
+	nir/nir_opcodes.c \
+	nir/nir_opcodes.h \
+	nir/nir_opt_algebraic.c
+
+NIR_FILES = \
+	nir/glsl_to_nir.cpp \
+	nir/glsl_to_nir.h \
+	nir/nir.c \
+	nir/nir.h \
+	nir/nir_constant_expressions.h \
+	nir/nir_dominance.c \
+	nir/nir_from_ssa.c \
+	nir/nir_intrinsics.c \
+	nir/nir_intrinsics.h \
+	nir/nir_live_variables.c \
+	nir/nir_lower_alu_to_scalar.c \
+	nir/nir_lower_atomics.c \
+	nir/nir_lower_global_vars_to_local.c \
+	nir/nir_lower_locals_to_regs.c \
+	nir/nir_lower_io.c \
+	nir/nir_lower_phis_to_scalar.c \
+	nir/nir_lower_samplers.cpp \
+	nir/nir_lower_system_values.c \
+	nir/nir_lower_to_source_mods.c \
+	nir/nir_lower_vars_to_ssa.c \
+	nir/nir_lower_var_copies.c \
+	nir/nir_lower_vec_to_movs.c \
+	nir/nir_metadata.c \
+	nir/nir_opt_constant_folding.c \
+	nir/nir_opt_copy_propagate.c \
+	nir/nir_opt_cse.c \
+	nir/nir_opt_dce.c \
+	nir/nir_opt_gcm.c \
+	nir/nir_opt_global_to_local.c \
+	nir/nir_opt_peephole_select.c \
+	nir/nir_opt_remove_phis.c \
+	nir/nir_print.c \
+	nir/nir_remove_dead_variables.c \
+	nir/nir_search.c \
+	nir/nir_search.h \
+	nir/nir_split_var_copies.c \
+	nir/nir_to_ssa.c \
+	nir/nir_types.h \
+	nir/nir_validate.c \
+	nir/nir_worklist.c \
+	nir/nir_worklist.h \
+	nir/nir_types.cpp \
+	$(NIR_GENERATED_FILES)
 
 # libglsl
 
 LIBGLSL_FILES = \
-	$(GLSL_SRCDIR)/ast.h \
-	$(GLSL_SRCDIR)/ast_array_index.cpp \
-	$(GLSL_SRCDIR)/ast_expr.cpp \
-	$(GLSL_SRCDIR)/ast_function.cpp \
-	$(GLSL_SRCDIR)/ast_to_hir.cpp \
-	$(GLSL_SRCDIR)/ast_type.cpp \
-	$(GLSL_SRCDIR)/builtin_functions.cpp \
-	$(GLSL_SRCDIR)/builtin_type_macros.h \
-	$(GLSL_SRCDIR)/builtin_types.cpp \
-	$(GLSL_SRCDIR)/builtin_variables.cpp \
-	$(GLSL_SRCDIR)/glsl_parser_extras.cpp \
-	$(GLSL_SRCDIR)/glsl_parser_extras.h \
-	$(GLSL_SRCDIR)/glsl_symbol_table.cpp \
-	$(GLSL_SRCDIR)/glsl_symbol_table.h \
-	$(GLSL_SRCDIR)/glsl_types.cpp \
-	$(GLSL_SRCDIR)/glsl_types.h \
-	$(GLSL_SRCDIR)/hir_field_selection.cpp \
-	$(GLSL_SRCDIR)/ir_basic_block.cpp \
-	$(GLSL_SRCDIR)/ir_basic_block.h \
-	$(GLSL_SRCDIR)/ir_builder.cpp \
-	$(GLSL_SRCDIR)/ir_builder.h \
-	$(GLSL_SRCDIR)/ir_clone.cpp \
-	$(GLSL_SRCDIR)/ir_constant_expression.cpp \
-	$(GLSL_SRCDIR)/ir.cpp \
-	$(GLSL_SRCDIR)/ir.h \
-	$(GLSL_SRCDIR)/ir_equals.cpp \
-	$(GLSL_SRCDIR)/ir_expression_flattening.cpp \
-	$(GLSL_SRCDIR)/ir_expression_flattening.h \
-	$(GLSL_SRCDIR)/ir_function_can_inline.cpp \
-	$(GLSL_SRCDIR)/ir_function_detect_recursion.cpp \
-	$(GLSL_SRCDIR)/ir_function_inlining.h \
-	$(GLSL_SRCDIR)/ir_function.cpp \
-	$(GLSL_SRCDIR)/ir_hierarchical_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_hierarchical_visitor.h \
-	$(GLSL_SRCDIR)/ir_hv_accept.cpp \
-	$(GLSL_SRCDIR)/ir_import_prototypes.cpp \
-	$(GLSL_SRCDIR)/ir_optimization.h \
-	$(GLSL_SRCDIR)/ir_print_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_print_visitor.h \
-	$(GLSL_SRCDIR)/ir_reader.cpp \
-	$(GLSL_SRCDIR)/ir_reader.h \
-	$(GLSL_SRCDIR)/ir_rvalue_visitor.cpp \
-	$(GLSL_SRCDIR)/ir_rvalue_visitor.h \
-	$(GLSL_SRCDIR)/ir_set_program_inouts.cpp \
-	$(GLSL_SRCDIR)/ir_uniform.h \
-	$(GLSL_SRCDIR)/ir_validate.cpp \
-	$(GLSL_SRCDIR)/ir_variable_refcount.cpp \
-	$(GLSL_SRCDIR)/ir_variable_refcount.h \
-	$(GLSL_SRCDIR)/ir_visitor.h \
-	$(GLSL_SRCDIR)/linker.cpp \
-	$(GLSL_SRCDIR)/linker.h \
-	$(GLSL_SRCDIR)/link_atomics.cpp \
-	$(GLSL_SRCDIR)/link_functions.cpp \
-	$(GLSL_SRCDIR)/link_interface_blocks.cpp \
-	$(GLSL_SRCDIR)/link_uniforms.cpp \
-	$(GLSL_SRCDIR)/link_uniform_initializers.cpp \
-	$(GLSL_SRCDIR)/link_uniform_block_active_visitor.cpp \
-	$(GLSL_SRCDIR)/link_uniform_block_active_visitor.h \
-	$(GLSL_SRCDIR)/link_uniform_blocks.cpp \
-	$(GLSL_SRCDIR)/link_varyings.cpp \
-	$(GLSL_SRCDIR)/link_varyings.h \
-	$(GLSL_SRCDIR)/list.h \
-	$(GLSL_SRCDIR)/loop_analysis.cpp \
-	$(GLSL_SRCDIR)/loop_analysis.h \
-	$(GLSL_SRCDIR)/loop_controls.cpp \
-	$(GLSL_SRCDIR)/loop_unroll.cpp \
-	$(GLSL_SRCDIR)/lower_clip_distance.cpp \
-	$(GLSL_SRCDIR)/lower_const_arrays_to_uniforms.cpp \
-	$(GLSL_SRCDIR)/lower_discard.cpp \
-	$(GLSL_SRCDIR)/lower_discard_flow.cpp \
-	$(GLSL_SRCDIR)/lower_if_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_instructions.cpp \
-	$(GLSL_SRCDIR)/lower_jumps.cpp \
-	$(GLSL_SRCDIR)/lower_mat_op_to_vec.cpp \
-	$(GLSL_SRCDIR)/lower_noise.cpp \
-	$(GLSL_SRCDIR)/lower_offset_array.cpp \
-	$(GLSL_SRCDIR)/lower_packed_varyings.cpp \
-	$(GLSL_SRCDIR)/lower_named_interface_blocks.cpp \
-	$(GLSL_SRCDIR)/lower_packing_builtins.cpp \
-	$(GLSL_SRCDIR)/lower_texture_projection.cpp \
-	$(GLSL_SRCDIR)/lower_variable_index_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_vec_index_to_cond_assign.cpp \
-	$(GLSL_SRCDIR)/lower_vec_index_to_swizzle.cpp \
-	$(GLSL_SRCDIR)/lower_vector.cpp \
-	$(GLSL_SRCDIR)/lower_vector_insert.cpp \
-	$(GLSL_SRCDIR)/lower_vertex_id.cpp \
-	$(GLSL_SRCDIR)/lower_output_reads.cpp \
-	$(GLSL_SRCDIR)/lower_ubo_reference.cpp \
-	$(GLSL_SRCDIR)/opt_algebraic.cpp \
-	$(GLSL_SRCDIR)/opt_array_splitting.cpp \
-	$(GLSL_SRCDIR)/opt_constant_folding.cpp \
-	$(GLSL_SRCDIR)/opt_constant_propagation.cpp \
-	$(GLSL_SRCDIR)/opt_constant_variable.cpp \
-	$(GLSL_SRCDIR)/opt_copy_propagation.cpp \
-	$(GLSL_SRCDIR)/opt_copy_propagation_elements.cpp \
-	$(GLSL_SRCDIR)/opt_cse.cpp \
-	$(GLSL_SRCDIR)/opt_dead_builtin_variables.cpp \
-	$(GLSL_SRCDIR)/opt_dead_builtin_varyings.cpp \
-	$(GLSL_SRCDIR)/opt_dead_code.cpp \
-	$(GLSL_SRCDIR)/opt_dead_code_local.cpp \
-	$(GLSL_SRCDIR)/opt_dead_functions.cpp \
-	$(GLSL_SRCDIR)/opt_flatten_nested_if_blocks.cpp \
-	$(GLSL_SRCDIR)/opt_flip_matrices.cpp \
-	$(GLSL_SRCDIR)/opt_function_inlining.cpp \
-	$(GLSL_SRCDIR)/opt_if_simplification.cpp \
-	$(GLSL_SRCDIR)/opt_minmax.cpp \
-	$(GLSL_SRCDIR)/opt_noop_swizzle.cpp \
-	$(GLSL_SRCDIR)/opt_rebalance_tree.cpp \
-	$(GLSL_SRCDIR)/opt_redundant_jumps.cpp \
-	$(GLSL_SRCDIR)/opt_structure_splitting.cpp \
-	$(GLSL_SRCDIR)/opt_swizzle_swizzle.cpp \
-	$(GLSL_SRCDIR)/opt_tree_grafting.cpp \
-	$(GLSL_SRCDIR)/opt_vectorize.cpp \
-	$(GLSL_SRCDIR)/program.h \
-	$(GLSL_SRCDIR)/s_expression.cpp \
-	$(GLSL_SRCDIR)/s_expression.h
+	ast.h \
+	ast_array_index.cpp \
+	ast_expr.cpp \
+	ast_function.cpp \
+	ast_to_hir.cpp \
+	ast_type.cpp \
+	blob.c \
+	blob.h \
+	builtin_functions.cpp \
+	builtin_type_macros.h \
+	builtin_types.cpp \
+	builtin_variables.cpp \
+	glsl_parser_extras.cpp \
+	glsl_parser_extras.h \
+	glsl_symbol_table.cpp \
+	glsl_symbol_table.h \
+	glsl_types.cpp \
+	glsl_types.h \
+	hir_field_selection.cpp \
+	ir_basic_block.cpp \
+	ir_basic_block.h \
+	ir_builder.cpp \
+	ir_builder.h \
+	ir_clone.cpp \
+	ir_constant_expression.cpp \
+	ir.cpp \
+	ir.h \
+	ir_equals.cpp \
+	ir_expression_flattening.cpp \
+	ir_expression_flattening.h \
+	ir_function_can_inline.cpp \
+	ir_function_detect_recursion.cpp \
+	ir_function_inlining.h \
+	ir_function.cpp \
+	ir_hierarchical_visitor.cpp \
+	ir_hierarchical_visitor.h \
+	ir_hv_accept.cpp \
+	ir_import_prototypes.cpp \
+	ir_optimization.h \
+	ir_print_visitor.cpp \
+	ir_print_visitor.h \
+	ir_reader.cpp \
+	ir_reader.h \
+	ir_rvalue_visitor.cpp \
+	ir_rvalue_visitor.h \
+	ir_set_program_inouts.cpp \
+	ir_uniform.h \
+	ir_validate.cpp \
+	ir_variable_refcount.cpp \
+	ir_variable_refcount.h \
+	ir_visitor.h \
+	linker.cpp \
+	linker.h \
+	link_atomics.cpp \
+	link_functions.cpp \
+	link_interface_blocks.cpp \
+	link_uniforms.cpp \
+	link_uniform_initializers.cpp \
+	link_uniform_block_active_visitor.cpp \
+	link_uniform_block_active_visitor.h \
+	link_uniform_blocks.cpp \
+	link_varyings.cpp \
+	link_varyings.h \
+	list.h \
+	loop_analysis.cpp \
+	loop_analysis.h \
+	loop_controls.cpp \
+	loop_unroll.cpp \
+	lower_clip_distance.cpp \
+	lower_const_arrays_to_uniforms.cpp \
+	lower_discard.cpp \
+	lower_discard_flow.cpp \
+	lower_if_to_cond_assign.cpp \
+	lower_instructions.cpp \
+	lower_jumps.cpp \
+	lower_mat_op_to_vec.cpp \
+	lower_noise.cpp \
+	lower_offset_array.cpp \
+	lower_packed_varyings.cpp \
+	lower_named_interface_blocks.cpp \
+	lower_packing_builtins.cpp \
+	lower_texture_projection.cpp \
+	lower_variable_index_to_cond_assign.cpp \
+	lower_vec_index_to_cond_assign.cpp \
+	lower_vec_index_to_swizzle.cpp \
+	lower_vector.cpp \
+	lower_vector_insert.cpp \
+	lower_vertex_id.cpp \
+	lower_output_reads.cpp \
+	lower_ubo_reference.cpp \
+	opt_algebraic.cpp \
+	opt_array_splitting.cpp \
+	opt_constant_folding.cpp \
+	opt_constant_propagation.cpp \
+	opt_constant_variable.cpp \
+	opt_copy_propagation.cpp \
+	opt_copy_propagation_elements.cpp \
+	opt_cse.cpp \
+	opt_dead_builtin_variables.cpp \
+	opt_dead_builtin_varyings.cpp \
+	opt_dead_code.cpp \
+	opt_dead_code_local.cpp \
+	opt_dead_functions.cpp \
+	opt_flatten_nested_if_blocks.cpp \
+	opt_flip_matrices.cpp \
+	opt_function_inlining.cpp \
+	opt_if_simplification.cpp \
+	opt_minmax.cpp \
+	opt_noop_swizzle.cpp \
+	opt_rebalance_tree.cpp \
+	opt_redundant_jumps.cpp \
+	opt_structure_splitting.cpp \
+	opt_swizzle_swizzle.cpp \
+	opt_tree_grafting.cpp \
+	opt_vectorize.cpp \
+	program.h \
+	s_expression.cpp \
+	s_expression.h
 
 # glsl_compiler
 
 GLSL_COMPILER_CXX_FILES = \
-	$(GLSL_SRCDIR)/standalone_scaffolding.cpp \
-	$(GLSL_SRCDIR)/standalone_scaffolding.h \
-	$(GLSL_SRCDIR)/main.cpp
+	standalone_scaffolding.cpp \
+	standalone_scaffolding.h \
+	main.cpp
 
 # libglsl generated sources
 LIBGLSL_GENERATED_CXX_FILES = \
-	$(GLSL_BUILDDIR)/glsl_lexer.cpp \
-	$(GLSL_BUILDDIR)/glsl_parser.cpp
+	glsl_lexer.cpp \
+	glsl_parser.cpp
diff --git a/mesalib/src/glsl/README b/mesalib/src/glsl/README
index 2f93f12ff..bfcf69f90 100644
--- a/mesalib/src/glsl/README
+++ b/mesalib/src/glsl/README
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
 
 You can then use the new expression from builtins (if all backends
 would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_fract, for example).
+expression type (see ir_mod_to_floor, for example).
 
 Q: How is memory management handled in the compiler?
 
diff --git a/mesalib/src/glsl/SConscript b/mesalib/src/glsl/SConscript
index 847e96246..21c8266a6 100644
--- a/mesalib/src/glsl/SConscript
+++ b/mesalib/src/glsl/SConscript
@@ -11,6 +11,8 @@ env.Prepend(CPPPATH = [
     '#src',
     '#src/mapi',
     '#src/mesa',
+    '#src/gallium/include',
+    '#src/gallium/auxiliary',
     '#src/glsl',
     '#src/glsl/glcpp',
 ])
diff --git a/mesalib/src/glsl/ast.h b/mesalib/src/glsl/ast.h
index 6995ae83b..ef74e5137 100644
--- a/mesalib/src/glsl/ast.h
+++ b/mesalib/src/glsl/ast.h
@@ -189,6 +189,7 @@ enum ast_operators {
    ast_uint_constant,
    ast_float_constant,
    ast_bool_constant,
+   ast_double_constant,
 
    ast_sequence,
    ast_aggregate
@@ -236,6 +237,7 @@ public:
       float float_constant;
       unsigned uint_constant;
       int bool_constant;
+      double double_constant;
    } primary_expression;
 
 
diff --git a/mesalib/src/glsl/ast_function.cpp b/mesalib/src/glsl/ast_function.cpp
index cbff9d8b4..918be6966 100644
--- a/mesalib/src/glsl/ast_function.cpp
+++ b/mesalib/src/glsl/ast_function.cpp
@@ -573,6 +573,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
 	 result = new(ctx) ir_expression(ir_unop_i2u,
 		  new(ctx) ir_expression(ir_unop_b2i, src));
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2u, src);
+	 break;
       }
       break;
    case GLSL_TYPE_INT:
@@ -586,6 +589,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_BOOL:
 	 result = new(ctx) ir_expression(ir_unop_b2i, src);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2i, src);
+	 break;
       }
       break;
    case GLSL_TYPE_FLOAT:
@@ -599,6 +605,9 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_BOOL:
 	 result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL);
+	 break;
       }
       break;
    case GLSL_TYPE_BOOL:
@@ -613,8 +622,27 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
       case GLSL_TYPE_FLOAT:
 	 result = new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+         result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL);
+         break;
       }
       break;
+   case GLSL_TYPE_DOUBLE:
+      switch (b) {
+      case GLSL_TYPE_INT:
+         result = new(ctx) ir_expression(ir_unop_i2d, src);
+         break;
+      case GLSL_TYPE_UINT:
+         result = new(ctx) ir_expression(ir_unop_u2d, src);
+         break;
+      case GLSL_TYPE_BOOL:
+         result = new(ctx) ir_expression(ir_unop_f2d,
+                  new(ctx) ir_expression(ir_unop_b2f, src));
+         break;
+      case GLSL_TYPE_FLOAT:
+         result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL);
+         break;
+      }
    }
 
    assert(result != NULL);
@@ -711,9 +739,9 @@ process_vec_mat_constructor(exec_list *instructions,
 
       /* Apply implicit conversions (not the scalar constructor rules!). See
        * the spec quote above. */
-      if (constructor_type->is_float()) {
+      if (constructor_type->base_type != result->type->base_type) {
          const glsl_type *desired_type =
-            glsl_type::get_instance(GLSL_TYPE_FLOAT,
+            glsl_type::get_instance(constructor_type->base_type,
                                     ir->type->vector_elements,
                                     ir->type->matrix_columns);
          if (result->type->can_implicitly_convert_to(desired_type, state)) {
@@ -847,13 +875,17 @@ process_array_constructor(exec_list *instructions,
    foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
       ir_rvalue *result = ir;
 
+      const glsl_base_type element_base_type =
+         constructor_type->element_type()->base_type;
+
       /* Apply implicit conversions (not the scalar constructor rules!). See
        * the spec quote above. */
-      if (constructor_type->element_type()->is_float()) {
-	 const glsl_type *desired_type =
-	    glsl_type::get_instance(GLSL_TYPE_FLOAT,
-				    ir->type->vector_elements,
-				    ir->type->matrix_columns);
+      if (element_base_type != result->type->base_type) {
+         const glsl_type *desired_type =
+            glsl_type::get_instance(element_base_type,
+                                    ir->type->vector_elements,
+                                    ir->type->matrix_columns);
+
 	 if (result->type->can_implicitly_convert_to(desired_type, state)) {
 	    /* Even though convert_component() implements the constructor
 	     * conversion rules (not the implicit conversion rules), its safe
@@ -1012,6 +1044,9 @@ emit_inline_vector_constructor(const glsl_type *type,
 	       case GLSL_TYPE_FLOAT:
 		  data.f[i + base_component] = c->get_float_component(i);
 		  break;
+	       case GLSL_TYPE_DOUBLE:
+		  data.d[i + base_component] = c->get_double_component(i);
+		  break;
 	       case GLSL_TYPE_BOOL:
 		  data.b[i + base_component] = c->get_bool_component(i);
 		  break;
@@ -1167,16 +1202,21 @@ emit_inline_matrix_constructor(const glsl_type *type,
       /* Assign the scalar to the X component of a vec4, and fill the remaining
        * components with zero.
        */
+      glsl_base_type param_base_type = first_param->type->base_type;
+      assert(param_base_type == GLSL_TYPE_FLOAT ||
+             param_base_type == GLSL_TYPE_DOUBLE);
       ir_variable *rhs_var =
-	 new(ctx) ir_variable(glsl_type::vec4_type, "mat_ctor_vec",
-			      ir_var_temporary);
+         new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1),
+                              "mat_ctor_vec",
+                              ir_var_temporary);
       instructions->push_tail(rhs_var);
 
       ir_constant_data zero;
-      zero.f[0] = 0.0;
-      zero.f[1] = 0.0;
-      zero.f[2] = 0.0;
-      zero.f[3] = 0.0;
+      for (unsigned i = 0; i < 4; i++)
+         if (param_base_type == GLSL_TYPE_FLOAT)
+            zero.f[i] = 0.0;
+         else
+            zero.d[i] = 0.0;
 
       ir_instruction *inst =
 	 new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
@@ -1524,10 +1564,10 @@ ast_function_expression::hir(exec_list *instructions,
       }
 
 
-      /* Constructors for samplers are illegal.
+      /* Constructors for opaque types are illegal.
        */
-      if (constructor_type->is_sampler()) {
-	 _mesa_glsl_error(& loc, state, "cannot construct sampler type `%s'",
+      if (constructor_type->contains_opaque()) {
+	 _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'",
 			  constructor_type->name);
 	 return ir_rvalue::error_value(ctx);
       }
diff --git a/mesalib/src/glsl/ast_to_hir.cpp b/mesalib/src/glsl/ast_to_hir.cpp
index 811a9557d..acb5c763c 100644
--- a/mesalib/src/glsl/ast_to_hir.cpp
+++ b/mesalib/src/glsl/ast_to_hir.cpp
@@ -172,6 +172,7 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from,
       switch (from->base_type) {
       case GLSL_TYPE_INT: return ir_unop_i2f;
       case GLSL_TYPE_UINT: return ir_unop_u2f;
+      case GLSL_TYPE_DOUBLE: return ir_unop_d2f;
       default: return (ir_expression_operation)0;
       }
 
@@ -183,6 +184,16 @@ get_conversion_operation(const glsl_type *to, const glsl_type *from,
          default: return (ir_expression_operation)0;
       }
 
+   case GLSL_TYPE_DOUBLE:
+      if (!state->has_double())
+         return (ir_expression_operation)0;
+      switch (from->base_type) {
+      case GLSL_TYPE_INT: return ir_unop_i2d;
+      case GLSL_TYPE_UINT: return ir_unop_u2d;
+      case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+      default: return (ir_expression_operation)0;
+      }
+
    default: return (ir_expression_operation)0;
    }
 }
@@ -340,8 +351,10 @@ arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
     * type of both operands must be float.
     */
    assert(type_a->is_matrix() || type_b->is_matrix());
-   assert(type_a->base_type == GLSL_TYPE_FLOAT);
-   assert(type_b->base_type == GLSL_TYPE_FLOAT);
+   assert(type_a->base_type == GLSL_TYPE_FLOAT ||
+          type_a->base_type == GLSL_TYPE_DOUBLE);
+   assert(type_b->base_type == GLSL_TYPE_FLOAT ||
+          type_b->base_type == GLSL_TYPE_DOUBLE);
 
    /*   "* The operator is add (+), subtract (-), or divide (/), and the
     *      operands are matrices with the same number of rows and the same
@@ -959,6 +972,7 @@ do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       return new(mem_ctx) ir_expression(operation, op0, op1);
 
    case GLSL_TYPE_ARRAY: {
@@ -1597,13 +1611,11 @@ ast_expression::do_hir(exec_list *instructions,
       }
 
       ir_constant *cond_val = op[0]->constant_expression_value();
-      ir_constant *then_val = op[1]->constant_expression_value();
-      ir_constant *else_val = op[2]->constant_expression_value();
 
       if (then_instructions.is_empty()
           && else_instructions.is_empty()
-          && (cond_val != NULL) && (then_val != NULL) && (else_val != NULL)) {
-         result = (cond_val->value.b[0]) ? then_val : else_val;
+          && cond_val != NULL) {
+         result = cond_val->value.b[0] ? op[1] : op[2];
       } else {
          ir_variable *const tmp =
             new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary);
@@ -1748,6 +1760,10 @@ ast_expression::do_hir(exec_list *instructions,
       result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant));
       break;
 
+   case ast_double_constant:
+      result = new(ctx) ir_constant(this->primary_expression.double_constant);
+      break;
+
    case ast_sequence: {
       /* It should not be possible to generate a sequence in the AST without
        * any expressions in it.
@@ -2562,6 +2578,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
          _mesa_glsl_error(loc, state,
                           "varying variables may not be of type struct");
          break;
+      case GLSL_TYPE_DOUBLE:
+         break;
       default:
          _mesa_glsl_error(loc, state, "illegal type for a varying variable");
          break;
@@ -3603,6 +3621,51 @@ ast_declarator_list::hir(exec_list *instructions,
 
             handle_geometry_shader_input_decl(state, loc, var);
          }
+      } else if (var->data.mode == ir_var_shader_out) {
+         const glsl_type *check_type = var->type->without_array();
+
+         /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+          *
+          *     It is a compile-time error to declare a vertex, tessellation
+          *     evaluation, tessellation control, or geometry shader output
+          *     that contains any of the following:
+          *
+          *     * A Boolean type (bool, bvec2 ...)
+          *     * An opaque type
+          */
+         if (check_type->is_boolean() || check_type->contains_opaque())
+            _mesa_glsl_error(&loc, state,
+                             "%s shader output cannot have type %s",
+                             _mesa_shader_stage_to_string(state->stage),
+                             check_type->name);
+
+         /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+          *
+          *     It is a compile-time error to declare a fragment shader output
+          *     that contains any of the following:
+          *
+          *     * A Boolean type (bool, bvec2 ...)
+          *     * A double-precision scalar or vector (double, dvec2 ...)
+          *     * An opaque type
+          *     * Any matrix type
+          *     * A structure
+          */
+         if (state->stage == MESA_SHADER_FRAGMENT) {
+            if (check_type->is_record() || check_type->is_matrix())
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader output "
+                                "cannot have struct or array type");
+            switch (check_type->base_type) {
+            case GLSL_TYPE_UINT:
+            case GLSL_TYPE_INT:
+            case GLSL_TYPE_FLOAT:
+               break;
+            default:
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader output cannot have "
+                                "type %s", check_type->name);
+            }
+         }
       }
 
       /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
@@ -3647,6 +3710,15 @@ ast_declarator_list::hir(exec_list *instructions,
                           var_type);
       }
 
+      /* Double fragment inputs must be qualified with 'flat'. */
+      if (var->type->contains_double() &&
+          var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+          state->stage == MESA_SHADER_FRAGMENT &&
+          var->data.mode == ir_var_shader_in) {
+         _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+                          "a double, then it must be qualified with 'flat'",
+                          var_type);
+      }
 
       /* Interpolation qualifiers cannot be applied to 'centroid' and
        * 'centroid varying'.
@@ -4133,6 +4205,27 @@ ast_function::hir(exec_list *instructions,
       emit_function(state, f);
    }
 
+   /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71:
+    *
+    * "A shader cannot redefine or overload built-in functions."
+    *
+    * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions":
+    *
+    * "User code can overload the built-in functions but cannot redefine
+    * them."
+    */
+   if (state->es_shader && state->language_version >= 300) {
+      /* Local shader has no exact candidates; check the built-ins. */
+      _mesa_glsl_initialize_builtin_functions();
+      if (_mesa_glsl_find_builtin_function_by_name(state, name)) {
+         YYLTYPE loc = this->get_location();
+         _mesa_glsl_error(& loc, state,
+                          "A shader cannot redefine or overload built-in "
+                          "function `%s' in GLSL ES 3.00", name);
+         return NULL;
+      }
+   }
+
    /* Verify that this function's signature either doesn't match a previously
     * seen signature for a function with the same name, or, if a match is found,
     * that the previously seen signature does not have an associated definition.
@@ -5203,6 +5296,13 @@ ast_process_structure_or_interface_block(exec_list *instructions,
                              "members");
          }
 
+         if (qual->flags.q.constant) {
+            YYLTYPE loc = decl_list->get_location();
+            _mesa_glsl_error(&loc, state,
+                             "const storage qualifier cannot be applied "
+                             "to struct or interface block members");
+         }
+
          field_type = process_array_type(&loc, decl_type,
                                          decl->array_specifier, state);
          fields[i].type = field_type;
@@ -5383,6 +5483,14 @@ ast_interface_block::hir(exec_list *instructions,
 {
    YYLTYPE loc = this->get_location();
 
+   /* Interface blocks must be declared at global scope */
+   if (state->current_function != NULL) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block `%s' must be declared "
+                       "at global scope",
+                       this->block_name);
+   }
+
    /* The ast_interface_block has a list of ast_declarator_lists.  We
     * need to turn those into ir_variables with an association
     * with this uniform block.
@@ -5443,9 +5551,23 @@ ast_interface_block::hir(exec_list *instructions,
 
    state->struct_specifier_depth--;
 
-   if (!redeclaring_per_vertex)
+   if (!redeclaring_per_vertex) {
       validate_identifier(this->block_name, loc, state);
 
+      /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
+       *
+       *     "Block names have no other use within a shader beyond interface
+       *     matching; it is a compile-time error to use a block name at global
+       *     scope for anything other than as a block name."
+       */
+      ir_variable *var = state->symbols->get_variable(this->block_name);
+      if (var && !var->type->is_interface()) {
+         _mesa_glsl_error(&loc, state, "Block name `%s' is "
+                          "already used in the scope.",
+                          this->block_name);
+      }
+   }
+
    const glsl_type *earlier_per_vertex = NULL;
    if (redeclaring_per_vertex) {
       /* Find the previous declaration of gl_PerVertex.  If we're redeclaring
diff --git a/mesalib/src/glsl/blob.c b/mesalib/src/glsl/blob.c
new file mode 100644
index 000000000..dd4341be9
--- /dev/null
+++ b/mesalib/src/glsl/blob.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "blob.h"
+
+#define BLOB_INITIAL_SIZE 4096
+
+/* Make sure \blob has room for \additional more bytes beyond blob->size.
+ *
+ * Nothing happens when the current allocation already suffices. Otherwise
+ * the allocation is doubled (starting from BLOB_INITIAL_SIZE for a blob with
+ * no allocation yet), or raised to the old allocation plus \additional when
+ * doubling would be smaller than that.
+ *
+ * \return True unless the reallocation fails, in which case the blob's
+ * fields are left as they were.
+ */
+static bool
+grow_to_fit(struct blob *blob, size_t additional)
+{
+   size_t capacity;
+   size_t required;
+   uint8_t *grown;
+
+   if (blob->allocated >= blob->size + additional)
+      return true;
+
+   /* Double the allocation, seeding an unallocated blob with the initial
+    * size.
+    */
+   capacity = blob->allocated ? blob->allocated * 2 : BLOB_INITIAL_SIZE;
+
+   /* Doubling may still be too small for a single large object. */
+   required = blob->allocated + additional;
+   if (capacity < required)
+      capacity = required;
+
+   grown = reralloc_size(blob, blob->data, capacity);
+   if (grown == NULL)
+      return false;
+
+   blob->data = grown;
+   blob->allocated = capacity;
+
+   return true;
+}
+
+/* Round blob->size up with ALIGN() so that the next value read or written at
+ * (blob->data + blob->size) is accessed at a granularity of \alignment
+ * bytes. The allocation is grown to cover the padding when necessary.
+ *
+ * \return True unless allocation fails
+ */
+static bool
+align_blob(struct blob *blob, size_t alignment)
+{
+   const size_t aligned_size = ALIGN(blob->size, alignment);
+   const size_t padding = aligned_size - blob->size;
+
+   if (grow_to_fit(blob, padding)) {
+      blob->size = aligned_size;
+      return true;
+   }
+
+   return false;
+}
+
+/* Move the reader's current position forward so that its offset from
+ * blob->data is rounded up by ALIGN() to \alignment. No bounds check is
+ * performed here.
+ */
+static void
+align_blob_reader(struct blob_reader *blob, size_t alignment)
+{
+   const size_t offset = blob->current - blob->data;
+
+   blob->current = blob->data + ALIGN(offset, alignment);
+}
+
+/* Allocate a new blob with ralloc under \mem_ctx and reset it to the empty
+ * state (no data buffer, nothing allocated, nothing written).
+ *
+ * \return The new blob, or NULL if the allocation fails.
+ */
+struct blob *
+blob_create(void *mem_ctx)
+{
+   struct blob *blob = ralloc(mem_ctx, struct blob);
+
+   if (blob != NULL) {
+      blob->size = 0;
+      blob->allocated = 0;
+      blob->data = NULL;
+   }
+
+   return blob;
+}
+
+/* Overwrite \to_write bytes at \offset within data already written to \blob.
+ *
+ * \return False, leaving the blob untouched, when the range
+ * [offset, offset + to_write) is not entirely inside the blob's current
+ * size; true otherwise.
+ */
+bool
+blob_overwrite_bytes(struct blob *blob,
+                     size_t offset,
+                     const void *bytes,
+                     size_t to_write)
+{
+   /* Detect an attempt to overwrite data out of bounds. \offset is unsigned,
+    * so it has to be compared against blob->size before subtracting:
+    * otherwise an offset past the end would wrap around and slip through the
+    * length check.
+    */
+   if (offset > blob->size || blob->size - offset < to_write)
+      return false;
+
+   /* Nothing to copy; this also avoids handing memcpy a NULL destination
+    * when the blob has no buffer yet.
+    */
+   if (to_write == 0)
+      return true;
+
+   memcpy(blob->data + offset, bytes, to_write);
+
+   return true;
+}
+
+/* Append \to_write bytes from \bytes to the end of \blob, growing the
+ * allocation as needed.
+ *
+ * \return True unless allocation failed, in which case nothing is written.
+ */
+bool
+blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
+{
+   if (! grow_to_fit(blob, to_write))
+       return false;
+
+   /* A zero-length write to a blob without a buffer would otherwise pass a
+    * NULL destination to memcpy, which is undefined even for a size of 0.
+    */
+   if (to_write > 0) {
+      memcpy(blob->data + blob->size, bytes, to_write);
+      blob->size += to_write;
+   }
+
+   return true;
+}
+
+/* Claim \to_write bytes at the end of \blob without initializing them.
+ *
+ * \return Pointer to the first reserved byte, or NULL if the blob could not
+ * be grown.
+ */
+uint8_t *
+blob_reserve_bytes(struct blob *blob, size_t to_write)
+{
+   uint8_t *reserved = NULL;
+
+   if (grow_to_fit(blob, to_write)) {
+      reserved = blob->data + blob->size;
+      blob->size += to_write;
+   }
+
+   return reserved;
+}
+
+/* Append \value to \blob at an offset aligned to sizeof(uint32_t).
+ *
+ * \return True unless allocation failed, either while padding for alignment
+ * or while appending the value itself.
+ */
+bool
+blob_write_uint32(struct blob *blob, uint32_t value)
+{
+   /* If padding fails, do not fall through to the write: a write that then
+    * succeeded would land at an unaligned offset, where blob_read_uint32
+    * (which always aligns first) would not look for it.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Replace the uint32_t stored at \offset in \blob with \value by forwarding
+ * the value's bytes to blob_overwrite_bytes().
+ *
+ * \return Whatever blob_overwrite_bytes() reports for that range.
+ */
+bool
+blob_overwrite_uint32(struct blob *blob, size_t offset, uint32_t value)
+{
+   const void *const raw = &value;
+   const size_t width = sizeof(uint32_t);
+
+   return blob_overwrite_bytes(blob, offset, raw, width);
+}
+
+/* Append \value to \blob at an offset aligned to sizeof(uint64_t).
+ *
+ * \return True unless allocation failed, either while padding for alignment
+ * or while appending the value itself.
+ */
+bool
+blob_write_uint64(struct blob *blob, uint64_t value)
+{
+   /* If padding fails, do not fall through to the write: a write that then
+    * succeeded would land at an unaligned offset, where blob_read_uint64
+    * (which always aligns first) would not look for it.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Append \value to \blob at an offset aligned to sizeof(intptr_t).
+ *
+ * \return True unless allocation failed, either while padding for alignment
+ * or while appending the value itself.
+ */
+bool
+blob_write_intptr(struct blob *blob, intptr_t value)
+{
+   /* If padding fails, do not fall through to the write: a write that then
+    * succeeded would land at an unaligned offset, where blob_read_intptr
+    * (which always aligns first) would not look for it.
+    */
+   if (! align_blob(blob, sizeof(value)))
+      return false;
+
+   return blob_write_bytes(blob, &value, sizeof(value));
+}
+
+/* Append the NUL-terminated string \str to \blob, terminator included.
+ *
+ * \return The result of the underlying blob_write_bytes() call.
+ */
+bool
+blob_write_string(struct blob *blob, const char *str)
+{
+   /* +1 so the terminating NUL byte is stored as well. */
+   const size_t length_with_nul = strlen(str) + 1;
+
+   return blob_write_bytes(blob, str, length_with_nul);
+}
+
+/* Point the reader \blob at the \size bytes starting at \data, positioned at
+ * the first byte and with the overrun flag cleared.
+ */
+void
+blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size)
+{
+   blob->overrun = false;
+   blob->data = data;
+   blob->current = data;
+   blob->end = data + size;
+}
+
+/* Report whether \size more bytes can be read from the reader's current
+ * position.
+ *
+ * On failure blob->overrun is set to record the attempt to read too far.
+ * The check requires the current position to be strictly before the end, so
+ * it fails once the reader has reached the end, whatever \size is.
+ */
+static bool
+ensure_can_read(struct blob_reader *blob, size_t size)
+{
+   const bool fits = blob->current < blob->end &&
+                     blob->end - blob->current >= size;
+
+   if (! fits)
+      blob->overrun = true;
+
+   return fits;
+}
+
+/* Hand out a pointer to the next \size bytes and step the reader past them.
+ *
+ * \return Pointer into the reader's underlying data, or NULL (with the
+ * overrun flag set by ensure_can_read) if the bytes cannot be read.
+ */
+void *
+blob_read_bytes(struct blob_reader *blob, size_t size)
+{
+   void *start = NULL;
+
+   if (ensure_can_read(blob, size)) {
+      start = blob->current;
+      blob->current += size;
+   }
+
+   return start;
+}
+
+/* Copy the next \size bytes from the reader into \dest and advance past
+ * them. If the bytes cannot be read, \dest is left unmodified.
+ */
+void
+blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size)
+{
+   const uint8_t *src = blob_read_bytes(blob, size);
+
+   if (src != NULL)
+      memcpy(dest, src, size);
+}
+
+/* These next three read functions have identical form. If we add any beyond
+ * these first three we should probably switch to generating these with a
+ * preprocessor macro.
+ */
+
+/* Read a uint32_t from the next uint32_t-aligned offset (measured from the
+ * start of the reader's data) and advance past it.
+ *
+ * \return The value read, or 0 (with the overrun flag set) if it cannot be
+ * read.
+ */
+uint32_t
+blob_read_uint32(struct blob_reader *blob)
+{
+   uint32_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy rather than dereference a casted pointer: only the offset from
+    * blob->data is aligned, so the address itself need not be, and the
+    * buffer is typed as uint8_t.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read a uint64_t from the next uint64_t-aligned offset (measured from the
+ * start of the reader's data) and advance past it.
+ *
+ * \return The value read, or 0 (with the overrun flag set) if it cannot be
+ * read.
+ */
+uint64_t
+blob_read_uint64(struct blob_reader *blob)
+{
+   uint64_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy rather than dereference a casted pointer: only the offset from
+    * blob->data is aligned, so the address itself need not be, and the
+    * buffer is typed as uint8_t.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read an intptr_t from the next intptr_t-aligned offset (measured from the
+ * start of the reader's data) and advance past it.
+ *
+ * \return The value read, or 0 (with the overrun flag set) if it cannot be
+ * read.
+ */
+intptr_t
+blob_read_intptr(struct blob_reader *blob)
+{
+   intptr_t ret;
+   const size_t size = sizeof(ret);
+
+   align_blob_reader(blob, size);
+
+   if (! ensure_can_read(blob, size))
+      return 0;
+
+   /* Copy rather than dereference a casted pointer: only the offset from
+    * blob->data is aligned, so the address itself need not be, and the
+    * buffer is typed as uint8_t.
+    */
+   memcpy(&ret, blob->current, size);
+
+   blob->current += size;
+
+   return ret;
+}
+
+/* Read a NUL-terminated string starting at the reader's current position
+ * and advance past its terminator.
+ *
+ * \return Pointer to the string inside the reader's underlying data, or NULL
+ * (with the overrun flag set) if the reader is at the end or no NUL byte
+ * remains.
+ */
+char *
+blob_read_string(struct blob_reader *blob)
+{
+   size_t size;
+   char *ret;
+   uint8_t *nul;
+
+   /* If we're already at the end, then this is an overrun. */
+   if (blob->current >= blob->end) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   /* Similarly, if there is no zero byte in the data remaining in this blob,
+    * we also consider that an overrun.
+    */
+   nul = memchr(blob->current, 0, blob->end - blob->current);
+
+   if (nul == NULL) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   /* Length including the terminator. Kept in a size_t so that a very long
+    * string cannot be truncated the way it would be in an int.
+    */
+   size = (size_t) (nul - blob->current) + 1;
+
+   /* memchr found the terminator before blob->end, so this always succeeds.
+    * It is a plain check rather than an assert so that the call is not
+    * compiled out under NDEBUG and does not depend on <assert.h>.
+    */
+   if (! ensure_can_read(blob, size))
+      return NULL;
+
+   ret = (char *) blob->current;
+
+   blob->current += size;
+
+   return ret;
+}
diff --git a/mesalib/src/glsl/blob.h b/mesalib/src/glsl/blob.h
new file mode 100644
index 000000000..ec903ec14
--- /dev/null
+++ b/mesalib/src/glsl/blob.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef BLOB_H
+#define BLOB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* The blob functions implement a simple, low-level API for serializing and
+ * deserializing.
+ *
+ * All objects written to a blob will be serialized directly, (without any
+ * additional meta-data to describe the data written). Therefore, it is the
+ * caller's responsibility to ensure that any data can be read later, (either
+ * by knowing exactly what data is expected, or by writing to the blob
+ * sufficient meta-data to describe what has been written).
+ *
+ * A blob is efficient in that it dynamically grows by doubling in size, so
+ * allocation costs are logarithmic.
+ */
+
+/** Write-side state of a blob: a byte buffer plus its bookkeeping. */
+struct blob {
+   /** The data actually written to the blob. */
+   uint8_t *data;
+
+   /** Number of bytes that have been allocated for \c data. */
+   size_t allocated;
+
+   /** The number of bytes that have actual data written to them. */
+   size_t size;
+};
+
+/* Read-side cursor over a block of serialized data.
+ *
+ * When done reading, the caller can ensure that everything was consumed by
+ * checking the following:
+ *
+ *   1. blob->current should be equal to blob->end, (if not, too little was
+ *      read).
+ *
+ *   2. blob->overrun should be false, (otherwise, too much was read).
+ */
+struct blob_reader {
+   /** Start of the data being read. */
+   uint8_t *data;
+   /** One past the last readable byte (data + size at init time). */
+   uint8_t *end;
+   /** Position of the next byte to be read. */
+   uint8_t *current;
+   /** Set once a read was attempted that could not be satisfied. */
+   bool overrun;
+};
+
+/**
+ * Create a new, empty blob, belonging to \mem_ctx.
+ *
+ * \return The new blob, (or NULL in case of allocation failure).
+ */
+struct blob *
+blob_create (void *mem_ctx);
+
+/**
+ * Add some unstructured, fixed-size data to a blob.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write);
+
+/**
+ * Reserve space in \blob for a number of bytes.
+ *
+ * Space will be allocated within the blob for these bytes, but the bytes will
+ * be left uninitialized. The caller is expected to use the return value to
+ * write directly (and immediately) to these bytes.
+ *
+ * \note The return value is valid immediately upon return, but can be
+ * invalidated by any other call to a blob function. So the caller should call
+ * blob_reserve_bytes immediately before writing through the returned pointer.
+ *
+ * This function is intended to be used when interfacing with an existing API
+ * that is not aware of the blob API, (so that blob_write_bytes cannot be
+ * called).
+ *
+ * \return A pointer to space allocated within \blob to which \to_write bytes
+ * can be written, (or NULL in case of any allocation error).
+ */
+uint8_t *
+blob_reserve_bytes (struct blob *blob, size_t to_write);
+
+/**
+ * Overwrite some data previously written to the blob.
+ *
+ * Writes data to an existing portion of the blob at an offset of \offset.
+ * This data range must have previously been written to the blob by one of the
+ * blob_write_* calls.
+ *
+ * For example usage, see blob_overwrite_uint32
+ *
+ * \return True unless the requested offset or offset+to_write lie outside
+ * the current blob's size.
+ */
+bool
+blob_overwrite_bytes (struct blob *blob,
+                      size_t offset,
+                      const void *bytes,
+                      size_t to_write);
+
+/**
+ * Add a uint32_t to a blob.
+ *
+ * \note This function will only write to a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint32 (struct blob *blob, uint32_t value);
+
+/**
+ * Overwrite a uint32_t previously written to the blob.
+ *
+ * Writes a uint32_t value to an existing portion of the blob at an offset of
+ * \offset.  This data range must have previously been written to the blob by
+ * one of the blob_write_* calls.
+ *
+ *
+ * The expected usage is something like the following pattern:
+ *
+ *	size_t offset;
+ *
+ *	offset = blob->size;
+ *	blob_write_uint32 (blob, 0); // placeholder
+ *	... various blob write calls, writing N items ...
+ *	blob_overwrite_uint32 (blob, offset, N);
+ *
+ * \return True unless the requested offset or offset+sizeof(uint32_t) lie
+ * outside the current blob's size.
+ */
+bool
+blob_overwrite_uint32 (struct blob *blob,
+                       size_t offset,
+                       uint32_t value);
+
+/**
+ * Add a uint64_t to a blob.
+ *
+ * \note This function will only write to a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_uint64 (struct blob *blob, uint64_t value);
+
+/**
+ * Add an intptr_t to a blob.
+ *
+ * \note This function will only write to an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be added to the
+ * blob if this write follows some unaligned write (such as
+ * blob_write_string).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_intptr (struct blob *blob, intptr_t value);
+
+/**
+ * Add a NULL-terminated string to a blob, (including the NULL terminator).
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_string (struct blob *blob, const char *str);
+
+/**
+ * Start reading a blob, (initializing the contents of \blob for reading).
+ *
+ * After this call, the caller can use the various blob_read_* functions to
+ * read elements from the data array.
+ *
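+ * For all of the blob_read_* functions, if there is insufficient data
+ * remaining, the functions will read nothing, (perhaps returning default
+ * values such as 0 or NULL). The caller can detect this by checking the
+ * blob_reader's overrun flag, which is set whenever a read fails. Note that
+ * the aligned readers (uint32, uint64, intptr) may still advance the current
+ * position past padding before detecting the failure.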
+ */
+void
+blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, (and
+ * update the current location to just past this data).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the data in order to use it after the lifetime of the data
+ * underlying the blob reader.
+ *
+ * \return The bytes read (see note above about memory lifetime).
+ */
+void *
+blob_read_bytes (struct blob_reader *blob, size_t size);
+
+/**
+ * Read some unstructured, fixed-size data from the current location, copying
+ * it to \dest (and update the current location to just past this data)
+ */
+void
+blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size);
+
+/**
+ * Read a uint32_t from the current location, (and update the current location
+ * to just past this uint32_t).
+ *
+ * \note This function will only read from a uint32_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint32_t read
+ */
+uint32_t
+blob_read_uint32 (struct blob_reader *blob);
+
+/**
+ * Read a uint64_t from the current location, (and update the current location
+ * to just past this uint64_t).
+ *
+ * \note This function will only read from a uint64_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The uint64_t read
+ */
+uint64_t
+blob_read_uint64 (struct blob_reader *blob);
+
+/**
+ * Read an intptr_t value from the current location, (and update the
+ * current location to just past this intptr_t).
+ *
+ * \note This function will only read from an intptr_t-aligned offset from the
+ * beginning of the blob's data, so some padding bytes may be skipped.
+ *
+ * \return The intptr_t read
+ */
+intptr_t
+blob_read_intptr (struct blob_reader *blob);
+
+/**
+ * Read a NULL-terminated string from the current location, (and update the
+ * current location to just past this string).
+ *
+ * \note The memory returned belongs to the data underlying the blob reader. The
+ * caller must copy the string in order to use the string after the lifetime
+ * of the data underlying the blob reader.
+ *
+ * \return The string read (see note above about memory lifetime). However, if
+ * there is no NULL byte remaining within the blob, this function returns
+ * NULL.
+ */
+char *
+blob_read_string (struct blob_reader *blob);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BLOB_H */
diff --git a/mesalib/src/glsl/builtin_functions.cpp b/mesalib/src/glsl/builtin_functions.cpp
index bb7fbcdc1..b643927d0 100644
--- a/mesalib/src/glsl/builtin_functions.cpp
+++ b/mesalib/src/glsl/builtin_functions.cpp
@@ -381,6 +381,12 @@ gs_streams(const _mesa_glsl_parse_state *state)
    return gpu_shader5(state) && gs_only(state);
 }
 
+static bool
+fp64(const _mesa_glsl_parse_state *state)
+{
+   return state->has_double();
+}
+
 /** @} */
 
 /******************************************************************************/
@@ -436,6 +442,7 @@ private:
    ir_constant *imm(float f, unsigned vector_elements=1);
    ir_constant *imm(int i, unsigned vector_elements=1);
    ir_constant *imm(unsigned u, unsigned vector_elements=1);
+   ir_constant *imm(double d, unsigned vector_elements=1);
    ir_constant *imm(const glsl_type *type, const ir_constant_data &);
    ir_dereference_variable *var_ref(ir_variable *var);
    ir_dereference_array *array_ref(ir_variable *var, int i);
@@ -526,29 +533,29 @@ private:
    B1(log)
    B1(exp2)
    B1(log2)
-   B1(sqrt)
-   B1(inversesqrt)
-   B1(abs)
-   B1(sign)
-   B1(floor)
-   B1(trunc)
-   B1(round)
-   B1(roundEven)
-   B1(ceil)
-   B1(fract)
+   BA1(sqrt)
+   BA1(inversesqrt)
+   BA1(abs)
+   BA1(sign)
+   BA1(floor)
+   BA1(trunc)
+   BA1(round)
+   BA1(roundEven)
+   BA1(ceil)
+   BA1(fract)
    B2(mod)
-   B1(modf)
+   BA1(modf)
    BA2(min)
    BA2(max)
    BA2(clamp)
-   B2(mix_lrp)
+   BA2(mix_lrp)
    ir_function_signature *_mix_sel(builtin_available_predicate avail,
                                    const glsl_type *val_type,
                                    const glsl_type *blend_type);
-   B2(step)
-   B2(smoothstep)
-   B1(isnan)
-   B1(isinf)
+   BA2(step)
+   BA2(smoothstep)
+   BA1(isnan)
+   BA1(isinf)
    B1(floatBitsToInt)
    B1(floatBitsToUint)
    B1(intBitsToFloat)
@@ -563,24 +570,27 @@ private:
    ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail);
    ir_function_signature *_packHalf2x16(builtin_available_predicate avail);
    ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail);
-   B1(length)
-   B1(distance);
-   B1(dot);
-   B1(cross);
-   B1(normalize);
+   ir_function_signature *_packDouble2x32(builtin_available_predicate avail);
+   ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail);
+
+   BA1(length)
+   BA1(distance);
+   BA1(dot);
+   BA1(cross);
+   BA1(normalize);
    B0(ftransform);
-   B1(faceforward);
-   B1(reflect);
-   B1(refract);
-   B1(matrixCompMult);
-   B1(outerProduct);
-   B0(determinant_mat2);
-   B0(determinant_mat3);
-   B0(determinant_mat4);
-   B0(inverse_mat2);
-   B0(inverse_mat3);
-   B0(inverse_mat4);
-   B1(transpose);
+   BA1(faceforward);
+   BA1(reflect);
+   BA1(refract);
+   BA1(matrixCompMult);
+   BA1(outerProduct);
+   BA1(determinant_mat2);
+   BA1(determinant_mat3);
+   BA1(determinant_mat4);
+   BA1(inverse_mat2);
+   BA1(inverse_mat3);
+   BA1(inverse_mat4);
+   BA1(transpose);
    BA1(lessThan);
    BA1(lessThanEqual);
    BA1(greaterThan);
@@ -644,9 +654,10 @@ private:
    B1(bitCount)
    B1(findLSB)
    B1(findMSB)
-   B1(fma)
+   BA1(fma)
    B2(ldexp)
    B2(frexp)
+   B2(dfrexp)
    B1(uaddCarry)
    B1(usubBorrow)
    B1(mulExtended)
@@ -815,6 +826,42 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::vec4_type),  \
                 NULL);
 
+#define FD(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(always_available, glsl_type::float_type), \
+                _##NAME(always_available, glsl_type::vec2_type),  \
+                _##NAME(always_available, glsl_type::vec3_type),  \
+                _##NAME(always_available, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
+#define FD130(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(v130, glsl_type::float_type), \
+                _##NAME(v130, glsl_type::vec2_type),  \
+                _##NAME(v130, glsl_type::vec3_type),                  \
+                _##NAME(v130, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
+#define FDGS5(NAME)                                 \
+   add_function(#NAME,                          \
+                _##NAME(gpu_shader5, glsl_type::float_type), \
+                _##NAME(gpu_shader5, glsl_type::vec2_type),  \
+                _##NAME(gpu_shader5, glsl_type::vec3_type),                  \
+                _##NAME(gpu_shader5, glsl_type::vec4_type),  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type),    \
+                _##NAME(fp64, glsl_type::dvec3_type),     \
+                _##NAME(fp64, glsl_type::dvec4_type),      \
+                NULL);
+
 #define FI(NAME)                                \
    add_function(#NAME,                          \
                 _##NAME(glsl_type::float_type), \
@@ -827,7 +874,23 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::ivec4_type), \
                 NULL);
 
-#define FIU(NAME)                                                 \
+#define FID(NAME)                                \
+   add_function(#NAME,                          \
+                _##NAME(always_available, glsl_type::float_type), \
+                _##NAME(always_available, glsl_type::vec2_type),  \
+                _##NAME(always_available, glsl_type::vec3_type),  \
+                _##NAME(always_available, glsl_type::vec4_type),  \
+                _##NAME(always_available, glsl_type::int_type),   \
+                _##NAME(always_available, glsl_type::ivec2_type), \
+                _##NAME(always_available, glsl_type::ivec3_type), \
+                _##NAME(always_available, glsl_type::ivec4_type), \
+                _##NAME(fp64, glsl_type::double_type), \
+                _##NAME(fp64, glsl_type::dvec2_type),  \
+                _##NAME(fp64, glsl_type::dvec3_type),  \
+                _##NAME(fp64, glsl_type::dvec4_type),  \
+                NULL);
+
+#define FIUD(NAME)                                                 \
    add_function(#NAME,                                            \
                 _##NAME(always_available, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type),  \
@@ -843,6 +906,10 @@ builtin_builder::create_builtins()
                 _##NAME(v130, glsl_type::uvec2_type),             \
                 _##NAME(v130, glsl_type::uvec3_type),             \
                 _##NAME(v130, glsl_type::uvec4_type),             \
+                _##NAME(fp64, glsl_type::double_type), \
+                _##NAME(fp64, glsl_type::dvec2_type),  \
+                _##NAME(fp64, glsl_type::dvec3_type),  \
+                _##NAME(fp64, glsl_type::dvec4_type),  \
                 NULL);
 
 #define IU(NAME)                                \
@@ -858,7 +925,7 @@ builtin_builder::create_builtins()
                 _##NAME(glsl_type::uvec4_type), \
                 NULL);
 
-#define FIUB(NAME)                                                \
+#define FIUBD(NAME)                                                \
    add_function(#NAME,                                            \
                 _##NAME(always_available, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type),  \
@@ -879,9 +946,14 @@ builtin_builder::create_builtins()
                 _##NAME(always_available, glsl_type::bvec2_type), \
                 _##NAME(always_available, glsl_type::bvec3_type), \
                 _##NAME(always_available, glsl_type::bvec4_type), \
+                                                                  \
+                _##NAME(fp64, glsl_type::double_type),  \
+                _##NAME(fp64, glsl_type::dvec2_type), \
+                _##NAME(fp64, glsl_type::dvec3_type), \
+                _##NAME(fp64, glsl_type::dvec4_type), \
                 NULL);
 
-#define FIU2_MIXED(NAME)                                                                 \
+#define FIUD2_MIXED(NAME)                                                                 \
    add_function(#NAME,                                                                   \
                 _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \
                 _##NAME(always_available, glsl_type::vec2_type,  glsl_type::float_type), \
@@ -909,6 +981,14 @@ builtin_builder::create_builtins()
                 _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type),             \
                 _##NAME(v130, glsl_type::uvec3_type, glsl_type::uvec3_type),             \
                 _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type),             \
+                                                                                         \
+                _##NAME(fp64, glsl_type::double_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type),           \
+                _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),           \
+                _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),           \
+                _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),           \
                 NULL);
 
    F(radians)
@@ -941,16 +1021,16 @@ builtin_builder::create_builtins()
    F(log)
    F(exp2)
    F(log2)
-   F(sqrt)
-   F(inversesqrt)
-   FI(abs)
-   FI(sign)
-   F(floor)
-   F(trunc)
-   F(round)
-   F(roundEven)
-   F(ceil)
-   F(fract)
+   FD(sqrt)
+   FD(inversesqrt)
+   FID(abs)
+   FID(sign)
+   FD(floor)
+   FD130(trunc)   /* float trunc() keeps the v130 gate of the replaced UNOP(trunc, ..., v130); doubles use fp64 */
+   FD(round)
+   FD(roundEven)
+   FD(ceil)
+   FD(fract)
 
    add_function("mod",
                 _mod(glsl_type::float_type, glsl_type::float_type),
@@ -961,29 +1041,52 @@ builtin_builder::create_builtins()
                 _mod(glsl_type::vec2_type,  glsl_type::vec2_type),
                 _mod(glsl_type::vec3_type,  glsl_type::vec3_type),
                 _mod(glsl_type::vec4_type,  glsl_type::vec4_type),
+
+                _mod(glsl_type::double_type, glsl_type::double_type),
+                _mod(glsl_type::dvec2_type,  glsl_type::double_type),
+                _mod(glsl_type::dvec3_type,  glsl_type::double_type),
+                _mod(glsl_type::dvec4_type,  glsl_type::double_type),
+
+                _mod(glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _mod(glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _mod(glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
 
-   F(modf)
+   FD130(modf)   /* float modf() keeps the v130 gate the old _modf hard-coded; doubles use fp64 */
 
-   FIU2_MIXED(min)
-   FIU2_MIXED(max)
-   FIU2_MIXED(clamp)
+   FIUD2_MIXED(min)
+   FIUD2_MIXED(max)
+   FIUD2_MIXED(clamp)
 
    add_function("mix",
-                _mix_lrp(glsl_type::float_type, glsl_type::float_type),
-                _mix_lrp(glsl_type::vec2_type,  glsl_type::float_type),
-                _mix_lrp(glsl_type::vec3_type,  glsl_type::float_type),
-                _mix_lrp(glsl_type::vec4_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec2_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec3_type,  glsl_type::float_type),
+                _mix_lrp(always_available, glsl_type::vec4_type,  glsl_type::float_type),
+
+                _mix_lrp(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _mix_lrp(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _mix_lrp(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
 
-                _mix_lrp(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _mix_lrp(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _mix_lrp(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec2_type,  glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec3_type,  glsl_type::double_type),
+                _mix_lrp(fp64, glsl_type::dvec4_type,  glsl_type::double_type),
+
+                _mix_lrp(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _mix_lrp(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _mix_lrp(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
 
                 _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type),
                 _mix_sel(v130, glsl_type::vec2_type,  glsl_type::bvec2_type),
                 _mix_sel(v130, glsl_type::vec3_type,  glsl_type::bvec3_type),
                 _mix_sel(v130, glsl_type::vec4_type,  glsl_type::bvec4_type),
 
+                _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type),
+                _mix_sel(fp64, glsl_type::dvec2_type,  glsl_type::bvec2_type),
+                _mix_sel(fp64, glsl_type::dvec3_type,  glsl_type::bvec3_type),
+                _mix_sel(fp64, glsl_type::dvec4_type,  glsl_type::bvec4_type),
+
                 _mix_sel(shader_integer_mix, glsl_type::int_type,   glsl_type::bool_type),
                 _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type),
                 _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type),
@@ -1001,29 +1104,45 @@ builtin_builder::create_builtins()
                 NULL);
 
    add_function("step",
-                _step(glsl_type::float_type, glsl_type::float_type),
-                _step(glsl_type::float_type, glsl_type::vec2_type),
-                _step(glsl_type::float_type, glsl_type::vec3_type),
-                _step(glsl_type::float_type, glsl_type::vec4_type),
-
-                _step(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _step(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _step(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _step(always_available, glsl_type::float_type, glsl_type::float_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec2_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec3_type),
+                _step(always_available, glsl_type::float_type, glsl_type::vec4_type),
+
+                _step(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _step(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _step(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
+                _step(fp64, glsl_type::double_type, glsl_type::double_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec2_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec3_type),
+                _step(fp64, glsl_type::double_type, glsl_type::dvec4_type),
+
+                _step(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _step(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _step(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
 
    add_function("smoothstep",
-                _smoothstep(glsl_type::float_type, glsl_type::float_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec2_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec3_type),
-                _smoothstep(glsl_type::float_type, glsl_type::vec4_type),
-
-                _smoothstep(glsl_type::vec2_type,  glsl_type::vec2_type),
-                _smoothstep(glsl_type::vec3_type,  glsl_type::vec3_type),
-                _smoothstep(glsl_type::vec4_type,  glsl_type::vec4_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type),
+                _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type),
+
+                _smoothstep(always_available, glsl_type::vec2_type,  glsl_type::vec2_type),
+                _smoothstep(always_available, glsl_type::vec3_type,  glsl_type::vec3_type),
+                _smoothstep(always_available, glsl_type::vec4_type,  glsl_type::vec4_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type),
+                _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type),
+
+                _smoothstep(fp64, glsl_type::dvec2_type,  glsl_type::dvec2_type),
+                _smoothstep(fp64, glsl_type::dvec3_type,  glsl_type::dvec3_type),
+                _smoothstep(fp64, glsl_type::dvec4_type,  glsl_type::dvec4_type),
                 NULL);
  
-   F(isnan)
-   F(isinf)
+   FD130(isnan)
+   FD130(isinf)
 
    F(floatBitsToInt)
    F(floatBitsToUint)
@@ -1050,68 +1169,106 @@ builtin_builder::create_builtins()
    add_function("unpackSnorm4x8",  _unpackSnorm4x8(shader_packing_or_gpu_shader5),         NULL);
    add_function("packHalf2x16",    _packHalf2x16(shader_packing_or_es3),                   NULL);
    add_function("unpackHalf2x16",  _unpackHalf2x16(shader_packing_or_es3),                 NULL);
+   add_function("packDouble2x32",    _packDouble2x32(fp64),                   NULL);
+   add_function("unpackDouble2x32",  _unpackDouble2x32(fp64),                 NULL);
 
-   F(length)
-   F(distance)
-   F(dot)
 
-   add_function("cross", _cross(glsl_type::vec3_type), NULL);
+   FD(length)
+   FD(distance)
+   FD(dot)
 
-   F(normalize)
+   add_function("cross", _cross(always_available, glsl_type::vec3_type), 
+                _cross(fp64, glsl_type::dvec3_type), NULL);
+
+   FD(normalize)
    add_function("ftransform", _ftransform(), NULL);
-   F(faceforward)
-   F(reflect)
-   F(refract)
+   FD(faceforward)
+   FD(reflect)
+   FD(refract)
    // ...
    add_function("matrixCompMult",
-                _matrixCompMult(glsl_type::mat2_type),
-                _matrixCompMult(glsl_type::mat3_type),
-                _matrixCompMult(glsl_type::mat4_type),
-                _matrixCompMult(glsl_type::mat2x3_type),
-                _matrixCompMult(glsl_type::mat2x4_type),
-                _matrixCompMult(glsl_type::mat3x2_type),
-                _matrixCompMult(glsl_type::mat3x4_type),
-                _matrixCompMult(glsl_type::mat4x2_type),
-                _matrixCompMult(glsl_type::mat4x3_type),
+                _matrixCompMult(always_available, glsl_type::mat2_type),
+                _matrixCompMult(always_available, glsl_type::mat3_type),
+                _matrixCompMult(always_available, glsl_type::mat4_type),
+                _matrixCompMult(always_available, glsl_type::mat2x3_type),
+                _matrixCompMult(always_available, glsl_type::mat2x4_type),
+                _matrixCompMult(always_available, glsl_type::mat3x2_type),
+                _matrixCompMult(always_available, glsl_type::mat3x4_type),
+                _matrixCompMult(always_available, glsl_type::mat4x2_type),
+                _matrixCompMult(always_available, glsl_type::mat4x3_type),
+                _matrixCompMult(fp64, glsl_type::dmat2_type),
+                _matrixCompMult(fp64, glsl_type::dmat3_type),
+                _matrixCompMult(fp64, glsl_type::dmat4_type),
+                _matrixCompMult(fp64, glsl_type::dmat2x3_type),
+                _matrixCompMult(fp64, glsl_type::dmat2x4_type),
+                _matrixCompMult(fp64, glsl_type::dmat3x2_type),
+                _matrixCompMult(fp64, glsl_type::dmat3x4_type),
+                _matrixCompMult(fp64, glsl_type::dmat4x2_type),
+                _matrixCompMult(fp64, glsl_type::dmat4x3_type),
                 NULL);
    add_function("outerProduct",
-                _outerProduct(glsl_type::mat2_type),
-                _outerProduct(glsl_type::mat3_type),
-                _outerProduct(glsl_type::mat4_type),
-                _outerProduct(glsl_type::mat2x3_type),
-                _outerProduct(glsl_type::mat2x4_type),
-                _outerProduct(glsl_type::mat3x2_type),
-                _outerProduct(glsl_type::mat3x4_type),
-                _outerProduct(glsl_type::mat4x2_type),
-                _outerProduct(glsl_type::mat4x3_type),
+                _outerProduct(v120, glsl_type::mat2_type),
+                _outerProduct(v120, glsl_type::mat3_type),
+                _outerProduct(v120, glsl_type::mat4_type),
+                _outerProduct(v120, glsl_type::mat2x3_type),
+                _outerProduct(v120, glsl_type::mat2x4_type),
+                _outerProduct(v120, glsl_type::mat3x2_type),
+                _outerProduct(v120, glsl_type::mat3x4_type),
+                _outerProduct(v120, glsl_type::mat4x2_type),
+                _outerProduct(v120, glsl_type::mat4x3_type),
+                _outerProduct(fp64, glsl_type::dmat2_type),
+                _outerProduct(fp64, glsl_type::dmat3_type),
+                _outerProduct(fp64, glsl_type::dmat4_type),
+                _outerProduct(fp64, glsl_type::dmat2x3_type),
+                _outerProduct(fp64, glsl_type::dmat2x4_type),
+                _outerProduct(fp64, glsl_type::dmat3x2_type),
+                _outerProduct(fp64, glsl_type::dmat3x4_type),
+                _outerProduct(fp64, glsl_type::dmat4x2_type),
+                _outerProduct(fp64, glsl_type::dmat4x3_type),
                 NULL);
    add_function("determinant",
-                _determinant_mat2(),
-                _determinant_mat3(),
-                _determinant_mat4(),
+                _determinant_mat2(v120, glsl_type::mat2_type),
+                _determinant_mat3(v120, glsl_type::mat3_type),
+                _determinant_mat4(v120, glsl_type::mat4_type),
+                _determinant_mat2(fp64, glsl_type::dmat2_type),
+                _determinant_mat3(fp64, glsl_type::dmat3_type),
+                _determinant_mat4(fp64, glsl_type::dmat4_type),
+
                 NULL);
    add_function("inverse",
-                _inverse_mat2(),
-                _inverse_mat3(),
-                _inverse_mat4(),
+                _inverse_mat2(v120, glsl_type::mat2_type),
+                _inverse_mat3(v120, glsl_type::mat3_type),
+                _inverse_mat4(v120, glsl_type::mat4_type),
+                _inverse_mat2(fp64, glsl_type::dmat2_type),
+                _inverse_mat3(fp64, glsl_type::dmat3_type),
+                _inverse_mat4(fp64, glsl_type::dmat4_type),
                 NULL);
    add_function("transpose",
-                _transpose(glsl_type::mat2_type),
-                _transpose(glsl_type::mat3_type),
-                _transpose(glsl_type::mat4_type),
-                _transpose(glsl_type::mat2x3_type),
-                _transpose(glsl_type::mat2x4_type),
-                _transpose(glsl_type::mat3x2_type),
-                _transpose(glsl_type::mat3x4_type),
-                _transpose(glsl_type::mat4x2_type),
-                _transpose(glsl_type::mat4x3_type),
+                _transpose(v120, glsl_type::mat2_type),
+                _transpose(v120, glsl_type::mat3_type),
+                _transpose(v120, glsl_type::mat4_type),
+                _transpose(v120, glsl_type::mat2x3_type),
+                _transpose(v120, glsl_type::mat2x4_type),
+                _transpose(v120, glsl_type::mat3x2_type),
+                _transpose(v120, glsl_type::mat3x4_type),
+                _transpose(v120, glsl_type::mat4x2_type),
+                _transpose(v120, glsl_type::mat4x3_type),
+                _transpose(fp64, glsl_type::dmat2_type),
+                _transpose(fp64, glsl_type::dmat3_type),
+                _transpose(fp64, glsl_type::dmat4_type),
+                _transpose(fp64, glsl_type::dmat2x3_type),
+                _transpose(fp64, glsl_type::dmat2x4_type),
+                _transpose(fp64, glsl_type::dmat3x2_type),
+                _transpose(fp64, glsl_type::dmat3x4_type),
+                _transpose(fp64, glsl_type::dmat4x2_type),
+                _transpose(fp64, glsl_type::dmat4x3_type),
                 NULL);
-   FIU(lessThan)
-   FIU(lessThanEqual)
-   FIU(greaterThan)
-   FIU(greaterThanEqual)
-   FIUB(notEqual)
-   FIUB(equal)
+   FIUD(lessThan)
+   FIUD(lessThanEqual)
+   FIUD(greaterThan)
+   FIUD(greaterThanEqual)
+   FIUBD(notEqual)
+   FIUBD(equal)
 
    add_function("any",
                 _any(glsl_type::bvec2_type),
@@ -2180,13 +2337,17 @@ builtin_builder::create_builtins()
    IU(bitCount)
    IU(findLSB)
    IU(findMSB)
-   F(fma)
+   FDGS5(fma)
 
    add_function("ldexp",
                 _ldexp(glsl_type::float_type, glsl_type::int_type),
                 _ldexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
                 _ldexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
                 _ldexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+                _ldexp(glsl_type::double_type, glsl_type::int_type),
+                _ldexp(glsl_type::dvec2_type,  glsl_type::ivec2_type),
+                _ldexp(glsl_type::dvec3_type,  glsl_type::ivec3_type),
+                _ldexp(glsl_type::dvec4_type,  glsl_type::ivec4_type),
                 NULL);
 
    add_function("frexp",
@@ -2194,6 +2355,10 @@ builtin_builder::create_builtins()
                 _frexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
                 _frexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
                 _frexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+                _dfrexp(glsl_type::double_type, glsl_type::int_type),
+                _dfrexp(glsl_type::dvec2_type,  glsl_type::ivec2_type),
+                _dfrexp(glsl_type::dvec3_type,  glsl_type::ivec3_type),
+                _dfrexp(glsl_type::dvec4_type,  glsl_type::ivec4_type),
                 NULL);
    add_function("uaddCarry",
                 _uaddCarry(glsl_type::uint_type),
@@ -2310,8 +2475,8 @@ builtin_builder::create_builtins()
 
 #undef F
 #undef FI
-#undef FIU
-#undef FIUB
+#undef FIUD
+#undef FIUBD
 #undef FIU2_MIXED
 }
 
@@ -2470,11 +2635,19 @@ builtin_builder::imm(unsigned u, unsigned vector_elements)
 }
 
 ir_constant *
+builtin_builder::imm(double d, unsigned vector_elements)
+{
+   return new(mem_ctx) ir_constant(d, vector_elements);
+}
+
+ir_constant *
 builtin_builder::imm(const glsl_type *type, const ir_constant_data &data)
 {
    return new(mem_ctx) ir_constant(type, &data);
 }
 
+#define IMM_FP(type, val) (((type)->base_type == GLSL_TYPE_DOUBLE) ? imm(val) : imm((float)(val))) /* immediate of val: as given when type's base type is double, else cast to float */
+
 ir_dereference_variable *
 builtin_builder::var_ref(ir_variable *var)
 {
@@ -2550,6 +2723,13 @@ builtin_builder::_##NAME(const glsl_type *type) \
    return unop(&AVAIL, OPCODE, type, type);     \
 }
 
+#define UNOPA(NAME, OPCODE)               \
+ir_function_signature *                         \
+builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \
+{                                               \
+   return unop(avail, OPCODE, type, type);     \
+}
+
 ir_function_signature *
 builtin_builder::binop(ir_expression_operation opcode,
                        builtin_available_predicate avail,
@@ -2855,19 +3035,19 @@ UNOP(exp,         ir_unop_exp,  always_available)
 UNOP(log,         ir_unop_log,  always_available)
 UNOP(exp2,        ir_unop_exp2, always_available)
 UNOP(log2,        ir_unop_log2, always_available)
-UNOP(sqrt,        ir_unop_sqrt, always_available)
-UNOP(inversesqrt, ir_unop_rsq,  always_available)
+UNOPA(sqrt,        ir_unop_sqrt)
+UNOPA(inversesqrt, ir_unop_rsq)
 
 /** @} */
 
-UNOP(abs,       ir_unop_abs,        always_available)
-UNOP(sign,      ir_unop_sign,       always_available)
-UNOP(floor,     ir_unop_floor,      always_available)
-UNOP(trunc,     ir_unop_trunc,      v130)
-UNOP(round,     ir_unop_round_even, always_available)
-UNOP(roundEven, ir_unop_round_even, always_available)
-UNOP(ceil,      ir_unop_ceil,       always_available)
-UNOP(fract,     ir_unop_fract,      always_available)
+UNOPA(abs,       ir_unop_abs)
+UNOPA(sign,      ir_unop_sign)
+UNOPA(floor,     ir_unop_floor)
+UNOPA(trunc,     ir_unop_trunc)
+UNOPA(round,     ir_unop_round_even)
+UNOPA(roundEven, ir_unop_round_even)
+UNOPA(ceil,      ir_unop_ceil)
+UNOPA(fract,     ir_unop_fract)
 
 ir_function_signature *
 builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
@@ -2876,11 +3056,11 @@ builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
 }
 
 ir_function_signature *
-builtin_builder::_modf(const glsl_type *type)
+builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
    ir_variable *i = out_var(type, "i");
-   MAKE_SIG(type, v130, 2, x, i);
+   MAKE_SIG(type, avail, 2, x, i);
 
    ir_variable *t = body.make_temp(type, "t");
    body.emit(assign(t, expr(ir_unop_trunc, x)));
@@ -2919,12 +3099,12 @@ builtin_builder::_clamp(builtin_available_predicate avail,
 }
 
 ir_function_signature *
-builtin_builder::_mix_lrp(const glsl_type *val_type, const glsl_type *blend_type)
+builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type)
 {
    ir_variable *x = in_var(val_type, "x");
    ir_variable *y = in_var(val_type, "y");
    ir_variable *a = in_var(blend_type, "a");
-   MAKE_SIG(val_type, always_available, 3, x, y, a);
+   MAKE_SIG(val_type, avail, 3, x, y, a);
 
    body.emit(ret(lrp(x, y, a)));
 
@@ -2954,26 +3134,37 @@ builtin_builder::_mix_sel(builtin_available_predicate avail,
 }
 
 ir_function_signature *
-builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type)
+builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type)
 {
    ir_variable *edge = in_var(edge_type, "edge");
    ir_variable *x = in_var(x_type, "x");
-   MAKE_SIG(x_type, always_available, 2, edge, x);
+   MAKE_SIG(x_type, avail, 2, edge, x);
 
    ir_variable *t = body.make_temp(x_type, "t");
    if (x_type->vector_elements == 1) {
       /* Both are floats */
-      body.emit(assign(t, b2f(gequal(x, edge))));
+      if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+         body.emit(assign(t, f2d(b2f(gequal(x, edge)))));
+      else
+         body.emit(assign(t, b2f(gequal(x, edge))));
    } else if (edge_type->vector_elements == 1) {
       /* x is a vector but edge is a float */
       for (int i = 0; i < x_type->vector_elements; i++) {
-         body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i));
+         if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+            body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i));
+         else
+            body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i));
       }
    } else {
       /* Both are vectors */
       for (int i = 0; i < x_type->vector_elements; i++) {
-         body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))),
-                          1 << i));
+         if (edge_type->base_type == GLSL_TYPE_DOUBLE)
+            body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))),
+                             1 << i));
+         else
+            body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))),
+                             1 << i));
+
       }
    }
    body.emit(ret(t));
@@ -2982,12 +3173,12 @@ builtin_builder::_step(const glsl_type *edge_type, const glsl_type *x_type)
 }
 
 ir_function_signature *
-builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type)
+builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type)
 {
    ir_variable *edge0 = in_var(edge_type, "edge0");
    ir_variable *edge1 = in_var(edge_type, "edge1");
    ir_variable *x = in_var(x_type, "x");
-   MAKE_SIG(x_type, always_available, 3, edge0, edge1, x);
+   MAKE_SIG(x_type, avail, 3, edge0, edge1, x);
 
    /* From the GLSL 1.10 specification:
     *
@@ -2998,18 +3189,18 @@ builtin_builder::_smoothstep(const glsl_type *edge_type, const glsl_type *x_type
 
    ir_variable *t = body.make_temp(x_type, "t");
    body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)),
-                             imm(0.0f), imm(1.0f))));
+                             IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0))));
 
-   body.emit(ret(mul(t, mul(t, sub(imm(3.0f), mul(imm(2.0f), t))))));
+   body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t))))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_isnan(const glsl_type *type)
+builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x);
+   MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x);
 
    body.emit(ret(nequal(x, x)));
 
@@ -3017,10 +3208,10 @@ builtin_builder::_isnan(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_isinf(const glsl_type *type)
+builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::bvec(type->vector_elements), v130, 1, x);
+   MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x);
 
    ir_constant_data infinities;
    for (int i = 0; i < type->vector_elements; i++) {
@@ -3160,10 +3351,28 @@ builtin_builder::_unpackHalf2x16(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_length(const glsl_type *type)
+builtin_builder::_packDouble2x32(builtin_available_predicate avail)
+{
+   ir_variable *v = in_var(glsl_type::uvec2_type, "v");
+   MAKE_SIG(glsl_type::double_type, avail, 1, v);
+   body.emit(ret(expr(ir_unop_pack_double_2x32, v)));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_unpackDouble2x32(builtin_available_predicate avail)
+{
+   ir_variable *p = in_var(glsl_type::double_type, "p");
+   MAKE_SIG(glsl_type::uvec2_type, avail, 1, p);
+   body.emit(ret(expr(ir_unop_unpack_double_2x32, p)));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(glsl_type::float_type, always_available, 1, x);
+   MAKE_SIG(type->get_base_type(), avail, 1, x);
 
    body.emit(ret(sqrt(dot(x, x))));
 
@@ -3171,11 +3380,11 @@ builtin_builder::_length(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_distance(const glsl_type *type)
+builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *p0 = in_var(type, "p0");
    ir_variable *p1 = in_var(type, "p1");
-   MAKE_SIG(glsl_type::float_type, always_available, 2, p0, p1);
+   MAKE_SIG(type->get_base_type(), avail, 2, p0, p1);
 
    if (type->vector_elements == 1) {
       body.emit(ret(abs(sub(p0, p1))));
@@ -3189,21 +3398,21 @@ builtin_builder::_distance(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_dot(const glsl_type *type)
+builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type)
 {
    if (type->vector_elements == 1)
-      return binop(ir_binop_mul, always_available, type, type, type);
+      return binop(ir_binop_mul, avail, type, type, type);
 
-   return binop(ir_binop_dot, always_available,
-                glsl_type::float_type, type, type);
+   return binop(ir_binop_dot, avail,
+                type->get_base_type(), type, type);
 }
 
 ir_function_signature *
-builtin_builder::_cross(const glsl_type *type)
+builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *a = in_var(type, "a");
    ir_variable *b = in_var(type, "b");
-   MAKE_SIG(type, always_available, 2, a, b);
+   MAKE_SIG(type, avail, 2, a, b);
 
    int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
    int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0);
@@ -3215,10 +3424,10 @@ builtin_builder::_cross(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_normalize(const glsl_type *type)
+builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
-   MAKE_SIG(type, always_available, 1, x);
+   MAKE_SIG(type, avail, 1, x);
 
    if (type->vector_elements == 1) {
       body.emit(ret(sign(x)));
@@ -3248,41 +3457,41 @@ builtin_builder::_ftransform()
 }
 
 ir_function_signature *
-builtin_builder::_faceforward(const glsl_type *type)
+builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *N = in_var(type, "N");
    ir_variable *I = in_var(type, "I");
    ir_variable *Nref = in_var(type, "Nref");
-   MAKE_SIG(type, always_available, 3, N, I, Nref);
+   MAKE_SIG(type, avail, 3, N, I, Nref);
 
-   body.emit(if_tree(less(dot(Nref, I), imm(0.0f)),
+   body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)),
                      ret(N), ret(neg(N))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_reflect(const glsl_type *type)
+builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *I = in_var(type, "I");
    ir_variable *N = in_var(type, "N");
-   MAKE_SIG(type, always_available, 2, I, N);
+   MAKE_SIG(type, avail, 2, I, N);
 
    /* I - 2 * dot(N, I) * N */
-   body.emit(ret(sub(I, mul(imm(2.0f), mul(dot(N, I), N)))));
+   body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N)))));
 
    return sig;
 }
 
 ir_function_signature *
-builtin_builder::_refract(const glsl_type *type)
+builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *I = in_var(type, "I");
    ir_variable *N = in_var(type, "N");
-   ir_variable *eta = in_var(glsl_type::float_type, "eta");
-   MAKE_SIG(type, always_available, 3, I, N, eta);
+   ir_variable *eta = in_var(type->get_base_type(), "eta");
+   MAKE_SIG(type, avail, 3, I, N, eta);
 
-   ir_variable *n_dot_i = body.make_temp(glsl_type::float_type, "n_dot_i");
+   ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i");
    body.emit(assign(n_dot_i, dot(N, I)));
 
    /* From the GLSL 1.10 specification:
@@ -3292,11 +3501,11 @@ builtin_builder::_refract(const glsl_type *type)
     * else
     *    return eta * I - (eta * dot(N, I) + sqrt(k)) * N
     */
-   ir_variable *k = body.make_temp(glsl_type::float_type, "k");
-   body.emit(assign(k, sub(imm(1.0f),
-                           mul(eta, mul(eta, sub(imm(1.0f),
+   ir_variable *k = body.make_temp(type->get_base_type(), "k");
+   body.emit(assign(k, sub(IMM_FP(type, 1.0),
+                           mul(eta, mul(eta, sub(IMM_FP(type, 1.0),
                                                  mul(n_dot_i, n_dot_i)))))));
-   body.emit(if_tree(less(k, imm(0.0f)),
+   body.emit(if_tree(less(k, IMM_FP(type, 0.0)),
                      ret(ir_constant::zero(mem_ctx, type)),
                      ret(sub(mul(eta, I),
                              mul(add(mul(eta, n_dot_i), sqrt(k)), N)))));
@@ -3305,11 +3514,11 @@ builtin_builder::_refract(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_matrixCompMult(const glsl_type *type)
+builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *x = in_var(type, "x");
    ir_variable *y = in_var(type, "y");
-   MAKE_SIG(type, always_available, 2, x, y);
+   MAKE_SIG(type, avail, 2, x, y);
 
    ir_variable *z = body.make_temp(type, "z");
    for (int i = 0; i < type->matrix_columns; i++) {
@@ -3321,11 +3530,19 @@ builtin_builder::_matrixCompMult(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_outerProduct(const glsl_type *type)
+builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *c = in_var(glsl_type::vec(type->vector_elements), "c");
-   ir_variable *r = in_var(glsl_type::vec(type->matrix_columns), "r");
-   MAKE_SIG(type, v120, 2, c, r);
+   ir_variable *c;
+   ir_variable *r;
+
+   if (type->base_type == GLSL_TYPE_DOUBLE) {
+      r = in_var(glsl_type::dvec(type->matrix_columns), "r");
+      c = in_var(glsl_type::dvec(type->vector_elements), "c");
+   } else {
+      r = in_var(glsl_type::vec(type->matrix_columns), "r");
+      c = in_var(glsl_type::vec(type->vector_elements), "c");
+   }
+   MAKE_SIG(type, avail, 2, c, r);
 
    ir_variable *m = body.make_temp(type, "m");
    for (int i = 0; i < type->matrix_columns; i++) {
@@ -3337,15 +3554,15 @@ builtin_builder::_outerProduct(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_transpose(const glsl_type *orig_type)
+builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type)
 {
    const glsl_type *transpose_type =
-      glsl_type::get_instance(GLSL_TYPE_FLOAT,
+      glsl_type::get_instance(orig_type->base_type,
                               orig_type->matrix_columns,
                               orig_type->vector_elements);
 
    ir_variable *m = in_var(orig_type, "m");
-   MAKE_SIG(transpose_type, v120, 1, m);
+   MAKE_SIG(transpose_type, avail, 1, m);
 
    ir_variable *t = body.make_temp(transpose_type, "t");
    for (int i = 0; i < orig_type->matrix_columns; i++) {
@@ -3361,10 +3578,10 @@ builtin_builder::_transpose(const glsl_type *orig_type)
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat2()
+builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat2_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type->get_base_type(), avail, 1, m);
 
    body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)),
                      mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1)))));
@@ -3373,10 +3590,10 @@ builtin_builder::_determinant_mat2()
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat3()
+builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat3_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type->get_base_type(), avail, 1, m);
 
    ir_expression *f1 =
       sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)),
@@ -3398,30 +3615,31 @@ builtin_builder::_determinant_mat3()
 }
 
 ir_function_signature *
-builtin_builder::_determinant_mat4()
-{
-   ir_variable *m = in_var(glsl_type::mat4_type, "m");
-   MAKE_SIG(glsl_type::float_type, v120, 1, m);
-
-   ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00");
-   ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01");
-   ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02");
-   ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03");
-   ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04");
-   ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05");
-   ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06");
-   ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07");
-   ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08");
-   ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09");
-   ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10");
-   ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11");
-   ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12");
-   ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13");
-   ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14");
-   ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15");
-   ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16");
-   ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17");
-   ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18");
+builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(btype, avail, 1, m);
+
+   ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00");
+   ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01");
+   ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02");
+   ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03");
+   ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04");
+   ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05");
+   ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06");
+   ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07");
+   ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08");
+   ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09");
+   ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10");
+   ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11");
+   ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12");
+   ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13");
+   ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14");
+   ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15");
+   ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16");
+   ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17");
+   ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18");
 
    body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3)))));
    body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3)))));
@@ -3443,7 +3661,7 @@ builtin_builder::_determinant_mat4()
    body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2)))));
    body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj_0 = body.make_temp(glsl_type::vec4_type, "adj_0");
+   ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0");
 
    body.emit(assign(adj_0,
                     add(sub(mul(matrix_elt(m, 1, 1), SubFactor00),
@@ -3472,12 +3690,12 @@ builtin_builder::_determinant_mat4()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat2()
+builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat2_type, "m");
-   MAKE_SIG(glsl_type::mat2_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   MAKE_SIG(type, avail, 1, m);
 
-   ir_variable *adj = body.make_temp(glsl_type::mat2_type, "adj");
+   ir_variable *adj = body.make_temp(type, "adj");
    body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0));
    body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1));
    body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0));
@@ -3492,14 +3710,15 @@ builtin_builder::_inverse_mat2()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat3()
+builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type)
 {
-   ir_variable *m = in_var(glsl_type::mat3_type, "m");
-   MAKE_SIG(glsl_type::mat3_type, v120, 1, m);
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(type, avail, 1, m);
 
-   ir_variable *f11_22_21_12 = body.make_temp(glsl_type::float_type, "f11_22_21_12");
-   ir_variable *f10_22_20_12 = body.make_temp(glsl_type::float_type, "f10_22_20_12");
-   ir_variable *f10_21_20_11 = body.make_temp(glsl_type::float_type, "f10_21_20_11");
+   ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12");
+   ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12");
+   ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11");
 
    body.emit(assign(f11_22_21_12,
                     sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)),
@@ -3511,7 +3730,7 @@ builtin_builder::_inverse_mat3()
                     sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)),
                         mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj = body.make_temp(glsl_type::mat3_type, "adj");
+   ir_variable *adj = body.make_temp(type, "adj");
    body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X));
    body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X));
    body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X));
@@ -3553,30 +3772,31 @@ builtin_builder::_inverse_mat3()
 }
 
 ir_function_signature *
-builtin_builder::_inverse_mat4()
-{
-   ir_variable *m = in_var(glsl_type::mat4_type, "m");
-   MAKE_SIG(glsl_type::mat4_type, v120, 1, m);
-
-   ir_variable *SubFactor00 = body.make_temp(glsl_type::float_type, "SubFactor00");
-   ir_variable *SubFactor01 = body.make_temp(glsl_type::float_type, "SubFactor01");
-   ir_variable *SubFactor02 = body.make_temp(glsl_type::float_type, "SubFactor02");
-   ir_variable *SubFactor03 = body.make_temp(glsl_type::float_type, "SubFactor03");
-   ir_variable *SubFactor04 = body.make_temp(glsl_type::float_type, "SubFactor04");
-   ir_variable *SubFactor05 = body.make_temp(glsl_type::float_type, "SubFactor05");
-   ir_variable *SubFactor06 = body.make_temp(glsl_type::float_type, "SubFactor06");
-   ir_variable *SubFactor07 = body.make_temp(glsl_type::float_type, "SubFactor07");
-   ir_variable *SubFactor08 = body.make_temp(glsl_type::float_type, "SubFactor08");
-   ir_variable *SubFactor09 = body.make_temp(glsl_type::float_type, "SubFactor09");
-   ir_variable *SubFactor10 = body.make_temp(glsl_type::float_type, "SubFactor10");
-   ir_variable *SubFactor11 = body.make_temp(glsl_type::float_type, "SubFactor11");
-   ir_variable *SubFactor12 = body.make_temp(glsl_type::float_type, "SubFactor12");
-   ir_variable *SubFactor13 = body.make_temp(glsl_type::float_type, "SubFactor13");
-   ir_variable *SubFactor14 = body.make_temp(glsl_type::float_type, "SubFactor14");
-   ir_variable *SubFactor15 = body.make_temp(glsl_type::float_type, "SubFactor15");
-   ir_variable *SubFactor16 = body.make_temp(glsl_type::float_type, "SubFactor16");
-   ir_variable *SubFactor17 = body.make_temp(glsl_type::float_type, "SubFactor17");
-   ir_variable *SubFactor18 = body.make_temp(glsl_type::float_type, "SubFactor18");
+builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *m = in_var(type, "m");
+   const glsl_type *btype = type->get_base_type();
+   MAKE_SIG(type, avail, 1, m);
+
+   ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00");
+   ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01");
+   ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02");
+   ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03");
+   ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04");
+   ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05");
+   ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06");
+   ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07");
+   ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08");
+   ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09");
+   ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10");
+   ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11");
+   ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12");
+   ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13");
+   ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14");
+   ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15");
+   ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16");
+   ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17");
+   ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18");
 
    body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3)))));
    body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3)))));
@@ -3598,7 +3818,7 @@ builtin_builder::_inverse_mat4()
    body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2)))));
    body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1)))));
 
-   ir_variable *adj = body.make_temp(glsl_type::mat4_type, "adj");
+   ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj");
    body.emit(assign(array_ref(adj, 0),
                     add(sub(mul(matrix_elt(m, 1, 1), SubFactor00),
                             mul(matrix_elt(m, 1, 2), SubFactor01)),
@@ -4270,12 +4490,12 @@ builtin_builder::_findMSB(const glsl_type *type)
 }
 
 ir_function_signature *
-builtin_builder::_fma(const glsl_type *type)
+builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type)
 {
    ir_variable *a = in_var(type, "a");
    ir_variable *b = in_var(type, "b");
    ir_variable *c = in_var(type, "c");
-   MAKE_SIG(type, gpu_shader5, 3, a, b, c);
+   MAKE_SIG(type, avail, 3, a, b, c);
 
    body.emit(ret(ir_builder::fma(a, b, c)));
 
@@ -4285,7 +4505,20 @@ builtin_builder::_fma(const glsl_type *type)
 ir_function_signature *
 builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type)
 {
-   return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type);
+   return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5, x_type, x_type, exp_type);
+}
+
+ir_function_signature *
+builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, fp64, 2, x, exponent);
+
+   body.emit(assign(exponent, expr(ir_unop_frexp_exp, x)));
+
+   body.emit(ret(expr(ir_unop_frexp_sig, x)));
+   return sig;
 }
 
 ir_function_signature *
@@ -4618,6 +4851,17 @@ _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
    return s;
 }
 
+ir_function *
+_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
+                                         const char *name)
+{
+   ir_function *f;
+   mtx_lock(&builtins_lock);
+   f = builtins.shader->symbols->get_function(name);
+   mtx_unlock(&builtins_lock);
+   return f;
+}
+
 gl_shader *
 _mesa_glsl_get_builtin_function_shader()
 {
diff --git a/mesalib/src/glsl/builtin_type_macros.h b/mesalib/src/glsl/builtin_type_macros.h
index 236e1ce8c..8e16ae454 100644
--- a/mesalib/src/glsl/builtin_type_macros.h
+++ b/mesalib/src/glsl/builtin_type_macros.h
@@ -64,6 +64,22 @@ DECL_TYPE(mat3x4, GL_FLOAT_MAT3x4, GLSL_TYPE_FLOAT, 4, 3)
 DECL_TYPE(mat4x2, GL_FLOAT_MAT4x2, GLSL_TYPE_FLOAT, 2, 4)
 DECL_TYPE(mat4x3, GL_FLOAT_MAT4x3, GLSL_TYPE_FLOAT, 3, 4)
 
+DECL_TYPE(double,  GL_DOUBLE,        GLSL_TYPE_DOUBLE, 1, 1)
+DECL_TYPE(dvec2,   GL_DOUBLE_VEC2,   GLSL_TYPE_DOUBLE, 2, 1)
+DECL_TYPE(dvec3,   GL_DOUBLE_VEC3,   GLSL_TYPE_DOUBLE, 3, 1)
+DECL_TYPE(dvec4,   GL_DOUBLE_VEC4,   GLSL_TYPE_DOUBLE, 4, 1)
+
+DECL_TYPE(dmat2,   GL_DOUBLE_MAT2,   GLSL_TYPE_DOUBLE, 2, 2)
+DECL_TYPE(dmat3,   GL_DOUBLE_MAT3,   GLSL_TYPE_DOUBLE, 3, 3)
+DECL_TYPE(dmat4,   GL_DOUBLE_MAT4,   GLSL_TYPE_DOUBLE, 4, 4)
+
+DECL_TYPE(dmat2x3, GL_DOUBLE_MAT2x3, GLSL_TYPE_DOUBLE, 3, 2)
+DECL_TYPE(dmat2x4, GL_DOUBLE_MAT2x4, GLSL_TYPE_DOUBLE, 4, 2)
+DECL_TYPE(dmat3x2, GL_DOUBLE_MAT3x2, GLSL_TYPE_DOUBLE, 2, 3)
+DECL_TYPE(dmat3x4, GL_DOUBLE_MAT3x4, GLSL_TYPE_DOUBLE, 4, 3)
+DECL_TYPE(dmat4x2, GL_DOUBLE_MAT4x2, GLSL_TYPE_DOUBLE, 2, 4)
+DECL_TYPE(dmat4x3, GL_DOUBLE_MAT4x3, GLSL_TYPE_DOUBLE, 3, 4)
+
 DECL_TYPE(sampler1D,         GL_SAMPLER_1D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_1D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler2D,         GL_SAMPLER_2D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_2D,   0, 0, GLSL_TYPE_FLOAT)
 DECL_TYPE(sampler3D,         GL_SAMPLER_3D,                   GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_3D,   0, 0, GLSL_TYPE_FLOAT)
@@ -110,39 +126,39 @@ DECL_TYPE(sampler2DRectShadow,    GL_SAMPLER_2D_RECT_SHADOW,        GLSL_TYPE_SA
 
 DECL_TYPE(samplerExternalOES,     GL_SAMPLER_EXTERNAL_OES,          GLSL_TYPE_SAMPLER, GLSL_SAMPLER_DIM_EXTERNAL, 0, 0, GLSL_TYPE_FLOAT)
 
-DECL_TYPE(image1D,         GL_IMAGE_1D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2D,         GL_IMAGE_2D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image3D,         GL_IMAGE_3D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DRect,     GL_IMAGE_2D_RECT,                           GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageCube,       GL_IMAGE_CUBE,                              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageBuffer,     GL_IMAGE_BUFFER,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image1DArray,    GL_IMAGE_1D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DArray,    GL_IMAGE_2D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(imageCubeArray,  GL_IMAGE_CUBE_MAP_ARRAY,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DMS,       GL_IMAGE_2D_MULTISAMPLE,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_FLOAT);
-DECL_TYPE(image2DMSArray,  GL_IMAGE_2D_MULTISAMPLE_ARRAY,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_FLOAT);
-DECL_TYPE(iimage1D,        GL_INT_IMAGE_1D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2D,        GL_INT_IMAGE_2D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage3D,        GL_INT_IMAGE_3D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DRect,    GL_INT_IMAGE_2D_RECT,                       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimageCube,      GL_INT_IMAGE_CUBE,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimageBuffer,    GL_INT_IMAGE_BUFFER,                        GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage1DArray,   GL_INT_IMAGE_1D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DArray,   GL_INT_IMAGE_2D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DMS,      GL_INT_IMAGE_2D_MULTISAMPLE,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_INT);
-DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY,          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_INT);
-DECL_TYPE(uimage1D,        GL_UNSIGNED_INT_IMAGE_1D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2D,        GL_UNSIGNED_INT_IMAGE_2D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage3D,        GL_UNSIGNED_INT_IMAGE_3D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DRect,    GL_UNSIGNED_INT_IMAGE_2D_RECT,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimageCube,      GL_UNSIGNED_INT_IMAGE_CUBE,                 GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimageBuffer,    GL_UNSIGNED_INT_IMAGE_BUFFER,               GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage1DArray,   GL_UNSIGNED_INT_IMAGE_1D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DArray,   GL_UNSIGNED_INT_IMAGE_2D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DMS,      GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_UINT);
-DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_UINT);
+DECL_TYPE(image1D,         GL_IMAGE_1D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2D,         GL_IMAGE_2D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image3D,         GL_IMAGE_3D,                                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DRect,     GL_IMAGE_2D_RECT,                           GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageCube,       GL_IMAGE_CUBE,                              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageBuffer,     GL_IMAGE_BUFFER,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image1DArray,    GL_IMAGE_1D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DArray,    GL_IMAGE_2D_ARRAY,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(imageCubeArray,  GL_IMAGE_CUBE_MAP_ARRAY,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DMS,       GL_IMAGE_2D_MULTISAMPLE,                    GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_FLOAT)
+DECL_TYPE(image2DMSArray,  GL_IMAGE_2D_MULTISAMPLE_ARRAY,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_FLOAT)
+DECL_TYPE(iimage1D,        GL_INT_IMAGE_1D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2D,        GL_INT_IMAGE_2D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage3D,        GL_INT_IMAGE_3D,                            GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DRect,    GL_INT_IMAGE_2D_RECT,                       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimageCube,      GL_INT_IMAGE_CUBE,                          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimageBuffer,    GL_INT_IMAGE_BUFFER,                        GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage1DArray,   GL_INT_IMAGE_1D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DArray,   GL_INT_IMAGE_2D_ARRAY,                      GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimageCubeArray, GL_INT_IMAGE_CUBE_MAP_ARRAY,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DMS,      GL_INT_IMAGE_2D_MULTISAMPLE,                GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_INT)
+DECL_TYPE(iimage2DMSArray, GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY,          GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_INT)
+DECL_TYPE(uimage1D,        GL_UNSIGNED_INT_IMAGE_1D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2D,        GL_UNSIGNED_INT_IMAGE_2D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage3D,        GL_UNSIGNED_INT_IMAGE_3D,                   GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_3D,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DRect,    GL_UNSIGNED_INT_IMAGE_2D_RECT,              GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_RECT,   0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimageCube,      GL_UNSIGNED_INT_IMAGE_CUBE,                 GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimageBuffer,    GL_UNSIGNED_INT_IMAGE_BUFFER,               GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_BUF,    0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage1DArray,   GL_UNSIGNED_INT_IMAGE_1D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_1D,     0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DArray,   GL_UNSIGNED_INT_IMAGE_2D_ARRAY,             GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_2D,     0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimageCubeArray, GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_CUBE,   0, 1, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DMS,      GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE,       GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 0, GLSL_TYPE_UINT)
+DECL_TYPE(uimage2DMSArray, GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY, GLSL_TYPE_IMAGE, GLSL_SAMPLER_DIM_MS,     0, 1, GLSL_TYPE_UINT)
 
 DECL_TYPE(atomic_uint, GL_UNSIGNED_INT_ATOMIC_COUNTER, GLSL_TYPE_ATOMIC_UINT, 1, 1)
 
diff --git a/mesalib/src/glsl/builtin_types.cpp b/mesalib/src/glsl/builtin_types.cpp
index 10fac0f81..fef86df28 100644
--- a/mesalib/src/glsl/builtin_types.cpp
+++ b/mesalib/src/glsl/builtin_types.cpp
@@ -159,6 +159,20 @@ const static struct builtin_type_versions {
    T(mat4x2,                          120, 300)
    T(mat4x3,                          120, 300)
 
+   T(double,                          400, 999)
+   T(dvec2,                           400, 999)
+   T(dvec3,                           400, 999)
+   T(dvec4,                           400, 999)
+   T(dmat2,                           400, 999)
+   T(dmat3,                           400, 999)
+   T(dmat4,                           400, 999)
+   T(dmat2x3,                         400, 999)
+   T(dmat2x4,                         400, 999)
+   T(dmat3x2,                         400, 999)
+   T(dmat3x4,                         400, 999)
+   T(dmat4x2,                         400, 999)
+   T(dmat4x3,                         400, 999)
+
    T(sampler1D,                       110, 999)
    T(sampler2D,                       110, 100)
    T(sampler3D,                       110, 300)
@@ -361,5 +375,21 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state)
    if (state->ARB_shader_atomic_counters_enable) {
       add_type(symbols, glsl_type::atomic_uint_type);
    }
+
+   if (state->ARB_gpu_shader_fp64_enable) {
+      add_type(symbols, glsl_type::double_type);
+      add_type(symbols, glsl_type::dvec2_type);
+      add_type(symbols, glsl_type::dvec3_type);
+      add_type(symbols, glsl_type::dvec4_type);
+      add_type(symbols, glsl_type::dmat2_type);
+      add_type(symbols, glsl_type::dmat3_type);
+      add_type(symbols, glsl_type::dmat4_type);
+      add_type(symbols, glsl_type::dmat2x3_type);
+      add_type(symbols, glsl_type::dmat2x4_type);
+      add_type(symbols, glsl_type::dmat3x2_type);
+      add_type(symbols, glsl_type::dmat3x4_type);
+      add_type(symbols, glsl_type::dmat4x2_type);
+      add_type(symbols, glsl_type::dmat4x3_type);
+   }
 }
 /** @} */
diff --git a/mesalib/src/glsl/glcpp/glcpp-parse.y b/mesalib/src/glsl/glcpp/glcpp-parse.y
index 9b1a4f401..c2f5223a9 100644
--- a/mesalib/src/glsl/glcpp/glcpp-parse.y
+++ b/mesalib/src/glsl/glcpp/glcpp-parse.y
@@ -2375,6 +2375,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	if (parser->is_gles) {
 	   add_builtin_define(parser, "GL_ES", 1);
            add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1);
+           add_builtin_define(parser, "GL_EXT_draw_buffers", 1);
 
 	   if (extensions != NULL) {
 	      if (extensions->OES_EGL_image_external)
@@ -2444,6 +2445,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	      if (extensions->ARB_gpu_shader5)
 	         add_builtin_define(parser, "GL_ARB_gpu_shader5", 1);
 
+              if (extensions->ARB_gpu_shader_fp64)
+                 add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1);
+
 	      if (extensions->AMD_vertex_shader_layer)
 	         add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1);
 
@@ -2473,6 +2477,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 
               if (extensions->ARB_derivative_control)
                  add_builtin_define(parser, "GL_ARB_derivative_control", 1);
+
+              if (extensions->ARB_shader_precision)
+                 add_builtin_define(parser, "GL_ARB_shader_precision", 1);
 	   }
 	}
 
diff --git a/mesalib/src/glsl/glcpp/glcpp.c b/mesalib/src/glsl/glcpp/glcpp.c
index ca188015c..5144516a6 100644
--- a/mesalib/src/glsl/glcpp/glcpp.c
+++ b/mesalib/src/glsl/glcpp/glcpp.c
@@ -121,7 +121,7 @@ enum {
 	DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1
 };
 
-const static struct option
+static const struct option
 long_options[] = {
 	{"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT },
         {"debug",                      no_argument, 0, 'd'},
diff --git a/mesalib/src/glsl/glsl_lexer.ll b/mesalib/src/glsl/glsl_lexer.ll
index 57c46be84..8dc3d106b 100644
--- a/mesalib/src/glsl/glsl_lexer.ll
+++ b/mesalib/src/glsl/glsl_lexer.ll
@@ -458,6 +458,17 @@ layout		{
 			    return FLOATCONSTANT;
 			}
 
+[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF)	|
+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF)	|
+[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF)	|
+[0-9]+[eE][+-]?[0-9]+(lf|LF)		{
+			    if (!yyextra->is_version(400, 0) &&
+			        !yyextra->ARB_gpu_shader_fp64_enable)
+			        return ERROR_TOK;
+			    yylval->dreal = _mesa_strtod(yytext, NULL);
+			    return DOUBLECONSTANT;
+			}
+
 true			{
 			    yylval->n = 1;
 			    return BOOLCONSTANT;
@@ -489,7 +500,7 @@ external	KEYWORD(110, 100, 0, 0, EXTERNAL);
 interface	KEYWORD(110, 100, 0, 0, INTERFACE);
 long		KEYWORD(110, 100, 0, 0, LONG_TOK);
 short		KEYWORD(110, 100, 0, 0, SHORT_TOK);
-double		KEYWORD(110, 100, 400, 0, DOUBLE_TOK);
+double		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK);
 half		KEYWORD(110, 100, 0, 0, HALF);
 fixed		KEYWORD(110, 100, 0, 0, FIXED_TOK);
 unsigned	KEYWORD(110, 100, 0, 0, UNSIGNED);
@@ -498,9 +509,21 @@ output		KEYWORD(110, 100, 0, 0, OUTPUT);
 hvec2		KEYWORD(110, 100, 0, 0, HVEC2);
 hvec3		KEYWORD(110, 100, 0, 0, HVEC3);
 hvec4		KEYWORD(110, 100, 0, 0, HVEC4);
-dvec2		KEYWORD(110, 100, 400, 0, DVEC2);
-dvec3		KEYWORD(110, 100, 400, 0, DVEC3);
-dvec4		KEYWORD(110, 100, 400, 0, DVEC4);
+dvec2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2);
+dvec3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3);
+dvec4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4);
+dmat2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
+dmat3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
+dmat4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
+dmat2x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2);
+dmat2x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3);
+dmat2x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4);
+dmat3x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2);
+dmat3x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3);
+dmat3x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4);
+dmat4x2		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2);
+dmat4x3		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3);
+dmat4x4		KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4);
 fvec2		KEYWORD(110, 100, 0, 0, FVEC2);
 fvec3		KEYWORD(110, 100, 0, 0, FVEC3);
 fvec4		KEYWORD(110, 100, 0, 0, FVEC4);
@@ -544,7 +567,13 @@ subroutine	KEYWORD(0, 300, 0, 0, SUBROUTINE);
 [_a-zA-Z][_a-zA-Z0-9]*	{
 			    struct _mesa_glsl_parse_state *state = yyextra;
 			    void *ctx = state;	
-			    yylval->identifier = ralloc_strdup(ctx, yytext);
+			    if (state->es_shader && strlen(yytext) > 1024) {
+			       _mesa_glsl_error(yylloc, state,
+			                        "Identifier `%s' exceeds 1024 characters",
+			                        yytext);
+			    }
+			    /* Always set the semantic value: a token is still returned below. */
+			    yylval->identifier = ralloc_strdup(ctx, yytext);
 			    return classify_identifier(state, yytext);
 			}
 
diff --git a/mesalib/src/glsl/glsl_parser.yy b/mesalib/src/glsl/glsl_parser.yy
index 7fb8c38ab..ea3bd8a24 100644
--- a/mesalib/src/glsl/glsl_parser.yy
+++ b/mesalib/src/glsl/glsl_parser.yy
@@ -94,6 +94,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %union {
    int n;
    float real;
+   double dreal;
    const char *identifier;
 
    struct ast_type_qualifier type_qualifier;
@@ -128,14 +129,17 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
    } selection_rest_statement;
 }
 
-%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK
+%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
 %token BREAK CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
-%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4
+%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
 %token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
 %token NOPERSPECTIVE FLAT SMOOTH
 %token MAT2X2 MAT2X3 MAT2X4
 %token MAT3X2 MAT3X3 MAT3X4
 %token MAT4X2 MAT4X3 MAT4X4
+%token DMAT2X2 DMAT2X3 DMAT2X4
+%token DMAT3X2 DMAT3X3 DMAT3X4
+%token DMAT4X2 DMAT4X3 DMAT4X4
 %token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
 %token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW
 %token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW
@@ -162,6 +166,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %type <identifier> any_identifier
 %type <interface_block> instance_name_opt
 %token <real> FLOATCONSTANT
+%token <dreal> DOUBLECONSTANT
 %token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
 %token <identifier> FIELD_SELECTION
 %token LEFT_OP RIGHT_OP
@@ -182,8 +187,8 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
     */
 %token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
 %token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL
-%token LONG_TOK SHORT_TOK DOUBLE_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK
-%token HVEC2 HVEC3 HVEC4 DVEC2 DVEC3 DVEC4 FVEC2 FVEC3 FVEC4
+%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK
+%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4
 %token SAMPLER3DRECT
 %token SIZEOF CAST NAMESPACE USING
 %token RESOURCE PATCH
@@ -434,6 +439,13 @@ primary_expression:
       $$->set_location(@1);
       $$->primary_expression.float_constant = $1;
    }
+   | DOUBLECONSTANT
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL);
+      $$->set_location(@1);
+      $$->primary_expression.double_constant = $1;
+   }
    | BOOLCONSTANT
    {
       void *ctx = state;
@@ -1864,6 +1876,7 @@ type_specifier_nonarray:
 basic_type_specifier_nonarray:
    VOID_TOK                 { $$ = "void"; }
    | FLOAT_TOK              { $$ = "float"; }
+   | DOUBLE_TOK             { $$ = "double"; }
    | INT_TOK                { $$ = "int"; }
    | UINT_TOK               { $$ = "uint"; }
    | BOOL_TOK               { $$ = "bool"; }
@@ -1879,6 +1892,9 @@ basic_type_specifier_nonarray:
    | UVEC2                  { $$ = "uvec2"; }
    | UVEC3                  { $$ = "uvec3"; }
    | UVEC4                  { $$ = "uvec4"; }
+   | DVEC2                  { $$ = "dvec2"; }
+   | DVEC3                  { $$ = "dvec3"; }
+   | DVEC4                  { $$ = "dvec4"; }
    | MAT2X2                 { $$ = "mat2"; }
    | MAT2X3                 { $$ = "mat2x3"; }
    | MAT2X4                 { $$ = "mat2x4"; }
@@ -1888,6 +1904,15 @@ basic_type_specifier_nonarray:
    | MAT4X2                 { $$ = "mat4x2"; }
    | MAT4X3                 { $$ = "mat4x3"; }
    | MAT4X4                 { $$ = "mat4"; }
+   | DMAT2X2                { $$ = "dmat2"; }
+   | DMAT2X3                { $$ = "dmat2x3"; }
+   | DMAT2X4                { $$ = "dmat2x4"; }
+   | DMAT3X2                { $$ = "dmat3x2"; }
+   | DMAT3X3                { $$ = "dmat3"; }
+   | DMAT3X4                { $$ = "dmat3x4"; }
+   | DMAT4X2                { $$ = "dmat4x2"; }
+   | DMAT4X3                { $$ = "dmat4x3"; }
+   | DMAT4X4                { $$ = "dmat4"; }
    | SAMPLER1D              { $$ = "sampler1D"; }
    | SAMPLER2D              { $$ = "sampler2D"; }
    | SAMPLER2DRECT          { $$ = "sampler2DRect"; }
@@ -2539,6 +2564,28 @@ basic_interface_block:
                              "interface block member does not match "
                              "the interface block");
          }
+
+         /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks":
+          *
+          * "GLSL ES 3.0 does not support interface blocks for shader inputs or
+          * outputs."
+          *
+          * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":
+          *
+          * "Only variables output from a shader can be candidates for
+          * invariance."
+          *
+          * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks":
+          *
+          * "If optional qualifiers are used, they can include interpolation
+          * qualifiers, auxiliary storage qualifiers, and storage qualifiers
+          * and they must declare an input, output, or uniform member
+          * consistent with the interface qualifier of the block"
+          */
+         if (qualifier.flags.q.invariant)
+            _mesa_glsl_error(&@1, state,
+                             "invariant qualifiers cannot be used "
+                             "with interface blocks members");
       }
 
       $$ = block;
diff --git a/mesalib/src/glsl/glsl_parser_extras.cpp b/mesalib/src/glsl/glsl_parser_extras.cpp
index 27e2eaf37..9f7931380 100644
--- a/mesalib/src/glsl/glsl_parser_extras.cpp
+++ b/mesalib/src/glsl/glsl_parser_extras.cpp
@@ -370,10 +370,27 @@ _mesa_shader_stage_to_string(unsigned stage)
    case MESA_SHADER_VERTEX:   return "vertex";
    case MESA_SHADER_FRAGMENT: return "fragment";
    case MESA_SHADER_GEOMETRY: return "geometry";
+   case MESA_SHADER_COMPUTE:  return "compute";
    }
 
-   assert(!"Should not get here.");
-   return "unknown";
+   unreachable("Unknown shader stage.");
+}
+
+/**
+ * Translate a gl_shader_stage to its two-letter abbreviation (VS, FS, GS
+ * or CS) for debug printouts and error messages.
+ */
+const char *
+_mesa_shader_stage_to_abbrev(unsigned stage)
+{
+   switch (stage) {
+   case MESA_SHADER_VERTEX:   return "VS";
+   case MESA_SHADER_FRAGMENT: return "FS";
+   case MESA_SHADER_GEOMETRY: return "GS";
+   case MESA_SHADER_COMPUTE:  return "CS";
+   }
+
+   unreachable("Unknown shader stage.");
+}
 
 /* This helper function will append the given message to the shader's
@@ -527,11 +544,13 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_fragment_coord_conventions, true,  false,     ARB_fragment_coord_conventions),
    EXT(ARB_fragment_layer_viewport,    true,  false,     ARB_fragment_layer_viewport),
    EXT(ARB_gpu_shader5,                true,  false,     ARB_gpu_shader5),
+   EXT(ARB_gpu_shader_fp64,            true,  false,     ARB_gpu_shader_fp64),
    EXT(ARB_sample_shading,             true,  false,     ARB_sample_shading),
    EXT(ARB_separate_shader_objects,    true,  false,     dummy_true),
    EXT(ARB_shader_atomic_counters,     true,  false,     ARB_shader_atomic_counters),
    EXT(ARB_shader_bit_encoding,        true,  false,     ARB_shader_bit_encoding),
    EXT(ARB_shader_image_load_store,    true,  false,     ARB_shader_image_load_store),
+   EXT(ARB_shader_precision,           true,  false,     ARB_shader_precision),
    EXT(ARB_shader_stencil_export,      true,  false,     ARB_shader_stencil_export),
    EXT(ARB_shader_texture_lod,         true,  false,     ARB_shader_texture_lod),
    EXT(ARB_shading_language_420pack,   true,  false,     ARB_shading_language_420pack),
@@ -561,6 +580,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(AMD_shader_trinary_minmax,      true,  false,     dummy_true),
    EXT(AMD_vertex_shader_layer,        true,  false,     AMD_vertex_shader_layer),
    EXT(AMD_vertex_shader_viewport_index, true,  false,   AMD_vertex_shader_viewport_index),
+   EXT(EXT_draw_buffers,               false,  true,     dummy_true),
    EXT(EXT_separate_shader_objects,    false, true,      dummy_true),
    EXT(EXT_shader_integer_mix,         true,  true,      EXT_shader_integer_mix),
    EXT(EXT_texture_array,              true,  false,     EXT_texture_array),
@@ -959,6 +979,10 @@ ast_expression::print(void) const
       printf("%f ", primary_expression.float_constant);
       break;
 
+   case ast_double_constant:
+      printf("%f ", primary_expression.double_constant);
+      break;
+
    case ast_bool_constant:
       printf("%s ",
 	     primary_expression.bool_constant
diff --git a/mesalib/src/glsl/glsl_parser_extras.h b/mesalib/src/glsl/glsl_parser_extras.h
index e04f7ced5..0975c86ed 100644
--- a/mesalib/src/glsl/glsl_parser_extras.h
+++ b/mesalib/src/glsl/glsl_parser_extras.h
@@ -205,6 +205,11 @@ struct _mesa_glsl_parse_state {
          || EXT_separate_shader_objects_enable;
    }
 
+   bool has_double() const
+   {
+      return ARB_gpu_shader_fp64_enable || is_version(400, 0);
+   }
+
    void process_version_directive(YYLTYPE *locp, int version,
                                   const char *ident);
 
@@ -414,6 +419,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_fragment_layer_viewport_warn;
    bool ARB_gpu_shader5_enable;
    bool ARB_gpu_shader5_warn;
+   bool ARB_gpu_shader_fp64_enable;
+   bool ARB_gpu_shader_fp64_warn;
    bool ARB_sample_shading_enable;
    bool ARB_sample_shading_warn;
    bool ARB_separate_shader_objects_enable;
@@ -424,6 +431,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_shader_bit_encoding_warn;
    bool ARB_shader_image_load_store_enable;
    bool ARB_shader_image_load_store_warn;
+   bool ARB_shader_precision_enable;
+   bool ARB_shader_precision_warn;
    bool ARB_shader_stencil_export_enable;
    bool ARB_shader_stencil_export_warn;
    bool ARB_shader_texture_lod_enable;
@@ -473,6 +482,8 @@ struct _mesa_glsl_parse_state {
    bool AMD_vertex_shader_layer_warn;
    bool AMD_vertex_shader_viewport_index_enable;
    bool AMD_vertex_shader_viewport_index_warn;
+   bool EXT_draw_buffers_enable;
+   bool EXT_draw_buffers_warn;
    bool EXT_separate_shader_objects_enable;
    bool EXT_separate_shader_objects_warn;
    bool EXT_shader_integer_mix_enable;
@@ -572,6 +583,9 @@ extern "C" {
 extern const char *
 _mesa_shader_stage_to_string(unsigned stage);
 
+extern const char *
+_mesa_shader_stage_to_abbrev(unsigned stage);
+
 extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
                       const struct gl_extensions *extensions, struct gl_context *gl_ctx);
 
diff --git a/mesalib/src/glsl/glsl_types.cpp b/mesalib/src/glsl/glsl_types.cpp
index 0d2eb7cec..38b37a6a9 100644
--- a/mesalib/src/glsl/glsl_types.cpp
+++ b/mesalib/src/glsl/glsl_types.cpp
@@ -194,6 +194,22 @@ glsl_type::contains_integer() const
 }
 
 bool
+glsl_type::contains_double() const
+{
+   if (this->is_array()) {
+      return this->fields.array->contains_double();
+   } else if (this->is_record()) {
+      for (unsigned int i = 0; i < this->length; i++) {
+	 if (this->fields.structure[i].type->contains_double())
+	    return true;
+      }
+      return false;
+   } else {
+      return this->is_double();
+   }
+}
+
+bool
 glsl_type::contains_opaque() const {
    switch (base_type) {
    case GLSL_TYPE_SAMPLER:
@@ -268,6 +284,8 @@ const glsl_type *glsl_type::get_base_type() const
       return int_type;
    case GLSL_TYPE_FLOAT:
       return float_type;
+   case GLSL_TYPE_DOUBLE:
+      return double_type;
    case GLSL_TYPE_BOOL:
       return bool_type;
    default:
@@ -292,6 +310,8 @@ const glsl_type *glsl_type::get_scalar_type() const
       return int_type;
    case GLSL_TYPE_FLOAT:
       return float_type;
+   case GLSL_TYPE_DOUBLE:
+      return double_type;
    case GLSL_TYPE_BOOL:
       return bool_type;
    default:
@@ -377,6 +397,17 @@ glsl_type::vec(unsigned components)
    return ts[components - 1];
 }
 
+const glsl_type *
+glsl_type::dvec(unsigned components)
+{
+   if (components == 0 || components > 4)
+      return error_type;
+
+   static const glsl_type *const ts[] = {
+      double_type, dvec2_type, dvec3_type, dvec4_type
+   };
+   return ts[components - 1];
+}
 
 const glsl_type *
 glsl_type::ivec(unsigned components)
@@ -436,13 +467,15 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
 	 return ivec(rows);
       case GLSL_TYPE_FLOAT:
 	 return vec(rows);
+      case GLSL_TYPE_DOUBLE:
+	 return dvec(rows);
       case GLSL_TYPE_BOOL:
 	 return bvec(rows);
       default:
 	 return error_type;
       }
    } else {
-      if ((base_type != GLSL_TYPE_FLOAT) || (rows == 1))
+      if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1))
 	 return error_type;
 
       /* GLSL matrix types are named mat{COLUMNS}x{ROWS}.  Only the following
@@ -456,17 +489,32 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
        */
 #define IDX(c,r) (((c-1)*3) + (r-1))
 
-      switch (IDX(columns, rows)) {
-      case IDX(2,2): return mat2_type;
-      case IDX(2,3): return mat2x3_type;
-      case IDX(2,4): return mat2x4_type;
-      case IDX(3,2): return mat3x2_type;
-      case IDX(3,3): return mat3_type;
-      case IDX(3,4): return mat3x4_type;
-      case IDX(4,2): return mat4x2_type;
-      case IDX(4,3): return mat4x3_type;
-      case IDX(4,4): return mat4_type;
-      default: return error_type;
+      if (base_type == GLSL_TYPE_DOUBLE) {
+         switch (IDX(columns, rows)) {
+         case IDX(2,2): return dmat2_type;
+         case IDX(2,3): return dmat2x3_type;
+         case IDX(2,4): return dmat2x4_type;
+         case IDX(3,2): return dmat3x2_type;
+         case IDX(3,3): return dmat3_type;
+         case IDX(3,4): return dmat3x4_type;
+         case IDX(4,2): return dmat4x2_type;
+         case IDX(4,3): return dmat4x3_type;
+         case IDX(4,4): return dmat4_type;
+         default: return error_type;
+         }
+      } else {
+         switch (IDX(columns, rows)) {
+         case IDX(2,2): return mat2_type;
+         case IDX(2,3): return mat2x3_type;
+         case IDX(2,4): return mat2x4_type;
+         case IDX(3,2): return mat3x2_type;
+         case IDX(3,3): return mat3_type;
+         case IDX(3,4): return mat3x4_type;
+         case IDX(4,2): return mat4x2_type;
+         case IDX(4,3): return mat4x3_type;
+         case IDX(4,4): return mat4_type;
+         default: return error_type;
+         }
       }
    }
 
@@ -474,6 +522,117 @@ glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns)
    return error_type;
 }
 
+const glsl_type *
+glsl_type::get_sampler_instance(enum glsl_sampler_dim dim,
+                                bool shadow,
+                                bool array,
+                                glsl_base_type type)
+{
+   switch (type) {
+   case GLSL_TYPE_FLOAT:
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         if (shadow)
+            return (array ? sampler1DArrayShadow_type : sampler1DShadow_type);
+         else
+            return (array ? sampler1DArray_type : sampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         if (shadow)
+            return (array ? sampler2DArrayShadow_type : sampler2DShadow_type);
+         else
+            return (array ? sampler2DArray_type : sampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (shadow || array)
+            return error_type;
+         else
+            return sampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         if (shadow)
+            return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type);
+         else
+            return (array ? samplerCubeArray_type : samplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         if (shadow)
+            return sampler2DRectShadow_type;
+         else
+            return sampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (shadow || array)
+            return error_type;
+         else
+            return samplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         if (shadow)
+            return error_type;
+         return (array ? sampler2DMSArray_type : sampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         if (shadow || array)
+            return error_type;
+         else
+            return samplerExternalOES_type;
+      }
+   case GLSL_TYPE_INT:
+      if (shadow)
+         return error_type;
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         return (array ? isampler1DArray_type : isampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         return (array ? isampler2DArray_type : isampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (array)
+            return error_type;
+         return isampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         return (array ? isamplerCubeArray_type : isamplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         return isampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (array)
+            return error_type;
+         return isamplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         return (array ? isampler2DMSArray_type : isampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         return error_type;
+      }
+   case GLSL_TYPE_UINT:
+      if (shadow)
+         return error_type;
+      switch (dim) {
+      case GLSL_SAMPLER_DIM_1D:
+         return (array ? usampler1DArray_type : usampler1D_type);
+      case GLSL_SAMPLER_DIM_2D:
+         return (array ? usampler2DArray_type : usampler2D_type);
+      case GLSL_SAMPLER_DIM_3D:
+         if (array)
+            return error_type;
+         return usampler3D_type;
+      case GLSL_SAMPLER_DIM_CUBE:
+         return (array ? usamplerCubeArray_type : usamplerCube_type);
+      case GLSL_SAMPLER_DIM_RECT:
+         if (array)
+            return error_type;
+         return usampler2DRect_type;
+      case GLSL_SAMPLER_DIM_BUF:
+         if (array)
+            return error_type;
+         return usamplerBuffer_type;
+      case GLSL_SAMPLER_DIM_MS:
+         return (array ? usampler2DMSArray_type : usampler2DMS_type);
+      case GLSL_SAMPLER_DIM_EXTERNAL:
+         return error_type;
+      }
+   default:
+      return error_type;
+   }
+
+   unreachable("switch statement above should be complete");
+}
 
 const glsl_type *
 glsl_type::get_array_instance(const glsl_type *base, unsigned array_size)
@@ -707,6 +866,9 @@ glsl_type::component_slots() const
    case GLSL_TYPE_BOOL:
       return this->components();
 
+   case GLSL_TYPE_DOUBLE:
+      return 2 * this->components();
+
    case GLSL_TYPE_STRUCT:
    case GLSL_TYPE_INTERFACE: {
       unsigned size = 0;
@@ -742,6 +904,7 @@ glsl_type::uniform_locations() const
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL:
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
@@ -786,12 +949,26 @@ glsl_type::can_implicitly_convert_to(const glsl_type *desired,
          desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
       return true;
 
+   /* No implicit conversions from double. */
+   if ((!state || state->has_double()) && this->is_double())
+      return false;
+
+   /* Conversions from different types to double. */
+   if ((!state || state->has_double()) && desired->is_double()) {
+      if (this->is_float())
+         return true;
+      if (this->is_integer())
+         return true;
+   }
+
    return false;
 }
 
 unsigned
 glsl_type::std140_base_alignment(bool row_major) const
 {
+   unsigned N = is_double() ? 8 : 4;
+
    /* (1) If the member is a scalar consuming <N> basic machine units, the
     *     base alignment is <N>.
     *
@@ -805,12 +982,12 @@ glsl_type::std140_base_alignment(bool row_major) const
    if (this->is_scalar() || this->is_vector()) {
       switch (this->vector_elements) {
       case 1:
-	 return 4;
+	 return N;
       case 2:
-	 return 8;
+	 return 2 * N;
       case 3:
       case 4:
-	 return 16;
+	 return 4 * N;
       }
    }
 
@@ -859,10 +1036,10 @@ glsl_type::std140_base_alignment(bool row_major) const
       int r = this->vector_elements;
 
       if (row_major) {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT, c, 1);
+	 vec_type = get_instance(base_type, c, 1);
 	 array_type = glsl_type::get_array_instance(vec_type, r);
       } else {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT, r, 1);
+	 vec_type = get_instance(base_type, r, 1);
 	 array_type = glsl_type::get_array_instance(vec_type, c);
       }
 
@@ -900,6 +1077,15 @@ glsl_type::std140_base_alignment(bool row_major) const
       return base_alignment;
    }
 
+   /* A sampler may never occur in a UBO (without bindless of some sort),
+    * however it is convenient to use this alignment function even with
+    * regular uniforms. This allows use of this function on uniform structs
+    * that contain samplers.
+    */
+   if (this->is_sampler()) {
+      return 0;
+   }
+
    assert(!"not reached");
    return -1;
 }
@@ -907,6 +1093,8 @@ glsl_type::std140_base_alignment(bool row_major) const
 unsigned
 glsl_type::std140_size(bool row_major) const
 {
+   unsigned N = is_double() ? 8 : 4;
+
    /* (1) If the member is a scalar consuming <N> basic machine units, the
     *     base alignment is <N>.
     *
@@ -918,7 +1106,7 @@ glsl_type::std140_size(bool row_major) const
     *     <N> basic machine units, the base alignment is 4<N>.
     */
    if (this->is_scalar() || this->is_vector()) {
-      return this->vector_elements * 4;
+      return this->vector_elements * N;
    }
 
    /* (5) If the member is a column-major matrix with <C> columns and
@@ -953,11 +1141,12 @@ glsl_type::std140_size(bool row_major) const
       }
 
       if (row_major) {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT,
-				 element_type->matrix_columns, 1);
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
 	 array_len *= element_type->vector_elements;
       } else {
-	 vec_type = get_instance(GLSL_TYPE_FLOAT,
+	 vec_type = get_instance(element_type->base_type,
 				 element_type->vector_elements, 1);
 	 array_len *= element_type->matrix_columns;
       }
@@ -1060,6 +1249,7 @@ glsl_type::count_attribute_slots() const
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
    case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
       return this->matrix_columns;
 
    case GLSL_TYPE_STRUCT:
@@ -1114,8 +1304,13 @@ glsl_type::coordinate_components() const
       break;
    }
 
-   /* Array textures need an additional component for the array index. */
-   if (sampler_array)
+   /* Array textures need an additional component for the array index, except
+    * for cubemap array images that behave like a 2D array of interleaved
+    * cubemap faces.
+    */
+   if (sampler_array &&
+       !(base_type == GLSL_TYPE_IMAGE &&
+         sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE))
       size += 1;
 
    return size;
diff --git a/mesalib/src/glsl/glsl_types.h b/mesalib/src/glsl/glsl_types.h
index 474b12914..7359e9476 100644
--- a/mesalib/src/glsl/glsl_types.h
+++ b/mesalib/src/glsl/glsl_types.h
@@ -28,7 +28,6 @@
 
 #include <string.h>
 #include <assert.h>
-#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
 
 #ifdef __cplusplus
 extern "C" {
@@ -51,6 +50,7 @@ enum glsl_base_type {
    GLSL_TYPE_UINT = 0,
    GLSL_TYPE_INT,
    GLSL_TYPE_FLOAT,
+   GLSL_TYPE_DOUBLE,
    GLSL_TYPE_BOOL,
    GLSL_TYPE_SAMPLER,
    GLSL_TYPE_IMAGE,
@@ -103,6 +103,7 @@ enum glsl_matrix_layout {
 #ifdef __cplusplus
 #include "GL/gl.h"
 #include "util/ralloc.h"
+#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
 
 struct glsl_type {
    GLenum gl_type;
@@ -199,6 +200,7 @@ struct glsl_type {
     * @{
     */
    static const glsl_type *vec(unsigned components);
+   static const glsl_type *dvec(unsigned components);
    static const glsl_type *ivec(unsigned components);
    static const glsl_type *uvec(unsigned components);
    static const glsl_type *bvec(unsigned components);
@@ -244,6 +246,15 @@ struct glsl_type {
 					unsigned columns);
 
    /**
+    * Get the instance of a sampler type
+    */
+   static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim,
+                                                bool shadow,
+                                                bool array,
+                                                glsl_base_type type);
+
+
+   /**
     * Get the instance of an array type
     */
    static const glsl_type *get_array_instance(const glsl_type *base,
@@ -378,7 +389,7 @@ struct glsl_type {
    bool is_matrix() const
    {
       /* GLSL only has float matrices. */
-      return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT);
+      return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE);
    }
 
    /**
@@ -386,7 +397,7 @@ struct glsl_type {
     */
    bool is_numeric() const
    {
-      return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_FLOAT);
+      return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE);
    }
 
    /**
@@ -404,6 +415,12 @@ struct glsl_type {
    bool contains_integer() const;
 
    /**
+    * Query whether or not type is a double type, or for struct and array
+    * types, contains a double type.
+    */
+   bool contains_double() const;
+
+   /**
     * Query whether or not a type is a float type
     */
    bool is_float() const
@@ -412,6 +429,14 @@ struct glsl_type {
    }
 
    /**
+    * Query whether or not the base type of this type is GLSL_TYPE_DOUBLE
+    */
+   bool is_double() const
+   {
+      return base_type == GLSL_TYPE_DOUBLE;
+   }
+
+   /**
     * Query whether or not a type is a non-array boolean type
     */
    bool is_boolean() const
diff --git a/mesalib/src/glsl/ir.cpp b/mesalib/src/glsl/ir.cpp
index fe5601a16..f4f92e9df 100644
--- a/mesalib/src/glsl/ir.cpp
+++ b/mesalib/src/glsl/ir.cpp
@@ -257,6 +257,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_f2i:
    case ir_unop_b2i:
    case ir_unop_u2i:
+   case ir_unop_d2i:
    case ir_unop_bitcast_f2i:
    case ir_unop_bit_count:
    case ir_unop_find_msb:
@@ -268,6 +269,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
    case ir_unop_b2f:
    case ir_unop_i2f:
    case ir_unop_u2f:
+   case ir_unop_d2f:
    case ir_unop_bitcast_i2f:
    case ir_unop_bitcast_u2f:
       this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
@@ -276,12 +278,21 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
 
    case ir_unop_f2b:
    case ir_unop_i2b:
+   case ir_unop_d2b:
       this->type = glsl_type::get_instance(GLSL_TYPE_BOOL,
 					   op0->type->vector_elements, 1);
       break;
 
+   case ir_unop_f2d:
+   case ir_unop_i2d:
+   case ir_unop_u2d:
+      this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
+					   op0->type->vector_elements, 1);
+      break;
+
    case ir_unop_i2u:
    case ir_unop_f2u:
+   case ir_unop_d2u:
    case ir_unop_bitcast_f2u:
       this->type = glsl_type::get_instance(GLSL_TYPE_UINT,
 					   op0->type->vector_elements, 1);
@@ -293,6 +304,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::float_type;
       break;
 
+   case ir_unop_unpack_double_2x32:
+      this->type = glsl_type::uvec2_type;
+      break;
+
    case ir_unop_any:
       this->type = glsl_type::bool_type;
       break;
@@ -305,6 +320,10 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::uint_type;
       break;
 
+   case ir_unop_pack_double_2x32:
+      this->type = glsl_type::double_type;
+      break;
+
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_half_2x16:
@@ -316,6 +335,14 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
       this->type = glsl_type::vec4_type;
       break;
 
+   case ir_unop_frexp_sig:
+      this->type = op0->type;
+      break;
+   case ir_unop_frexp_exp:
+      this->type = glsl_type::get_instance(GLSL_TYPE_INT,
+					   op0->type->vector_elements, 1);
+      break;
+
    default:
       assert(!"not reached: missing automatic type setup for ir_expression");
       this->type = op0->type;
@@ -390,7 +417,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1)
       break;
 
    case ir_binop_dot:
-      this->type = glsl_type::float_type;
+      this->type = op0->type->get_base_type();
       break;
 
    case ir_binop_pack_half_2x16_split:
@@ -494,6 +521,13 @@ static const char *const operator_strs[] = {
    "u2f",
    "i2u",
    "u2i",
+   "d2f",
+   "f2d",
+   "d2i",
+   "i2d",
+   "d2u",
+   "u2d",
+   "d2b",
    "bitcast_i2f",
    "bitcast_f2i",
    "bitcast_u2f",
@@ -531,6 +565,10 @@ static const char *const operator_strs[] = {
    "find_msb",
    "find_lsb",
    "sat",
+   "packDouble2x32",
+   "unpackDouble2x32",
+   "frexp_sig",
+   "frexp_exp",
    "noise",
    "interpolate_at_centroid",
    "+",
@@ -646,6 +684,19 @@ ir_constant::ir_constant(float f, unsigned vector_elements)
    }
 }
 
+/**
+ * Double constant: the first vector_elements components hold d, and the
+ * remaining entries of the 16-slot value array are zero-filled.
+ */
+ir_constant::ir_constant(double d, unsigned vector_elements)
+   : ir_rvalue(ir_type_constant)
+{
+   assert(vector_elements <= 4);
+   this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1);
+   for (unsigned slot = 0; slot < 16; slot++)
+      this->value.d[slot] = (slot < vector_elements) ? d : 0.0;
+}
+
 ir_constant::ir_constant(unsigned int u, unsigned vector_elements)
    : ir_rvalue(ir_type_constant)
 {
@@ -695,6 +746,7 @@ ir_constant::ir_constant(const ir_constant *c, unsigned i)
    case GLSL_TYPE_INT:   this->value.i[0] = c->value.i[i]; break;
    case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break;
    case GLSL_TYPE_BOOL:  this->value.b[0] = c->value.b[i]; break;
+   case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break;
    default:              assert(!"Should not get here."); break;
    }
 }
@@ -746,9 +798,16 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
    if (value->type->is_scalar() && value->next->is_tail_sentinel()) {
       if (type->is_matrix()) {
 	 /* Matrix - fill diagonal (rest is already set to 0) */
-	 assert(type->base_type == GLSL_TYPE_FLOAT);
-	 for (unsigned i = 0; i < type->matrix_columns; i++)
-	    this->value.f[i * type->vector_elements + i] = value->value.f[0];
+         assert(type->base_type == GLSL_TYPE_FLOAT ||
+                type->base_type == GLSL_TYPE_DOUBLE);
+         for (unsigned i = 0; i < type->matrix_columns; i++) {
+            if (type->base_type == GLSL_TYPE_FLOAT)
+               this->value.f[i * type->vector_elements + i] =
+                  value->value.f[0];
+            else
+               this->value.d[i * type->vector_elements + i] =
+                  value->value.d[0];
+         }
       } else {
 	 /* Vector or scalar - fill all components */
 	 switch (type->base_type) {
@@ -761,6 +820,10 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
 	    for (unsigned i = 0; i < type->components(); i++)
 	       this->value.f[i] = value->value.f[0];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    for (unsigned i = 0; i < type->components(); i++)
+	       this->value.d[i] = value->value.d[0];
+	    break;
 	 case GLSL_TYPE_BOOL:
 	    for (unsigned i = 0; i < type->components(); i++)
 	       this->value.b[i] = value->value.b[0];
@@ -819,6 +882,9 @@ ir_constant::ir_constant(const struct glsl_type *type, exec_list *value_list)
 	 case GLSL_TYPE_BOOL:
 	    this->value.b[i] = value->get_bool_component(j);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    this->value.d[i] = value->get_double_component(j);
+	    break;
 	 default:
 	    /* FINISHME: What to do?  Exceptions are not the answer.
 	     */
@@ -869,6 +935,7 @@ ir_constant::get_bool_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i] != 0;
    case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0;
    case GLSL_TYPE_BOOL:  return this->value.b[i];
+   case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0;
    default:              assert(!"Should not get here."); break;
    }
 
@@ -886,6 +953,25 @@ ir_constant::get_float_component(unsigned i) const
    case GLSL_TYPE_INT:   return (float) this->value.i[i];
    case GLSL_TYPE_FLOAT: return this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1.0f : 0.0f;
+   case GLSL_TYPE_DOUBLE: return (float) this->value.d[i];
+   default:              assert(!"Should not get here."); break;
+   }
+
+   /* Must return something to make the compiler happy.  This is clearly an
+    * error case.
+    */
+   return 0.0;
+}
+
+double
+ir_constant::get_double_component(unsigned i) const
+{
+   switch (this->type->base_type) {
+   case GLSL_TYPE_UINT:  return (double) this->value.u[i];
+   case GLSL_TYPE_INT:   return (double) this->value.i[i];
+   case GLSL_TYPE_FLOAT: return (double) this->value.f[i];
+   case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1.0 : 0.0;
+   case GLSL_TYPE_DOUBLE: return this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -903,6 +989,7 @@ ir_constant::get_int_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i];
    case GLSL_TYPE_FLOAT: return (int) this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1 : 0;
+   case GLSL_TYPE_DOUBLE: return (int) this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -920,6 +1007,7 @@ ir_constant::get_uint_component(unsigned i) const
    case GLSL_TYPE_INT:   return this->value.i[i];
    case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i];
    case GLSL_TYPE_BOOL:  return this->value.b[i] ? 1 : 0;
+   case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i];
    default:              assert(!"Should not get here."); break;
    }
 
@@ -984,6 +1072,7 @@ ir_constant::copy_offset(ir_constant *src, int offset)
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL: {
       unsigned int size = src->type->components();
       assert (size <= this->type->components() - offset);
@@ -1001,6 +1090,9 @@ ir_constant::copy_offset(ir_constant *src, int offset)
 	 case GLSL_TYPE_BOOL:
 	    value.b[i+offset] = src->get_bool_component(i);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    value.d[i+offset] = src->get_double_component(i);
+	    break;
 	 default: // Shut up the compiler
 	    break;
 	 }
@@ -1057,6 +1149,9 @@ ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
 	 case GLSL_TYPE_BOOL:
 	    value.b[i+offset] = src->get_bool_component(id++);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    value.d[i+offset] = src->get_double_component(id++);
+	    break;
 	 default:
 	    assert(!"Should not get here.");
 	    return;
@@ -1117,6 +1212,10 @@ ir_constant::has_value(const ir_constant *c) const
 	 if (this->value.b[i] != c->value.b[i])
 	    return false;
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 if (this->value.d[i] != c->value.d[i])
+	    return false;
+	 break;
       default:
 	 assert(!"Should not get here.");
 	 return false;
@@ -1154,6 +1253,10 @@ ir_constant::is_value(float f, int i) const
 	 if (this->value.b[c] != bool(i))
 	    return false;
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 if (this->value.d[c] != double(f))
+	    return false;
+	 break;
       default:
 	 /* The only other base types are structures, arrays, and samplers.
 	  * Samplers cannot be constants, and the others should have been
diff --git a/mesalib/src/glsl/ir.h b/mesalib/src/glsl/ir.h
index a0f48b2af..25f2ecada 100644
--- a/mesalib/src/glsl/ir.h
+++ b/mesalib/src/glsl/ir.h
@@ -450,11 +450,8 @@ public:
     */
    inline bool is_interface_instance() const
    {
-      const glsl_type *const t = this->type;
-
-      return (t == this->interface_type)
-         || (t->is_array() && t->fields.array == this->interface_type);
-    }
+      return this->type->without_array() == this->interface_type;
+   }
 
    /**
     * Set this->interface_type on a newly created variable.
@@ -1269,6 +1266,13 @@ enum ir_expression_operation {
    ir_unop_u2f,         /**< Unsigned-to-float conversion. */
    ir_unop_i2u,         /**< Integer-to-unsigned conversion. */
    ir_unop_u2i,         /**< Unsigned-to-integer conversion. */
+   ir_unop_d2f,         /**< Double-to-float conversion. */
+   ir_unop_f2d,         /**< Float-to-double conversion. */
+   ir_unop_d2i,         /**< Double-to-integer conversion. */
+   ir_unop_i2d,         /**< Integer-to-double conversion. */
+   ir_unop_d2u,         /**< Double-to-unsigned conversion. */
+   ir_unop_u2d,         /**< Unsigned-to-double conversion. */
+   ir_unop_d2b,         /**< Double-to-boolean conversion. */
    ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
    ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
    ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
@@ -1345,6 +1349,18 @@ enum ir_expression_operation {
    /*@}*/
 
    ir_unop_saturate,
+
+   /**
+    * \name Double packing, part of ARB_gpu_shader_fp64.
+    */
+   /*@{*/
+   ir_unop_pack_double_2x32,
+   ir_unop_unpack_double_2x32,
+   /*@}*/
+
+   ir_unop_frexp_sig,
+   ir_unop_frexp_exp,
+
    ir_unop_noise,
 
    /**
@@ -2153,6 +2169,7 @@ union ir_constant_data {
       int i[16];
       float f[16];
       bool b[16];
+      double d[16];
 };
 
 
@@ -2163,6 +2180,7 @@ public:
    ir_constant(unsigned int u, unsigned vector_elements=1);
    ir_constant(int i, unsigned vector_elements=1);
    ir_constant(float f, unsigned vector_elements=1);
+   ir_constant(double d, unsigned vector_elements=1);
 
    /**
     * Construct an ir_constant from a list of ir_constant values
@@ -2209,6 +2227,7 @@ public:
    /*@{*/
    bool get_bool_component(unsigned i) const;
    float get_float_component(unsigned i) const;
+   double get_double_component(unsigned i) const;
    int get_int_component(unsigned i) const;
    unsigned get_uint_component(unsigned i) const;
    /*@}*/
@@ -2417,6 +2436,10 @@ extern ir_function_signature *
 _mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
                                  const char *name, exec_list *actual_parameters);
 
+extern ir_function *
+_mesa_glsl_find_builtin_function_by_name(_mesa_glsl_parse_state *state,
+                                         const char *name);
+
 extern gl_shader *
 _mesa_glsl_get_builtin_function_shader(void);
 
diff --git a/mesalib/src/glsl/ir_builder.cpp b/mesalib/src/glsl/ir_builder.cpp
index a2f6f2967..e44b05c99 100644
--- a/mesalib/src/glsl/ir_builder.cpp
+++ b/mesalib/src/glsl/ir_builder.cpp
@@ -246,11 +246,21 @@ ir_expression *borrow(operand a, operand b)
    return expr(ir_binop_borrow, a, b);
 }
 
+ir_expression *trunc(operand a)
+{
+   return expr(ir_unop_trunc, a);   /* unary ir_unop_trunc applied to a */
+}
+
 ir_expression *round_even(operand a)
 {
    return expr(ir_unop_round_even, a);
 }
 
+ir_expression *fract(operand a)
+{
+   return expr(ir_unop_fract, a);   /* unary ir_unop_fract applied to a */
+}
+
 /* dot for vectors, mul for scalars */
 ir_expression *dot(operand a, operand b)
 {
@@ -515,6 +525,24 @@ interpolate_at_sample(operand a, operand b)
 }
 
 ir_expression *
+f2d(operand a)
+{
+   return expr(ir_unop_f2d, a);   /* float-to-double conversion of a */
+}
+
+ir_expression *
+i2d(operand a)
+{
+   return expr(ir_unop_i2d, a);   /* integer-to-double conversion of a */
+}
+
+ir_expression *
+u2d(operand a)
+{
+   return expr(ir_unop_u2d, a);   /* unsigned-to-double conversion of a */
+}
+
+ir_expression *
 fma(operand a, operand b, operand c)
 {
    return expr(ir_triop_fma, a, b, c);
diff --git a/mesalib/src/glsl/ir_builder.h b/mesalib/src/glsl/ir_builder.h
index 573596cf1..870265881 100644
--- a/mesalib/src/glsl/ir_builder.h
+++ b/mesalib/src/glsl/ir_builder.h
@@ -137,7 +137,9 @@ ir_expression *imul_high(operand a, operand b);
 ir_expression *div(operand a, operand b);
 ir_expression *carry(operand a, operand b);
 ir_expression *borrow(operand a, operand b);
+ir_expression *trunc(operand a);
 ir_expression *round_even(operand a);
+ir_expression *fract(operand a);
 ir_expression *dot(operand a, operand b);
 ir_expression *clamp(operand a, operand b, operand c);
 ir_expression *saturate(operand a);
@@ -183,6 +185,10 @@ ir_expression *i2b(operand a);
 ir_expression *f2b(operand a);
 ir_expression *b2f(operand a);
 
+ir_expression *f2d(operand a);
+ir_expression *i2d(operand a);
+ir_expression *u2d(operand a);
+
 ir_expression *min2(operand a, operand b);
 ir_expression *max2(operand a, operand b);
 
diff --git a/mesalib/src/glsl/ir_clone.cpp b/mesalib/src/glsl/ir_clone.cpp
index dffa57844..5c7279ca3 100644
--- a/mesalib/src/glsl/ir_clone.cpp
+++ b/mesalib/src/glsl/ir_clone.cpp
@@ -327,6 +327,7 @@ ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
    case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
    case GLSL_TYPE_BOOL:
       return new(mem_ctx) ir_constant(this->type, &this->value);
 
diff --git a/mesalib/src/glsl/ir_constant_expression.cpp b/mesalib/src/glsl/ir_constant_expression.cpp
index 1e8b3a3cc..07dd439d5 100644
--- a/mesalib/src/glsl/ir_constant_expression.cpp
+++ b/mesalib/src/glsl/ir_constant_expression.cpp
@@ -44,7 +44,7 @@ static int isnormal(double x)
 {
    return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
 }
-#elif defined(__SUNPRO_CC)
+#elif defined(__SUNPRO_CC) && !defined(isnormal)
 #include <ieeefp.h>
 static int isnormal(double x)
 {
@@ -60,7 +60,7 @@ static double copysign(double x, double y)
 #endif
 
 static float
-dot(ir_constant *op0, ir_constant *op1)
+dot_f(ir_constant *op0, ir_constant *op1)
 {
    assert(op0->type->is_float() && op1->type->is_float());
 
@@ -71,6 +71,18 @@ dot(ir_constant *op0, ir_constant *op1)
    return result;
 }
 
+/* Double-precision twin of dot_f: sum of the component-wise products. */
+static double
+dot_d(ir_constant *op0, ir_constant *op1)
+{
+   assert(op0->type->is_double() && op1->type->is_double());
+   const unsigned count = op0->type->components();
+   double sum = 0;
+   for (unsigned idx = 0; idx < count; idx++)
+      sum += op0->value.d[idx] * op1->value.d[idx];
+   return sum;
+}
+
 /* This method is the only one supported by gcc.  Unions in particular
  * are iffy, and read-through-converted-pointer is killed by strict
  * aliasing.  OTOH, the compiler sees through the memcpy, so the
@@ -667,32 +679,81 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    data.b[0] = true;
       }
       break;
-
-   case ir_unop_trunc:
+   case ir_unop_d2f:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.f[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_f2d:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = truncf(op[0]->value.f[c]);
+	 data.d[c] = op[0]->value.f[c];
+      }
+      break;
+   case ir_unop_d2i:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.i[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_i2d:
+      assert(op[0]->type->base_type == GLSL_TYPE_INT);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.d[c] = op[0]->value.i[c];
+      }
+      break;
+   case ir_unop_d2u:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.u[c] = op[0]->value.d[c];
+      }
+      break;
+   case ir_unop_u2d:
+      assert(op[0]->type->base_type == GLSL_TYPE_UINT);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+	 data.d[c] = op[0]->value.u[c];
+      }
+      break;
+   case ir_unop_d2b:
+      assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         data.b[c] = op[0]->value.d[c] != 0.0;
+      }
+      break;
+   case ir_unop_trunc:
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = trunc(op[0]->value.d[c]);
+         else
+            data.f[c] = truncf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_round_even:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = _mesa_round_to_even(op[0]->value.d[c]);
+         else
+            data.f[c] = _mesa_round_to_even(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_ceil:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = ceilf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = ceil(op[0]->value.d[c]);
+         else
+            data.f[c] = ceilf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_floor:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = floorf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = floor(op[0]->value.d[c]);
+         else
+            data.f[c] = floorf(op[0]->value.f[c]);
       }
       break;
 
@@ -708,6 +769,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -742,6 +806,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = -op[0]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = -op[0]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -762,6 +829,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = fabs(op[0]->value.f[c]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = fabs(op[0]->value.d[c]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -780,6 +850,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0));
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = double((op[0]->value.d[c] > 0)-(op[0]->value.d[c] < 0));
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -787,7 +860,6 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_rcp:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
 	 switch (this->type->base_type) {
 	 case GLSL_TYPE_UINT:
@@ -802,6 +874,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    if (op[0]->value.f[c] != 0.0)
 	       data.f[c] = 1.0F / op[0]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    if (op[0]->value.d[c] != 0.0)
+	       data.d[c] = 1.0 / op[0]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -809,16 +885,20 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_unop_rsq:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = 1.0 / sqrt(op[0]->value.d[c]);
+         else
+            data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]);
       }
       break;
 
    case ir_unop_sqrt:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-	 data.f[c] = sqrtf(op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = sqrt(op[0]->value.d[c]);
+         else
+            data.f[c] = sqrtf(op[0]->value.f[c]);
       }
       break;
 
@@ -934,7 +1014,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       break;
 
    case ir_binop_dot:
-      data.f[0] = dot(op[0], op[1]);
+      if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+         data.d[0] = dot_d(op[0], op[1]);
+      else
+         data.f[0] = dot_f(op[0], op[1]);
       break;
 
    case ir_binop_min:
@@ -953,6 +1036,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -975,6 +1061,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -997,6 +1086,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1019,6 +1111,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1043,6 +1138,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    case GLSL_TYPE_FLOAT:
 	       data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1];
 	       break;
+	    case GLSL_TYPE_DOUBLE:
+	       data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1];
+	       break;
 	    default:
 	       assert(0);
 	    }
@@ -1066,7 +1164,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 for (unsigned j = 0; j < p; j++) {
 	    for (unsigned i = 0; i < n; i++) {
 	       for (unsigned k = 0; k < m; k++) {
-		  data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
+                  if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+                     data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j];
+                  else
+                     data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
 	       }
 	    }
 	 }
@@ -1098,6 +1199,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1133,6 +1237,13 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	    data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]
 	       * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]);
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    /* We don't use fmod because it rounds toward zero; GLSL specifies
+	     * the use of floor.
+	     */
+	    data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]
+	       * floor(op[0]->value.d[c0] / op[1]->value.d[c1]);
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1169,6 +1280,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] < op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1187,6 +1301,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] > op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] > op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1205,6 +1322,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1223,6 +1343,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1244,6 +1367,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_BOOL:
 	    data.b[c] = op[0]->value.b[c] == op[1]->value.b[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] == op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1265,6 +1391,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_BOOL:
 	    data.b[c] = op[0]->value.b[c] != op[1]->value.b[c];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.b[c] = op[0]->value.d[c] != op[1]->value.d[c];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1375,6 +1504,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       case GLSL_TYPE_FLOAT:
          data.f[0] = op[0]->value.f[c];
          break;
+      case GLSL_TYPE_DOUBLE:
+         data.d[0] = op[0]->value.d[c];
+         break;
       case GLSL_TYPE_BOOL:
          data.b[0] = op[0]->value.b[c];
          break;
@@ -1474,6 +1606,19 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
          data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f);
       }
       break;
+   case ir_unop_pack_double_2x32: {
+      /* XXX needs to be checked on big-endian */
+      uint64_t temp;
+      temp = (uint64_t)op[0]->value.u[0] | ((uint64_t)op[0]->value.u[1] << 32);
+      /* Copy the bits; a (double *) cast here would violate strict aliasing. */
+      memcpy(&data.d[0], &temp, sizeof(temp));
+      break;
+   }
+   case ir_unop_unpack_double_2x32:
+      /* XXX needs to be checked on big-endian */
+      /* One copy fills u[0] and u[1] with the double's two 32-bit halves. */
+      memcpy(&data.u[0], &op[0]->value.d[0], sizeof(op[0]->value.d[0]));
+      break;
 
    case ir_triop_bitfield_extract: {
       int offset = op[1]->value.i[0];
@@ -1523,40 +1668,65 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 
    case ir_binop_ldexp:
       for (unsigned c = 0; c < components; c++) {
-         data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
-         /* Flush subnormal values to zero. */
-         if (!isnormal(data.f[c]))
-            data.f[c] = copysign(0.0f, op[0]->value.f[c]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+            data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero. */
+            if (!isnormal(data.d[c]))
+               data.d[c] = copysign(0.0, op[0]->value.d[c]);
+         } else {
+            data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero. */
+            if (!isnormal(data.f[c]))
+               data.f[c] = copysign(0.0f, op[0]->value.f[c]);
+         }
       }
       break;
 
    case ir_triop_fma:
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
 
       for (unsigned c = 0; c < components; c++) {
-         data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
-                                       + op[2]->value.f[c];
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * op[1]->value.d[c]
+                                          + op[2]->value.d[c];
+         else
+            data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
+                                          + op[2]->value.f[c];
       }
       break;
 
    case ir_triop_lrp: {
-      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
-      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
 
       unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
       for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
-         data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
-                     (op[1]->value.f[c] * op[2]->value.f[c2]);
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) +
+               (op[1]->value.d[c] * op[2]->value.d[c2]);
+         else
+            data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
+               (op[1]->value.f[c] * op[2]->value.f[c2]);
       }
       break;
    }
 
    case ir_triop_csel:
       for (unsigned c = 0; c < components; c++) {
-         data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
+         if (op[1]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c]
+                                       : op[2]->value.d[c];
+         else
+            data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
                                        : op[2]->value.u[c];
       }
       break;
@@ -1579,6 +1749,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       case GLSL_TYPE_BOOL:
 	 data.b[idx] = op[1]->value.b[0];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 data.d[idx] = op[1]->value.d[0];
+	 break;
       default:
 	 assert(!"Should not get here.");
 	 break;
@@ -1625,6 +1798,9 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_FLOAT:
 	    data.f[c] = op[c]->value.f[0];
 	    break;
+	 case GLSL_TYPE_DOUBLE:
+	    data.d[c] = op[c]->value.d[0];
+	    break;
 	 default:
 	    assert(0);
 	 }
@@ -1666,6 +1842,7 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context)
 	 case GLSL_TYPE_INT:   data.u[i] = v->value.u[swiz_idx[i]]; break;
 	 case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
 	 case GLSL_TYPE_BOOL:  data.b[i] = v->value.b[swiz_idx[i]]; break;
+	 case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
 	 default:              assert(!"Should not get here."); break;
 	 }
       }
@@ -1740,6 +1917,12 @@ ir_dereference_array::constant_expression_value(struct hash_table *variable_cont
 
 	    break;
 
+	 case GLSL_TYPE_DOUBLE:
+	    for (unsigned i = 0; i < column_type->vector_elements; i++)
+	       data.d[i] = array->value.d[mat_idx + i];
+
+	    break;
+
 	 default:
 	    assert(!"Should not get here.");
 	    break;
diff --git a/mesalib/src/glsl/ir_function_can_inline.cpp b/mesalib/src/glsl/ir_function_can_inline.cpp
index 7b15d5df1..3b1d15f80 100644
--- a/mesalib/src/glsl/ir_function_can_inline.cpp
+++ b/mesalib/src/glsl/ir_function_can_inline.cpp
@@ -26,11 +26,10 @@
  *
  * Determines if we can inline a function call using ir_function_inlining.cpp.
  *
- * The primary restriction is that we can't return from the function
- * other than as the last instruction.  We could potentially work
- * around this for some constructs by flattening control flow and
- * moving the return to the end, or by using breaks from a do {} while
- * (0) loop surrounding the function body.
+ * The primary restriction is that we can't return from the function other
+ * than as the last instruction.  In lower_jumps.cpp, we can lower return
+ * statements not at the end of the function to other control flow in order to
+ * deal with this restriction.
  */
 
 #include "ir.h"
diff --git a/mesalib/src/glsl/ir_optimization.h b/mesalib/src/glsl/ir_optimization.h
index 34e0b4b94..7eb861ae5 100644
--- a/mesalib/src/glsl/ir_optimization.h
+++ b/mesalib/src/glsl/ir_optimization.h
@@ -34,13 +34,15 @@
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
-#define MOD_TO_FRACT       0x20
+#define MOD_TO_FLOOR       0x20
 #define INT_DIV_TO_MUL_RCP 0x40
 #define BITFIELD_INSERT_TO_BFM_BFI 0x80
 #define LDEXP_TO_ARITH     0x100
 #define CARRY_TO_ARITH     0x200
 #define BORROW_TO_ARITH    0x400
 #define SAT_TO_CLAMP       0x800
+#define DOPS_TO_DFRAC      0x1000
+#define DFREXP_DLDEXP_TO_ARITH    0x2000
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/mesalib/src/glsl/ir_print_visitor.cpp b/mesalib/src/glsl/ir_print_visitor.cpp
index bd398052c..01f52e85f 100644
--- a/mesalib/src/glsl/ir_print_visitor.cpp
+++ b/mesalib/src/glsl/ir_print_visitor.cpp
@@ -436,6 +436,17 @@ void ir_print_visitor::visit(ir_constant *ir)
                fprintf(f, "%f", ir->value.f[i]);
             break;
 	 case GLSL_TYPE_BOOL:  fprintf(f, "%d", ir->value.b[i]); break;
+	 case GLSL_TYPE_DOUBLE:
+            if (ir->value.d[i] == 0.0)
+               /* 0.0 == -0.0, so print with %f to get the proper sign. */
+               fprintf(f, "%.1f", ir->value.d[i]);
+            else if (fabs(ir->value.d[i]) < 0.000001)
+               fprintf(f, "%a", ir->value.d[i]);
+            else if (fabs(ir->value.d[i]) > 1000000.0)
+               fprintf(f, "%e", ir->value.d[i]);
+            else
+               fprintf(f, "%f", ir->value.d[i]);
+            break;
 	 default: assert(0);
 	 }
       }
diff --git a/mesalib/src/glsl/ir_set_program_inouts.cpp b/mesalib/src/glsl/ir_set_program_inouts.cpp
index 97ead750a..e877a2019 100644
--- a/mesalib/src/glsl/ir_set_program_inouts.cpp
+++ b/mesalib/src/glsl/ir_set_program_inouts.cpp
@@ -81,6 +81,13 @@ is_shader_inout(ir_variable *var)
           var->data.mode == ir_var_system_value;
 }
 
+static inline bool
+is_dual_slot(ir_variable *var)
+{
+   const glsl_type *type = var->type->without_array();
+   return type == glsl_type::dvec4_type || type == glsl_type::dvec3_type;
+}
+
 static void
 mark(struct gl_program *prog, ir_variable *var, int offset, int len,
      bool is_fragment_shader)
@@ -94,19 +101,32 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len,
     */
 
    for (int i = 0; i < len; i++) {
-      GLbitfield64 bitfield =
-         BITFIELD64_BIT(var->data.location + var->data.index + offset + i);
+      bool dual_slot = is_dual_slot(var);
+      int idx = var->data.location + var->data.index + offset + i;
+      GLbitfield64 bitfield = BITFIELD64_BIT(idx);
+
+      /* dvec3 and dvec4 take up 2 slots */
+      if (dual_slot) {
+         idx += i;
+         bitfield |= bitfield << 1;
+      }
       if (var->data.mode == ir_var_shader_in) {
 	 prog->InputsRead |= bitfield;
          if (is_fragment_shader) {
             gl_fragment_program *fprog = (gl_fragment_program *) prog;
-            fprog->InterpQualifier[var->data.location +
-                                   var->data.index + offset + i] =
+            fprog->InterpQualifier[idx] =
                (glsl_interp_qualifier) var->data.interpolation;
             if (var->data.centroid)
                fprog->IsCentroid |= bitfield;
             if (var->data.sample)
                fprog->IsSample |= bitfield;
+
+            /* Set the InterpQualifier of the next slot to the same as the
+             * current one, since dvec3 and dvec4 span 2 slots.
+             */
+            if (dual_slot)
+               fprog->InterpQualifier[idx + 1] =
+                  (glsl_interp_qualifier) var->data.interpolation;
          }
       } else if (var->data.mode == ir_var_system_value) {
          prog->SystemValuesRead |= bitfield;
diff --git a/mesalib/src/glsl/ir_validate.cpp b/mesalib/src/glsl/ir_validate.cpp
index 5a6f8bbf5..667889480 100644
--- a/mesalib/src/glsl/ir_validate.cpp
+++ b/mesalib/src/glsl/ir_validate.cpp
@@ -313,6 +313,10 @@ ir_validate::visit_leave(ir_expression *ir)
    case ir_unop_ceil:
    case ir_unop_floor:
    case ir_unop_fract:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->operands[0]->type == ir->type);
+      break;
    case ir_unop_sin:
    case ir_unop_cos:
    case ir_unop_sin_reduced:
@@ -340,6 +344,11 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type == glsl_type::vec4_type);
       break;
 
+   case ir_unop_pack_double_2x32:
+      assert(ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type == glsl_type::uvec2_type);
+      break;
+
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_half_2x16:
@@ -359,6 +368,11 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type == glsl_type::uint_type);
       break;
 
+   case ir_unop_unpack_double_2x32:
+      assert(ir->type == glsl_type::uvec2_type);
+      assert(ir->operands[0]->type == glsl_type::double_type);
+      break;
+
    case ir_unop_bitfield_reverse:
       assert(ir->operands[0]->type == ir->type);
       assert(ir->type->is_integer());
@@ -381,6 +395,45 @@ ir_validate::visit_leave(ir_expression *ir)
       assert(ir->operands[0]->type->is_float());
       break;
 
+   case ir_unop_d2f:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      break;
+   case ir_unop_f2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2i:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
+   case ir_unop_i2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2u:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_UINT);
+      break;
+   case ir_unop_u2d:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_d2b:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_BOOL);
+      break;
+
+   case ir_unop_frexp_sig:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_DOUBLE);
+      break;
+   case ir_unop_frexp_exp:
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(ir->type->base_type == GLSL_TYPE_INT);
+      break;
    case ir_binop_add:
    case ir_binop_sub:
    case ir_binop_mul:
@@ -481,8 +534,10 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_dot:
-      assert(ir->type == glsl_type::float_type);
-      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type == glsl_type::float_type ||
+             ir->type == glsl_type::double_type);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->operands[0]->type->is_vector());
       assert(ir->operands[0]->type == ir->operands[1]->type);
       break;
@@ -507,7 +562,8 @@ ir_validate::visit_leave(ir_expression *ir)
 
    case ir_binop_ldexp:
       assert(ir->operands[0]->type == ir->type);
-      assert(ir->operands[0]->type->is_float());
+      assert(ir->operands[0]->type->is_float() ||
+             ir->operands[0]->type->is_double());
       assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
       assert(ir->operands[0]->type->components() ==
              ir->operands[1]->type->components());
@@ -533,16 +589,20 @@ ir_validate::visit_leave(ir_expression *ir)
       break;
 
    case ir_triop_fma:
-      assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->type == ir->operands[0]->type);
       assert(ir->type == ir->operands[1]->type);
       assert(ir->type == ir->operands[2]->type);
       break;
 
    case ir_triop_lrp:
-      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
+      assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
       assert(ir->operands[0]->type == ir->operands[1]->type);
-      assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type);
+      assert(ir->operands[2]->type == ir->operands[0]->type ||
+             ir->operands[2]->type == glsl_type::float_type ||
+             ir->operands[2]->type == glsl_type::double_type);
       break;
 
    case ir_triop_csel:
@@ -706,7 +766,7 @@ ir_validate::visit(ir_variable *ir)
    }
 
    if (ir->data.mode == ir_var_uniform
-       && strncmp(ir->name, "gl_", 3) == 0
+       && is_gl_identifier(ir->name)
        && ir->get_state_slots() == NULL) {
       printf("built-in uniform has no state\n");
       ir->print();
diff --git a/mesalib/src/glsl/link_uniform_blocks.cpp b/mesalib/src/glsl/link_uniform_blocks.cpp
index f5fc5022e..6ca41107e 100644
--- a/mesalib/src/glsl/link_uniform_blocks.cpp
+++ b/mesalib/src/glsl/link_uniform_blocks.cpp
@@ -67,6 +67,28 @@ private:
       assert(!"Should not get here.");
    }
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->offset = glsl_align(
+            this->offset, type->std140_base_alignment(row_major));
+   }
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+
+      /* If this is the last field of a structure, apply rule #9.  The
+       * GL_ARB_uniform_buffer_object spec says:
+       *
+       *     "The structure may have padding at the end; the base offset of
+       *     the member following the sub-structure is rounded up to the next
+       *     multiple of the base alignment of the structure."
+       */
+      this->offset = glsl_align(
+            this->offset, type->std140_base_alignment(row_major));
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
                             bool last_field)
@@ -97,27 +119,13 @@ private:
          v->IndexName = v->Name;
       }
 
-      const unsigned alignment = record_type
-         ? record_type->std140_base_alignment(v->RowMajor)
-         : type->std140_base_alignment(v->RowMajor);
+      const unsigned alignment = type->std140_base_alignment(v->RowMajor);
       unsigned size = type->std140_size(v->RowMajor);
 
       this->offset = glsl_align(this->offset, alignment);
       v->Offset = this->offset;
 
-      /* If this is the last field of a structure, apply rule #9.  The
-       * GL_ARB_uniform_buffer_object spec says:
-       *
-       *     "The structure may have padding at the end; the base offset of
-       *     the member following the sub-structure is rounded up to the next
-       *     multiple of the base alignment of the structure."
-       *
-       * last_field won't be set if this is the last field of a UBO that is
-       * not a named instance.
-       */
       this->offset += size;
-      if (last_field)
-         this->offset = glsl_align(this->offset, 16);
 
       /* From the GL_ARB_uniform_buffer_object spec:
        *
@@ -131,16 +139,6 @@ private:
        */
       this->buffer_size = glsl_align(this->offset, 16);
    }
-
-   virtual void visit_field(const glsl_struct_field *field)
-   {
-      /* FINISHME: When support for doubles (dvec4, etc.) is added to the
-       * FINISHME: compiler, this may be incorrect for a structure in a UBO
-       * FINISHME: like struct s { struct { float f } s1; dvec4 v; };.
-       */
-      this->offset = glsl_align(this->offset,
-                                field->type->std140_base_alignment(false));
-   }
 };
 
 class count_block_size : public program_resource_visitor {
diff --git a/mesalib/src/glsl/link_uniform_initializers.cpp b/mesalib/src/glsl/link_uniform_initializers.cpp
index f6a60bce9..69073841e 100644
--- a/mesalib/src/glsl/link_uniform_initializers.cpp
+++ b/mesalib/src/glsl/link_uniform_initializers.cpp
@@ -75,6 +75,11 @@ copy_constant_to_storage(union gl_constant_value *storage,
       case GLSL_TYPE_FLOAT:
 	 storage[i].f = val->value.f[i];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+         /* XXX need to check on big-endian */
+         storage[i * 2].u = *(uint32_t *)&val->value.d[i];
+         storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1);
+         break;
       case GLSL_TYPE_BOOL:
 	 storage[i].b = val->value.b[i] ? boolean_true : 0;
 	 break;
@@ -200,6 +205,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
 	 val->array_elements[0]->type->base_type;
       const unsigned int elements = val->array_elements[0]->type->components();
       unsigned int idx = 0;
+      unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 2 : 1;
 
       assert(val->type->length >= storage->array_elements);
       for (unsigned int i = 0; i < storage->array_elements; i++) {
@@ -209,7 +215,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
                                   elements,
                                   boolean_true);
 
-	 idx += elements;
+	 idx += elements * dmul;
       }
    } else {
       copy_constant_to_storage(storage->storage,
diff --git a/mesalib/src/glsl/link_uniforms.cpp b/mesalib/src/glsl/link_uniforms.cpp
index de2f6c9ac..799c74bb9 100644
--- a/mesalib/src/glsl/link_uniforms.cpp
+++ b/mesalib/src/glsl/link_uniforms.cpp
@@ -169,6 +169,9 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
       if (record_type == NULL && t->is_record())
          record_type = t;
 
+      if (t->is_record())
+         this->enter_record(t, *name, row_major);
+
       for (unsigned i = 0; i < t->length; i++) {
 	 const char *field = t->fields.structure[i].name;
 	 size_t new_length = name_length;
@@ -208,6 +211,11 @@ program_resource_visitor::recursion(const glsl_type *t, char **name,
           */
          record_type = NULL;
       }
+
+      if (t->is_record()) {
+         (*name)[name_length] = '\0';
+         this->leave_record(t, *name, row_major);
+      }
    } else if (t->is_array() && (t->fields.array->is_record()
                                 || t->fields.array->is_interface())) {
       if (record_type == NULL && t->fields.array->is_record())
@@ -249,6 +257,16 @@ program_resource_visitor::visit_field(const glsl_struct_field *field)
    /* empty */
 }
 
+void
+program_resource_visitor::enter_record(const glsl_type *, const char *, bool)
+{
+}
+
+void
+program_resource_visitor::leave_record(const glsl_type *, const char *, bool)
+{
+}
+
 namespace {
 
 /**
@@ -526,6 +544,20 @@ private:
       assert(!"Should not get here.");
    }
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std140_base_alignment(row_major));
+   }
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major) {
+      assert(type->is_record());
+      this->ubo_byte_offset = glsl_align(
+            this->ubo_byte_offset, type->std140_base_alignment(row_major));
+   }
+
    virtual void visit_field(const glsl_type *type, const char *name,
                             bool row_major, const glsl_type *record_type,
                             bool last_field)
@@ -590,16 +622,11 @@ private:
       if (this->ubo_block_index != -1) {
 	 this->uniforms[id].block_index = this->ubo_block_index;
 
-	 const unsigned alignment = record_type
-	    ? record_type->std140_base_alignment(row_major)
-	    : type->std140_base_alignment(row_major);
+	 const unsigned alignment = type->std140_base_alignment(row_major);
 	 this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
 	 this->uniforms[id].offset = this->ubo_byte_offset;
 	 this->ubo_byte_offset += type->std140_size(row_major);
 
-         if (last_field)
-            this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, 16);
-
 	 if (type->is_array()) {
 	    this->uniforms[id].array_stride =
 	       glsl_align(type->fields.array->std140_size(row_major), 16);
@@ -608,7 +635,12 @@ private:
 	 }
 
 	 if (type->without_array()->is_matrix()) {
-	    this->uniforms[id].matrix_stride = 16;
+            const glsl_type *matrix = type->without_array();
+            const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
+            const unsigned items = row_major ? matrix->matrix_columns : matrix->vector_elements;
+
+            assert(items <= 4);
+            this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
 	    this->uniforms[id].row_major = row_major;
 	 } else {
 	    this->uniforms[id].matrix_stride = 0;
diff --git a/mesalib/src/glsl/linker.h b/mesalib/src/glsl/linker.h
index 6ee585898..be4da5e0a 100644
--- a/mesalib/src/glsl/linker.h
+++ b/mesalib/src/glsl/linker.h
@@ -170,6 +170,12 @@ protected:
     */
    virtual void visit_field(const glsl_struct_field *field);
 
+   virtual void enter_record(const glsl_type *type, const char *name,
+                             bool row_major);
+
+   virtual void leave_record(const glsl_type *type, const char *name,
+                             bool row_major);
+
 private:
    /**
     * \param name_length  Length of the current name \b not including the
diff --git a/mesalib/src/glsl/list.h b/mesalib/src/glsl/list.h
index 995c666ea..ddb98f76f 100644
--- a/mesalib/src/glsl/list.h
+++ b/mesalib/src/glsl/list.h
@@ -51,6 +51,10 @@
  * Therefore, if \c head->next is \c NULL or \c tail_prev->prev is \c NULL,
  * the list is empty.
  *
+ * Do note that this means that the list nodes will contain pointers into the
+ * list structure itself and as a result you may not \c realloc() an \c
+ * exec_list or any structure in which an \c exec_list is embedded.
+ *
  * To anyone familiar with "exec lists" on the Amiga, this structure should
  * be immediately recognizable.  See the following link for the original Amiga
  * operating system documentation on the subject.
@@ -534,9 +538,7 @@ exec_list_validate(const struct exec_list *list)
     * either require C++ or assume the exec_node is embedded in a structure
     * which is not the case for this function.
     */
-   for (node = exec_list_get_head_const(list);
-        !exec_node_is_tail_sentinel(node);
-        node = exec_node_get_next_const(node)) {
+   for (node = list->head; node->next != NULL; node = node->next) {
       assert(node->next->prev == node);
       assert(node->prev->next == node);
    }
@@ -638,6 +640,12 @@ inline void exec_node::insert_before(exec_list *before)
         __next != NULL;                              \
         __node = __next, __next = (__type *)__next->next)
 
+#define foreach_in_list_reverse_safe(__type, __node, __list) \
+   for (__type *__node = (__type *)(__list)->tail_pred,      \
+               *__prev = (__type *)__node->prev;             \
+        __prev != NULL;                                      \
+        __node = __prev, __prev = (__type *)__prev->prev)
+
 #define foreach_in_list_use_after(__type, __inst, __list) \
    __type *(__inst);                                      \
    for ((__inst) = (__type *)(__list)->head;              \
@@ -665,6 +673,12 @@ inline void exec_node::insert_before(exec_list *before)
 	(__node)->__field.next != NULL; 				\
 	(__node) = exec_node_data(__type, (__node)->__field.next, __field))
 
+#define foreach_list_typed_reverse(__type, __node, __field, __list)        \
+   for (__type * __node =                                                \
+           exec_node_data(__type, (__list)->tail_pred, __field);        \
+        (__node)->__field.prev != NULL;                                 \
+        (__node) = exec_node_data(__type, (__node)->__field.prev, __field))
+
 #define foreach_list_typed_safe(__type, __node, __field, __list)           \
    for (__type * __node =                                                  \
            exec_node_data(__type, (__list)->head, __field),                \
@@ -674,4 +688,13 @@ inline void exec_node::insert_before(exec_list *before)
         __node = __next, __next =                                          \
            exec_node_data(__type, (__next)->__field.next, __field))
 
+#define foreach_list_typed_safe_reverse(__type, __node, __field, __list)   \
+   for (__type * __node =                                                  \
+           exec_node_data(__type, (__list)->tail_pred, __field),           \
+               * __prev =                                                  \
+           exec_node_data(__type, (__node)->__field.prev, __field);        \
+        __prev != NULL;                                                    \
+        __node = __prev, __prev =                                          \
+           exec_node_data(__type, (__prev)->__field.prev, __field))
+
 #endif /* LIST_CONTAINER_H */
diff --git a/mesalib/src/glsl/loop_controls.cpp b/mesalib/src/glsl/loop_controls.cpp
index 1c1d34fef..2459fc1c3 100644
--- a/mesalib/src/glsl/loop_controls.cpp
+++ b/mesalib/src/glsl/loop_controls.cpp
@@ -102,9 +102,10 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
       return -1;
 
    if (!iter->type->is_integer()) {
+      const ir_expression_operation op = iter->type->is_double()
+         ? ir_unop_d2i : ir_unop_f2i;
       ir_rvalue *cast =
-	 new(mem_ctx) ir_expression(ir_unop_f2i, glsl_type::int_type, iter,
-				    NULL);
+         new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL);
 
       iter = cast->constant_expression_value();
    }
@@ -134,6 +135,9 @@ calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
       case GLSL_TYPE_FLOAT:
          iter = new(mem_ctx) ir_constant(float(iter_value + bias[i]));
          break;
+      case GLSL_TYPE_DOUBLE:
+         iter = new(mem_ctx) ir_constant(double(iter_value + bias[i]));
+         break;
       default:
           unreachable(!"Unsupported type for loop iterator.");
       }
diff --git a/mesalib/src/glsl/lower_instructions.cpp b/mesalib/src/glsl/lower_instructions.cpp
index 684285350..4779de059 100644
--- a/mesalib/src/glsl/lower_instructions.cpp
+++ b/mesalib/src/glsl/lower_instructions.cpp
@@ -36,12 +36,14 @@
  * - EXP_TO_EXP2
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
- * - MOD_TO_FRACT
+ * - MOD_TO_FLOOR
  * - LDEXP_TO_ARITH
+ * - DFREXP_DLDEXP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  * - CARRY_TO_ARITH
  * - BORROW_TO_ARITH
  * - SAT_TO_CLAMP
+ * - DOPS_TO_DFRAC
  *
  * SUB_TO_ADD_NEG:
  * ---------------
@@ -77,17 +79,25 @@
  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
  * x**y to 2**(y * log2(x)).
  *
- * MOD_TO_FRACT:
+ * MOD_TO_FLOOR:
  * -------------
- * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
+ * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
  *
  * Many GPUs don't have a MOD instruction (945 and 965 included), and
  * if we have to break it down like this anyway, it gives an
  * opportunity to do things like constant fold the (1.0 / op1) easily.
  *
+ * Note: before we used to implement this as op1 * fract(op0 / op1) but this
+ * implementation had significant precision errors.
+ *
  * LDEXP_TO_ARITH:
  * -------------
- * Converts ir_binop_ldexp to arithmetic and bit operations.
+ * Converts ir_binop_ldexp to arithmetic and bit operations for float sources.
+ *
+ * DFREXP_DLDEXP_TO_ARITH:
+ * ---------------
+ * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to
+ * arithmetic and bit ops for double arguments.
  *
  * BITFIELD_INSERT_TO_BFM_BFI:
  * ---------------------------
@@ -109,9 +119,13 @@
  * -------------
  * Converts ir_unop_saturate into min(max(x, 0.0), 1.0)
  *
+ * DOPS_TO_DFRAC:
+ * --------------
+ * Converts double trunc, ceil, floor, round to fract
  */
 
 #include "main/core.h" /* for M_LOG2E */
+#include "program/prog_instruction.h" /* for swizzle */
 #include "glsl_types.h"
 #include "ir.h"
 #include "ir_builder.h"
@@ -136,15 +150,25 @@ private:
    void sub_to_add_neg(ir_expression *);
    void div_to_mul_rcp(ir_expression *);
    void int_div_to_mul_rcp(ir_expression *);
-   void mod_to_fract(ir_expression *);
+   void mod_to_floor(ir_expression *);
    void exp_to_exp2(ir_expression *);
    void pow_to_exp2(ir_expression *);
    void log_to_log2(ir_expression *);
    void bitfield_insert_to_bfm_bfi(ir_expression *);
    void ldexp_to_arith(ir_expression *);
+   void dldexp_to_arith(ir_expression *);
+   void dfrexp_sig_to_arith(ir_expression *);
+   void dfrexp_exp_to_arith(ir_expression *);
    void carry_to_arith(ir_expression *);
    void borrow_to_arith(ir_expression *);
    void sat_to_clamp(ir_expression *);
+   void double_dot_to_fma(ir_expression *);
+   void double_lrp(ir_expression *);
+   void dceil_to_dfrac(ir_expression *);
+   void dfloor_to_dfrac(ir_expression *);
+   void dround_even_to_dfrac(ir_expression *);
+   void dtrunc_to_dfrac(ir_expression *);
+   void dsign_to_csel(ir_expression *);
 };
 
 } /* anonymous namespace */
@@ -175,7 +199,7 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
 void
 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
 {
-   assert(ir->operands[1]->type->is_float());
+   assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double());
 
    /* New expression for the 1.0 / op1 */
    ir_rvalue *expr;
@@ -276,37 +300,50 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
 }
 
 void
-lower_instructions_visitor::mod_to_fract(ir_expression *ir)
+lower_instructions_visitor::mod_to_floor(ir_expression *ir)
 {
-   ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
-					   ir_var_temporary);
-   this->base_ir->insert_before(temp);
-
-   ir_assignment *const assign =
-      new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
-			    ir->operands[1], NULL);
-
-   this->base_ir->insert_before(assign);
+   ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
+                                         ir_var_temporary);
+   ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
+                                         ir_var_temporary);
+   this->base_ir->insert_before(x);
+   this->base_ir->insert_before(y);
+
+   ir_assignment *const assign_x =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
+                            ir->operands[0], NULL);
+   ir_assignment *const assign_y =
+      new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
+                            ir->operands[1], NULL);
+
+   this->base_ir->insert_before(assign_x);
+   this->base_ir->insert_before(assign_y);
 
    ir_expression *const div_expr =
-      new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
-			    ir->operands[0],
-			    new(ir) ir_dereference_variable(temp));
+      new(ir) ir_expression(ir_binop_div, x->type,
+                            new(ir) ir_dereference_variable(x),
+                            new(ir) ir_dereference_variable(y));
 
    /* Don't generate new IR that would need to be lowered in an additional
     * pass.
     */
-   if (lowering(DIV_TO_MUL_RCP))
+   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
       div_to_mul_rcp(div_expr);
 
-   ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
-					   ir->operands[0]->type,
-					   div_expr,
-					   NULL);
+   ir_expression *const floor_expr =
+      new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
 
-   ir->operation = ir_binop_mul;
-   ir->operands[0] = new(ir) ir_dereference_variable(temp);
-   ir->operands[1] = expr;
+   if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+      dfloor_to_dfrac(floor_expr);
+
+   ir_expression *const mul_expr =
+      new(ir) ir_expression(ir_binop_mul,
+                            new(ir) ir_dereference_variable(y),
+                            floor_expr);
+
+   ir->operation = ir_binop_sub;
+   ir->operands[0] = new(ir) ir_dereference_variable(x);
+   ir->operands[1] = mul_expr;
    this->progress = true;
 }
 
@@ -455,6 +492,262 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
 }
 
 void
+lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
+{
+   /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
+    * of <x>.
+    */
+
+   const unsigned vec_elem = ir->type->vector_elements;
+
+   /* Types */
+   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+   /* Constants */
+   ir_constant *zeroi = ir_constant::zero(ir, ivec);
+
+   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
+
+   ir_constant *exp_shift = new(ir) ir_constant(20);
+   ir_constant *exp_width = new(ir) ir_constant(11);
+   ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
+
+   /* Temporary variables */
+   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
+   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
+
+   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
+                                                  ir_var_temporary);
+
+   ir_variable *extracted_biased_exp =
+      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
+   ir_variable *resulting_biased_exp =
+      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
+
+   ir_variable *is_not_zero_or_underflow =
+      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+
+   ir_instruction &i = *base_ir;
+
+   /* Copy <x> and <exp> arguments. */
+   i.insert_before(x);
+   i.insert_before(assign(x, ir->operands[0]));
+   i.insert_before(exp);
+   i.insert_before(assign(exp, ir->operands[1]));
+
+   ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
+   if (lowering(DFREXP_DLDEXP_TO_ARITH))
+      dfrexp_exp_to_arith(frexp_exp);
+
+   /* Extract the biased exponent from <x>. */
+   i.insert_before(extracted_biased_exp);
+   i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
+
+   i.insert_before(resulting_biased_exp);
+   i.insert_before(assign(resulting_biased_exp,
+                          add(extracted_biased_exp, exp)));
+
+   /* Test if result is ±0.0, subnormal, or underflow by checking if the
+    * resulting biased exponent would be less than 0x1. If so, the result is
+    * 0.0 with the sign of x. (Actually, invert the conditions so that
+    * immediate values are the second arguments, which is better for i965)
+    * TODO: Implement in a vector fashion.
+    */
+   i.insert_before(zero_sign_x);
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+      i.insert_before(unpacked);
+      i.insert_before(
+            assign(unpacked,
+                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
+      i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)),
+                             WRITEMASK_Y));
+      i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X));
+      i.insert_before(assign(zero_sign_x,
+                             expr(ir_unop_pack_double_2x32, unpacked),
+                             1 << elem));
+   }
+   i.insert_before(is_not_zero_or_underflow);
+   i.insert_before(assign(is_not_zero_or_underflow,
+                          gequal(resulting_biased_exp,
+                                  new(ir) ir_constant(0x1, vec_elem))));
+   i.insert_before(assign(x, csel(is_not_zero_or_underflow,
+                                  x, zero_sign_x)));
+   i.insert_before(assign(resulting_biased_exp,
+                          csel(is_not_zero_or_underflow,
+                               resulting_biased_exp, zeroi)));
+
+   /* We could test for overflows by checking if the resulting biased exponent
+    * would be greater than 0x7FE. Turns out we don't need to because the GLSL
+    * spec says:
+    *
+    *    "If this product is too large to be represented in the
+    *     floating-point type, the result is undefined."
+    */
+
+   ir_rvalue *results[4] = {NULL};
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+      i.insert_before(unpacked);
+      i.insert_before(
+            assign(unpacked,
+                   expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1))));
+
+      ir_expression *bfi = bitfield_insert(
+            swizzle_y(unpacked),
+            i2u(swizzle(resulting_biased_exp, elem, 1)),
+            exp_shift->clone(ir, NULL),
+            exp_width->clone(ir, NULL));
+
+      if (lowering(BITFIELD_INSERT_TO_BFM_BFI))
+         bitfield_insert_to_bfm_bfi(bfi);
+
+      i.insert_before(assign(unpacked, bfi, WRITEMASK_Y));
+
+      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
+   }
+
+   ir->operation = ir_quadop_vector;
+   ir->operands[0] = results[0];
+   ir->operands[1] = results[1];
+   ir->operands[2] = results[2];
+   ir->operands[3] = results[3];
+
+   /* Don't generate new IR that would need to be lowered in an additional
+    * pass.
+    */
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir)
+{
+   const unsigned vec_elem = ir->type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+   /* Double-precision floating-point values are stored as
+    *   1 sign bit;
+    *   11 exponent bits;
+    *   52 mantissa bits.
+    *
+    * We're just extracting the significand here, so we only need to modify
+    * the upper 32-bit uint. Unfortunately we must extract each double
+    * independently as there is no vector version of unpackDouble.
+    */
+
+   ir_instruction &i = *base_ir;
+
+   ir_variable *is_not_zero =
+      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
+   ir_rvalue *results[4] = {NULL};
+
+   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
+   i.insert_before(is_not_zero);
+   i.insert_before(
+         assign(is_not_zero,
+                nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero)));
+
+   /* TODO: Remake this as more vector-friendly when int64 support is
+    * available.
+    */
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_constant *zero = new(ir) ir_constant(0u, 1);
+      ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1);
+
+      /* Exponent of double floating-point values in the range [0.5, 1.0). */
+      ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1);
+
+      ir_variable *bits =
+         new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary);
+      ir_variable *unpacked =
+         new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary);
+
+      ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1);
+
+      i.insert_before(bits);
+      i.insert_before(unpacked);
+      i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x)));
+
+      /* Manipulate the high uint to remove the exponent and replace it with
+       * either the default exponent or zero.
+       */
+      i.insert_before(assign(bits, swizzle_y(unpacked)));
+      i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask)));
+      i.insert_before(assign(bits, bit_or(bits,
+                                          csel(swizzle(is_not_zero, elem, 1),
+                                               exponent_value,
+                                               zero))));
+      i.insert_before(assign(unpacked, bits, WRITEMASK_Y));
+      results[elem] = expr(ir_unop_pack_double_2x32, unpacked);
+   }
+
+   /* Put the dvec back together */
+   ir->operation = ir_quadop_vector;
+   ir->operands[0] = results[0];
+   ir->operands[1] = results[1];
+   ir->operands[2] = results[2];
+   ir->operands[3] = results[3];
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir)
+{
+   const unsigned vec_elem = ir->type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
+
+   /* Double-precision floating-point values are stored as
+    *   1 sign bit;
+    *   11 exponent bits;
+    *   52 mantissa bits.
+    *
+    * We're just extracting the exponent here, so we only care about the upper
+    * 32-bit uint.
+    */
+
+   ir_instruction &i = *base_ir;
+
+   ir_variable *is_not_zero =
+      new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary);
+   ir_variable *high_words =
+      new(ir) ir_variable(uvec, "high_words", ir_var_temporary);
+   ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem);
+   ir_constant *izero = new(ir) ir_constant(0, vec_elem);
+
+   ir_rvalue *absval = abs(ir->operands[0]);
+
+   i.insert_before(is_not_zero);
+   i.insert_before(high_words);
+   i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero)));
+
+   /* Extract all of the upper uints. */
+   for (unsigned elem = 0; elem < vec_elem; elem++) {
+      ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1);
+
+      i.insert_before(assign(high_words,
+                             swizzle_y(expr(ir_unop_unpack_double_2x32, x)),
+                             1 << elem));
+
+   }
+   ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem);
+   ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem);
+
+   /* For non-zero inputs, shift the exponent down and apply bias. */
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero);
+   ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift)));
+   ir->operands[2] = izero;
+
+   this->progress = true;
+}
+
+void
 lower_instructions_visitor::carry_to_arith(ir_expression *ir)
 {
    /* Translates
@@ -508,10 +801,211 @@ lower_instructions_visitor::sat_to_clamp(ir_expression *ir)
    this->progress = true;
 }
 
+void
+lower_instructions_visitor::double_dot_to_fma(ir_expression *ir)
+{
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res",
+					   ir_var_temporary);
+   this->base_ir->insert_before(temp);
+
+   int nc = ir->operands[0]->type->components();
+   for (int i = nc - 1; i >= 1; i--) {
+      ir_assignment *assig;
+      if (i == (nc - 1)) {
+         assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
+                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1)));
+      } else {
+         assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1),
+                                  swizzle(ir->operands[1]->clone(ir, NULL), i, 1),
+                                  temp));
+      }
+      this->base_ir->insert_before(assig);
+   }
+
+   ir->operation = ir_triop_fma;
+   ir->operands[0] = swizzle(ir->operands[0], 0, 1);
+   ir->operands[1] = swizzle(ir->operands[1], 0, 1);
+   ir->operands[2] = new(ir) ir_dereference_variable(temp);
+
+   this->progress = true;
+
+}
+
+void
+lower_instructions_visitor::double_lrp(ir_expression *ir)
+{
+   int swizval;
+   ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2];
+   ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements);
+
+   switch (op2->type->vector_elements) {
+   case 1:
+      swizval = SWIZZLE_XXXX;
+      break;
+   default:
+      assert(op0->type->vector_elements == op2->type->vector_elements);
+      swizval = SWIZZLE_XYZW;
+      break;
+   }
+
+   ir->operation = ir_triop_fma;
+   ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements);
+   ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0);
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir)
+{
+   /*
+    * frtemp = frac(x);
+    * temp = sub(x, frtemp);
+    * result = temp + ((frtemp != 0.0) ? 1.0 : 0.0);
+    */
+   ir_instruction &i = *base_ir;
+   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+                                             ir_var_temporary);
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(ir->operands[0])));
+
+   ir->operation = ir_binop_add;
+   ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp);
+   ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL));
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir)
+{
+   /*
+    * frtemp = frac(x);
+    * result = sub(x, frtemp);
+    */
+   ir->operation = ir_binop_sub;
+   ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL));
+
+   this->progress = true;
+}
+void
+lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir)
+{
+   /*
+    * insane but works
+    * temp = x + 0.5;
+    * frtemp = frac(temp);
+    * t2 = sub(temp, frtemp);
+    * if (frac(x) == 0.5)
+    *     result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1;
+    *  else
+    *     result = t2;
+
+    */
+   ir_instruction &i = *base_ir;
+   ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp",
+                                             ir_var_temporary);
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
+                                           ir_var_temporary);
+   ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2",
+                                           ir_var_temporary);
+   ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements);
+   ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements);
+
+   i.insert_before(temp);
+   i.insert_before(assign(temp, add(ir->operands[0], p5)));
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(temp)));
+
+   i.insert_before(t2);
+   i.insert_before(assign(t2, sub(temp, frtemp)));
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)),
+                           p5->clone(ir, NULL));
+   ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))),
+                                zero),
+                          t2,
+                          sub(t2, one));
+   ir->operands[2] = new(ir) ir_dereference_variable(t2);
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir)
+{
+   /*
+    * frtemp = frac(x);
+    * temp = sub(x, frtemp);
+    * result = x >= 0 ? temp : temp + ((frtemp == 0.0) ? 0 : 1);
+    */
+   ir_rvalue *arg = ir->operands[0];
+   ir_instruction &i = *base_ir;
+
+   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
+   ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp",
+                                             ir_var_temporary);
+   ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp",
+                                           ir_var_temporary);
+
+   i.insert_before(frtemp);
+   i.insert_before(assign(frtemp, fract(arg)));
+   i.insert_before(temp);
+   i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp)));
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = gequal(arg->clone(ir, NULL), zero);
+   ir->operands[1] = new (ir) ir_dereference_variable(temp);
+   ir->operands[2] = add(temp,
+                         csel(equal(frtemp, zero->clone(ir, NULL)),
+                              zero->clone(ir, NULL),
+                              one));
+
+   this->progress = true;
+}
+
+void
+lower_instructions_visitor::dsign_to_csel(ir_expression *ir)
+{
+   /*
+    * temp = x > 0.0 ? 1.0 : 0.0;
+    * result = x < 0.0 ? -1.0 : temp;
+    */
+   ir_rvalue *arg = ir->operands[0];
+   ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements);
+   ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements);
+   ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements);
+
+   ir->operation = ir_triop_csel;
+   ir->operands[0] = less(arg->clone(ir, NULL),
+                          zero->clone(ir, NULL));
+   ir->operands[1] = neg_one;
+   ir->operands[2] = csel(greater(arg, zero),
+                          one,
+                          zero->clone(ir, NULL));
+
+   this->progress = true;
+}
+
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
    switch (ir->operation) {
+   case ir_binop_dot:
+      if (ir->operands[0]->type->is_double())
+         double_dot_to_fma(ir);
+      break;
+   case ir_triop_lrp:
+      if (ir->operands[0]->type->is_double())
+         double_lrp(ir);
+      break;
    case ir_binop_sub:
       if (lowering(SUB_TO_ADD_NEG))
 	 sub_to_add_neg(ir);
@@ -520,7 +1014,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
    case ir_binop_div:
       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 	 int_div_to_mul_rcp(ir);
-      else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() ||
+                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
 	 div_to_mul_rcp(ir);
       break;
 
@@ -535,8 +1030,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_mod:
-      if (lowering(MOD_TO_FRACT) && ir->type->is_float())
-	 mod_to_fract(ir);
+      if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double()))
+	 mod_to_floor(ir);
       break;
 
    case ir_binop_pow:
@@ -550,8 +1045,20 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
       break;
 
    case ir_binop_ldexp:
-      if (lowering(LDEXP_TO_ARITH))
+      if (lowering(LDEXP_TO_ARITH) && ir->type->is_float())
          ldexp_to_arith(ir);
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double())
+         dldexp_to_arith(ir);
+      break;
+
+   case ir_unop_frexp_exp:
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
+         dfrexp_exp_to_arith(ir);
+      break;
+
+   case ir_unop_frexp_sig:
+      if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double())
+         dfrexp_sig_to_arith(ir);
       break;
 
    case ir_binop_carry:
@@ -569,6 +1076,30 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          sat_to_clamp(ir);
       break;
 
+   case ir_unop_trunc:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dtrunc_to_dfrac(ir);
+      break;
+
+   case ir_unop_ceil:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dceil_to_dfrac(ir);
+      break;
+
+   case ir_unop_floor:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dfloor_to_dfrac(ir);
+      break;
+
+   case ir_unop_round_even:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dround_even_to_dfrac(ir);
+      break;
+
+   case ir_unop_sign:
+      if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+         dsign_to_csel(ir);
+      break;
    default:
       return visit_continue;
    }
diff --git a/mesalib/src/glsl/lower_mat_op_to_vec.cpp b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
index 105ee0d3f..dda754f91 100644
--- a/mesalib/src/glsl/lower_mat_op_to_vec.cpp
+++ b/mesalib/src/glsl/lower_mat_op_to_vec.cpp
@@ -354,6 +354,8 @@ ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
 
    /* OK, time to break down this matrix operation. */
    switch (orig_expr->operation) {
+   case ir_unop_d2f:
+   case ir_unop_f2d:
    case ir_unop_neg: {
       /* Apply the operation to each column.*/
       for (i = 0; i < matrix_columns; i++) {
diff --git a/mesalib/src/glsl/lower_ubo_reference.cpp b/mesalib/src/glsl/lower_ubo_reference.cpp
index 43dd067fa..4ea4ccb03 100644
--- a/mesalib/src/glsl/lower_ubo_reference.cpp
+++ b/mesalib/src/glsl/lower_ubo_reference.cpp
@@ -140,7 +140,8 @@ public:
 
    void handle_rvalue(ir_rvalue **rvalue);
    void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
-		       unsigned int deref_offset, bool row_major);
+                       unsigned int deref_offset, bool row_major,
+                       int matrix_columns);
    ir_expression *ubo_load(const struct glsl_type *type,
 			   ir_rvalue *offset);
 
@@ -265,6 +266,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
    unsigned const_offset = 0;
    bool row_major = is_dereferenced_thing_row_major(deref);
+   int matrix_columns = 1;
 
    /* Calculate the offset to the start of the region of the UBO
     * dereferenced by *rvalue.  This may be a variable offset if an
@@ -288,6 +290,9 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 	     * vector) is handled below in emit_ubo_loads.
 	     */
 	    array_stride = 4;
+            if (deref_array->array->type->is_double())
+               array_stride *= 2;
+            matrix_columns = deref_array->array->type->matrix_columns;
          } else if (deref_array->type->is_interface()) {
             /* We're processing an array dereference of an interface instance
 	     * array.  The thing being dereferenced *must* be a variable
@@ -334,15 +339,6 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
 	 const glsl_type *struct_type = deref_record->record->type;
 	 unsigned intra_struct_offset = 0;
 
-         /* glsl_type::std140_base_alignment doesn't grok interfaces.  Use
-          * 16-bytes for the alignment because that is the general minimum of
-          * std140.
-          */
-         const unsigned struct_alignment = struct_type->is_interface()
-            ? 16
-            : struct_type->std140_base_alignment(row_major);
-
-
 	 for (unsigned int i = 0; i < struct_type->length; i++) {
 	    const glsl_type *type = struct_type->fields.structure[i].type;
 
@@ -372,7 +368,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
              */
             if (type->without_array()->is_record()) {
                intra_struct_offset = glsl_align(intra_struct_offset,
-                                                struct_alignment);
+                                                field_align);
 
             }
 	 }
@@ -405,7 +401,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
    base_ir->insert_before(assign(load_offset, offset));
 
    deref = new(mem_ctx) ir_dereference_variable(load_var);
-   emit_ubo_loads(deref, load_offset, const_offset, row_major);
+   emit_ubo_loads(deref, load_offset, const_offset, row_major, matrix_columns);
    *rvalue = deref;
 
    progress = true;
@@ -436,7 +432,8 @@ void
 lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 					    ir_variable *base_offset,
                                             unsigned int deref_offset,
-                                            bool row_major)
+                                            bool row_major,
+                                            int matrix_columns)
 {
    if (deref->type->is_record()) {
       unsigned int field_offset = 0;
@@ -453,7 +450,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
                        field->type->std140_base_alignment(row_major));
 
 	 emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset,
-                        row_major);
+                        row_major, 1);
 
 	 field_offset += field->type->std140_size(row_major);
       }
@@ -472,7 +469,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 					      element);
 	 emit_ubo_loads(element_deref, base_offset,
 			deref_offset + i * array_stride,
-                        row_major);
+                        row_major, 1);
       }
       return;
    }
@@ -488,14 +485,18 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
             /* For a row-major matrix, the next column starts at the next
              * element.
              */
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * 4,
-                           row_major);
+            int size_mul = deref->type->is_double() ? 8 : 4;
+            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
+                           row_major, deref->type->matrix_columns);
          } else {
             /* std140 always rounds the stride of arrays (and matrices) to a
-             * vec4, so matrices are always 16 between columns/rows.
+             * vec4, so matrices are always 16 between columns/rows. With
+             * doubles, they will be 32 apart when there are more than 2 rows.
              */
-            emit_ubo_loads(col_deref, base_offset, deref_offset + i * 16,
-                           row_major);
+            int size_mul = (deref->type->is_double() &&
+                            deref->type->vector_elements > 2) ? 32 : 16;
+            emit_ubo_loads(col_deref, base_offset, deref_offset + i * size_mul,
+                           row_major, deref->type->matrix_columns);
          }
       }
       return;
@@ -510,16 +511,24 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
 				    ubo_load(deref->type, offset)));
    } else {
+      unsigned N = deref->type->is_double() ? 8 : 4;
+
       /* We're dereffing a column out of a row-major matrix, so we
        * gather the vector from each stored row.
       */
-      assert(deref->type->base_type == GLSL_TYPE_FLOAT);
+      assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
+             deref->type->base_type == GLSL_TYPE_DOUBLE);
       /* Matrices, row_major or not, are stored as if they were
        * arrays of vectors of the appropriate size in std140.
        * Arrays have their strides rounded up to a vec4, so the
-       * matrix stride is always 16.
+       * matrix stride is always 16. However, for a double matrix the stride
+       * may be either 16 or 32, depending on the number of columns.
        */
-      unsigned matrix_stride = 16;
+      assert(matrix_columns <= 4);
+      unsigned matrix_stride = glsl_align(matrix_columns * N, 16);
+
+      const glsl_type *ubo_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
+         glsl_type::float_type : glsl_type::double_type;
 
       for (unsigned i = 0; i < deref->type->vector_elements; i++) {
 	 ir_rvalue *chan_offset =
@@ -527,7 +536,7 @@ lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
 		new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
 
 	 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-				       ubo_load(glsl_type::float_type,
+				       ubo_load(ubo_type,
 						chan_offset),
 				       (1U << i)));
       }
diff --git a/mesalib/src/glsl/nir/.gitignore b/mesalib/src/glsl/nir/.gitignore
new file mode 100644
index 000000000..64828eba6
--- /dev/null
+++ b/mesalib/src/glsl/nir/.gitignore
@@ -0,0 +1,5 @@
+nir_builder_opcodes.h
+nir_opt_algebraic.c
+nir_opcodes.c
+nir_opcodes.h
+nir_constant_expressions.c
diff --git a/mesalib/src/glsl/nir/README b/mesalib/src/glsl/nir/README
new file mode 100644
index 000000000..2c81db9db
--- /dev/null
+++ b/mesalib/src/glsl/nir/README
@@ -0,0 +1,118 @@
+New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR.
+Its design inherits from the various IRs that Mesa has used in the past, as
+well as Direct3D assembly, and it includes a few new ideas as well. It is a
+flat (in terms of using instructions instead of expressions), typeless IR,
+similar to TGSI and Mesa IR.  It also supports SSA (although it doesn't require
+it).
+
+Variables
+=========
+
+NIR includes support for source-level GLSL variables through a structure mostly
+copied from GLSL IR. These will be used for linking and conversion from GLSL IR
+(and later, from an AST), but for the most part, they will be lowered to
+registers (see below) and loads/stores.
+
+Registers
+=========
+
+Registers are light-weight; they consist of a structure that only contains its
+size, its index for liveness analysis, and an optional name for debugging. In
+addition, registers can be local to a function or global to the entire shader;
+the latter will be used in ARB_shader_subroutine for passing parameters and
+getting return values from subroutines. A register can also be an array, in which
+case it can be accessed indirectly. Each ALU instruction (add, subtract, etc.)
+works directly with registers or SSA values (see below).
+
+SSA
+========
+
+Everywhere a register can be loaded/stored, an SSA value can be used instead.
+The only exception is that arrays/indirect addressing are not supported with
+SSA; although research has been done on extensions of SSA to arrays before, it's
+usually for the purpose of parallelization (which we're not interested in), and
+adds some overhead in the form of adding copies or extra arrays (which is much
+more expensive than introducing copies between non-array registers). SSA uses
+point directly to their corresponding definition, which in turn points to the
+instruction it is part of. This creates an implicit use-def chain and avoids the
+need for an external structure for each SSA register.
+
+Functions
+=========
+
+Support for function calls is mostly similar to GLSL IR. Each shader contains a
+list of functions, and each function has a list of overloads. Each overload
+contains a list of parameters, and may contain an implementation which specifies
+the variables that correspond to the parameters and return value. Inlining a
+function, assuming it has a single return point, is as simple as copying its
+instructions, registers, and local variables into the target function and then
+inserting copies to and from the new parameters as appropriate. After functions
+are inlined and any non-subroutine functions are deleted, parameters and return
+variables will be converted to global variables and then global registers. We
+don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons:
+
+- If we want to do optimizations before link time, we need to have the function
+signature available at link time.
+
+- If we do any inlining before link time, then we might wind up with the
+inlined function and the non-inlined function using the same global
+variables/registers which would preclude optimization.
+
+Intrinsics
+=========
+
+Any operation (other than function calls and textures) which touches a variable
+or is not referentially transparent is represented by an intrinsic. Intrinsics
+are similar to the idea of a "builtin function," i.e. a function declaration
+whose implementation is provided by the backend, except they are more powerful
+in the following ways:
+
+- They can also load and store registers when appropriate, which limits the
+number of variables needed in later stages of the IR while obviating the need
+for a separate load/store variable instruction.
+
+- Intrinsics can be marked as side-effect free, which permits them to be
+treated like any other instruction when it comes to optimizations. This allows
+load intrinsics to be represented as intrinsics while still being optimized
+away by dead code elimination, common subexpression elimination, etc.
+
+Intrinsics are used for:
+
+- Atomic operations
+- Memory barriers
+- Subroutine calls
+- Geometry shader emitVertex and endPrimitive
+- Loading and storing variables (before lowering)
+- Loading and storing uniforms, shader inputs and outputs, etc. (after lowering)
+- Copying variables (cases where in GLSL the destination is a structure or
+array)
+- The kitchen sink
+- ...
+
+Textures
+=========
+
+Unfortunately, there are far too many texture operations to represent each one
+of them with an intrinsic, so there's a special texture instruction similar to
+the GLSL IR one. The biggest difference is that, while the texture instruction
+has a sampler dereference field used just like in GLSL IR, this gets lowered to
+a texture unit index (with a possible indirect offset) while the type
+information of the original sampler is kept around for backends. Also, all the
+non-constant sources are stored in a single array to make it easier for
+optimization passes to iterate over all the sources.
+
+Control Flow
+=========
+
+Like in GLSL IR, control flow consists of a tree of "control flow nodes", which
+include if statements and loops, and jump instructions (break, continue, and
+return). Unlike GLSL IR, though, the leaves of the tree aren't statements but
+basic blocks. Each basic block also keeps track of its successors and
+predecessors, and function implementations keep track of the beginning basic
+block (the first basic block of the function) and the ending basic block (a fake
+basic block that every return statement points to). Together, these elements
+make up the control flow graph, in this case a redundant piece of information on
+top of the control flow tree that will be used by almost all the optimizations.
+There are helper functions to add and remove control flow nodes that also update
+the control flow graph, and so usually it doesn't need to be touched by passes
+that modify control flow nodes.
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.cpp b/mesalib/src/glsl/nir/glsl_to_nir.cpp
new file mode 100644
index 000000000..544d0d932
--- /dev/null
+++ b/mesalib/src/glsl/nir/glsl_to_nir.cpp
@@ -0,0 +1,1814 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "glsl_to_nir.h"
+#include "ir_visitor.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+
+/*
+ * pass to lower GLSL IR to NIR
+ *
+ * This will lower variable dereferences to loads/stores of corresponding
+ * variables in NIR - the variables will be converted to registers in a later
+ * pass.
+ */
+
+namespace {
+
+/*
+ * Visitor that walks a GLSL IR instruction list and emits the equivalent NIR
+ * into the nir_shader handed to the constructor.
+ */
+class nir_visitor : public ir_visitor
+{
+public:
+   nir_visitor(nir_shader *shader, bool supports_ints);
+   ~nir_visitor();
+
+   virtual void visit(ir_variable *);
+   virtual void visit(ir_function *);
+   virtual void visit(ir_function_signature *);
+   virtual void visit(ir_loop *);
+   virtual void visit(ir_if *);
+   virtual void visit(ir_discard *);
+   virtual void visit(ir_loop_jump *);
+   virtual void visit(ir_return *);
+   virtual void visit(ir_call *);
+   virtual void visit(ir_assignment *);
+   virtual void visit(ir_emit_vertex *);
+   virtual void visit(ir_end_primitive *);
+   virtual void visit(ir_expression *);
+   virtual void visit(ir_swizzle *);
+   virtual void visit(ir_texture *);
+   virtual void visit(ir_constant *);
+   virtual void visit(ir_dereference_variable *);
+   virtual void visit(ir_dereference_record *);
+   virtual void visit(ir_dereference_array *);
+
+   /* creates the nir_function and one overload per signature of ir */
+   void create_function(ir_function *ir);
+
+private:
+   /* creates the overload for one signature and records it in overload_table */
+   void create_overload(ir_function_signature *ir, nir_function *function);
+   /* gives instr an SSA destination and appends it to cf_node_list */
+   void add_instr(nir_instr *instr, unsigned num_components);
+   /* visits ir and returns an SSA source holding its value */
+   nir_src evaluate_rvalue(ir_rvalue *ir);
+
+   /* helpers that build an ALU instruction and append it via add_instr() */
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src *srcs);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
+                       nir_src src2);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
+                       nir_src src2, nir_src src3);
+
+   /* copy of the constructor's supports_ints argument */
+   bool supports_ints;
+
+   /* the shader being built */
+   nir_shader *shader;
+   /* implementation of the function signature currently being visited */
+   nir_function_impl *impl;
+   /* control flow list that new instructions and CF nodes are appended to */
+   exec_list *cf_node_list;
+   nir_instr *result; /* result of the expression tree last visited */
+
+   /* the head of the dereference chain we're creating */
+   nir_deref_var *deref_head;
+   /* the tail of the dereference chain we're creating */
+   nir_deref *deref_tail;
+
+   nir_variable *var; /* variable created by ir_variable visitor */
+
+   /* whether the IR we're operating on is per-function or global */
+   bool is_global;
+
+   /* map of ir_variable -> nir_variable */
+   struct hash_table *var_table;
+
+   /* map of ir_function_signature -> nir_function_overload */
+   struct hash_table *overload_table;
+};
+
+/*
+ * This visitor runs before the main visitor, calling create_function() for
+ * each function so that the main visitor can resolve forward references in
+ * calls.
+ */
+
+class nir_function_visitor : public ir_hierarchical_visitor
+{
+public:
+   /* v is the main visitor on which create_function() will be called */
+   nir_function_visitor(nir_visitor *v) : visitor(v)
+   {
+   }
+   virtual ir_visitor_status visit_enter(ir_function *);
+
+private:
+   /* main visitor that receives the create_function() calls */
+   nir_visitor *visitor;
+};
+
+}; /* end of anonymous namespace */
+
+/*
+ * Options used by glsl_to_nir() when it is called without a parse state.
+ * The empty initializer leaves every field at its zero default.
+ */
+static const nir_shader_compiler_options default_options = {
+};
+
+/*
+ * Converts the GLSL IR instruction list "ir" into a freshly created
+ * nir_shader and returns it.
+ *
+ * When "state" is non-NULL, the per-stage NIR compiler options are taken
+ * from (and, on first use, created in) the GL context, and the user
+ * structure table is copied into the shader.  Without a state, the static
+ * default options are used and the shader gets no user structures.
+ */
+nir_shader *
+glsl_to_nir(exec_list *ir, _mesa_glsl_parse_state *state,
+            bool native_integers)
+{
+   const nir_shader_compiler_options *options = &default_options;
+
+   if (state != NULL) {
+      struct gl_context *ctx = state->ctx;
+      struct gl_shader_compiler_options *stage_options =
+         &ctx->Const.ShaderCompilerOptions[state->stage];
+
+      if (stage_options->NirOptions == NULL) {
+         /* First conversion for this stage: build and cache the options. */
+         nir_shader_compiler_options *fresh =
+            rzalloc(ctx, nir_shader_compiler_options);
+
+         if (stage_options->EmitNoPow)
+            fresh->lower_fpow = true;
+
+         stage_options->NirOptions = fresh;
+      }
+
+      options = stage_options->NirOptions;
+   }
+
+   nir_shader *shader = nir_shader_create(NULL, options);
+
+   shader->num_user_structures = 0;
+   shader->user_structures = NULL;
+
+   if (state != NULL) {
+      shader->num_user_structures = state->num_user_structures;
+      shader->user_structures = ralloc_array(shader, glsl_type *,
+                                             shader->num_user_structures);
+      memcpy(shader->user_structures, state->user_structures,
+            shader->num_user_structures * sizeof(glsl_type *));
+   }
+
+   /* Declare every function first so calls can resolve forward references,
+    * then run the main conversion.
+    */
+   nir_visitor main_visitor(shader, native_integers);
+   nir_function_visitor function_visitor(&main_visitor);
+   function_visitor.run(ir);
+   visit_exec_list(ir, &main_visitor);
+
+   return shader;
+}
+
+/*
+ * Sets up a visitor that emits into "shader".  The visitor starts out at
+ * global scope with empty ir_variable and ir_function_signature lookup
+ * tables.
+ */
+nir_visitor::nir_visitor(nir_shader *shader, bool supports_ints)
+{
+   this->supports_ints = supports_ints;
+   this->shader = shader;
+   this->is_global = true;
+   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+
+   /* Nothing has been visited yet; give the traversal state defined values
+    * so a premature read is a NULL dereference rather than a wild pointer.
+    */
+   this->impl = NULL;
+   this->cf_node_list = NULL;
+   this->result = NULL;
+   this->deref_head = NULL;
+   this->deref_tail = NULL;
+   this->var = NULL;
+}
+
+/*
+ * Frees both lookup tables.  No per-entry delete callback is passed, so the
+ * objects the entries point at are left alone.
+ */
+nir_visitor::~nir_visitor()
+{
+   _mesa_hash_table_destroy(this->overload_table, NULL);
+   _mesa_hash_table_destroy(this->var_table, NULL);
+}
+
+/*
+ * Recursively copies a GLSL IR constant into a newly allocated nir_constant
+ * owned by mem_ctx.
+ *
+ * Returns NULL when ir is NULL.  Scalar, vector and matrix constants have
+ * their components copied into ret->value; structures and arrays get an
+ * "elements" array with one recursively copied constant per member.
+ */
+static nir_constant *
+constant_copy(ir_constant *ir, void *mem_ctx)
+{
+   if (ir == NULL)
+      return NULL;
+
+   /* Zero-allocate so that fields the switch below does not fill in (the
+    * elements pointer of a scalar/vector constant, unused value slots) have
+    * a defined value instead of heap garbage.
+    */
+   nir_constant *ret = rzalloc(mem_ctx, nir_constant);
+
+   unsigned total_elems = ir->type->components();
+   unsigned i;
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_UINT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.u[i] = ir->value.u[i];
+      break;
+
+   case GLSL_TYPE_INT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.i[i] = ir->value.i[i];
+      break;
+
+   case GLSL_TYPE_FLOAT:
+      for (i = 0; i < total_elems; i++)
+         ret->value.f[i] = ir->value.f[i];
+      break;
+
+   case GLSL_TYPE_BOOL:
+      for (i = 0; i < total_elems; i++)
+         ret->value.b[i] = ir->value.b[i];
+      break;
+
+   case GLSL_TYPE_STRUCT:
+      ret->elements = ralloc_array(mem_ctx, nir_constant *,
+                                   ir->type->length);
+      /* Structure members are kept in a list rather than an array. */
+      i = 0;
+      foreach_in_list(ir_constant, field, &ir->components) {
+         ret->elements[i] = constant_copy(field, mem_ctx);
+         i++;
+      }
+      break;
+
+   case GLSL_TYPE_ARRAY:
+      ret->elements = ralloc_array(mem_ctx, nir_constant *,
+                                   ir->type->length);
+
+      for (i = 0; i < ir->type->length; i++)
+         ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   return ret;
+}
+
+/*
+ * Creates the nir_variable corresponding to a GLSL IR variable.
+ *
+ * The new variable is registered in the container that matches its mode
+ * (function locals, shader globals, inputs, outputs, uniforms or system
+ * values), recorded in var_table under the ir_variable, and left in
+ * this->var for the caller.
+ */
+void
+nir_visitor::visit(ir_variable *ir)
+{
+   /* Zero-allocate: any nir_variable field not explicitly assigned below
+    * (for instance the list node of a variable that only goes into a hash
+    * table) would otherwise hold indeterminate data.
+    */
+   nir_variable *var = rzalloc(shader, nir_variable);
+   var->type = ir->type;
+   var->name = ralloc_strdup(var, ir->name);
+
+   if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) {
+      unsigned size = ir->get_interface_type()->length;
+      var->max_ifc_array_access = ralloc_array(var, unsigned, size);
+      memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(),
+             size * sizeof(unsigned));
+   } else {
+      var->max_ifc_array_access = NULL;
+   }
+
+   var->data.read_only = ir->data.read_only;
+   var->data.centroid = ir->data.centroid;
+   var->data.sample = ir->data.sample;
+   var->data.invariant = ir->data.invariant;
+   var->data.location = ir->data.location;
+
+   /* Translate the GLSL IR storage mode into the NIR one. */
+   switch(ir->data.mode) {
+   case ir_var_auto:
+   case ir_var_temporary:
+      if (is_global)
+         var->data.mode = nir_var_global;
+      else
+         var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_function_in:
+   case ir_var_function_out:
+   case ir_var_function_inout:
+   case ir_var_const_in:
+      var->data.mode = nir_var_local;
+      break;
+
+   case ir_var_shader_in:
+      if (ir->data.location == VARYING_SLOT_FACE) {
+         /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
+         var->data.location = SYSTEM_VALUE_FRONT_FACE;
+         var->data.mode = nir_var_system_value;
+      } else {
+         var->data.mode = nir_var_shader_in;
+      }
+      break;
+
+   case ir_var_shader_out:
+      var->data.mode = nir_var_shader_out;
+      break;
+
+   case ir_var_uniform:
+      var->data.mode = nir_var_uniform;
+      break;
+
+
+   case ir_var_system_value:
+      var->data.mode = nir_var_system_value;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.interpolation = ir->data.interpolation;
+   var->data.origin_upper_left = ir->data.origin_upper_left;
+   var->data.pixel_center_integer = ir->data.pixel_center_integer;
+   var->data.explicit_location = ir->data.explicit_location;
+   var->data.explicit_index = ir->data.explicit_index;
+   var->data.explicit_binding = ir->data.explicit_binding;
+   var->data.has_initializer = ir->data.has_initializer;
+   var->data.is_unmatched_generic_inout = ir->data.is_unmatched_generic_inout;
+   var->data.location_frac = ir->data.location_frac;
+   var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array;
+   var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray;
+
+   switch (ir->data.depth_layout) {
+   case ir_depth_layout_none:
+      var->data.depth_layout = nir_depth_layout_none;
+      break;
+   case ir_depth_layout_any:
+      var->data.depth_layout = nir_depth_layout_any;
+      break;
+   case ir_depth_layout_greater:
+      var->data.depth_layout = nir_depth_layout_greater;
+      break;
+   case ir_depth_layout_less:
+      var->data.depth_layout = nir_depth_layout_less;
+      break;
+   case ir_depth_layout_unchanged:
+      var->data.depth_layout = nir_depth_layout_unchanged;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   var->data.index = ir->data.index;
+   var->data.binding = ir->data.binding;
+   /* XXX Get rid of buffer_index */
+   var->data.atomic.buffer_index = ir->data.binding;
+   var->data.atomic.offset = ir->data.atomic.offset;
+   var->data.image.read_only = ir->data.image_read_only;
+   var->data.image.write_only = ir->data.image_write_only;
+   var->data.image.coherent = ir->data.image_coherent;
+   var->data.image._volatile = ir->data.image_volatile;
+   var->data.image.restrict_flag = ir->data.image_restrict;
+   var->data.image.format = ir->data.image_format;
+   var->data.max_array_access = ir->data.max_array_access;
+
+   var->num_state_slots = ir->get_num_state_slots();
+   if (var->num_state_slots > 0) {
+      var->state_slots = ralloc_array(var, nir_state_slot,
+                                      var->num_state_slots);
+
+      ir_state_slot *state_slots = ir->get_state_slots();
+      for (unsigned i = 0; i < var->num_state_slots; i++) {
+         for (unsigned j = 0; j < 5; j++)
+            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
+         var->state_slots[i].swizzle = state_slots[i].swizzle;
+      }
+   } else {
+      var->state_slots = NULL;
+   }
+
+   var->constant_initializer = constant_copy(ir->constant_initializer, var);
+
+   var->interface_type = ir->get_interface_type();
+
+   /* Register the variable with the container that owns its mode. */
+   switch (var->data.mode) {
+   case nir_var_local:
+      exec_list_push_tail(&impl->locals, &var->node);
+      break;
+
+   case nir_var_global:
+      exec_list_push_tail(&shader->globals, &var->node);
+      break;
+
+   case nir_var_shader_in:
+      _mesa_hash_table_insert(shader->inputs, var->name, var);
+      break;
+
+   case nir_var_shader_out:
+      _mesa_hash_table_insert(shader->outputs, var->name, var);
+      break;
+
+   case nir_var_uniform:
+      _mesa_hash_table_insert(shader->uniforms, var->name, var);
+      break;
+
+   case nir_var_system_value:
+      exec_list_push_tail(&shader->system_values, &var->node);
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   _mesa_hash_table_insert(var_table, ir, var);
+   this->var = var;
+}
+
+/*
+ * Declares the function (and its overloads) in the main visitor.
+ *
+ * NOTE(review): visit_continue_with_parent presumably stops the hierarchical
+ * walk from descending into the function's signatures and body - confirm
+ * against ir_hierarchical_visitor.
+ */
+ir_visitor_status
+nir_function_visitor::visit_enter(ir_function *ir)
+{
+   visitor->create_function(ir);
+   return visit_continue_with_parent;
+}
+
+
+/*
+ * Creates a nir_function named after the GLSL IR function and then one
+ * overload for each of its signatures.
+ */
+void
+nir_visitor::create_function(ir_function *ir)
+{
+   nir_function *nir_func = nir_function_create(this->shader, ir->name);
+
+   foreach_in_list(ir_function_signature, signature, &ir->signatures)
+      create_overload(signature, nir_func);
+}
+
+
+
+/*
+ * Creates the nir_function_overload for one GLSL IR signature, filling in
+ * the direction and type of every parameter plus the return type, and
+ * records it in overload_table so calls and definitions can find it later.
+ *
+ * Intrinsic signatures get no overload.
+ */
+void
+nir_visitor::create_overload(ir_function_signature *ir, nir_function *function)
+{
+   if (ir->is_intrinsic)
+      return;
+
+   nir_function_overload *overload = nir_function_overload_create(function);
+
+   unsigned num_params = ir->parameters.length();
+   overload->num_params = num_params;
+   overload->params = ralloc_array(shader, nir_parameter, num_params);
+
+   unsigned i = 0;
+   foreach_in_list(ir_variable, param, &ir->parameters) {
+      switch (param->data.mode) {
+      case ir_var_function_in:
+      case ir_var_const_in:
+         /* A "const in" parameter is still an input; the ir_variable
+          * visitor accepts this mode as well, so it must not trip the
+          * unreachable() below.
+          */
+         overload->params[i].param_type = nir_parameter_in;
+         break;
+
+      case ir_var_function_out:
+         overload->params[i].param_type = nir_parameter_out;
+         break;
+
+      case ir_var_function_inout:
+         overload->params[i].param_type = nir_parameter_inout;
+         break;
+
+      default:
+         unreachable("not reached");
+      }
+
+      overload->params[i].type = param->type;
+      i++;
+   }
+
+   overload->return_type = ir->return_type;
+
+   _mesa_hash_table_insert(this->overload_table, ir, overload);
+}
+
+/* Visits every signature of the function in turn. */
+void
+nir_visitor::visit(ir_function *ir)
+{
+   foreach_in_list(ir_function_signature, signature, &ir->signatures) {
+      signature->accept(this);
+   }
+}
+
+/*
+ * Emits the implementation of a function signature.
+ *
+ * Looks up the overload created earlier by create_overload().  For a defined
+ * signature it creates the nir_function_impl, converts the parameters,
+ * sets up the return variable and converts the body with is_global cleared.
+ * Undefined signatures get a NULL impl; intrinsics are skipped.
+ */
+void
+nir_visitor::visit(ir_function_signature *ir)
+{
+   if (ir->is_intrinsic)
+      return;
+
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir);
+
+   assert(entry);
+   nir_function_overload *overload = (nir_function_overload *) entry->data;
+
+   if (ir->is_defined) {
+      nir_function_impl *impl = nir_function_impl_create(overload);
+      this->impl = impl;
+
+      unsigned num_params = overload->num_params;
+      impl->num_params = num_params;
+      impl->params = ralloc_array(this->shader, nir_variable *, num_params);
+      unsigned i = 0;
+      foreach_in_list(ir_variable, param, &ir->parameters) {
+         /* The ir_variable visitor leaves the new variable in this->var. */
+         param->accept(this);
+         impl->params[i] = this->var;
+         i++;
+      }
+
+      if (overload->return_type == glsl_type::void_type) {
+         impl->return_var = NULL;
+      } else {
+         /* Only the name and type are filled in here, so zero-allocate to
+          * keep every other field of the variable well defined.
+          */
+         impl->return_var = rzalloc(this->shader, nir_variable);
+         impl->return_var->name = ralloc_strdup(impl->return_var,
+                                                "return_var");
+         impl->return_var->type = overload->return_type;
+      }
+
+      this->is_global = false;
+
+      this->cf_node_list = &impl->body;
+      visit_exec_list(&ir->body, this);
+
+      this->is_global = true;
+   } else {
+      overload->impl = NULL;
+   }
+}
+
+/*
+ * Appends a new nir_loop to the current control flow list and converts the
+ * loop body into it.
+ */
+void
+nir_visitor::visit(ir_loop *ir)
+{
+   exec_list *const enclosing_list = this->cf_node_list;
+
+   nir_loop *new_loop = nir_loop_create(this->shader);
+   nir_cf_node_insert_end(enclosing_list, &new_loop->cf_node);
+
+   /* Emit the body inside the loop, then resume in the enclosing list. */
+   this->cf_node_list = &new_loop->body;
+   visit_exec_list(&ir->body_instructions, this);
+   this->cf_node_list = enclosing_list;
+}
+
+/*
+ * Evaluates the condition in the current control flow list, appends a
+ * nir_if there, and converts the then and else branches into its two lists.
+ */
+void
+nir_visitor::visit(ir_if *ir)
+{
+   /* The condition's instructions must land before the if node itself. */
+   nir_src cond_src = evaluate_rvalue(ir->condition);
+
+   exec_list *const enclosing_list = this->cf_node_list;
+
+   nir_if *branch = nir_if_create(this->shader);
+   branch->condition = cond_src;
+   nir_cf_node_insert_end(enclosing_list, &branch->cf_node);
+
+   this->cf_node_list = &branch->then_list;
+   visit_exec_list(&ir->then_instructions, this);
+
+   this->cf_node_list = &branch->else_list;
+   visit_exec_list(&ir->else_instructions, this);
+
+   this->cf_node_list = enclosing_list;
+}
+
+/*
+ * Emits a discard intrinsic.  A discard that carries a condition is wrapped
+ * in a nir_if so that it only takes effect when the condition is true.
+ */
+void
+nir_visitor::visit(ir_discard *ir)
+{
+   /*
+    * discards aren't treated as control flow, because before we lower them
+    * they can appear anywhere in the shader and the stuff after them may still
+    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
+    * discards will be immediately followed by a return.
+    */
+
+   nir_intrinsic_instr *discard =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
+
+   if (ir->condition) {
+      /* Guard the discard the same way conditional assignments are guarded;
+       * dropping the condition would discard unconditionally.
+       */
+      nir_if *if_stmt = nir_if_create(this->shader);
+      if_stmt->condition = evaluate_rvalue(ir->condition);
+      nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+      nir_instr_insert_after_cf_list(&if_stmt->then_list, &discard->instr);
+   } else {
+      nir_instr_insert_after_cf_list(this->cf_node_list, &discard->instr);
+   }
+}
+
+/*
+ * Appends an emit_vertex intrinsic whose first constant index is the
+ * stream id of the GLSL IR instruction.
+ */
+void
+nir_visitor::visit(ir_emit_vertex *ir)
+{
+   nir_intrinsic_instr *emit_vtx =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
+
+   emit_vtx->const_index[0] = ir->stream_id();
+
+   nir_instr_insert_after_cf_list(this->cf_node_list, &emit_vtx->instr);
+}
+
+/*
+ * Appends an end_primitive intrinsic whose first constant index is the
+ * stream id of the GLSL IR instruction.
+ */
+void
+nir_visitor::visit(ir_end_primitive *ir)
+{
+   nir_intrinsic_instr *end_prim =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
+
+   end_prim->const_index[0] = ir->stream_id();
+
+   nir_instr_insert_after_cf_list(this->cf_node_list, &end_prim->instr);
+}
+
+/* Appends the NIR break or continue matching the GLSL IR loop jump. */
+void
+nir_visitor::visit(ir_loop_jump *ir)
+{
+   nir_jump_type jump_kind;
+
+   if (ir->mode == ir_loop_jump::jump_break) {
+      jump_kind = nir_jump_break;
+   } else if (ir->mode == ir_loop_jump::jump_continue) {
+      jump_kind = nir_jump_continue;
+   } else {
+      unreachable("not reached");
+   }
+
+   nir_jump_instr *jump = nir_jump_instr_create(this->shader, jump_kind);
+   nir_instr_insert_after_cf_list(this->cf_node_list, &jump->instr);
+}
+
+/*
+ * Emits a return.  When the return carries a value, a copy_var into the
+ * function's return variable is emitted first, followed by the return jump.
+ */
+void
+nir_visitor::visit(ir_return *ir)
+{
+   if (ir->value != NULL) {
+      /* NOTE(review): this takes the source from deref_head, so it assumes
+       * visiting the value produces a dereference chain - confirm for
+       * values that are expressions.
+       */
+      ir->value->accept(this);
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+      copy->variables[0] = nir_deref_var_create(this->shader,
+                                                this->impl->return_var);
+      copy->variables[1] = this->deref_head;
+
+      /* The copy has to be placed in the instruction stream, ahead of the
+       * jump; otherwise the return value is never written.
+       */
+      nir_instr_insert_after_cf_list(this->cf_node_list, &copy->instr);
+   }
+
+   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+/*
+ * Emits a call.
+ *
+ * Calls to the atomic counter intrinsics become the matching NIR intrinsic
+ * followed by a store_var of its result into the call's return dereference.
+ * Every other call becomes a nir_call_instr to the overload recorded for the
+ * callee, with one dereference per actual parameter and, when the call has
+ * one, the return dereference.
+ */
+void
+nir_visitor::visit(ir_call *ir)
+{
+   if (ir->callee->is_intrinsic) {
+      nir_intrinsic_op op;
+      if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) {
+         op = nir_intrinsic_atomic_counter_read_var;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) {
+         op = nir_intrinsic_atomic_counter_inc_var;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) {
+         op = nir_intrinsic_atomic_counter_dec_var;
+      } else {
+         unreachable("not reached");
+      }
+
+      nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
+      /* The single actual parameter is the counter being accessed. */
+      ir_dereference *param =
+         (ir_dereference *) ir->actual_parameters.get_head();
+      param->accept(this);
+      instr->variables[0] = this->deref_head;
+      nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+
+      /* Write the intrinsic's scalar result to the call's return storage. */
+      nir_intrinsic_instr *store_instr =
+         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+      store_instr->num_components = 1;
+
+      ir->return_deref->accept(this);
+      store_instr->variables[0] = this->deref_head;
+      store_instr->src[0].is_ssa = true;
+      store_instr->src[0].ssa = &instr->dest.ssa;
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &store_instr->instr);
+
+      return;
+   }
+
+   struct hash_entry *entry =
+      _mesa_hash_table_search(this->overload_table, ir->callee);
+   assert(entry);
+   nir_function_overload *callee = (nir_function_overload *) entry->data;
+
+   nir_call_instr *instr = nir_call_instr_create(this->shader, callee);
+
+   unsigned i = 0;
+   foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
+      param->accept(this);
+      instr->params[i] = this->deref_head;
+      i++;
+   }
+
+   /* A call to a void function has no return dereference to visit. */
+   if (ir->return_deref != NULL) {
+      ir->return_deref->accept(this);
+      instr->return_deref = this->deref_head;
+   } else {
+      instr->return_deref = NULL;
+   }
+   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+}
+
+/*
+ * Emits an assignment.
+ *
+ * A whole-variable assignment from a dereference or constant becomes a
+ * single copy_var.  Anything else evaluates the right-hand side to an SSA
+ * value and emits a store_var; for a partial write mask the old value is
+ * loaded first and merged with the new components.  When the assignment has
+ * a condition, the copy or store is placed in the then-list of a new nir_if.
+ */
+void
+nir_visitor::visit(ir_assignment *ir)
+{
+   unsigned num_components = ir->lhs->type->vector_elements;
+
+   if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
+       (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
+      /* We're doing a plain-as-can-be copy, so emit a copy_var */
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);
+
+      /* each accept() leaves the chain it built in deref_head */
+      ir->lhs->accept(this);
+      copy->variables[0] = this->deref_head;
+
+      ir->rhs->accept(this);
+      copy->variables[1] = this->deref_head;
+
+
+      if (ir->condition) {
+         nir_if *if_stmt = nir_if_create(this->shader);
+         if_stmt->condition = evaluate_rvalue(ir->condition);
+         nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+         nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+      } else {
+         nir_instr_insert_after_cf_list(this->cf_node_list, &copy->instr);
+      }
+      return;
+   }
+
+   assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());
+
+   /* save the LHS chain before evaluating the RHS, which may replace
+    * deref_head
+    */
+   ir->lhs->accept(this);
+   nir_deref_var *lhs_deref = this->deref_head;
+   nir_src src = evaluate_rvalue(ir->rhs);
+
+   if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
+      /*
+       * We have no good way to update only part of a variable, so just load
+       * the LHS and do a vec operation to combine the old with the new, and
+       * then store it back into the LHS. Copy propagation should get rid of
+       * the mess.
+       */
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load->num_components = ir->lhs->type->vector_elements;
+      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+      load->variables[0] = lhs_deref;
+      nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
+
+      nir_op vec_op;
+      switch (ir->lhs->type->vector_elements) {
+         case 1: vec_op = nir_op_imov; break;
+         case 2: vec_op = nir_op_vec2; break;
+         case 3: vec_op = nir_op_vec3; break;
+         case 4: vec_op = nir_op_vec4; break;
+         default: unreachable("Invalid number of components"); break;
+      }
+      nir_alu_instr *vec = nir_alu_instr_create(this->shader, vec_op);
+      nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+      vec->dest.write_mask = (1 << num_components) - 1;
+
+      /* index of the next unused component of the packed RHS value */
+      unsigned component = 0;
+      for (unsigned i = 0; i < ir->lhs->type->vector_elements; i++) {
+         if (ir->write_mask & (1 << i)) {
+            vec->src[i].src = src;
+
+            /* GLSL IR will give us the input to the write-masked assignment
+             * in a single packed vector.  So, for example, if the
+             * writemask is xzw, then we have to swizzle x -> x, y -> z,
+             * and z -> w and get the y component from the load.
+             */
+            vec->src[i].swizzle[0] = component++;
+         } else {
+            /* unwritten component: keep the value loaded from the LHS */
+            vec->src[i].src.is_ssa = true;
+            vec->src[i].src.ssa = &load->dest.ssa;
+            vec->src[i].swizzle[0] = i;
+         }
+      }
+
+      nir_instr_insert_after_cf_list(this->cf_node_list, &vec->instr);
+
+      /* from here on the merged vector is the value to store */
+      src.is_ssa = true;
+      src.ssa = &vec->dest.dest.ssa;
+   }
+
+   nir_intrinsic_instr *store =
+      nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
+   store->num_components = ir->lhs->type->vector_elements;
+   /* the store gets its own copy of the LHS chain; the load above may
+    * already hold the original
+    */
+   nir_deref *store_deref = nir_copy_deref(this->shader, &lhs_deref->deref);
+   store->variables[0] = nir_deref_as_var(store_deref);
+   store->src[0] = src;
+
+   if (ir->condition) {
+      nir_if *if_stmt = nir_if_create(this->shader);
+      if_stmt->condition = evaluate_rvalue(ir->condition);
+      nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+      nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
+   } else {
+      nir_instr_insert_after_cf_list(this->cf_node_list, &store->instr);
+   }
+}
+
+/*
+ * Returns a pointer to the destination of an ALU, intrinsic or texture
+ * instruction.  An intrinsic whose info entry says it has no destination
+ * yields NULL.
+ *
+ * Only the instruction types generated by this file are handled; any other
+ * type is treated as unreachable.
+ */
+static nir_dest *
+get_instr_dest(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return &nir_instr_as_alu(instr)->dest.dest;
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!nir_intrinsic_infos[intrin->intrinsic].has_dest)
+         return NULL;
+      return &intrin->dest;
+   }
+
+   case nir_instr_type_tex:
+      return &nir_instr_as_tex(instr)->dest;
+
+   default:
+      unreachable("not reached");
+   }
+
+   return NULL;
+}
+
+/*
+ * Initialise the instruction's destination as an SSA value with
+ * num_components channels, insert the instruction after the control-flow
+ * list currently being built and record it as the visitor's latest result.
+ */
+void
+nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
+{
+   nir_ssa_dest_init(instr, get_instr_dest(instr), num_components, NULL);
+   nir_instr_insert_after_cf_list(this->cf_node_list, instr);
+
+   this->result = instr;
+}
+
+/*
+ * Visit an rvalue and hand back an SSA source referring to its value.
+ *
+ * Visiting a dereference or a constant only leaves a deref chain in
+ * deref_head, so for those a load_var intrinsic is emitted here to turn the
+ * chain into an actual value.
+ */
+nir_src
+nir_visitor::evaluate_rvalue(ir_rvalue* ir)
+{
+   ir->accept(this);
+
+   const bool needs_load = ir->as_dereference() || ir->as_constant();
+   if (needs_load) {
+      const unsigned width = ir->type->vector_elements;
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var);
+      load->num_components = width;
+      load->variables[0] = this->deref_head;
+      add_instr(&load->instr, width);
+   }
+
+   nir_dest *result_dest = get_instr_dest(this->result);
+   assert(result_dest->is_ssa);
+
+   nir_src value;
+   value.is_ssa = true;
+   value.ssa = &result_dest->ssa;
+   return value;
+}
+
+/*
+ * Create an ALU instruction for op, copy in one source per input the opcode
+ * declares, enable the low dest_size write-mask channels and add the
+ * instruction through add_instr().
+ */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src *srcs)
+{
+   nir_alu_instr *alu = nir_alu_instr_create(this->shader, op);
+   const unsigned num_inputs = nir_op_infos[op].num_inputs;
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      alu->src[s].src = srcs[s];
+   }
+
+   alu->dest.write_mask = (1 << dest_size) - 1;
+   add_instr(&alu->instr, dest_size);
+
+   return alu;
+}
+
+/* Convenience overload for opcodes that take exactly one source. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1)
+{
+   assert(nir_op_infos[op].num_inputs == 1);
+
+   nir_src *only_src = &src1;
+   return emit(op, dest_size, only_src);
+}
+
+/* Convenience overload for opcodes that take exactly two sources. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
+                  nir_src src2)
+{
+   assert(nir_op_infos[op].num_inputs == 2);
+
+   nir_src operands[2];
+   operands[0] = src1;
+   operands[1] = src2;
+   return emit(op, dest_size, operands);
+}
+
+/* Convenience overload for opcodes that take exactly three sources. */
+nir_alu_instr *
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
+                  nir_src src2, nir_src src3)
+{
+   assert(nir_op_infos[op].num_inputs == 3);
+
+   nir_src operands[3];
+   operands[0] = src1;
+   operands[1] = src2;
+   operands[2] = src3;
+   return emit(op, dest_size, operands);
+}
+
+/*
+ * Translate a GLSL IR expression.
+ *
+ * UBO loads and the interpolateAt*() operations are turned into intrinsics
+ * up front; every other operation evaluates its operands and emits a single
+ * ALU instruction.  When supports_ints is false all operand and result types
+ * are treated as float and the float-only opcode variants are selected.
+ */
+void
+nir_visitor::visit(ir_expression *ir)
+{
+   /* Some special cases */
+   switch (ir->operation) {
+   case ir_binop_ubo_load: {
+      ir_constant *const_index = ir->operands[1]->as_constant();
+
+      nir_intrinsic_op op;
+      if (const_index) {
+         op = nir_intrinsic_load_ubo;
+      } else {
+         op = nir_intrinsic_load_ubo_indirect;
+      }
+      nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+      load->num_components = ir->type->vector_elements;
+      load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
+      load->const_index[1] = 1; /* number of vec4's */
+      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      if (!const_index)
+         load->src[1] = evaluate_rvalue(ir->operands[1]);
+      add_instr(&load->instr, ir->type->vector_elements);
+
+      /*
+       * In UBO's, a true boolean value is any non-zero value, but we consider
+       * a true boolean to be ~0. Fix this up with a != 0 comparison.
+       */
+
+      if (ir->type->base_type == GLSL_TYPE_BOOL) {
+         nir_load_const_instr *const_zero = nir_load_const_instr_create(shader, 1);
+         const_zero->value.u[0] = 0;
+         nir_instr_insert_after_cf_list(this->cf_node_list, &const_zero->instr);
+
+         nir_alu_instr *compare = nir_alu_instr_create(shader, nir_op_ine);
+         compare->src[0].src.is_ssa = true;
+         compare->src[0].src.ssa = &load->dest.ssa;
+         compare->src[1].src.is_ssa = true;
+         compare->src[1].src.ssa = &const_zero->def;
+         for (unsigned i = 0; i < ir->type->vector_elements; i++)
+            compare->src[1].swizzle[i] = 0;
+         compare->dest.write_mask = (1 << ir->type->vector_elements) - 1;
+
+         add_instr(&compare->instr, ir->type->vector_elements);
+      }
+
+      return;
+   }
+
+   case ir_unop_interpolate_at_centroid:
+   case ir_binop_interpolate_at_offset:
+   case ir_binop_interpolate_at_sample: {
+      ir_dereference *deref = ir->operands[0]->as_dereference();
+      ir_swizzle *swizzle = NULL;
+      if (!deref) {
+         /* the api does not allow a swizzle here, but the varying packing code
+          * may have pushed one into here.
+          */
+         swizzle = ir->operands[0]->as_swizzle();
+         assert(swizzle);
+         deref = swizzle->val->as_dereference();
+         assert(deref);
+      }
+
+      deref->accept(this);
+
+      nir_intrinsic_op op;
+      if (this->deref_head->var->data.mode == nir_var_shader_in) {
+         switch (ir->operation) {
+         case ir_unop_interpolate_at_centroid:
+            op = nir_intrinsic_interp_var_at_centroid;
+            break;
+         case ir_binop_interpolate_at_offset:
+            op = nir_intrinsic_interp_var_at_offset;
+            break;
+         case ir_binop_interpolate_at_sample:
+            op = nir_intrinsic_interp_var_at_sample;
+            break;
+         default:
+            unreachable("Invalid interpolation intrinsic");
+         }
+      } else {
+         /* This case can happen if the vertex shader does not write the
+          * given varying.  In this case, the linker will lower it to a
+          * global variable.  Since interpolating a variable makes no
+          * sense, we'll just turn it into a load which will probably
+          * eventually end up as an SSA definition.
+          */
+         assert(this->deref_head->var->data.mode == nir_var_global);
+         op = nir_intrinsic_load_var;
+      }
+
+      nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);
+      intrin->num_components = deref->type->vector_elements;
+      intrin->variables[0] = this->deref_head;
+
+      if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
+          intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
+         intrin->src[0] = evaluate_rvalue(ir->operands[1]);
+
+      add_instr(&intrin->instr, deref->type->vector_elements);
+
+      if (swizzle) {
+         nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+         mov->dest.write_mask = (1 << swizzle->type->vector_elements) - 1;
+         mov->src[0].src.is_ssa = true;
+         mov->src[0].src.ssa = &intrin->dest.ssa;
+
+         mov->src[0].swizzle[0] = swizzle->mask.x;
+         mov->src[0].swizzle[1] = swizzle->mask.y;
+         mov->src[0].swizzle[2] = swizzle->mask.z;
+         mov->src[0].swizzle[3] = swizzle->mask.w;
+         for (unsigned i = deref->type->vector_elements; i < 4; i++)
+            mov->src[0].swizzle[i] = 0;
+
+         add_instr(&mov->instr, swizzle->type->vector_elements);
+      }
+
+      return;
+   }
+
+   default:
+      break;
+   }
+
+   nir_src srcs[4];
+   for (unsigned i = 0; i < ir->get_num_operands(); i++)
+      srcs[i] = evaluate_rvalue(ir->operands[i]);
+
+   /* Without integer support every operand is handled as a float. */
+   glsl_base_type types[4];
+   for (unsigned i = 0; i < ir->get_num_operands(); i++)
+      if (supports_ints)
+         types[i] = ir->operands[i]->type->base_type;
+      else
+         types[i] = GLSL_TYPE_FLOAT;
+
+   glsl_base_type out_type;
+   if (supports_ints)
+      out_type = ir->type->base_type;
+   else
+      out_type = GLSL_TYPE_FLOAT;
+
+   unsigned dest_size = ir->type->vector_elements;
+
+   nir_alu_instr *instr;
+   nir_op op;
+
+   switch (ir->operation) {
+   case ir_unop_bit_not: emit(nir_op_inot, dest_size, srcs); break;
+   case ir_unop_logic_not:
+      emit(supports_ints ? nir_op_inot : nir_op_fnot, dest_size, srcs);
+      break;
+   case ir_unop_neg:
+      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fneg : nir_op_ineg,
+                   dest_size, srcs);
+      break;
+   case ir_unop_abs:
+      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fabs : nir_op_iabs,
+                   dest_size, srcs);
+      break;
+   case ir_unop_saturate:
+      assert(types[0] == GLSL_TYPE_FLOAT);
+      instr = emit(nir_op_fsat, dest_size, srcs);
+      break;
+   case ir_unop_sign:
+      emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fsign : nir_op_isign,
+           dest_size, srcs);
+      break;
+   case ir_unop_rcp:  emit(nir_op_frcp, dest_size, srcs);  break;
+   case ir_unop_rsq:  emit(nir_op_frsq, dest_size, srcs);  break;
+   case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break;
+   case ir_unop_exp:  emit(nir_op_fexp, dest_size, srcs);  break;
+   case ir_unop_log:  emit(nir_op_flog, dest_size, srcs);  break;
+   case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break;
+   case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break;
+   case ir_unop_i2f:
+      emit(supports_ints ? nir_op_i2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_u2f:
+      emit(supports_ints ? nir_op_u2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_b2f:
+      emit(supports_ints ? nir_op_b2f : nir_op_fmov, dest_size, srcs);
+      break;
+   case ir_unop_f2i:  emit(nir_op_f2i, dest_size, srcs);   break;
+   case ir_unop_f2u:  emit(nir_op_f2u, dest_size, srcs);   break;
+   case ir_unop_f2b:  emit(nir_op_f2b, dest_size, srcs);   break;
+   case ir_unop_i2b:  emit(nir_op_i2b, dest_size, srcs);   break;
+   case ir_unop_b2i:  emit(nir_op_b2i, dest_size, srcs);   break;
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+   case ir_unop_bitcast_i2f:
+   case ir_unop_bitcast_f2i:
+   case ir_unop_bitcast_u2f:
+   case ir_unop_bitcast_f2u:
+      /* no-op */
+      emit(nir_op_imov, dest_size, srcs);
+      break;
+   case ir_unop_any:
+      switch (ir->operands[0]->type->vector_elements) {
+      case 2:
+         emit(supports_ints ? nir_op_bany2 : nir_op_fany2,
+              dest_size, srcs);
+         break;
+      case 3:
+         emit(supports_ints ? nir_op_bany3 : nir_op_fany3,
+              dest_size, srcs);
+         break;
+      case 4:
+         emit(supports_ints ? nir_op_bany4 : nir_op_fany4,
+              dest_size, srcs);
+         break;
+      default:
+         unreachable("not reached");
+      }
+      break;
+   case ir_unop_trunc: emit(nir_op_ftrunc, dest_size, srcs); break;
+   case ir_unop_ceil:  emit(nir_op_fceil,  dest_size, srcs); break;
+   case ir_unop_floor: emit(nir_op_ffloor, dest_size, srcs); break;
+   case ir_unop_fract: emit(nir_op_ffract, dest_size, srcs); break;
+   case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
+   case ir_unop_sin:   emit(nir_op_fsin,   dest_size, srcs); break;
+   case ir_unop_cos:   emit(nir_op_fcos,   dest_size, srcs); break;
+   case ir_unop_sin_reduced:
+      emit(nir_op_fsin_reduced, dest_size, srcs);
+      break;
+   case ir_unop_cos_reduced:
+      emit(nir_op_fcos_reduced, dest_size, srcs);
+      break;
+   case ir_unop_dFdx:        emit(nir_op_fddx,        dest_size, srcs); break;
+   case ir_unop_dFdy:        emit(nir_op_fddy,        dest_size, srcs); break;
+   case ir_unop_dFdx_fine:   emit(nir_op_fddx_fine,   dest_size, srcs); break;
+   case ir_unop_dFdy_fine:   emit(nir_op_fddy_fine,   dest_size, srcs); break;
+   case ir_unop_dFdx_coarse: emit(nir_op_fddx_coarse, dest_size, srcs); break;
+   case ir_unop_dFdy_coarse: emit(nir_op_fddy_coarse, dest_size, srcs); break;
+   case ir_unop_pack_snorm_2x16:
+      emit(nir_op_pack_snorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_pack_snorm_4x8:
+      emit(nir_op_pack_snorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_pack_unorm_2x16:
+      emit(nir_op_pack_unorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_pack_unorm_4x8:
+      emit(nir_op_pack_unorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_pack_half_2x16:
+      emit(nir_op_pack_half_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_snorm_2x16:
+      emit(nir_op_unpack_snorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_snorm_4x8:
+      emit(nir_op_unpack_snorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_unpack_unorm_2x16:
+      emit(nir_op_unpack_unorm_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_unorm_4x8:
+      emit(nir_op_unpack_unorm_4x8, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16:
+      emit(nir_op_unpack_half_2x16, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16_split_x:
+      emit(nir_op_unpack_half_2x16_split_x, dest_size, srcs);
+      break;
+   case ir_unop_unpack_half_2x16_split_y:
+      emit(nir_op_unpack_half_2x16_split_y, dest_size, srcs);
+      break;
+   case ir_unop_bitfield_reverse:
+      emit(nir_op_bitfield_reverse, dest_size, srcs);
+      break;
+   case ir_unop_bit_count:
+      emit(nir_op_bit_count, dest_size, srcs);
+      break;
+   case ir_unop_find_msb:
+      switch (types[0]) {
+      case GLSL_TYPE_UINT:
+         emit(nir_op_ufind_msb, dest_size, srcs);
+         break;
+      case GLSL_TYPE_INT:
+         emit(nir_op_ifind_msb, dest_size, srcs);
+         break;
+      default:
+         unreachable("Invalid type for findMSB()");
+      }
+      break;
+   case ir_unop_find_lsb:
+      emit(nir_op_find_lsb,  dest_size, srcs);
+      break;
+
+   case ir_unop_noise:
+      switch (ir->type->vector_elements) {
+      case 1:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise1_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise1_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise1_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise1_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 2:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise2_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise2_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise2_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise2_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 3:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise3_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise3_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise3_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise3_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      case 4:
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_fnoise4_1, dest_size, srcs); break;
+            case 2: emit(nir_op_fnoise4_2, dest_size, srcs); break;
+            case 3: emit(nir_op_fnoise4_3, dest_size, srcs); break;
+            case 4: emit(nir_op_fnoise4_4, dest_size, srcs); break;
+            default: unreachable("not reached");
+         }
+         break;
+      default:
+         unreachable("not reached");
+      }
+      break;
+   case ir_binop_add:
+   case ir_binop_sub:
+   case ir_binop_mul:
+   case ir_binop_div:
+   case ir_binop_mod:
+   case ir_binop_min:
+   case ir_binop_max:
+   case ir_binop_pow:
+   case ir_binop_bit_and:
+   case ir_binop_bit_or:
+   case ir_binop_bit_xor:
+   case ir_binop_lshift:
+   case ir_binop_rshift:
+      switch (ir->operation) {
+      case ir_binop_add:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fadd;
+         else
+            op = nir_op_iadd;
+         break;
+      case ir_binop_sub:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fsub;
+         else
+            op = nir_op_isub;
+         break;
+      case ir_binop_mul:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmul;
+         else
+            op = nir_op_imul;
+         break;
+      case ir_binop_div:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fdiv;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_idiv;
+         else
+            op = nir_op_udiv;
+         break;
+      case ir_binop_mod:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmod;
+         else
+            op = nir_op_umod;
+         break;
+      case ir_binop_min:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmin;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_imin;
+         else
+            op = nir_op_umin;
+         break;
+      case ir_binop_max:
+         if (out_type == GLSL_TYPE_FLOAT)
+            op = nir_op_fmax;
+         else if (out_type == GLSL_TYPE_INT)
+            op = nir_op_imax;
+         else
+            op = nir_op_umax;
+         break;
+      case ir_binop_bit_and:
+         op = nir_op_iand;
+         break;
+      case ir_binop_bit_or:
+         op = nir_op_ior;
+         break;
+      case ir_binop_bit_xor:
+         op = nir_op_ixor;
+         break;
+      case ir_binop_lshift:
+         op = nir_op_ishl;
+         break;
+      case ir_binop_rshift:
+         if (out_type == GLSL_TYPE_INT)
+            op = nir_op_ishr;
+         else
+            op = nir_op_ushr;
+         break;
+      case ir_binop_pow:
+         op = nir_op_fpow;
+         break;
+
+      default:
+         unreachable("not reached");
+      }
+
+      instr = emit(op, dest_size, srcs);
+
+      /* Broadcast a scalar operand across the channels of a vector one. */
+      if (ir->operands[0]->type->vector_elements != 1 &&
+          ir->operands[1]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
+              i++) {
+            instr->src[1].swizzle[i] = 0;
+         }
+      }
+
+      if (ir->operands[1]->type->vector_elements != 1 &&
+          ir->operands[0]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[1]->type->vector_elements;
+              i++) {
+            instr->src[0].swizzle[i] = 0;
+         }
+      }
+
+      break;
+   case ir_binop_imul_high:
+      emit(out_type == GLSL_TYPE_UINT ? nir_op_umul_high : nir_op_imul_high,
+           dest_size, srcs);
+      break;
+   case ir_binop_carry:  emit(nir_op_uadd_carry, dest_size, srcs);  break;
+   case ir_binop_borrow: emit(nir_op_usub_borrow, dest_size, srcs); break;
+   case ir_binop_less:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_flt, dest_size, srcs);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ilt, dest_size, srcs);
+         else
+            emit(nir_op_ult, dest_size, srcs);
+      } else {
+         emit(nir_op_slt, dest_size, srcs);
+      }
+      break;
+   case ir_binop_greater:
+      /* a > b is emitted as b < a, hence the swapped sources. */
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_flt, dest_size, srcs[1], srcs[0]);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ilt, dest_size, srcs[1], srcs[0]);
+         else
+            emit(nir_op_ult, dest_size, srcs[1], srcs[0]);
+      } else {
+         emit(nir_op_slt, dest_size, srcs[1], srcs[0]);
+      }
+      break;
+   case ir_binop_lequal:
+      /* a <= b is emitted as b >= a, hence the swapped sources. */
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fge, dest_size, srcs[1], srcs[0]);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ige, dest_size, srcs[1], srcs[0]);
+         else
+            emit(nir_op_uge, dest_size, srcs[1], srcs[0]);
+      } else {
+         /* Must be a >= comparison like the branches above; slt with
+          * swapped sources would compute a > b instead.
+          */
+         emit(nir_op_sge, dest_size, srcs[1], srcs[0]);
+      }
+      break;
+   case ir_binop_gequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fge, dest_size, srcs);
+         else if (types[0] == GLSL_TYPE_INT)
+            emit(nir_op_ige, dest_size, srcs);
+         else
+            emit(nir_op_uge, dest_size, srcs);
+      } else {
+         /* Float-only counterpart of fge; slt here would compute a < b. */
+         emit(nir_op_sge, dest_size, srcs);
+      }
+      break;
+   case ir_binop_equal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_feq, dest_size, srcs);
+         else
+            emit(nir_op_ieq, dest_size, srcs);
+      } else {
+         emit(nir_op_seq, dest_size, srcs);
+      }
+      break;
+   case ir_binop_nequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT)
+            emit(nir_op_fne, dest_size, srcs);
+         else
+            emit(nir_op_ine, dest_size, srcs);
+      } else {
+         emit(nir_op_sne, dest_size, srcs);
+      }
+      break;
+   case ir_binop_all_equal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT) {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_feq, dest_size, srcs); break;
+               case 2: emit(nir_op_ball_fequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_ball_fequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_ball_fequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         } else {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_ieq, dest_size, srcs); break;
+               case 2: emit(nir_op_ball_iequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_ball_iequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_ball_iequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         }
+      } else {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_seq, dest_size, srcs); break;
+            case 2: emit(nir_op_fall_equal2, dest_size, srcs); break;
+            case 3: emit(nir_op_fall_equal3, dest_size, srcs); break;
+            case 4: emit(nir_op_fall_equal4, dest_size, srcs); break;
+            default:
+               unreachable("not reached");
+         }
+      }
+      break;
+   case ir_binop_any_nequal:
+      if (supports_ints) {
+         if (types[0] == GLSL_TYPE_FLOAT) {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_fne, dest_size, srcs); break;
+               case 2: emit(nir_op_bany_fnequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_bany_fnequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_bany_fnequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         } else {
+            switch (ir->operands[0]->type->vector_elements) {
+               case 1: emit(nir_op_ine, dest_size, srcs); break;
+               case 2: emit(nir_op_bany_inequal2, dest_size, srcs); break;
+               case 3: emit(nir_op_bany_inequal3, dest_size, srcs); break;
+               case 4: emit(nir_op_bany_inequal4, dest_size, srcs); break;
+               default:
+                  unreachable("not reached");
+            }
+         }
+      } else {
+         switch (ir->operands[0]->type->vector_elements) {
+            case 1: emit(nir_op_sne, dest_size, srcs); break;
+            case 2: emit(nir_op_fany_nequal2, dest_size, srcs); break;
+            case 3: emit(nir_op_fany_nequal3, dest_size, srcs); break;
+            case 4: emit(nir_op_fany_nequal4, dest_size, srcs); break;
+            default:
+               unreachable("not reached");
+         }
+      }
+      break;
+   case ir_binop_logic_and:
+      if (supports_ints)
+         emit(nir_op_iand, dest_size, srcs);
+      else
+         emit(nir_op_fand, dest_size, srcs);
+      break;
+   case ir_binop_logic_or:
+      if (supports_ints)
+         emit(nir_op_ior, dest_size, srcs);
+      else
+         emit(nir_op_for, dest_size, srcs);
+      break;
+   case ir_binop_logic_xor:
+      if (supports_ints)
+         emit(nir_op_ixor, dest_size, srcs);
+      else
+         emit(nir_op_fxor, dest_size, srcs);
+      break;
+   case ir_binop_dot:
+      switch (ir->operands[0]->type->vector_elements) {
+         case 2: emit(nir_op_fdot2, dest_size, srcs); break;
+         case 3: emit(nir_op_fdot3, dest_size, srcs); break;
+         case 4: emit(nir_op_fdot4, dest_size, srcs); break;
+         default:
+            unreachable("not reached");
+      }
+      break;
+
+   case ir_binop_pack_half_2x16_split:
+         emit(nir_op_pack_half_2x16_split, dest_size, srcs);
+         break;
+   case ir_binop_bfm:   emit(nir_op_bfm, dest_size, srcs);   break;
+   case ir_binop_ldexp: emit(nir_op_ldexp, dest_size, srcs); break;
+   case ir_triop_fma:   emit(nir_op_ffma, dest_size, srcs);  break;
+   case ir_triop_lrp:
+      instr = emit(nir_op_flrp, dest_size, srcs);
+      if (ir->operands[0]->type->vector_elements != 1 &&
+          ir->operands[2]->type->vector_elements == 1) {
+         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
+              i++) {
+            instr->src[2].swizzle[i] = 0;
+         }
+      }
+      break;
+   case ir_triop_csel:
+      if (supports_ints)
+         emit(nir_op_bcsel, dest_size, srcs);
+      else
+         emit(nir_op_fcsel, dest_size, srcs);
+      break;
+   case ir_triop_bfi:
+      instr = emit(nir_op_bfi, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[1]->type->vector_elements; i++) {
+         instr->src[0].swizzle[i] = 0;
+      }
+      break;
+   case ir_triop_bitfield_extract:
+      instr = emit(out_type == GLSL_TYPE_INT ? nir_op_ibitfield_extract :
+                   nir_op_ubitfield_extract, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
+         instr->src[1].swizzle[i] = 0;
+         instr->src[2].swizzle[i] = 0;
+      }
+      break;
+   case ir_quadop_bitfield_insert:
+      instr = emit(nir_op_bitfield_insert, dest_size, srcs);
+      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
+         instr->src[2].swizzle[i] = 0;
+         instr->src[3].swizzle[i] = 0;
+      }
+      break;
+   case ir_quadop_vector:
+      switch (ir->type->vector_elements) {
+         case 2: emit(nir_op_vec2, dest_size, srcs); break;
+         case 3: emit(nir_op_vec3, dest_size, srcs); break;
+         case 4: emit(nir_op_vec4, dest_size, srcs); break;
+         default: unreachable("not reached");
+      }
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+}
+
+/*
+ * Translate a swizzle into a move (imov or fmov depending on supports_ints)
+ * whose single source carries the requested component selection.
+ */
+void
+nir_visitor::visit(ir_swizzle *ir)
+{
+   const unsigned width = ir->type->vector_elements;
+   const nir_op mov_op = supports_ints ? nir_op_imov : nir_op_fmov;
+
+   nir_alu_instr *mov = emit(mov_op, width, evaluate_rvalue(ir->val));
+
+   const unsigned channels[4] = {
+      ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w
+   };
+   for (unsigned c = 0; c < width; c++) {
+      mov->src[0].swizzle[c] = channels[c];
+   }
+}
+
+/*
+ * Translate a texture operation into a nir_tex_instr.
+ *
+ * The number of sources is computed first so the instruction can be created
+ * with that many slots; the sources are then filled in one after another and
+ * the final assert checks that the two counts agree.
+ */
+void
+nir_visitor::visit(ir_texture *ir)
+{
+   unsigned num_srcs;
+   nir_texop op;
+   /* Pick the NIR texture opcode and count the sources specific to it. */
+   switch (ir->op) {
+   case ir_tex:
+      op = nir_texop_tex;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_txb:
+   case ir_txl:
+      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
+      num_srcs = 2; /* coordinate, bias/lod */
+      break;
+
+   case ir_txd:
+      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
+      num_srcs = 3;
+      break;
+
+   case ir_txf:
+      op = nir_texop_txf;
+      if (ir->lod_info.lod != NULL)
+         num_srcs = 2; /* coordinate, lod */
+      else
+         num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_txf_ms:
+      op = nir_texop_txf_ms;
+      num_srcs = 2; /* coordinate, sample_index */
+      break;
+
+   case ir_txs:
+      op = nir_texop_txs;
+      if (ir->lod_info.lod != NULL)
+         num_srcs = 1; /* lod */
+      else
+         num_srcs = 0;
+      break;
+
+   case ir_lod:
+      op = nir_texop_lod;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_tg4:
+      op = nir_texop_tg4;
+      num_srcs = 1; /* coordinate */
+      break;
+
+   case ir_query_levels:
+      op = nir_texop_query_levels;
+      num_srcs = 0;
+      break;
+
+   default:
+      unreachable("not reached");
+   }
+
+   /* Optional sources.  A constant offset does not count: it is stored in
+    * const_offset below instead of being passed as a source.
+    */
+   if (ir->projector != NULL)
+      num_srcs++;
+   if (ir->shadow_comparitor != NULL)
+      num_srcs++;
+   if (ir->offset != NULL && ir->offset->as_constant() == NULL)
+      num_srcs++;
+
+   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);
+
+   instr->op = op;
+   instr->sampler_dim =
+      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
+   instr->is_array = ir->sampler->type->sampler_array;
+   instr->is_shadow = ir->sampler->type->sampler_shadow;
+   /* is_new_style_shadow is only written for shadow samplers. */
+   if (instr->is_shadow)
+      instr->is_new_style_shadow = (ir->type->vector_elements == 1);
+   switch (ir->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      instr->dest_type = nir_type_float;
+      break;
+   case GLSL_TYPE_INT:
+      instr->dest_type = nir_type_int;
+      break;
+   case GLSL_TYPE_UINT:
+      instr->dest_type = nir_type_unsigned;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   /* Visiting the sampler leaves its deref chain in deref_head. */
+   ir->sampler->accept(this);
+   instr->sampler = this->deref_head;
+
+   /* Index of the next source slot to fill. */
+   unsigned src_number = 0;
+
+   if (ir->coordinate != NULL) {
+      instr->coord_components = ir->coordinate->type->vector_elements;
+      instr->src[src_number].src = evaluate_rvalue(ir->coordinate);
+      instr->src[src_number].src_type = nir_tex_src_coord;
+      src_number++;
+   }
+
+   if (ir->projector != NULL) {
+      instr->src[src_number].src = evaluate_rvalue(ir->projector);
+      instr->src[src_number].src_type = nir_tex_src_projector;
+      src_number++;
+   }
+
+   if (ir->shadow_comparitor != NULL) {
+      instr->src[src_number].src = evaluate_rvalue(ir->shadow_comparitor);
+      instr->src[src_number].src_type = nir_tex_src_comparitor;
+      src_number++;
+   }
+
+   if (ir->offset != NULL) {
+      /* we don't support multiple offsets yet */
+      assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());
+
+      ir_constant *const_offset = ir->offset->as_constant();
+      if (const_offset != NULL) {
+         for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
+            instr->const_offset[i] = const_offset->value.i[i];
+      } else {
+         instr->src[src_number].src = evaluate_rvalue(ir->offset);
+         instr->src[src_number].src_type = nir_tex_src_offset;
+         src_number++;
+      }
+   }
+
+   /* Fill in the op-specific sources counted in the first switch. */
+   switch (ir->op) {
+   case ir_txb:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.bias);
+      instr->src[src_number].src_type = nir_tex_src_bias;
+      src_number++;
+      break;
+
+   case ir_txl:
+   case ir_txf:
+   case ir_txs:
+      if (ir->lod_info.lod != NULL) {
+         instr->src[src_number].src = evaluate_rvalue(ir->lod_info.lod);
+         instr->src[src_number].src_type = nir_tex_src_lod;
+         src_number++;
+      }
+      break;
+
+   case ir_txd:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdx);
+      instr->src[src_number].src_type = nir_tex_src_ddx;
+      src_number++;
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdy);
+      instr->src[src_number].src_type = nir_tex_src_ddy;
+      src_number++;
+      break;
+
+   case ir_txf_ms:
+      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.sample_index);
+      instr->src[src_number].src_type = nir_tex_src_ms_index;
+      src_number++;
+      break;
+
+   case ir_tg4:
+      /* The gather component is read as a constant, not passed as a source. */
+      instr->component = ir->lod_info.component->as_constant()->value.u[0];
+      break;
+
+   default:
+      break;
+   }
+
+   /* Every slot allocated above must have been filled exactly once. */
+   assert(src_number == num_srcs);
+
+   add_instr(&instr->instr, nir_tex_instr_dest_size(instr));
+}
+
+void
+nir_visitor::visit(ir_constant *ir)
+{
+   /*
+    * We don't know whether this constant is an array or struct that gets
+    * dereferenced, so do the safe thing and make it a read-only local
+    * variable with a constant initializer, and leave a dereference of that
+    * variable in deref_head/deref_tail.
+    */
+
+   nir_variable *const_var = ralloc(this->shader, nir_variable);
+   const_var->type = ir->type;
+   const_var->name = ralloc_strdup(const_var, "const_temp");
+   const_var->constant_initializer = constant_copy(ir, const_var);
+   const_var->data.read_only = true;
+   const_var->data.mode = nir_var_local;
+   exec_list_push_tail(&this->impl->locals, &const_var->node);
+
+   nir_deref_var *head = nir_deref_var_create(this->shader, const_var);
+   this->deref_head = head;
+   this->deref_tail = &head->deref;
+}
+
+/*
+ * Start a new deref chain at the NIR variable that var_table maps the GLSL
+ * IR variable to.  The variable must already be in the table.
+ */
+void
+nir_visitor::visit(ir_dereference_variable *ir)
+{
+   struct hash_entry *found = _mesa_hash_table_search(this->var_table, ir->var);
+   assert(found);
+
+   nir_variable *nvar = (nir_variable *) found->data;
+
+   this->deref_head = nir_deref_var_create(this->shader, nvar);
+   this->deref_tail = &this->deref_head->deref;
+}
+
+/*
+ * Extend the deref chain built for the record with a struct-member link for
+ * the named field.
+ */
+void
+nir_visitor::visit(ir_dereference_record *ir)
+{
+   /* Build the chain for the record itself first. */
+   ir->record->accept(this);
+
+   const int member_idx = this->deref_tail->type->field_index(ir->field);
+   assert(member_idx >= 0);
+
+   nir_deref_struct *member =
+      nir_deref_struct_create(this->shader, member_idx);
+   member->deref.type = ir->type;
+
+   /* Hook the new link onto the end of the chain and make it the tail. */
+   this->deref_tail->child = &member->deref;
+   this->deref_tail = &member->deref;
+}
+
+/*
+ * Extend the deref chain built for the array with an array-element link,
+ * direct when the index is a constant and indirect otherwise.
+ */
+void
+nir_visitor::visit(ir_dereference_array *ir)
+{
+   nir_deref_array *elem = nir_deref_array_create(this->shader);
+   elem->deref.type = ir->type;
+
+   /* The index is handled before the array is visited: evaluating it runs
+    * the visitor on the index expression, and the chain for the array must
+    * be the one left in deref_tail afterwards.
+    */
+   ir_constant *const_index = ir->array_index->as_constant();
+   if (const_index == NULL) {
+      elem->deref_array_type = nir_deref_array_type_indirect;
+      elem->indirect = evaluate_rvalue(ir->array_index);
+   } else {
+      elem->deref_array_type = nir_deref_array_type_direct;
+      elem->base_offset = const_index->value.u[0];
+   }
+
+   ir->array->accept(this);
+
+   this->deref_tail->child = &elem->deref;
+   this->deref_tail = &elem->deref;
+}
diff --git a/mesalib/src/glsl/nir/glsl_to_nir.h b/mesalib/src/glsl/nir/glsl_to_nir.h
new file mode 100644
index 000000000..58b2cee6a
--- /dev/null
+++ b/mesalib/src/glsl/nir/glsl_to_nir.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "../glsl_parser_extras.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Declared with C linkage when included from C++.
+ * NOTE(review): presumably builds a new nir_shader from the GLSL IR list
+ * "ir"; the meaning of native_integers is not visible from this header.
+ */
+nir_shader *glsl_to_nir(exec_list * ir, _mesa_glsl_parse_state *state,
+                        bool native_integers);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mesalib/src/glsl/nir/nir.c b/mesalib/src/glsl/nir/nir.c
new file mode 100644
index 000000000..5b0e4bc50
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir.c
@@ -0,0 +1,2085 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+/**
+ * Allocates a new, empty shader out of mem_ctx.
+ *
+ * The uniforms, inputs and outputs hash tables use the string hash/compare
+ * callbacks and are parented to the shader.  All lists start empty and all
+ * counters start at zero.  The options pointer is stored, not copied.
+ */
+nir_shader *
+nir_shader_create(void *mem_ctx, const nir_shader_compiler_options *options)
+{
+   nir_shader *shader = ralloc(mem_ctx, nir_shader);
+
+   shader->options = options;
+
+   /* lookup tables, each paired with its counter */
+   shader->uniforms = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                              _mesa_key_string_equal);
+   shader->num_uniforms = 0;
+
+   shader->inputs = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                            _mesa_key_string_equal);
+   shader->num_inputs = 0;
+
+   shader->outputs = _mesa_hash_table_create(shader, _mesa_key_hash_string,
+                                             _mesa_key_string_equal);
+   shader->num_outputs = 0;
+
+   /* no user structures yet */
+   shader->user_structures = NULL;
+   shader->num_user_structures = 0;
+
+   /* lists and the global register index counter */
+   exec_list_make_empty(&shader->globals);
+   exec_list_make_empty(&shader->system_values);
+   exec_list_make_empty(&shader->functions);
+   exec_list_make_empty(&shader->registers);
+   shader->reg_alloc = 0;
+
+   return shader;
+}
+
+/**
+ * Allocates a register from mem_ctx, gives it empty uses, defs and if_uses
+ * sets and appends it to list.  The index and is_global fields are left for
+ * the caller to fill in.
+ */
+static nir_register *
+reg_create(void *mem_ctx, struct exec_list *list)
+{
+   nir_register *new_reg = ralloc(mem_ctx, nir_register);
+
+   new_reg->name = NULL;
+   new_reg->is_packed = false;
+   new_reg->num_components = 0;
+   new_reg->num_array_elems = 0;
+
+   /* pointer-keyed sets, allocated from the same context as the register */
+   new_reg->uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   new_reg->defs = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                    _mesa_key_pointer_equal);
+   new_reg->if_uses = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+
+   exec_list_push_tail(list, &new_reg->node);
+   return new_reg;
+}
+
+/**
+ * Creates a register on the shader's register list, marks it global and
+ * gives it the next index from shader->reg_alloc.
+ */
+nir_register *
+nir_global_reg_create(nir_shader *shader)
+{
+   nir_register *global = reg_create(shader, &shader->registers);
+
+   global->is_global = true;
+   global->index = shader->reg_alloc;
+   shader->reg_alloc++;
+
+   return global;
+}
+
+/**
+ * Creates a register on impl's register list, marks it non-global and gives
+ * it the next index from impl->reg_alloc.  The register is allocated from
+ * the ralloc parent of impl rather than from impl itself.
+ */
+nir_register *
+nir_local_reg_create(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_parent(impl);
+   nir_register *local = reg_create(mem_ctx, &impl->registers);
+
+   local->is_global = false;
+   local->index = impl->reg_alloc;
+   impl->reg_alloc++;
+
+   return local;
+}
+
+/**
+ * Unlinks reg from the register list it is on.  Nothing is freed here and
+ * the uses/defs sets are left untouched.
+ */
+void
+nir_reg_remove(nir_register *reg)
+{
+   exec_node_remove(&reg->node);
+}
+
+/**
+ * Allocates a function owned by shader, with an empty overload list, and
+ * appends it to shader->functions.  The name pointer is stored as given; no
+ * copy of the string is made.
+ */
+nir_function *
+nir_function_create(nir_shader *shader, const char *name)
+{
+   nir_function *function = ralloc(shader, nir_function);
+
+   function->shader = shader;
+   function->name = name;
+   exec_list_make_empty(&function->overload_list);
+
+   exec_list_push_tail(&shader->functions, &function->node);
+
+   return function;
+}
+
+/**
+ * Allocates an overload for func from func's ralloc parent and appends it
+ * to func->overload_list.  The overload starts with no parameters, a void
+ * return type and no implementation.
+ */
+nir_function_overload *
+nir_function_overload_create(nir_function *func)
+{
+   nir_function_overload *ovl =
+      ralloc(ralloc_parent(func), nir_function_overload);
+
+   ovl->function = func;
+   ovl->impl = NULL;
+   ovl->return_type = glsl_void_type();
+   ovl->params = NULL;
+   ovl->num_params = 0;
+
+   exec_list_push_tail(&func->overload_list, &ovl->node);
+
+   return ovl;
+}
+
+/**
+ * Copies *src into *dest.
+ *
+ * An SSA source just copies the ssa member.  A register source copies reg
+ * and base_offset, and recursively duplicates the indirect source (if any)
+ * into storage allocated from mem_ctx.
+ */
+void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
+{
+   dest->is_ssa = src->is_ssa;
+
+   if (src->is_ssa) {
+      dest->ssa = src->ssa;
+      return;
+   }
+
+   dest->reg.reg = src->reg.reg;
+   dest->reg.base_offset = src->reg.base_offset;
+
+   if (!src->reg.indirect) {
+      dest->reg.indirect = NULL;
+      return;
+   }
+
+   dest->reg.indirect = ralloc(mem_ctx, nir_src);
+   nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+}
+
+/**
+ * Copies *src into *dest.
+ *
+ * An SSA destination just copies the ssa member.  A register destination
+ * copies reg and base_offset, and duplicates the indirect source (if any)
+ * into storage allocated from mem_ctx.
+ */
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+{
+   dest->is_ssa = src->is_ssa;
+
+   if (src->is_ssa) {
+      dest->ssa = src->ssa;
+      return;
+   }
+
+   dest->reg.reg = src->reg.reg;
+   dest->reg.base_offset = src->reg.base_offset;
+
+   if (!src->reg.indirect) {
+      dest->reg.indirect = NULL;
+      return;
+   }
+
+   /* the indirect of a destination is itself a nir_src */
+   dest->reg.indirect = ralloc(mem_ctx, nir_src);
+   nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+}
+
+/**
+ * Copies an ALU source: the underlying nir_src (via nir_src_copy), the abs
+ * and negate modifiers and all four swizzle entries.
+ */
+void
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+{
+   nir_src_copy(&dest->src, &src->src, mem_ctx);
+
+   dest->negate = src->negate;
+   dest->abs = src->abs;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      dest->swizzle[chan] = src->swizzle[chan];
+   }
+}
+
+/**
+ * Copies an ALU destination: the underlying nir_dest (via nir_dest_copy)
+ * plus the saturate flag and the write mask.
+ */
+void
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+{
+   nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+
+   dest->saturate = src->saturate;
+   dest->write_mask = src->write_mask;
+}
+
+/** Records pred in block's predecessor set; successors are not touched. */
+static inline void
+block_add_pred(nir_block *block, nir_block *pred)
+{
+   _mesa_set_add(block->predecessors, pred);
+}
+
+/**
+ * Common initialisation for every control flow node: sets the node type,
+ * clears the parent and initialises the list link.
+ */
+static void
+cf_init(nir_cf_node *node, nir_cf_node_type type)
+{
+   node->type = type;
+   node->parent = NULL;
+   exec_node_init(&node->node);
+}
+
+/**
+ * Makes succ1 and succ2 the two successors of pred and records pred as a
+ * predecessor of every non-NULL successor.
+ *
+ * Either successor may be NULL: move_successors() forwards whatever the
+ * source block had, which can be nothing.  A NULL slot is stored as-is and
+ * adds no edge.  The previous successors of pred are overwritten without
+ * being unlinked, so callers unlink them first.
+ */
+static void
+link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2)
+{
+   pred->successors[0] = succ1;
+   if (succ1 != NULL)
+      block_add_pred(succ1, pred);
+
+   pred->successors[1] = succ2;
+   if (succ2 != NULL)
+      block_add_pred(succ2, pred);
+}
+
+/**
+ * Removes the edge pred -> succ.
+ *
+ * If succ was in slot 0, the slot 1 successor is shifted down into slot 0;
+ * slot 1 is cleared either way.  succ must actually be a successor of pred
+ * and pred must be in succ's predecessor set (both are asserted).
+ */
+static void
+unlink_blocks(nir_block *pred, nir_block *succ)
+{
+   if (pred->successors[0] != succ) {
+      assert(pred->successors[1] == succ);
+   } else {
+      pred->successors[0] = pred->successors[1];
+   }
+   pred->successors[1] = NULL;
+
+   struct set_entry *pred_entry = _mesa_set_search(succ->predecessors, pred);
+   assert(pred_entry);
+   _mesa_set_remove(succ->predecessors, pred_entry);
+}
+
+/**
+ * Removes every outgoing edge of block, leaving both successor slots NULL.
+ *
+ * Slot 1 is unlinked first on purpose: unlink_blocks() shifts slot 1 into
+ * slot 0 when it removes the slot 0 successor, so going in the other order
+ * would leave the second successor linked.
+ */
+static void
+unlink_block_successors(nir_block *block)
+{
+   if (block->successors[1] != NULL)
+      unlink_blocks(block, block->successors[1]);
+   if (block->successors[0] != NULL)
+      unlink_blocks(block, block->successors[0]);
+}
+
+
+/**
+ * Creates the implementation of overload, which must not have one yet.
+ *
+ * The impl is allocated from the overload's ralloc parent.  It starts with
+ * a start block (the only node in the body) and an end block (not on the
+ * body list), joined by a single CFG edge.
+ */
+nir_function_impl *
+nir_function_impl_create(nir_function_overload *overload)
+{
+   assert(overload->impl == NULL);
+
+   void *mem_ctx = ralloc_parent(overload);
+   nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl);
+
+   /* tie the overload and the implementation together */
+   impl->overload = overload;
+   overload->impl = impl;
+
+   cf_init(&impl->cf_node, nir_cf_node_function);
+
+   exec_list_make_empty(&impl->body);
+   exec_list_make_empty(&impl->registers);
+   exec_list_make_empty(&impl->locals);
+
+   impl->params = NULL;
+   impl->num_params = 0;
+   impl->return_var = NULL;
+   impl->ssa_alloc = 0;
+   impl->reg_alloc = 0;
+   impl->valid_metadata = nir_metadata_none;
+
+   /* entry and exit blocks; both are children of the impl */
+   nir_block *entry = nir_block_create(mem_ctx);
+   nir_block *exit = nir_block_create(mem_ctx);
+
+   entry->cf_node.parent = &impl->cf_node;
+   exit->cf_node.parent = &impl->cf_node;
+
+   impl->start_block = entry;
+   impl->end_block = exit;
+
+   exec_list_push_tail(&impl->body, &entry->cf_node.node);
+
+   /* entry falls through to exit */
+   entry->successors[0] = exit;
+   block_add_pred(exit, entry);
+
+   return impl;
+}
+
+/**
+ * Allocates an unlinked basic block from mem_ctx: no successors, empty
+ * predecessor and dominance-frontier sets, no immediate dominator and an
+ * empty instruction list.
+ */
+nir_block *
+nir_block_create(void *mem_ctx)
+{
+   nir_block *blk = ralloc(mem_ctx, nir_block);
+
+   cf_init(&blk->cf_node, nir_cf_node_block);
+
+   blk->successors[0] = NULL;
+   blk->successors[1] = NULL;
+
+   blk->predecessors = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+   blk->imm_dom = NULL;
+   blk->dom_frontier = _mesa_set_create(mem_ctx, _mesa_hash_pointer,
+                                        _mesa_key_pointer_equal);
+
+   exec_list_make_empty(&blk->instr_list);
+
+   return blk;
+}
+
+/** Resets src to a non-SSA source with no register, indirect or offset. */
+static inline void
+src_init(nir_src *src)
+{
+   src->is_ssa = false;
+
+   src->reg.base_offset = 0;
+   src->reg.indirect = NULL;
+   src->reg.reg = NULL;
+}
+
+/**
+ * Allocates an if statement from mem_ctx with an unset condition and one
+ * empty block in each of the then and else lists.  The two blocks are
+ * parented to the if but no CFG edges are created.
+ */
+nir_if *
+nir_if_create(void *mem_ctx)
+{
+   nir_if *new_if = ralloc(mem_ctx, nir_if);
+
+   cf_init(&new_if->cf_node, nir_cf_node_if);
+   src_init(&new_if->condition);
+
+   /* then branch */
+   nir_block *then_block = nir_block_create(mem_ctx);
+   then_block->cf_node.parent = &new_if->cf_node;
+   exec_list_make_empty(&new_if->then_list);
+   exec_list_push_tail(&new_if->then_list, &then_block->cf_node.node);
+
+   /* else branch */
+   nir_block *else_block = nir_block_create(mem_ctx);
+   else_block->cf_node.parent = &new_if->cf_node;
+   exec_list_make_empty(&new_if->else_list);
+   exec_list_push_tail(&new_if->else_list, &else_block->cf_node.node);
+
+   return new_if;
+}
+
+/**
+ * Allocates a loop from mem_ctx whose body is a single empty block.  That
+ * block is linked to itself, so it is both header and back-edge source.
+ */
+nir_loop *
+nir_loop_create(void *mem_ctx)
+{
+   nir_loop *new_loop = ralloc(mem_ctx, nir_loop);
+
+   cf_init(&new_loop->cf_node, nir_cf_node_loop);
+
+   nir_block *header = nir_block_create(mem_ctx);
+   header->cf_node.parent = &new_loop->cf_node;
+
+   exec_list_make_empty(&new_loop->body);
+   exec_list_push_tail(&new_loop->body, &header->cf_node.node);
+
+   /* back edge: the block is its own successor and predecessor */
+   header->successors[0] = header;
+   block_add_pred(header, header);
+
+   return new_loop;
+}
+
+/**
+ * Common initialisation for every instruction: records the type, marks the
+ * instruction as belonging to no block and initialises the list link.
+ */
+static void
+instr_init(nir_instr *instr, nir_instr_type type)
+{
+   exec_node_init(&instr->node);
+   instr->block = NULL;
+   instr->type = type;
+}
+
+/** Resets dest to a non-SSA destination with no register, indirect or offset. */
+static void
+dest_init(nir_dest *dest)
+{
+   dest->is_ssa = false;
+
+   dest->reg.base_offset = 0;
+   dest->reg.indirect = NULL;
+   dest->reg.reg = NULL;
+}
+
+/**
+ * Resets an ALU destination: empty nir_dest, all four write-mask bits set
+ * (0xf) and saturate off.
+ */
+static void
+alu_dest_init(nir_alu_dest *dest)
+{
+   dest_init(&dest->dest);
+
+   dest->write_mask = 0xf;
+   dest->saturate = false;
+}
+
+/**
+ * Resets an ALU source: empty nir_src, no abs/negate modifier and the
+ * identity swizzle (entry i selects component i).
+ */
+static void
+alu_src_init(nir_alu_src *src)
+{
+   src_init(&src->src);
+
+   src->negate = false;
+   src->abs = false;
+
+   for (unsigned chan = 0; chan < 4; chan++)
+      src->swizzle[chan] = chan;
+}
+
+/**
+ * Allocates an ALU instruction for op from mem_ctx.
+ *
+ * The allocation is sized to hold nir_op_infos[op].num_inputs sources after
+ * the instruction itself; the destination and every source are reset to
+ * their defaults.
+ */
+nir_alu_instr *
+nir_alu_instr_create(void *mem_ctx, nir_op op)
+{
+   unsigned src_count = nir_op_infos[op].num_inputs;
+
+   nir_alu_instr *alu =
+      ralloc_size(mem_ctx,
+                  sizeof(nir_alu_instr) + src_count * sizeof(nir_alu_src));
+
+   instr_init(&alu->instr, nir_instr_type_alu);
+   alu->op = op;
+
+   alu_dest_init(&alu->dest);
+
+   unsigned s = 0;
+   while (s < src_count) {
+      alu_src_init(&alu->src[s]);
+      s++;
+   }
+
+   return alu;
+}
+
+/**
+ * Allocates a jump instruction of the given jump type from mem_ctx.  The
+ * instruction is not inserted into any block.
+ */
+nir_jump_instr *
+nir_jump_instr_create(void *mem_ctx, nir_jump_type type)
+{
+   nir_jump_instr *instr = ralloc(mem_ctx, nir_jump_instr);
+   instr_init(&instr->instr, nir_instr_type_jump);
+   /* this is the jump kind; the instruction type lives in instr->instr */
+   instr->type = type;
+   return instr;
+}
+
+/**
+ * Allocates a load_const instruction from mem_ctx and initialises its SSA
+ * def with num_components components and a NULL last argument.
+ * NOTE(review): the NULL is presumably the def's name - confirm against
+ * nir_ssa_def_init().
+ */
+nir_load_const_instr *
+nir_load_const_instr_create(void *mem_ctx, unsigned num_components)
+{
+   nir_load_const_instr *instr = ralloc(mem_ctx, nir_load_const_instr);
+   instr_init(&instr->instr, nir_instr_type_load_const);
+
+   nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+   return instr;
+}
+
+/**
+ * Allocates an intrinsic instruction for op from mem_ctx.
+ *
+ * The allocation is sized to hold nir_intrinsic_infos[op].num_srcs sources
+ * after the instruction.  The destination is reset only when the intrinsic
+ * is described as having one; every source is reset.
+ */
+nir_intrinsic_instr *
+nir_intrinsic_instr_create(void *mem_ctx, nir_intrinsic_op op)
+{
+   unsigned src_count = nir_intrinsic_infos[op].num_srcs;
+
+   nir_intrinsic_instr *intrin =
+      ralloc_size(mem_ctx,
+                  sizeof(nir_intrinsic_instr) + src_count * sizeof(nir_src));
+
+   instr_init(&intrin->instr, nir_instr_type_intrinsic);
+   intrin->intrinsic = op;
+
+   if (nir_intrinsic_infos[op].has_dest) {
+      dest_init(&intrin->dest);
+   }
+
+   unsigned s = 0;
+   while (s < src_count) {
+      src_init(&intrin->src[s]);
+      s++;
+   }
+
+   return intrin;
+}
+
+/**
+ * Allocates a call to callee from mem_ctx.
+ *
+ * The params array has one slot per callee parameter; the slots are not
+ * initialised here.  The return deref starts out NULL.
+ */
+nir_call_instr *
+nir_call_instr_create(void *mem_ctx, nir_function_overload *callee)
+{
+   nir_call_instr *call = ralloc(mem_ctx, nir_call_instr);
+
+   instr_init(&call->instr, nir_instr_type_call);
+
+   call->callee = callee;
+   call->return_deref = NULL;
+
+   call->num_params = callee->num_params;
+   call->params = ralloc_array(mem_ctx, nir_deref_var *, call->num_params);
+
+   return call;
+}
+
+/**
+ * Allocates a texture instruction with num_srcs sources from mem_ctx.
+ *
+ * The source array is a separate allocation.  Only the destination, the
+ * nir_src inside each source and the sampler fields are initialised here.
+ */
+nir_tex_instr *
+nir_tex_instr_create(void *mem_ctx, unsigned num_srcs)
+{
+   nir_tex_instr *tex = ralloc(mem_ctx, nir_tex_instr);
+
+   instr_init(&tex->instr, nir_instr_type_tex);
+   dest_init(&tex->dest);
+
+   tex->num_srcs = num_srcs;
+   tex->src = ralloc_array(mem_ctx, nir_tex_src, num_srcs);
+
+   unsigned s = 0;
+   while (s < num_srcs) {
+      src_init(&tex->src[s].src);
+      s++;
+   }
+
+   tex->sampler = NULL;
+   tex->sampler_array_size = 0;
+   tex->sampler_index = 0;
+
+   return tex;
+}
+
+/**
+ * Allocates a phi instruction from mem_ctx with a reset destination and an
+ * empty source list.
+ */
+nir_phi_instr *
+nir_phi_instr_create(void *mem_ctx)
+{
+   nir_phi_instr *phi = ralloc(mem_ctx, nir_phi_instr);
+
+   instr_init(&phi->instr, nir_instr_type_phi);
+
+   exec_list_make_empty(&phi->srcs);
+   dest_init(&phi->dest);
+
+   return phi;
+}
+
+/**
+ * Allocates a parallel-copy instruction from mem_ctx with an empty list of
+ * entries.
+ */
+nir_parallel_copy_instr *
+nir_parallel_copy_instr_create(void *mem_ctx)
+{
+   nir_parallel_copy_instr *instr = ralloc(mem_ctx, nir_parallel_copy_instr);
+   instr_init(&instr->instr, nir_instr_type_parallel_copy);
+
+   exec_list_make_empty(&instr->entries);
+
+   return instr;
+}
+
+/**
+ * Allocates an ssa_undef instruction from mem_ctx and initialises its SSA
+ * def with num_components components and a NULL last argument.
+ * NOTE(review): the NULL is presumably the def's name - confirm against
+ * nir_ssa_def_init().
+ */
+nir_ssa_undef_instr *
+nir_ssa_undef_instr_create(void *mem_ctx, unsigned num_components)
+{
+   nir_ssa_undef_instr *instr = ralloc(mem_ctx, nir_ssa_undef_instr);
+   instr_init(&instr->instr, nir_instr_type_ssa_undef);
+
+   nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+
+   return instr;
+}
+
+/**
+ * Allocates the root node of a dereference chain for var.  The node takes
+ * its type from the variable and has no child.
+ */
+nir_deref_var *
+nir_deref_var_create(void *mem_ctx, nir_variable *var)
+{
+   nir_deref_var *root = ralloc(mem_ctx, nir_deref_var);
+
+   root->var = var;
+
+   root->deref.deref_type = nir_deref_type_var;
+   root->deref.type = var->type;
+   root->deref.child = NULL;
+
+   return root;
+}
+
+/**
+ * Allocates an array dereference link: direct addressing, offset zero, a
+ * reset indirect source and no child.  deref.type is not set here.
+ */
+nir_deref_array *
+nir_deref_array_create(void *mem_ctx)
+{
+   nir_deref_array *link = ralloc(mem_ctx, nir_deref_array);
+
+   link->deref.deref_type = nir_deref_type_array;
+   link->deref.child = NULL;
+
+   link->deref_array_type = nir_deref_array_type_direct;
+   link->base_offset = 0;
+   src_init(&link->indirect);
+
+   return link;
+}
+
+/**
+ * Allocates a struct dereference link selecting member field_index, with
+ * no child.  deref.type is not set here.
+ */
+nir_deref_struct *
+nir_deref_struct_create(void *mem_ctx, unsigned field_index)
+{
+   nir_deref_struct *link = ralloc(mem_ctx, nir_deref_struct);
+
+   link->index = field_index;
+
+   link->deref.deref_type = nir_deref_type_struct;
+   link->deref.child = NULL;
+
+   return link;
+}
+
+/**
+ * Duplicates a variable deref and, recursively, the chain hanging off it.
+ * The copy refers to the same nir_variable as the original.
+ */
+static nir_deref_var *
+copy_deref_var(void *mem_ctx, nir_deref_var *deref)
+{
+   nir_deref_var *copy = nir_deref_var_create(mem_ctx, deref->var);
+
+   /* keep the original's type rather than the one taken from the variable */
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child != NULL) {
+      copy->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   }
+
+   return copy;
+}
+
+/**
+ * Duplicates an array deref link and, recursively, the rest of the chain.
+ * The indirect source is copied only for indirect addressing.
+ */
+static nir_deref_array *
+copy_deref_array(void *mem_ctx, nir_deref_array *deref)
+{
+   nir_deref_array *copy = nir_deref_array_create(mem_ctx);
+
+   copy->deref_array_type = deref->deref_array_type;
+   copy->base_offset = deref->base_offset;
+
+   if (deref->deref_array_type == nir_deref_array_type_indirect)
+      nir_src_copy(&copy->indirect, &deref->indirect, mem_ctx);
+
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child != NULL) {
+      copy->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   }
+
+   return copy;
+}
+
+/**
+ * Duplicates a struct deref link and, recursively, the rest of the chain.
+ */
+static nir_deref_struct *
+copy_deref_struct(void *mem_ctx, nir_deref_struct *deref)
+{
+   nir_deref_struct *copy = nir_deref_struct_create(mem_ctx, deref->index);
+
+   copy->deref.type = deref->deref.type;
+
+   if (deref->deref.child != NULL) {
+      copy->deref.child = nir_copy_deref(mem_ctx, deref->deref.child);
+   }
+
+   return copy;
+}
+
+/**
+ * Deep-copies a dereference chain starting at deref, allocating every new
+ * node from mem_ctx.  Dispatches on the node kind to the matching
+ * copy_deref_*() helper, each of which recurses back here for the child.
+ */
+nir_deref *
+nir_copy_deref(void *mem_ctx, nir_deref *deref)
+{
+   switch (deref->deref_type) {
+   case nir_deref_type_var:
+      return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref;
+   case nir_deref_type_array:
+      return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref;
+   case nir_deref_type_struct:
+      return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref;
+   default:
+      unreachable("Invalid dereference type");
+   }
+
+   /* every case above returns or is unreachable */
+   return NULL;
+}
+
+
+/**
+ * \name Control flow modification
+ *
+ * These functions modify the control flow tree while keeping the control flow
+ * graph up-to-date. The invariants respected are:
+ * 1. Each then statement, else statement, or loop body must have at least one
+ *    control flow node.
+ * 2. Each if-statement and loop must have one basic block before it and one
+ *    after.
+ * 3. Two basic blocks cannot be directly next to each other.
+ * 4. If a basic block has a jump instruction, there must be only one and it
+ *    must be at the end of the block.
+ * 5. The CFG must always be connected - this means that we must insert a fake
+ *    CFG edge for loops with no break statement.
+ *
+ * The purpose of the second one is so that we have places to insert code during
+ * GCM, as well as eliminating the possibility of critical edges.
+ */
+/*@{*/
+
+/**
+ * Wires the exits of a non-block CF node (an if or a loop) to the basic
+ * block that follows it.
+ *
+ * For an if, the last block of each branch is relinked to block unless it
+ * ends in a jump.  For a loop, block becomes successors[1] of the loop's
+ * last block (the fake edge of invariant #5).
+ */
+static void
+link_non_block_to_block(nir_cf_node *node, nir_block *block)
+{
+   if (node->type == nir_cf_node_if) {
+      /*
+       * We're trying to link an if to a block after it; this just means linking
+       * the last block of the then and else branches.
+       */
+
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+
+      nir_cf_node *last_then = nir_if_last_then_node(if_stmt);
+      assert(last_then->type == nir_cf_node_block);
+      nir_block *last_then_block = nir_cf_node_as_block(last_then);
+
+      nir_cf_node *last_else = nir_if_last_else_node(if_stmt);
+      assert(last_else->type == nir_cf_node_block);
+      nir_block *last_else_block = nir_cf_node_as_block(last_else);
+
+      /* a branch ending in a jump keeps the successors it already has */
+      if (exec_list_is_empty(&last_then_block->instr_list) ||
+          nir_block_last_instr(last_then_block)->type != nir_instr_type_jump) {
+         unlink_block_successors(last_then_block);
+         link_blocks(last_then_block, block, NULL);
+      }
+
+      if (exec_list_is_empty(&last_else_block->instr_list) ||
+          nir_block_last_instr(last_else_block)->type != nir_instr_type_jump) {
+         unlink_block_successors(last_else_block);
+         link_blocks(last_else_block, block, NULL);
+      }
+   } else {
+      assert(node->type == nir_cf_node_loop);
+
+      /*
+       * We can only get to this codepath if we're inserting a new loop, or
+       * at least a loop with no break statements; we can't insert break
+       * statements into a loop when we haven't inserted it into the CFG
+       * because we wouldn't know which block comes after the loop
+       * and therefore, which block should be the successor of the block with
+       * the break). Therefore, we need to insert a fake edge (see invariant
+       * #5).
+       */
+
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      nir_cf_node *last = nir_loop_last_cf_node(loop);
+      assert(last->type == nir_cf_node_block);
+      nir_block *last_block =  nir_cf_node_as_block(last);
+
+      /* slot 0 is left alone; only the second successor slot is written */
+      last_block->successors[1] = block;
+      block_add_pred(block, last_block);
+   }
+}
+
+/**
+ * Wires a basic block to the non-block CF node (an if or a loop) that
+ * follows it, replacing whatever successors the block had.
+ *
+ * For an if, the block gets the first then-block and the first else-block
+ * as its two successors.  For a loop, it gets the loop header as its only
+ * successor.
+ */
+static void
+link_block_to_non_block(nir_block *block, nir_cf_node *node)
+{
+   if (node->type == nir_cf_node_if) {
+      /*
+       * We're trying to link a block to an if after it; this just means linking
+       * the block to the first block of the then and else branches.
+       */
+
+      nir_if *if_stmt = nir_cf_node_as_if(node);
+
+      nir_cf_node *first_then = nir_if_first_then_node(if_stmt);
+      assert(first_then->type == nir_cf_node_block);
+      nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+      nir_cf_node *first_else = nir_if_first_else_node(if_stmt);
+      assert(first_else->type == nir_cf_node_block);
+      nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+      unlink_block_successors(block);
+      link_blocks(block, first_then_block, first_else_block);
+   } else {
+      /*
+       * For similar reasons as the corresponding case in
+       * link_non_block_to_block(), don't worry about if the loop header has
+       * any predecessors that need to be unlinked.
+       */
+
+      assert(node->type == nir_cf_node_loop);
+
+      nir_loop *loop = nir_cf_node_as_loop(node);
+
+      nir_cf_node *loop_header = nir_loop_first_cf_node(loop);
+      assert(loop_header->type == nir_cf_node_block);
+      nir_block *loop_header_block = nir_cf_node_as_block(loop_header);
+
+      unlink_block_successors(block);
+      link_blocks(block, loop_header_block, NULL);
+   }
+
+}
+
+/**
+ * Takes a basic block and inserts a new empty basic block before it, making its
+ * predecessors point to the new block. This essentially splits the block into
+ * an empty header and a body so that another non-block CF node can be inserted
+ * between the two. Note that this does *not* link the two basic blocks, so
+ * some kind of cleanup *must* be performed after this call.
+ *
+ * Each predecessor's edge is retargeted in place: only the successor slot
+ * that pointed at block is rewritten, so a predecessor with two successors
+ * (the block before an if, or a loop's last block carrying a fake edge)
+ * keeps its other edge.
+ *
+ * Returns the new, empty block, which has the same parent as block.
+ */
+
+static nir_block *
+split_block_beginning(nir_block *block)
+{
+   nir_block *new_block = nir_block_create(ralloc_parent(block));
+   new_block->cf_node.parent = block->cf_node.parent;
+   exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
+
+   struct set_entry *entry;
+   set_foreach(block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+
+      /* swap block for new_block in whichever slot holds it */
+      if (pred->successors[0] == block) {
+         pred->successors[0] = new_block;
+      } else {
+         assert(pred->successors[1] == block);
+         pred->successors[1] = new_block;
+      }
+
+      /* move the predecessor record over; this removes the entry that is
+       * currently being visited */
+      _mesa_set_remove(block->predecessors, entry);
+      block_add_pred(new_block, pred);
+   }
+
+   return new_block;
+}
+
+/**
+ * In each phi instruction at the top of block, replaces the first source
+ * whose predecessor is old_pred with new_pred.  Scanning stops at the first
+ * instruction that is not a phi.
+ */
+static void
+rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src(phi, src) {
+         if (src->pred == old_pred) {
+            src->pred = new_pred;
+            /* at most one source per phi is rewritten */
+            break;
+         }
+      }
+   }
+}
+
+/**
+ * Moves the successors of source to the successors of dest, leaving both
+ * successors of source NULL.
+ *
+ * Phi sources in the moved successors that named source as their
+ * predecessor are rewritten to name dest.  Any successors dest already had
+ * are unlinked first.
+ */
+
+static void
+move_successors(nir_block *source, nir_block *dest)
+{
+   /* read both slots up front: unlink_blocks() shifts slot 1 into slot 0 */
+   nir_block *succ1 = source->successors[0];
+   nir_block *succ2 = source->successors[1];
+
+   if (succ1) {
+      unlink_blocks(source, succ1);
+      rewrite_phi_preds(succ1, source, dest);
+   }
+
+   if (succ2) {
+      unlink_blocks(source, succ2);
+      rewrite_phi_preds(succ2, source, dest);
+   }
+
+   unlink_block_successors(dest);
+   /* NOTE(review): succ1 is forwarded even when it is NULL */
+   link_blocks(dest, succ1, succ2);
+}
+
+/**
+ * Inserts a new empty block right after block, under the same parent, and
+ * hands block's successors over to it.  The two blocks are not linked to
+ * each other.  Returns the new block.
+ */
+static nir_block *
+split_block_end(nir_block *block)
+{
+   nir_block *tail = nir_block_create(ralloc_parent(block));
+
+   tail->cf_node.parent = block->cf_node.parent;
+   exec_node_insert_after(&block->cf_node.node, &tail->cf_node.node);
+
+   move_successors(block, tail);
+
+   return tail;
+}
+
+/**
+ * Places the non-block node between two basic blocks: node is put on the
+ * list directly after "before", adopts its parent, and is then linked in
+ * the CFG to "before" on entry and to "after" on exit.
+ */
+
+static void
+insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after)
+{
+   exec_node_insert_after(&before->cf_node.node, &node->node);
+   node->parent = before->cf_node.parent;
+
+   /* entry edges first, then exit edges */
+   link_block_to_non_block(before, node);
+   link_non_block_to_block(node, after);
+}
+
+/**
+ * Inserts a non-basic block before a basic block by first splitting an
+ * empty header off the front of that block.
+ */
+
+static void
+insert_non_block_before_block(nir_cf_node *node, nir_block *block)
+{
+   /* the empty header takes over block's predecessors */
+   nir_block *header = split_block_beginning(block);
+
+   /* node now sits between the header and the original block */
+   insert_non_block(header, node, block);
+}
+
+/**
+ * Inserts a non-basic block after a basic block by first splitting an
+ * empty trailer off the end of that block.
+ */
+static void
+insert_non_block_after_block(nir_block *block, nir_cf_node *node)
+{
+   /* the empty trailer takes over block's successors */
+   nir_block *trailer = split_block_end(block);
+
+   /* node now sits between the original block and the trailer */
+   insert_non_block(block, node, trailer);
+}
+
+/*
+ * Walks up the control flow tree to find the innermost enclosing loop.  If
+ * node is itself a loop, it is returned.  There is no check for running off
+ * the top of the tree, so the caller must know a loop exists.
+ */
+static nir_loop *
+nearest_loop(nir_cf_node *node)
+{
+   nir_cf_node *cur = node;
+
+   while (cur->type != nir_cf_node_loop)
+      cur = cur->parent;
+
+   return nir_cf_node_as_loop(cur);
+}
+
+/**
+ * Follows parent links from node until a function node is reached and
+ * returns it as a nir_function_impl.  If node is itself a function node,
+ * it is returned.
+ */
+nir_function_impl *
+nir_cf_node_get_function(nir_cf_node *node)
+{
+   nir_cf_node *cur = node;
+
+   while (cur->type != nir_cf_node_function)
+      cur = cur->parent;
+
+   return nir_cf_node_as_function(cur);
+}
+
+/*
+ * update the CFG after a jump instruction has been added to the end of a block
+ *
+ * The block's old successors are dropped and replaced by the jump target:
+ * the loop header for continue, the block after the loop for break, and
+ * the function's end block for return.  All metadata of the enclosing
+ * function is invalidated.
+ */
+
+static void
+handle_jump(nir_block *block)
+{
+   nir_instr *instr = nir_block_last_instr(block);
+   nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
+
+   unlink_block_successors(block);
+
+   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+
+   if (jump_instr->type == nir_jump_break ||
+       jump_instr->type == nir_jump_continue) {
+      nir_loop *loop = nearest_loop(&block->cf_node);
+
+      if (jump_instr->type == nir_jump_continue) {
+         nir_cf_node *first_node = nir_loop_first_cf_node(loop);
+         assert(first_node->type == nir_cf_node_block);
+         nir_block *first_block = nir_cf_node_as_block(first_node);
+         link_blocks(block, first_block, NULL);
+      } else {
+         nir_cf_node *after = nir_cf_node_next(&loop->cf_node);
+         assert(after->type == nir_cf_node_block);
+         nir_block *after_block = nir_cf_node_as_block(after);
+         link_blocks(block, after_block, NULL);
+
+         /* If we inserted a fake link, remove it */
+         nir_cf_node *last = nir_loop_last_cf_node(loop);
+         assert(last->type == nir_cf_node_block);
+         nir_block *last_block =  nir_cf_node_as_block(last);
+         /* a non-NULL slot 1 on the loop's last block is taken to be the
+          * fake edge to after_block */
+         if (last_block->successors[1] != NULL)
+            unlink_blocks(last_block, after_block);
+      }
+   } else {
+      assert(jump_instr->type == nir_jump_return);
+      link_blocks(block, impl->end_block, NULL);
+   }
+}
+
+/**
+ * Updates the CFG after the jump instruction at the end of block has been
+ * removed, restoring the successors the block has by fall-through.
+ *
+ * If the block is the last node in its parent's list, the fall-through
+ * target depends on the parent: the block after an if, the header of a
+ * loop, or the end block of a function.  Otherwise the next node is an if
+ * or a loop and the block is linked to its first block(s).
+ *
+ * When the removed jump was a break and the block after the loop is left
+ * with no predecessors, the fake edge from the loop's last block is put
+ * back (invariant #5).  All metadata of the enclosing function is
+ * invalidated.
+ *
+ * @param block the block the jump was removed from
+ * @param type the kind of jump that was removed
+ */
+static void
+handle_remove_jump(nir_block *block, nir_jump_type type)
+{
+   unlink_block_successors(block);
+
+   if (exec_node_is_tail_sentinel(block->cf_node.node.next)) {
+      nir_cf_node *parent = block->cf_node.parent;
+      if (parent->type == nir_cf_node_if) {
+         nir_cf_node *next = nir_cf_node_next(parent);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         link_blocks(block, next_block, NULL);
+      } else if (parent->type == nir_cf_node_loop) {
+         nir_loop *loop = nir_cf_node_as_loop(parent);
+
+         nir_cf_node *head = nir_loop_first_cf_node(loop);
+         assert(head->type == nir_cf_node_block);
+         nir_block *head_block = nir_cf_node_as_block(head);
+
+         link_blocks(block, head_block, NULL);
+      } else {
+         /* last block of the function body, e.g. after a trailing return
+          * has been removed: fall through to the end block */
+         assert(parent->type == nir_cf_node_function);
+         nir_function_impl *parent_impl = nir_cf_node_as_function(parent);
+
+         link_blocks(block, parent_impl->end_block, NULL);
+      }
+   } else {
+      nir_cf_node *next = nir_cf_node_next(&block->cf_node);
+      if (next->type == nir_cf_node_if) {
+         nir_if *next_if = nir_cf_node_as_if(next);
+
+         nir_cf_node *first_then = nir_if_first_then_node(next_if);
+         assert(first_then->type == nir_cf_node_block);
+         nir_block *first_then_block = nir_cf_node_as_block(first_then);
+
+         nir_cf_node *first_else = nir_if_first_else_node(next_if);
+         assert(first_else->type == nir_cf_node_block);
+         nir_block *first_else_block = nir_cf_node_as_block(first_else);
+
+         link_blocks(block, first_then_block, first_else_block);
+      } else {
+         assert(next->type == nir_cf_node_loop);
+         nir_loop *next_loop = nir_cf_node_as_loop(next);
+
+         nir_cf_node *first = nir_loop_first_cf_node(next_loop);
+         assert(first->type == nir_cf_node_block);
+         nir_block *first_block = nir_cf_node_as_block(first);
+
+         link_blocks(block, first_block, NULL);
+      }
+   }
+
+   if (type == nir_jump_break) {
+      nir_loop *loop = nearest_loop(&block->cf_node);
+
+      nir_cf_node *next = nir_cf_node_next(&loop->cf_node);
+      assert(next->type == nir_cf_node_block);
+      nir_block *next_block = nir_cf_node_as_block(next);
+
+      if (next_block->predecessors->entries == 0) {
+         /* insert fake link */
+         nir_cf_node *last = nir_loop_last_cf_node(loop);
+         assert(last->type == nir_cf_node_block);
+         nir_block *last_block = nir_cf_node_as_block(last);
+
+         last_block->successors[1] = next_block;
+         block_add_pred(next_block, last_block);
+      }
+   }
+
+   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/**
+ * Inserts a basic block before another by merging the instructions.
+ *
+ * The instructions of \before are re-homed to \block and spliced in at the
+ * front of its instruction list; \before itself is not put on any CF list.
+ *
+ * @param block the target of the insertion
+ * @param before the block to be inserted - must not have been inserted before
+ * @param has_jump whether \before has a jump instruction at the end
+ */
+
+static void
+insert_block_before_block(nir_block *block, nir_block *before, bool has_jump)
+{
+   /* a jump may only be prepended to an empty block, where it ends up last */
+   assert(!has_jump || exec_list_is_empty(&block->instr_list));
+
+   foreach_list_typed(nir_instr, instr, node, &before->instr_list) {
+      instr->block = block;
+   }
+
+   exec_list_prepend(&block->instr_list, &before->instr_list);
+
+   if (has_jump)
+      handle_jump(block);
+}
+
+/**
+ * Inserts a basic block after another by merging the instructions.
+ *
+ * The instructions of \after are re-homed to \block and spliced in at the
+ * end of its instruction list; \after itself is not put on any CF list.
+ *
+ * @param block the target of the insertion
+ * @param after the block to be inserted - must not have been inserted before
+ * @param has_jump whether \after has a jump instruction at the end
+ */
+
+static void
+insert_block_after_block(nir_block *block, nir_block *after, bool has_jump)
+{
+   foreach_list_typed(nir_instr, instr, node, &after->instr_list) {
+      instr->block = block;
+   }
+
+   exec_list_append(&block->instr_list, &after->instr_list);
+
+   /* the appended jump is now block's last instruction; fix up the CFG */
+   if (has_jump)
+      handle_jump(block);
+}
+
+/**
+ * If node is an if statement, records it as a user of its condition.
+ *
+ * The if is added to the if_uses set of the condition's SSA def or
+ * register.  It must not go into a register's plain "uses" set; reg_create()
+ * keeps a separate if_uses set on every register for this purpose.  Nodes
+ * of any other type are ignored.
+ */
+static void
+update_if_uses(nir_cf_node *node)
+{
+   if (node->type != nir_cf_node_if)
+      return;
+
+   nir_if *if_stmt = nir_cf_node_as_if(node);
+
+   struct set *if_uses_set = if_stmt->condition.is_ssa ?
+                             if_stmt->condition.ssa->if_uses :
+                             if_stmt->condition.reg.reg->if_uses;
+
+   _mesa_set_add(if_uses_set, if_stmt);
+}
+
+/**
+ * Inserts the control flow node \after directly behind \node.
+ *
+ * A basic block is never linked in as a node of its own here: its
+ * instructions are merged into \node or into the block following \node.
+ * Non-block nodes are handed to the insert_non_block_* helpers. Finally
+ * nir_metadata_preserve() is called with nir_metadata_none for the function
+ * containing \node.
+ */
+void
+nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after)
+{
+   /* if \after is an if, record it as a user of its condition */
+   update_if_uses(after);
+
+   if (after->type == nir_cf_node_block) {
+      /*
+       * either node or the one after it must be a basic block, by invariant #2;
+       * in either case, just merge the blocks together.
+       */
+      nir_block *after_block = nir_cf_node_as_block(after);
+
+      /* a non-empty block whose last instruction is a jump */
+      bool has_jump = !exec_list_is_empty(&after_block->instr_list) &&
+         nir_block_last_instr(after_block)->type == nir_instr_type_jump;
+
+      if (node->type == nir_cf_node_block) {
+         insert_block_after_block(nir_cf_node_as_block(node), after_block,
+                                  has_jump);
+      } else {
+         /* put the instructions at the front of the block behind \node */
+         nir_cf_node *next = nir_cf_node_next(node);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         insert_block_before_block(next_block, after_block, has_jump);
+      }
+   } else {
+      if (node->type == nir_cf_node_block) {
+         insert_non_block_after_block(nir_cf_node_as_block(node), after);
+      } else {
+         /*
+          * We have to insert a non-basic block after a non-basic block. Since
+          * every non-basic block has a basic block after it, this is equivalent
+          * to inserting a non-basic block before a basic block.
+          */
+
+         nir_cf_node *next = nir_cf_node_next(node);
+         assert(next->type == nir_cf_node_block);
+         nir_block *next_block = nir_cf_node_as_block(next);
+
+         insert_non_block_before_block(after, next_block);
+      }
+   }
+
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/**
+ * Inserts the control flow node \before directly in front of \node.
+ *
+ * Mirror image of nir_cf_node_insert_after(): a basic block is merged into
+ * \node or into the block preceding \node, a non-block node is handed to the
+ * insert_non_block_* helpers. Finally nir_metadata_preserve() is called with
+ * nir_metadata_none for the function containing \node.
+ */
+void
+nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before)
+{
+   /* if \before is an if, record it as a user of its condition */
+   update_if_uses(before);
+
+   if (before->type == nir_cf_node_block) {
+      nir_block *before_block = nir_cf_node_as_block(before);
+
+      /* a non-empty block whose last instruction is a jump */
+      bool has_jump = !exec_list_is_empty(&before_block->instr_list) &&
+         nir_block_last_instr(before_block)->type == nir_instr_type_jump;
+
+      if (node->type == nir_cf_node_block) {
+         insert_block_before_block(nir_cf_node_as_block(node), before_block,
+                                   has_jump);
+      } else {
+         /* put the instructions at the end of the block in front of \node */
+         nir_cf_node *prev = nir_cf_node_prev(node);
+         assert(prev->type == nir_cf_node_block);
+         nir_block *prev_block = nir_cf_node_as_block(prev);
+
+         insert_block_after_block(prev_block, before_block, has_jump);
+      }
+   } else {
+      if (node->type == nir_cf_node_block) {
+         insert_non_block_before_block(before, nir_cf_node_as_block(node));
+      } else {
+         /*
+          * We have to insert a non-basic block before a non-basic block. This
+          * is equivalent to inserting a non-basic block after a basic block.
+          */
+
+         nir_cf_node *prev_node = nir_cf_node_prev(node);
+         assert(prev_node->type == nir_cf_node_block);
+         nir_block *prev_block = nir_cf_node_as_block(prev_node);
+
+         insert_non_block_after_block(prev_block, before);
+      }
+   }
+
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+/**
+ * Inserts \node in front of the first control flow node of \list.
+ */
+void
+nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node)
+{
+   struct exec_node *first = list->head;
+
+   nir_cf_node_insert_before(exec_node_data(nir_cf_node, first, node), node);
+}
+
+/**
+ * Inserts \node behind the last control flow node of \list.
+ */
+void
+nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node)
+{
+   struct exec_node *last = list->tail_pred;
+
+   nir_cf_node_insert_after(exec_node_data(nir_cf_node, last, node), node);
+}
+
+/**
+ * Stitch two basic blocks together into one. The aggregate must have the same
+ * predecessors as the first and the same successors as the second.
+ */
+
+static void
+stitch_blocks(nir_block *before, nir_block *after)
+{
+   /*
+    * \after is folded into \before, so only the (at most 2) successors have
+    * to be moved rather than a possibly large number of predecessors.
+    *
+    * TODO: special case when before is empty and after isn't?
+    */
+   move_successors(after, before);
+
+   /* the instructions now belong to the surviving block */
+   foreach_list_typed(nir_instr, moved, node, &after->instr_list)
+      moved->block = before;
+
+   exec_list_append(&before->instr_list, &after->instr_list);
+
+   /* unlink the absorbed block from its control flow list */
+   exec_node_remove(&after->cf_node.node);
+}
+
+static void
+remove_defs_uses(nir_instr *instr);
+
+/**
+ * Recursively drops the def/use bookkeeping held by a control flow node.
+ *
+ * For a block, every instruction is passed to remove_defs_uses(). For an if,
+ * both branches are cleaned up and the if is removed from the if_uses set of
+ * its condition. Loops and function implementations just recurse into their
+ * bodies.
+ */
+static void
+cleanup_cf_node(nir_cf_node *node)
+{
+   switch (node->type) {
+   case nir_cf_node_block: {
+      /* We need to walk the instructions and clean up defs/uses */
+      nir_block *blk = nir_cf_node_as_block(node);
+      nir_foreach_instr(blk, instr)
+         remove_defs_uses(instr);
+      break;
+   }
+
+   case nir_cf_node_if: {
+      nir_if *nif = nir_cf_node_as_if(node);
+      foreach_list_typed(nir_cf_node, child, node, &nif->then_list)
+         cleanup_cf_node(child);
+      foreach_list_typed(nir_cf_node, child, node, &nif->else_list)
+         cleanup_cf_node(child);
+
+      /* the condition no longer has this if as a user */
+      struct set *cond_users = nif->condition.is_ssa ?
+                               nif->condition.ssa->if_uses :
+                               nif->condition.reg.reg->if_uses;
+
+      struct set_entry *found = _mesa_set_search(cond_users, nif);
+      assert(found);
+      _mesa_set_remove(cond_users, found);
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *lp = nir_cf_node_as_loop(node);
+      foreach_list_typed(nir_cf_node, child, node, &lp->body)
+         cleanup_cf_node(child);
+      break;
+   }
+
+   case nir_cf_node_function: {
+      nir_function_impl *func = nir_cf_node_as_function(node);
+      foreach_list_typed(nir_cf_node, child, node, &func->body)
+         cleanup_cf_node(child);
+      break;
+   }
+
+   default:
+      unreachable("Invalid CF node type");
+   }
+}
+
+/**
+ * Removes a control flow node.
+ *
+ * A basic block is only emptied of its instructions, since blocks act as
+ * padding between the non-basic blocks. Any other node is unlinked and the
+ * blocks on either side of it are stitched into one. In both cases the
+ * def/use records of everything that was removed are cleaned up, and
+ * nir_metadata_preserve() is called with nir_metadata_none.
+ */
+void
+nir_cf_node_remove(nir_cf_node *node)
+{
+   nir_function_impl *impl = nir_cf_node_get_function(node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+
+   if (node->type == nir_cf_node_block) {
+      /*
+       * Basic blocks can't really be removed by themselves, since they act as
+       * padding between the non-basic blocks. So all we do here is empty the
+       * block of instructions.
+       *
+       * TODO: could we assert here?
+       */
+
+      /*
+       * The cleanup walks the block's instruction list, so it has to run
+       * while the instructions are still linked in; once the list has been
+       * emptied there is nothing left to walk and the defs/uses would leak.
+       */
+      cleanup_cf_node(node);
+      exec_list_make_empty(&nir_cf_node_as_block(node)->instr_list);
+   } else {
+      nir_cf_node *before = nir_cf_node_prev(node);
+      assert(before->type == nir_cf_node_block);
+      nir_block *before_block = nir_cf_node_as_block(before);
+
+      nir_cf_node *after = nir_cf_node_next(node);
+      assert(after->type == nir_cf_node_block);
+      nir_block *after_block = nir_cf_node_as_block(after);
+
+      exec_node_remove(&node->node);
+      stitch_blocks(before_block, after_block);
+
+      /* the removed node still owns its children, so this is safe here */
+      cleanup_cf_node(node);
+   }
+}
+
+/**
+ * nir_foreach_src() callback: adds the instruction passed as \state to the
+ * uses set of the SSA def or register that \src reads. Always returns true.
+ */
+static bool
+add_use_cb(nir_src *src, void *state)
+{
+   nir_instr *user = state;
+   struct set *users;
+
+   if (src->is_ssa)
+      users = src->ssa->uses;
+   else
+      users = src->reg.reg->uses;
+
+   _mesa_set_add(users, user);
+
+   return true;
+}
+
+/**
+ * nir_foreach_ssa_def() callback: gives \def a fresh index from the
+ * containing function if the instruction (passed as \state) is in a block
+ * and the def still carries the UINT_MAX placeholder. Always returns true.
+ */
+static bool
+add_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   nir_instr *owner = state;
+
+   /* not in a block yet, or already indexed: nothing to do */
+   if (!owner->block || def->index != UINT_MAX)
+      return true;
+
+   nir_function_impl *func =
+      nir_cf_node_get_function(&owner->block->cf_node);
+
+   def->index = func->ssa_alloc++;
+
+   return true;
+}
+
+/**
+ * nir_foreach_dest() callback: for a register destination, adds the
+ * instruction passed as \state to the register's defs set. SSA destinations
+ * are left alone. Always returns true.
+ */
+static bool
+add_reg_def_cb(nir_dest *dest, void *state)
+{
+   nir_instr *writer = state;
+
+   if (dest->is_ssa)
+      return true;
+
+   _mesa_set_add(dest->reg.reg->defs, writer);
+
+   return true;
+}
+
+/**
+ * Records \instr in the use sets of everything it reads and in the defs sets
+ * of the registers it writes, and lets add_ssa_def_cb() index its SSA defs.
+ */
+static void
+add_defs_uses(nir_instr *instr)
+{
+   nir_foreach_src(instr, add_use_cb, instr);
+   nir_foreach_dest(instr, add_reg_def_cb, instr);
+   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
+}
+
+/**
+ * Inserts \before directly in front of \instr, in the same block.
+ *
+ * \before must not be a jump instruction.
+ */
+void
+nir_instr_insert_before(nir_instr *instr, nir_instr *before)
+{
+   assert(before->type != nir_instr_type_jump);
+   /* the block has to be set first: add_ssa_def_cb() reads it */
+   before->block = instr->block;
+   add_defs_uses(before);
+   exec_node_insert_node_before(&instr->node, &before->node);
+}
+
+/**
+ * Inserts \after directly behind \instr, in the same block.
+ *
+ * A jump may only be inserted behind the last instruction of the block, and
+ * that instruction must not be a jump itself; handle_jump() is then run on
+ * the block.
+ */
+void
+nir_instr_insert_after(nir_instr *instr, nir_instr *after)
+{
+   const bool is_jump = after->type == nir_instr_type_jump;
+
+   if (is_jump) {
+      assert(instr == nir_block_last_instr(instr->block));
+      assert(instr->type != nir_instr_type_jump);
+   }
+
+   /* the block has to be set before the defs/uses are registered */
+   after->block = instr->block;
+   add_defs_uses(after);
+   exec_node_insert_after(&instr->node, &after->node);
+
+   if (!is_jump)
+      return;
+
+   handle_jump(after->block);
+}
+
+/**
+ * Inserts \before as the first instruction of \block.
+ *
+ * A jump may only be inserted into an empty block; handle_jump() is then run
+ * on the block.
+ */
+void
+nir_instr_insert_before_block(nir_block *block, nir_instr *before)
+{
+   const bool is_jump = before->type == nir_instr_type_jump;
+
+   if (is_jump)
+      assert(exec_list_is_empty(&block->instr_list));
+
+   /* the block has to be set before the defs/uses are registered */
+   before->block = block;
+   add_defs_uses(before);
+   exec_list_push_head(&block->instr_list, &before->node);
+
+   if (!is_jump)
+      return;
+
+   handle_jump(block);
+}
+
+/**
+ * Inserts \after as the last instruction of \block.
+ *
+ * A jump may only be appended if the block is empty or does not already end
+ * in a jump; handle_jump() is then run on the block.
+ */
+void
+nir_instr_insert_after_block(nir_block *block, nir_instr *after)
+{
+   const bool is_jump = after->type == nir_instr_type_jump;
+
+   if (is_jump) {
+      assert(exec_list_is_empty(&block->instr_list) ||
+             nir_block_last_instr(block)->type != nir_instr_type_jump);
+   }
+
+   /* the block has to be set before the defs/uses are registered */
+   after->block = block;
+   add_defs_uses(after);
+   exec_list_push_tail(&block->instr_list, &after->node);
+
+   if (!is_jump)
+      return;
+
+   handle_jump(block);
+}
+
+/**
+ * Inserts \before so that it executes directly in front of the control flow
+ * node \node.
+ *
+ * For a block that is the head of the block itself. For any other node the
+ * instruction goes to the end of the preceding node, which must be a basic
+ * block.
+ */
+void
+nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
+{
+   if (node->type == nir_cf_node_block) {
+      nir_instr_insert_before_block(nir_cf_node_as_block(node), before);
+   } else {
+      nir_cf_node *prev = nir_cf_node_prev(node);
+      assert(prev->type == nir_cf_node_block);
+      nir_block *prev_block = nir_cf_node_as_block(prev);
+
+      /*
+       * "Directly before the node" is the tail of the previous block, not
+       * its head: inserting at the head would place the instruction in front
+       * of everything else that block already contains.
+       */
+      nir_instr_insert_after_block(prev_block, before);
+   }
+}
+
+/**
+ * Inserts \after so that it executes directly behind the control flow node
+ * \node.
+ *
+ * For a block that is the tail of the block itself. For any other node the
+ * instruction goes to the head of the following node, which must be a basic
+ * block.
+ */
+void
+nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
+{
+   if (node->type == nir_cf_node_block) {
+      nir_instr_insert_after_block(nir_cf_node_as_block(node), after);
+      return;
+   }
+
+   nir_cf_node *following = nir_cf_node_next(node);
+   assert(following->type == nir_cf_node_block);
+
+   nir_instr_insert_before_block(nir_cf_node_as_block(following), after);
+}
+
+/**
+ * Inserts \before in front of the first control flow node of \list.
+ */
+void
+nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
+{
+   struct exec_node *head = exec_list_get_head(list);
+
+   nir_instr_insert_before_cf(exec_node_data(nir_cf_node, head, node),
+                              before);
+}
+
+/**
+ * Inserts \after behind the last control flow node of \list.
+ */
+void
+nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
+{
+   struct exec_node *tail = exec_list_get_tail(list);
+
+   nir_instr_insert_after_cf(exec_node_data(nir_cf_node, tail, node), after);
+}
+
+/**
+ * nir_foreach_src() callback: removes the instruction passed as \state from
+ * the uses set of the SSA def or register that \src reads, if it is in
+ * there. Always returns true.
+ */
+static bool
+remove_use_cb(nir_src *src, void *state)
+{
+   nir_instr *user = state;
+   struct set *users;
+
+   if (src->is_ssa)
+      users = src->ssa->uses;
+   else
+      users = src->reg.reg->uses;
+
+   /* several sources may share one set, so the entry can be gone already */
+   struct set_entry *found = _mesa_set_search(users, user);
+   if (found != NULL)
+      _mesa_set_remove(users, found);
+
+   return true;
+}
+
+/**
+ * nir_foreach_dest() callback: for a register destination, removes the
+ * instruction passed as \state from the register's defs set, if it is in
+ * there. SSA destinations are left alone. Always returns true.
+ */
+static bool
+remove_def_cb(nir_dest *dest, void *state)
+{
+   nir_instr *writer = state;
+
+   if (!dest->is_ssa) {
+      struct set *writers = dest->reg.reg->defs;
+      struct set_entry *found = _mesa_set_search(writers, writer);
+
+      if (found != NULL)
+         _mesa_set_remove(writers, found);
+   }
+
+   return true;
+}
+
+/**
+ * Removes \instr from the defs sets of the registers it writes and from the
+ * uses sets of everything it reads.
+ */
+static void
+remove_defs_uses(nir_instr *instr)
+{
+   nir_foreach_dest(instr, remove_def_cb, instr);
+   nir_foreach_src(instr, remove_use_cb, instr);
+}
+
+/**
+ * Removes \instr from its instruction list and drops its def/use records.
+ * When a jump is removed, handle_remove_jump() is run on its block.
+ */
+void nir_instr_remove(nir_instr *instr)
+{
+   remove_defs_uses(instr);
+   exec_node_remove(&instr->node);
+
+   if (instr->type != nir_instr_type_jump)
+      return;
+
+   handle_remove_jump(instr->block, nir_instr_as_jump(instr)->type);
+}
+
+/*@}*/
+
+/**
+ * Numbers the registers of \impl consecutively from 0, in list order, and
+ * stores the number of registers in impl->reg_alloc.
+ */
+void
+nir_index_local_regs(nir_function_impl *impl)
+{
+   unsigned count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers)
+      reg->index = count++;
+
+   impl->reg_alloc = count;
+}
+
+/**
+ * Numbers the registers of \shader consecutively from 0, in list order, and
+ * stores the number of registers in shader->reg_alloc.
+ */
+void
+nir_index_global_regs(nir_shader *shader)
+{
+   unsigned count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers)
+      reg->index = count++;
+
+   shader->reg_alloc = count;
+}
+
+/* Calls \cb on the destination of an ALU instruction and returns its result. */
+static bool
+visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest.dest, state);
+}
+
+/*
+ * Calls \cb on the destination of an intrinsic, provided the intrinsic's info
+ * entry says it has one; otherwise returns true without calling \cb.
+ */
+static bool
+visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
+                     void *state)
+{
+   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
+      return true;
+
+   return cb(&instr->dest, state);
+}
+
+/* Calls \cb on the destination of a texture instruction and returns its result. */
+static bool
+visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
+                   void *state)
+{
+   return cb(&instr->dest, state);
+}
+
+/* Calls \cb on the destination of a phi instruction and returns its result. */
+static bool
+visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   return cb(&instr->dest, state);
+}
+
+/*
+ * Calls \cb on the destination of every entry of a parallel copy. Stops and
+ * returns false as soon as \cb returns false.
+ */
+static bool
+visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
+                         nir_foreach_dest_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      bool keep_going = cb(&copy->dest, state);
+
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Calls \cb on every destination of \instr.
+ *
+ * Returns false as soon as \cb returns false, true otherwise. Instruction
+ * types that are not dispatched to a visit_*_dest helper (load_const,
+ * ssa_undef, call, jump) yield true without calling \cb.
+ */
+bool
+nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
+
+   case nir_instr_type_intrinsic:
+      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
+
+   case nir_instr_type_tex:
+      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
+
+   case nir_instr_type_phi:
+      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
+
+   case nir_instr_type_parallel_copy:
+      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
+                                      cb, state);
+
+   /* nothing is visited for these */
+   case nir_instr_type_load_const:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      return true;
+
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return true;
+}
+
+/* State handed through nir_foreach_dest() by nir_foreach_ssa_def(). */
+struct foreach_ssa_def_state {
+   nir_foreach_ssa_def_cb cb;   /* the caller's SSA def callback */
+   void *client_state;          /* the caller's state, passed on to cb */
+};
+
+/*
+ * nir_foreach_dest() callback: forwards SSA destinations to the wrapped SSA
+ * def callback and skips (returns true for) register destinations.
+ */
+static inline bool
+nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+{
+   struct foreach_ssa_def_state *wrapped = void_state;
+
+   if (!dest->is_ssa)
+      return true;
+
+   return wrapped->cb(&dest->ssa, wrapped->client_state);
+}
+
+/**
+ * Calls \cb on every SSA def that \instr produces.
+ *
+ * Returns false as soon as \cb returns false, true otherwise. Calls and
+ * jumps yield true without calling \cb.
+ */
+bool
+nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_tex:
+   case nir_instr_type_intrinsic:
+   case nir_instr_type_phi:
+   case nir_instr_type_parallel_copy: {
+      /* these carry a nir_dest: visit the dests and filter for SSA ones */
+      struct foreach_ssa_def_state foreach_state = {cb, state};
+      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
+   }
+
+   /* these carry their SSA def directly in the instruction */
+   case nir_instr_type_load_const:
+      return cb(&nir_instr_as_load_const(instr)->def, state);
+   case nir_instr_type_ssa_undef:
+      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      return true;
+   default:
+      unreachable("Invalid instruction type");
+   }
+}
+
+/*
+ * Calls \cb on \src and, for a register source with an indirect, on that
+ * indirect source as well. Returns false as soon as \cb returns false.
+ */
+static bool
+visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
+{
+   if (!cb(src, state))
+      return false;
+
+   /* only register sources can carry an indirect */
+   if (src->is_ssa || !src->reg.indirect)
+      return true;
+
+   return cb(src->reg.indirect, state);
+}
+
+/*
+ * Visits the indirect source of an array deref if the deref is of the
+ * indirect kind; any other kind yields true without calling \cb.
+ */
+static bool
+visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb,
+                      void *state)
+{
+   if (deref->deref_array_type != nir_deref_array_type_indirect)
+      return true;
+
+   return visit_src(&deref->indirect, cb, state);
+}
+
+/*
+ * Walks the deref chain starting at \deref and visits the sources of every
+ * array deref in it. Returns false as soon as a visit returns false.
+ */
+static bool
+visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state)
+{
+   for (nir_deref *link = &deref->deref; link != NULL; link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+
+      if (!visit_deref_array_src(nir_deref_as_array(link), cb, state))
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * Visits the sources of an ALU instruction; the number of sources comes from
+ * the opcode's info entry. Returns false as soon as a visit returns false.
+ */
+static bool
+visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   for (unsigned idx = 0; idx < nir_op_infos[instr->op].num_inputs; idx++) {
+      nir_src *input = &instr->src[idx].src;
+
+      if (!visit_src(input, cb, state))
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * Visits the sources of a texture instruction, followed by the sources in its
+ * sampler deref chain if a sampler is set. Returns false as soon as a visit
+ * returns false.
+ */
+static bool
+visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   for (unsigned idx = 0; idx < instr->num_srcs; idx++) {
+      if (!visit_src(&instr->src[idx].src, cb, state))
+         return false;
+   }
+
+   if (instr->sampler != NULL && !visit_deref_src(instr->sampler, cb, state))
+      return false;
+
+   return true;
+}
+
+/*
+ * Visits the sources of an intrinsic and then the sources in the deref chain
+ * of each of its variables; both counts come from the intrinsic's info entry.
+ * Returns false as soon as a visit returns false.
+ */
+static bool
+visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
+                    void *state)
+{
+   unsigned src_count = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   for (unsigned idx = 0; idx < src_count; idx++) {
+      if (!visit_src(&instr->src[idx], cb, state))
+         return false;
+   }
+
+   unsigned var_count =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned idx = 0; idx < var_count; idx++) {
+      if (!visit_deref_src(instr->variables[idx], cb, state))
+         return false;
+   }
+
+   return true;
+}
+
+/* Visits nothing for a call instruction; \cb is never called. */
+static bool
+visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   return true;
+}
+
+/* Visits nothing for a load_const instruction; \cb is never called. */
+static bool
+visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb,
+                     void *state)
+{
+   return true;
+}
+
+/*
+ * Visits the source of every phi source entry. Returns false as soon as a
+ * visit returns false.
+ */
+static bool
+visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_phi_src(instr, incoming) {
+      bool keep_going = visit_src(&incoming->src, cb, state);
+
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * Visits the source of every entry of a parallel copy. Returns false as soon
+ * as a visit returns false.
+ */
+static bool
+visit_parallel_copy_src(nir_parallel_copy_instr *instr,
+                        nir_foreach_src_cb cb, void *state)
+{
+   nir_foreach_parallel_copy_entry(instr, copy) {
+      bool keep_going = visit_src(&copy->src, cb, state);
+
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/* State handed through nir_foreach_dest() by nir_foreach_src(). */
+typedef struct {
+   void *state;            /* the caller's state, passed on to cb */
+   nir_foreach_src_cb cb;  /* the caller's source callback */
+} visit_dest_indirect_state;
+
+/*
+ * nir_foreach_dest() callback: the indirect of a register destination is a
+ * source of the instruction, so it is passed to the wrapped source callback.
+ * Destinations without an indirect yield true.
+ */
+static bool
+visit_dest_indirect(nir_dest *dest, void *_state)
+{
+   visit_dest_indirect_state *wrapped = _state;
+
+   if (dest->is_ssa || !dest->reg.indirect)
+      return true;
+
+   return wrapped->cb(dest->reg.indirect, wrapped->state);
+}
+
+/**
+ * Calls \cb on every source of \instr.
+ *
+ * Besides the sources proper this covers the indirect sources of register
+ * sources and, through visit_dest_indirect(), the indirect sources of
+ * register destinations. Returns false as soon as \cb returns false, true
+ * otherwise. Jumps and ssa_undefs yield true without calling \cb.
+ */
+bool
+nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_intrinsic:
+      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_tex:
+      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_call:
+      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_load_const:
+      if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_phi:
+      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
+         return false;
+      break;
+   case nir_instr_type_parallel_copy:
+      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
+                                   cb, state))
+         return false;
+      break;
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      /* nothing is visited for these, not even destinations */
+      return true;
+
+   default:
+      unreachable("Invalid instruction type");
+      break;
+   }
+
+   /* finally visit the indirects of any register destinations */
+   visit_dest_indirect_state dest_state;
+   dest_state.state = state;
+   dest_state.cb = cb;
+   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
+}
+
+/**
+ * Returns the constant value behind \src, or NULL if \src is not an SSA
+ * source whose parent instruction is a load_const.
+ */
+nir_const_value *
+nir_src_as_const_value(nir_src src)
+{
+   if (!src.is_ssa)
+      return NULL;
+
+   nir_instr *parent = src.ssa->parent_instr;
+   if (parent->type != nir_instr_type_load_const)
+      return NULL;
+
+   return &nir_instr_as_load_const(parent)->value;
+}
+
+/**
+ * Compares two sources.
+ *
+ * Two SSA sources are equal when they name the same def. Two register
+ * sources are equal when register and base offset match and their indirects
+ * are either both absent or themselves equal. An SSA source never equals a
+ * register source.
+ */
+bool
+nir_srcs_equal(nir_src src1, nir_src src2)
+{
+   if (src1.is_ssa || src2.is_ssa) {
+      /* mixed kinds never match */
+      return src1.is_ssa && src2.is_ssa && src1.ssa == src2.ssa;
+   }
+
+   bool has_indirect1 = src1.reg.indirect != NULL;
+   bool has_indirect2 = src2.reg.indirect != NULL;
+
+   if (has_indirect1 != has_indirect2)
+      return false;
+
+   if (has_indirect1 &&
+       !nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect))
+      return false;
+
+   return src1.reg.reg == src2.reg.reg &&
+          src1.reg.base_offset == src2.reg.base_offset;
+}
+
+/*
+ * nir_foreach_src() callback: returns false (stopping the iteration) when
+ * \src reads the SSA def passed as \void_def, true otherwise.
+ */
+static bool
+src_does_not_use_def(nir_src *src, void *void_def)
+{
+   nir_ssa_def *wanted = void_def;
+
+   return !src->is_ssa || src->ssa != wanted;
+}
+
+/*
+ * nir_foreach_src() callback: returns false (stopping the iteration) when
+ * \src reads the register passed as \void_reg, true otherwise.
+ */
+static bool
+src_does_not_use_reg(nir_src *src, void *void_reg)
+{
+   nir_register *wanted = void_reg;
+
+   return src->is_ssa || src->reg.reg != wanted;
+}
+
+/**
+ * Replaces \src, a source of \instr, with \new_src and updates the uses
+ * sets accordingly.
+ *
+ * \instr is removed from the uses set of the old SSA def or register unless
+ * another source of \instr still reads it, and is added to the uses set of
+ * the new one.
+ */
+void
+nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
+{
+   if (src->is_ssa) {
+      nir_ssa_def *old_ssa = src->ssa;
+      *src = new_src;
+      /*
+       * nir_foreach_src() only returns true here if no remaining source of
+       * instr reads old_ssa, i.e. this was the last use by this instruction.
+       */
+      if (old_ssa && nir_foreach_src(instr, src_does_not_use_def, old_ssa)) {
+         struct set_entry *entry = _mesa_set_search(old_ssa->uses, instr);
+         assert(entry);
+         _mesa_set_remove(old_ssa->uses, entry);
+      }
+   } else {
+      /* rewrite the indirect first so that its use is accounted for too */
+      if (src->reg.indirect)
+         nir_instr_rewrite_src(instr, src->reg.indirect, new_src);
+
+      nir_register *old_reg = src->reg.reg;
+      *src = new_src;
+      /* same as above: only drop the use if no other source reads old_reg */
+      if (old_reg && nir_foreach_src(instr, src_does_not_use_reg, old_reg)) {
+         struct set_entry *entry = _mesa_set_search(old_reg->uses, instr);
+         assert(entry);
+         _mesa_set_remove(old_reg->uses, entry);
+      }
+   }
+
+   /* record instr as a user of whatever the new source reads, if anything */
+   if (new_src.is_ssa) {
+      if (new_src.ssa)
+         _mesa_set_add(new_src.ssa->uses, instr);
+   } else {
+      if (new_src.reg.reg)
+         _mesa_set_add(new_src.reg.reg->uses, instr);
+   }
+}
+
+/**
+ * Initializes \def as an SSA def produced by \instr.
+ *
+ * The uses and if_uses sets are created with the ralloc parent of \instr as
+ * their memory context. The def gets an index from the containing function
+ * if \instr is already in a block, and the placeholder UINT_MAX otherwise.
+ */
+void
+nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+                 unsigned num_components, const char *name)
+{
+   void *ctx = ralloc_parent(instr);
+
+   def->name = name;
+   def->parent_instr = instr;
+   def->uses = _mesa_set_create(ctx, _mesa_hash_pointer,
+                                _mesa_key_pointer_equal);
+   def->if_uses = _mesa_set_create(ctx, _mesa_hash_pointer,
+                                   _mesa_key_pointer_equal);
+   def->num_components = num_components;
+
+   if (!instr->block) {
+      /* not inserted yet: no function to take an index from */
+      def->index = UINT_MAX;
+      return;
+   }
+
+   nir_function_impl *func =
+      nir_cf_node_get_function(&instr->block->cf_node);
+
+   def->index = func->ssa_alloc++;
+}
+
+/**
+ * Turns \dest into an SSA destination of \instr and initializes its SSA def
+ * through nir_ssa_def_init().
+ */
+void
+nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+                 unsigned num_components, const char *name)
+{
+   dest->is_ssa = true;
+   nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+}
+
+/* State handed through nir_foreach_src() by nir_ssa_def_rewrite_uses(). */
+struct ssa_def_rewrite_state {
+   void *mem_ctx;      /* memory context passed on to nir_src_copy() */
+   nir_ssa_def *old;   /* the def whose uses are being replaced */
+   nir_src new_src;    /* the source that replaces them */
+};
+
+/*
+ * nir_foreach_src() callback: overwrites \src with the replacement source if
+ * it reads the def that is being rewritten. Always returns true.
+ */
+static bool
+ssa_def_rewrite_uses_src(nir_src *src, void *void_state)
+{
+   struct ssa_def_rewrite_state *rewrite = void_state;
+
+   if (!src->is_ssa || src->ssa != rewrite->old)
+      return true;
+
+   nir_src_copy(src, &rewrite->new_src, rewrite->mem_ctx);
+
+   return true;
+}
+
+/**
+ * Makes every user of \def read \new_src instead.
+ *
+ * Instructions in def->uses and ifs in def->if_uses are moved over to the
+ * corresponding sets of the SSA def or register behind \new_src. \new_src
+ * must not be \def itself. \mem_ctx is passed on to nir_src_copy().
+ */
+void
+nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
+{
+   struct ssa_def_rewrite_state state;
+   state.mem_ctx = mem_ctx;
+   state.old = def;
+   state.new_src = new_src;
+
+   assert(!new_src.is_ssa || def != new_src.ssa);
+
+   /* the sets that take over the users of def */
+   struct set *new_uses, *new_if_uses;
+   if (new_src.is_ssa) {
+      new_uses = new_src.ssa->uses;
+      new_if_uses = new_src.ssa->if_uses;
+   } else {
+      new_uses = new_src.reg.reg->uses;
+      new_if_uses = new_src.reg.reg->if_uses;
+   }
+
+   struct set_entry *entry;
+   set_foreach(def->uses, entry) {
+      nir_instr *instr = (nir_instr *)entry->key;
+
+      /* entries are removed from the set while it is being iterated */
+      _mesa_set_remove(def->uses, entry);
+      nir_foreach_src(instr, ssa_def_rewrite_uses_src, &state);
+      _mesa_set_add(new_uses, instr);
+   }
+
+   set_foreach(def->if_uses, entry) {
+      nir_if *if_use = (nir_if *)entry->key;
+
+      _mesa_set_remove(def->if_uses, entry);
+      nir_src_copy(&if_use->condition, &new_src, mem_ctx);
+      _mesa_set_add(new_if_uses, if_use);
+   }
+}
+
+
+static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                            bool reverse, void *state);
+
+/*
+ * Visits the blocks inside an if statement: the then-list followed by the
+ * else-list, or both in the opposite order and back to front when \reverse
+ * is set. Returns false as soon as a visit returns false.
+ */
+static inline bool
+foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (reverse) {
+      foreach_list_typed_safe_reverse(nir_cf_node, node, node,
+                                      &if_stmt->else_list) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+
+      foreach_list_typed_safe_reverse(nir_cf_node, node, node,
+                                      &if_stmt->then_list) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+   } else {
+      foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+
+      foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/*
+ * Visits the blocks inside a loop body, back to front when \reverse is set.
+ * Returns false as soon as a visit returns false.
+ */
+static inline bool
+foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
+{
+   if (reverse) {
+      foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+   } else {
+      foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
+         if (!foreach_cf_node(node, cb, reverse, state))
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/*
+ * Calls \cb on \node if it is a block, otherwise recurses into the if or
+ * loop. Returns what the callback or the recursion returned.
+ */
+static bool
+foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
+                bool reverse, void *state)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      return cb(nir_cf_node_as_block(node), state);
+
+   case nir_cf_node_if:
+      return foreach_if(nir_cf_node_as_if(node), cb, reverse, state);
+
+   case nir_cf_node_loop:
+      return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state);
+
+   default:
+      unreachable("Invalid CFG node type");
+   }
+
+   return false;
+}
+
+/**
+ * Calls \cb on every block in the body of \impl, in list order, and finally
+ * on the end block. Stops and returns false as soon as \cb returns false.
+ */
+bool
+nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state)
+{
+   foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) {
+      bool keep_going = foreach_cf_node(node, cb, false, state);
+
+      if (!keep_going)
+         return false;
+   }
+
+   return cb(impl->end_block, state);
+}
+
+/**
+ * Calls \cb on the end block of \impl and then on every block in its body,
+ * back to front. Stops and returns false as soon as \cb returns false.
+ */
+bool
+nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+                          void *state)
+{
+   bool keep_going = cb(impl->end_block, state);
+   if (!keep_going)
+      return false;
+
+   foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) {
+      keep_going = foreach_cf_node(node, cb, true, state);
+      if (!keep_going)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Returns the if statement that directly follows \block, or NULL if the
+ * block is a tail sentinel, is the last node of its list, or is followed by
+ * something other than an if.
+ */
+nir_if *
+nir_block_get_following_if(nir_block *block)
+{
+   if (exec_node_is_tail_sentinel(&block->cf_node.node) ||
+       nir_cf_node_is_last(&block->cf_node))
+      return NULL;
+
+   nir_cf_node *following = nir_cf_node_next(&block->cf_node);
+
+   return following->type == nir_cf_node_if ?
+          nir_cf_node_as_if(following) : NULL;
+}
+
+/*
+ * nir_foreach_block() callback: stores the running counter behind \state as
+ * the block's index and advances it. Always returns true.
+ */
+static bool
+index_block(nir_block *block, void *state)
+{
+   unsigned *next_index = state;
+
+   block->index = *next_index;
+   *next_index += 1;
+
+   return true;
+}
+
+/**
+ * Numbers the blocks of \impl in nir_foreach_block() order and stores the
+ * block count in impl->num_blocks. Does nothing if the block index metadata
+ * is already marked valid.
+ */
+void
+nir_index_blocks(nir_function_impl *impl)
+{
+   if (impl->valid_metadata & nir_metadata_block_index)
+      return;
+
+   unsigned count = 0;
+   nir_foreach_block(impl, index_block, &count);
+
+   impl->num_blocks = count;
+}
+
+/*
+ * nir_foreach_ssa_def() callback: stores the running counter behind \state as
+ * the def's index and advances it. Always returns true.
+ */
+static bool
+index_ssa_def_cb(nir_ssa_def *def, void *state)
+{
+   unsigned *next_index = state;
+
+   def->index = *next_index;
+   *next_index += 1;
+
+   return true;
+}
+
+/*
+ * nir_foreach_block() callback: indexes the SSA defs of every instruction in
+ * \block using the counter behind \state. Always returns true.
+ */
+static bool
+index_ssa_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, index_ssa_def_cb, state);
+
+   return true;
+}
+
+/**
+ * Renumbers the SSA defs of \impl consecutively from 0 in
+ * nir_foreach_block() order and stores the count in impl->ssa_alloc.
+ */
+void
+nir_index_ssa_defs(nir_function_impl *impl)
+{
+   unsigned count = 0;
+
+   nir_foreach_block(impl, index_ssa_block, &count);
+
+   impl->ssa_alloc = count;
+}
diff --git a/mesalib/src/glsl/nir/nir.h b/mesalib/src/glsl/nir/nir.h
new file mode 100644
index 000000000..d74caa959
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir.h
@@ -0,0 +1,1618 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#pragma once
+
+#include "util/hash_table.h"
+#include "../list.h"
+#include "GL/gl.h" /* GLenum */
+#include "util/ralloc.h"
+#include "util/set.h"
+#include "util/bitset.h"
+#include "nir_types.h"
+#include <stdio.h>
+
+#include "nir_opcodes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct gl_program;
+struct gl_shader_program;
+
+#define NIR_FALSE 0u
+#define NIR_TRUE (~0u)
+
+/** Defines a cast function
+ *
+ * This macro defines a cast function called \c name from in_type to out_type
+ * where out_type is some structure type that contains a member of type
+ * in_type; \c field is the name of that member inside out_type.
+ *
+ * Note that you have to be a bit careful as the generated cast function
+ * destroys constness: it takes a const in_type pointer and returns a
+ * non-const out_type pointer.
+ */
+#define NIR_DEFINE_CAST(name, in_type, out_type, field)  \
+static inline out_type *                                 \
+name(const in_type *parent)                              \
+{                                                        \
+   return exec_node_data(out_type, parent, field);       \
+}
+
+struct nir_function_overload;
+struct nir_function;
+struct nir_shader;
+
+
+/**
+ * Description of built-in state associated with a uniform
+ *
+ * \sa nir_variable::state_slots
+ */
+typedef struct {
+   int tokens[5];
+   int swizzle;
+} nir_state_slot;
+
+typedef enum {
+   nir_var_shader_in,
+   nir_var_shader_out,
+   nir_var_global,
+   nir_var_local,
+   nir_var_uniform,
+   nir_var_system_value
+} nir_variable_mode;
+
+/**
+ * Data stored in an nir_constant
+ */
+union nir_constant_data {
+   unsigned u[16];
+   int i[16];
+   float f[16];
+   bool b[16];
+};
+
+typedef struct nir_constant {
+   /**
+    * Value of the constant.
+    *
+    * The field used to back the values supplied by the constant is determined
+    * by the type associated with the \c nir_variable.  Constants may be
+    * scalars, vectors, or matrices.
+    */
+   union nir_constant_data value;
+
+   /* Array elements / Structure Fields */
+   struct nir_constant **elements;
+} nir_constant;
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
+ * with a layout qualifier.
+ */
+typedef enum {
+    nir_depth_layout_none, /**< No depth layout is specified. */
+    nir_depth_layout_any,
+    nir_depth_layout_greater,
+    nir_depth_layout_less,
+    nir_depth_layout_unchanged
+} nir_depth_layout;
+
+/**
+ * Either a uniform, global variable, shader input, or shader output. Based on
+ * ir_variable - it should be easy to translate between the two.
+ */
+
+typedef struct {
+   struct exec_node node;
+
+   /**
+    * Declared type of the variable
+    */
+   const struct glsl_type *type;
+
+   /**
+    * Declared name of the variable
+    */
+   char *name;
+
+   /**
+    * For variables which satisfy the is_interface_instance() predicate, this
+    * points to an array of integers such that if the ith member of the
+    * interface block is an array, max_ifc_array_access[i] is the maximum
+    * array element of that member that has been accessed.  If the ith member
+    * of the interface block is not an array, max_ifc_array_access[i] is
+    * unused.
+    *
+    * For variables whose type is not an interface block, this pointer is
+    * NULL.
+    */
+   unsigned *max_ifc_array_access;
+
+   struct nir_variable_data {
+
+      /**
+       * Is the variable read-only?
+       *
+       * This is set for variables declared as \c const, shader inputs,
+       * and uniforms.
+       */
+      unsigned read_only:1;
+      unsigned centroid:1;
+      unsigned sample:1;
+      unsigned invariant:1;
+
+      /**
+       * Storage class of the variable.
+       *
+       * \sa nir_variable_mode
+       */
+      nir_variable_mode mode:4;
+
+      /**
+       * Interpolation mode for shader inputs / outputs
+       *
+       * \sa glsl_interp_qualifier
+       */
+      unsigned interpolation:2;
+
+      /**
+       * \name ARB_fragment_coord_conventions
+       * @{
+       */
+      unsigned origin_upper_left:1;
+      unsigned pixel_center_integer:1;
+      /*@}*/
+
+      /**
+       * Was the location explicitly set in the shader?
+       *
+       * If the location is explicitly set in the shader, it \b cannot be changed
+       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+       * no effect).
+       */
+      unsigned explicit_location:1;
+      unsigned explicit_index:1;
+
+      /**
+       * Was an initial binding explicitly set in the shader?
+       *
+       * If so, constant_initializer contains an integer nir_constant
+       * representing the initial binding point.
+       */
+      unsigned explicit_binding:1;
+
+      /**
+       * Does this variable have an initializer?
+       *
+       * This is used by the linker to cross-validate initializers of global
+       * variables.
+       */
+      unsigned has_initializer:1;
+
+      /**
+       * Is this variable a generic output or input that has not yet been matched
+       * up to a variable in another stage of the pipeline?
+       *
+       * This is used by the linker as scratch storage while assigning locations
+       * to generic inputs and outputs.
+       */
+      unsigned is_unmatched_generic_inout:1;
+
+      /**
+       * If non-zero, then this variable may be packed along with other variables
+       * into a single varying slot, so this offset should be applied when
+       * accessing components.  For example, an offset of 1 means that the x
+       * component of this variable is actually stored in component y of the
+       * location specified by \c location.
+       */
+      unsigned location_frac:2;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was not an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_array will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_nonarray:1;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_nonarray will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_array:1;
+
+      /**
+       * \brief Layout qualifier for gl_FragDepth.
+       *
+       * This is not equal to \c nir_depth_layout_none if and only if this
+       * variable is \c gl_FragDepth and a layout qualifier is specified.
+       */
+      nir_depth_layout depth_layout;
+
+      /**
+       * Storage location of the base of this variable
+       *
+       * The precise meaning of this field depends on the nature of the variable.
+       *
+       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
+       *   - Vertex shader output: one of the values from \c gl_varying_slot.
+       *   - Geometry shader input: one of the values from \c gl_varying_slot.
+       *   - Geometry shader output: one of the values from \c gl_varying_slot.
+       *   - Fragment shader input: one of the values from \c gl_varying_slot.
+       *   - Fragment shader output: one of the values from \c gl_frag_result.
+       *   - Uniforms: Per-stage uniform slot number for default uniform block.
+       *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Other: This field is not currently used.
+       *
+       * If the variable is a uniform, shader input, or shader output, and the
+       * slot has not been assigned, the value will be -1.
+       */
+      int location;
+
+      /**
+       * The actual location of the variable in the IR. Only valid for inputs
+       * and outputs.
+       */
+      unsigned int driver_location;
+
+      /**
+       * output index for dual source blending.
+       */
+      int index;
+
+      /**
+       * Initial binding point for a sampler or UBO.
+       *
+       * For array types, this represents the binding point for the first element.
+       */
+      int binding;
+
+      /**
+       * Location an atomic counter is stored at.
+       */
+      struct {
+         unsigned buffer_index;
+         unsigned offset;
+      } atomic;
+
+      /**
+       * ARB_shader_image_load_store qualifiers.
+       */
+      struct {
+         bool read_only; /**< "readonly" qualifier. */
+         bool write_only; /**< "writeonly" qualifier. */
+         bool coherent;
+         bool _volatile;
+         bool restrict_flag;
+
+         /** Image internal format if specified explicitly, otherwise GL_NONE. */
+         GLenum format;
+      } image;
+
+      /**
+       * Highest element accessed with a constant expression array index
+       *
+       * Not used for non-array variables.
+       */
+      unsigned max_array_access;
+
+   } data;
+
+   /**
+    * Built-in state that backs this uniform
+    *
+    * Once set at variable creation, \c state_slots must remain invariant.
+    * This is because, ideally, this array would be shared by all clones of
+    * this variable in the IR tree.  In other words, we'd really like for it
+    * to be a fly-weight.
+    *
+    * If the variable is not a uniform, \c num_state_slots will be zero and
+    * \c state_slots will be \c NULL.
+    */
+   /*@{*/
+   unsigned num_state_slots;    /**< Number of state slots used */
+   nir_state_slot *state_slots;  /**< State descriptors. */
+   /*@}*/
+
+   /**
+    * Constant expression assigned in the initializer of the variable
+    */
+   nir_constant *constant_initializer;
+
+   /**
+    * For variables that are in an interface block or are an instance of an
+    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+    *
+    * \sa ir_variable::location
+    */
+   const struct glsl_type *interface_type;
+} nir_variable;
+
+typedef struct {
+   struct exec_node node;
+
+   unsigned num_components; /**< number of vector components */
+   unsigned num_array_elems; /**< size of array (0 for no array) */
+
+   /** generic register index. */
+   unsigned index;
+
+   /** only for debug purposes, can be NULL */
+   const char *name;
+
+   /** whether this register is local (per-function) or global (per-shader) */
+   bool is_global;
+
+   /**
+    * If this flag is set to true, then accessing channels >= num_components
+    * is well-defined, and simply spills over to the next array element. This
+    * is useful for backends that can do per-component accessing, in
+    * particular scalar backends. By setting this flag and making
+    * num_components equal to 1, structures can be packed tightly into
+    * registers and then registers can be accessed per-component to get to
+    * each structure member, even if it crosses vec4 boundaries.
+    */
+   bool is_packed;
+
+   /** set of nir_instr's where this register is used (read from) */
+   struct set *uses;
+
+   /** set of nir_instr's where this register is defined (written to) */
+   struct set *defs;
+
+   /** set of nir_if's where this register is used as a condition */
+   struct set *if_uses;
+} nir_register;
+
+typedef enum {
+   nir_instr_type_alu,
+   nir_instr_type_call,
+   nir_instr_type_tex,
+   nir_instr_type_intrinsic,
+   nir_instr_type_load_const,
+   nir_instr_type_jump,
+   nir_instr_type_ssa_undef,
+   nir_instr_type_phi,
+   nir_instr_type_parallel_copy,
+} nir_instr_type;
+
+typedef struct {
+   struct exec_node node;
+   nir_instr_type type;
+   struct nir_block *block;
+
+   /* A temporary for optimization and analysis passes to use for storing
+    * flags.  For instance, DCE uses this to store the "dead/live" info.
+    */
+   uint8_t pass_flags;
+} nir_instr;
+
+/**
+ * Returns the instruction that follows \c instr in its list, or NULL when
+ * the following list node is the tail sentinel.
+ */
+static inline nir_instr *
+nir_instr_next(nir_instr *instr)
+{
+   struct exec_node *succ = exec_node_get_next(&instr->node);
+
+   if (!exec_node_is_tail_sentinel(succ))
+      return exec_node_data(nir_instr, succ, node);
+
+   return NULL;
+}
+
+/**
+ * Returns the instruction that precedes \c instr in its list, or NULL when
+ * the preceding list node is the head sentinel.
+ */
+static inline nir_instr *
+nir_instr_prev(nir_instr *instr)
+{
+   struct exec_node *pred = exec_node_get_prev(&instr->node);
+
+   if (!exec_node_is_head_sentinel(pred))
+      return exec_node_data(nir_instr, pred, node);
+
+   return NULL;
+}
+
+typedef struct {
+   /** for debugging only, can be NULL */
+   const char* name;
+
+   /** generic SSA definition index. */
+   unsigned index;
+
+   /** Index into the live_in and live_out bitfields */
+   unsigned live_index;
+
+   nir_instr *parent_instr;
+
+   /** set of nir_instr's where this SSA definition is used (read from) */
+   struct set *uses;
+
+   /** set of nir_if's where this SSA definition is used as a condition */
+   struct set *if_uses;
+
+   uint8_t num_components;
+} nir_ssa_def;
+
+struct nir_src;
+
+typedef struct {
+   nir_register *reg;
+   struct nir_src *indirect; /**< NULL for no indirect offset */
+   unsigned base_offset;
+
+   /* TODO use-def chain goes here */
+} nir_reg_src;
+
+typedef struct {
+   nir_register *reg;
+   struct nir_src *indirect; /**< NULL for no indirect offset */
+   unsigned base_offset;
+
+   /* TODO def-use chain goes here */
+} nir_reg_dest;
+
+typedef struct nir_src {
+   union {
+      nir_reg_src reg;
+      nir_ssa_def *ssa;
+   };
+
+   bool is_ssa;
+} nir_src;
+
+typedef struct {
+   union {
+      nir_reg_dest reg;
+      nir_ssa_def ssa;
+   };
+
+   bool is_ssa;
+} nir_dest;
+
+/**
+ * Builds a nir_src that refers to the SSA definition \c def; only the
+ * \c ssa and \c is_ssa members are written.
+ */
+static inline nir_src
+nir_src_for_ssa(nir_ssa_def *def)
+{
+   nir_src result;
+
+   result.ssa = def;
+   result.is_ssa = true;
+
+   return result;
+}
+
+/**
+ * Builds a non-SSA nir_src that reads register \c reg directly: no indirect
+ * source and a base offset of zero.
+ */
+static inline nir_src
+nir_src_for_reg(nir_register *reg)
+{
+   nir_src result;
+
+   result.reg.base_offset = 0;
+   result.reg.indirect = NULL;
+   result.reg.reg = reg;
+   result.is_ssa = false;
+
+   return result;
+}
+
+/**
+ * Builds a non-SSA nir_dest that writes register \c reg directly: no
+ * indirect source and a base offset of zero.
+ */
+static inline nir_dest
+nir_dest_for_reg(nir_register *reg)
+{
+   nir_dest result;
+
+   result.reg.base_offset = 0;
+   result.reg.indirect = NULL;
+   result.reg.reg = reg;
+   result.is_ssa = false;
+
+   return result;
+}
+
+void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+
+typedef struct {
+   nir_src src;
+
+   /**
+    * \name input modifiers
+    */
+   /*@{*/
+   /**
+    * For inputs interpreted as floating point, flips the sign bit. For
+    * inputs interpreted as integers, performs the two's complement negation.
+    */
+   bool negate;
+
+   /**
+    * Clears the sign bit for floating point values, and computes the integer
+    * absolute value for integers. Note that the negate modifier acts after
+    * the absolute value modifier, therefore if both are set then all inputs
+    * will become negative.
+    */
+   bool abs;
+   /*@}*/
+
+   /**
+    * For each input component, says which component of the register it is
+    * chosen from. Note that which elements of the swizzle are used and which
+    * are ignored are based on the write mask for most opcodes - for example,
+    * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
+    * a swizzle of {2, x, 1, 0} where x means "don't care."
+    */
+   uint8_t swizzle[4];
+} nir_alu_src;
+
+typedef struct {
+   nir_dest dest;
+
+   /**
+    * \name saturate output modifier
+    *
+    * Only valid for opcodes that output floating-point numbers. Clamps the
+    * output to between 0.0 and 1.0 inclusive.
+    */
+
+   bool saturate;
+
+   unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
+} nir_alu_dest;
+
+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                       void *mem_ctx);
+
+typedef enum {
+   nir_type_invalid = 0, /* Not a valid type */
+   nir_type_float,
+   nir_type_int,
+   nir_type_unsigned,
+   nir_type_bool
+} nir_alu_type;
+
+typedef enum {
+   NIR_OP_IS_COMMUTATIVE = (1 << 0),
+   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
+} nir_op_algebraic_property;
+
+typedef struct {
+   const char *name;
+
+   unsigned num_inputs;
+
+   /**
+    * The number of components in the output
+    *
+    * If non-zero, this is the size of the output and input sizes are
+    * explicitly given; swizzle and writemask are still in effect, but if
+    * the output component is masked out, then the input component may
+    * still be in use.
+    *
+    * If zero, the opcode acts in the standard, per-component manner; the
+    * operation is performed on each component (except the ones that are
+    * masked out) with the input being taken from the input swizzle for
+    * that component.
+    *
+    * The size of some of the inputs may be given (i.e. non-zero) even
+    * though output_size is zero; in that case, the inputs with a zero
+    * size act per-component, while the inputs with non-zero size don't.
+    */
+   unsigned output_size;
+
+   /**
+    * The type of vector that the instruction outputs. Note that the
+    * saturate modifier is only allowed on outputs with the float type.
+    */
+
+   nir_alu_type output_type;
+
+   /**
+    * The number of components in each input
+    */
+   unsigned input_sizes[4];
+
+   /**
+    * The type of vector that each input takes. Note that negate and
+    * absolute value are only allowed on inputs with int or float type and
+    * behave differently on the two.
+    */
+   nir_alu_type input_types[4];
+
+   nir_op_algebraic_property algebraic_properties;
+} nir_op_info;
+
+extern const nir_op_info nir_op_infos[nir_num_opcodes];
+
+typedef struct nir_alu_instr {
+   nir_instr instr;
+   nir_op op;
+   nir_alu_dest dest;
+   nir_alu_src src[];
+} nir_alu_instr;
+
+/*
+ * Reports whether channel `channel` of source `src` is read by `instr`.
+ *
+ * When the opcode gives source `src` a non-zero input size, the channel is
+ * used iff it lies below that size; otherwise the answer is the matching
+ * bit of the destination write mask.
+ */
+static inline bool
+nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
+{
+   const unsigned fixed_size = nir_op_infos[instr->op].input_sizes[src];
+
+   if (fixed_size > 0)
+      return channel < fixed_size;
+
+   return (instr->dest.write_mask >> channel) & 1;
+}
+
+/*
+ * Number of channels of source `src` consumed by an ALU instruction whose
+ * destination is SSA (asserted).
+ *
+ * This is the opcode's input size for that source when it is non-zero, and
+ * the component count of the SSA destination otherwise.
+ */
+static inline unsigned
+nir_ssa_alu_instr_src_components(nir_alu_instr *instr, unsigned src)
+{
+   assert(instr->dest.dest.is_ssa);
+
+   const unsigned fixed_size = nir_op_infos[instr->op].input_sizes[src];
+
+   return fixed_size > 0 ? fixed_size : instr->dest.dest.ssa.num_components;
+}
+
+typedef enum {
+   nir_deref_type_var,
+   nir_deref_type_array,
+   nir_deref_type_struct
+} nir_deref_type;
+
+typedef struct nir_deref {
+   nir_deref_type deref_type;
+   struct nir_deref *child;
+   const struct glsl_type *type;
+} nir_deref;
+
+typedef struct {
+   nir_deref deref;
+
+   nir_variable *var;
+} nir_deref_var;
+
+/* This enum describes how the array is referenced.  If the deref is
+ * direct then the base_offset is used.  If the deref is indirect then the
+ * offset is given by base_offset + indirect.  If the deref is a wildcard
+ * then the deref refers to all of the elements of the array at the same
+ * time.  Wildcard dereferences are only ever allowed in copy_var
+ * intrinsics and the source and destination derefs must have matching
+ * wildcards.
+ */
+typedef enum {
+   nir_deref_array_type_direct,
+   nir_deref_array_type_indirect,
+   nir_deref_array_type_wildcard,
+} nir_deref_array_type;
+
+typedef struct {
+   nir_deref deref;
+
+   nir_deref_array_type deref_array_type;
+   unsigned base_offset;
+   nir_src indirect;
+} nir_deref_array;
+
+typedef struct {
+   nir_deref deref;
+
+   unsigned index;
+} nir_deref_struct;
+
+NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
+NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
+NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
+
+typedef struct {
+   nir_instr instr;
+
+   unsigned num_params;
+   nir_deref_var **params;
+   nir_deref_var *return_deref;
+
+   struct nir_function_overload *callee;
+} nir_call_instr;
+
+#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \
+                  num_variables, num_indices, flags) \
+   nir_intrinsic_##name,
+
+#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name,
+
+typedef enum {
+#include "nir_intrinsics.h"
+   nir_num_intrinsics = nir_last_intrinsic + 1
+} nir_intrinsic_op;
+
+#undef INTRINSIC
+#undef LAST_INTRINSIC
+
+/** Represents an intrinsic
+ *
+ * An intrinsic is an instruction type for handling things that are
+ * more-or-less regular operations but don't just consume and produce SSA
+ * values like ALU operations do.  Intrinsics are not for things that have
+ * special semantic meaning such as phi nodes and parallel copies.
+ * Examples of intrinsics include variable load/store operations, system
+ * value loads, and the like.  Even though texturing more-or-less falls
+ * under this category, texturing is its own instruction type because
+ * trying to represent texturing with intrinsics would lead to a
+ * combinatorial explosion of intrinsic opcodes.
+ *
+ * By having a single instruction type for handling a lot of different
+ * cases, optimization passes can look for intrinsics and, for the most
+ * part, completely ignore them.  Each intrinsic type also has a few
+ * possible flags that govern whether or not they can be reordered or
+ * eliminated.  That way passes like dead code elimination can still work
+ * on intrinsics without understanding the meaning of each.
+ *
+ * Each intrinsic has some number of constant indices, some number of
+ * variables, and some number of sources.  What these sources, variables,
+ * and indices mean depends on the intrinsic and is documented with the
+ * intrinsic declaration in nir_intrinsics.h.  Intrinsics and texture
+ * instructions are the only types of instruction that can operate on
+ * variables.
+ */
+typedef struct {
+   nir_instr instr;
+
+   nir_intrinsic_op intrinsic;
+
+   nir_dest dest;
+
+   /** number of components if this is a vectorized intrinsic
+    *
+    * Similarly to ALU operations, some intrinsics are vectorized.
+    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
+    * For vectorized intrinsics, the num_components field specifies the
+    * number of destination components and the number of source components
+    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
+    */
+   uint8_t num_components;
+
+   int const_index[3];
+
+   nir_deref_var *variables[2];
+
+   nir_src src[];
+} nir_intrinsic_instr;
+
+/**
+ * \name NIR intrinsics semantic flags
+ *
+ * information about what the compiler can do with the intrinsics.
+ *
+ * \sa nir_intrinsic_info::flags
+ */
+typedef enum {
+   /**
+    * whether the intrinsic can be safely eliminated if none of its output
+    * values are being used.
+    */
+   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),
+
+   /**
+    * Whether the intrinsic can be reordered with respect to any other
+    * intrinsic, i.e. whether the only reordering dependencies of the
+    * intrinsic are due to the register reads/writes.
+    */
+   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
+} nir_intrinsic_semantic_flag;
+
+#define NIR_INTRINSIC_MAX_INPUTS 4
+
+typedef struct {
+   const char *name;
+
+   unsigned num_srcs; /**< number of register/SSA inputs */
+
+   /** number of components of each input register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
+   unsigned src_components[NIR_INTRINSIC_MAX_INPUTS];
+
+   bool has_dest;
+
+   /** number of components of the output register
+    *
+    * If this value is 0, the number of components is given by the
+    * num_components field of nir_intrinsic_instr.
+    */
+   unsigned dest_components;
+
+   /** the number of inputs/outputs that are variables */
+   unsigned num_variables;
+
+   /** the number of constant indices used by the intrinsic */
+   unsigned num_indices;
+
+   /** semantic flags for calls to this intrinsic */
+   nir_intrinsic_semantic_flag flags;
+} nir_intrinsic_info;
+
+extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
+
+/**
+ * \group texture information
+ *
+ * This gives semantic information about textures which is useful to the
+ * frontend, the backend, and lowering passes, but not the optimizer.
+ */
+
+typedef enum {
+   nir_tex_src_coord,
+   nir_tex_src_projector,
+   nir_tex_src_comparitor, /* shadow comparitor */
+   nir_tex_src_offset,
+   nir_tex_src_bias,
+   nir_tex_src_lod,
+   nir_tex_src_ms_index, /* MSAA sample index */
+   nir_tex_src_ddx,
+   nir_tex_src_ddy,
+   nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
+   nir_num_tex_src_types
+} nir_tex_src_type;
+
+typedef struct {
+   nir_src src;
+   nir_tex_src_type src_type;
+} nir_tex_src;
+
+typedef enum {
+   nir_texop_tex,                /**< Regular texture look-up */
+   nir_texop_txb,                /**< Texture look-up with LOD bias */
+   nir_texop_txl,                /**< Texture look-up with explicit LOD */
+   nir_texop_txd,                /**< Texture look-up with partial derivatives */
+   nir_texop_txf,                /**< Texel fetch with explicit LOD */
+   nir_texop_txf_ms,                /**< Multisample texture fetch */
+   nir_texop_txs,                /**< Texture size */
+   nir_texop_lod,                /**< Texture lod query */
+   nir_texop_tg4,                /**< Texture gather */
+   nir_texop_query_levels       /**< Texture levels query */
+} nir_texop;
+
+typedef struct {
+   nir_instr instr;
+
+   enum glsl_sampler_dim sampler_dim;
+   nir_alu_type dest_type;
+
+   nir_texop op;
+   nir_dest dest;
+   nir_tex_src *src;
+   unsigned num_srcs, coord_components;
+   bool is_array, is_shadow;
+
+   /**
+    * If is_shadow is true, whether this is the old-style shadow that outputs 4
+    * components or the new-style shadow that outputs 1 component.
+    */
+   bool is_new_style_shadow;
+
+   /* constant offset - must be 0 if the offset source is used */
+   int const_offset[4];
+
+   /* gather component selector */
+   unsigned component : 2;
+
+   /** The sampler index
+    *
+    * If this texture instruction has a nir_tex_src_sampler_offset source,
+    * then the sampler index is given by sampler_index + sampler_offset.
+    */
+   unsigned sampler_index;
+
+   /** The size of the sampler array or 0 if it's not an array */
+   unsigned sampler_array_size;
+
+   nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */
+} nir_tex_instr;
+
+/**
+ * Returns the number of components produced by a texture instruction.
+ *
+ *  - nir_texop_txs: 1, 2 or 3 depending on the sampler dimensionality, plus
+ *    one more when the sampler is an array.
+ *  - nir_texop_lod: 2, matching the vec2 result of the GLSL LOD query.
+ *  - nir_texop_query_levels: 1, matching the scalar integer result of the
+ *    GLSL levels query.
+ *  - new-style shadow look-ups: 1.
+ *  - everything else: 4.
+ */
+static inline unsigned
+nir_tex_instr_dest_size(nir_tex_instr *instr)
+{
+   if (instr->op == nir_texop_txs) {
+      unsigned ret;
+      switch (instr->sampler_dim) {
+         case GLSL_SAMPLER_DIM_1D:
+         case GLSL_SAMPLER_DIM_BUF:
+            ret = 1;
+            break;
+         case GLSL_SAMPLER_DIM_2D:
+         case GLSL_SAMPLER_DIM_CUBE:
+         case GLSL_SAMPLER_DIM_MS:
+         case GLSL_SAMPLER_DIM_RECT:
+         case GLSL_SAMPLER_DIM_EXTERNAL:
+            ret = 2;
+            break;
+         case GLSL_SAMPLER_DIM_3D:
+            ret = 3;
+            break;
+         default:
+            unreachable("not reached");
+      }
+      /* Array samplers report the layer count as an extra component. */
+      if (instr->is_array)
+         ret++;
+      return ret;
+   }
+
+   /* The LOD query yields two values, not a full vec4. */
+   if (instr->op == nir_texop_lod)
+      return 2;
+
+   /* The number of mip levels is a single integer. */
+   if (instr->op == nir_texop_query_levels)
+      return 1;
+
+   if (instr->is_shadow && instr->is_new_style_shadow)
+      return 1;
+
+   return 4;
+}
+
+/**
+ * Returns the number of components expected for source \c src of a texture
+ * instruction, based on that source's type:
+ *
+ *  - coordinate: coord_components
+ *  - offset, ddx, ddy: coord_components, minus one for array samplers
+ *  - anything else: 1
+ */
+static inline unsigned
+nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+{
+   switch (instr->src[src].src_type) {
+   case nir_tex_src_coord:
+      return instr->coord_components;
+
+   case nir_tex_src_offset:
+   case nir_tex_src_ddx:
+   case nir_tex_src_ddy:
+      return instr->is_array ? instr->coord_components - 1
+                             : instr->coord_components;
+
+   default:
+      return 1;
+   }
+}
+
+/**
+ * Returns the position of the first source of \c instr whose type equals
+ * \c type, or -1 when no source has that type.
+ */
+static inline int
+nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+{
+   unsigned idx = 0;
+
+   while (idx < instr->num_srcs) {
+      if (instr->src[idx].src_type == type)
+         return (int) idx;
+      idx++;
+   }
+
+   return -1;
+}
+
+typedef struct {
+   union {
+      float f[4];
+      int32_t i[4];
+      uint32_t u[4];
+   };
+} nir_const_value;
+
+typedef struct {
+   nir_instr instr;
+
+   nir_const_value value;
+
+   nir_ssa_def def;
+} nir_load_const_instr;
+
+typedef enum {
+   nir_jump_return,
+   nir_jump_break,
+   nir_jump_continue,
+} nir_jump_type;
+
+typedef struct {
+   nir_instr instr;
+   nir_jump_type type;
+} nir_jump_instr;
+
+/* creates a new SSA variable in an undefined state */
+
+typedef struct {
+   nir_instr instr;
+   nir_ssa_def def;
+} nir_ssa_undef_instr;
+
+typedef struct {
+   struct exec_node node;
+
+   /* The predecessor block corresponding to this source */
+   struct nir_block *pred;
+
+   nir_src src;
+} nir_phi_src;
+
+#define nir_foreach_phi_src(phi, entry) \
+   foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+
+typedef struct {
+   nir_instr instr;
+
+   struct exec_list srcs; /**< list of nir_phi_src */
+
+   nir_dest dest;
+} nir_phi_instr;
+
+typedef struct {
+   struct exec_node node;
+   nir_src src;
+   nir_dest dest;
+} nir_parallel_copy_entry;
+
+#define nir_foreach_parallel_copy_entry(pcopy, entry) \
+   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+typedef struct {
+   nir_instr instr;
+
+   /* A list of nir_parallel_copy_entry's.  The sources of all of the
+    * entries are copied to the corresponding destinations "in parallel".
+    * In other words, if we have two entries: a -> b and b -> a, the values
+    * get swapped.
+    */
+   struct exec_list entries;
+} nir_parallel_copy_instr;
+
+NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+                nir_parallel_copy_instr, instr)
+
+/*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops. The leaves of the tree are basic blocks, lists of
+ * instructions that always run start-to-finish. Each basic block also keeps
+ * track of its successors (blocks which may run immediately after the current
+ * block) and predecessors (blocks which could have run immediately before the
+ * current block). Each function also has a start block and an end block which
+ * all return statements point to (which is always empty). Together, all the
+ * blocks with their predecessors and successors make up the control flow
+ * graph (CFG) of the function. There are helpers that modify the tree of
+ * control flow nodes while modifying the CFG appropriately; these should be
+ * used instead of modifying the tree directly.
+ */
+
+typedef enum {
+   nir_cf_node_block,
+   nir_cf_node_if,
+   nir_cf_node_loop,
+   nir_cf_node_function
+} nir_cf_node_type;
+
+typedef struct nir_cf_node {
+   struct exec_node node;
+   nir_cf_node_type type;
+   struct nir_cf_node *parent;
+} nir_cf_node;
+
+typedef struct nir_block {
+   nir_cf_node cf_node;
+
+   struct exec_list instr_list; /** < list of nir_instr */
+
+   /** generic block index; generated by nir_index_blocks */
+   unsigned index;
+
+   /*
+    * Each block can only have up to 2 successors, so we put them in a simple
+    * array - no need for anything more complicated.
+    */
+   struct nir_block *successors[2];
+
+   /* Set of nir_block predecessors in the CFG */
+   struct set *predecessors;
+
+   /*
+    * this node's immediate dominator in the dominance tree - set to NULL for
+    * the start block.
+    */
+   struct nir_block *imm_dom;
+
+   /* This node's children in the dominance tree */
+   unsigned num_dom_children;
+   struct nir_block **dom_children;
+
+   /* Set of nir_block's on the dominance frontier of this block */
+   struct set *dom_frontier;
+
+   /*
+    * These two indices have the property that dom_{pre,post}_index for each
+    * child of this block in the dominance tree will always be between
+    * dom_pre_index and dom_post_index for this block, which makes testing if
+    * a given block is dominated by another block an O(1) operation.
+    */
+   unsigned dom_pre_index, dom_post_index;
+
+   /* live in and out for this block; used for liveness analysis */
+   BITSET_WORD *live_in;
+   BITSET_WORD *live_out;
+} nir_block;
+
+static inline nir_instr *
+nir_block_first_instr(nir_block *block)
+{
+   return exec_node_data(nir_instr,
+                         exec_list_get_head(&block->instr_list), node);
+}
+
+static inline nir_instr *
+nir_block_last_instr(nir_block *block)
+{
+   return exec_node_data(nir_instr,
+                         exec_list_get_tail(&block->instr_list), node);
+}
+
+#define nir_foreach_instr(block, instr) \
+   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse(block, instr) \
+   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_safe(block, instr) \
+   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+
+typedef struct {
+   nir_cf_node cf_node;
+   nir_src condition;
+
+   struct exec_list then_list; /** < list of nir_cf_node */
+   struct exec_list else_list; /** < list of nir_cf_node */
+} nir_if;
+
+static inline nir_cf_node *
+nir_if_first_then_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_head(&if_stmt->then_list), node);
+}
+
+static inline nir_cf_node *
+nir_if_last_then_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_tail(&if_stmt->then_list), node);
+}
+
+static inline nir_cf_node *
+nir_if_first_else_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_head(&if_stmt->else_list), node);
+}
+
+static inline nir_cf_node *
+nir_if_last_else_node(nir_if *if_stmt)
+{
+   return exec_node_data(nir_cf_node,
+                         exec_list_get_tail(&if_stmt->else_list), node);
+}
+
+typedef struct {
+   nir_cf_node cf_node;
+
+   struct exec_list body; /** < list of nir_cf_node */
+} nir_loop;
+
+static inline nir_cf_node *
+nir_loop_first_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node);
+}
+
+static inline nir_cf_node *
+nir_loop_last_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node);
+}
+
+/**
+ * Various bits of metadata that may be created or required by optimization
+ * and analysis passes.  Each value is a distinct bit so they can be OR'ed.
+ */
+typedef enum {
+   nir_metadata_none = 0x0,
+   nir_metadata_block_index = 0x1,
+   nir_metadata_dominance = 0x2,
+   nir_metadata_live_variables = 0x4,
+} nir_metadata;
+
+typedef struct {
+   nir_cf_node cf_node;
+
+   /** pointer to the overload of which this is an implementation */
+   struct nir_function_overload *overload;
+
+   struct exec_list body; /** < list of nir_cf_node */
+
+   nir_block *start_block, *end_block; /** < end_block is always empty */
+
+   /** list for all local variables in the function */
+   struct exec_list locals;
+
+   /** array of variables used as parameters */
+   unsigned num_params;
+   nir_variable **params;
+
+   /** variable used to hold the result of the function */
+   nir_variable *return_var;
+
+   /** list of local registers in the function */
+   struct exec_list registers;
+
+   /** next available local register index */
+   unsigned reg_alloc;
+
+   /** next available SSA value index */
+   unsigned ssa_alloc;
+
+   /* total number of basic blocks; presumably needs nir_metadata_block_index */
+   unsigned num_blocks;
+
+   nir_metadata valid_metadata; /** < presumably the metadata bits now valid */
+} nir_function_impl;
+
+static inline nir_cf_node *
+nir_cf_node_next(nir_cf_node *node)
+{
+   struct exec_node *succ = exec_node_get_next(&node->node);
+   /* Reaching the list's tail sentinel means nothing follows this node. */
+   if (!exec_node_is_tail_sentinel(succ))
+      return exec_node_data(nir_cf_node, succ, node);
+   return NULL;
+}
+
+static inline nir_cf_node *
+nir_cf_node_prev(nir_cf_node *node)
+{
+   struct exec_node *pred = exec_node_get_prev(&node->node);
+   /* Reaching the list's head sentinel means nothing precedes this node. */
+   if (!exec_node_is_head_sentinel(pred))
+      return exec_node_data(nir_cf_node, pred, node);
+   return NULL;
+}
+
+static inline bool
+nir_cf_node_is_first(const nir_cf_node *node)
+{
+   return exec_node_is_head_sentinel(node->node.prev);
+}
+
+static inline bool
+nir_cf_node_is_last(const nir_cf_node *node)
+{
+   return exec_node_is_tail_sentinel(node->node.next);
+}
+
+NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node)
+NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node)
+
+typedef enum {
+   nir_parameter_in,
+   nir_parameter_out,
+   nir_parameter_inout,
+} nir_parameter_type;
+
+typedef struct {
+   nir_parameter_type param_type;
+   const struct glsl_type *type;
+} nir_parameter;
+
+typedef struct nir_function_overload {
+   struct exec_node node;
+
+   unsigned num_params;
+   nir_parameter *params;
+   const struct glsl_type *return_type;
+
+   nir_function_impl *impl; /** < NULL if the overload is only declared yet */
+
+   /** pointer to the function of which this is an overload */
+   struct nir_function *function;
+} nir_function_overload;
+
+typedef struct nir_function {
+   struct exec_node node;
+
+   struct exec_list overload_list; /** < list of nir_function_overload */
+   const char *name;
+   struct nir_shader *shader;
+} nir_function;
+
+#define nir_function_first_overload(func) \
+   exec_node_data(nir_function_overload, \
+                  exec_list_get_head(&(func)->overload_list), node)
+
+typedef struct nir_shader_compiler_options {
+   bool lower_ffma;
+   bool lower_fpow;
+   bool lower_fsat;
+   bool lower_fsqrt;
+   /** lowers fneg and ineg to fsub and isub. */
+   bool lower_negate;
+} nir_shader_compiler_options;
+
+typedef struct nir_shader {
+   /** hash table of name -> uniform nir_variable */
+   struct hash_table *uniforms;
+
+   /** hash table of name -> input nir_variable */
+   struct hash_table *inputs;
+
+   /** hash table of name -> output nir_variable */
+   struct hash_table *outputs;
+
+   /** Set of driver-specific options for the shader.
+    *
+    * The memory for the options is expected to be kept in a single static
+    * copy by the driver.
+    */
+   const struct nir_shader_compiler_options *options;
+
+   /** list of global variables in the shader */
+   struct exec_list globals;
+
+   /** list of system value variables in the shader */
+   struct exec_list system_values;
+
+   struct exec_list functions; /** < list of nir_function */
+
+   /** list of global register in the shader */
+   struct exec_list registers;
+
+   /** structures used in this shader */
+   unsigned num_user_structures;
+   struct glsl_type **user_structures;
+
+   /** next available global register index */
+   unsigned reg_alloc;
+
+   /**
+    * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+    * access plus one
+    */
+   unsigned num_inputs, num_uniforms, num_outputs;
+} nir_shader;
+
+#define nir_foreach_overload(shader, overload)                        \
+   foreach_list_typed(nir_function, func, node, &(shader)->functions) \
+      foreach_list_typed(nir_function_overload, overload, node, \
+                         &(func)->overload_list)
+
+nir_shader *nir_shader_create(void *mem_ctx,
+                              const nir_shader_compiler_options *options);
+
+/** creates a register, including assigning it an index and adding it to the list */
+nir_register *nir_global_reg_create(nir_shader *shader);
+
+nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+void nir_reg_remove(nir_register *reg);
+
+/** creates a function and adds it to the shader's list of functions */
+nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+/** creates a null function returning null */
+nir_function_overload *nir_function_overload_create(nir_function *func);
+
+nir_function_impl *nir_function_impl_create(nir_function_overload *func);
+
+nir_block *nir_block_create(void *mem_ctx);
+nir_if *nir_if_create(void *mem_ctx);
+nir_loop *nir_loop_create(void *mem_ctx);
+
+nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+/** puts a control flow node immediately after another control flow node */
+void nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after);
+
+/** puts a control flow node immediately before another control flow node */
+void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before);
+
+/** puts a control flow node at the beginning of a list from an if, loop, or function */
+void nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node);
+
+/** puts a control flow node at the end of a list from an if, loop, or function */
+void nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node);
+
+/** removes a control flow node, doing any cleanup necessary */
+void nir_cf_node_remove(nir_cf_node *node);
+
+/** requests that the given pieces of metadata be generated */
+void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+/** dirties all but the preserved metadata */
+void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+nir_alu_instr *nir_alu_instr_create(void *mem_ctx, nir_op op);
+
+nir_jump_instr *nir_jump_instr_create(void *mem_ctx, nir_jump_type type);
+
+nir_load_const_instr *nir_load_const_instr_create(void *mem_ctx,
+                                                  unsigned num_components);
+
+nir_intrinsic_instr *nir_intrinsic_instr_create(void *mem_ctx,
+                                                nir_intrinsic_op op);
+
+nir_call_instr *nir_call_instr_create(void *mem_ctx,
+                                      nir_function_overload *callee);
+
+nir_tex_instr *nir_tex_instr_create(void *mem_ctx, unsigned num_srcs);
+
+nir_phi_instr *nir_phi_instr_create(void *mem_ctx);
+
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(void *mem_ctx);
+
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(void *mem_ctx,
+                                                unsigned num_components);
+
+nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+nir_deref_array *nir_deref_array_create(void *mem_ctx);
+nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+void nir_instr_insert_before(nir_instr *instr, nir_instr *before);
+void nir_instr_insert_after(nir_instr *instr, nir_instr *after);
+
+void nir_instr_insert_before_block(nir_block *block, nir_instr *before);
+void nir_instr_insert_after_block(nir_block *block, nir_instr *after);
+
+void nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before);
+void nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after);
+
+void nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before);
+void nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after);
+
+void nir_instr_remove(nir_instr *instr);
+
+typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
+typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
+bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
+                         void *state);
+bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
+bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
+
+nir_const_value *nir_src_as_const_value(nir_src src);
+bool nir_srcs_equal(nir_src src1, nir_src src2);
+void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
+
+void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
+                       unsigned num_components, const char *name);
+void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+                      unsigned num_components, const char *name);
+void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx);
+
+/* visits basic blocks in source-code order */
+typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
+bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb,
+                       void *state);
+bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
+                               void *state);
+
+/* If the following CF node is an if, this function returns that if.
+ * Otherwise, it returns NULL.
+ */
+nir_if *nir_block_get_following_if(nir_block *block);
+
+void nir_index_local_regs(nir_function_impl *impl);
+void nir_index_global_regs(nir_shader *shader);
+void nir_index_ssa_defs(nir_function_impl *impl);
+
+void nir_index_blocks(nir_function_impl *impl);
+
+void nir_print_shader(nir_shader *shader, FILE *fp);
+void nir_print_instr(const nir_instr *instr, FILE *fp);
+
+#ifdef DEBUG
+void nir_validate_shader(nir_shader *shader);
+#else
+static inline void nir_validate_shader(nir_shader *shader) { }
+#endif /* DEBUG */
+
+void nir_calc_dominance_impl(nir_function_impl *impl);
+void nir_calc_dominance(nir_shader *shader);
+
+nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+void nir_split_var_copies(nir_shader *shader);
+
+void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+void nir_lower_var_copies(nir_shader *shader);
+
+void nir_lower_global_vars_to_local(nir_shader *shader);
+
+void nir_lower_locals_to_regs(nir_shader *shader);
+
+void nir_lower_io(nir_shader *shader);
+
+void nir_lower_vars_to_ssa(nir_shader *shader);
+
+void nir_remove_dead_variables(nir_shader *shader);
+
+void nir_lower_vec_to_movs(nir_shader *shader);
+void nir_lower_alu_to_scalar(nir_shader *shader);
+
+void nir_lower_phis_to_scalar(nir_shader *shader);
+
+void nir_lower_samplers(nir_shader *shader,
+                        struct gl_shader_program *shader_program,
+                        struct gl_program *prog);
+
+void nir_lower_system_values(nir_shader *shader);
+
+void nir_lower_atomics(nir_shader *shader);
+void nir_lower_to_source_mods(nir_shader *shader);
+
+void nir_live_variables_impl(nir_function_impl *impl);
+bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+
+void nir_convert_to_ssa_impl(nir_function_impl *impl);
+void nir_convert_to_ssa(nir_shader *shader);
+void nir_convert_from_ssa(nir_shader *shader);
+
+bool nir_opt_algebraic(nir_shader *shader);
+bool nir_opt_constant_folding(nir_shader *shader);
+
+bool nir_opt_global_to_local(nir_shader *shader);
+
+bool nir_copy_prop_impl(nir_function_impl *impl);
+bool nir_copy_prop(nir_shader *shader);
+
+bool nir_opt_cse(nir_shader *shader);
+
+bool nir_opt_dce_impl(nir_function_impl *impl);
+bool nir_opt_dce(nir_shader *shader);
+
+void nir_opt_gcm(nir_shader *shader);
+
+bool nir_opt_peephole_select(nir_shader *shader);
+bool nir_opt_peephole_ffma(nir_shader *shader);
+
+bool nir_opt_remove_phis(nir_shader *shader);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/mesalib/src/glsl/nir/nir_algebraic.py b/mesalib/src/glsl/nir/nir_algebraic.py
new file mode 100644
index 000000000..afab1a008
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_algebraic.py
@@ -0,0 +1,300 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Jason Ekstrand (jason@jlekstrand.net)
+
+import itertools
+import struct
+import sys
+import mako.template
+import re
+
+# Represents a set of variables, each with a unique id
+class VarSet(object):
+   def __init__(self):
+      self.names = {}
+      self.ids = itertools.count()
+      self.immutable = False;
+
+   def __getitem__(self, name):
+      if name not in self.names:
+         assert not self.immutable, "Unknown replacement variable: " + name
+         self.names[name] = self.ids.next()
+
+      return self.names[name]
+
+   def lock(self):
+      self.immutable = True
+
+class Value(object):
+   @staticmethod
+   def create(val, name_base, varset):
+      if isinstance(val, tuple):
+         return Expression(val, name_base, varset)
+      elif isinstance(val, Expression):
+         return val
+      elif isinstance(val, (str, unicode)):
+         return Variable(val, name_base, varset)
+      elif isinstance(val, (bool, int, long, float)):
+         return Constant(val, name_base)
+
+   __template = mako.template.Template("""
+static const ${val.c_type} ${val.name} = {
+   { ${val.type_enum} },
+% if isinstance(val, Constant):
+   { ${hex(val)} /* ${val.value} */ },
+% elif isinstance(val, Variable):
+   ${val.index}, /* ${val.var_name} */
+   ${'true' if val.is_constant else 'false'},
+   nir_type_${ val.required_type or 'invalid' },
+% elif isinstance(val, Expression):
+   nir_op_${val.opcode},
+   { ${', '.join(src.c_ptr for src in val.sources)} },
+% endif
+};""")
+
+   def __init__(self, name, type_str):
+      self.name = name
+      self.type_str = type_str
+
+   @property
+   def type_enum(self):
+      return "nir_search_value_" + self.type_str
+
+   @property
+   def c_type(self):
+      return "nir_search_" + self.type_str
+
+   @property
+   def c_ptr(self):
+      return "&{0}.value".format(self.name)
+
+   def render(self):
+      return self.__template.render(val=self,
+                                    Constant=Constant,
+                                    Variable=Variable,
+                                    Expression=Expression)
+
+class Constant(Value):
+   def __init__(self, val, name):
+      Value.__init__(self, name, "constant")
+      self.value = val
+
+   def __hex__(self):
+      # Even if it's an integer, we still need to unpack as an unsigned
+      # int.  This is because, without C99, we can only assign to the first
+      # element of a union in an initializer.
+      if isinstance(self.value, (bool)):
+         return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+      if isinstance(self.value, (int, long)):
+         return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+      elif isinstance(self.value, float):
+         return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+      else:
+         assert False
+
+_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+class Variable(Value):
+   def __init__(self, val, name, varset):
+      Value.__init__(self, name, "variable")
+
+      m = _var_name_re.match(val)
+      assert m and m.group('name') is not None
+
+      self.var_name = m.group('name')
+      self.is_constant = m.group('const') is not None
+      self.required_type = m.group('type')
+
+      if self.required_type is not None:
+         assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+      self.index = varset[self.var_name]
+
+class Expression(Value):
+   def __init__(self, expr, name_base, varset):
+      Value.__init__(self, name_base, "expression")
+      assert isinstance(expr, tuple)
+
+      self.opcode = expr[0]
+      self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
+                       for (i, src) in enumerate(expr[1:]) ]
+
+   def render(self):
+      srcs = "\n".join(src.render() for src in self.sources)
+      return srcs + super(Expression, self).render()
+
+_optimization_ids = itertools.count()
+
+condition_list = ['true']
+
+class SearchAndReplace(object):
+   def __init__(self, transform):
+      self.id = _optimization_ids.next()
+
+      search = transform[0]
+      replace = transform[1]
+      if len(transform) > 2:
+         self.condition = transform[2]
+      else:
+         self.condition = 'true'
+
+      if self.condition not in condition_list:
+         condition_list.append(self.condition)
+      self.condition_index = condition_list.index(self.condition)
+
+      varset = VarSet()
+      if isinstance(search, Expression):
+         self.search = search
+      else:
+         self.search = Expression(search, "search{0}".format(self.id), varset)
+
+      varset.lock()
+
+      if isinstance(replace, Value):
+         self.replace = replace
+      else:
+         self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+_algebraic_pass_template = mako.template.Template("""
+#include "nir.h"
+#include "nir_search.h"
+
+struct transform {
+   const nir_search_expression *search;
+   const nir_search_value *replace;
+   unsigned condition_offset;
+};
+
+% for (opcode, xform_list) in xform_dict.iteritems():
+% for xform in xform_list:
+   ${xform.search.render()}
+   ${xform.replace.render()}
+% endfor
+
+static const struct transform ${pass_name}_${opcode}_xforms[] = {
+% for xform in xform_list:
+   { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+% endfor
+};
+% endfor
+
+struct opt_state {
+   void *mem_ctx;
+   bool progress;
+   const bool *condition_flags;
+};
+
+static bool
+${pass_name}_block(nir_block *block, void *void_state)
+{
+   struct opt_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (!alu->dest.dest.is_ssa)
+         continue;
+
+      switch (alu->op) {
+      % for opcode in xform_dict.keys():
+      case nir_op_${opcode}:
+         for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+            const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+            if (state->condition_flags[xform->condition_offset] &&
+                nir_replace_instr(alu, xform->search, xform->replace,
+                                  state->mem_ctx)) {
+               state->progress = true;
+               break;
+            }
+         }
+         break;
+      % endfor
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+static bool
+${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+{
+   struct opt_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.progress = false;
+   state.condition_flags = condition_flags;
+
+   nir_foreach_block(impl, ${pass_name}_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return state.progress;
+}
+
+
+bool
+${pass_name}(nir_shader *shader)
+{
+   bool progress = false;
+   bool condition_flags[${len(condition_list)}];
+   const nir_shader_compiler_options *options = shader->options;
+
+   % for index, condition in enumerate(condition_list):
+   condition_flags[${index}] = ${condition};
+   % endfor
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         progress |= ${pass_name}_impl(overload->impl, condition_flags);
+   }
+
+   return progress;
+}
+""")
+
+class AlgebraicPass(object):
+   def __init__(self, pass_name, transforms):
+      self.xform_dict = {}
+      self.pass_name = pass_name
+
+      for xform in transforms:
+         if not isinstance(xform, SearchAndReplace):
+            xform = SearchAndReplace(xform)
+
+         if xform.search.opcode not in self.xform_dict:
+            self.xform_dict[xform.search.opcode] = []
+
+         self.xform_dict[xform.search.opcode].append(xform)
+
+   def render(self):
+      return _algebraic_pass_template.render(pass_name=self.pass_name,
+                                             xform_dict=self.xform_dict,
+                                             condition_list=condition_list)
diff --git a/mesalib/src/glsl/nir/nir_builder.h b/mesalib/src/glsl/nir/nir_builder.h
new file mode 100644
index 000000000..7c4f7fd96
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_builder.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NIR_BUILDER_H
+#define NIR_BUILDER_H
+
+struct exec_list;
+
+typedef struct nir_builder {
+   struct exec_list *cf_node_list;
+   nir_shader *shader;
+   nir_function_impl *impl;
+} nir_builder;
+
+static inline void
+nir_builder_init(nir_builder *build, nir_function_impl *impl)
+{
+   memset(build, 0, sizeof *build);   /* cf_node_list starts out zeroed */
+   build->shader = impl->overload->function->shader;
+   build->impl = impl;
+}
+
+static inline void
+nir_builder_insert_after_cf_list(nir_builder *build,
+                                 struct exec_list *cf_node_list)
+{
+   build->cf_node_list = cf_node_list;
+}
+
+
+static inline nir_ssa_def *
+nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
+              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
+{
+   const nir_op_info *op_info = &nir_op_infos[op];
+   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
+   if (!instr)
+      return NULL;
+
+   instr->src[0].src = nir_src_for_ssa(src0);
+   if (src1)
+      instr->src[1].src = nir_src_for_ssa(src1);
+   if (src2)
+      instr->src[2].src = nir_src_for_ssa(src2);
+   if (src3)
+      instr->src[3].src = nir_src_for_ssa(src3);
+
+   /* Guess the number of components the destination temporary should have
+    * based on our input sizes, if it's not fixed for the op.
+    */
+   unsigned num_components = op_info->output_size;
+   if (num_components == 0) {
+      for (unsigned i = 0; i < op_info->num_inputs; i++) {
+         if (op_info->input_sizes[i] == 0)
+            num_components = MAX2(num_components,
+                                  instr->src[i].src.ssa->num_components);
+      }
+   }
+   assert(num_components != 0);
+
+   /* Make sure we don't swizzle from outside of our source vector (like if a
+    * scalar value was passed into a multiply with a vector).
+    */
+   for (unsigned i = 0; i < op_info->num_inputs; i++) {
+      for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) {
+         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
+      }
+   }
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+   instr->dest.write_mask = (1 << num_components) - 1;
+
+   nir_instr_insert_after_cf_list(build->cf_node_list, &instr->instr);
+
+   return &instr->dest.dest.ssa;
+}
+
+#define ALU1(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0)                           \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL);      \
+}
+
+#define ALU2(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)        \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL);      \
+}
+
+#define ALU3(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
+         nir_ssa_def *src1, nir_ssa_def *src2)                            \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL);      \
+}
+
+#define ALU4(op)                                                          \
+static inline nir_ssa_def *                                               \
+nir_##op(nir_builder *build, nir_ssa_def *src0,                           \
+         nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)         \
+{                                                                         \
+   return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3);      \
+}
+
+#include "nir_builder_opcodes.h"
+
+#endif /* NIR_BUILDER_H */
diff --git a/mesalib/src/glsl/nir/nir_builder_opcodes_h.py b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py
new file mode 100644
index 000000000..e27206ea8
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_builder_opcodes_h.py
@@ -0,0 +1,38 @@
+#! /usr/bin/env python
+# Prints a C header with one ALU<n>(<name>) line per opcode in nir_opcodes.
+template = """\
+/* Copyright (C) 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef NIR_BUILDER_OPCODES_H
+#define NIR_BUILDER_OPCODES_H
+
+% for name, opcode in sorted(opcodes.items()):
+ALU${opcode.num_inputs}(${name})
+% endfor
+
+#endif /* NIR_BUILDER_OPCODES_H */"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print(Template(template).render(opcodes=opcodes))
diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.h b/mesalib/src/glsl/nir/nir_constant_expressions.h
new file mode 100644
index 000000000..97997f2e5
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_constant_expressions.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+/* Evaluates opcode op on the constant sources in src; returns the value. */
+nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
+                                      nir_const_value *src);
diff --git a/mesalib/src/glsl/nir/nir_constant_expressions.py b/mesalib/src/glsl/nir/nir_constant_expressions.py
new file mode 100644
index 000000000..22bc4f095
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_constant_expressions.py
@@ -0,0 +1,352 @@
+#! /usr/bin/python2
+template = """\
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include <math.h>
+#include "main/core.h"
+#include "nir_constant_expressions.h"
+
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+static int isnormal(double x)
+{
+   return _fpclass(x) == _FPCLASS_NN || _fpclass(x) == _FPCLASS_PN;
+}
+#elif defined(__SUNPRO_CC)
+#include <ieeefp.h>
+static int isnormal(double x)
+{
+   return fpclass(x) == FP_NORMAL;
+}
+#endif
+
+#if defined(_MSC_VER)
+static double copysign(double x, double y)
+{
+   return _copysign(x, y);
+}
+#endif
+
+/**
+ * Evaluate one component of packSnorm4x8.
+ */
+static uint8_t
+pack_snorm_1x8(float x)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    packSnorm4x8
+     *    ------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
+     *
+     * We must first cast the float to an int, because casting a negative
+     * float to a uint is undefined.
+     */
+   return (uint8_t) (int8_t)
+          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 127.0f);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    packSnorm2x16
+     *    -------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+     *
+     * We must first cast the float to an int, because casting a negative
+     * float to a uint is undefined.
+     */
+   return (uint16_t) (int16_t)
+          _mesa_round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm4x8.
+ */
+static float
+unpack_snorm_1x8(uint8_t u)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    unpackSnorm4x8
+     *    --------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+     */
+   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static float
+unpack_snorm_1x16(uint16_t u)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    unpackSnorm2x16
+     *    ---------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+     */
+   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm4x8.
+ */
+static uint8_t
+pack_unorm_1x8(float x)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    packUnorm4x8
+     *    ------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+     */
+   return (uint8_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 255.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    packUnorm2x16
+     *    -------------
+     *    The conversion for component c of v to fixed point is done as
+     *    follows:
+     *
+     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+     */
+   return (uint16_t) _mesa_round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+/**
+ * Evaluate one component of unpackUnorm4x8.
+ */
+static float
+unpack_unorm_1x8(uint8_t u)
+{
+    /* From section 8.4 of the GLSL 4.30 spec:
+     *
+     *    unpackUnorm4x8
+     *    --------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackUnorm4x8: f / 255.0
+     */
+   return (float) u / 255.0f;
+}
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static float
+unpack_unorm_1x16(uint16_t u)
+{
+    /* From section 8.4 of the GLSL ES 3.00 spec:
+     *
+     *    unpackUnorm2x16
+     *    ---------------
+     *    The conversion for unpacked fixed-point value f to floating point is
+     *    done as follows:
+     *
+     *       unpackUnorm2x16: f / 65535.0
+     */
+   return (float) u / 65535.0f;
+}
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   return _mesa_float_to_half(x);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ */
+static float
+unpack_half_1x16(uint16_t u)
+{
+   return _mesa_half_to_float(u);
+}
+
+/* Some typed vector structures to make things like src0.y work */
+% for type in ["float", "int", "unsigned", "bool"]:
+struct ${type}_vec {
+   ${type} x;
+   ${type} y;
+   ${type} z;
+   ${type} w;
+};
+% endfor
+
+% for name, op in sorted(opcodes.items()):
+static nir_const_value
+evaluate_${name}(unsigned num_components, nir_const_value *_src)
+{
+   nir_const_value _dst_val = { { {0, 0, 0, 0} } };
+
+   ## For each non-per-component input, create a variable srcN that
+   ## contains x, y, z, and w elements which are filled in with the
+   ## appropriately-typed values.
+   % for j in range(op.num_inputs):
+      % if op.input_sizes[j] == 0:
+         <% continue %>
+      % elif "src" + str(j) not in op.const_expr:
+         ## Avoid unused variable warnings
+         <% continue %>
+      % endif
+
+      struct ${op.input_types[j]}_vec src${j} = {
+      % for k in range(op.input_sizes[j]):
+         % if op.input_types[j] == "bool":
+            _src[${j}].u[${k}] != 0,
+         % else:
+            _src[${j}].${op.input_types[j][:1]}[${k}],
+         % endif
+      % endfor
+      };
+   % endfor
+
+   % if op.output_size == 0:
+      ## For per-component instructions, we need to iterate over the
+      ## components and apply the constant expression one component
+      ## at a time.
+      for (unsigned _i = 0; _i < num_components; _i++) {
+         ## For each per-component input, create a variable srcN that
+         ## contains the value of the current (_i'th) component.
+         % for j in range(op.num_inputs):
+            % if op.input_sizes[j] != 0:
+               <% continue %>
+            % elif "src" + str(j) not in op.const_expr:
+               ## Avoid unused variable warnings
+               <% continue %>
+            % elif op.input_types[j] == "bool":
+               bool src${j} = _src[${j}].u[_i] != 0;
+            % else:
+               ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
+            % endif
+         % endfor
+
+         ## Create an appropriately-typed variable dst and assign the
+         ## result of the const_expr to it.  If const_expr already contains
+         ## writes to dst, just include const_expr directly.
+         % if "dst" in op.const_expr:
+            ${op.output_type} dst;
+            ${op.const_expr}
+         % else:
+            ${op.output_type} dst = ${op.const_expr};
+         % endif
+
+         ## Store the current component of the actual destination to the
+         ## value of dst.
+         % if op.output_type == "bool":
+            ## Sanitize the C value to a proper NIR bool
+            _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
+         % else:
+            _dst_val.${op.output_type[:1]}[_i] = dst;
+         % endif
+      }
+   % else:
+      ## In the non-per-component case, create a struct dst with
+      ## appropriately-typed elements x, y, z, and w and assign the result
+      ## of the const_expr to all components of dst, or include the
+      ## const_expr directly if it writes to dst already.
+      struct ${op.output_type}_vec dst;
+
+      % if "dst" in op.const_expr:
+         ${op.const_expr}
+      % else:
+         ## Splat the value to all components.  This way expressions which
+         ## write the same value to all components don't need to explicitly
+         ## write to dest.  One such example is fnoise which has a
+         ## const_expr of 0.0f.
+         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+      % endif
+
+      ## For each component in the destination, copy the value of dst to
+      ## the actual destination.
+      % for k in range(op.output_size):
+         % if op.output_type == "bool":
+            ## Sanitize the C value to a proper NIR bool
+            _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+         % else:
+            _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
+         % endif
+      % endfor
+   % endif
+
+   return _dst_val;
+}
+% endfor
+
+nir_const_value
+nir_eval_const_opcode(nir_op op, unsigned num_components,
+                      nir_const_value *src)
+{
+   switch (op) {
+% for name in sorted(opcodes.keys()):
+   ## Every opcode gets a case that hands off to its generated
+   ## evaluate_<name>() helper; returning directly needs no break.
+   case nir_op_${name}:
+      return evaluate_${name}(num_components, src);
+% endfor
+   default:
+      unreachable("shouldn't get here");
+   }
+}"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print(Template(template).render(opcodes=opcodes))
diff --git a/mesalib/src/glsl/nir/nir_dominance.c b/mesalib/src/glsl/nir/nir_dominance.c
new file mode 100644
index 000000000..2f50db1c1
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_dominance.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements the algorithms for computing the dominance tree and the
+ * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
+ * Harvey, and Kennedy.
+ */
+
+typedef struct {
+   nir_function_impl *impl;   /* function being analysed; supplies start_block */
+   bool progress;             /* set when a sweep changes some block's imm_dom */
+} dom_state;
+
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   /* Reset per-block dominance data.  Only the start block begins with an
+    * immediate dominator (itself); every other block starts with none. */
+   const dom_state *ctx = _state;
+   block->imm_dom = (block == ctx->impl->start_block) ? block : NULL;
+   block->num_dom_children = 0;
+
+   /* Drop whatever is currently stored in the dominance frontier set. */
+   struct set_entry *it;
+   set_foreach(block->dom_frontier, it) {
+      _mesa_set_remove(block->dom_frontier, it);
+   }
+
+   return true;
+}
+
+static nir_block *
+intersect(nir_block *b1, nir_block *b2)
+{
+   while (b1 != b2) {
+      /*
+       * Walk whichever block has the larger index up its imm_dom chain until
+       * both meet.  The comparisons are the opposite of the paper's because
+       * we index blocks beginning -> end (reverse post-order), not post-order.
+       */
+      while (b1->index > b2->index)
+         b1 = b1->imm_dom;
+      while (b2->index > b1->index)
+         b2 = b2->imm_dom;
+   }
+
+   return b1;
+}
+
+static bool
+calc_dominance_cb(nir_block *block, void *_state)
+{
+   /* One refinement step: recompute this block's immediate dominator from
+    * its predecessors and record in the state whether it changed.
+    */
+   dom_state *ctx = _state;
+   if (block == ctx->impl->start_block)
+      return true;
+
+   nir_block *candidate = NULL;
+   struct set_entry *it;
+   set_foreach(block->predecessors, it) {
+      nir_block *pred = (nir_block *) it->key;
+
+      /* Predecessors with no imm_dom yet contribute nothing. */
+      if (pred->imm_dom)
+         candidate = candidate ? intersect(pred, candidate) : pred;
+   }
+
+   assert(candidate);
+   if (block->imm_dom != candidate) {
+      block->imm_dom = candidate;
+      ctx->progress = true;
+   }
+
+   return true;
+}
+
+static bool
+calc_dom_frontier_cb(nir_block *block, void *state)
+{
+   (void) state;
+
+   /* Only join points (more than one predecessor) get added to frontiers. */
+   if (block->predecessors->entries <= 1)
+      return true;
+
+   struct set_entry *it;
+   set_foreach(block->predecessors, it) {
+      nir_block *b = (nir_block *) it->key;
+      for (; b != block->imm_dom; b = b->imm_dom)
+         _mesa_set_add(b->dom_frontier, block);
+   }
+
+   return true;
+}
+
+/*
+ * Compute each node's children in the dominance tree from the immediate
+ * dominator information. We do this in three stages:
+ *
+ * 1. Calculate the number of children each node has
+ * 2. Allocate arrays, setting the number of children to 0 again
+ * 3. For each node, add itself to its parent's list of children, using
+ *    num_dom_children as an index - at the end of this step, num_dom_children
+ *    for each node will be the same as it was at the end of step #1.
+ */
+
+static bool
+block_count_children(nir_block *block, void *state)
+{
+   /* Stage 1: credit this block to its immediate dominator, if it has one. */
+   nir_block *parent = block->imm_dom;
+   (void) state;
+   if (parent != NULL)
+      parent->num_dom_children += 1;
+   return true;
+}
+
+static bool
+block_alloc_children(nir_block *block, void *state)
+{
+   /* Stage 2: size the child array from the count gathered so far (state is
+    * the ralloc context), then zero the count so it can serve as an index. */
+   block->dom_children =
+      ralloc_array(state, nir_block *, block->num_dom_children);
+   block->num_dom_children = 0;
+
+   return true;
+}
+
+static bool
+block_add_child(nir_block *block, void *state)
+{
+   /* Stage 3: append this block to its immediate dominator's child array. */
+   nir_block *parent = block->imm_dom;
+   (void) state;
+   if (parent != NULL)
+      parent->dom_children[parent->num_dom_children++] = block;
+   return true;
+}
+
+static void
+calc_dom_children(nir_function_impl *impl)
+{
+   /* Three passes over the blocks: count, allocate, then fill. */
+   void *const alloc_ctx = ralloc_parent(impl);
+   nir_foreach_block(impl, block_count_children, NULL);
+   nir_foreach_block(impl, block_alloc_children, alloc_ctx);
+   nir_foreach_block(impl, block_add_child, NULL);
+}
+
+static void
+calc_dfs_indicies(nir_block *block, unsigned *index)
+{
+   /* Number the dominance tree depth-first: pre index on entry, post index
+    * once every child subtree has been numbered. */
+   block->dom_pre_index = (*index)++;
+   for (unsigned child = 0; child < block->num_dom_children; child++)
+      calc_dfs_indicies(block->dom_children[child], index);
+   block->dom_post_index = (*index)++;
+}
+
+void
+nir_calc_dominance_impl(nir_function_impl *impl)
+{
+   /* Nothing to do when the dominance metadata is already flagged valid. */
+   if (impl->valid_metadata & nir_metadata_dominance)
+      return;
+
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   dom_state ctx = { .impl = impl, .progress = true };
+   nir_foreach_block(impl, init_block_cb, &ctx);
+
+   /* Refine immediate dominators until a full sweep changes nothing. */
+   do {
+      ctx.progress = false;
+      nir_foreach_block(impl, calc_dominance_cb, &ctx);
+   } while (ctx.progress);
+
+   nir_foreach_block(impl, calc_dom_frontier_cb, &ctx);
+
+   /* The start block was its own imm_dom during iteration; clear that
+    * before the dominance-tree children are built from imm_dom. */
+   impl->start_block->imm_dom = NULL;
+   calc_dom_children(impl);
+
+   unsigned next_index = 0;
+   calc_dfs_indicies(impl->start_block, &next_index);
+}
+
+void
+nir_calc_dominance(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl are skipped */
+         nir_calc_dominance_impl(overload->impl);
+   }
+}
+
+/**
+ * Least common ancestor of two blocks in the dominance tree; a NULL block
+ * yields the other one.  Asserts same function and valid dominance metadata.
+ */
+nir_block *
+nir_dominance_lca(nir_block *b1, nir_block *b2)
+{
+   if (b1 == NULL)
+      return b2;
+
+   if (b2 == NULL)
+      return b1;
+
+   assert(nir_cf_node_get_function(&b1->cf_node) ==
+          nir_cf_node_get_function(&b2->cf_node));
+
+   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
+          nir_metadata_dominance);
+
+   return intersect(b1, b2);
+}
+
+/**
+ * Returns true if parent dominates child (true when they are the same block).
+ */
+bool
+nir_block_dominates(nir_block *parent, nir_block *child)
+{
+   assert(nir_cf_node_get_function(&parent->cf_node) ==
+          nir_cf_node_get_function(&child->cf_node));
+
+   assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
+          nir_metadata_dominance);
+   /* child's [pre, post] DFS range must lie within parent's */
+   return child->dom_pre_index >= parent->dom_pre_index &&
+          child->dom_post_index <= parent->dom_post_index;
+}
+
+static bool
+dump_block_dom(nir_block *block, void *state)
+{
+   const nir_block *idom = block->imm_dom;
+   if (idom != NULL)   /* blocks with no imm_dom print nothing */
+      fprintf((FILE *) state, "\t%u -> %u\n", idom->index, block->index);
+   return true;
+}
+
+void
+nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
+{
+   fprintf(fp, "digraph doms_%s {\n", impl->overload->function->name);
+   nir_foreach_block(impl, dump_block_dom, fp);   /* "idom -> block" edges */
+   fprintf(fp, "}\n\n");                          /* close the digraph */
+}
+
+void
+nir_dump_dom_tree(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl are skipped */
+         nir_dump_dom_tree_impl(overload->impl, fp);
+   }
+}
+
+static bool
+dump_block_dom_frontier(nir_block *block, void *state)
+{
+   /* Prints "DF(<index>) = {a, b, }" for this block's dominance frontier. */
+   FILE *out = state;
+   struct set_entry *it;
+
+   fprintf(out, "DF(%u) = {", block->index);
+   set_foreach(block->dom_frontier, it) {
+      fprintf(out, "%u, ", ((nir_block *) it->key)->index);
+   }
+   fputs("}\n", out);
+   return true;
+}
+
+void
+nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
+{
+   nir_foreach_block(impl, dump_block_dom_frontier, fp); /* one line each */
+}
+
+void
+nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl are skipped */
+         nir_dump_dom_frontier_impl(overload->impl, fp);
+   }
+}
+
+static bool
+dump_block_succs(nir_block *block, void *state)
+{
+   for (unsigned s = 0; s < 2; s++) {   /* both successor slots */
+      const nir_block *succ = block->successors[s];
+      if (succ != NULL)
+         fprintf((FILE *) state, "\t%u -> %u\n", block->index, succ->index);
+   }
+   return true;
+}
+
+void
+nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
+{
+   fprintf(fp, "digraph cfg_%s {\n", impl->overload->function->name);
+   nir_foreach_block(impl, dump_block_succs, fp);   /* successor edges */
+   fprintf(fp, "}\n\n");                            /* close the digraph */
+}
+
+void
+nir_dump_cfg(nir_shader *shader, FILE *fp)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)   /* overloads with no impl are skipped */
+         nir_dump_cfg_impl(overload->impl, fp);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_from_ssa.c b/mesalib/src/glsl/nir/nir_from_ssa.c
new file mode 100644
index 000000000..7c5009577
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_from_ssa.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This file implements an out-of-SSA pass as described in "Revisiting
+ * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
+ * Boissinot et. al.
+ */
+
+struct from_ssa_state {
+   void *mem_ctx;
+   void *dead_ctx;   /* ralloc ctx for merge sets/nodes and parallel copies */
+   struct hash_table *ssa_table;
+   struct hash_table *merge_node_table;   /* nir_ssa_def * -> merge_node * */
+   nir_instr *instr;
+   nir_function_impl *impl;
+};
+
+/* Returns true if a dominates b, judged from live_index and block dominance */
+static bool
+ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
+{
+   /* SSA undefs always dominate */
+   if (a->live_index == 0)
+      return true;
+   if (b->live_index < a->live_index)
+      return false;
+
+   nir_block *a_block = a->parent_instr->block;
+   nir_block *b_block = b->parent_instr->block;
+   if (a_block == b_block)
+      return a->live_index <= b->live_index;
+   return nir_block_dominates(a_block, b_block);
+}
+
+
+/* The following data structure, which I have named merge_set, is a way of
+ * representing a set of non-interfering registers.  This is
+ * based on the concept of a "dominance forest" presented in "Fast Copy
+ * Coalescing and Live-Range Identification" by Budimlic et al. but the
+ * implementation concept is taken from "Revisiting Out-of-SSA Translation
+ * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
+ *
+ * Each SSA definition is associated with a merge_node and the association
+ * is represented by a combination of a hash table and the "def" parameter
+ * in the merge_node structure.  The merge_set stores a linked list of
+ * merge_nodes in dominance order of the SSA definitions.  (Since the
+ * liveness analysis pass indexes the SSA values in dominance order for us,
+ * this is an easy thing to keep up.)  It is assumed that no pair of the
+ * nodes in a given set interfere.  Merging two sets or checking for
+ * interference can be done in a single linear-time merge-sort walk of the
+ * two lists of nodes.
+ */
+struct merge_set;
+
+typedef struct {
+   struct exec_node node;    /* link in merge_set::nodes */
+   struct merge_set *set;    /* set this node currently belongs to */
+   nir_ssa_def *def;         /* SSA definition this node stands for */
+} merge_node;
+
+typedef struct merge_set {
+   struct exec_list nodes;   /* merge_nodes, kept ordered by def->live_index */
+   unsigned size;            /* number of nodes in the list */
+   nir_register *reg;        /* starts NULL; presumably set later - confirm */
+} merge_set;
+
+#if 0   /* debug-only helper, currently compiled out */
+static void
+merge_set_dump(merge_set *set, FILE *fp)
+{
+   nir_ssa_def *dom[set->size];
+   int dom_idx = -1;
+   /* dom[] is a stack of defs dominating the node being printed */
+   foreach_list_typed(merge_node, node, node, &set->nodes) {
+      while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def))
+         dom_idx--;
+
+      for (int i = 0; i <= dom_idx; i++)   /* indent by stack depth */
+         fprintf(fp, "  ");
+
+      if (node->def->name)
+         fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name);
+      else
+         fprintf(fp, "ssa_%d\n", node->def->index);
+
+      dom[++dom_idx] = node->def;
+   }
+}
+#endif
+
+static merge_node *
+get_merge_node(nir_ssa_def *def, struct from_ssa_state *state)
+{
+   /* Reuse the node already recorded for this definition, if any. */
+   struct hash_entry *hit =
+      _mesa_hash_table_search(state->merge_node_table, def);
+   if (hit != NULL)
+      return hit->data;
+
+   /* Otherwise create a singleton set holding a fresh node for def. */
+   merge_set *singleton = ralloc(state->dead_ctx, merge_set);
+   merge_node *fresh = ralloc(state->dead_ctx, merge_node);
+
+   singleton->size = 1;
+   singleton->reg = NULL;
+   exec_list_make_empty(&singleton->nodes);
+   fresh->set = singleton;
+   fresh->def = def;
+   exec_list_push_head(&singleton->nodes, &fresh->node);
+   _mesa_hash_table_insert(state->merge_node_table, def, fresh);
+   return fresh;
+}
+
+static bool
+merge_nodes_interfere(merge_node *a, merge_node *b)
+{
+   return nir_ssa_defs_interfere(a->def, b->def); /* on the wrapped defs */
+}
+
+/* Merges b into a, keeping a's nodes ordered by live_index; returns a */
+static merge_set *
+merge_merge_sets(merge_set *a, merge_set *b)
+{
+   struct exec_node *an = exec_list_get_head(&a->nodes);
+   struct exec_node *bn = exec_list_get_head(&b->nodes);
+   while (!exec_node_is_tail_sentinel(bn)) {
+      merge_node *a_node = exec_node_data(merge_node, an, node);
+      merge_node *b_node = exec_node_data(merge_node, bn, node);
+      /* Move bn before an when a is exhausted or an's def comes later. */
+      if (exec_node_is_tail_sentinel(an) ||
+          a_node->def->live_index > b_node->def->live_index) {
+         struct exec_node *next = bn->next;
+         exec_node_remove(bn);
+         exec_node_insert_node_before(an, bn);
+         exec_node_data(merge_node, bn, node)->set = a;
+         bn = next;
+      } else {
+         an = an->next;
+      }
+   }
+
+   a->size += b->size;
+   b->size = 0;   /* every node of b has been moved into a */
+
+   return a;
+}
+
+/* Checks for any interference between two merge sets
+ *
+ * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA
+ * Translation for Correctness, Code Quality, and Efficiency" by
+ * Boissinot et. al.
+ */
+static bool
+merge_sets_interfere(merge_set *a, merge_set *b)
+{
+   merge_node *dom[a->size + b->size];
+   int dom_idx = -1;   /* index of the stack top; -1 while the stack is empty */
+
+   struct exec_node *an = exec_list_get_head(&a->nodes);
+   struct exec_node *bn = exec_list_get_head(&b->nodes);
+   while (!exec_node_is_tail_sentinel(an) ||
+          !exec_node_is_tail_sentinel(bn)) {
+      /* Take the next node, in live_index order, from either list. */
+      merge_node *current;
+      if (exec_node_is_tail_sentinel(an)) {
+         current = exec_node_data(merge_node, bn, node);
+         bn = bn->next;
+      } else if (exec_node_is_tail_sentinel(bn)) {
+         current = exec_node_data(merge_node, an, node);
+         an = an->next;
+      } else {
+         merge_node *a_node = exec_node_data(merge_node, an, node);
+         merge_node *b_node = exec_node_data(merge_node, bn, node);
+
+         if (a_node->def->live_index <= b_node->def->live_index) {
+            current = a_node;
+            an = an->next;
+         } else {
+            current = b_node;
+            bn = bn->next;
+         }
+      }
+      /* Pop stack entries whose def does not dominate the current def. */
+      while (dom_idx >= 0 &&
+             !ssa_def_dominates(dom[dom_idx]->def, current->def))
+         dom_idx--;
+
+      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
+         return true;
+
+      dom[++dom_idx] = current;
+   }
+
+   return false;
+}
+
+static bool
+add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *ctx = void_state;
+
+   /* Does either successor begin with a phi instruction?  Both successor
+    * slots are always inspected. */
+   bool succ_has_phi = false;
+   for (unsigned s = 0; s < 2; s++) {
+      if (block->successors[s] == NULL)
+         continue;
+
+      nir_instr *first = nir_block_first_instr(block->successors[s]);
+      if (first != NULL && first->type == nir_instr_type_phi)
+         succ_has_phi = true;
+   }
+
+   if (!succ_has_phi)
+      return true;
+
+   /* If one of our successors has at least one phi node, we need to
+    * create a parallel copy at the end of the block but before the jump
+    * (if there is one).
+    */
+   nir_parallel_copy_instr *pcopy =
+      nir_parallel_copy_instr_create(ctx->dead_ctx);
+
+   nir_instr *tail = nir_block_last_instr(block);
+   if (tail != NULL && tail->type == nir_instr_type_jump) {
+      nir_instr_insert_before(tail, &pcopy->instr);
+   } else {
+      nir_instr_insert_after_block(block, &pcopy->instr);
+   }
+
+   return true;
+}
+
+static nir_parallel_copy_instr *
+get_parallel_copy_at_end_of_block(nir_block *block)
+{
+   nir_instr *candidate = nir_block_last_instr(block);
+
+   /* A trailing jump is skipped: the parallel copy, if present, sits
+    * immediately before it.
+    */
+   if (candidate != NULL && candidate->type == nir_instr_type_jump)
+      candidate = nir_instr_prev(candidate);
+
+   /* No copy if the block is empty, nothing precedes the jump, or the
+    * instruction found is of some other type. */
+   if (candidate == NULL || candidate->type != nir_instr_type_parallel_copy)
+      return NULL;
+
+   return nir_instr_as_parallel_copy(candidate);
+}
+
+/** Isolate phi nodes with parallel copies
+ *
+ * In order to solve the dependency problems with the sources and
+ * destinations of phi nodes, we first isolate them by adding parallel
+ * copies to the beginnings and ends of basic blocks.  For every block with
+ * phi nodes, we add a parallel copy immediately following the last phi
+ * node that copies the destinations of all of the phi nodes to new SSA
+ * values.  We also add a parallel copy to the end of every block that has
+ * a successor with phi nodes that, for each phi node in each successor,
+ * copies the corresponding source of the phi node and adjusts the phi to
+ * use the destination of the parallel copy.
+ *
+ * In SSA form, each value has exactly one definition.  What this does is
+ * ensure that each value used in a phi also has exactly one use.  The
+ * destinations of phis are only used by the parallel copy immediately
+ * following the phi nodes.  Thanks to the parallel copy at the end of
+ * the predecessor block, the sources of phi nodes are the only use of
+ * that value.  This allows us to immediately assign all the sources and
+ * destinations of any given phi node to the same register without worrying
+ * about interference at all.  We do coalescing to get rid of the parallel
+ * copies where possible.
+ *
+ * Before this pass can be run, we have to iterate over the blocks with
+ * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
+ * the ends of blocks exist.  We can create the ones at the beginnings as
+ * we go, but the ones at the ends of blocks need to be created ahead of
+ * time because of potential back-edges in the CFG.
+ *
+ * This is a block callback; void_state is the struct from_ssa_state of the
+ * pass.  It always returns true.
+ */
+static bool
+isolate_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   /* First pass over the phis: just find the last one. */
+   nir_instr *last_phi_instr = NULL;
+   nir_foreach_instr(block, instr) {
+      /* Phi nodes only ever come at the start of a block */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      last_phi_instr = instr;
+   }
+
+   /* If we don't have any phi's, then there's nothing for us to do. */
+   if (last_phi_instr == NULL)
+      return true;
+
+   /* If we have phi nodes, we need to create a parallel copy at the
+    * start of this block but after the phi nodes.
+    */
+   nir_parallel_copy_instr *block_pcopy =
+      nir_parallel_copy_instr_create(state->dead_ctx);
+   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);
+
+   /* Second pass over the phis: isolate every source and the destination. */
+   nir_foreach_instr(block, instr) {
+      /* Phi nodes only ever come at the start of a block */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      nir_foreach_phi_src(phi, src) {
+         /* The parallel copy at the end of the predecessor must already
+          * have been created by add_parallel_copy_to_end_of_block.
+          */
+         nir_parallel_copy_instr *pcopy =
+            get_parallel_copy_at_end_of_block(src->pred);
+         assert(pcopy);
+
+         nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
+                                                 nir_parallel_copy_entry);
+         exec_list_push_tail(&pcopy->entries, &entry->node);
+
+         /* The new entry reads the phi's current source, so the parallel
+          * copy becomes a user of that source value.
+          */
+         nir_src_copy(&entry->src, &src->src, state->dead_ctx);
+         _mesa_set_add(src->src.ssa->uses, &pcopy->instr);
+
+         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
+                           phi->dest.ssa.num_components, src->src.ssa->name);
+
+         struct set_entry *use_entry =
+            _mesa_set_search(src->src.ssa->uses, instr);
+         if (use_entry)
+            /* It is possible that a phi node can use the same source twice
+             * but for different basic blocks.  If that happens, use_entry
+             * will be NULL because we already deleted it.  This is safe
+             * because, by the time the loop is done, we will have deleted
+             * all of the sources of the phi from their respective use sets
+             * and moved them to the parallel copy definitions.
+             */
+            _mesa_set_remove(src->src.ssa->uses, use_entry);
+
+         /* Re-point the phi source at the destination of the new entry. */
+         src->src.ssa = &entry->dest.ssa;
+         _mesa_set_add(entry->dest.ssa.uses, instr);
+      }
+
+      /* Now isolate the destination of the phi with an entry in the
+       * parallel copy that follows the phis of this block.
+       */
+      nir_parallel_copy_entry *entry = ralloc(state->dead_ctx,
+                                              nir_parallel_copy_entry);
+      exec_list_push_tail(&block_pcopy->entries, &entry->node);
+
+      /* The uses of the phi destination are rewritten to the new value
+       * before the entry's own source is hooked up below, so the entry
+       * itself still reads the phi destination.
+       */
+      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
+                        phi->dest.ssa.num_components, phi->dest.ssa.name);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&entry->dest.ssa),
+                               state->mem_ctx);
+
+      entry->src.is_ssa = true;
+      entry->src.ssa = &phi->dest.ssa;
+      _mesa_set_add(phi->dest.ssa.uses, &block_pcopy->instr);
+   }
+
+   return true;
+}
+
+/* Block callback: for every phi node in the block, merge the merge set of
+ * each phi source into the merge set of the phi destination (whenever the
+ * two are not already the same set).  Always returns true.
+ */
+static bool
+coalesce_phi_nodes_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr(block, instr) {
+      /* The phis form a prefix of the block; stop at the first non-phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+
+      merge_node *phi_node = get_merge_node(&phi->dest.ssa, state);
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+
+         merge_node *incoming = get_merge_node(src->src.ssa, state);
+         if (phi_node->set != incoming->set)
+            merge_merge_sets(phi_node->set, incoming->set);
+      }
+   }
+
+   return true;
+}
+
+/* Try to coalesce the source and destination of every entry of a parallel
+ * copy.  An entry is skipped when its source is not SSA, when its source
+ * comes from a load_const instruction, or when source and destination have
+ * different component counts.  For the remaining entries the two merge
+ * sets are merged unless they are already the same set or
+ * merge_sets_interfere() reports that they interfere.
+ */
+static void
+agressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
+                                 struct from_ssa_state *state)
+{
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* load_const instructions are SSA only, so their destinations can't
+       * be replaced with registers and therefore can't be coalesced.
+       * Entries with mismatched component counts are left alone as well.
+       */
+      if (!entry->src.is_ssa ||
+          entry->src.ssa->parent_instr->type == nir_instr_type_load_const ||
+          entry->dest.ssa.num_components != entry->src.ssa->num_components)
+         continue;
+
+      merge_node *from = get_merge_node(entry->src.ssa, state);
+      merge_node *to = get_merge_node(&entry->dest.ssa, state);
+
+      if (from->set != to->set &&
+          !merge_sets_interfere(from->set, to->set))
+         merge_merge_sets(from->set, to->set);
+   }
+}
+
+/* Block callback: aggressively coalesce the parallel copy found at the
+ * first non-phi position of the block (if that instruction is a parallel
+ * copy) and the parallel copy at the end of the block (if there is one and
+ * it is a different instruction).  Always returns true.
+ */
+static bool
+agressive_coalesce_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+   nir_parallel_copy_instr *start_pcopy = NULL;
+
+   nir_foreach_instr(block, instr) {
+      /* Phi nodes only ever come at the start of a block, and the start
+       * parallel copy must be right after them, so only the first non-phi
+       * instruction needs to be examined.
+       */
+      if (instr->type != nir_instr_type_phi) {
+         if (instr->type == nir_instr_type_parallel_copy) {
+            start_pcopy = nir_instr_as_parallel_copy(instr);
+            agressive_coalesce_parallel_copy(start_pcopy, state);
+         }
+
+         break;
+      }
+   }
+
+   nir_parallel_copy_instr *end_pcopy =
+      get_parallel_copy_at_end_of_block(block);
+
+   /* Don't process the same instruction twice. */
+   if (end_pcopy == NULL || end_pcopy == start_pcopy)
+      return true;
+
+   agressive_coalesce_parallel_copy(end_pcopy, state);
+
+   return true;
+}
+
+/* Returns the register that stands in for an SSA definition, creating it
+ * on first use.
+ *
+ * A definition that has an entry in state->merge_node_table shares the
+ * register of its merge set.  Any other definition gets a register of its
+ * own, which is remembered in state->ssa_table.  NULL is returned for a
+ * load_const definition found in neither table; such values stay in SSA
+ * form.
+ */
+static nir_register *
+get_register_for_ssa_def(nir_ssa_def *def, struct from_ssa_state *state)
+{
+   struct hash_entry *hit =
+      _mesa_hash_table_search(state->merge_node_table, def);
+   if (hit != NULL) {
+      merge_node *node = (merge_node *)hit->data;
+
+      /* Create the set's register lazily.  All of the things in the merge
+       * set should be the same, so it doesn't matter which node's
+       * definition supplies the name and size.
+       */
+      if (node->set->reg == NULL) {
+         nir_register *set_reg = nir_local_reg_create(state->impl);
+         set_reg->name = def->name;
+         set_reg->num_components = def->num_components;
+         set_reg->num_array_elems = 0;
+         node->set->reg = set_reg;
+      }
+
+      return node->set->reg;
+   }
+
+   hit = _mesa_hash_table_search(state->ssa_table, def);
+   if (hit != NULL)
+      return (nir_register *)hit->data;
+
+   /* We leave load_const SSA values alone.  They act as immediates to
+    * the backend.  If it got coalesced into a phi, that's ok.
+    */
+   if (def->parent_instr->type == nir_instr_type_load_const)
+      return NULL;
+
+   nir_register *own_reg = nir_local_reg_create(state->impl);
+   own_reg->name = def->name;
+   own_reg->num_components = def->num_components;
+   own_reg->num_array_elems = 0;
+
+   _mesa_hash_table_insert(state->ssa_table, def, own_reg);
+   return own_reg;
+}
+
+/* Source callback: replace an SSA source with a reference to the register
+ * chosen for its definition, and record state->instr as a user of that
+ * register.  Non-SSA sources and sources for which no register is assigned
+ * (load_const values) are left untouched.  Always returns true.
+ */
+static bool
+rewrite_ssa_src(nir_src *src, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   if (!src->is_ssa)
+      return true;
+
+   nir_register *reg = get_register_for_ssa_def(src->ssa, state);
+   if (reg == NULL) {
+      assert(src->ssa->parent_instr->type == nir_instr_type_load_const);
+      return true;
+   }
+
+   /* Zeroing the whole source also clears is_ssa, which turns it into a
+    * register source.
+    */
+   memset(src, 0, sizeof *src);
+   src->reg.reg = reg;
+
+   /* We don't need to remove it from the uses set because that is going
+    * away.  We just need to add it to the one for the register. */
+   _mesa_set_add(reg->uses, state->instr);
+
+   return true;
+}
+
+/* Destination callback: replace an SSA destination with the register
+ * chosen for it, destroy the SSA value's use sets, and record state->instr
+ * as a definition of that register.  Non-SSA destinations and destinations
+ * for which no register is assigned (load_const) are left untouched.
+ * Always returns true.
+ */
+static bool
+rewrite_ssa_dest(nir_dest *dest, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   if (!dest->is_ssa)
+      return true;
+
+   nir_register *reg = get_register_for_ssa_def(&dest->ssa, state);
+   if (reg == NULL) {
+      assert(dest->ssa.parent_instr->type == nir_instr_type_load_const);
+      return true;
+   }
+
+   /* Destroy the use sets before the destination is wiped below. */
+   _mesa_set_destroy(dest->ssa.uses, NULL);
+   _mesa_set_destroy(dest->ssa.if_uses, NULL);
+
+   /* Zeroing the destination also clears is_ssa. */
+   memset(dest, 0, sizeof *dest);
+   dest->reg.reg = reg;
+
+   _mesa_set_add(reg->defs, state->instr);
+
+   return true;
+}
+
+/* Block callback that resolves SSA definitions to registers.
+ *
+ * Every source and destination of every instruction in the block is
+ * rewritten through rewrite_ssa_src / rewrite_ssa_dest.  Phi nodes and
+ * ssa_undef instructions are removed from the block along the way and
+ * handed over to state->dead_ctx.  Finally, if the block is followed by an
+ * if whose condition is SSA, that condition is rewritten as well.  Always
+ * returns true.
+ */
+static bool
+resolve_registers_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      /* The rewrite callbacks pick up the current instruction from here. */
+      state->instr = instr;
+      nir_foreach_src(instr, rewrite_ssa_src, state);
+      nir_foreach_dest(instr, rewrite_ssa_dest, state);
+
+      switch (instr->type) {
+      case nir_instr_type_ssa_undef:
+      case nir_instr_type_phi:
+         nir_instr_remove(instr);
+         ralloc_steal(state->dead_ctx, instr);
+         break;
+      default:
+         break;
+      }
+   }
+   state->instr = NULL;
+
+   nir_if *following_if = nir_block_get_following_if(block);
+   if (following_if == NULL || !following_if->condition.is_ssa)
+      return true;
+
+   nir_register *cond_reg =
+      get_register_for_ssa_def(following_if->condition.ssa, state);
+   if (cond_reg == NULL) {
+      /* FIXME: We really shouldn't hit this.  We should be doing
+       * constant control flow propagation.
+       */
+      assert(following_if->condition.ssa->parent_instr->type == nir_instr_type_load_const);
+      return true;
+   }
+
+   memset(&following_if->condition, 0, sizeof following_if->condition);
+   following_if->condition.reg.reg = cond_reg;
+
+   _mesa_set_add(cond_reg->if_uses, following_if);
+
+   return true;
+}
+
+/* Emit a single imov that copies src into the register referenced by
+ * dest_src, inserting it immediately before the parallel copy.
+ *
+ * dest_src must be a direct register reference (not SSA, no indirect, zero
+ * base offset) and src must have at least as many components as the
+ * destination register.  The write mask of the mov covers every component
+ * of the destination register.  The mov is allocated out of mem_ctx.
+ */
+static void
+emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
+          void *mem_ctx)
+{
+   assert(!dest_src.is_ssa &&
+          dest_src.reg.indirect == NULL &&
+          dest_src.reg.base_offset == 0);
+
+   if (src.is_ssa) {
+      assert(src.ssa->num_components >= dest_src.reg.reg->num_components);
+   } else {
+      assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
+   }
+
+   nir_register *target = dest_src.reg.reg;
+
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = (1 << target->num_components) - 1;
+   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+   mov->dest.dest = nir_dest_for_reg(target);
+
+   nir_instr_insert_before(&pcopy->instr, &mov->instr);
+}
+
+/* Resolves a single parallel copy operation into a sequence of mov's
+ *
+ * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for
+ * Correctness, Code Quality, and Efficiency" by Boissinot et al.
+ * However, I never got the algorithm to work as written, so this version
+ * is slightly modified.
+ *
+ * The algorithm works by playing this little shell game with the values.
+ * We start by recording where every source value is and which source value
+ * each destination value should receive.  We then grab any copy whose
+ * destination is "empty", i.e. not used as a source, and do the following:
+ *  - Find where its source value currently lives
+ *  - Emit the move instruction
+ *  - Set the location of the source value to the destination
+ *  - Mark the location containing the source value
+ *  - Mark the destination as no longer needing to be copied
+ *
+ * When we run out of "empty" destinations, we have a cycle and so we
+ * create a temporary register, copy to that register, and mark the value
+ * we copied as living in that temporary.  Now, the cycle is broken, so we
+ * can continue with the above steps.
+ *
+ * The emitted moves are inserted before pcopy, and pcopy itself is removed
+ * from its block before this function returns.  Entries whose source is
+ * already the destination register are ignored.
+ */
+static void
+resolve_parallel_copy(nir_parallel_copy_instr *pcopy,
+                      struct from_ssa_state *state)
+{
+   /* Count the entries that actually require a copy. */
+   unsigned num_copies = 0;
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* Sources may be SSA */
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+         continue;
+
+      num_copies++;
+   }
+
+   if (num_copies == 0) {
+      /* Hooray, we don't need any copies! */
+      nir_instr_remove(&pcopy->instr);
+      return;
+   }
+
+   /* All of the arrays below are sized for two values per copy (one
+    * source and one destination).
+    */
+
+   /* The register/source corresponding to the given index */
+   nir_src values[num_copies * 2];
+   memset(values, 0, sizeof values);
+
+   /* The current location of a given piece of data */
+   int loc[num_copies * 2];
+
+   /* The piece of data that the given piece of data is to be copied from */
+   int pred[num_copies * 2];
+
+   /* Initialize loc and pred.  We will use -1 for "null" */
+   memset(loc, -1, sizeof loc);
+   memset(pred, -1, sizeof pred);
+
+   /* The destinations we have yet to properly fill.  This is used as a
+    * stack; to_do_idx is the index of its top element, -1 when empty.
+    */
+   int to_do[num_copies * 2];
+   int to_do_idx = -1;
+
+   /* Now we set everything up:
+    *  - All values get assigned a temporary index
+    *  - Current locations are set from sources
+    *  - Predecessors are recorded from sources and destinations
+    */
+   int num_vals = 0;
+   nir_foreach_parallel_copy_entry(pcopy, entry) {
+      /* Sources may be SSA */
+      if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg)
+         continue;
+
+      /* Look up the index of the source value, adding it if it is new. */
+      int src_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (nir_srcs_equal(values[i], entry->src))
+            src_idx = i;
+      }
+      if (src_idx < 0) {
+         src_idx = num_vals++;
+         values[src_idx] = entry->src;
+      }
+
+      nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg);
+
+      /* Same for the destination value. */
+      int dest_idx = -1;
+      for (int i = 0; i < num_vals; ++i) {
+         if (nir_srcs_equal(values[i], dest_src)) {
+            /* Each destination of a parallel copy instruction should be
+             * unique.  A destination may get used as a source, so we still
+             * have to walk the list.  However, the predecessor should not,
+             * at this point, be set yet, so we should have -1 here.
+             */
+            assert(pred[i] == -1);
+            dest_idx = i;
+         }
+      }
+      if (dest_idx < 0) {
+         dest_idx = num_vals++;
+         values[dest_idx] = dest_src;
+      }
+
+      loc[src_idx] = src_idx;
+      pred[dest_idx] = src_idx;
+
+      to_do[++to_do_idx] = dest_idx;
+   }
+
+   /* Currently empty destinations we can go ahead and fill.  Like to_do,
+    * this is a stack with ready_idx as the index of its top element.
+    */
+   int ready[num_copies * 2];
+   int ready_idx = -1;
+
+   /* Mark the ones that are ready for copying.  We know an index is a
+    * destination if it has a predecessor and it's ready for copying if
+    * it's not marked as containing data.
+    */
+   for (int i = 0; i < num_vals; i++) {
+      if (pred[i] != -1 && loc[i] == -1)
+         ready[++ready_idx] = i;
+   }
+
+   while (to_do_idx >= 0) {
+      /* Drain every copy whose destination is currently free. */
+      while (ready_idx >= 0) {
+         int b = ready[ready_idx--];
+         int a = pred[b];
+         emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx);
+
+         /* If any other copies want a they can find it at b */
+         loc[a] = b;
+
+         /* b has been filled, mark it as not needing to be copied */
+         pred[b] = -1;
+
+         /* If a needs to be filled, it's ready for copying now */
+         if (pred[a] != -1)
+            ready[++ready_idx] = a;
+      }
+      int b = to_do[to_do_idx--];
+      /* Skip destinations that were already filled by the loop above. */
+      if (pred[b] == -1)
+         continue;
+
+      /* If we got here, then we don't have any more trivial copies that we
+       * can do.  We have to break a cycle, so we create a new temporary
+       * register for that purpose.  Normally, if going out of SSA after
+       * register allocation, you would want to avoid creating temporary
+       * registers.  However, we are going out of SSA before register
+       * allocation, so we would rather not create extra register
+       * dependencies for the backend to deal with.  If it wants, the
+       * backend can coalesce the (possibly multiple) temporaries.
+       */
+      assert(num_vals < num_copies * 2);
+      nir_register *reg = nir_local_reg_create(state->impl);
+      reg->name = "copy_temp";
+      reg->num_array_elems = 0;
+      if (values[b].is_ssa)
+         reg->num_components = values[b].ssa->num_components;
+      else
+         reg->num_components = values[b].reg.reg->num_components;
+      values[num_vals].is_ssa = false;
+      values[num_vals].reg.reg = reg;
+
+      /* Save b in the temporary; b's value now lives there and b itself
+       * is free to be overwritten.
+       */
+      emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx);
+      loc[b] = num_vals;
+      ready[++ready_idx] = b;
+      num_vals++;
+   }
+
+   nir_instr_remove(&pcopy->instr);
+}
+
+/* Resolves the parallel copies in a block.  Each block can have at most
+ * two:  One at the beginning, right after all the phi nodes, and one at
+ * the end (or right before the final jump if it exists).
+ *
+ * This is a block callback and always returns true.
+ */
+static bool
+resolve_parallel_copies_block(nir_block *block, void *void_state)
+{
+   struct from_ssa_state *state = void_state;
+
+   /* The phi nodes have all been removed by now, so a parallel copy that
+    * used to sit right after them is the first instruction of the block.
+    */
+   nir_instr *head = nir_block_first_instr(block);
+   if (head == NULL)
+      return true; /* Empty, nothing to do. */
+
+   if (head->type == nir_instr_type_parallel_copy)
+      resolve_parallel_copy(nir_instr_as_parallel_copy(head), state);
+
+   /* The code above may already have cleaned up the end parallel copy.
+    * Doing so removed it from the instruction list, though, so it won't be
+    * found again here.  It is therefore safe to just look for one and
+    * clean it up if it exists.
+    */
+   nir_parallel_copy_instr *tail = get_parallel_copy_at_end_of_block(block);
+   if (tail != NULL)
+      resolve_parallel_copy(tail, state);
+
+   return true;
+}
+
+/* Takes a single function implementation out of SSA form.
+ *
+ * The steps run in a fixed order: isolate the phi nodes with parallel
+ * copies, coalesce values into merge sets, rewrite SSA definitions to
+ * registers, and finally lower the parallel copies to moves.
+ */
+static void
+nir_convert_from_ssa_impl(nir_function_impl *impl)
+{
+   struct from_ssa_state state;
+
+   /* dead_ctx is a scratch context that is freed at the end of this
+    * function; the parallel copies and the removed phi/ssa_undef
+    * instructions live in it.
+    */
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = ralloc_context(NULL);
+   state.impl = impl;
+   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                    _mesa_key_pointer_equal);
+
+   /* The end-of-block parallel copies must all exist before any phi is
+    * isolated, hence two separate walks over the blocks.
+    */
+   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
+   nir_foreach_block(impl, isolate_phi_nodes_block, &state);
+
+   /* Mark metadata as dirty before we ask for liveness analysis */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   nir_metadata_require(impl, nir_metadata_live_variables |
+                              nir_metadata_dominance);
+
+   /* Build the merge sets: first the phi webs, then whatever the parallel
+    * copies allow without interference.
+    */
+   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
+   nir_foreach_block(impl, agressive_coalesce_block, &state);
+
+   /* ssa_table maps SSA definitions that have no merge node to their
+    * register; it is only needed from here on.
+    */
+   state.ssa_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   nir_foreach_block(impl, resolve_registers_block, &state);
+
+   nir_foreach_block(impl, resolve_parallel_copies_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   /* Clean up dead instructions and the hash tables */
+   _mesa_hash_table_destroy(state.ssa_table, NULL);
+   _mesa_hash_table_destroy(state.merge_node_table, NULL);
+   ralloc_free(state.dead_ctx);
+}
+
+/* Takes every function implementation in the shader out of SSA form.
+ * Overloads that have no implementation are skipped.
+ */
+void
+nir_convert_from_ssa(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+
+      if (impl != NULL)
+         nir_convert_from_ssa_impl(impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_intrinsics.c b/mesalib/src/glsl/nir/nir_intrinsics.c
new file mode 100644
index 000000000..a7c868c39
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_intrinsics.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* OPCODE(name) expands to the identifier nir_intrinsic_<name>. */
+#define OPCODE(name) nir_intrinsic_##name
+
+/* Each INTRINSIC() line of nir_intrinsics.h expands to one initializer of
+ * the table below (including the trailing comma).  The intrinsic's name is
+ * stored as a string; every other argument is stored as given.
+ */
+#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
+                  _dest_components, _num_variables, _num_indices, _flags) \
+{ \
+   .name = #_name, \
+   .num_srcs = _num_srcs, \
+   .src_components = _src_components, \
+   .has_dest = _has_dest, \
+   .dest_components = _dest_components, \
+   .num_variables = _num_variables, \
+   .num_indices = _num_indices, \
+   .flags = _flags \
+},
+
+/* LAST_INTRINSIC() contributes nothing to the table. */
+#define LAST_INTRINSIC(name)
+
+/* The info table itself; its elements are generated by including the
+ * intrinsic list with the macros defined above.
+ */
+const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
+#include "nir_intrinsics.h"
+};
+\ No newline at end of file
diff --git a/mesalib/src/glsl/nir/nir_intrinsics.h b/mesalib/src/glsl/nir/nir_intrinsics.h
new file mode 100644
index 000000000..d94866c85
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_intrinsics.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+/**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ *              num_variables, num_indices, flags)
+ *
+ * Which should correspond one-to-one with the nir_intrinsic_info structure. It
+ * is included in both nir.h to create the nir_intrinsic enum (with members of
+ * the form nir_intrinsic_(name)) and in nir_intrinsics.c to create
+ * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
+ * for each intrinsic.
+ */
+
+/* ARR() wraps its arguments in braces so that a comma-separated list can be
+ * passed as the single src_components argument of INTRINSIC().
+ */
+#define ARR(...) { __VA_ARGS__ }
+
+
+/* Variable access.  load_var has no sources, a destination and one
+ * variable; store_var has one source, no destination and one variable;
+ * copy_var has no sources, no destination and two variables.
+ */
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+/*
+ * Interpolation of input.  The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently.  The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset
+ * respectively.
+ */
+
+INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * a barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general
+ */
+#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+BARRIER(discard)
+
+/* Like a barrier (no sources, no destination, no flags), but with one index. */
+INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+/*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ *
+ * Each ATOMIC(name, flags) line expands to both variants:
+ * atomic_counter_<name>_var and atomic_counter_<name>.
+ */
+
+#define ATOMIC(name, flags) \
+   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ATOMIC(inc, 0)
+ATOMIC(dec, 0)
+ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * System values.  SYSTEM_VALUE(name, components) declares a load_<name>
+ * intrinsic with no sources, variables or indices and a destination with
+ * the given number of components.  All of them may be eliminated and
+ * reordered.
+ */
+#define SYSTEM_VALUE(name, components) \
+   INTRINSIC(load_##name, 0, ARR(), true, components, 0, 0, \
+   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+SYSTEM_VALUE(front_face, 1)
+SYSTEM_VALUE(vertex_id, 1)
+SYSTEM_VALUE(instance_id, 1)
+SYSTEM_VALUE(sample_id, 1)
+SYSTEM_VALUE(sample_pos, 2)
+SYSTEM_VALUE(sample_mask_in, 1)
+SYSTEM_VALUE(invocation_id, 1)
+
+/*
+ * The first index is the address to load from, and the second index is the
+ * number of array elements to load.  Indirect loads have an additional
+ * register input, which is added to the constant address to compute the
+ * final address to load from.  For UBO's (and SSBO's), the first source is
+ * the (possibly constant) UBO buffer index and the indirect (if it exists)
+ * is the second source.
+ *
+ * For vector backends, the address is in terms of one vec4, and so each array
+ * element is +4 scalar components from the previous array element. For scalar
+ * backends, the address is in terms of a single 4-byte float/int and array
+ * elements begin immediately after the previous array element.
+ *
+ * Each LOAD(name, extra_srcs, flags) line expands to two intrinsics:
+ * load_<name> with extra_srcs sources and load_<name>_indirect with one
+ * source more.
+ */
+
+#define LOAD(name, extra_srcs, flags) \
+   INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, 2, flags) \
+   INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
+             true, 0, 0, 2, flags)
+
+LOAD(uniform, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(ubo, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+LOAD(input, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* LOAD(ssbo, 1, 0) */
+
+/*
+ * Stores work the same way as loads, except now the first register input is
+ * the value or array to store and the optional second input is the indirect
+ * offset.
+ *
+ * Each STORE(name, num_indices, flags) line expands to two intrinsics:
+ * store_<name> with one source and store_<name>_indirect with two.
+ */
+
+#define STORE(name, num_indices, flags) \
+   INTRINSIC(store_##name, 1, ARR(0), false, 0, 0, num_indices, flags) \
+   INTRINSIC(store_##name##_indirect, 2, ARR(0, 1), false, 0, 0, \
+             num_indices, flags)
+
+STORE(output, 2, 0)
+/* STORE(ssbo, 3, 0) */
+
+LAST_INTRINSIC(store_output_indirect)
diff --git a/mesalib/src/glsl/nir/nir_live_variables.c b/mesalib/src/glsl/nir/nir_live_variables.c
new file mode 100644
index 000000000..7402dc087
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_live_variables.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include "nir.h"
+#include "nir_worklist.h"
+
+/*
+ * Basic liveness analysis.  This works only in SSA form.
+ *
+ * This liveness pass treats phi nodes as being melded to the space between
+ * blocks so that the destinations of a phi are in the livein of the block
+ * in which it resides and the sources are in the liveout of the
+ * corresponding block.  By formulating the liveness information in this
+ * way, we ensure that the definition of any variable dominates its entire
+ * live range.  This is true because the only way that the definition of an
+ * SSA value may not dominate a use is if the use is in a phi node and the
+ * uses in phi nodes are in the live-out of the corresponding predecessor
+ * block but not in the live-in of the block containing the phi node.
+ */
+
+struct live_variables_state {
+   unsigned num_ssa_defs;   /* next live_index to hand out; 0 means undef */
+   unsigned bitset_words;   /* BITSET_WORDs per live_in/live_out set */
+   /* Blocks whose liveness sets still have to be (re)computed */
+   nir_block_worklist worklist;
+};
+
+/* nir_foreach_ssa_def callback that numbers def for the liveness bitsets:
+ * the def of an ssa_undef instruction gets index 0, any other def takes the
+ * next value of the num_ssa_defs counter in void_state.
+ */
+static bool
+index_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   struct live_variables_state *counter = void_state;
+   bool is_undef = def->parent_instr->type == nir_instr_type_ssa_undef;
+   def->live_index = is_undef ? 0 : counter->num_ssa_defs++;
+   return true;
+}
+
+/* Block callback: numbers every SSA def in block through index_ssa_def. */
+static bool
+index_ssa_definitions_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      nir_foreach_ssa_def(instr, index_ssa_def, state);
+   }
+   return true;
+}
+/* Block callback: (re)allocates block->live_in and block->live_out with
+ * bitset_words words each, zeroes both and pushes the block onto the head
+ * of the worklist.
+ */
+static bool
+init_liveness_block(nir_block *block, void *void_state)
+{
+   struct live_variables_state *lv = void_state;
+   const unsigned words = lv->bitset_words;
+   const size_t bytes = words * sizeof(BITSET_WORD);
+
+   block->live_in = reralloc(block, block->live_in, BITSET_WORD, words);
+   block->live_out = reralloc(block, block->live_out, BITSET_WORD, words);
+
+   memset(block->live_in, 0, bytes);
+   memset(block->live_out, 0, bytes);
+
+   nir_block_worklist_push_head(&lv->worklist, block);
+   return true;
+}
+
+/* nir_foreach_src callback: marks the SSA def read by src as live in the
+ * bitset passed through void_live.  Non-SSA sources are ignored, and so are
+ * defs with live_index 0, because undefined values are never live.  Always
+ * returns true.
+ */
+static bool
+set_src_live(nir_src *src, void *void_live)
+{
+   BITSET_WORD *live_set = void_live;
+
+   if (src->is_ssa && src->ssa->live_index != 0)
+      BITSET_SET(live_set, src->ssa->live_index);
+
+   return true;
+}
+
+/* nir_foreach_ssa_def callback: drops def from the bitset in void_live. */
+static bool
+set_ssa_def_dead(nir_ssa_def *def, void *void_live)
+{
+   BITSET_WORD *live_set = void_live;
+
+   BITSET_CLEAR(live_set, def->live_index);
+   return true;
+}
+
+/** Merges the live in of succ into the live out of pred across one edge.
+ *
+ * The phis at the top of succ sit "between" the two blocks and act in
+ * parallel, so the copied set is fixed up in two separate passes: first
+ * every phi destination is killed, then the phi source that comes from
+ * pred is made live.
+ *
+ * Returns true if this set any bit of pred->live_out that was clear before.
+ */
+static bool
+propagate_across_edge(nir_block *pred, nir_block *succ,
+                      struct live_variables_state *state)
+{
+   const unsigned num_words = state->bitset_words;
+   BITSET_WORD live[num_words];
+   memcpy(live, succ->live_in, sizeof live);
+   /* Pass 1: kill the destinations of all leading phis. */
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      set_ssa_def_dead(&phi->dest.ssa, live);
+   }
+
+   /* Pass 2: make live whatever each phi reads along the pred edge. */
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_foreach_phi_src(phi, src) {
+         if (src->pred == pred) {
+            set_src_live(&src->src, live);
+            break;
+         }
+      }
+   }
+
+   bool grew = false;
+   for (unsigned w = 0; w < num_words; ++w) {
+      grew |= (live[w] & ~pred->live_out[w]) != 0;
+      pred->live_out[w] |= live[w];
+   }
+   return grew;
+}
+
+/* Computes block->live_in and block->live_out for every block of impl using
+ * a worklist; see the top of this file for the liveness model that is used.
+ */
+void
+nir_live_variables_impl(nir_function_impl *impl)
+{
+   struct live_variables_state state;
+
+   /* We start at 1 because we reserve the index value of 0 for ssa_undef
+    * instructions.  Those are never live, so their liveness information
+    * can be compacted into a single bit.
+    */
+   state.num_ssa_defs = 1;
+   nir_foreach_block(impl, index_ssa_definitions_block, &state);
+
+   nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
+
+   /* Now that the SSA definitions are counted, allocate the live_in and
+    * live_out sets and add all of the blocks to the worklist.
+    */
+   state.bitset_words = BITSET_WORDS(state.num_ssa_defs);
+   nir_foreach_block(impl, init_liveness_block, &state);
+
+   /* Work through the worklist and update the liveness sets of each block.
+    * Every block was pushed onto the worklist above; as long as we keep it
+    * up-to-date as we go, everything will get covered.
+    */
+   while (!nir_block_worklist_is_empty(&state.worklist)) {
+      /* We pop them off in the reverse order we pushed them on.  This way
+       * the first walk of the instructions is backwards so we only walk
+       * once in the case of no control flow.
+       */
+      nir_block *block = nir_block_worklist_pop_head(&state.worklist);
+
+      memcpy(block->live_in, block->live_out,
+             state.bitset_words * sizeof(BITSET_WORD));
+
+      nir_if *following_if = nir_block_get_following_if(block);
+      if (following_if)
+         set_src_live(&following_if->condition, block->live_in);
+
+      nir_foreach_instr_reverse(block, instr) {
+         /* Phi nodes are handled separately so we want to skip them.  Since
+          * we are going backwards and they are at the beginning, we can just
+          * break as soon as we see one.
+          */
+         if (instr->type == nir_instr_type_phi)
+            break;
+
+         nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in);
+         nir_foreach_src(instr, set_src_live, block->live_in);
+      }
+
+      /* Walk over all of the predecessors of the current block, merging
+       * the live in of this block into their live out.  If anything has
+       * changed, add the predecessor to the work list so that we ensure
+       * that the new information is used.
+       */
+      struct set_entry *entry;
+      set_foreach(block->predecessors, entry) {
+         nir_block *pred = (nir_block *)entry->key;
+         if (propagate_across_edge(pred, block, &state))
+            nir_block_worklist_push_tail(&state.worklist, pred);
+      }
+   }
+
+   nir_block_worklist_fini(&state.worklist);
+}
+/* nir_foreach_src callback: false only for an SSA source that reads def. */
+static bool
+src_does_not_use_def(nir_src *src, void *def)
+{
+   return !(src->is_ssa && src->ssa == (nir_ssa_def *)def);
+}
+/* True if def is read after start: later in its block or by a following if */
+static bool
+search_for_use_after_instr(nir_instr *start, nir_ssa_def *def)
+{
+   /* nir_live_variables_impl counts a following if's condition as a use */
+   nir_if *following_if = nir_block_get_following_if(start->block);
+   for (struct exec_node *node = start->node.next;
+        !exec_node_is_tail_sentinel(node); node = node->next) {
+      nir_instr *instr = exec_node_data(nir_instr, node, node);
+      if (!nir_foreach_src(instr, src_does_not_use_def, def))
+         return true;
+   }
+   return following_if && !src_does_not_use_def(&following_if->condition, def);
+}
+
+/* Tells whether def is live at instr.  The caller must ensure that def
+ * comes before instr in a pre DFS search of the dominance tree.
+ */
+static bool
+nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr)
+{
+   nir_block *blk = instr->block;
+
+   /* Since def dominates instr, being in the live out of instr's block
+    * means it is live at instr as well.
+    */
+   if (BITSET_TEST(blk->live_out, def->live_index))
+      return true;
+
+   /* If def is neither live coming into the block nor defined inside it,
+    * there is nothing in this block that could keep it alive.
+    */
+   if (!BITSET_TEST(blk->live_in, def->live_index) &&
+       def->parent_instr->block != blk)
+      return false;
+
+   /* Otherwise it is live at instr only if it is used after instr. */
+   return search_for_use_after_instr(instr, def);
+}
+
+/* Returns true if a and b interfere, per nir_live_variables_impl's data. */
+bool
+nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b)
+{
+   /* Defined at the same time: interfere, assuming at least one isn't dead */
+   if (a->parent_instr == b->parent_instr)
+      return true;
+
+   /* An ssa_undef (live_index 0) interferes with nothing */
+   if (a->live_index == 0 || b->live_index == 0)
+      return false;
+
+   /* Is the def with the smaller index live where the other is defined? */
+   return a->live_index < b->live_index ?
+          nir_ssa_def_is_live_at(a, b->parent_instr) :
+          nir_ssa_def_is_live_at(b, a->parent_instr);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c
new file mode 100644
index 000000000..25bba4ef0
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+
+/** @file nir_lower_alu_to_scalar.c
+ *
+ * Replaces nir_alu_instr operations with more than one channel used in the
+ * arguments with individual per-channel operations.
+ */
+
+/* Sets up an SSA dest on instr and a write mask of num_components bits. */
+static void
+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+{
+   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+   instr->dest.write_mask = (1 << num_components) - 1;
+}
+/* Lowers the reduction instr to one chan_op per input component, chained
+ * with merge_op; instr's uses are rewritten to the result and it is removed.
+ */
+static void
+lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
+                void *mem_ctx)
+{
+   const unsigned width = nir_op_infos[instr->op].input_sizes[0];
+   nir_ssa_def *accum = NULL;
+
+   for (unsigned c = 0; c < width; c++) {
+      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
+      nir_alu_ssa_dest_init(chan, 1);
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+      chan->src[0].swizzle[0] = chan->src[0].swizzle[c];
+      if (nir_op_infos[chan_op].num_inputs > 1) {
+         assert(nir_op_infos[chan_op].num_inputs == 2);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+         chan->src[1].swizzle[0] = chan->src[1].swizzle[c];
+      }
+      nir_instr_insert_before(&instr->instr, &chan->instr);
+      if (c == 0) {
+         accum = &chan->dest.dest.ssa;
+         continue;
+      }
+      nir_alu_instr *merge = nir_alu_instr_create(mem_ctx, merge_op);
+      nir_alu_ssa_dest_init(merge, 1);
+      merge->src[0].src = nir_src_for_ssa(accum);
+      merge->src[1].src = nir_src_for_ssa(&chan->dest.dest.ssa);
+      nir_instr_insert_before(&instr->instr, &merge->instr);
+      accum = &merge->dest.dest.ssa;
+   }
+
+   assert(instr->dest.write_mask == 1);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(accum),
+                            mem_ctx);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Scalarizes one ALU instruction: vecN ops are left alone, reductions go
+ * through lower_reduction(), and any other op with a multi-component
+ * destination is split into one op per written channel plus a vecN.
+ */
+static void
+lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
+{
+   unsigned num_src = nir_op_infos[instr->op].num_inputs;
+   unsigned i, chan;
+
+   assert(instr->dest.dest.is_ssa);
+   assert(instr->dest.write_mask != 0);
+   /* Emits the 2-, 3- and 4-component case labels of one reduction op */
+#define LOWER_REDUCTION(name, chan, merge) \
+   case name##2: \
+   case name##3: \
+   case name##4: \
+      lower_reduction(instr, chan, merge, mem_ctx); \
+      break;
+
+   switch (instr->op) {
+   case nir_op_vec4:
+   case nir_op_vec3:
+   case nir_op_vec2:
+      /* We don't need to scalarize these ops, they're the ones generated to
+       * group up outputs into a value that can be SSAed.
+       */
+      return;
+
+      LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
+      LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand);
+      LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand);
+      LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior);
+      LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior);
+      LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand);
+      LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for);
+      LOWER_REDUCTION(nir_op_ball, nir_op_imov, nir_op_iand);
+      LOWER_REDUCTION(nir_op_bany, nir_op_imov, nir_op_ior);
+      LOWER_REDUCTION(nir_op_fall, nir_op_fmov, nir_op_fand);
+      LOWER_REDUCTION(nir_op_fany, nir_op_fmov, nir_op_for);
+   default:
+      break;
+   }
+   /* NOTE(review): reductions also reach this, after instr was removed */
+   if (instr->dest.dest.ssa.num_components == 1)
+      return;
+   /* Gather the per-channel results with a vec2/vec3/vec4 instruction */
+   unsigned num_components = instr->dest.dest.ssa.num_components;
+   static const nir_op nir_op_map[] = {nir_op_vec2, nir_op_vec3, nir_op_vec4};
+   nir_alu_instr *vec_instr =
+      nir_alu_instr_create(mem_ctx, nir_op_map[num_components - 2]);
+   nir_alu_ssa_dest_init(vec_instr, num_components);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(instr->dest.write_mask & (1 << chan)))
+         continue;
+
+      nir_alu_instr *lower = nir_alu_instr_create(mem_ctx, instr->op);
+      for (i = 0; i < num_src; i++) {
+         /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar
+          * args (input_sizes[] == 1).
+          */
+         assert(nir_op_infos[instr->op].input_sizes[i] < 2);
+         unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
+                              0 : chan);
+
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+         for (int j = 0; j < 4; j++)
+            lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
+      }
+
+      nir_alu_ssa_dest_init(lower, 1);
+      lower->dest.saturate = instr->dest.saturate;
+      vec_instr->src[chan].src = nir_src_for_ssa(&lower->dest.dest.ssa);
+
+      nir_instr_insert_before(&instr->instr, &lower->instr);
+   }
+   nir_instr_insert_before(&instr->instr, &vec_instr->instr);
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&vec_instr->dest.dest.ssa),
+                            mem_ctx);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Block callback: scalarizes every ALU instruction; data is the mem_ctx. */
+static bool
+lower_alu_to_scalar_block(nir_block *block, void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+      lower_alu_instr_scalar((nir_alu_instr *)instr, data);
+   }
+   return true;
+}
+/* Scalarizes the ALU instructions of every overload of shader that has an
+ * implementation.  Each implementation's ralloc parent is handed to the
+ * per-block callback as the memory context.
+ */
+void
+nir_lower_alu_to_scalar(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+      if (impl == NULL)
+         continue;
+      nir_foreach_block(impl, lower_alu_to_scalar_block, ralloc_parent(impl));
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_atomics.c b/mesalib/src/glsl/nir/nir_lower_atomics.c
new file mode 100644
index 000000000..e82df0169
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_atomics.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "main/config.h"
+#include <assert.h>
+
+/*
+ * replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset
+ */
+
+/* Replaces an atomic_counter_{read,inc,dec}_var intrinsic on a uniform with
+ * the non-_var form: buffer index in const_index[0], byte offset in src[0].
+ */
+static void
+lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
+{
+   nir_intrinsic_op op;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_atomic_counter_read_var:
+      op = nir_intrinsic_atomic_counter_read;
+      break;
+
+   case nir_intrinsic_atomic_counter_inc_var:
+      op = nir_intrinsic_atomic_counter_inc;
+      break;
+
+   case nir_intrinsic_atomic_counter_dec_var:
+      op = nir_intrinsic_atomic_counter_dec;
+      break;
+
+   default:
+      return;
+   }
+
+   if (instr->variables[0]->var->data.mode != nir_var_uniform)
+      return; /* atomics passed as function arguments can't be lowered */
+
+   void *mem_ctx = ralloc_parent(instr);
+
+   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+   new_instr->const_index[0] =
+      (int) instr->variables[0]->var->data.atomic.buffer_index;
+
+   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+   offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
+   nir_instr_insert_before(&instr->instr, &offset_const->instr);
+
+   nir_ssa_def *offset_def = &offset_const->def;
+
+   if (instr->variables[0]->deref.child != NULL) {
+      assert(instr->variables[0]->deref.child->deref_type ==
+             nir_deref_type_array);
+      nir_deref_array *deref_array =
+         nir_deref_as_array(instr->variables[0]->deref.child);
+      assert(deref_array->deref.child == NULL);
+
+      /* Byte offset: scale the constant index like the indirect one below */
+      offset_const->value.u[0] +=
+         deref_array->base_offset * ATOMIC_COUNTER_SIZE;
+
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         nir_load_const_instr *atomic_counter_size =
+               nir_load_const_instr_create(mem_ctx, 1);
+         atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
+         nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
+
+         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
+         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+         mul->dest.write_mask = 0x1;
+         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+         mul->src[1].src = nir_src_for_ssa(&atomic_counter_size->def);
+         nir_instr_insert_before(&instr->instr, &mul->instr);
+
+         nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
+         nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+         add->dest.write_mask = 0x1;
+         add->src[0].src = nir_src_for_ssa(&mul->dest.dest.ssa);
+         add->src[1].src = nir_src_for_ssa(&offset_const->def);
+         nir_instr_insert_before(&instr->instr, &add->instr);
+
+         offset_def = &add->dest.dest.ssa;
+      }
+   }
+
+   new_instr->src[0] = nir_src_for_ssa(offset_def);
+
+   if (instr->dest.is_ssa) {
+      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+                        instr->dest.ssa.num_components, NULL);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+                               nir_src_for_ssa(&new_instr->dest.ssa),
+                               mem_ctx);
+   } else {
+      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+   }
+
+   nir_instr_insert_before(&instr->instr, &new_instr->instr);
+   nir_instr_remove(&instr->instr);
+}
+
+/* Block callback: lowers each intrinsic in block; state is the impl. */
+static bool
+lower_block(nir_block *block, void *state)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic)
+         lower_instr(nir_instr_as_intrinsic(instr), state);
+   }
+   return true;
+}
+/* Lowers atomic counter *_var intrinsics in every overload with an impl. */
+void
+nir_lower_atomics(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl) {
+         nir_foreach_block(overload->impl, lower_block, overload->impl);
+         nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+                                               nir_metadata_dominance);
+      }
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c
new file mode 100644
index 000000000..0cd8740cc
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_global_vars_to_local.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass detects when a global variable is only being used by
+ * one function and makes it local to that function
+ */
+
+#include "nir.h"
+
+struct global_to_local_state {
+   /* The function implementation whose blocks are currently being scanned */
+   nir_function_impl *impl;
+   /* Maps each used global variable to the one nir_function_impl using it,
+    * or to NULL once a second function has been seen using it.
+    */
+   struct hash_table *var_func_table;
+};
+
+/* Block callback: records, for every nir_var_global variable referenced by
+ * an intrinsic in block, that state->impl uses it.  A variable already
+ * recorded for a different impl has its table entry set to NULL.
+ */
+static bool
+mark_global_var_uses_block(nir_block *block, void *void_state)
+{
+   struct global_to_local_state *st = void_state;
+   struct hash_table *table = st->var_func_table;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      unsigned count = nir_intrinsic_infos[intrin->intrinsic].num_variables;
+      for (unsigned v = 0; v < count; v++) {
+         nir_variable *var = intrin->variables[v]->var;
+         if (var->data.mode != nir_var_global)
+            continue;
+
+         struct hash_entry *seen = _mesa_hash_table_search(table, var);
+         if (seen == NULL)
+            _mesa_hash_table_insert(table, var, st->impl);
+         else if (seen->data != st->impl)
+            seen->data = NULL;
+      }
+   }
+
+   return true;
+}
+
+/* Moves every nir_var_global variable that only one function implementation
+ * uses onto that implementation's locals list, as a nir_var_local.
+ */
+void
+nir_lower_global_vars_to_local(nir_shader *shader)
+{
+   struct global_to_local_state state;
+   state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+   /* Pass 1: record which impl(s) reference each global variable. */
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+      state.impl = overload->impl;
+      nir_foreach_block(overload->impl, mark_global_var_uses_block, &state);
+   }
+
+   /* Pass 2: a non-NULL entry names the only impl using the variable. */
+   struct hash_entry *entry;
+   hash_table_foreach(state.var_func_table, entry) {
+      nir_variable *var = (void *)entry->key;
+      nir_function_impl *sole_user = entry->data;
+      assert(var->data.mode == nir_var_global);
+      if (sole_user == NULL)
+         continue;
+      exec_node_remove(&var->node);
+      var->data.mode = nir_var_local;
+      exec_list_push_tail(&sole_user->locals, &var->node);
+   }
+   _mesa_hash_table_destroy(state.var_func_table, NULL);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_io.c b/mesalib/src/glsl/nir/nir_lower_io.c
new file mode 100644
index 000000000..207f8daa1
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_io.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts references to input/output variables with
+ * loads/stores to actual input/output intrinsics.
+ *
+ * NOTE: This pass really only works for scalar backends at the moment due
+ * to the way it packs the input/output data.
+ */
+
+#include "nir.h"
+
+struct lower_io_state {
+   void *mem_ctx;   /* context handed to the NIR create/copy helpers */
+};
+
+/* Returns the size of a GLSL type counted in components: the component
+ * count for numeric/bool types, the sum over all elements or fields for
+ * aggregates, and zero for samplers, atomic counters, images and interfaces.
+ */
+static unsigned
+type_size(const struct glsl_type *type)
+{
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      return glsl_get_components(type);
+   case GLSL_TYPE_ARRAY:
+      return glsl_get_length(type) * type_size(glsl_get_array_element(type));
+   case GLSL_TYPE_STRUCT: {
+      unsigned total = 0;
+      for (unsigned field = 0; field < glsl_get_length(type); field++)
+         total += type_size(glsl_get_struct_field(type, field));
+      return total;
+   }
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_IMAGE:
+      return 0;
+
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_DOUBLE:
+      unreachable("not reached");
+   }
+   return 0;
+}
+
+/* Hands out consecutive driver_location values to the non-UBO variables in
+ * ht and stores the total size that was assigned in *size.
+ */
+static void
+assign_var_locations(struct hash_table *ht, unsigned *size)
+{
+   unsigned next_slot = 0;
+   struct hash_entry *he;
+   hash_table_foreach(ht, he) {
+      nir_variable *var = (nir_variable *) he->data;
+      /* UBOs live in their own address spaces, so they neither receive a
+       * location here nor count towards the number of global uniforms.
+       */
+      bool in_ubo = var->data.mode == nir_var_uniform &&
+                    var->interface_type != NULL;
+      if (!in_ubo) {
+         var->data.driver_location = next_slot;
+         next_slot += type_size(var->type);
+      }
+   }
+   *size = next_slot;
+}
+
+static void
+assign_var_locations_shader(nir_shader *shader)
+{  /* Each I/O table is numbered independently, starting again from zero. */
+   assign_var_locations(shader->inputs, &shader->num_inputs);
+   assign_var_locations(shader->outputs, &shader->num_outputs);
+   assign_var_locations(shader->uniforms, &shader->num_uniforms);
+}
+
+/* Returns true if any array deref in the chain uses an indirect index. */
+static bool
+deref_has_indirect(nir_deref_var *deref)
+{
+   for (nir_deref *link = deref->deref.child; link; link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+      if (nir_deref_as_array(link)->deref_array_type ==
+          nir_deref_array_type_indirect)
+         return true;
+   }
+   return false;
+}
+
+static unsigned
+get_io_offset(nir_deref_var *deref, nir_instr *instr, nir_src *indirect,
+              struct lower_io_state *state)
+{
+   bool found_indirect = false;
+   unsigned base_offset = 0;
+   /* Returns the constant offset; any run-time part is put in *indirect. */
+   nir_deref *tail = &deref->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type == nir_deref_type_array) {
+         nir_deref_array *deref_array = nir_deref_as_array(tail);
+         unsigned size = type_size(tail->type);
+
+         base_offset += size * deref_array->base_offset;
+         /* An indirect index adds (index * size), computed before instr. */
+         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+            nir_load_const_instr *load_const =
+               nir_load_const_instr_create(state->mem_ctx, 1);
+            load_const->value.u[0] = size;
+            nir_instr_insert_before(instr, &load_const->instr);
+
+            nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imul);
+            mul->src[0].src.is_ssa = true;
+            mul->src[0].src.ssa = &load_const->def;
+            nir_src_copy(&mul->src[1].src, &deref_array->indirect,
+                         state->mem_ctx);
+            mul->dest.write_mask = 1;
+            nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+            nir_instr_insert_before(instr, &mul->instr);
+            /* Add to the indirect term of an earlier array, if there is one. */
+            if (found_indirect) {
+               nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,
+                                                         nir_op_iadd);
+               add->src[0].src = *indirect;
+               add->src[1].src.is_ssa = true;
+               add->src[1].src.ssa = &mul->dest.dest.ssa;
+               add->dest.write_mask = 1;
+               nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+               nir_instr_insert_before(instr, &add->instr);
+
+               indirect->is_ssa = true;
+               indirect->ssa = &add->dest.dest.ssa;
+            } else {
+               indirect->is_ssa = true;
+               indirect->ssa = &mul->dest.dest.ssa;
+               found_indirect = true;
+            }
+         }
+      } else if (tail->deref_type == nir_deref_type_struct) {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+         /* Step over the fields that come before the selected one. */
+         for (unsigned i = 0; i < deref_struct->index; i++)
+            base_offset += type_size(glsl_get_struct_field(parent_type, i));
+      }
+   }
+
+   return base_offset;
+}
+
+static bool
+nir_lower_io_block(nir_block *block, void *void_state)
+{
+   struct lower_io_state *state = void_state;
+   /* Rewrites load_var/store_var on I/O variables into I/O intrinsics. */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+         if (mode != nir_var_shader_in && mode != nir_var_uniform)
+            continue;
+
+         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+
+         /* Figure out the opcode */
+         nir_intrinsic_op load_op;
+         switch (mode) {
+         case nir_var_shader_in:
+            load_op = has_indirect ? nir_intrinsic_load_input_indirect :
+                                     nir_intrinsic_load_input;
+            break;
+         case nir_var_uniform:
+            load_op = has_indirect ? nir_intrinsic_load_uniform_indirect :
+                                     nir_intrinsic_load_uniform;
+            break;
+         default:
+            unreachable("Unknown variable mode");
+         }
+
+         nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx,
+                                                                load_op);
+         load->num_components = intrin->num_components;
+
+         nir_src indirect;
+         unsigned offset = get_io_offset(intrin->variables[0],
+                                         &intrin->instr, &indirect, state);
+         offset += intrin->variables[0]->var->data.driver_location;
+         /* const_index[0] carries the constant part of the offset. */
+         load->const_index[0] = offset;
+         load->const_index[1] = 1;
+
+         if (has_indirect)
+            load->src[0] = indirect;
+         /* Hand the old destination (SSA uses or register) to the new load. */
+         if (intrin->dest.is_ssa) {
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->num_components, NULL);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa),
+                                     state->mem_ctx);
+         } else {
+            nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+         }
+
+         nir_instr_insert_before(&intrin->instr, &load->instr);
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      case nir_intrinsic_store_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
+            continue;
+
+         bool has_indirect = deref_has_indirect(intrin->variables[0]);
+
+         nir_intrinsic_op store_op;
+         if (has_indirect) {
+            store_op = nir_intrinsic_store_output_indirect;
+         } else {
+            store_op = nir_intrinsic_store_output;
+         }
+
+         nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
+                                                                 store_op);
+         store->num_components = intrin->num_components;
+
+         nir_src indirect;
+         unsigned offset = get_io_offset(intrin->variables[0],
+                                         &intrin->instr, &indirect, state);
+         offset += intrin->variables[0]->var->data.driver_location;
+
+         store->const_index[0] = offset;
+         store->const_index[1] = 1;
+         /* src[0] is the value to store; src[1], if set, the indirect offset. */
+         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+
+         if (has_indirect)
+            store->src[1] = indirect;
+
+         nir_instr_insert_before(&intrin->instr, &store->instr);
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+/* Lowers the I/O variable accesses of a single function implementation. */
+static void
+nir_lower_io_impl(nir_function_impl *impl)
+{
+   struct lower_io_state state = { .mem_ctx = ralloc_parent(impl) };
+
+   nir_foreach_block(impl, nir_lower_io_block, &state);
+
+   /* Only instructions were added or removed, never blocks. */
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+}
+
+void
+nir_lower_io(nir_shader *shader)
+{
+   /* Locations must be assigned first: the lowering reads driver_location. */
+   assign_var_locations_shader(shader);
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)
+         nir_lower_io_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c
new file mode 100644
index 000000000..8c5df7be6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+struct locals_to_regs_state {
+   void *mem_ctx;             /* context for newly created instructions */
+   nir_function_impl *impl;   /* implementation the registers are created in */
+
+   /* A hash table mapping derefs to registers */
+   struct hash_table *regs_table;
+};
+
+/* The following two functions implement a hash and equality check for
+ * variable dereferences.  When the hash or equality function encounters an
+ * array, it ignores the offset and whether it is direct or indirect
+ * entirely.
+ */
+static uint32_t
+hash_deref(const void *void_deref)
+{
+   const nir_deref_var *root = void_deref;
+   const nir_deref *link = root->deref.child;
+
+   /* Seed with the variable, then fold in every struct field index. */
+   uint32_t hash = _mesa_fnv32_1a_accumulate(_mesa_fnv32_1a_offset_bias,
+                                             root->var);
+   for (; link != NULL; link = link->child) {
+      if (link->deref_type != nir_deref_type_struct)
+         continue;
+
+      hash = _mesa_fnv32_1a_accumulate(hash, nir_deref_as_struct(link)->index);
+   }
+
+   return hash;
+}
+
+/* Equality counterpart of hash_deref(): the derefs match when they name the
+ * same variable and select the same struct fields along the chain. */
+static bool
+derefs_equal(const void *void_a, const void *void_b)
+{
+   const nir_deref_var *lhs_var = void_a;
+   const nir_deref_var *rhs_var = void_b;
+   if (lhs_var->var != rhs_var->var)
+      return false;
+
+   const nir_deref *lhs = lhs_var->deref.child;
+   const nir_deref *rhs = rhs_var->deref.child;
+   for (; lhs != NULL; lhs = lhs->child, rhs = rhs->child) {
+      if (lhs->deref_type != rhs->deref_type)
+         return false;
+
+      /* Array links always compare equal; only struct indices matter. */
+      if (lhs->deref_type == nir_deref_type_struct &&
+          nir_deref_as_struct(lhs)->index != nir_deref_as_struct(rhs)->index)
+         return false;
+      bool lhs_ends = lhs->child == NULL, rhs_ends = rhs->child == NULL;
+      assert(lhs_ends == rhs_ends);
+      if (lhs_ends != rhs_ends)
+         return false;
+   }
+   return true;
+}
+
+/* Returns the register backing the given local deref, creating it and
+ * caching it in state->regs_table the first time the deref is seen.
+ */
+static nir_register *
+get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
+{
+   const uint32_t key_hash = hash_deref(deref);
+   struct hash_entry *cached =
+      _mesa_hash_table_search_pre_hashed(state->regs_table, key_hash, deref);
+   if (cached != NULL)
+      return cached->data;
+
+   /* Flatten every array (or matrix column) level into one element count. */
+   unsigned num_elems = 1;
+   nir_deref *link;
+   for (link = &deref->deref; link->child; link = link->child) {
+      if (link->child->deref_type != nir_deref_type_array)
+         continue;
+
+      if (glsl_type_is_matrix(link->type)) {
+         num_elems *= glsl_get_matrix_columns(link->type);
+      } else {
+         assert(glsl_get_length(link->type) > 0);
+         num_elems *= glsl_get_length(link->type);
+      }
+   }
+   assert(glsl_type_is_vector(link->type) || glsl_type_is_scalar(link->type));
+
+   nir_register *reg = nir_local_reg_create(state->impl);
+   reg->num_components = glsl_get_vector_elements(link->type);
+   reg->num_array_elems = (num_elems > 1) ? num_elems : 0;
+   _mesa_hash_table_insert_pre_hashed(state->regs_table, key_hash, deref, reg);
+
+   return reg;
+}
+
+static nir_src
+get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
+                  struct locals_to_regs_state *state)
+{
+   nir_src src;
+   /* Builds a register source (base offset + optional indirect) for deref. */
+   src.is_ssa = false;
+   src.reg.reg = get_reg_for_deref(deref, state);
+   src.reg.base_offset = 0;
+   src.reg.indirect = NULL;
+
+   nir_deref *tail = &deref->deref;
+   while (tail->child != NULL) {
+      const struct glsl_type *parent_type = tail->type;
+      tail = tail->child;
+
+      if (tail->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *deref_array = nir_deref_as_array(tail);
+      /* Fold this array level into the flattened constant index. */
+      src.reg.base_offset *= glsl_get_length(parent_type);
+      src.reg.base_offset += deref_array->base_offset;
+      /* Scale an indirect index from earlier levels by this level's length. */
+      if (src.reg.indirect) {
+         nir_load_const_instr *load_const =
+            nir_load_const_instr_create(state->mem_ctx, 1);
+         load_const->value.u[0] = glsl_get_length(parent_type);
+         nir_instr_insert_before(instr, &load_const->instr);
+
+         nir_alu_instr *mul = nir_alu_instr_create(state->mem_ctx, nir_op_imul);
+         mul->src[0].src = *src.reg.indirect;
+         mul->src[1].src.is_ssa = true;
+         mul->src[1].src.ssa = &load_const->def;
+         mul->dest.write_mask = 1;
+         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+         nir_instr_insert_before(instr, &mul->instr);
+
+         src.reg.indirect->is_ssa = true;
+         src.reg.indirect->ssa = &mul->dest.dest.ssa;
+      }
+      /* Add this level's own indirect index, if it has one. */
+      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
+         if (src.reg.indirect == NULL) {
+            src.reg.indirect = ralloc(state->mem_ctx, nir_src);
+            nir_src_copy(src.reg.indirect, &deref_array->indirect,
+                         state->mem_ctx);
+         } else {
+            nir_alu_instr *add = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_iadd);
+            add->src[0].src = *src.reg.indirect;
+            nir_src_copy(&add->src[1].src, &deref_array->indirect,
+                         state->mem_ctx);
+            add->dest.write_mask = 1;
+            nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+            nir_instr_insert_before(instr, &add->instr);
+
+            src.reg.indirect->is_ssa = true;
+            src.reg.indirect->ssa = &add->dest.dest.ssa;
+         }
+      }
+   }
+
+   return src;
+}
+
+static bool
+lower_locals_to_regs_block(nir_block *block, void *void_state)
+{
+   struct locals_to_regs_state *state = void_state;
+   /* Turns load_var/store_var on local variables into register movs. */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_local)
+            continue;
+         /* A load becomes a mov from the register into the old destination. */
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
+         mov->src[0].src = get_deref_reg_src(intrin->variables[0],
+                                             &intrin->instr, state);
+         mov->dest.write_mask = (1 << intrin->num_components) - 1;
+         if (intrin->dest.is_ssa) {
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa),
+                                     state->mem_ctx);
+         } else {
+            nir_dest_copy(&mov->dest.dest, &intrin->dest, state->mem_ctx);
+         }
+         nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      case nir_intrinsic_store_var: {
+         if (intrin->variables[0]->var->data.mode != nir_var_local)
+            continue;
+
+         nir_src reg_src = get_deref_reg_src(intrin->variables[0],
+                                             &intrin->instr, state);
+         /* A store becomes a mov from the stored value into the register. */
+         nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, nir_op_imov);
+         nir_src_copy(&mov->src[0].src, &intrin->src[0], state->mem_ctx);
+         mov->dest.write_mask = (1 << intrin->num_components) - 1;
+         mov->dest.dest.is_ssa = false;
+         mov->dest.dest.reg.reg = reg_src.reg.reg;
+         mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
+         mov->dest.dest.reg.indirect = reg_src.reg.indirect;
+
+         nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      case nir_intrinsic_copy_var:
+         unreachable("There should be no copies whatsoever at this point");
+         break;
+
+      default:
+         continue;
+      }
+   }
+
+   return true;
+}
+
+/* Runs the locals-to-registers lowering over one function implementation. */
+static void
+nir_lower_locals_to_regs_impl(nir_function_impl *impl)
+{
+   struct locals_to_regs_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal),
+   };
+   nir_foreach_block(impl, lower_locals_to_regs_block, &state);
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+
+   /* No delete callback: the table does not own its keys or registers. */
+   _mesa_hash_table_destroy(state.regs_table, NULL);
+}
+
+void
+nir_lower_locals_to_regs(nir_shader *shader)
+{
+   /* Overloads without an implementation have nothing to lower. */
+   nir_foreach_overload(shader, overload)
+      if (overload->impl != NULL)
+         nir_lower_locals_to_regs_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
new file mode 100644
index 000000000..7cd93ea0a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that lowers vector phi nodes to scalar phi nodes when
+ * we don't think it will hurt anything.
+ */
+
+struct lower_phis_to_scalar_state {
+   void *mem_ctx;    /* context for the instructions this pass creates */
+   void *dead_ctx;   /* owns phi_table and the replaced phis; freed at the end */
+
+   /* Hash table marking which phi nodes are scalarizable.  The key is
+    * pointers to phi instructions and the entry is either NULL for not
+    * scalarizable or non-null for scalarizable.
+    */
+   struct hash_table *phi_table;
+};
+
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state);
+
+/* Decides whether one source of a vector phi can cheaply be split into
+ * scalars, judging by the instruction that produces its SSA value. */
+static bool
+is_phi_src_scalarizable(nir_phi_src *src,
+                        struct lower_phis_to_scalar_state *state)
+{
+   /* Don't know what to do with non-ssa sources */
+   if (!src->src.is_ssa)
+      return false;
+
+   nir_instr *producer = src->src.ssa->parent_instr;
+
+   if (producer->type == nir_instr_type_load_const) {
+      /* These are trivially scalarizable */
+      return true;
+   }
+
+   if (producer->type == nir_instr_type_phi) {
+      /* A phi is scalarizable if we're going to lower it */
+      return should_lower_phi(nir_instr_as_phi(producer), state);
+   }
+
+   if (producer->type == nir_instr_type_alu) {
+      /* ALU operations with output_size == 0 should be scalarized.  We
+       * will also see a bunch of vecN operations from scalarizing ALU
+       * operations and, since they can easily be copy-propagated, they
+       * are ok too.
+       */
+      const nir_op op = nir_instr_as_alu(producer)->op;
+      if (nir_op_infos[op].output_size == 0)
+         return true;
+      return op == nir_op_vec2 || op == nir_op_vec3 || op == nir_op_vec4;
+   }
+
+   if (producer->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(producer);
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_var: {
+         /* Only loads of shader inputs and uniforms qualify. */
+         nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+         return mode == nir_var_shader_in || mode == nir_var_uniform;
+      }
+      case nir_intrinsic_interp_var_at_centroid:
+      case nir_intrinsic_interp_var_at_sample:
+      case nir_intrinsic_interp_var_at_offset:
+      case nir_intrinsic_load_uniform:
+      case nir_intrinsic_load_uniform_indirect:
+      case nir_intrinsic_load_ubo:
+      case nir_intrinsic_load_ubo_indirect:
+      case nir_intrinsic_load_input:
+      case nir_intrinsic_load_input_indirect:
+         return true;
+      default:
+         return false;
+      }
+   }
+
+   /* We can't scalarize this type of instruction */
+   return false;
+}
+
+/**
+ * Determines if the given phi node should be lowered.  The only phi nodes
+ * we will scalarize at the moment are those where all of the sources are
+ * scalarizable.
+ *
+ * The reason for this comes down to coalescing.  Since phi sources can't
+ * swizzle, swizzles on phis have to be resolved by inserting a mov right
+ * before the phi.  The choice then becomes between movs to pick off
+ * components for a scalar phi or potentially movs to recombine components
+ * for a vector phi.  The problem is that the movs generated to pick off
+ * the components are almost uncoalescable.  We can't coalesce them in NIR
+ * because we need them to pick off components and we can't coalesce them
+ * in the backend because the source register is a vector and the
+ * destination is a scalar that may be used at other places in the program.
+ * On the other hand, if we have a bunch of scalars going into a vector
+ * phi, the situation is much better.  In this case, if the SSA def is
+ * generated in the predecessor block to the corresponding phi source, the
+ * backend code will be an ALU op into a temporary and then a mov into the
+ * given vector component;  this move can almost certainly be coalesced
+ * away.
+ */
+static bool
+should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state)
+{
+   /* Already scalar */
+   if (phi->dest.ssa.num_components == 1)
+      return false;
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi);
+   if (entry)
+      return entry->data != NULL;
+
+   /* Insert an entry and mark it as scalarizable for now. That way we don't
+    * recurse forever and a cycle in the dependence graph won't automatically
+    * make us fail to scalarize. */
+   _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1);
+   bool scalarizable = true;
+   nir_foreach_phi_src(phi, src) {
+      scalarizable = is_phi_src_scalarizable(src, state);
+      if (!scalarizable)
+         break;
+   }
+
+   /* Recursing may have inserted entries and resized the table, so look the
+    * entry up again instead of trusting the pointer obtained earlier. */
+   entry = _mesa_hash_table_search(state->phi_table, phi);
+   assert(entry);
+   entry->data = (void *)(intptr_t)scalarizable;
+   return scalarizable;
+}
+
+static bool
+lower_phis_to_scalar_block(nir_block *block, void *void_state)
+{
+   struct lower_phis_to_scalar_state *state = void_state;
+   /* Splits each lowerable vector phi in this block into scalar phis. */
+   /* Find the last phi node in the block */
+   nir_phi_instr *last_phi = NULL;
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      last_phi = nir_instr_as_phi(instr);
+   }
+
+   /* We have to handle the phi nodes in their own pass due to the way
+    * we're modifying the linked list of instructions.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      if (!should_lower_phi(phi, state))
+         continue;
+
+      /* Create a vecN operation to combine the results.  Most of these
+       * will be redundant, but copy propagation should clean them up for
+       * us.  No need to add the complexity here.
+       */
+      nir_op vec_op;
+      switch (phi->dest.ssa.num_components) {
+      case 2: vec_op = nir_op_vec2; break;
+      case 3: vec_op = nir_op_vec3; break;
+      case 4: vec_op = nir_op_vec4; break;
+      default: unreachable("Invalid number of components");
+      }
+
+      nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
+      nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
+                        phi->dest.ssa.num_components, NULL);
+      vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+      /* Build one scalar phi per component and feed it into the vecN. */
+      for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
+         nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
+         nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
+
+         vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
+
+         nir_foreach_phi_src(phi, src) {
+            /* We need to insert a mov to grab the i'th component of src */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+            mov->dest.write_mask = 1;
+            nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
+            mov->src[0].swizzle[0] = i;
+
+            /* Insert at the end of the predecessor but before the jump */
+            nir_instr *pred_last_instr = nir_block_last_instr(src->pred);
+            if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump)
+               nir_instr_insert_before(pred_last_instr, &mov->instr);
+            else
+               nir_instr_insert_after_block(src->pred, &mov->instr);
+
+            nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+            new_src->pred = src->pred;
+            new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa);
+
+            exec_list_push_tail(&new_phi->srcs, &new_src->node);
+         }
+
+         nir_instr_insert_before(&phi->instr, &new_phi->instr);
+      }
+
+      nir_instr_insert_after(&last_phi->instr, &vec->instr);
+      /* Redirect all users of the old phi to the recombined vector. */
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&vec->dest.dest.ssa),
+                               state->mem_ctx);
+      /* Parent the dead phi to dead_ctx, then unlink it from the block. */
+      ralloc_steal(state->dead_ctx, phi);
+      nir_instr_remove(&phi->instr);
+
+      /* We're using the safe iterator and inserting all the newly
+       * scalarized phi nodes before their non-scalarized version so that's
+       * ok.  However, we are also inserting vec operations after the
+       * last phi node so once we get here, we can't trust even the
+       * safe iterator to stop properly.  We have to break manually.
+       */
+      if (instr == &last_phi->instr)
+         break;
+   }
+
+   return true;
+}
+
+/* Scalarizes the phis of one function implementation. */
+static void
+lower_phis_to_scalar_impl(nir_function_impl *impl)
+{
+   void *scratch = ralloc_context(NULL);
+   struct lower_phis_to_scalar_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .dead_ctx = scratch,
+      .phi_table = _mesa_hash_table_create(scratch, _mesa_hash_pointer,
+                                           _mesa_key_pointer_equal),
+   };
+   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
+   nir_metadata_preserve(impl,
+                         nir_metadata_block_index | nir_metadata_dominance);
+   /* Releases the phi table and every phi that was replaced. */
+   ralloc_free(scratch);
+}
+
+/** A pass that lowers vector phi nodes to scalar
+ *
+ * This pass loops through the blocks and looks for vector phi nodes it can
+ * lower to scalar phi nodes.  Not all phi nodes are lowered.  For
+ * instance, if one of the sources is a non-scalarizable vector, then we
+ * don't bother lowering because that would generate hard-to-coalesce movs.
+ */
+void
+nir_lower_phis_to_scalar(nir_shader *shader)
+{
+   /* Overloads without an implementation have no phis to lower. */
+   nir_foreach_overload(shader, overload)
+      if (overload->impl != NULL)
+         lower_phis_to_scalar_impl(overload->impl);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_samplers.cpp b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
new file mode 100644
index 000000000..3015dbd09
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_samplers.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "../program.h"
+#include "program/hash_table.h"
+#include "ir_uniform.h"
+
+extern "C" {
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+}
+
+static unsigned
+get_sampler_index(struct gl_shader_program *shader_program, const char *name,
+                  const struct gl_program *prog)
+{
+   GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target);
+
+   unsigned location;
+   if (!shader_program->UniformHash->get(location, name)) {
+      linker_error(shader_program,
+                   "failed to find sampler named %s.\n", name);
+      return 0;
+   }
+
+   if (!shader_program->UniformStorage[location].sampler[shader].active) {
+      assert(0 && "cannot return a sampler");
+      linker_error(shader_program,
+                   "cannot return a sampler named %s, because it is not "
+                   "used in this shader stage. This is a driver bug.\n",
+                   name);
+      return 0;
+   }
+
+   return shader_program->UniformStorage[location].sampler[shader].index;
+}
+
+static void
+lower_sampler(nir_tex_instr *instr, struct gl_shader_program *shader_program,
+              const struct gl_program *prog, void *mem_ctx)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   /* Get the name and the offset */
+   instr->sampler_index = 0;
+   char *name = ralloc_strdup(mem_ctx, instr->sampler->var->name);
+
+   for (nir_deref *deref = &instr->sampler->deref;
+        deref->child; deref = deref->child) {
+      switch (deref->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *deref_array = nir_deref_as_array(deref->child);
+
+         /* XXX: We're assuming here that the indirect is the last array
+          * thing we have.  This should be ok for now as we don't support
+          * arrays_of_arrays yet.
+          */
+
+         instr->sampler_index *= glsl_get_length(deref->type);
+         switch (deref_array->deref_array_type) {
+         case nir_deref_array_type_direct:
+            instr->sampler_index += deref_array->base_offset;
+            if (deref_array->deref.child)
+               ralloc_asprintf_append(&name, "[%u]", deref_array->base_offset);
+            break;
+         case nir_deref_array_type_indirect: {
+            instr->src = reralloc(mem_ctx, instr->src, nir_tex_src,
+                                  instr->num_srcs + 1);
+            memset(&instr->src[instr->num_srcs], 0, sizeof *instr->src);
+            instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset;
+            instr->num_srcs++;
+
+            nir_instr_rewrite_src(&instr->instr,
+                                  &instr->src[instr->num_srcs - 1].src,
+                                  deref_array->indirect);
+
+            instr->sampler_array_size = glsl_get_length(deref->type);
+
+            nir_src empty;
+            memset(&empty, 0, sizeof empty);
+            nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, empty);
+
+            if (deref_array->deref.child)
+               ralloc_strcat(&name, "[0]");
+            break;
+         }
+
+         case nir_deref_array_type_wildcard:
+            unreachable("Cannot copy samplers");
+         default:
+            unreachable("Invalid deref array type");
+         }
+         break;
+      }
+
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(deref->child);
+         const char *field = glsl_get_struct_elem_name(deref->type,
+                                                       deref_struct->index);
+         ralloc_asprintf_append(&name, ".%s", field);
+         break;
+      }
+
+      default:
+         unreachable("Invalid deref type");
+         break;
+      }
+   }
+
+   instr->sampler_index += get_sampler_index(shader_program, name, prog);
+
+   instr->sampler = NULL;
+}
+
+typedef struct {
+   void *mem_ctx;
+   struct gl_shader_program *shader_program;
+   struct gl_program *prog;
+} lower_state;
+
+static bool
+lower_block_cb(nir_block *block, void *_state)
+{
+   lower_state *state = (lower_state *) _state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_tex) {
+         nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+         lower_sampler(tex_instr, state->shader_program, state->prog,
+                       state->mem_ctx);
+      }
+   }
+
+   return true;
+}
+
+static void
+lower_impl(nir_function_impl *impl, struct gl_shader_program *shader_program,
+           struct gl_program *prog)
+{
+   lower_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.shader_program = shader_program;
+   state.prog = prog;
+
+   nir_foreach_block(impl, lower_block_cb, &state);
+}
+
+extern "C" void
+nir_lower_samplers(nir_shader *shader, struct gl_shader_program *shader_program,
+                   struct gl_program *prog)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         lower_impl(overload->impl, shader_program, prog);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_system_values.c b/mesalib/src/glsl/nir/nir_lower_system_values.c
new file mode 100644
index 000000000..328d4f1ab
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_system_values.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "main/mtypes.h"
+
+static void
+convert_instr(nir_intrinsic_instr *instr)
+{
+   if (instr->intrinsic != nir_intrinsic_load_var)
+      return;
+
+   nir_variable *var = instr->variables[0]->var;
+   if (var->data.mode != nir_var_system_value)
+      return;
+
+   void *mem_ctx = ralloc_parent(instr);
+
+   nir_intrinsic_op op;
+
+   switch (var->data.location) {
+   case SYSTEM_VALUE_FRONT_FACE:
+      op = nir_intrinsic_load_front_face;
+      break;
+   case SYSTEM_VALUE_VERTEX_ID:
+      op = nir_intrinsic_load_vertex_id;
+      break;
+   case SYSTEM_VALUE_INSTANCE_ID:
+      op = nir_intrinsic_load_instance_id;
+      break;
+   case SYSTEM_VALUE_SAMPLE_ID:
+      op = nir_intrinsic_load_sample_id;
+      break;
+   case SYSTEM_VALUE_SAMPLE_POS:
+      op = nir_intrinsic_load_sample_pos;
+      break;
+   case SYSTEM_VALUE_SAMPLE_MASK_IN:
+      op = nir_intrinsic_load_sample_mask_in;
+      break;
+   case SYSTEM_VALUE_INVOCATION_ID:
+      op = nir_intrinsic_load_invocation_id;
+      break;
+   default:
+      unreachable("not reached");
+   }
+
+   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+
+   if (instr->dest.is_ssa) {
+      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
+                        instr->dest.ssa.num_components, NULL);
+      nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+                               nir_src_for_ssa(&new_instr->dest.ssa),
+                               mem_ctx);
+   } else {
+      nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx);
+   }
+
+   nir_instr_insert_before(&instr->instr, &new_instr->instr);
+   nir_instr_remove(&instr->instr);
+}
+
+static bool
+convert_block(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic)
+         convert_instr(nir_instr_as_intrinsic(instr));
+   }
+
+   return true;
+}
+
+static void
+convert_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, convert_block, NULL);
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_system_values(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         convert_impl(overload->impl);
+   }
+
+   exec_list_make_empty(&shader->system_values);
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_to_source_mods.c b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c
new file mode 100644
index 000000000..d6bf77f17
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_to_source_mods.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This pass lowers the neg, abs, and sat operations to source modifiers on
+ * ALU operations to make things nicer for the backend.  It's just much
+ * easier to not have them when we're doing optimizations.
+ */
+
+static bool
+nir_lower_to_source_mods_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+         if (!alu->src[i].src.is_ssa)
+            continue;
+
+         if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu)
+            continue;
+
+         nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
+
+         if (parent->dest.saturate)
+            continue;
+
+         switch (nir_op_infos[alu->op].input_types[i]) {
+         case nir_type_float:
+            if (parent->op != nir_op_fmov)
+               continue;
+            break;
+         case nir_type_int:
+            if (parent->op != nir_op_imov)
+               continue;
+            break;
+         default:
+            continue;
+         }
+
+         nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src);
+         if (alu->src[i].abs) {
+            /* abs trumps both neg and abs, do nothing */
+         } else {
+            alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate);
+            alu->src[i].abs |= parent->src[0].abs;
+         }
+
+         for (int j = 0; j < 4; ++j) {
+            if (!nir_alu_instr_channel_used(alu, i, j))
+               continue;
+            alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]];
+         }
+
+         if (parent->dest.dest.ssa.uses->entries == 0 &&
+             parent->dest.dest.ssa.if_uses->entries == 0)
+            nir_instr_remove(&parent->instr);
+      }
+
+      switch (alu->op) {
+      case nir_op_fsat:
+         alu->op = nir_op_fmov;
+         alu->dest.saturate = true;
+         break;
+      case nir_op_ineg:
+         alu->op = nir_op_imov;
+         alu->src[0].negate = !alu->src[0].negate;
+         break;
+      case nir_op_fneg:
+         alu->op = nir_op_fmov;
+         alu->src[0].negate = !alu->src[0].negate;
+         break;
+      case nir_op_iabs:
+         alu->op = nir_op_imov;
+         alu->src[0].abs = true;
+         alu->src[0].negate = false;
+         break;
+      case nir_op_fabs:
+         alu->op = nir_op_fmov;
+         alu->src[0].abs = true;
+         alu->src[0].negate = false;
+         break;
+      default:
+         break;
+      }
+
+      /* We've covered sources.  Now we're going to try and saturate the
+       * destination if we can.
+       */
+
+      if (!alu->dest.dest.is_ssa)
+         continue;
+
+      /* We can only saturate float destinations */
+      if (nir_op_infos[alu->op].output_type != nir_type_float)
+         continue;
+
+      if (alu->dest.dest.ssa.if_uses->entries != 0)
+         continue;
+
+      bool all_children_are_sat = true;
+      struct set_entry *entry;
+      set_foreach(alu->dest.dest.ssa.uses, entry) {
+         const nir_instr *child = entry->key;
+         if (child->type != nir_instr_type_alu) {
+            all_children_are_sat = false;
+            continue;
+         }
+
+         nir_alu_instr *child_alu = nir_instr_as_alu(child);
+         if (child_alu->src[0].negate || child_alu->src[0].abs) {
+            all_children_are_sat = false;
+            continue;
+         }
+
+         if (child_alu->op != nir_op_fsat &&
+             !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) {
+            all_children_are_sat = false;
+            continue;
+         }
+      }
+
+      if (!all_children_are_sat)
+         continue;
+
+      alu->dest.saturate = true;
+
+      set_foreach(alu->dest.dest.ssa.uses, entry) {
+         nir_alu_instr *child_alu = nir_instr_as_alu((nir_instr *)entry->key);
+         child_alu->op = nir_op_fmov;
+         child_alu->dest.saturate = false;
+         /* We could propagate the dest of our instruction to the
+          * destinations of the uses here.  However, one quick round of
+          * copy propagation will clean that all up and then we don't have
+          * the complexity.
+          */
+      }
+   }
+
+   return true;
+}
+
+static void
+nir_lower_to_source_mods_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, nir_lower_to_source_mods_block, NULL);
+}
+
+void
+nir_lower_to_source_mods(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_to_source_mods_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_var_copies.c b/mesalib/src/glsl/nir/nir_lower_var_copies.c
new file mode 100644
index 000000000..85ebb281c
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_var_copies.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+#include "nir_types.h"
+
+/*
+ * Lowers all copy intrinsics to sequences of load/store intrinsics.
+ */
+
+/* Walks down the deref chain and returns the next deref in the chain whose
+ * child is a wildcard.  In other words, given the chain  a[1].foo[*].bar,
+ * this function will return the deref to foo.  Calling it a second time
+ * with the [*].bar, it will return NULL.
+ */
+static nir_deref *
+deref_next_wildcard_parent(nir_deref *deref)
+{
+   for (nir_deref *tail = deref; tail->child; tail = tail->child) {
+      if (tail->child->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(tail->child);
+
+      if (arr->deref_array_type == nir_deref_array_type_wildcard)
+         return tail;
+   }
+
+   return NULL;
+}
+
+/* Returns the last deref in the chain.
+ */
+static nir_deref *
+get_deref_tail(nir_deref *deref)
+{
+   while (deref->child)
+      deref = deref->child;
+
+   return deref;
+}
+
+static int
+type_get_length(const struct glsl_type *type)
+{
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_ARRAY:
+      return glsl_get_length(type);
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_matrix(type))
+         return glsl_get_matrix_columns(type);
+      else
+         return glsl_get_vector_elements(type);
+   default:
+      unreachable("Invalid deref base type");
+   }
+}
+
+/* This function recursively walks the given deref chain and replaces the
+ * given copy instruction with an equivalent sequence of load/store
+ * operations.
+ *
+ * @copy_instr    The copy instruction to replace; new instructions will be
+ *                inserted before this one
+ *
+ * @dest_head     The head of the destination variable deref chain
+ *
+ * @src_head      The head of the source variable deref chain
+ *
+ * @dest_tail     The current tail of the destination variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @src_tail      The current tail of the source variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @mem_ctx       The ralloc context used for the new derefs and instructions
+ */
+static void
+emit_copy_load_store(nir_intrinsic_instr *copy_instr,
+                     nir_deref_var *dest_head, nir_deref_var *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
+{
+   /* Find the next pair of wildcards */
+   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
+   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
+
+   if (src_arr_parent || dest_arr_parent) {
+      /* Wildcards had better come in matched pairs */
+      assert(src_arr_parent && dest_arr_parent);
+
+      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
+      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);
+
+      unsigned length = type_get_length(src_arr_parent->type);
+      /* The wildcards should represent the same number of elements */
+      assert(length == type_get_length(dest_arr_parent->type));
+      assert(length > 0);
+
+      /* Walk over all of the elements that this wildcard refers to and
+       * call emit_copy_load_store on each one of them */
+      src_arr->deref_array_type = nir_deref_array_type_direct;
+      dest_arr->deref_array_type = nir_deref_array_type_direct;
+      for (unsigned i = 0; i < length; i++) {
+         src_arr->base_offset = i;
+         dest_arr->base_offset = i;
+         emit_copy_load_store(copy_instr, dest_head, src_head,
+                              &dest_arr->deref, &src_arr->deref, mem_ctx);
+      }
+      src_arr->deref_array_type = nir_deref_array_type_wildcard;
+      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
+   } else {
+      /* In this case, we have no wildcards anymore, so all we have to do
+       * is just emit the load and store operations. */
+      src_tail = get_deref_tail(src_tail);
+      dest_tail = get_deref_tail(dest_tail);
+
+      assert(src_tail->type == dest_tail->type);
+
+      unsigned num_components = glsl_get_vector_elements(src_tail->type);
+
+      nir_deref *src_deref = nir_copy_deref(mem_ctx, &src_head->deref);
+      nir_deref *dest_deref = nir_copy_deref(mem_ctx, &dest_head->deref);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
+      load->num_components = num_components;
+      load->variables[0] = nir_deref_as_var(src_deref);
+      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+
+      nir_instr_insert_before(&copy_instr->instr, &load->instr);
+
+      nir_intrinsic_instr *store =
+         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
+      store->num_components = num_components;
+      store->variables[0] = nir_deref_as_var(dest_deref);
+      store->src[0].is_ssa = true;
+      store->src[0].ssa = &load->dest.ssa;
+
+      nir_instr_insert_before(&copy_instr->instr, &store->instr);
+   }
+}
+
+/* Lowers a copy instruction to a sequence of load/store instructions
+ *
+ * The new instructions are placed before the copy instruction in the IR.
+ */
+void
+nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx)
+{
+   assert(copy->intrinsic == nir_intrinsic_copy_var);
+   emit_copy_load_store(copy, copy->variables[0], copy->variables[1],
+                        &copy->variables[0]->deref,
+                        &copy->variables[1]->deref, mem_ctx);
+}
+
+static bool
+lower_var_copies_block(nir_block *block, void *mem_ctx)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+      if (copy->intrinsic != nir_intrinsic_copy_var)
+         continue;
+
+      nir_lower_var_copy_instr(copy, mem_ctx);
+
+      nir_instr_remove(&copy->instr);
+      ralloc_free(copy);
+   }
+
+   return true;
+}
+
+static void
+lower_var_copies_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, lower_var_copies_block, ralloc_parent(impl));
+}
+
+/* Lowers every copy_var instruction in the program to a sequence of
+ * load/store instructions.
+ */
+void
+nir_lower_var_copies(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         lower_var_copies_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
new file mode 100644
index 000000000..8af753029
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -0,0 +1,1102 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+struct deref_node {
+   struct deref_node *parent;
+   const struct glsl_type *type;
+
+   bool lower_to_ssa;
+
+   struct set *loads;
+   struct set *stores;
+   struct set *copies;
+
+   nir_ssa_def **def_stack;
+   nir_ssa_def **def_stack_tail;
+
+   struct deref_node *wildcard;
+   struct deref_node *indirect;
+   struct deref_node *children[0];
+};
+
+struct lower_variables_state {
+   void *mem_ctx;
+   void *dead_ctx;   /* ralloc context the deref_nodes are allocated from */
+   nir_function_impl *impl;
+
+   /* A hash table mapping variables to deref_node data */
+   struct hash_table *deref_var_nodes;
+
+   /* A hash table mapping fully-qualified direct dereferences, i.e.
+    * dereferences with no indirect or wildcard array dereferences, to
+    * deref_node data.
+    *
+    * At the moment, we only lower loads, stores, and copies that can be
+    * trivially lowered to loads and stores, i.e. copies with no indirects
+    * and no wildcards.  If a part of a variable that is being loaded from
+    * and/or stored into is also involved in a copy operation with
+    * wildcards, then we lower that copy operation to loads and stores, but
+    * otherwise we leave copies with wildcards alone. Since the only derefs
+    * used in these loads, stores, and trivial copies are ones with no
+    * wildcards and no indirects, these are precisely the derefs that we
+    * can actually consider lowering.
+    */
+   struct hash_table *direct_deref_nodes;
+
+   /* Controls whether get_deref_node will add variables to the
+    * direct_deref_nodes table.  This is turned on when we are initially
+    * scanning for load/store instructions.  It is then turned off so we
+    * don't accidentally change the direct_deref_nodes table while we're
+    * iterating through it.
+    */
+   bool add_to_direct_deref_nodes;
+
+   /* A hash table mapping phi nodes to deref_state data */
+   struct hash_table *phi_table;
+};
+
+/* The following two functions implement a hash and equality check for
+ * variable dereferences.  When the hash or equality function encounters an
+ * array, all indirects are treated as equal and are never equal to a
+ * direct dereference or a wildcard.
+ */
+static uint32_t
+hash_deref(const void *void_deref)
+{
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
+
+   const nir_deref_var *deref_var = void_deref;
+   hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var);
+
+   for (const nir_deref *deref = deref_var->deref.child;
+        deref; deref = deref->child) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *deref_array = nir_deref_as_array(deref);
+
+         hash = _mesa_fnv32_1a_accumulate(hash, deref_array->deref_array_type);
+
+         if (deref_array->deref_array_type == nir_deref_array_type_direct)
+            hash = _mesa_fnv32_1a_accumulate(hash, deref_array->base_offset);
+         break;
+      }
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(deref);
+         hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index);
+         break;
+      }
+      default:
+         assert(!"Invalid deref chain");
+      }
+   }
+
+   return hash;
+}
+
+static bool
+derefs_equal(const void *void_a, const void *void_b)
+{
+   const nir_deref_var *a_var = void_a;
+   const nir_deref_var *b_var = void_b;
+   /* Different variables, or only one side has a chain: not equal */
+   if (a_var->var != b_var->var ||
+       (a_var->deref.child == NULL) != (b_var->deref.child == NULL))
+      return false;
+
+   for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child;
+        a != NULL; a = a->child, b = b->child) {
+      if (a->deref_type != b->deref_type)
+         return false;
+
+      switch (a->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *a_arr = nir_deref_as_array(a);
+         nir_deref_array *b_arr = nir_deref_as_array(b);
+
+         if (a_arr->deref_array_type != b_arr->deref_array_type)
+            return false;
+
+         if (a_arr->deref_array_type == nir_deref_array_type_direct &&
+             a_arr->base_offset != b_arr->base_offset)
+            return false;
+         break;
+      }
+      case nir_deref_type_struct:
+         if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)
+            return false;
+         break;
+      default:
+         assert(!"Invalid deref chain");
+         return false;
+      }
+      /* A chain that ends before the other one cannot be equal to it */
+      assert((a->child == NULL) == (b->child == NULL));
+      if ((a->child == NULL) != (b->child == NULL))
+         return false;
+   }
+
+   return true;
+}
+
+static int
+type_get_length(const struct glsl_type *type)
+{
+   switch (glsl_get_base_type(type)) {
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_ARRAY:
+      return glsl_get_length(type);
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_matrix(type))
+         return glsl_get_matrix_columns(type);
+      else
+         return glsl_get_vector_elements(type);
+   default:
+      unreachable("Invalid deref base type");
+   }
+}
+
+static struct deref_node *
+deref_node_create(struct deref_node *parent,
+                  const struct glsl_type *type, void *mem_ctx)
+{
+   size_t size = sizeof(struct deref_node) +
+                 type_get_length(type) * sizeof(struct deref_node *);
+
+   struct deref_node *node = rzalloc_size(mem_ctx, size);
+   node->type = type;
+   node->parent = parent;
+
+   return node;
+}
+
+/* Looks up the root deref_node for a variable, creating it and caching it
+ * in state->deref_var_nodes the first time the variable is seen.  The
+ * returned node is the root of the tree representing all of the derefs of
+ * the given variable.
+ */
+static struct deref_node *
+get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
+{
+   struct hash_entry *cached =
+      _mesa_hash_table_search(state->deref_var_nodes, var);
+   if (cached != NULL)
+      return cached->data;
+
+   struct deref_node *root =
+      deref_node_create(NULL, var->type, state->dead_ctx);
+   _mesa_hash_table_insert(state->deref_var_nodes, var, root);
+
+   return root;
+}
+
+/* Gets the deref_node for the given deref chain and creates it if it
+ * doesn't yet exist.  If the deref is fully-qualified and direct and
+ * state->add_to_direct_deref_nodes is true, it will be added to the hash
+ * table of fully-qualified direct derefs.
+ *
+ * Returns NULL if a direct array link indexes at or past the length of the
+ * type it is applied to; every caller has to cope with that.  In that case
+ * nodes created for earlier links of the chain stay in the tree.
+ */
+static struct deref_node *
+get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
+{
+   /* Cleared as soon as an indirect or wildcard array link is seen. */
+   bool is_direct = true;
+
+   /* Start at the base of the chain. */
+   struct deref_node *node = get_deref_node_for_var(deref->var, state);
+   assert(deref->deref.type == node->type);
+
+   /* Walk one link at a time, creating the matching child on demand. */
+   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
+      switch (tail->deref_type) {
+      case nir_deref_type_struct: {
+         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+
+         assert(deref_struct->index < type_get_length(node->type));
+
+         if (node->children[deref_struct->index] == NULL)
+            node->children[deref_struct->index] =
+               deref_node_create(node, tail->type, state->dead_ctx);
+
+         node = node->children[deref_struct->index];
+         break;
+      }
+
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(tail);
+
+         switch (arr->deref_array_type) {
+         case nir_deref_array_type_direct:
+            /* This is possible if a loop unrolls and generates an
+             * out-of-bounds offset.  We need to handle this at least
+             * somewhat gracefully.
+             */
+            if (arr->base_offset >= type_get_length(node->type))
+               return NULL;
+
+            if (node->children[arr->base_offset] == NULL)
+               node->children[arr->base_offset] =
+                  deref_node_create(node, tail->type, state->dead_ctx);
+
+            node = node->children[arr->base_offset];
+            break;
+
+         case nir_deref_array_type_indirect:
+            /* All indirect accesses at this level share a single node. */
+            if (node->indirect == NULL)
+               node->indirect = deref_node_create(node, tail->type,
+                                                  state->dead_ctx);
+
+            node = node->indirect;
+            is_direct = false;
+            break;
+
+         case nir_deref_array_type_wildcard:
+            /* Likewise, all wildcard accesses at this level share a node. */
+            if (node->wildcard == NULL)
+               node->wildcard = deref_node_create(node, tail->type,
+                                                  state->dead_ctx);
+
+            node = node->wildcard;
+            is_direct = false;
+            break;
+
+         default:
+            unreachable("Invalid array deref type");
+         }
+         break;
+      }
+      default:
+         unreachable("Invalid deref type");
+      }
+   }
+
+   assert(node);
+
+   /* The table is keyed by the deref pointer itself, so the key has to stay
+    * valid for as long as the table is in use.
+    */
+   if (is_direct && state->add_to_direct_deref_nodes)
+      _mesa_hash_table_insert(state->direct_deref_nodes, deref, node);
+
+   return node;
+}
+
+/* \sa foreach_deref_node_match
+ *
+ * Recursive worker.  `node` is the tree node that corresponds to `deref`;
+ * each call looks at deref->child and descends into every child of `node`
+ * that matches it: the direct child and, for array links, the wildcard
+ * child as well.  When the end of the chain is reached, `cb` is invoked on
+ * the node.
+ *
+ * Subtrees that do not exist are skipped.  Returns false as soon as any
+ * callback returns false, true otherwise.
+ */
+static bool
+foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
+                          bool (* cb)(struct deref_node *node,
+                                      struct lower_variables_state *state),
+                          struct lower_variables_state *state)
+{
+   if (deref->child == NULL) {
+      return cb(node, state);
+   } else {
+      switch (deref->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(deref->child);
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+         if (node->children[arr->base_offset] &&
+             !foreach_deref_node_worker(node->children[arr->base_offset],
+                                        deref->child, cb, state))
+            return false;
+
+         if (node->wildcard &&
+             !foreach_deref_node_worker(node->wildcard,
+                                        deref->child, cb, state))
+            return false;
+
+         return true;
+      }
+
+      case nir_deref_type_struct: {
+         nir_deref_struct *str = nir_deref_as_struct(deref->child);
+
+         /* The member node is only guaranteed to exist on the fully direct
+          * path.  Inside a wildcard subtree it may never have been created
+          * (e.g. only a[*].foo was seen while we are matching a[6].bar), so
+          * it has to be checked before descending.
+          */
+         if (node->children[str->index] &&
+             !foreach_deref_node_worker(node->children[str->index],
+                                        deref->child, cb, state))
+            return false;
+
+         return true;
+      }
+
+      default:
+         unreachable("Invalid deref child type");
+      }
+   }
+}
+
+/* Walks over every "matching" deref_node and calls the callback.  A node
+ * is considered to "match" if it either refers to that deref or matches up
+ * to a wildcard.  In other words, the following would match a[6].foo[3].bar:
+ *
+ * a[6].foo[3].bar
+ * a[*].foo[3].bar
+ * a[6].foo[*].bar
+ * a[*].foo[*].bar
+ *
+ * The given deref must be a full-length and fully qualified (no wildcards
+ * or indirects) deref chain.
+ *
+ * Returns false if any callback invocation returned false, true otherwise.
+ */
+static bool
+foreach_deref_node_match(nir_deref_var *deref,
+                         bool (* cb)(struct deref_node *node,
+                                     struct lower_variables_state *state),
+                         struct lower_variables_state *state)
+{
+   /* Fetch the root of the variable's tree directly.  Going through
+    * get_deref_node() with a truncated copy of the deref on the stack would
+    * register a pointer to that temporary in direct_deref_nodes whenever
+    * state->add_to_direct_deref_nodes is set.
+    */
+   struct deref_node *node = get_deref_node_for_var(deref->var, state);
+   assert(deref->deref.type == node->type);
+
+   return foreach_deref_node_worker(node, &deref->deref, cb, state);
+}
+
+/* \sa deref_may_be_aliased
+ *
+ * Recursive worker.  `node` is the tree node that corresponds to `deref`;
+ * each call inspects deref->child and returns true as soon as an indirect
+ * array access that may overlap the deref is found.
+ *
+ * The check is conservative: an indirect node recorded at an array level
+ * marks every direct element of that array as aliased, regardless of what
+ * follows the indirect link in the chain.
+ */
+static bool
+deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
+                          struct lower_variables_state *state)
+{
+   if (deref->child == NULL) {
+      return false;
+   } else {
+      switch (deref->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(deref->child);
+         if (arr->deref_array_type == nir_deref_array_type_indirect)
+            return true;
+
+         /* get_deref_node() records every indirect access to this array in
+          * node->indirect.  If one exists, it may hit the element we are
+          * asking about, so the deref is aliased.
+          */
+         if (node->indirect)
+            return true;
+
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+
+         if (node->children[arr->base_offset] &&
+             deref_may_be_aliased_node(node->children[arr->base_offset],
+                                       deref->child, state))
+            return true;
+
+         if (node->wildcard &&
+             deref_may_be_aliased_node(node->wildcard, deref->child, state))
+            return true;
+
+         return false;
+      }
+
+      case nir_deref_type_struct: {
+         nir_deref_struct *str = nir_deref_as_struct(deref->child);
+         if (node->children[str->index]) {
+             return deref_may_be_aliased_node(node->children[str->index],
+                                              deref->child, state);
+         } else {
+            return false;
+         }
+      }
+
+      default:
+         unreachable("Invalid nir_deref child type");
+      }
+   }
+}
+
+/* Returns true if an indirect array access may touch the given deref, and
+ * false if no indirect can ever reach it.
+ *
+ * The intent is that, for a deref such as a[6].foo, a use of a[i].foo makes
+ * it aliased.  A var_copy involving a wildcard such as a[*].bar does not
+ * count as aliasing because it can be lowered to entirely direct
+ * load/stores.  The exact matching rules live in deref_may_be_aliased_node,
+ * which this function calls on the root node of the variable.
+ *
+ * We only support asking this question about fully-qualified derefs.
+ * Obviously, it's pointless to ask this about indirects, but we also
+ * rule out wildcards.  Handling wildcard dereferences would involve
+ * checking each array index to make sure that there aren't any indirect
+ * references.
+ */
+static bool
+deref_may_be_aliased(nir_deref_var *deref,
+                     struct lower_variables_state *state)
+{
+   struct deref_node *root = get_deref_node_for_var(deref->var, state);
+
+   return deref_may_be_aliased_node(root, &deref->deref, state);
+}
+
+/* Adds a load_var intrinsic to the `loads` set of the deref_node it reads,
+ * creating the set the first time it is needed.  Nothing is recorded when
+ * get_deref_node() returns NULL for the deref.
+ */
+static void
+register_load_instr(nir_intrinsic_instr *load_instr,
+                    struct lower_variables_state *state)
+{
+   struct deref_node *src_node =
+      get_deref_node(load_instr->variables[0], state);
+
+   if (src_node != NULL) {
+      if (src_node->loads == NULL) {
+         src_node->loads = _mesa_set_create(state->dead_ctx,
+                                            _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+      }
+
+      _mesa_set_add(src_node->loads, load_instr);
+   }
+}
+
+/* Adds a store_var intrinsic to the `stores` set of the deref_node it
+ * writes, creating the set the first time it is needed.  Nothing is
+ * recorded when get_deref_node() returns NULL for the deref.
+ */
+static void
+register_store_instr(nir_intrinsic_instr *store_instr,
+                     struct lower_variables_state *state)
+{
+   struct deref_node *dst_node =
+      get_deref_node(store_instr->variables[0], state);
+
+   if (dst_node != NULL) {
+      if (dst_node->stores == NULL) {
+         dst_node->stores = _mesa_set_create(state->dead_ctx,
+                                             _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+      }
+
+      _mesa_set_add(dst_node->stores, store_instr);
+   }
+}
+
+/* Adds a copy_var intrinsic to the `copies` set of each of the two
+ * deref_nodes it references (variables[0] and variables[1]), creating the
+ * sets on demand.  An operand for which get_deref_node() returns NULL is
+ * skipped; the other operand is still registered.
+ */
+static void
+register_copy_instr(nir_intrinsic_instr *copy_instr,
+                    struct lower_variables_state *state)
+{
+   for (unsigned operand = 0; operand < 2; operand++) {
+      struct deref_node *operand_node =
+         get_deref_node(copy_instr->variables[operand], state);
+
+      if (operand_node != NULL) {
+         if (operand_node->copies == NULL) {
+            operand_node->copies = _mesa_set_create(state->dead_ctx,
+                                                    _mesa_hash_pointer,
+                                                    _mesa_key_pointer_equal);
+         }
+
+         _mesa_set_add(operand_node->copies, copy_instr);
+      }
+   }
+}
+
+/* Block callback: registers every load_var, store_var and copy_var
+ * intrinsic of the block with the deref_node(s) it references.  All other
+ * instructions are ignored.  Always returns true.
+ */
+static bool
+register_variable_uses_block(nir_block *block, void *void_state)
+{
+   struct lower_variables_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         if (intrin->intrinsic == nir_intrinsic_load_var)
+            register_load_instr(intrin, state);
+         else if (intrin->intrinsic == nir_intrinsic_store_var)
+            register_store_instr(intrin, state);
+         else if (intrin->intrinsic == nir_intrinsic_copy_var)
+            register_copy_instr(intrin, state);
+      }
+   }
+
+   return true;
+}
+
+/* Walks over all of the copy instructions to or from the given deref_node
+ * and lowers them to load/store intrinsics.
+ *
+ * Each copy is lowered with nir_lower_var_copy_instr(), unregistered from
+ * the `copies` set of every node it references and finally removed from the
+ * instruction stream.  Always returns true so that it can be used as a
+ * foreach_deref_node_match callback without stopping the walk.
+ */
+static bool
+lower_copies_to_load_store(struct deref_node *node,
+                           struct lower_variables_state *state)
+{
+   if (!node->copies)
+      return true;
+
+   struct set_entry *copy_entry;
+   set_foreach(node->copies, copy_entry) {
+      nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+      nir_lower_var_copy_instr(copy, state->mem_ctx);
+
+      for (unsigned i = 0; i < 2; ++i) {
+         struct deref_node *arg_node =
+            get_deref_node(copy->variables[i], state);
+
+         if (arg_node == NULL)
+            continue;
+
+         /* The entry belongs to arg_node's set, so it has to be removed
+          * from that same set.  Removing it through node->copies would
+          * leave the bookkeeping of both sets out of sync whenever
+          * arg_node is a different node.
+          */
+         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+         assert(arg_entry);
+         _mesa_set_remove(arg_node->copies, arg_entry);
+      }
+
+      nir_instr_remove(&copy->instr);
+   }
+
+   return true;
+}
+
+/* Returns a load_const instruction that represents the constant
+ * initializer for the given deref chain.  The caller is responsible for
+ * ensuring that there actually is a constant initializer.
+ *
+ * The chain must consist of struct links and direct array links only.  The
+ * returned instruction is allocated on state->mem_ctx; this function does
+ * not insert it anywhere.
+ */
+static nir_load_const_instr *
+get_const_initializer_load(const nir_deref_var *deref,
+                           struct lower_variables_state *state)
+{
+   nir_constant *constant = deref->var->constant_initializer;
+   const nir_deref *tail = &deref->deref;
+   /* Column selected by a trailing matrix deref, 0 if there is none. */
+   unsigned matrix_offset = 0;
+   while (tail->child) {
+      switch (tail->child->deref_type) {
+      case nir_deref_type_array: {
+         nir_deref_array *arr = nir_deref_as_array(tail->child);
+         assert(arr->deref_array_type == nir_deref_array_type_direct);
+         if (glsl_type_is_matrix(tail->type)) {
+            /* Indexing a matrix selects a column inside the current
+             * constant rather than a nested element, and it has to be the
+             * last link of the chain.
+             */
+            assert(arr->deref.child == NULL);
+            matrix_offset = arr->base_offset;
+         } else {
+            constant = constant->elements[arr->base_offset];
+         }
+         break;
+      }
+
+      case nir_deref_type_struct: {
+         constant = constant->elements[nir_deref_as_struct(tail->child)->index];
+         break;
+      }
+
+      default:
+         unreachable("Invalid deref child type");
+      }
+
+      tail = tail->child;
+   }
+
+   nir_load_const_instr *load =
+      nir_load_const_instr_create(state->mem_ctx,
+                                  glsl_get_vector_elements(tail->type));
+
+   /* Turn the column index into an index of the column's first component. */
+   matrix_offset *= load->def.num_components;
+   for (unsigned i = 0; i < load->def.num_components; i++) {
+      switch (glsl_get_base_type(tail->type)) {
+      case GLSL_TYPE_FLOAT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_UINT:
+         /* All three types are copied through the `u` member.
+          * NOTE(review): presumably `value` is a union so float bits are
+          * carried over unchanged - confirm against nir.h.
+          */
+         load->value.u[i] = constant->value.u[matrix_offset + i];
+         break;
+      case GLSL_TYPE_BOOL:
+         /* Booleans are re-encoded as NIR_TRUE / NIR_FALSE. */
+         load->value.u[i] = constant->value.b[matrix_offset + i] ?
+                             NIR_TRUE : NIR_FALSE;
+         break;
+      default:
+         unreachable("Invalid immediate type");
+      }
+   }
+
+   return load;
+}
+
+/** Pushes an SSA def onto the def stack for the given node
+ *
+ * Every node may carry a stack of SSA definitions that is consulted during
+ * variable renaming to find the definition reaching a given program point.
+ * The stack holds at most one definition per block: if the current top was
+ * defined in the same block as the new def, the top is overwritten instead
+ * of growing the stack.
+ *
+ * The stack storage is allocated on first use with one slot per block of
+ * the function, and def_stack_tail is parked one slot before the start to
+ * denote an empty stack.
+ */
+static void
+def_stack_push(struct deref_node *node, nir_ssa_def *def,
+               struct lower_variables_state *state)
+{
+   if (!node->def_stack) {
+      nir_ssa_def **storage = ralloc_array(state->dead_ctx, nir_ssa_def *,
+                                           state->impl->num_blocks);
+      node->def_stack = storage;
+      node->def_stack_tail = storage - 1;
+   }
+
+   const bool is_empty = node->def_stack_tail < node->def_stack;
+
+   /* Grow the stack unless the top entry lives in the same block as the new
+    * def; in that case the store below simply replaces the top.
+    */
+   if (is_empty ||
+       (*node->def_stack_tail)->parent_instr->block !=
+       def->parent_instr->block)
+      node->def_stack_tail++;
+
+   *node->def_stack_tail = def;
+}
+
+/* Drops the top entry of the node's def stack, but only if that entry was
+ * defined in the given block.  An empty stack is left untouched.
+ */
+static void
+def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
+{
+   /* Popping implies that something was pushed earlier, so the stack
+    * storage must have been allocated already.
+    */
+   assert(node->def_stack != NULL);
+
+   const bool has_entries = node->def_stack_tail >= node->def_stack;
+
+   if (has_entries &&
+       (*node->def_stack_tail)->parent_instr->block == block)
+      node->def_stack_tail--;
+}
+
+/** Retrieves the SSA definition on top of the def stack of the given node.
+ *
+ * If the stack does not exist yet or is empty, a new SSA undef with as many
+ * components as the node's type is created, inserted at the start of the
+ * function body, pushed onto the stack and returned.  This function does
+ * not look at constant initializers itself; it only sees whatever has been
+ * pushed onto the stack.  The `block` argument is not consulted: the top of
+ * the stack is assumed to dominate it.
+ */
+static nir_ssa_def *
+get_ssa_def_for_block(struct deref_node *node, nir_block *block,
+                      struct lower_variables_state *state)
+{
+   const bool have_def = node->def_stack != NULL &&
+                         node->def_stack_tail >= node->def_stack;
+
+   if (!have_def) {
+      /* No definition reaches this point, so an undef has to stand in. */
+      nir_ssa_undef_instr *undef_instr =
+         nir_ssa_undef_instr_create(state->mem_ctx,
+                                    glsl_get_vector_elements(node->type));
+
+      nir_instr_insert_before_cf_list(&state->impl->body,
+                                      &undef_instr->instr);
+      def_stack_push(node, &undef_instr->def, state);
+
+      return &undef_instr->def;
+   }
+
+   return *node->def_stack_tail;
+}
+
+/* Given a block and one of its predecessors, this function fills in the
+ * sources of the phi nodes to take SSA defs from the given predecessor.
+ * This function must be called exactly once per block/predecessor pair.
+ *
+ * Only phis found in state->phi_table receive a source; other phis are
+ * left alone.
+ */
+static void
+add_phi_sources(nir_block *block, nir_block *pred,
+                struct lower_variables_state *state)
+{
+   nir_foreach_instr(block, instr) {
+      /* The scan ends at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      struct hash_entry *phi_entry =
+         _mesa_hash_table_search(state->phi_table, phi);
+
+      if (phi_entry != NULL) {
+         struct deref_node *var_node = phi_entry->data;
+         nir_phi_src *new_src = ralloc(state->mem_ctx, nir_phi_src);
+
+         new_src->pred = pred;
+         new_src->src.is_ssa = true;
+         new_src->src.ssa = get_ssa_def_for_block(var_node, pred, state);
+
+         /* Record the phi as a user of the def it now reads. */
+         _mesa_set_add(new_src->src.ssa->uses, instr);
+
+         exec_list_push_tail(&phi->srcs, &new_src->node);
+      }
+   }
+}
+
+/* Performs variable renaming by doing a DFS of the dominance tree
+ *
+ * This algorithm is very similar to the one outlined in "Efficiently
+ * Computing Static Single Assignment Form and the Control Dependence
+ * Graph" by Cytron et. al.  The primary difference is that we only put one
+ * SSA def on the stack per block.
+ *
+ * For the given block this:
+ *   1) pushes the defs produced by this pass's phis and by lowered stores,
+ *      and replaces lowered loads with moves of the reaching def;
+ *   2) fills in the phi sources of both successors;
+ *   3) recurses into the block's children in the dominance tree;
+ *   4) pops everything pushed in step 1 and removes the lowered stores.
+ *
+ * Always returns true.
+ */
+static bool
+rename_variables_block(nir_block *block, struct lower_variables_state *state)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->phi_table, phi);
+
+         /* This can happen if we already have phi nodes in the program
+          * that were not created in this pass.
+          */
+         if (!entry)
+            continue;
+
+         struct deref_node *node = entry->data;
+
+         def_stack_push(node, &phi->dest.ssa, state);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* If we hit this path then we are referencing an invalid
+                * value.  Most likely, we unrolled something and are
+                * reading past the end of some array.  In any case, this
+                * should result in an undefined value.
+                */
+               nir_ssa_undef_instr *undef =
+                  nir_ssa_undef_instr_create(state->mem_ctx,
+                                             intrin->num_components);
+
+               nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_remove(&intrin->instr);
+
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                        nir_src_for_ssa(&undef->def),
+                                        state->mem_ctx);
+               continue;
+            }
+
+            /* Loads of variables we are not lowering stay untouched. */
+            if (!node->lower_to_ssa)
+               continue;
+
+            /* Replace the load with a move of the reaching definition. */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            mov->src[0].src.is_ssa = true;
+            mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state);
+            /* Point the swizzle of unused channels at component 0. */
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
+
+            assert(intrin->dest.is_ssa);
+
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
+            nir_instr_remove(&intrin->instr);
+
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&mov->dest.dest.ssa),
+                                     state->mem_ctx);
+            break;
+         }
+
+         case nir_intrinsic_store_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);
+
+            if (node == NULL) {
+               /* Probably an out-of-bounds array store.  That should be a
+                * no-op. */
+               nir_instr_remove(&intrin->instr);
+               continue;
+            }
+
+            /* Stores to variables we are not lowering stay untouched. */
+            if (!node->lower_to_ssa)
+               continue;
+
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
+
+            assert(intrin->src[0].is_ssa);
+
+            /* The stored value becomes a new SSA def via a move; that def
+             * is what later loads of this node will read.
+             */
+            nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
+                                                      nir_op_imov);
+            mov->src[0].src.is_ssa = true;
+            mov->src[0].src.ssa = intrin->src[0].ssa;
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
+
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components, NULL);
+
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
+
+            def_stack_push(node, &mov->dest.dest.ssa, state);
+
+            /* We'll wait to remove the instruction until the next pass
+             * where we pop the node we just pushed back off the stack.
+             */
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+
+   /* The def stacks now describe the state at the end of this block, which
+    * is what the phis of the successors need as their source for this edge.
+    */
+   if (block->successors[0])
+      add_phi_sources(block->successors[0], block, state);
+   if (block->successors[1])
+      add_phi_sources(block->successors[1], block, state);
+
+   /* Visit the dominated blocks while this block's defs are on the stacks. */
+   for (unsigned i = 0; i < block->num_dom_children; ++i)
+      rename_variables_block(block->dom_children[i], state);
+
+   /* Now we iterate over the instructions and pop off any SSA defs that we
+    * pushed in the first loop.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+         struct hash_entry *entry =
+            _mesa_hash_table_search(state->phi_table, phi);
+
+         /* This can happen if we already have phi nodes in the program
+          * that were not created in this pass.
+          */
+         if (!entry)
+            continue;
+
+         struct deref_node *node = entry->data;
+
+         def_stack_pop_if_in_block(node, block);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+         if (intrin->intrinsic != nir_intrinsic_store_var)
+            continue;
+
+         struct deref_node *node = get_deref_node(intrin->variables[0], state);
+         if (!node)
+            continue;
+
+         if (!node->lower_to_ssa)
+            continue;
+
+         /* The store was kept until now so that this loop can find the node
+          * to pop; it is no longer needed afterwards.
+          */
+         def_stack_pop_if_in_block(node, block);
+         nir_instr_remove(&intrin->instr);
+      }
+   }
+
+   return true;
+}
+
+/* Inserts phi nodes for all variables marked lower_to_ssa
+ *
+ * This is the same algorithm as presented in "Efficiently Computing Static
+ * Single Assignment Form and the Control Dependence Graph" by Cytron et.
+ * al.
+ *
+ * Every phi created here is recorded in state->phi_table, mapped to the
+ * deref_node it stands for.  The phis are created without sources.
+ */
+static void
+insert_phi_nodes(struct lower_variables_state *state)
+{
+   /* Per-block stamps, indexed by block index.  A stamp equal to the
+    * current iter_count means "already queued" (work) or "already has a
+    * phi" (has_already) for the node being processed.  Stamping with a
+    * counter avoids clearing both arrays for every node.
+    */
+   unsigned work[state->impl->num_blocks];
+   unsigned has_already[state->impl->num_blocks];
+
+   /*
+    * Since the work flags already prevent us from inserting a node that has
+    * ever been inserted into W, we don't need to use a set to represent W.
+    * Also, since no block can ever be inserted into W more than once, we know
+    * that the maximum size of W is the number of basic blocks in the
+    * function. So all we need to handle W is an array and a pointer to the
+    * next element to be inserted and the next element to be removed.
+    */
+   nir_block *W[state->impl->num_blocks];
+
+   memset(work, 0, sizeof work);
+   memset(has_already, 0, sizeof has_already);
+
+   unsigned w_start, w_end;
+   unsigned iter_count = 0;
+
+   struct hash_entry *deref_entry;
+   hash_table_foreach(state->direct_deref_nodes, deref_entry) {
+      struct deref_node *node = deref_entry->data;
+
+      /* A node with no registered stores is skipped: no phis are placed. */
+      if (node->stores == NULL)
+         continue;
+
+      if (!node->lower_to_ssa)
+         continue;
+
+      /* Start a fresh worklist and a fresh stamp value for this node. */
+      w_start = w_end = 0;
+      iter_count++;
+
+      /* Seed the worklist with every block that stores to the node. */
+      struct set_entry *store_entry;
+      set_foreach(node->stores, store_entry) {
+         nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
+         if (work[store->instr.block->index] < iter_count)
+            W[w_end++] = store->instr.block;
+         work[store->instr.block->index] = iter_count;
+      }
+
+      while (w_start != w_end) {
+         nir_block *cur = W[w_start++];
+         struct set_entry *dom_entry;
+         set_foreach(cur->dom_frontier, dom_entry) {
+            nir_block *next = (nir_block *) dom_entry->key;
+
+            /*
+             * If there's more than one return statement, then the end block
+             * can be a join point for some definitions. However, there are
+             * no instructions in the end block, so nothing would use those
+             * phi nodes. Of course, we couldn't place those phi nodes
+             * anyways due to the restriction of having no instructions in the
+             * end block...
+             */
+            if (next == state->impl->end_block)
+               continue;
+
+            if (has_already[next->index] < iter_count) {
+               nir_phi_instr *phi = nir_phi_instr_create(state->mem_ctx);
+               nir_ssa_dest_init(&phi->instr, &phi->dest,
+                                 glsl_get_vector_elements(node->type), NULL);
+               nir_instr_insert_before_block(next, &phi->instr);
+
+               _mesa_hash_table_insert(state->phi_table, phi, node);
+
+               has_already[next->index] = iter_count;
+               /* The new phi is itself a definition, so its block has to be
+                * processed as well unless it is already queued.
+                */
+               if (work[next->index] < iter_count) {
+                  work[next->index] = iter_count;
+                  W[w_end++] = next;
+               }
+            }
+         }
+      }
+   }
+}
+
+
+/** Implements a pass to lower variable uses to SSA values
+ *
+ * This pass walks the list of instructions and tries to lower as many
+ * local variable load/store operations to SSA defs and uses as it can.
+ * The process involves four passes:
+ *
+ *  1) Iterate over all of the instructions and mark where each local
+ *     variable deref is used in a load, store, or copy.  While we're at
+ *     it, we keep track of all of the fully-qualified (no wildcards) and
+ *     fully-direct references we see and store them in the
+ *     direct_deref_nodes hash table.
+ *
+ *  2) Walk over the list of fully-qualified direct derefs generated in
+ *     the previous pass.  For each deref, we determine if it can ever be
+ *     aliased, i.e. if there is an indirect reference anywhere that may
+ *     refer to it.  If it cannot be aliased, we mark it for lowering to an
+ *     SSA value.  At this point, we lower any var_copy instructions that
+ *     use the given deref to load/store operations and, if the deref has a
+ *     constant initializer, we go ahead and add a load_const value at the
+ *     beginning of the function with the initialized value.
+ *
+ *  3) Walk over the list of derefs we plan to lower to SSA values and
+ *     insert phi nodes as needed.
+ *
+ *  4) Perform "variable renaming" by replacing the load/store instructions
+ *     with SSA definitions and SSA uses.
+ *
+ * All bookkeeping lives in a temporary ralloc context (state.dead_ctx) that
+ * is released before returning, on every path.
+ *
+ * Returns true if at least one deref was marked for lowering.
+ */
+static bool
+nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
+{
+   struct lower_variables_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = ralloc_context(state.mem_ctx);
+   state.impl = impl;
+
+   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
+                                                   _mesa_hash_pointer,
+                                                   _mesa_key_pointer_equal);
+   state.direct_deref_nodes = _mesa_hash_table_create(state.dead_ctx,
+                                                      hash_deref, derefs_equal);
+   state.phi_table = _mesa_hash_table_create(state.dead_ctx,
+                                             _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   /* Build the initial deref structures and direct_deref_nodes table */
+   state.add_to_direct_deref_nodes = true;
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   bool progress = false;
+
+   nir_metadata_require(impl, nir_metadata_block_index);
+
+   /* We're about to iterate through direct_deref_nodes.  Don't modify it. */
+   state.add_to_direct_deref_nodes = false;
+
+   struct hash_entry *entry;
+   hash_table_foreach(state.direct_deref_nodes, entry) {
+      nir_deref_var *deref = (void *)entry->key;
+      struct deref_node *node = entry->data;
+
+      /* Only local variables are candidates for lowering. */
+      if (deref->var->data.mode != nir_var_local) {
+         _mesa_hash_table_remove(state.direct_deref_nodes, entry);
+         continue;
+      }
+
+      if (deref_may_be_aliased(deref, &state)) {
+         _mesa_hash_table_remove(state.direct_deref_nodes, entry);
+         continue;
+      }
+
+      node->lower_to_ssa = true;
+      progress = true;
+
+      /* Seed the def stack with the constant initializer, if any. */
+      if (deref->var->constant_initializer) {
+         nir_load_const_instr *load = get_const_initializer_load(deref, &state);
+         nir_ssa_def_init(&load->instr, &load->def,
+                          glsl_get_vector_elements(node->type), NULL);
+         nir_instr_insert_before_cf_list(&impl->body, &load->instr);
+         def_stack_push(node, &load->def, &state);
+      }
+
+      foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
+   }
+
+   if (!progress) {
+      /* Nothing to lower.  Still release the scratch context; otherwise it
+       * stays attached to mem_ctx until that context is destroyed.
+       */
+      ralloc_free(state.dead_ctx);
+      return false;
+   }
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   /* We may have lowered some copy instructions to load/store
+    * instructions.  The uses from the copy instructions have already been
+    * removed but we need to rescan to ensure that the uses from the newly
+    * added load/store instructions are registered.  We need this
+    * information for phi node insertion below.
+    */
+   nir_foreach_block(impl, register_variable_uses_block, &state);
+
+   insert_phi_nodes(&state);
+   rename_variables_block(impl->start_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   ralloc_free(state.dead_ctx);
+
+   return progress;
+}
+
+/* Runs the vars-to-SSA lowering on every function overload of the shader
+ * that has an implementation.
+ */
+void
+nir_lower_vars_to_ssa(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+
+      if (impl != NULL)
+         nir_lower_vars_to_ssa_impl(impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c
new file mode 100644
index 000000000..602853ea6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a simple pass that lowers vecN instructions to a series of
+ * moves with partial writes.
+ */
+
+static bool
+src_matches_dest_reg(nir_dest *dest, nir_src *src)
+{
+   /* SSA values and indirectly addressed registers never count as a match. */
+   if (dest->is_ssa || src->is_ssa ||
+       dest->reg.indirect || src->reg.indirect)
+      return false;
+
+   return src->reg.reg == dest->reg.reg &&
+          src->reg.base_offset == dest->reg.base_offset;
+}
+
+/**
+ * For a given starting writemask channel and corresponding source index in
+ * the vec instruction, insert a MOV to the vec instruction's dest of all the
+ * writemask channels that get read from the same src reg.
+ *
+ * Returns the writemask of our MOV, so the parent loop calling this knows
+ * which ones have been processed.
+ */
+static unsigned
+insert_mov(nir_alu_instr *vec, unsigned start_channel,
+            unsigned start_src_idx, void *mem_ctx)
+{
+   unsigned src_idx = start_src_idx;
+   assert(src_idx < nir_op_infos[vec->op].num_inputs);
+   /* Start from a copy of the chosen source and of the vec's destination. */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+   /* The MOV initially writes only start_channel, fed by swizzle[0]. */
+   mov->dest.write_mask = (1u << start_channel);
+   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
+   src_idx++;
+   /* src_idx advances once per enabled channel of the vec's write mask. */
+   for (unsigned i = start_channel + 1; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (nir_srcs_equal(vec->src[src_idx].src, vec->src[start_src_idx].src)) {
+         mov->dest.write_mask |= (1 << i);
+         mov->src[0].swizzle[i] = vec->src[src_idx].swizzle[0];
+      }
+      src_idx++;
+   }
+
+   nir_instr_insert_before(&vec->instr, &mov->instr);
+
+   return mov->dest.write_mask;
+}
+
+static bool
+lower_vec_to_movs_block(nir_block *block, void *mem_ctx)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = (nir_alu_instr *)instr;
+      /* Only vec2/vec3/vec4 are lowered; every other ALU op is left alone. */
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Since we insert multiple MOVs, we have to be non-SSA. */
+      assert(!vec->dest.dest.is_ssa);
+      /* Bit i is set once channel i of the destination has its MOV. */
+      unsigned finished_write_mask = 0;
+
+      /* First, emit a MOV for all the src channels that are in the
+       * destination reg, in case other values we're populating in the dest
+       * might overwrite them.
+       */
+      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+         if (!(vec->dest.write_mask & (1 << i)))
+            continue;
+
+         if (src_matches_dest_reg(&vec->dest.dest, &vec->src[src_idx].src)) {
+            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+            break;
+         }
+         src_idx++;
+      }
+
+      /* Now, emit MOVs for all the other src channels. */
+      for (unsigned i = 0, src_idx = 0; i < 4; i++) {
+         if (!(vec->dest.write_mask & (1 << i)))
+            continue;
+
+         if (!(finished_write_mask & (1 << i)))
+            finished_write_mask |= insert_mov(vec, i, src_idx, mem_ctx);
+
+         src_idx++;
+      }
+      /* Every enabled channel now has a MOV ahead of the vec; drop it. */
+      nir_instr_remove(&vec->instr);
+      ralloc_free(vec);
+   }
+
+   return true;
+}
+
+static void
+nir_lower_vec_to_movs_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, lower_vec_to_movs_block, ralloc_parent(impl));
+}
+
+void
+nir_lower_vec_to_movs(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl)
+         nir_lower_vec_to_movs_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_metadata.c b/mesalib/src/glsl/nir/nir_metadata.c
new file mode 100644
index 000000000..a03e12456
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_metadata.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include "nir.h"
+
+/*
+ * Handles management of the metadata.
+ */
+
+void
+nir_metadata_require(nir_function_impl *impl, nir_metadata required)
+{
+#define NEEDS_UPDATE(X) ((required & ~impl->valid_metadata) & (X))
+   /* NEEDS_UPDATE(X): X was requested but is not flagged in valid_metadata. */
+   if (NEEDS_UPDATE(nir_metadata_block_index))
+      nir_index_blocks(impl);
+   if (NEEDS_UPDATE(nir_metadata_dominance))
+      nir_calc_dominance_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_live_variables))
+      nir_live_variables_impl(impl);
+
+#undef NEEDS_UPDATE
+   /* Whatever was requested is recorded as valid on return. */
+   impl->valid_metadata |= required;
+}
+
+void
+nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
+{
+   impl->valid_metadata &= preserved; /* clears every flag not in preserved */
+}
diff --git a/mesalib/src/glsl/nir/nir_opcodes.py b/mesalib/src/glsl/nir/nir_opcodes.py
new file mode 100644
index 000000000..77f3bb826
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes.py
@@ -0,0 +1,591 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Connor Abbott (cwabbott0@gmail.com)
+
+
+# Class that represents all the information we have about the opcode
+# NOTE: this must be kept in sync with nir_op_info
+
+class Opcode(object):
+   """Class that represents all the information we have about the opcode
+   NOTE: this must be kept in sync with nir_op_info
+   """
+   def __init__(self, name, output_size, output_type, input_sizes,
+                input_types, algebraic_properties, const_expr):
+      """Parameters:
+
+      - name is the name of the opcode (prepend nir_op_ for the enum name)
+      - all types are strings that get nir_type_ prepended to them
+      - input_types is a list of types
+      - algebraic_properties is a space-separated string, where nir_op_is_ is
+        prepended before each entry
+      - const_expr is an expression or series of statements that computes the
+        constant value of the opcode given the constant values of its inputs.
+
+      Constant expressions are formed from the variables src0, src1, ...,
+      src(N-1), where N is the number of arguments.  The output of the
+      expression should be stored in the dst variable.  Per-component input
+      and output variables will be scalars and non-per-component input and
+      output variables will be a struct with fields named x, y, z, and w
+      all of the correct type.  Input and output variables can be assumed
+      to already be of the correct type and need no conversion.  In
+      particular, the conversion from the C bool type to/from  NIR_TRUE and
+      NIR_FALSE happens automatically.
+
+      For per-component instructions, the entire expression will be
+      executed once for each component.  For non-per-component
+      instructions, the expression is expected to store the correct values
+      in dst.x, dst.y, etc.  If "dst" does not exist anywhere in the
+      constant expression, an assignment to dst will happen automatically
+      and the result will be equivalent to "dst = <expression>" for
+      per-component instructions and "dst.x = dst.y = ... = <expression>"
+      for non-per-component instructions.
+      """
+      assert isinstance(name, str)
+      assert isinstance(output_size, int)
+      assert isinstance(output_type, str)
+      assert isinstance(input_sizes, list)
+      assert isinstance(input_sizes[0], int)
+      assert isinstance(input_types, list)
+      assert isinstance(input_types[0], str)
+      assert isinstance(algebraic_properties, str)
+      assert isinstance(const_expr, str)
+      assert len(input_sizes) == len(input_types)
+      assert 0 <= output_size <= 4
+      for size in input_sizes:
+         assert 0 <= size <= 4
+         if output_size != 0:
+            assert size != 0
+      self.name = name
+      self.num_inputs = len(input_sizes)
+      self.output_size = output_size
+      self.output_type = output_type
+      self.input_sizes = input_sizes
+      self.input_types = input_types
+      self.algebraic_properties = algebraic_properties
+      self.const_expr = const_expr
+
+# helper variables for strings
+tfloat = "float"
+tint = "int"
+tbool = "bool"
+tunsigned = "unsigned"
+
+commutative = "commutative "
+associative = "associative "
+
+# global dictionary of opcodes
+opcodes = {}
+
+def opcode(name, output_size, output_type, input_sizes, input_types,
+           algebraic_properties, const_expr):
+   assert name not in opcodes
+   opcodes[name] = Opcode(name, output_size, output_type, input_sizes,
+                          input_types, algebraic_properties, const_expr)
+
+def unop_convert(name, in_type, out_type, const_expr):
+   opcode(name, 0, out_type, [0], [in_type], "", const_expr)
+
+def unop(name, ty, const_expr):
+   opcode(name, 0, ty, [0], [ty], "", const_expr)
+
+def unop_horiz(name, output_size, output_type, input_size, input_type,
+               const_expr):
+   opcode(name, output_size, output_type, [input_size], [input_type], "",
+          const_expr)
+
+def unop_reduce(name, output_size, output_type, input_type, prereduce_expr,
+                reduce_expr, final_expr):
+   def prereduce(src):
+      return "(" + prereduce_expr.format(src=src) + ")"
+   def final(src):
+      return final_expr.format(src="(" + src + ")")
+   def reduce_(src0, src1):
+      return reduce_expr.format(src0=src0, src1=src1)
+   src0 = prereduce("src0.x")
+   src1 = prereduce("src0.y")
+   src2 = prereduce("src0.z")
+   src3 = prereduce("src0.w")
+   unop_horiz(name + "2", output_size, output_type, 2, input_type,
+              final(reduce_(src0, src1)))
+   unop_horiz(name + "3", output_size, output_type, 3, input_type,
+              final(reduce_(reduce_(src0, src1), src2)))
+   unop_horiz(name + "4", output_size, output_type, 4, input_type,
+              final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+
+# These two move instructions differ in what modifiers they support and what
+# the negate modifier means. Otherwise, they are identical.
+unop("fmov", tfloat, "src0")
+unop("imov", tint, "src0")
+
+unop("ineg", tint, "-src0")
+unop("fneg", tfloat, "-src0")
+unop("inot", tint, "~src0") # invert every bit of the integer
+unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f")
+unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)")
+unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
+unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
+unop("fabs", tfloat, "fabsf(src0)")
+unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)")
+unop("frcp", tfloat, "1.0f / src0")
+unop("frsq", tfloat, "1.0f / sqrtf(src0)")
+unop("fsqrt", tfloat, "sqrtf(src0)")
+unop("fexp", tfloat, "expf(src0)") # < e^x
+unop("flog", tfloat, "logf(src0)") # log base e
+unop("fexp2", tfloat, "exp2f(src0)")
+unop("flog2", tfloat, "log2f(src0)")
+unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
+# Float-to-boolean conversion
+unop_convert("f2b", tfloat, tbool, "src0 == 0.0f")
+# Boolean-to-float conversion
+unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+# Int-to-boolean conversion
+unop_convert("i2b", tint, tbool, "src0 == 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+
+unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
+unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
+unop_reduce("fany", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} || {src1}",
+            "{src} ? 1.0f : 0.0f")
+unop_reduce("fall", 1, tfloat, tfloat, "{src} != 0.0f", "{src0} && {src1}",
+            "{src} ? 1.0f : 0.0f")
+
+# Unary floating-point rounding operations.
+
+
+unop("ftrunc", tfloat, "truncf(src0)")
+unop("fceil", tfloat, "ceilf(src0)")
+unop("ffloor", tfloat, "floorf(src0)")
+unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("fround_even", tfloat, "_mesa_round_to_even(src0)")
+
+
+# Trigonometric operations.
+
+
+unop("fsin", tfloat, "sinf(src0)")
+unop("fcos", tfloat, "cosf(src0)")
+unop("fsin_reduced", tfloat, "sinf(src0)")
+unop("fcos_reduced", tfloat, "cosf(src0)")
+
+
+# Partial derivatives.
+
+
+unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0f")
+unop("fddx_fine", tfloat, "0.0f")
+unop("fddy_fine", tfloat, "0.0f")
+unop("fddx_coarse", tfloat, "0.0f")
+unop("fddy_coarse", tfloat, "0.0f")
+
+
+# Floating point pack and unpack operations.
+
+def pack_2x16(fmt):
+   unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+""".replace("fmt", fmt))
+
+def pack_4x8(fmt):
+   unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+""".replace("fmt", fmt))
+
+def unpack_2x16(fmt):
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
+dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
+""".replace("fmt", fmt))
+
+def unpack_4x8(fmt):
+   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
+dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+""".replace("fmt", fmt))
+
+
+pack_2x16("snorm")
+pack_4x8("snorm")
+pack_2x16("unorm")
+pack_4x8("unorm")
+pack_2x16("half")
+unpack_2x16("snorm")
+unpack_4x8("snorm")
+unpack_2x16("unorm")
+unpack_4x8("unorm")
+unpack_2x16("half")
+
+
+# Lowered floating point unpacking operations.
+
+
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
+           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
+           "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+# Bit operations, part of ARB_gpu_shader5.
+
+
+unop("bitfield_reverse", tunsigned, """
+/* we're not winning any awards for speed here, but that's ok */
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++)
+   dst |= ((src0 >> bit) & 1) << (31 - bit);
+""")
+unop("bit_count", tunsigned, """
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++) {
+   if ((src0 >> bit) & 1)
+      dst++;
+}
+""")
+
+unop_convert("ufind_msb", tunsigned, tint, """
+dst = -1;
+for (int bit = 31; bit > 0; bit--) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("ifind_msb", tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+   /* If src0 < 0, we're looking for the first 0 bit.
+    * if src0 >= 0, we're looking for the first 1 bit.
+    */
+   if ((((src0 >> bit) & 1) && (src0 >= 0)) ||
+      (!((src0 >> bit) & 1) && (src0 < 0))) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("find_lsb", tint, """
+dst = -1;
+for (unsigned bit = 0; bit < 32; bit++) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+
+for i in xrange(1, 5):
+   for j in xrange(1, 5):
+      unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f")
+
+def binop_convert(name, out_type, in_type, alg_props, const_expr):
+   opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr)
+
+def binop(name, ty, alg_props, const_expr):
+   binop_convert(name, ty, ty, alg_props, const_expr)
+
+def binop_compare(name, ty, alg_props, const_expr):
+   binop_convert(name, tbool, ty, alg_props, const_expr)
+
+def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size,
+                src2_type, const_expr):
+   opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type],
+          "", const_expr)
+
+def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
+                 reduce_expr, final_expr):
+   def final(src):
+      return final_expr.format(src= "(" + src + ")")
+   def reduce_(src0, src1):
+      return reduce_expr.format(src0=src0, src1=src1)
+   def prereduce(src0, src1):
+      return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")"
+   src0 = prereduce("src0.x", "src1.x")
+   src1 = prereduce("src0.y", "src1.y")
+   src2 = prereduce("src0.z", "src1.z")
+   src3 = prereduce("src0.w", "src1.w")
+   opcode(name + "2", output_size, output_type,
+          [2, 2], [src_type, src_type], commutative,
+          final(reduce_(src0, src1)))
+   opcode(name + "3", output_size, output_type,
+          [3, 3], [src_type, src_type], commutative,
+          final(reduce_(reduce_(src0, src1), src2)))
+   opcode(name + "4", output_size, output_type,
+          [4, 4], [src_type, src_type], commutative,
+          final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
+
+binop("fadd", tfloat, commutative + associative, "src0 + src1")
+binop("iadd", tint, commutative + associative, "src0 + src1")
+binop("fsub", tfloat, "", "src0 - src1")
+binop("isub", tint, "", "src0 - src1")
+
+binop("fmul", tfloat, commutative + associative, "src0 * src1")
+# low 32-bits of signed/unsigned integer multiply
+binop("imul", tint, commutative + associative, "src0 * src1")
+# high 32-bits of signed integer multiply
+binop("imul_high", tint, commutative,
+      "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
+# high 32-bits of unsigned integer multiply
+binop("umul_high", tunsigned, commutative,
+      "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
+
+binop("fdiv", tfloat, "", "src0 / src1")
+binop("idiv", tint, "", "src0 / src1")
+binop("udiv", tunsigned, "", "src0 / src1")
+
+# returns a boolean representing the carry resulting from the addition of
+# the two unsigned arguments.
+
+binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
+
+# returns a boolean representing the borrow resulting from the subtraction
+# of the two unsigned arguments.
+
+binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
+
+binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
+binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
+
+#
+# Comparisons
+#
+
+
+# these integer-aware comparisons return a boolean (0 or ~0)
+
+binop_compare("flt", tfloat, "", "src0 < src1")
+binop_compare("fge", tfloat, "", "src0 >= src1")
+binop_compare("feq", tfloat, commutative, "src0 == src1")
+binop_compare("fne", tfloat, commutative, "src0 != src1")
+binop_compare("ilt", tint, "", "src0 < src1")
+binop_compare("ige", tint, "", "src0 >= src1")
+binop_compare("ieq", tint, commutative, "src0 == src1")
+binop_compare("ine", tint, commutative, "src0 != src1")
+binop_compare("ult", tunsigned, "", "src0 < src1")
+binop_compare("uge", tunsigned, "", "src0 >= src1")
+
+# integer-aware GLSL-style comparisons that compare floats and ints
+
+binop_reduce("ball_fequal",  1, tbool, tfloat, "{src0} == {src1}",
+             "{src0} && {src1}", "{src}")
+binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}",
+             "{src0} || {src1}", "{src}")
+binop_reduce("ball_iequal",  1, tbool, tint, "{src0} == {src1}",
+             "{src0} && {src1}", "{src}")
+binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
+             "{src0} || {src1}", "{src}")
+
+# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
+
+binop_reduce("fall_equal",  1, tfloat, tfloat, "{src0} == {src1}",
+             "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
+binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+             "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
+
+# These comparisons for integer-less hardware return 1.0 and 0.0 for true
+# and false respectively
+
+binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+
+
+binop("ishl", tint, "", "src0 << src1")
+binop("ishr", tint, "", "src0 >> src1")
+binop("ushr", tunsigned, "", "src0 >> src1")
+
+# bitwise logic operators
+#
+# These are also used as boolean and, or, xor for hardware supporting
+# integers.
+
+
+binop("iand", tunsigned, commutative + associative, "src0 & src1")
+binop("ior", tunsigned, commutative + associative, "src0 | src1")
+binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
+
+
+# floating point logic operators
+#
+# These use (src != 0.0) for testing the truth of the input, and output 1.0
+# for true and 0.0 for false
+
+binop("fand", tfloat, commutative,
+      "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("for", tfloat, commutative,
+      "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
+binop("fxor", tfloat, commutative,
+      "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
+
+binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
+             "{src}")
+
+binop("fmin", tfloat, "", "fminf(src0, src1)")
+binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("fmax", tfloat, "", "fmaxf(src0, src1)")
+binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+
+binop("fpow", tfloat, "", "powf(src0, src1)")
+
+binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
+            "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
+
+binop_convert("bfm", tunsigned, tint, "", """
+int offset = src0, bits = src1;
+if (offset < 0 || bits < 0 || offset > 32 || bits > 32 - offset)
+   dst = 0; /* undefined per the spec */
+else
+   dst = (unsigned)(((1ull << bits) - 1) << offset); /* 64-bit: bits may be 32 */
+""")
+
+opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
+dst = ldexp(src0, src1);
+/* flush denormals to zero. */
+if (!isnormal(dst))
+   dst = copysign(0.0f, src0);
+""")
+
+# Combines the first component of each input to make a 2-component vector.
+
+binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
+dst.x = src0.x;
+dst.y = src1.x;
+""")
+
+def triop(name, ty, const_expr):
+   opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
+def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
+   opcode(name, output_size, tunsigned,
+   [src1_size, src2_size, src3_size],
+   [tunsigned, tunsigned, tunsigned], "", const_expr)
+
+triop("ffma", tfloat, "src0 * src1 + src2")
+
+triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
+
+# Conditional Select
+#
+# A vector conditional select instruction (like ?:, but operating per-
+# component on vectors). There are two versions, one for floating point
+# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
+
+
+triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+opcode("bcsel", 0, tunsigned, [0, 0, 0],
+      [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
+
+triop("bfi", tunsigned, """
+unsigned mask = src0, insert = src1, base = src2;
+if (mask == 0) {
+   dst = base;
+} else {
+   unsigned tmp = mask;
+   while (!(tmp & 1)) { /* align insert with the lowest set bit of mask */
+      tmp >>= 1;
+      insert <<= 1;
+   }
+   dst = (base & ~mask) | (insert & mask);
+}
+""")
+
+opcode("ubitfield_extract", 0, tunsigned,
+       [0, 1, 1], [tunsigned, tint, tint], "", """
+unsigned base = src0;
+int offset = src1.x, bits = src2.x;
+if (bits == 0) {
+   dst = 0;
+} else if (bits < 0 || offset < 0 || offset > 32 || bits > 32 - offset) {
+   dst = 0; /* undefined per the spec */
+} else {
+   dst = (base >> offset) & (unsigned)((1ull << bits) - 1); /* bits may be 32 */
+}
+""")
+opcode("ibitfield_extract", 0, tint,
+       [0, 1, 1], [tint, tint, tint], "", """
+int base = src0;
+int offset = src1.x, bits = src2.x;
+if (bits == 0) {
+   dst = 0;
+} else if (offset < 0 || bits < 0 || offset > 32 || bits > 32 - offset) {
+   dst = 0;
+} else {
+   dst = (base << (32 - offset - bits)) >> (32 - bits); /* sign-extending shift */
+}
+""")
+
+# Combines the first component of each input to make a 3-component vector.
+
+triop_horiz("vec3", 3, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+""")
+
+def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
+                 src4_size, const_expr):
+   opcode(name, output_size, tunsigned,
+          [src1_size, src2_size, src3_size, src4_size],
+          [tunsigned, tunsigned, tunsigned, tunsigned],
+          "", const_expr)
+
+opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
+       [tunsigned, tunsigned, tint, tint], "", """
+unsigned base = src0, insert = src1;
+int offset = src2.x, bits = src3.x;
+if (bits == 0) {
+   dst = base; /* inserting zero bits leaves base unchanged */
+} else if (offset < 0 || bits < 0 || offset > 32 || bits > 32 - offset) {
+   dst = 0;
+} else {
+   unsigned mask = (unsigned)(((1ull << bits) - 1) << offset);
+   dst = (base & ~mask) | ((insert << offset) & mask);
+}
+""")
+
+quadop_horiz("vec4", 4, 1, 1, 1, 1, """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+dst.w = src3.x;
+""")
+
+
diff --git a/mesalib/src/glsl/nir/nir_opcodes_c.py b/mesalib/src/glsl/nir/nir_opcodes_c.py
new file mode 100644
index 000000000..7049c5be6
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes_c.py
@@ -0,0 +1,55 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Connor Abbott
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Connor Abbott (cwabbott0@gmail.com)
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+template = Template("""
+#include "nir.h"
+
+const nir_op_info nir_op_infos[nir_num_opcodes] = {
+% for name, opcode in sorted(opcodes.iteritems()):
+{
+   .name = "${name}",
+   .num_inputs = ${opcode.num_inputs},
+   .output_size = ${opcode.output_size},
+   .output_type = ${"nir_type_" + opcode.output_type},
+   .input_sizes = {
+      ${ ", ".join(str(size) for size in opcode.input_sizes) }
+   },
+   .input_types = {
+      ${ ", ".join("nir_type_" + type for type in opcode.input_types) }
+   },
+   .algebraic_properties =
+      ${ "0" if opcode.algebraic_properties == "" else " | ".join(
+            "NIR_OP_IS_" + prop.upper() for prop in
+               opcode.algebraic_properties.strip().split(" ")) }
+},
+% endfor
+};
+""")
+
+print template.render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_opcodes_h.py b/mesalib/src/glsl/nir/nir_opcodes_h.py
new file mode 100644
index 000000000..be15a96d2
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opcodes_h.py
@@ -0,0 +1,47 @@
+#! /usr/bin/env python
+
+template = """\
+/* Copyright (C) 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ */
+
+#ifndef _NIR_OPCODES_
+#define _NIR_OPCODES_
+
+<% opcode_names = sorted(opcodes.iterkeys()) %>
+
+typedef enum {
+% for name in opcode_names:
+   nir_op_${name},
+% endfor
+   nir_last_opcode = nir_op_${opcode_names[-1]},
+   nir_num_opcodes = nir_last_opcode + 1
+} nir_op;
+
+#endif /* _NIR_OPCODES_ */"""
+
+from nir_opcodes import opcodes
+from mako.template import Template
+
+print Template(template).render(opcodes=opcodes)
diff --git a/mesalib/src/glsl/nir/nir_opt_algebraic.py b/mesalib/src/glsl/nir/nir_opt_algebraic.py
new file mode 100644
index 000000000..7bf643134
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_algebraic.py
@@ -0,0 +1,188 @@
+#! /usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Jason Ekstrand (jason@jlekstrand.net)
+
+import nir_algebraic
+
+# Convenience variables
+a = 'a'
+b = 'b'
+c = 'c'
+d = 'd'
+
+# Written in the form (<search>, <replace>) where <search> is an expression
+# and <replace> is either an expression or a value.  An expression is
+# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# where each source is either an expression or a value.  A value can be
+# either a numeric constant or a string representing a variable name.
+#
+# Variable names are specified as "[#]name[@type]" where "#" indicates that
+# the given variable will only match constants and the type indicates that
+# the given variable will only match values from ALU instructions with the
+# given output type.
+#
+# For constants, you have to be careful to make sure that it is the right
+# type because python is unaware of the source and destination types of the
+# opcodes.
+
+optimizations = [
+   (('fneg', ('fneg', a)), a),
+   (('ineg', ('ineg', a)), a),
+   (('fabs', ('fabs', a)), ('fabs', a)),
+   (('fabs', ('fneg', a)), ('fabs', a)),
+   (('iabs', ('iabs', a)), ('iabs', a)),
+   (('iabs', ('ineg', a)), ('iabs', a)),
+   (('fadd', a, 0.0), a),
+   (('iadd', a, 0), a),
+   (('fmul', a, 0.0), 0.0),
+   (('imul', a, 0), 0),
+   (('fmul', a, 1.0), a),
+   (('imul', a, 1), a),
+   (('fmul', a, -1.0), ('fneg', a)),
+   (('imul', a, -1), ('ineg', a)),
+   (('ffma', 0.0, a, b), b),
+   (('ffma', a, 0.0, b), b),
+   (('ffma', a, b, 0.0), ('fmul', a, b)),
+   (('ffma', a, 1.0, b), ('fadd', a, b)),
+   (('ffma', 1.0, a, b), ('fadd', a, b)),
+   (('flrp', a, b, 0.0), a),
+   (('flrp', a, b, 1.0), b),
+   (('flrp', a, a, b), a),
+   (('flrp', 0.0, a, b), ('fmul', a, b)),
+   (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+   (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+   # Comparison simplifications
+   (('inot', ('flt', a, b)), ('fge', a, b)),
+   (('inot', ('fge', a, b)), ('flt', a, b)),
+   (('inot', ('ilt', a, b)), ('ige', a, b)),
+   (('inot', ('ige', a, b)), ('ilt', a, b)),
+   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
+   (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+   (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
+   (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
+   (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
+   (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+   (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
+   (('fsat', ('fsat', a)), ('fsat', a)),
+   (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
+   # Comparison with the same args.  Note that these are not done for
+   # the float versions because NaN always returns false on float
+   # inequalities.
+   (('ilt', a, a), False),
+   (('ige', a, a), True),
+   (('ieq', a, a), True),
+   (('ine', a, a), False),
+   (('ult', a, a), False),
+   (('uge', a, a), True),
+   # Logical and bit operations
+   (('fand', a, 0.0), 0.0),
+   (('iand', a, a), a),
+   (('iand', a, 0), 0),
+   (('ior', a, a), a),
+   (('ior', a, 0), a),
+   (('fxor', a, a), 0.0),
+   (('ixor', a, a), 0),
+   (('inot', ('inot', a)), a),
+   # DeMorgan's Laws
+   (('iand', ('inot', a), ('inot', b)), ('inot', ('ior',  a, b))),
+   (('ior',  ('inot', a), ('inot', b)), ('inot', ('iand', a, b))),
+   # Shift optimizations
+   (('ishl', 0, a), 0),
+   (('ishl', a, 0), a),
+   (('ishr', 0, a), 0),
+   (('ishr', a, 0), a),
+   (('ushr', 0, a), 0),
+   (('ushr', a, 0), 0),
+   # Exponential/logarithmic identities
+   (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+   (('fexp',  ('flog',  a)), a), # e^ln(a)  = a
+   (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+   (('flog',  ('fexp',  a)), a), # ln(e^a)  = a
+   (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
+   (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+   (('fexp',  ('fmul', ('flog', a), b)),  ('fpow', a, b), '!options->lower_fpow'), # e^(ln(a)*b) = a^b
+   (('fpow', a, 1.0), a),
+   (('fpow', a, 2.0), ('fmul', a, a)),
+   (('fpow', 2.0, a), ('fexp2', a)),
+   # Division and reciprocal
+   (('fdiv', 1.0, a), ('frcp', a)),
+   (('frcp', ('frcp', a)), a),
+   (('frcp', ('fsqrt', a)), ('frsq', a)),
+   (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
+   (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+   # Boolean simplifications
+   (('ine', 'a@bool', 0), 'a'),
+   (('ieq', 'a@bool', 0), ('inot', 'a')),
+   (('bcsel', a, True, False), ('ine', a, 0)),
+   (('bcsel', a, False, True), ('ieq', a, 0)),
+   (('bcsel', True, b, c), b),
+   (('bcsel', False, b, c), c),
+   # The result of this should be hit by constant propagation and, in the
+   # next round of opt_algebraic, get picked up by one of the above two.
+   (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)),
+
+   (('bcsel', a, b, b), b),
+   (('fcsel', a, b, b), b),
+
+   # Subtracts
+   (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+   (('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+   (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
+   (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
+   (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+   (('iadd', a, ('isub', 0, b)), ('isub', a, b)),
+   (('fabs', ('fsub', 0.0, a)), ('fabs', a)),
+   (('iabs', ('isub', 0, a)), ('iabs', a)),
+
+# This one may not be exact
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+]
+
+# Add optimizations to handle the case where the result of a ternary is
+# compared to a constant.  This way we can take things like
+#
+# (a ? 0 : 1) > 0
+#
+# and turn it into
+#
+# a ? (0 > 0) : (1 > 0)
+#
+# which constant folding will eat for lunch.  The resulting ternary will
+# further get cleaned up by the boolean reductions above and we will be
+# left with just the original variable "a".
+for op in ['flt', 'fge', 'feq', 'fne',
+           'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']:
+   optimizations += [
+      ((op, ('bcsel', 'a', '#b', '#c'), '#d'),
+       ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))),
+      ((op, '#d', ('bcsel', a, '#b', '#c')),
+       ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
+   ]
+
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
diff --git a/mesalib/src/glsl/nir/nir_opt_constant_folding.c b/mesalib/src/glsl/nir/nir_opt_constant_folding.c
new file mode 100644
index 000000000..85c09fc48
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_constant_folding.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_constant_expressions.h"
+#include <math.h>
+
+/*
+ * Implements SSA-based constant folding.
+ */
+
+/* State shared by the per-block callback while folding one function impl. */
+struct constant_fold_state {
+   /* ralloc context handed to constant_fold_alu_instr() for new instructions */
+   void *mem_ctx;
+   /* function implementation currently being processed */
+   nir_function_impl *impl;
+   /* becomes true as soon as any instruction has been folded */
+   bool progress;
+};
+
+/*
+ * Attempts to fold a single ALU instruction.
+ *
+ * Folding only happens when the destination is SSA and every source is an
+ * SSA value whose parent instruction is a load_const.  The swizzled source
+ * constants are evaluated with nir_eval_const_opcode(), the result is stored
+ * in a new load_const inserted before the instruction, all uses of the old
+ * destination are redirected to it, and the instruction is removed and freed.
+ *
+ * Returns true if the instruction was replaced, false if left untouched.
+ */
+static bool
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+{
+   nir_const_value src[4];
+
+   if (!instr->dest.dest.is_ssa)
+      return false;
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      nir_alu_src *alu_src = &instr->src[s];
+
+      if (!alu_src->src.is_ssa)
+         return false;
+
+      nir_instr *producer = alu_src->src.ssa->parent_instr;
+      if (producer->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *constant = nir_instr_as_load_const(producer);
+
+      /* Gather the constant's channels in the order the swizzle selects. */
+      const unsigned num_comps = nir_ssa_alu_instr_src_components(instr, s);
+      for (unsigned c = 0; c < num_comps; c++)
+         src[s].u[c] = constant->value.u[alu_src->swizzle[c]];
+
+      /* We shouldn't have any source modifiers in the optimization loop. */
+      assert(!alu_src->abs && !alu_src->negate);
+   }
+
+   /* We shouldn't have any saturate modifiers in the optimization loop. */
+   assert(!instr->dest.saturate);
+
+   const unsigned dest_comps = instr->dest.dest.ssa.num_components;
+
+   nir_const_value result = nir_eval_const_opcode(instr->op, dest_comps, src);
+
+   nir_load_const_instr *folded =
+      nir_load_const_instr_create(mem_ctx, dest_comps);
+   folded->value = result;
+
+   nir_instr_insert_before(&instr->instr, &folded->instr);
+
+   /* Point every user of the old destination at the new constant. */
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&folded->def), mem_ctx);
+
+   nir_instr_remove(&instr->instr);
+   ralloc_free(instr);
+
+   return true;
+}
+
+/*
+ * Walks the deref chain below "deref" and turns every indirect array
+ * dereference whose index is an SSA load_const into a direct one: the first
+ * constant component is added to base_offset and the indirect source is
+ * cleared.
+ *
+ * Returns true if at least one link of the chain was changed.
+ */
+static bool
+constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
+{
+   bool changed = false;
+
+   for (nir_deref *link = deref->deref.child; link != NULL;
+        link = link->child) {
+      if (link->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(link);
+
+      if (arr->deref_array_type != nir_deref_array_type_indirect)
+         continue;
+      if (!arr->indirect.is_ssa)
+         continue;
+
+      nir_instr *index_instr = arr->indirect.ssa->parent_instr;
+      if (index_instr->type != nir_instr_type_load_const)
+         continue;
+
+      arr->base_offset += nir_instr_as_load_const(index_instr)->value.u[0];
+
+      /* Clear out the source */
+      nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
+
+      arr->deref_array_type = nir_deref_array_type_direct;
+
+      changed = true;
+   }
+
+   return changed;
+}
+
+/*
+ * Runs constant_fold_deref() on every variable dereference of an intrinsic.
+ * Returns true if any of them changed.
+ */
+static bool
+constant_fold_intrinsic_instr(nir_intrinsic_instr *instr)
+{
+   const unsigned num_vars =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+   bool progress = false;
+
+   for (unsigned v = 0; v < num_vars; v++) {
+      if (constant_fold_deref(&instr->instr, instr->variables[v]))
+         progress = true;
+   }
+
+   return progress;
+}
+
+/*
+ * Folds constant indirects in the sampler dereference of a texture
+ * instruction, if it has one.  Returns true on change.
+ */
+static bool
+constant_fold_tex_instr(nir_tex_instr *instr)
+{
+   if (!instr->sampler)
+      return false;
+
+   return constant_fold_deref(&instr->instr, instr->sampler);
+}
+
+/*
+ * Block callback: dispatches every instruction of the block to the folder
+ * matching its type and accumulates the result in the state's progress flag.
+ * Always returns true.
+ */
+static bool
+constant_fold_block(nir_block *block, void *void_state)
+{
+   struct constant_fold_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      bool folded = false;
+
+      if (instr->type == nir_instr_type_alu) {
+         folded = constant_fold_alu_instr(nir_instr_as_alu(instr),
+                                          state->mem_ctx);
+      } else if (instr->type == nir_instr_type_intrinsic) {
+         folded = constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr));
+      } else if (instr->type == nir_instr_type_tex) {
+         folded = constant_fold_tex_instr(nir_instr_as_tex(instr));
+      }
+      /* Any other instruction type: don't know how to constant fold */
+
+      state->progress |= folded;
+   }
+
+   return true;
+}
+
+/*
+ * Runs constant folding over every block of one function implementation.
+ * When something changed, declares block-index and dominance metadata as
+ * preserved.  Returns whether anything changed.
+ */
+static bool
+nir_opt_constant_folding_impl(nir_function_impl *impl)
+{
+   struct constant_fold_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .impl = impl,
+      .progress = false,
+   };
+
+   nir_foreach_block(impl, constant_fold_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/*
+ * Entry point: constant-folds every overload of the shader that has an
+ * implementation.  Returns true if any implementation changed.
+ */
+bool
+nir_opt_constant_folding(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl && nir_opt_constant_folding_impl(overload->impl))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_copy_propagate.c b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c
new file mode 100644
index 000000000..dd0ec01ef
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_copy_propagate.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <main/imports.h>
+
+/**
+ * SSA-based copy propagation
+ */
+
+/*
+ * Returns true for an fmov/imov without destination saturate, without
+ * abs/negate on its source (modifiers are handled in a separate pass) and
+ * with an SSA source.
+ */
+static bool is_move(nir_alu_instr *instr)
+{
+   const bool is_mov_op = instr->op == nir_op_fmov ||
+                          instr->op == nir_op_imov;
+   const nir_alu_src *src0 = &instr->src[0];
+
+   return is_mov_op &&
+          !instr->dest.saturate &&
+          !src0->abs && !src0->negate &&
+          src0->src.is_ssa;
+}
+
+/*
+ * Returns true if "instr" is a move (see is_move()) whose source swizzle is
+ * the identity on the written channels.  The scan stops at the first channel
+ * that is not set in the write mask.
+ */
+static bool
+is_swizzleless_move(nir_alu_instr *instr)
+{
+   if (!is_move(instr))
+      return false;
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      const bool written = ((instr->dest.write_mask >> chan) & 1) != 0;
+
+      if (!written)
+         break;
+
+      if (instr->src[0].swizzle[chan] != chan)
+         return false;
+   }
+
+   return true;
+}
+
+/*
+ * Returns true if "instr" is a vec2/vec3/vec4 whose sources are all SSA.
+ */
+static bool is_vec(nir_alu_instr *instr)
+{
+   switch (instr->op) {
+   case nir_op_vec2:
+   case nir_op_vec3:
+   case nir_op_vec4:
+      break;
+   default:
+      return false;
+   }
+
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (!instr->src[s].src.is_ssa)
+         return false;
+   }
+
+   return true;
+}
+
+/* State for search_def(): records whether any visited source reads "def". */
+typedef struct {
+   /* SSA definition being searched for */
+   nir_ssa_def *def;
+   /* set to true by search_def() once a matching source is seen */
+   bool found;
+} search_def_state;
+
+/*
+ * Source callback: flags the state when "src" is an SSA source reading the
+ * definition being searched for.  Always returns true.
+ */
+static bool
+search_def(nir_src *src, void *_state)
+{
+   search_def_state *search = _state;
+
+   if (!src->is_ssa)
+      return true;
+
+   if (src->ssa == search->def)
+      search->found = true;
+
+   return true;
+}
+
+/*
+ * Makes "src" (a source of parent_instr) read new_def and keeps the use sets
+ * of both definitions in sync: parent_instr is added to new_def's uses, and
+ * removed from the previous definition's uses unless another source of the
+ * instruction still reads it.
+ */
+static void
+rewrite_src_instr(nir_src *src, nir_ssa_def *new_def, nir_instr *parent_instr)
+{
+   nir_ssa_def *const prev_def = src->ssa;
+   search_def_state lookup = { .def = prev_def, .found = false };
+
+   src->ssa = new_def;
+
+   /*
+    * Another source of the same instruction may still reference the previous
+    * definition; only drop the instruction from its use set when none does.
+    */
+   nir_foreach_src(parent_instr, search_def, &lookup);
+
+   if (!lookup.found) {
+      struct set_entry *use = _mesa_set_search(prev_def->uses, parent_instr);
+      assert(use);
+      _mesa_set_remove(prev_def->uses, use);
+   }
+
+   _mesa_set_add(new_def->uses, parent_instr);
+}
+
+/*
+ * Makes the condition of if_stmt read new_def, moving the if statement from
+ * the previous definition's if_uses set to new_def's.
+ */
+static void
+rewrite_src_if(nir_if *if_stmt, nir_ssa_def *new_def)
+{
+   nir_ssa_def *const prev_def = if_stmt->condition.ssa;
+
+   if_stmt->condition.ssa = new_def;
+
+   struct set_entry *use = _mesa_set_search(prev_def->if_uses, if_stmt);
+   assert(use);
+   _mesa_set_remove(prev_def->if_uses, use);
+
+   _mesa_set_add(new_def->if_uses, if_stmt);
+}
+
+/*
+ * Tries to replace the SSA value read by "src" with the value it was copied
+ * from, when "src" is produced by an fmov/imov that has no modifiers and an
+ * identity swizzle (see is_swizzleless_move()).
+ *
+ * The user of "src" is parent_instr when that is non-NULL; otherwise "src"
+ * is treated as the condition of parent_if.  For a register source only its
+ * indirect source, if any, is considered.
+ *
+ * Returns true if a source was rewritten.
+ */
+static bool
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+{
+   if (!src->is_ssa) {
+      /* Recurse on the indirect source, not on "src" itself: passing "src"
+       * again would never terminate.
+       *
+       * NOTE(review): rewrite_src_if() always rewrites parent_if->condition,
+       * so an indirect reached with parent_instr == NULL would not be
+       * rewritten correctly -- confirm that case cannot occur.
+       */
+      if (src->reg.indirect)
+         return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+      return false;
+   }
+
+   nir_instr *src_instr = src->ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+   if (!is_swizzleless_move(alu_instr))
+      return false;
+
+   /* Don't let copy propagation land us with a phi that has more
+    * components in its source than it has in its destination.  That badly
+    * messes up out-of-ssa.
+    */
+   if (parent_instr && parent_instr->type == nir_instr_type_phi) {
+      nir_phi_instr *phi = nir_instr_as_phi(parent_instr);
+      assert(phi->dest.is_ssa);
+      if (phi->dest.ssa.num_components !=
+          alu_instr->src[0].src.ssa->num_components)
+         return false;
+   }
+
+   if (parent_instr)
+      rewrite_src_instr(src, alu_instr->src[0].src.ssa, parent_instr);
+   else
+      rewrite_src_if(parent_if, alu_instr->src[0].src.ssa);
+
+   return true;
+}
+
+/*
+ * Copy-propagates through source "index" of an ALU instruction.
+ *
+ * If the source is produced by a move, the two swizzles are composed and the
+ * move's input is read directly.  If it is produced by a vecN, this only
+ * succeeds when every channel the parent actually uses comes from the same
+ * SSA definition; the swizzle is then rebuilt from the vecN sources.
+ * For a register source, only its indirect source (if any) is processed.
+ *
+ * Returns true if the source was rewritten.
+ */
+static bool
+copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index)
+{
+   nir_alu_src *src = &parent_alu_instr->src[index];
+
+   if (!src->src.is_ssa) {
+      if (!src->src.reg.indirect)
+         return false;
+
+      return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr,
+                           NULL);
+   }
+
+   nir_instr *producer = src->src.ssa->parent_instr;
+   if (producer->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *producer_alu = nir_instr_as_alu(producer);
+   if (!(is_move(producer_alu) || is_vec(producer_alu)))
+      return false;
+
+   const bool through_mov = producer_alu->op == nir_op_fmov ||
+                            producer_alu->op == nir_op_imov;
+   nir_ssa_def *def = NULL;
+   unsigned new_swizzle[4] = {0, 0, 0, 0};
+
+   if (through_mov) {
+      /* Compose the parent's swizzle with the move's swizzle. */
+      def = producer_alu->src[0].src.ssa;
+      for (unsigned chan = 0; chan < 4; chan++)
+         new_swizzle[chan] = producer_alu->src[0].swizzle[src->swizzle[chan]];
+   } else {
+      for (unsigned chan = 0; chan < 4; chan++) {
+         if (!nir_alu_instr_channel_used(parent_alu_instr, index, chan))
+            continue;
+
+         /* The vecN source feeding the channel the parent selects. */
+         nir_alu_src *vec_src = &producer_alu->src[src->swizzle[chan]];
+         nir_ssa_def *chan_def = vec_src->src.ssa;
+
+         if (def == NULL)
+            def = chan_def;
+         else if (def != chan_def)
+            return false;
+
+         new_swizzle[chan] = vec_src->swizzle[0];
+      }
+   }
+
+   for (unsigned chan = 0; chan < 4; chan++)
+      src->swizzle[chan] = new_swizzle[chan];
+
+   rewrite_src_instr(&src->src, def, &parent_alu_instr->instr);
+
+   return true;
+}
+
+/* State passed to copy_prop_src_cb() while visiting an instruction's sources. */
+typedef struct {
+   /* instruction whose sources are being visited */
+   nir_instr *parent_instr;
+   /* set to true once any source has been rewritten */
+   bool progress;
+} copy_prop_state;
+
+/*
+ * Source callback: applies copy_prop_src() to "src" repeatedly until it
+ * stops making progress, recording any progress in the state.  Always
+ * returns true.
+ */
+static bool
+copy_prop_src_cb(nir_src *src, void *_state)
+{
+   copy_prop_state *state = _state;
+
+   for (;;) {
+      if (!copy_prop_src(src, state->parent_instr, NULL))
+         break;
+      state->progress = true;
+   }
+
+   return true;
+}
+
+/*
+ * Copy-propagates into every source of one instruction.  ALU instructions
+ * go through copy_prop_alu_src() for their inputs (plus the indirect of a
+ * register destination); all other instructions have each source handled by
+ * copy_prop_src_cb().
+ *
+ * Returns true if any source was rewritten.
+ */
+static bool
+copy_prop_instr(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_alu) {
+      copy_prop_state state = { .parent_instr = instr, .progress = false };
+
+      nir_foreach_src(instr, copy_prop_src_cb, &state);
+      return state.progress;
+   }
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+   const unsigned num_inputs = nir_op_infos[alu->op].num_inputs;
+   bool progress = false;
+
+   /* Keep propagating through each input until nothing more changes. */
+   for (unsigned s = 0; s < num_inputs; s++) {
+      while (copy_prop_alu_src(alu, s))
+         progress = true;
+   }
+
+   if (!alu->dest.dest.is_ssa && alu->dest.dest.reg.indirect) {
+      while (copy_prop_src(alu->dest.dest.reg.indirect, instr, NULL))
+         progress = true;
+   }
+
+   return progress;
+}
+
+/*
+ * Copy-propagates into the condition of an if statement.  Returns true if
+ * the condition was rewritten.
+ */
+static bool
+copy_prop_if(nir_if *if_stmt)
+{
+   nir_src *condition = &if_stmt->condition;
+
+   return copy_prop_src(condition, NULL, if_stmt);
+}
+
+/*
+ * Block callback: copy-propagates into every instruction of the block and,
+ * when the control-flow node following the block is an if statement, into
+ * that if's condition.  "_state" points at a bool that is set when anything
+ * changed.  Always returns true.
+ */
+static bool
+copy_prop_block(nir_block *block, void *_state)
+{
+   bool *progress = _state;
+
+   nir_foreach_instr(block, instr) {
+      if (copy_prop_instr(instr))
+         *progress = true;
+   }
+
+   /* check that we aren't the end node */
+   if (block->cf_node.node.next == NULL)
+      return true;
+
+   if (nir_cf_node_is_last(&block->cf_node))
+      return true;
+
+   nir_cf_node *following = nir_cf_node_next(&block->cf_node);
+   if (following->type == nir_cf_node_if &&
+       copy_prop_if(nir_cf_node_as_if(following)))
+      *progress = true;
+
+   return true;
+}
+
+/*
+ * Runs copy propagation over every block of one function implementation.
+ * Returns true if anything was rewritten.
+ */
+bool
+nir_copy_prop_impl(nir_function_impl *impl)
+{
+   bool changed = false;
+
+   nir_foreach_block(impl, copy_prop_block, &changed);
+
+   return changed;
+}
+
+/*
+ * Entry point: runs copy propagation on every overload of the shader that
+ * has an implementation.  Returns true if any implementation changed.
+ */
+bool
+nir_copy_prop(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      if (nir_copy_prop_impl(overload->impl))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_cse.c b/mesalib/src/glsl/nir/nir_opt_cse.c
new file mode 100644
index 000000000..9b383202d
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_cse.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements common subexpression elimination
+ */
+
+/* State shared by the CSE callbacks while processing one function impl. */
+struct cse_state {
+   /* context passed to nir_ssa_def_rewrite_uses() */
+   void *mem_ctx;
+   /* set to true once any instruction has been eliminated */
+   bool progress;
+};
+
+/*
+ * Compares two ALU sources: abs/negate modifiers must match, the swizzles
+ * must match on every channel selected by read_mask, and the underlying
+ * sources must be equal according to nir_srcs_equal().
+ */
+static bool
+nir_alu_srcs_equal(nir_alu_src src1, nir_alu_src src2, uint8_t read_mask)
+{
+   if (src1.abs != src2.abs)
+      return false;
+   if (src1.negate != src2.negate)
+      return false;
+
+   for (int chan = 0; chan < 4; ++chan) {
+      const bool is_read = (read_mask & (1 << chan)) != 0;
+
+      if (is_read && src1.swizzle[chan] != src2.swizzle[chan])
+         return false;
+   }
+
+   return nir_srcs_equal(src1.src, src2.src);
+}
+
+/*
+ * Structural equality test between two instructions.  Instructions of
+ * different types are never equal; texture instructions always compare
+ * unequal.  ALU, load_const, phi and intrinsic instructions are compared
+ * field by field.  Any other type hits unreachable().
+ */
+static bool
+nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
+{
+   if (instr1->type != instr2->type)
+      return false;
+
+   switch (instr1->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *lhs = nir_instr_as_alu(instr1);
+      nir_alu_instr *rhs = nir_instr_as_alu(instr2);
+
+      if (lhs->op != rhs->op)
+         return false;
+
+      /* TODO: We can probably actually do something more intelligent such
+       * as allowing different numbers and taking a maximum or something
+       * here */
+      const unsigned num_comps = lhs->dest.dest.ssa.num_components;
+      if (num_comps != rhs->dest.dest.ssa.num_components)
+         return false;
+
+      /* Only the channels the destination has take part in the comparison. */
+      const uint8_t read_mask = (1 << num_comps) - 1;
+      const unsigned num_inputs = nir_op_infos[lhs->op].num_inputs;
+
+      for (unsigned s = 0; s < num_inputs; s++) {
+         if (!nir_alu_srcs_equal(lhs->src[s], rhs->src[s], read_mask))
+            return false;
+      }
+      return true;
+   }
+   case nir_instr_type_tex:
+      return false;
+   case nir_instr_type_load_const: {
+      nir_load_const_instr *lhs = nir_instr_as_load_const(instr1);
+      nir_load_const_instr *rhs = nir_instr_as_load_const(instr2);
+
+      if (lhs->def.num_components != rhs->def.num_components)
+         return false;
+
+      /* Bitwise comparison of the used components. */
+      const size_t num_bytes =
+         lhs->def.num_components * sizeof(*rhs->value.f);
+      return memcmp(lhs->value.f, rhs->value.f, num_bytes) == 0;
+   }
+   case nir_instr_type_phi: {
+      nir_phi_instr *lhs = nir_instr_as_phi(instr1);
+      nir_phi_instr *rhs = nir_instr_as_phi(instr2);
+
+      if (lhs->instr.block != rhs->instr.block)
+         return false;
+
+      /* For each source of the first phi, find the source of the second phi
+       * with the same predecessor and compare the two.
+       */
+      nir_foreach_phi_src(lhs, lhs_src) {
+         nir_foreach_phi_src(rhs, rhs_src) {
+            if (lhs_src->pred == rhs_src->pred) {
+               if (!nir_srcs_equal(lhs_src->src, rhs_src->src))
+                  return false;
+
+               break;
+            }
+         }
+      }
+
+      return true;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *lhs = nir_instr_as_intrinsic(instr1);
+      nir_intrinsic_instr *rhs = nir_instr_as_intrinsic(instr2);
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[lhs->intrinsic];
+
+      if (lhs->intrinsic != rhs->intrinsic)
+         return false;
+      if (lhs->num_components != rhs->num_components)
+         return false;
+
+      if (info->has_dest &&
+          lhs->dest.ssa.num_components != rhs->dest.ssa.num_components)
+         return false;
+
+      for (unsigned s = 0; s < info->num_srcs; s++) {
+         if (!nir_srcs_equal(lhs->src[s], rhs->src[s]))
+            return false;
+      }
+
+      assert(info->num_variables == 0);
+
+      for (unsigned k = 0; k < info->num_indices; k++) {
+         if (lhs->const_index[k] != rhs->const_index[k])
+            return false;
+      }
+
+      return true;
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/* Source callback reporting whether "src" is SSA; "data" is not used. */
+static bool
+src_is_ssa(nir_src *src, void *data)
+{
+   (void) data;
+
+   return src->is_ssa;
+}
+
+/* Destination callback reporting whether "dest" is SSA; "data" is not used. */
+static bool
+dest_is_ssa(nir_dest *dest, void *data)
+{
+   (void) data;
+
+   return dest->is_ssa;
+}
+
+/*
+ * Decides whether "instr" is a candidate for CSE.  All destinations and
+ * sources must be SSA.  ALU, load_const and phi instructions qualify;
+ * intrinsics qualify only when flagged both eliminable and reorderable and
+ * when they take no variables; tex, call, jump and ssa_undef do not.
+ */
+static bool
+nir_instr_can_cse(nir_instr *instr)
+{
+   /* We only handle SSA. */
+   if (!nir_foreach_dest(instr, dest_is_ssa, NULL))
+      return false;
+   if (!nir_foreach_src(instr, src_is_ssa, NULL))
+      return false;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_load_const:
+   case nir_instr_type_phi:
+      return true;
+   case nir_instr_type_tex:
+      return false; /* TODO */
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+
+      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+             info->num_variables == 0; /* not implemented yet */
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return false;
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+/*
+ * Returns the SSA definition written by an ALU, load_const, phi or
+ * intrinsic instruction.  The destination is asserted to be SSA where the
+ * instruction type allows a non-SSA destination.  Other instruction types
+ * hit unreachable().
+ */
+static nir_ssa_def *
+nir_instr_get_dest_ssa_def(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu: {
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      assert(alu->dest.dest.is_ssa);
+      return &alu->dest.dest.ssa;
+   }
+   case nir_instr_type_load_const: {
+      nir_load_const_instr *load = nir_instr_as_load_const(instr);
+      return &load->def;
+   }
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      assert(phi->dest.is_ssa);
+      return &phi->dest.ssa;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      assert(intrin->dest.is_ssa);
+      return &intrin->dest.ssa;
+   }
+   default:
+      unreachable("We never ask for any of these");
+   }
+}
+
+/*
+ * Replaces "instr" by the equivalent instruction "other": every use of
+ * instr's SSA destination is redirected to other's, "instr" is removed, and
+ * progress is recorded in the state.
+ */
+static void
+cse_replace_instr(nir_instr *instr, nir_instr *other, struct cse_state *state)
+{
+   nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
+
+   nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
+                            nir_src_for_ssa(other_def),
+                            state->mem_ctx);
+   nir_instr_remove(instr);
+   state->progress = true;
+}
+
+/*
+ * Tries to eliminate "instr" by finding an equal instruction that precedes
+ * it: first among the earlier instructions of its own block (walking
+ * backwards), then in each block reached by following imm_dom links, each
+ * scanned in reverse.  The first match found replaces "instr".
+ */
+static void
+nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
+{
+   if (!nir_instr_can_cse(instr))
+      return;
+
+   /* Earlier instructions in the same block. */
+   struct exec_node *node = instr->node.prev;
+   while (!exec_node_is_head_sentinel(node)) {
+      nir_instr *candidate = exec_node_data(nir_instr, node, node);
+
+      if (nir_instrs_equal(instr, candidate)) {
+         cse_replace_instr(instr, candidate, state);
+         return;
+      }
+
+      node = node->prev;
+   }
+
+   /* Blocks along the immediate-dominator chain. */
+   for (nir_block *dom = instr->block->imm_dom; dom != NULL;
+        dom = dom->imm_dom) {
+      nir_foreach_instr_reverse(dom, candidate) {
+         if (nir_instrs_equal(instr, candidate)) {
+            cse_replace_instr(instr, candidate, state);
+            return;
+         }
+      }
+   }
+}
+
+/*
+ * Block callback: runs CSE on every instruction of the block, using the
+ * removal-safe iterator because instructions may be removed while iterating.
+ * Always returns true.
+ */
+static bool
+nir_opt_cse_block(nir_block *block, void *void_state)
+{
+   struct cse_state *cse = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      nir_opt_cse_instr(instr, cse);
+   }
+
+   return true;
+}
+
+/*
+ * Runs CSE over one function implementation.  Dominance metadata is
+ * required up front; when something changed, block-index and dominance
+ * metadata are declared preserved.  Returns whether anything changed.
+ */
+static bool
+nir_opt_cse_impl(nir_function_impl *impl)
+{
+   struct cse_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .progress = false,
+   };
+
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   nir_foreach_block(impl, nir_opt_cse_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/*
+ * Entry point: runs CSE on every overload of the shader that has an
+ * implementation.  Returns true if any implementation changed.
+ */
+bool
+nir_opt_cse(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl && nir_opt_cse_impl(overload->impl))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_dce.c b/mesalib/src/glsl/nir/nir_opt_dce.c
new file mode 100644
index 000000000..e0ebdc61c
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_dce.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* SSA-based mark-and-sweep dead code elimination */
+
+/* One pending entry on the DCE worklist: an instruction that has been marked
+ * live (see worklist_push) but whose sources have not been visited yet.
+ */
+typedef struct {
+   struct exec_node node;   /* link in the worklist's exec_list */
+   nir_instr *instr;        /* the queued instruction */
+} worklist_elem;
+
+/* Marks instr as live (pass_flags = 1) and appends it to the tail of the
+ * worklist.  The list element is ralloc'ed with the worklist itself as its
+ * parent context.
+ */
+static void
+worklist_push(struct exec_list *worklist, nir_instr *instr)
+{
+   worklist_elem *entry = ralloc(worklist, worklist_elem);
+
+   instr->pass_flags = 1;
+   entry->instr = instr;
+
+   exec_list_push_tail(worklist, &entry->node);
+}
+
+/* Removes the entry at the head of the worklist and returns the instruction
+ * it carried.  The caller must make sure the list is not empty.
+ *
+ * The entry is released right away: once it has been popped off the list
+ * nothing refers to it any more, so there is no reason to keep it allocated
+ * until the whole worklist context is freed at the end of the pass.
+ */
+static nir_instr *
+worklist_pop(struct exec_list *worklist)
+{
+   struct exec_node *node = exec_list_pop_head(worklist);
+   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
+   nir_instr *instr = elem->instr;
+
+   ralloc_free(elem);
+   return instr;
+}
+
+/* nir_foreach_src callback: if src is an SSA value whose defining instruction
+ * has not been marked live yet, mark it and queue it on the worklist passed
+ * in through _state.  Non-SSA sources are ignored.  Always returns true.
+ */
+static bool
+mark_live_cb(nir_src *src, void *_state)
+{
+   struct exec_list *worklist = (struct exec_list *) _state;
+
+   if (!src->is_ssa)
+      return true;
+
+   nir_instr *def_instr = src->ssa->parent_instr;
+   if (def_instr->pass_flags == 0)
+      worklist_push(worklist, def_instr);
+
+   return true;
+}
+
+/* Resets the liveness flag of instr and, if the instruction has to be kept
+ * regardless of whether its result is used, seeds the worklist with it.
+ *
+ * pass_flags stores the live/dead information; DCE only treats it as a
+ * zero/non-zero boolean for whether or not the instruction is live.
+ */
+static void
+init_instr(nir_instr *instr, struct exec_list *worklist)
+{
+   bool always_live = false;
+
+   instr->pass_flags = 0;
+
+   switch (instr->type) {
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      always_live = true;
+      break;
+
+   case nir_instr_type_alu: {
+      /* Kept whenever the destination is not SSA */
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      always_live = !alu->dest.dest.is_ssa;
+      break;
+   }
+
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[intrin->intrinsic];
+
+      if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE)) {
+         always_live = true;
+      } else if (info->has_dest && !intrin->dest.is_ssa) {
+         always_live = true;
+      }
+      break;
+   }
+
+   case nir_instr_type_tex: {
+      /* Kept whenever the destination is not SSA */
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+      always_live = !tex->dest.is_ssa;
+      break;
+   }
+
+   default:
+      break;
+   }
+
+   if (always_live)
+      worklist_push(worklist, instr);
+}
+
+/* nir_foreach_block callback for the mark phase: initializes every
+ * instruction of the block and, if the block is followed by an if whose
+ * condition is an SSA value, makes sure the instruction producing that
+ * condition is queued as live.  Always returns true.
+ */
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   struct exec_list *worklist = (struct exec_list *) _state;
+
+   nir_foreach_instr(block, instr) {
+      init_instr(instr, worklist);
+   }
+
+   nir_if *next_if = nir_block_get_following_if(block);
+   if (next_if == NULL || !next_if->condition.is_ssa)
+      return true;
+
+   nir_instr *cond_instr = next_if->condition.ssa->parent_instr;
+   if (!cond_instr->pass_flags)
+      worklist_push(worklist, cond_instr);
+
+   return true;
+}
+
+/* nir_foreach_block callback for the sweep phase: removes every instruction
+ * whose pass_flags is still zero and records in the bool pointed to by
+ * _state that something was deleted.  Always returns true.
+ */
+static bool
+delete_block_cb(nir_block *block, void *_state)
+{
+   bool *progress = (bool *) _state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->pass_flags)
+         continue;
+
+      nir_instr_remove(instr);
+      *progress = true;
+   }
+
+   return true;
+}
+
+/* Mark-and-sweep dead code elimination on one function implementation.
+ * Returns true if at least one instruction was removed.
+ */
+bool
+nir_opt_dce_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+   struct exec_list *worklist = ralloc(NULL, struct exec_list);
+
+   exec_list_make_empty(worklist);
+
+   /* Mark: seed the worklist, then propagate liveness through sources */
+   nir_foreach_block(impl, init_block_cb, worklist);
+
+   while (!exec_list_is_empty(worklist)) {
+      nir_instr *live = worklist_pop(worklist);
+      nir_foreach_src(live, mark_live_cb, worklist);
+   }
+
+   ralloc_free(worklist);
+
+   /* Sweep: drop everything that was never marked */
+   nir_foreach_block(impl, delete_block_cb, &progress);
+
+   if (!progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+   return true;
+}
+
+/* Entry point: runs DCE on every overload of the shader that has an
+ * implementation.  Returns true if any implementation reported progress.
+ */
+bool
+nir_opt_dce(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      progress |= nir_opt_dce_impl(overload->impl);
+   }
+
+   return progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_gcm.c b/mesalib/src/glsl/nir/nir_opt_gcm.c
new file mode 100644
index 000000000..bf565b969
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_gcm.c
@@ -0,0 +1,492 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements Global Code Motion.  A description of GCM can be found in
+ * "Global Code Motion; Global Value Numbering" by Cliff Click.
+ * Unfortunately, the algorithm presented in the paper is broken in a
+ * number of ways.  The algorithm used here differs substantially from the
+ * one in the paper but it is, in my opinion, much easier to read and
+ * verify correctness.
+ */
+
+/* Per-block bookkeeping for GCM; stored in gcm_state::blocks and looked up
+ * by block->index.
+ */
+struct gcm_block_info {
+   /* Number of loops this block is inside */
+   unsigned loop_depth;
+
+   /* The last instruction inserted into this block.  This is used as we
+    * traverse the instructions and insert them back into the program to
+    * put them in the right order.
+    */
+   nir_instr *last_instr;
+};
+
+/* Flags used in the instr->pass_flags field for various instruction states */
+enum {
+   /* The instruction stays in its block (set by gcm_pin_instructions_block) */
+   GCM_INSTR_PINNED =            (1 << 0),
+   /* The instruction has been visited by gcm_schedule_early_instr */
+   GCM_INSTR_SCHEDULED_EARLY =   (1 << 1),
+   /* The instruction has been visited by gcm_schedule_late_instr */
+   GCM_INSTR_SCHEDULED_LATE =    (1 << 2),
+   /* The instruction has been visited by gcm_place_instr */
+   GCM_INSTR_PLACED =            (1 << 3),
+};
+
+/* State shared by all phases of the GCM pass for one function
+ * implementation.
+ */
+struct gcm_state {
+   /* The function implementation being processed */
+   nir_function_impl *impl;
+
+   /* The instruction whose sources are currently being walked; written by
+    * gcm_schedule_early_instr and read by gcm_schedule_early_src.
+    */
+   nir_instr *instr;
+
+   /* The list of non-pinned instructions.  While pinning instructions,
+    * we pull non-pinned instructions out of their blocks and place them in
+    * this list.  This saves us from having linked-list problems when we go
+    * to put instructions back in their blocks.
+    */
+   struct exec_list instrs;
+
+   /* Per-block information, indexed by block->index */
+   struct gcm_block_info *blocks;
+};
+
+/* Recursively walks a control-flow list and records, for every block found,
+ * how many loops enclose it.  loop_depth is the nesting depth of cf_list
+ * itself; it grows by one for each loop body entered, while both branches
+ * of an if keep the depth of the if.
+ */
+static void
+gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state,
+                     unsigned loop_depth)
+{
+   foreach_list_typed(nir_cf_node, cf, node, cf_list) {
+      if (cf->type == nir_cf_node_block) {
+         nir_block *blk = nir_cf_node_as_block(cf);
+         state->blocks[blk->index].loop_depth = loop_depth;
+      } else if (cf->type == nir_cf_node_if) {
+         nir_if *branch = nir_cf_node_as_if(cf);
+         gcm_build_block_info(&branch->then_list, state, loop_depth);
+         gcm_build_block_info(&branch->else_list, state, loop_depth);
+      } else if (cf->type == nir_cf_node_loop) {
+         nir_loop *inner = nir_cf_node_as_loop(cf);
+         gcm_build_block_info(&inner->body, state, loop_depth + 1);
+      } else {
+         unreachable("Invalid CF node type");
+      }
+   }
+}
+
+/* Walks the instruction list and marks immovable instructions as pinned
+ *
+ * This function also serves to initialize the instr->pass_flags field.
+ * After this is completed, all instructions' pass_flags fields will be set
+ * to either GCM_INSTR_PINNED or 0.  Unpinned instructions are additionally
+ * unlinked from their block and appended to state->instrs.
+ */
+static bool
+gcm_pin_instructions_block(nir_block *block, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      switch (instr->type) {
+      case nir_instr_type_alu:
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fddx:
+         case nir_op_fddy:
+         case nir_op_fddx_fine:
+         case nir_op_fddy_fine:
+         case nir_op_fddx_coarse:
+         case nir_op_fddy_coarse:
+            /* These can only go in uniform control flow; pin them for now */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+
+         default:
+            instr->pass_flags = 0;
+            break;
+         }
+         break;
+
+      case nir_instr_type_tex:
+         switch (nir_instr_as_tex(instr)->op) {
+         case nir_texop_tex:
+         case nir_texop_txb:
+         case nir_texop_lod:
+            /* These take implicit derivatives so they need to be pinned */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+
+         default:
+            instr->pass_flags = 0;
+            break;
+         }
+         break;
+
+      case nir_instr_type_load_const:
+         instr->pass_flags = 0;
+         break;
+
+      case nir_instr_type_intrinsic: {
+         const nir_intrinsic_info *info =
+            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+            instr->pass_flags = 0;
+         } else {
+            instr->pass_flags = GCM_INSTR_PINNED;
+         }
+         break;
+      }
+
+      case nir_instr_type_jump:
+      case nir_instr_type_ssa_undef:
+      case nir_instr_type_phi:
+         instr->pass_flags = GCM_INSTR_PINNED;
+         break;
+
+      default:
+         unreachable("Invalid instruction type in GCM");
+      }
+
+      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+         /* If this is an unpinned instruction, go ahead and pull it out of
+          * the program and put it on the instrs list.  This has a couple
+          * of benefits.  First, it makes the scheduling algorithm more
+          * efficient because we can avoid walking over basic blocks and
+          * pinned instructions.  Second, it keeps us from causing linked
+          * list confusion when we're trying to put everything in its
+          * proper place at the end of the pass.
+          *
+          * Note that we don't use nir_instr_remove here because that also
+          * cleans up uses and defs and we want to keep that information.
+          */
+         exec_node_remove(&instr->node);
+         exec_list_push_tail(&state->instrs, &instr->node);
+      }
+   }
+
+   return true;
+}
+
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Update an instruction's schedule for the given source
+ *
+ * This function is called iteratively as we walk the sources of an
+ * instruction.  It ensures that the given source instruction has been
+ * scheduled and then updates this instruction's block if the source
+ * instruction is lower down the tree.
+ *
+ * The instruction being scheduled is taken from state->instr.  Always
+ * returns true.
+ */
+static bool
+gcm_schedule_early_src(nir_src *src, void *void_state)
+{
+   struct gcm_state *state = void_state;
+   nir_instr *instr = state->instr;
+
+   assert(src->is_ssa);
+
+   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);
+
+   /* While the index isn't a proper dominance depth, it does have the
+    * property that if A dominates B then A->index <= B->index.  Since we
+    * know that this instruction must have been dominated by all of its
+    * sources at some point (even if it's gone through value-numbering),
+    * all of the sources must lie on the same branch of the dominance tree.
+    * Therefore, we can just go ahead and just compare indices.
+    */
+   if (instr->block->index < src->ssa->parent_instr->block->index)
+      instr->block = src->ssa->parent_instr->block;
+
+   /* We need to restore the state instruction because it may have been
+    * changed through the gcm_schedule_early_instr call above.  We may
+    * still be iterating through sources, and future calls to
+    * gcm_schedule_early_src for the same instruction will still need it.
+    */
+   state->instr = instr;
+
+   return true;
+}
+
+/** Schedules an instruction early
+ *
+ * This function performs a recursive depth-first search starting at the
+ * given instruction and proceeding through the sources to schedule
+ * instructions as early as they can possibly go in the dominance tree.
+ * The instructions are "scheduled" by updating their instr->block field.
+ *
+ * Instructions already flagged GCM_INSTR_SCHEDULED_EARLY are skipped, so
+ * each instruction is processed at most once.
+ */
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
+
+   /* Pinned instructions are already scheduled so we don't need to do
+    * anything.  Also, bailing here keeps us from ever following the
+    * sources of phi nodes which can be back-edges.
+    */
+   if (instr->pass_flags & GCM_INSTR_PINNED)
+      return;
+
+   /* Start with the instruction at the top.  As we iterate over the
+    * sources, it will get moved down as needed.
+    */
+   instr->block = state->impl->start_block;
+   /* gcm_schedule_early_src reads the instruction to update from here */
+   state->instr = instr;
+
+   nir_foreach_src(instr, gcm_schedule_early_src, state);
+}
+
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Schedules the instruction associated with the given SSA def late
+ *
+ * This function works by first walking all of the uses of the given SSA
+ * definition, ensuring that they are scheduled, and then computing the LCA
+ * (least common ancestor) of its uses.  It then schedules this instruction
+ * as close to the LCA as possible while trying to stay out of loops.
+ *
+ * Always returns true.
+ */
+static bool
+gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_block *lca = NULL;
+
+   struct set_entry *entry;
+   set_foreach(def->uses, entry) {
+      nir_instr *use_instr = (nir_instr *)entry->key;
+
+      gcm_schedule_late_instr(use_instr, state);
+
+      /* Phi instructions are a bit special.  SSA definitions don't have to
+       * dominate the sources of the phi nodes that use them; instead, they
+       * have to dominate the predecessor block corresponding to the phi
+       * source.  We handle this by looking through the sources, finding
+       * any that are using this SSA def, and using those blocks instead
+       * of the one the phi lives in.
+       */
+      if (use_instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
+
+         nir_foreach_phi_src(phi, phi_src) {
+            if (phi_src->src.ssa == def)
+               lca = nir_dominance_lca(lca, phi_src->pred);
+         }
+      } else {
+         lca = nir_dominance_lca(lca, use_instr->block);
+      }
+   }
+
+   set_foreach(def->if_uses, entry) {
+      nir_if *if_stmt = (nir_if *)entry->key;
+
+      /* For if statements, we consider the block to be the one immediately
+       * preceding the if CF node.
+       */
+      nir_block *pred_block =
+         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
+
+      lca = nir_dominance_lca(lca, pred_block);
+   }
+
+   /* Some instructions may never be used.  We'll just leave them scheduled
+    * early and let dead code clean them up.
+    */
+   if (lca == NULL)
+      return true;
+
+   /* We now have the LCA of all of the uses.  If our invariants hold,
+    * this is dominated by the block that we chose when scheduling early.
+    * We now walk up the dominance tree and pick the lowest block that is
+    * as far outside loops as we can get.
+    */
+   nir_block *best = lca;
+   while (lca != def->parent_instr->block) {
+      assert(lca);
+      if (state->blocks[lca->index].loop_depth <
+          state->blocks[best->index].loop_depth)
+         best = lca;
+      lca = lca->imm_dom;
+   }
+   def->parent_instr->block = best;
+
+   return true;
+}
+
+/** Schedules an instruction late
+ *
+ * This function performs a depth-first search starting at the given
+ * instruction and proceeding through its uses to schedule instructions as
+ * late as they can reasonably go in the dominance tree.  The instructions
+ * are "scheduled" by updating their instr->block field.
+ *
+ * The name of this function is actually a bit of a misnomer as it doesn't
+ * schedule them "as late as possible" as the paper implies.  Instead, it
+ * first finds the latest possible place it can schedule the instruction and
+ * then possibly schedules it earlier than that.  The actual location is as
+ * far down the tree as we can go while trying to stay out of loops.
+ */
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
+
+   /* Pinned instructions are already scheduled so we don't need to do
+    * anything.  Also, bailing here keeps us from ever following phi nodes
+    * which can be back-edges.
+    */
+   if (instr->pass_flags & GCM_INSTR_PINNED)
+      return;
+
+   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
+}
+
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state);
+
+/* nir_foreach_ssa_def callback used by gcm_place_instr: makes sure every
+ * instruction that uses def has been placed.
+ *
+ * NOTE(review): this returns false whereas gcm_schedule_late_def returns
+ * true; presumably harmless as long as every instruction reaching this pass
+ * has a single SSA def -- confirm against nir_foreach_ssa_def.
+ */
+static bool
+gcm_place_instr_def(nir_ssa_def *def, void *state)
+{
+   struct set_entry *entry;
+   set_foreach(def->uses, entry)
+      gcm_place_instr((nir_instr *)entry->key, state);
+
+   return false;
+}
+
+/** Places an instruction back into the program
+ *
+ * The earlier passes of GCM simply choose blocks for each instruction and
+ * otherwise leave them alone.  This pass actually places the instructions
+ * into their chosen blocks.
+ *
+ * To do so, we use a standard post-order depth-first search linearization
+ * algorithm.  We walk over the uses of the given instruction and ensure
+ * that they are placed and then place this instruction.  Because we are
+ * working on multiple blocks at a time, we keep track of the last inserted
+ * instruction per-block in the state structure's block_info array.  When
+ * we insert an instruction in a block we insert it before the last
+ * instruction inserted in that block rather than the last instruction
+ * inserted globally.
+ */
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_PLACED)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_PLACED;
+
+   /* Phi nodes are our one source of back-edges.  Since right now we are
+    * only doing scheduling within blocks, we don't need to worry about
+    * them since they are always at the top.  Just skip them completely.
+    */
+   if (instr->type == nir_instr_type_phi) {
+      assert(instr->pass_flags & GCM_INSTR_PINNED);
+      return;
+   }
+
+   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);
+
+   if (instr->pass_flags & GCM_INSTR_PINNED) {
+      /* Pinned instructions have an implicit dependence on the pinned
+       * instructions that come after them in the block.  Since the pinned
+       * instructions will naturally "chain" together, we only need to
+       * explicitly visit one of them.
+       */
+      for (nir_instr *after = nir_instr_next(instr);
+           after;
+           after = nir_instr_next(after)) {
+         if (after->pass_flags & GCM_INSTR_PINNED) {
+            gcm_place_instr(after, state);
+            break;
+         }
+      }
+   }
+
+   struct gcm_block_info *block_info = &state->blocks[instr->block->index];
+   if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+      /* Unlink from whatever list the instruction currently sits on */
+      exec_node_remove(&instr->node);
+
+      if (block_info->last_instr) {
+         exec_node_insert_node_before(&block_info->last_instr->node,
+                                      &instr->node);
+      } else {
+         /* Schedule it at the end of the block */
+         nir_instr *jump_instr = nir_block_last_instr(instr->block);
+         if (jump_instr && jump_instr->type == nir_instr_type_jump) {
+            exec_node_insert_node_before(&jump_instr->node, &instr->node);
+         } else {
+            exec_list_push_tail(&instr->block->instr_list, &instr->node);
+         }
+      }
+   }
+
+   /* Recorded for pinned instructions too, so that later insertions into
+    * this block go in front of them.
+    */
+   block_info->last_instr = instr;
+}
+
+/* Runs Global Code Motion on a single function implementation.
+ *
+ * The phases are: record the loop depth of every block, pin immovable
+ * instructions (pulling the rest onto state.instrs), schedule early,
+ * schedule late, and finally place every instruction back into the block
+ * chosen for it.
+ */
+static void
+opt_gcm_impl(nir_function_impl *impl)
+{
+   /* Require the metadata before anything else: the per-block array below
+    * is sized from impl->num_blocks and indexed by block->index, so the
+    * block indices have to be valid before it is allocated.
+    */
+   nir_metadata_require(impl, nir_metadata_block_index |
+                              nir_metadata_dominance);
+
+   struct gcm_state state;
+
+   state.impl = impl;
+   state.instr = NULL;
+   exec_list_make_empty(&state.instrs);
+   state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks);
+
+   gcm_build_block_info(&impl->body, &state, 0);
+   nir_foreach_block(impl, gcm_pin_instructions_block, &state);
+
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_early_instr(instr, &state);
+
+   foreach_list_typed(nir_instr, instr, node, &state.instrs)
+      gcm_schedule_late_instr(instr, &state);
+
+   /* gcm_place_instr unlinks each unpinned instruction from state.instrs
+    * as it places it, so this loop ends once everything has been placed.
+    */
+   while (!exec_list_is_empty(&state.instrs)) {
+      nir_instr *instr = exec_node_data(nir_instr,
+                                        state.instrs.tail_pred, node);
+      gcm_place_instr(instr, &state);
+   }
+
+   ralloc_free(state.blocks);
+
+   /* Instructions were moved between blocks but no block was added or
+    * removed, so only the block indices and dominance information remain
+    * valid; any other metadata has to be recomputed.
+    */
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+/* Entry point: runs Global Code Motion on every overload of the shader that
+ * has an implementation.
+ */
+void
+nir_opt_gcm(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl == NULL)
+         continue;
+
+      opt_gcm_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_global_to_local.c b/mesalib/src/glsl/nir/nir_opt_global_to_local.c
new file mode 100644
index 000000000..00db37ba7
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_global_to_local.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* Folds the function implementation of one def/use site into *impl.
+ *
+ * The first site seen is simply recorded.  Afterwards, returns false as soon
+ * as a site lives in a different implementation than the recorded one.
+ */
+static bool
+global_to_local_note_impl(nir_function_impl **impl,
+                          nir_function_impl *site_impl)
+{
+   if (*impl == NULL) {
+      *impl = site_impl;
+      return true;
+   }
+
+   return *impl == site_impl;
+}
+
+/* Tries to turn a global register into a local one.
+ *
+ * If every definition, use and if-use of reg lies in the same function
+ * implementation, the register is moved to that implementation's register
+ * list and given a fresh local index.  A register with no defs or uses at
+ * all is removed.  Returns true if the register was moved or removed, false
+ * if it is referenced from more than one implementation and was left alone.
+ */
+static bool
+global_to_local(nir_register *reg)
+{
+   nir_function_impl *impl = NULL;
+   struct set_entry *entry;
+
+   assert(reg->is_global);
+
+   set_foreach(reg->defs, entry) {
+      nir_instr *instr = (nir_instr *) entry->key;
+      nir_function_impl *instr_impl =
+         nir_cf_node_get_function(&instr->block->cf_node);
+      if (!global_to_local_note_impl(&impl, instr_impl))
+         return false;
+   }
+
+   set_foreach(reg->uses, entry) {
+      nir_instr *instr = (nir_instr *) entry->key;
+      nir_function_impl *instr_impl =
+         nir_cf_node_get_function(&instr->block->cf_node);
+      if (!global_to_local_note_impl(&impl, instr_impl))
+         return false;
+   }
+
+   set_foreach(reg->if_uses, entry) {
+      nir_if *if_stmt = (nir_if *) entry->key;
+      nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node);
+      if (!global_to_local_note_impl(&impl, if_impl))
+         return false;
+   }
+
+   if (impl == NULL) {
+      /* this register is never used/defined, delete it */
+      nir_reg_remove(reg);
+      return true;
+   }
+
+   /*
+    * if we've gotten to this point, the register is always used/defined in
+    * the same implementation so we can move it to be local to that
+    * implementation.
+    */
+
+   exec_node_remove(&reg->node);
+   exec_list_push_tail(&impl->registers, &reg->node);
+   reg->index = impl->reg_alloc++;
+   reg->is_global = false;
+   return true;
+}
+
+/* Entry point: attempts global_to_local on every register in the shader's
+ * global register list.  The "safe" iterator is needed because a successful
+ * conversion unlinks the register from that list.  Returns true if any
+ * register was moved or removed.
+ */
+bool
+nir_opt_global_to_local(nir_shader *shader)
+{
+   bool any_changed = false;
+
+   foreach_list_typed_safe(nir_register, reg, node, &shader->registers) {
+      any_changed |= global_to_local(reg);
+   }
+
+   return any_changed;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_select.c b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
new file mode 100644
index 000000000..ab08f286f
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_select.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a small peephole optimization that looks for
+ *
+ * if (cond) {
+ *    <empty>
+ * } else {
+ *    <empty>
+ * }
+ * phi
+ * ...
+ * phi
+ *
+ * and replaces it with a series of selects.  It can also handle the case
+ * where, instead of being empty, the if may contain some move operations
+ * whose only use is one of the following phi nodes.  This happens all the
+ * time when the SSA form comes from a conditional assignment with a
+ * swizzle.
+ */
+
+/* State threaded through the per-block callback of the peephole select pass */
+struct peephole_select_state {
+   /* Memory context handed to the instruction-creation, source-copy and
+    * use-rewrite helpers; initialized to ralloc_parent(impl).
+    */
+   void *mem_ctx;
+   /* Set to true once an if statement has been replaced by selects */
+   bool progress;
+};
+
+/* Returns true if every instruction in block is a plain fmov/imov (no
+ * saturate) with an SSA destination that has no if-uses and is used only by
+ * phi instructions located in the block's first successor.  An empty block
+ * trivially qualifies.
+ */
+static bool
+are_all_move_to_phi(nir_block *block)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         return false;
+
+      /* It must be a move operation */
+      nir_alu_instr *mov = nir_instr_as_alu(instr);
+      if (mov->op != nir_op_fmov && mov->op != nir_op_imov)
+         return false;
+
+      /* Can't handle saturate */
+      if (mov->dest.saturate)
+         return false;
+
+      /* It must be SSA */
+      if (!mov->dest.dest.is_ssa)
+         return false;
+
+      /* It cannot have any if-uses */
+      if (mov->dest.dest.ssa.if_uses->entries != 0)
+         return false;
+
+      /* The only uses of this definition must be phi's in the successor */
+      struct set_entry *entry;
+      set_foreach(mov->dest.dest.ssa.uses, entry) {
+         const nir_instr *dest_instr = entry->key;
+         if (dest_instr->type != nir_instr_type_phi ||
+             dest_instr->block != block->successors[0])
+            return false;
+      }
+   }
+
+   return true;
+}
+
+/* nir_foreach_block callback: if block directly follows an if statement
+ * whose then and else sides each consist of a single block containing only
+ * moves feeding phis (see are_all_move_to_phi), replaces each leading phi of
+ * block with a bcsel on the if condition and removes the if statement.
+ * Sets state->progress when it does so.  Always returns true.
+ */
+static bool
+nir_opt_peephole_select_block(nir_block *block, void *void_state)
+{
+   struct peephole_select_state *state = void_state;
+
+   /* If the block is empty, then it certainly doesn't have any phi nodes,
+    * so we can skip it.  This also ensures that we do an early skip on the
+    * end block of the function which isn't actually attached to the CFG.
+    */
+   if (exec_list_is_empty(&block->instr_list))
+      return true;
+
+   if (nir_cf_node_is_first(&block->cf_node))
+      return true;
+
+   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
+   if (prev_node->type != nir_cf_node_if)
+      return true;
+
+   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
+   nir_cf_node *then_node = nir_if_first_then_node(if_stmt);
+   nir_cf_node *else_node = nir_if_first_else_node(if_stmt);
+
+   /* We can only have one block in each side ... */
+   if (nir_if_last_then_node(if_stmt) != then_node ||
+       nir_if_last_else_node(if_stmt) != else_node)
+      return true;
+
+   nir_block *then_block = nir_cf_node_as_block(then_node);
+   nir_block *else_block = nir_cf_node_as_block(else_node);
+
+   /* ... and those blocks must only contain move-to-phi. */
+   if (!are_all_move_to_phi(then_block) || !are_all_move_to_phi(else_block))
+      return true;
+
+   /* At this point, we know that the previous CFG node is an if-then
+    * statement containing only moves to phi nodes in this block.  We can
+    * just remove that entire CF node and replace all of the phi nodes with
+    * selects.
+    */
+
+   nir_foreach_instr_safe(block, instr) {
+      /* Only the phis at the top of the block are rewritten */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+      /* Splat the condition to all channels */
+      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
+
+      assert(exec_list_length(&phi->srcs) == 2);
+      nir_foreach_phi_src(phi, src) {
+         assert(src->pred == then_block || src->pred == else_block);
+         assert(src->src.is_ssa);
+
+         /* bcsel source 1 takes the then-side value, source 2 the else-side */
+         unsigned idx = src->pred == then_block ? 1 : 2;
+
+         if (src->src.ssa->parent_instr->block == src->pred) {
+            /* We already know that this instruction must be a move with
+             * the phis in this block as its only users.
+             */
+            nir_alu_instr *mov = nir_instr_as_alu(src->src.ssa->parent_instr);
+            assert(mov->instr.type == nir_instr_type_alu);
+            assert(mov->op == nir_op_fmov || mov->op == nir_op_imov);
+
+            nir_alu_src_copy(&sel->src[idx], &mov->src[0], state->mem_ctx);
+         } else {
+            nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+         }
+      }
+
+      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
+                        phi->dest.ssa.num_components, phi->dest.ssa.name);
+      /* Write every component the phi produced */
+      sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
+
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
+                               nir_src_for_ssa(&sel->dest.dest.ssa),
+                               state->mem_ctx);
+
+      nir_instr_insert_before(&phi->instr, &sel->instr);
+      nir_instr_remove(&phi->instr);
+   }
+
+   nir_cf_node_remove(&if_stmt->cf_node);
+   state->progress = true;
+
+   return true;
+}
+
+/* Runs the peephole select optimization over one function implementation.
+ * When anything changed, no metadata is declared preserved
+ * (nir_metadata_none).  Returns true if progress was made.
+ */
+static bool
+nir_opt_peephole_select_impl(nir_function_impl *impl)
+{
+   struct peephole_select_state state;
+
+   state.progress = false;
+   state.mem_ctx = ralloc_parent(impl);
+
+   nir_foreach_block(impl, nir_opt_peephole_select_block, &state);
+
+   if (!state.progress)
+      return false;
+
+   nir_metadata_preserve(impl, nir_metadata_none);
+   return true;
+}
+
+/* Entry point: applies the peephole select optimization to every overload
+ * of the shader that has an implementation.  Returns true if any
+ * implementation reported progress.
+ */
+bool
+nir_opt_peephole_select(nir_shader *shader)
+{
+   bool any_progress = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (!overload->impl)
+         continue;
+
+      if (nir_opt_peephole_select_impl(overload->impl))
+         any_progress = true;
+   }
+
+   return any_progress;
+}
diff --git a/mesalib/src/glsl/nir/nir_opt_remove_phis.c b/mesalib/src/glsl/nir/nir_opt_remove_phis.c
new file mode 100644
index 000000000..7896584b4
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_opt_remove_phis.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2015 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This is a pass for removing phi nodes that look like:
+ * a = phi(b, b, b, ...)
+ *
+ * Note that we can't ignore undef sources here, or else we may create a
+ * situation where the definition of b doesn't dominate its uses. We're
+ * allowed to do this since the definition of b must dominate all of the
+ * phi node's predecessors, which means it must dominate the phi node as well
+ * as all of the phi node's uses. In essence, the phi node acts as a copy
+ * instruction. b can't be another phi node in the same block, since the only
+ * time when phi nodes can source other phi nodes defined in the same block is
+ * at the loop header, and in that case one of the sources of the phi has to
+ * be from before the loop and that source can't be b.
+ */
+
+/* Block callback for nir_foreach_block().
+ *
+ * Walks the instructions at the start of the block until the first non-phi.
+ * Every phi whose sources all reference the same SSA def has its uses
+ * rewritten to that def and is then removed.  `state` points at a bool that
+ * is set to true whenever a phi is removed.  Always returns true.
+ */
+static bool
+remove_phis_block(nir_block *block, void *state)
+{
+   bool *made_progress = state;
+   void *ctx = ralloc_parent(block);
+
+   nir_foreach_instr_safe(block, instr) {
+      /* Stop at the first instruction that is not a phi. */
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_ssa_def *common = NULL;
+      bool all_equal = true;
+
+      nir_foreach_phi_src(phi, phi_src) {
+         assert(phi_src->src.is_ssa);
+
+         /* The first source seeds the candidate; later ones must match it. */
+         if (common == NULL) {
+            common = phi_src->src.ssa;
+         } else if (phi_src->src.ssa != common) {
+            all_equal = false;
+            break;
+         }
+      }
+
+      if (all_equal) {
+         assert(phi->dest.is_ssa);
+         nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(common),
+                                  ctx);
+         nir_instr_remove(instr);
+
+         *made_progress = true;
+      }
+   }
+
+   return true;
+}
+
+/* Remove trivial phis from every block of one function implementation.
+ *
+ * Removing phis deletes instructions but leaves the control-flow graph
+ * untouched, so when progress was made only block indices and dominance
+ * information are declared preserved; any other metadata is invalidated.
+ *
+ * Returns true if at least one phi was removed.
+ */
+static bool
+remove_phis_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_foreach_block(impl, remove_phis_block, &progress);
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return progress;
+}
+
+/* Pass entry point: remove trivial phis from every overload that has an
+ * implementation.  Returns true if any implementation reported progress.
+ */
+bool
+nir_opt_remove_phis(nir_shader *shader)
+{
+   bool changed = false;
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)
+         changed |= remove_phis_impl(overload->impl);
+   }
+
+   return changed;
+}
+
diff --git a/mesalib/src/glsl/nir/nir_print.c b/mesalib/src/glsl/nir/nir_print.c
new file mode 100644
index 000000000..6a3c6a027
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_print.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Write num_tabs tab characters to fp. */
+static void
+print_tabs(unsigned num_tabs, FILE *fp)
+{
+   unsigned remaining = num_tabs;
+
+   while (remaining-- > 0)
+      fprintf(fp, "\t");
+}
+
+/* Bookkeeping used while printing so that every nir_variable gets a
+ * printable name that does not clash with a name already emitted.
+ */
+typedef struct {
+   /** map from nir_variable -> printable name */
+   struct hash_table *ht;
+
+   /** set of names used so far for nir_variables */
+   struct set *syms;
+
+   /* an index used to make new non-conflicting names */
+   unsigned index;
+} print_var_state;
+
+/* Print a register reference: an optional name comment, then "gr<index>"
+ * when the register is flagged global and "r<index>" otherwise.
+ */
+static void
+print_register(nir_register *reg, FILE *fp)
+{
+   if (reg->name != NULL)
+      fprintf(fp, "/* %s */ ", reg->name);
+
+   fprintf(fp, reg->is_global ? "gr%u" : "r%u", reg->index);
+}
+
+/* Vector type names, indexed by component count (index 0 yields "error"). */
+static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+/* Print one "decl_reg" line: the vector size, an optional "(packed)" marker,
+ * the register itself and, when num_array_elems is non-zero, the element
+ * count in brackets.
+ */
+static void
+print_register_decl(nir_register *reg, FILE *fp)
+{
+   const char *vec_size = sizes[reg->num_components];
+
+   fprintf(fp, "decl_reg %s ", vec_size);
+
+   if (reg->is_packed)
+      fprintf(fp, "(packed) ");
+
+   print_register(reg, fp);
+
+   if (reg->num_array_elems != 0)
+      fprintf(fp, "[%u]", reg->num_array_elems);
+
+   fprintf(fp, "\n");
+}
+
+/* Print an SSA value where it is defined: an optional name comment, the
+ * vector size and "ssa_<index>".
+ */
+static void
+print_ssa_def(nir_ssa_def *def, FILE *fp)
+{
+   const char *vec_size = sizes[def->num_components];
+
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+
+   fprintf(fp, "%s ssa_%u", vec_size, def->index);
+}
+
+/* Print an SSA value where it is used: an optional name comment followed by
+ * "ssa_<index>" (no vector size).
+ */
+static void
+print_ssa_use(nir_ssa_def *def, FILE *fp)
+{
+   const char *label = def->name;
+
+   if (label != NULL)
+      fprintf(fp, "/* %s */ ", label);
+
+   fprintf(fp, "ssa_%u", def->index);
+}
+
+static void print_src(nir_src *src, FILE *fp);
+
+/* Print a register source.  For registers with a non-zero array element
+ * count the access is followed by "[base_offset]" or
+ * "[base_offset + <indirect source>]".
+ */
+static void
+print_reg_src(nir_reg_src *src, FILE *fp)
+{
+   print_register(src->reg, fp);
+
+   /* Registers without array elements have no index to print. */
+   if (src->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", src->base_offset);
+
+   if (src->indirect != NULL) {
+      fprintf(fp, " + ");
+      print_src(src->indirect, fp);
+   }
+
+   fprintf(fp, "]");
+}
+
+/* Print a register destination.  For registers with a non-zero array
+ * element count the access is followed by "[base_offset]" or
+ * "[base_offset + <indirect source>]".
+ */
+static void
+print_reg_dest(nir_reg_dest *dest, FILE *fp)
+{
+   print_register(dest->reg, fp);
+
+   /* Registers without array elements have no index to print. */
+   if (dest->reg->num_array_elems == 0)
+      return;
+
+   fprintf(fp, "[%u", dest->base_offset);
+
+   if (dest->indirect != NULL) {
+      fprintf(fp, " + ");
+      print_src(dest->indirect, fp);
+   }
+
+   fprintf(fp, "]");
+}
+
+/* Print a source operand, dispatching on whether it is an SSA value or a
+ * register.
+ */
+static void
+print_src(nir_src *src, FILE *fp)
+{
+   if (!src->is_ssa) {
+      print_reg_src(&src->reg, fp);
+      return;
+   }
+
+   print_ssa_use(src->ssa, fp);
+}
+
+/* Print a destination operand, dispatching on whether it is an SSA
+ * definition or a register.
+ */
+static void
+print_dest(nir_dest *dest, FILE *fp)
+{
+   if (!dest->is_ssa) {
+      print_reg_dest(&dest->reg, fp);
+      return;
+   }
+
+   print_ssa_def(&dest->ssa, fp);
+}
+
+/* Print an ALU source with its modifiers: a leading "-" for negate, an
+ * "abs(...)" wrapper for abs, and a four-letter swizzle suffix unless the
+ * swizzle is the identity (x, y, z, w).
+ */
+static void
+print_alu_src(nir_alu_src *src, FILE *fp)
+{
+   bool identity = true;
+   for (unsigned c = 0; c < 4; c++) {
+      if (src->swizzle[c] != c) {
+         identity = false;
+         break;
+      }
+   }
+
+   if (src->negate)
+      fprintf(fp, "-");
+   if (src->abs)
+      fprintf(fp, "abs(");
+
+   print_src(&src->src, fp);
+
+   if (!identity) {
+      fprintf(fp, ".");
+      for (unsigned c = 0; c < 4; c++)
+         fprintf(fp, "%c", "xyzw"[src->swizzle[c]]);
+   }
+
+   if (src->abs)
+      fprintf(fp, ")");
+}
+
+/* Print an ALU destination.  For register destinations whose write mask
+ * does not cover every component of the register, the written components
+ * are appended as ".xyzw"-style letters.  The saturate modifier is not
+ * printed here; print_alu_instr() emits it after the opcode.
+ */
+static void
+print_alu_dest(nir_alu_dest *dest, FILE *fp)
+{
+   print_dest(&dest->dest, fp);
+
+   /* SSA destinations never get a write-mask suffix. */
+   if (dest->dest.is_ssa)
+      return;
+
+   const int full_mask = (1 << dest->dest.reg.reg->num_components) - 1;
+   if (dest->write_mask == full_mask)
+      return;
+
+   fprintf(fp, ".");
+   for (unsigned c = 0; c < 4; c++) {
+      if (dest->write_mask & (1 << c))
+         fprintf(fp, "%c", "xyzw"[c]);
+   }
+}
+
+/* Print an ALU instruction as "<dest> = <op>[.sat] <src0>, <src1>, ...".
+ * The opcode name and operand count come from nir_op_infos.
+ */
+static void
+print_alu_instr(nir_alu_instr *instr, FILE *fp)
+{
+   const char *op_name = nir_op_infos[instr->op].name;
+   const unsigned num_inputs = nir_op_infos[instr->op].num_inputs;
+
+   print_alu_dest(&instr->dest, fp);
+   fprintf(fp, " = %s", op_name);
+
+   if (instr->dest.saturate)
+      fprintf(fp, ".sat");
+
+   fprintf(fp, " ");
+
+   for (unsigned s = 0; s < num_inputs; s++) {
+      if (s > 0)
+         fprintf(fp, ", ");
+
+      print_alu_src(&instr->src[s], fp);
+   }
+}
+
+/* Print one "decl_var" line for a variable: qualifiers, mode,
+ * interpolation, GLSL type, name and, for shader inputs, outputs and
+ * uniforms, the driver location in parentheses.
+ *
+ * When `state` is non-NULL and the variable's name is already present in
+ * state->syms, a fresh "<name>@<index>" string is generated instead.  The
+ * name that was printed is then added to state->syms and recorded in
+ * state->ht for this variable.  With a NULL `state` the variable's own name
+ * is printed and nothing is recorded.
+ */
+static void
+print_var_decl(nir_variable *var, print_var_state *state, FILE *fp)
+{
+   static const char *const mode[] = { "shader_in ", "shader_out ", "", "",
+                                       "uniform ", "system " };
+   static const char *const interp[] = { "", "smooth", "flat",
+                                         "noperspective" };
+
+   fprintf(fp, "decl_var ");
+
+   fprintf(fp, "%s%s%s%s%s ",
+           var->data.centroid ? "centroid " : "",
+           var->data.sample ? "sample " : "",
+           var->data.invariant ? "invariant " : "",
+           mode[var->data.mode],
+           interp[var->data.interpolation]);
+
+   glsl_print_type(var->type, fp);
+
+   char *name = var->name;
+   if (state != NULL && _mesa_set_search(state->syms, var->name) != NULL) {
+      /* we have a collision with another name, append an @ + a unique index */
+      name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++);
+   }
+
+   fprintf(fp, " %s", name);
+
+   switch (var->data.mode) {
+   case nir_var_shader_in:
+   case nir_var_shader_out:
+   case nir_var_uniform:
+      fprintf(fp, " (%u)", var->data.driver_location);
+      break;
+   default:
+      break;
+   }
+
+   fprintf(fp, "\n");
+
+   if (state == NULL)
+      return;
+
+   _mesa_set_add(state->syms, name);
+   _mesa_hash_table_insert(state->ht, var, name);
+}
+
+/* Print the name of a variable.  With a non-NULL `state` the name is looked
+ * up in state->ht (the entry must exist); otherwise the variable's own name
+ * is used.
+ */
+static void
+print_var(nir_variable *var, print_var_state *state, FILE *fp)
+{
+   if (state == NULL) {
+      fprintf(fp, "%s", var->name);
+      return;
+   }
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ht, var);
+   assert(entry != NULL);
+
+   const char *printable = entry->data;
+   fprintf(fp, "%s", printable);
+}
+
+/* Print the variable at the head of a dereference chain. */
+static void
+print_deref_var(nir_deref_var *deref, print_var_state *state, FILE *fp)
+{
+   nir_variable *var = deref->var;
+
+   print_var(var, state, fp);
+}
+
+/* Print an array dereference in brackets: the constant offset for direct
+ * accesses, "[offset + ]<indirect source>" for indirect accesses (the
+ * offset is omitted when zero) and "*" for wildcards.  `state` is unused.
+ */
+static void
+print_deref_array(nir_deref_array *deref, print_var_state *state, FILE *fp)
+{
+   fprintf(fp, "[");
+
+   if (deref->deref_array_type == nir_deref_array_type_direct) {
+      fprintf(fp, "%u", deref->base_offset);
+   } else if (deref->deref_array_type == nir_deref_array_type_indirect) {
+      if (deref->base_offset != 0)
+         fprintf(fp, "%u + ", deref->base_offset);
+      print_src(&deref->indirect, fp);
+   } else if (deref->deref_array_type == nir_deref_array_type_wildcard) {
+      fprintf(fp, "*");
+   }
+
+   fprintf(fp, "]");
+}
+
+/* Print a struct member dereference as ".<field name>", where the name is
+ * looked up in parent_type by the deref's index.  `state` is unused.
+ */
+static void
+print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type,
+                   print_var_state *state, FILE *fp)
+{
+   const char *field = glsl_get_struct_elem_name(parent_type, deref->index);
+
+   fprintf(fp, ".%s", field);
+}
+
+/* Print a whole dereference chain by following the child links from the
+ * variable at its head.  The variable deref must be the first link; array
+ * and struct derefs must have a parent link, whose type is used to resolve
+ * struct field names.
+ */
+static void
+print_deref(nir_deref_var *deref, print_var_state *state, FILE *fp)
+{
+   nir_deref *parent = NULL;
+
+   for (nir_deref *cur = &deref->deref; cur != NULL;
+        parent = cur, cur = cur->child) {
+      switch (cur->deref_type) {
+      case nir_deref_type_var:
+         assert(parent == NULL);
+         assert(cur == &deref->deref);
+         print_deref_var(deref, state, fp);
+         break;
+
+      case nir_deref_type_array:
+         assert(parent != NULL);
+         print_deref_array(nir_deref_as_array(cur), state, fp);
+         break;
+
+      case nir_deref_type_struct:
+         assert(parent != NULL);
+         print_deref_struct(nir_deref_as_struct(cur),
+                            parent->type, state, fp);
+         break;
+
+      default:
+         unreachable("Invalid deref type");
+      }
+   }
+}
+
+/* Print an intrinsic as
+ *    [<dest> = ]intrinsic <name> (<srcs>) (<variable derefs>) (<indices>)
+ * with each list comma-separated.  The destination is printed only when the
+ * intrinsic's info entry says it has one; list lengths also come from
+ * nir_intrinsic_infos.
+ */
+static void
+print_intrinsic_instr(nir_intrinsic_instr *instr, print_var_state *state,
+                      FILE *fp)
+{
+   const unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   const unsigned num_vars =
+      nir_intrinsic_infos[instr->intrinsic].num_variables;
+   const unsigned num_indices =
+      nir_intrinsic_infos[instr->intrinsic].num_indices;
+
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      print_dest(&instr->dest, fp);
+      fprintf(fp, " = ");
+   }
+
+   fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name);
+
+   for (unsigned s = 0; s < num_srcs; s++) {
+      if (s > 0)
+         fprintf(fp, ", ");
+      print_src(&instr->src[s], fp);
+   }
+
+   fprintf(fp, ") (");
+
+   for (unsigned v = 0; v < num_vars; v++) {
+      if (v > 0)
+         fprintf(fp, ", ");
+      print_deref(instr->variables[v], state, fp);
+   }
+
+   fprintf(fp, ") (");
+
+   for (unsigned c = 0; c < num_indices; c++) {
+      if (c > 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "%u", instr->const_index[c]);
+   }
+
+   fprintf(fp, ")");
+}
+
+/* Print a texture instruction:
+ *    <dest> = <op> <src> (<src kind>), ... [offsets] [gather] <sampler> (sampler)
+ *
+ * Each source is followed by a label naming its role.  A constant offset is
+ * printed only when at least one of its four components is non-zero, and
+ * the gather component only for tg4.  The sampler is printed as a deref
+ * when one is attached, otherwise as the numeric sampler index.
+ */
+static void
+print_tex_instr(nir_tex_instr *instr, print_var_state *state, FILE *fp)
+{
+   print_dest(&instr->dest, fp);
+
+   fprintf(fp, " = ");
+
+   const char *op_str = "";
+   switch (instr->op) {
+   case nir_texop_tex:          op_str = "tex ";          break;
+   case nir_texop_txb:          op_str = "txb ";          break;
+   case nir_texop_txl:          op_str = "txl ";          break;
+   case nir_texop_txd:          op_str = "txd ";          break;
+   case nir_texop_txf:          op_str = "txf ";          break;
+   case nir_texop_txf_ms:       op_str = "txf_ms ";       break;
+   case nir_texop_txs:          op_str = "txs ";          break;
+   case nir_texop_lod:          op_str = "lod ";          break;
+   case nir_texop_tg4:          op_str = "tg4 ";          break;
+   case nir_texop_query_levels: op_str = "query_levels "; break;
+
+   default:
+      unreachable("Invalid texture operation");
+      break;
+   }
+   fputs(op_str, fp);
+
+   for (unsigned s = 0; s < instr->num_srcs; s++) {
+      print_src(&instr->src[s].src, fp);
+
+      fprintf(fp, " ");
+
+      const char *role = "";
+      switch (instr->src[s].src_type) {
+      case nir_tex_src_coord:          role = "(coord)";          break;
+      case nir_tex_src_projector:      role = "(projector)";      break;
+      case nir_tex_src_comparitor:     role = "(comparitor)";     break;
+      case nir_tex_src_offset:         role = "(offset)";         break;
+      case nir_tex_src_bias:           role = "(bias)";           break;
+      case nir_tex_src_lod:            role = "(lod)";            break;
+      case nir_tex_src_ms_index:       role = "(ms_index)";       break;
+      case nir_tex_src_ddx:            role = "(ddx)";            break;
+      case nir_tex_src_ddy:            role = "(ddy)";            break;
+      case nir_tex_src_sampler_offset: role = "(sampler_offset)"; break;
+
+      default:
+         unreachable("Invalid texture source type");
+         break;
+      }
+      fputs(role, fp);
+
+      fprintf(fp, ", ");
+   }
+
+   /* Only mention the constant offset when some component is non-zero. */
+   bool any_offset = false;
+   for (unsigned c = 0; c < 4 && !any_offset; c++)
+      any_offset = instr->const_offset[c] != 0;
+
+   if (any_offset) {
+      fprintf(fp, "[%i %i %i %i] (offset), ",
+              instr->const_offset[0], instr->const_offset[1],
+              instr->const_offset[2], instr->const_offset[3]);
+   }
+
+   if (instr->op == nir_texop_tg4)
+      fprintf(fp, "%u (gather_component), ", instr->component);
+
+   if (instr->sampler)
+      print_deref(instr->sampler, state, fp);
+   else
+      fprintf(fp, "%u", instr->sampler_index);
+
+   fprintf(fp, " (sampler)");
+}
+
+/* Print a call as "call <function> <param derefs>[, returning <deref>]".
+ * The comma before "returning" is only emitted when there are parameters.
+ */
+static void
+print_call_instr(nir_call_instr *instr, print_var_state *state, FILE *fp)
+{
+   const unsigned num_params = instr->num_params;
+
+   fprintf(fp, "call %s ", instr->callee->function->name);
+
+   for (unsigned p = 0; p < num_params; p++) {
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      print_deref(instr->params[p], state, fp);
+   }
+
+   if (instr->return_deref == NULL)
+      return;
+
+   if (num_params != 0)
+      fprintf(fp, ", ");
+   fprintf(fp, "returning ");
+   print_deref(instr->return_deref, state, fp);
+}
+
+/* Print a load_const instruction as
+ *    <def> = load_const (<c0>, <c1>, ...)
+ * with one entry per component of the defined value.  `tabs` is unused.
+ */
+static void
+print_load_const_instr(nir_load_const_instr *instr, unsigned tabs, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+
+   fprintf(fp, " = load_const (");
+
+   for (unsigned i = 0; i < instr->def.num_components; i++) {
+      if (i != 0)
+         fprintf(fp, ", ");
+
+      /*
+       * we don't really know the type of the constant (if it will be used as a
+       * float or an int), so just print the raw constant in hex for fidelity
+       * and then print the float in a comment for readability.
+       */
+
+      fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+   }
+
+   /* Balance the "(" opened above; it used to be left unclosed. */
+   fprintf(fp, ")");
+}
+
+/* Print the keyword for a jump instruction: "break", "continue" or
+ * "return".  Any other jump type prints nothing.
+ */
+static void
+print_jump_instr(nir_jump_instr *instr, FILE *fp)
+{
+   if (instr->type == nir_jump_break)
+      fprintf(fp, "break");
+   else if (instr->type == nir_jump_continue)
+      fprintf(fp, "continue");
+   else if (instr->type == nir_jump_return)
+      fprintf(fp, "return");
+}
+
+static void
+print_ssa_undef_instr(nir_ssa_undef_instr* instr, FILE *fp)
+{
+   print_ssa_def(&instr->def, fp);
+   fprintf(fp, " = undefined");
+}
+
+/* Print a phi as "<dest> = phi block_<n>: <src>, block_<m>: <src>, ...",
+ * one entry per phi source in list order.
+ */
+static void
+print_phi_instr(nir_phi_instr *instr, FILE *fp)
+{
+   bool first = true;
+
+   print_dest(&instr->dest, fp);
+   fprintf(fp, " = phi ");
+
+   nir_foreach_phi_src(instr, src) {
+      /* Separate every entry after the first one. */
+      if (!first)
+         fprintf(fp, ", ");
+      first = false;
+
+      fprintf(fp, "block_%u: ", src->pred->index);
+      print_src(&src->src, fp);
+   }
+}
+
+/* Print a parallel copy as "<dest> = <src>; <dest> = <src>; ...", one
+ * assignment per entry in list order.
+ */
+static void
+print_parallel_copy_instr(nir_parallel_copy_instr *instr, FILE *fp)
+{
+   bool first = true;
+
+   nir_foreach_parallel_copy_entry(instr, entry) {
+      /* Separate every entry after the first one. */
+      if (!first)
+         fprintf(fp, "; ");
+      first = false;
+
+      print_dest(&entry->dest, fp);
+      fprintf(fp, " = ");
+      print_src(&entry->src, fp);
+   }
+}
+
+/* Print one instruction, indented by `tabs` tab characters, by dispatching
+ * on the instruction type to the matching printer.  `state` is forwarded to
+ * the printers that print variables; no trailing newline is emitted.
+ */
+static void
+print_instr(const nir_instr *instr, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   print_tabs(tabs, fp);
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      print_alu_instr(nir_instr_as_alu(instr), fp);
+      return;
+   case nir_instr_type_call:
+      print_call_instr(nir_instr_as_call(instr), state, fp);
+      return;
+   case nir_instr_type_intrinsic:
+      print_intrinsic_instr(nir_instr_as_intrinsic(instr), state, fp);
+      return;
+   case nir_instr_type_tex:
+      print_tex_instr(nir_instr_as_tex(instr), state, fp);
+      return;
+   case nir_instr_type_load_const:
+      print_load_const_instr(nir_instr_as_load_const(instr), tabs, fp);
+      return;
+   case nir_instr_type_jump:
+      print_jump_instr(nir_instr_as_jump(instr), fp);
+      return;
+   case nir_instr_type_ssa_undef:
+      print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), fp);
+      return;
+   case nir_instr_type_phi:
+      print_phi_instr(nir_instr_as_phi(instr), fp);
+      return;
+   case nir_instr_type_parallel_copy:
+      print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), fp);
+      return;
+   default:
+      unreachable("Invalid instruction type");
+      return;
+   }
+}
+
+/* qsort() comparator for arrays of nir_block pointers: orders blocks by
+ * ascending index.
+ */
+static int
+compare_block_index(const void *p1, const void *p2)
+{
+   const nir_block *lhs = *((const nir_block **) p1);
+   const nir_block *rhs = *((const nir_block **) p2);
+   const int lhs_index = (int) lhs->index;
+   const int rhs_index = (int) rhs->index;
+
+   return lhs_index - rhs_index;
+}
+
+static void print_cf_node(nir_cf_node *node, print_var_state *state,
+                          unsigned tabs, FILE *fp);
+
+/* Print a basic block: a "block block_<n>:" header, a comment listing its
+ * predecessors, every instruction on its own line, and a comment listing
+ * its successors.
+ *
+ * Predecessors are printed sorted by block index so the output is stable.
+ * If the temporary array used for sorting cannot be allocated, they are
+ * printed in set iteration order instead of crashing.
+ */
+static void
+print_block(nir_block *block, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   const unsigned num_preds = block->predecessors->entries;
+   struct set_entry *entry;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "block block_%u:\n", block->index);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* preds: ");
+
+   /* Skip the allocation entirely for blocks without predecessors:
+    * malloc(0) may return NULL, which must not be handed to qsort().
+    */
+   nir_block **preds = NULL;
+   if (num_preds != 0)
+      preds = malloc(num_preds * sizeof(*preds));
+
+   if (preds != NULL) {
+      /* sort the predecessors by index so we consistently print the same
+       * thing
+       */
+      unsigned count = 0;
+      set_foreach(block->predecessors, entry) {
+         preds[count++] = (nir_block *) entry->key;
+      }
+
+      qsort(preds, count, sizeof(*preds), compare_block_index);
+
+      for (unsigned p = 0; p < count; p++)
+         fprintf(fp, "block_%u ", preds[p]->index);
+
+      free(preds);
+   } else if (num_preds != 0) {
+      /* Out of memory: still list the predecessors, just unsorted. */
+      set_foreach(block->predecessors, entry) {
+         fprintf(fp, "block_%u ", ((nir_block *) entry->key)->index);
+      }
+   }
+
+   fprintf(fp, "*/\n");
+
+   nir_foreach_instr(block, instr) {
+      print_instr(instr, state, tabs, fp);
+      fprintf(fp, "\n");
+   }
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "/* succs: ");
+   for (unsigned s = 0; s < 2; s++) {
+      if (block->successors[s])
+         fprintf(fp, "block_%u ", block->successors[s]->index);
+   }
+   fprintf(fp, "*/\n");
+}
+
+/* Print an if statement: "if <condition> {", the then-list, "} else {",
+ * the else-list and a closing "}".  Nested control-flow nodes are indented
+ * one tab deeper than the if itself.
+ */
+static void
+print_if(nir_if *if_stmt, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   const unsigned inner_tabs = tabs + 1;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "if ");
+   print_src(&if_stmt->condition, fp);
+   fprintf(fp, " {\n");
+
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list)
+      print_cf_node(node, state, inner_tabs, fp);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "} else {\n");
+
+   foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list)
+      print_cf_node(node, state, inner_tabs, fp);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/* Print a loop as "loop {", its body one tab deeper, and a closing "}". */
+static void
+print_loop(nir_loop *loop, print_var_state *state, unsigned tabs, FILE *fp)
+{
+   const unsigned inner_tabs = tabs + 1;
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "loop {\n");
+
+   foreach_list_typed(nir_cf_node, node, node, &loop->body)
+      print_cf_node(node, state, inner_tabs, fp);
+
+   print_tabs(tabs, fp);
+   fprintf(fp, "}\n");
+}
+
+/* Print a control-flow node by dispatching on its type to the block, if or
+ * loop printer.
+ */
+static void
+print_cf_node(nir_cf_node *node, print_var_state *state, unsigned int tabs,
+              FILE *fp)
+{
+   switch (node->type) {
+   case nir_cf_node_block:
+      print_block(nir_cf_node_as_block(node), state, tabs, fp);
+      return;
+   case nir_cf_node_if:
+      print_if(nir_cf_node_as_if(node), state, tabs, fp);
+      return;
+   case nir_cf_node_loop:
+      print_loop(nir_cf_node_as_loop(node), state, tabs, fp);
+      return;
+   default:
+      unreachable("Invalid CFG node type");
+   }
+}
+
+/* Print a function implementation: an "impl <name> <params>[, returning
+ * <var>]{" header, the local variable and register declarations, the body
+ * and finally the end block's label.
+ *
+ * nir_index_blocks() is called on the impl before the body is printed, so
+ * printing has that side effect on the impl.
+ */
+static void
+print_function_impl(nir_function_impl *impl, print_var_state *state, FILE *fp)
+{
+   const unsigned num_params = impl->num_params;
+
+   fprintf(fp, "\nimpl %s ", impl->overload->function->name);
+
+   for (unsigned p = 0; p < num_params; p++) {
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      print_var(impl->params[p], state, fp);
+   }
+
+   if (impl->return_var != NULL) {
+      if (num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      print_var(impl->return_var, state, fp);
+   }
+
+   fprintf(fp, "{\n");
+
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      fprintf(fp, "\t");
+      print_var_decl(var, state, fp);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      fprintf(fp, "\t");
+      print_register_decl(reg, fp);
+   }
+
+   nir_index_blocks(impl);
+
+   foreach_list_typed(nir_cf_node, node, node, &impl->body)
+      print_cf_node(node, state, 1, fp);
+
+   fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index);
+}
+
+/* Print an overload declaration as
+ *    decl_overload <name> <in|out|inout> <type>, ...[, returning <type>]
+ * followed, when the overload has one, by its implementation.
+ */
+static void
+print_function_overload(nir_function_overload *overload,
+                        print_var_state *state, FILE *fp)
+{
+   const unsigned num_params = overload->num_params;
+
+   fprintf(fp, "decl_overload %s ", overload->function->name);
+
+   for (unsigned p = 0; p < num_params; p++) {
+      if (p > 0)
+         fprintf(fp, ", ");
+
+      const char *qualifier = "";
+      switch (overload->params[p].param_type) {
+      case nir_parameter_in:    qualifier = "in ";    break;
+      case nir_parameter_out:   qualifier = "out ";   break;
+      case nir_parameter_inout: qualifier = "inout "; break;
+      default:
+         unreachable("Invalid parameter type");
+      }
+      fputs(qualifier, fp);
+
+      glsl_print_type(overload->params[p].type, fp);
+   }
+
+   if (overload->return_type != NULL) {
+      if (num_params != 0)
+         fprintf(fp, ", ");
+      fprintf(fp, "returning ");
+      glsl_print_type(overload->return_type, fp);
+   }
+
+   fprintf(fp, "\n");
+
+   if (overload->impl != NULL)
+      print_function_impl(overload->impl, state, fp);
+}
+
+/* Print every overload in the function's overload list, in list order. */
+static void
+print_function(nir_function *func, print_var_state *state, FILE *fp)
+{
+   foreach_list_typed(nir_function_overload, ovl, node, &func->overload_list)
+      print_function_overload(ovl, state, fp);
+}
+
+/* Set up a print_var_state: a pointer-keyed hash table for the
+ * variable -> name map, a string-keyed set for the names already used, and
+ * a name-uniquifying index starting at zero.  Both containers are created
+ * with a NULL first argument.
+ */
+static void
+init_print_state(print_var_state *state)
+{
+   state->index = 0;
+   state->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
+   state->syms = _mesa_set_create(NULL, _mesa_key_hash_string,
+                                  _mesa_key_string_equal);
+}
+
+/* Destroy the hash table and the set owned by a print_var_state.  No
+ * per-entry delete callback is supplied (NULL is passed for both).
+ */
+static void
+destroy_print_state(print_var_state *state)
+{
+   struct hash_table *names_by_var = state->ht;
+   struct set *used_names = state->syms;
+
+   _mesa_hash_table_destroy(names_by_var, NULL);
+   _mesa_set_destroy(used_names, NULL);
+}
+
+/* Print a whole shader to fp.
+ *
+ * Output order: user structures, then variable declarations for uniforms,
+ * inputs, outputs, globals and system values, then global register
+ * declarations, then every function.  A print_var_state is created for the
+ * duration of the call so that variable names are made unique.
+ */
+void
+nir_print_shader(nir_shader *shader, FILE *fp)
+{
+   print_var_state state;
+   init_print_state(&state);
+
+   for (unsigned s = 0; s < shader->num_user_structures; s++)
+      glsl_print_struct(shader->user_structures[s], fp);
+
+   /* Hash-table backed variable lists, in the order they are printed. */
+   struct hash_table *const var_tables[] = {
+      shader->uniforms,
+      shader->inputs,
+      shader->outputs,
+   };
+
+   for (unsigned t = 0; t < sizeof(var_tables) / sizeof(var_tables[0]); t++) {
+      struct hash_entry *entry;
+
+      hash_table_foreach(var_tables[t], entry) {
+         print_var_decl((nir_variable *) entry->data, &state, fp);
+      }
+   }
+
+   foreach_list_typed(nir_variable, var, node, &shader->globals)
+      print_var_decl(var, &state, fp);
+
+   foreach_list_typed(nir_variable, var, node, &shader->system_values)
+      print_var_decl(var, &state, fp);
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers)
+      print_register_decl(reg, fp);
+
+   foreach_list_typed(nir_function, func, node, &shader->functions)
+      print_function(func, &state, fp);
+
+   destroy_print_state(&state);
+}
+
+/* Print a single instruction to fp with no indentation and no
+ * print_var_state (NULL is passed as the state).
+ */
+void
+nir_print_instr(const nir_instr *instr, FILE *fp)
+{
+   const unsigned no_indent = 0;
+
+   print_instr(instr, NULL, no_indent, fp);
+}
diff --git a/mesalib/src/glsl/nir/nir_remove_dead_variables.c b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
new file mode 100644
index 000000000..e7f8aeacb
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_remove_dead_variables.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* Add every variable referenced by the intrinsic's variable derefs to the
+ * `live` set.  The number of derefs comes from nir_intrinsic_infos.
+ */
+static void
+add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live)
+{
+   const unsigned count = nir_intrinsic_infos[instr->intrinsic].num_variables;
+
+   for (unsigned v = 0; v < count; v++)
+      _mesa_set_add(live, instr->variables[v]->var);
+}
+
+/* Add the variables a call refers to, i.e. the return deref's variable (if
+ * any) and each parameter deref's variable, to the `live` set.
+ */
+static void
+add_var_use_call(nir_call_instr *instr, struct set *live)
+{
+   if (instr->return_deref != NULL)
+      _mesa_set_add(live, instr->return_deref->var);
+
+   for (unsigned p = 0; p < instr->num_params; p++)
+      _mesa_set_add(live, instr->params[p]->var);
+}
+
+/* Add the sampler variable of a texture instruction to the `live` set when
+ * the instruction has a sampler deref.
+ */
+static void
+add_var_use_tex(nir_tex_instr *instr, struct set *live)
+{
+   if (instr->sampler == NULL)
+      return;
+
+   _mesa_set_add(live, instr->sampler->var);
+}
+
+/* Block callback for nir_foreach_block(): records in the set passed as
+ * `state` every variable referenced by the block's intrinsic, call and
+ * texture instructions.  Other instruction types are ignored.  Always
+ * returns true.
+ */
+static bool
+add_var_use_block(nir_block *block, void *state)
+{
+   struct set *live = state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_intrinsic)
+         add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live);
+      else if (instr->type == nir_instr_type_call)
+         add_var_use_call(nir_instr_as_call(instr), live);
+      else if (instr->type == nir_instr_type_tex)
+         add_var_use_tex(nir_instr_as_tex(instr), live);
+   }
+
+   return true;
+}
+
+/* Collect into `live` every variable referenced from any overload in the
+ * shader that has an implementation.
+ */
+static void
+add_var_use_shader(nir_shader *shader, struct set *live)
+{
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)
+         nir_foreach_block(overload->impl, add_var_use_block, live);
+   }
+}
+
+/* Unlink from impl->locals every variable that is not in the `live` set. */
+static void
+remove_dead_local_vars(nir_function_impl *impl, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &impl->locals) {
+      if (_mesa_set_search(live, var) != NULL)
+         continue;
+
+      exec_node_remove(&var->node);
+   }
+}
+
+/* Unlink from shader->globals every variable that is not in the `live`
+ * set.
+ */
+static void
+remove_dead_global_vars(nir_shader *shader, struct set *live)
+{
+   foreach_list_typed_safe(nir_variable, var, node, &shader->globals) {
+      if (_mesa_set_search(live, var) != NULL)
+         continue;
+
+      exec_node_remove(&var->node);
+   }
+}
+
+/* Pass entry point: remove unreferenced variables from the shader.
+ *
+ * A pointer-keyed set of every variable referenced by an intrinsic, call or
+ * texture instruction is built first.  Globals, and the locals of every
+ * overload with an implementation, that are missing from that set are then
+ * unlinked from their lists.  The set is destroyed before returning.
+ */
+void
+nir_remove_dead_variables(nir_shader *shader)
+{
+   struct set *referenced =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+
+   add_var_use_shader(shader, referenced);
+
+   remove_dead_global_vars(shader, referenced);
+
+   nir_foreach_overload(shader, overload) {
+      if (overload->impl != NULL)
+         remove_dead_local_vars(overload->impl, referenced);
+   }
+
+   _mesa_set_destroy(referenced, NULL);
+}
diff --git a/mesalib/src/glsl/nir/nir_search.c b/mesalib/src/glsl/nir/nir_search.c
new file mode 100644
index 000000000..73a802be7
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_search.h"
+
+/* Bookkeeping shared between matching (match_value/match_expression) and
+ * construction (construct_value) of a search-and-replace.
+ */
+struct match_state {
+   /* Bitmask: bit N is set once search variable N has been bound */
+   unsigned variables_seen;
+   /* The ALU source (including swizzle) bound to each search variable */
+   nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES];
+};
+
+/* Forward declaration: match_value() and match_expression() are mutually
+ * recursive.
+ */
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state);
+
+/* Swizzle that selects component i for channel i */
+static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };
+
+/* Forward declaration: src_is_bool() and alu_instr_is_bool() are mutually
+ * recursive.
+ */
+static bool alu_instr_is_bool(nir_alu_instr *instr);
+
+/* Returns true if `src` is an SSA value produced by an ALU instruction for
+ * which alu_instr_is_bool() holds.  Non-SSA sources and values produced by
+ * any other kind of instruction are reported as not boolean.
+ */
+static bool
+src_is_bool(nir_src src)
+{
+   if (!src.is_ssa)
+      return false;
+   if (src.ssa->parent_instr->type != nir_instr_type_alu)
+      return false;
+   /* Use the typed cast helper like the rest of this file rather than a raw
+    * pointer cast.
+    */
+   return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr));
+}
+
+/* Returns true if `instr` produces a boolean.  Integer and/or/xor/not count
+ * as boolean when all of their sources are boolean; for every other opcode
+ * the decision is made from the opcode's declared output type.
+ */
+static bool
+alu_instr_is_bool(nir_alu_instr *instr)
+{
+   const nir_op op = instr->op;
+
+   if (op == nir_op_inot)
+      return src_is_bool(instr->src[0].src);
+
+   if (op == nir_op_iand || op == nir_op_ior || op == nir_op_ixor) {
+      if (!src_is_bool(instr->src[0].src))
+         return false;
+      return src_is_bool(instr->src[1].src);
+   }
+
+   return nir_op_infos[op].output_type == nir_type_bool;
+}
+
+/* Tries to match source number `src` of `instr` against the search value
+ * `value`.
+ *
+ * \param  value           The search value to match against
+ * \param  instr           The ALU instruction whose source is examined
+ * \param  src             Index of the source of instr to examine
+ * \param  num_components  Number of entries of swizzle that are meaningful
+ * \param  swizzle         Swizzle requested by the consumer; it is composed
+ *                         with the swizzle on the source itself
+ * \param  state           Variable bindings made so far; updated when a
+ *                         search variable is bound for the first time
+ *
+ * Returns true if the source matches.
+ */
+static bool
+match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
+            unsigned num_components, const uint8_t *swizzle,
+            struct match_state *state)
+{
+   uint8_t new_swizzle[4];
+
+   /* Compose the incoming swizzle with the one on the source itself */
+   for (unsigned i = 0; i < num_components; ++i)
+      new_swizzle[i] = instr->src[src].swizzle[swizzle[i]];
+
+   switch (value->type) {
+   case nir_search_value_expression:
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+         return false;
+
+      return match_expression(nir_search_value_as_expression(value),
+                              nir_instr_as_alu(instr->src[src].src.ssa->parent_instr),
+                              num_components, new_swizzle, state);
+
+   case nir_search_value_variable: {
+      nir_search_variable *var = nir_search_value_as_variable(value);
+
+      /* variables_seen is a bitmask and variables[] a fixed-size array, so
+       * an out-of-range index would shift/index out of bounds.
+       */
+      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);
+
+      if (state->variables_seen & (1 << var->variable)) {
+         /* Already bound: this use must refer to the same value with the
+          * same swizzle.
+          */
+         if (!nir_srcs_equal(state->variables[var->variable].src,
+                             instr->src[src].src))
+            return false;
+
+         assert(!instr->src[src].abs && !instr->src[src].negate);
+
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (state->variables[var->variable].swizzle[i] != new_swizzle[i])
+               return false;
+         }
+
+         return true;
+      } else {
+         /* Both restrictions below inspect the instruction that produced
+          * the value, which only exists for SSA sources.  A non-SSA source
+          * therefore can never satisfy them.
+          */
+         const bool is_ssa = instr->src[src].src.is_ssa;
+
+         if (var->is_constant &&
+             (!is_ssa ||
+              instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const))
+            return false;
+
+         if (var->type != nir_type_invalid) {
+            if (!is_ssa ||
+                instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
+               return false;
+
+            nir_alu_instr *src_alu =
+               nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
+
+            if (nir_op_infos[src_alu->op].output_type != var->type &&
+                !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+               return false;
+         }
+
+         /* First use of this variable: bind it to this source */
+         state->variables_seen |= (1 << var->variable);
+         state->variables[var->variable].src = instr->src[src].src;
+         state->variables[var->variable].abs = false;
+         state->variables[var->variable].negate = false;
+
+         /* Channels beyond num_components are zeroed */
+         for (unsigned i = 0; i < 4; ++i) {
+            if (i < num_components)
+               state->variables[var->variable].swizzle[i] = new_swizzle[i];
+            else
+               state->variables[var->variable].swizzle[i] = 0;
+         }
+
+         return true;
+      }
+   }
+
+   case nir_search_value_constant: {
+      nir_search_constant *const_val = nir_search_value_as_constant(value);
+
+      if (!instr->src[src].src.is_ssa)
+         return false;
+
+      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
+         return false;
+
+      nir_load_const_instr *load =
+         nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
+
+      /* Every selected channel of the loaded constant must equal the search
+       * constant, compared according to the input type of this source.
+       */
+      switch (nir_op_infos[instr->op].input_types[src]) {
+      case nir_type_float:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.f[new_swizzle[i]] != const_val->data.f)
+               return false;
+         }
+         return true;
+      case nir_type_int:
+      case nir_type_unsigned:
+      case nir_type_bool:
+         for (unsigned i = 0; i < num_components; ++i) {
+            if (load->value.i[new_swizzle[i]] != const_val->data.i)
+               return false;
+         }
+         return true;
+      default:
+         unreachable("Invalid alu source type");
+      }
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/* Tries to match the ALU instruction `instr` against the search expression
+ * `expr`.
+ *
+ * \param  expr            The search expression
+ * \param  instr           The instruction to test
+ * \param  num_components  Number of entries of swizzle that are meaningful
+ * \param  swizzle         Swizzle with which the consumer reads instr
+ * \param  state           Variable bindings; updated as variables are bound
+ *
+ * For two-source commutative opcodes a second attempt is made with the
+ * sources exchanged if the first attempt fails.
+ *
+ * Returns true on a match.
+ */
+static bool
+match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
+                 unsigned num_components, const uint8_t *swizzle,
+                 struct match_state *state)
+{
+   if (instr->op != expr->opcode)
+      return false;
+
+   assert(!instr->dest.saturate);
+   assert(nir_op_infos[instr->op].num_inputs > 0);
+
+   /* If we have an explicitly sized destination, we can only handle the
+    * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid
+    * expression, we don't have the information right now to propagate that
+    * swizzle through.  We can only properly propagate swizzles if the
+    * instruction is vectorized.
+    */
+   if (nir_op_infos[instr->op].output_size != 0) {
+      for (unsigned i = 0; i < num_components; i++) {
+         if (swizzle[i] != i)
+            return false;
+      }
+   }
+
+   /* Remember which variables were bound before the first attempt so that
+    * the commutative retry below can start from the same bindings.
+    */
+   const unsigned variables_seen_stash = state->variables_seen;
+
+   bool matched = true;
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      /* If the source is an explicitly sized source, then we need to reset
+       * both the number of components and the swizzle.
+       */
+      if (nir_op_infos[instr->op].input_sizes[i] != 0) {
+         num_components = nir_op_infos[instr->op].input_sizes[i];
+         swizzle = identity_swizzle;
+      }
+
+      if (!match_value(expr->srcs[i], instr, i, num_components,
+                       swizzle, state)) {
+         matched = false;
+         break;
+      }
+   }
+
+   if (matched)
+      return true;
+
+   if (nir_op_infos[instr->op].num_inputs == 2 &&
+       (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE)) {
+      /* Drop any bindings made by the failed attempt above.  Otherwise a
+       * variable bound to the "wrong" source the first time around would be
+       * treated as already seen and make the swapped attempt fail
+       * erroneously.
+       */
+      state->variables_seen = variables_seen_stash;
+
+      if (!match_value(expr->srcs[0], instr, 1, num_components,
+                       swizzle, state))
+         return false;
+
+      return match_value(expr->srcs[1], instr, 0, num_components,
+                         swizzle, state);
+   } else {
+      return false;
+   }
+}
+
+/* Builds the instructions needed to compute the replacement value `value`
+ * and returns an ALU source that refers to the result.
+ *
+ * \param  value           The replacement value to construct
+ * \param  type            ALU type used to fill in a constant value
+ * \param  num_components  Number of components for a newly created ALU
+ *                         destination (overridden by explicitly sized
+ *                         opcodes)
+ * \param  state           Variable bindings produced by a successful match
+ * \param  instr           New instructions are inserted before this one
+ * \param  mem_ctx         Context passed to the instruction constructors
+ *
+ * Expressions are built recursively, variables are copied from the match
+ * state, and constants become single-component load_const instructions.
+ */
+static nir_alu_src
+construct_value(const nir_search_value *value, nir_alu_type type,
+                unsigned num_components, struct match_state *state,
+                nir_instr *instr, void *mem_ctx)
+{
+   switch (value->type) {
+   case nir_search_value_expression: {
+      const nir_search_expression *expr = nir_search_value_as_expression(value);
+
+      if (nir_op_infos[expr->opcode].output_size != 0)
+         num_components = nir_op_infos[expr->opcode].output_size;
+
+      nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
+      nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+      alu->dest.write_mask = (1 << num_components) - 1;
+      alu->dest.saturate = false;
+
+      for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) {
+         /* If the source is an explicitly sized source, then we need to reset
+          * the number of components to match.
+          */
+         if (nir_op_infos[alu->op].input_sizes[i] != 0)
+            num_components = nir_op_infos[alu->op].input_sizes[i];
+
+         alu->src[i] = construct_value(expr->srcs[i],
+                                       nir_op_infos[alu->op].input_types[i],
+                                       num_components,
+                                       state, instr, mem_ctx);
+      }
+
+      /* The sources were inserted by the recursive calls above, so the new
+       * instruction ends up after everything it depends on.
+       */
+      nir_instr_insert_before(instr, &alu->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
+      val.negate = false;
+      val.abs = false;
+      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
+
+      return val;
+   }
+
+   case nir_search_value_variable: {
+      const nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(state->variables_seen & (1 << var->variable));
+
+      nir_alu_src val;
+      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
+
+      assert(!var->is_constant);
+
+      return val;
+   }
+
+   case nir_search_value_constant: {
+      const nir_search_constant *c = nir_search_value_as_constant(value);
+      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+
+      switch (type) {
+      case nir_type_float:
+         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
+         load->value.f[0] = c->data.f;
+         break;
+      case nir_type_int:
+         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
+         load->value.i[0] = c->data.i;
+         break;
+      case nir_type_unsigned:
+      case nir_type_bool:
+         load->value.u[0] = c->data.u;
+         break;
+      default:
+         unreachable("Invalid alu source type");
+      }
+
+      nir_instr_insert_before(instr, &load->instr);
+
+      /* The constant has a single component, so every channel reads
+       * component 0.
+       */
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&load->def);
+      val.negate = false;
+      val.abs = false;
+      memset(val.swizzle, 0, sizeof val.swizzle);
+
+      return val;
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+/* Attempts to match `instr` against `search` and, on success, replaces it
+ * with instructions computing `replace`.
+ *
+ * \param  instr    The ALU instruction to test; must have an SSA destination
+ * \param  search   The expression to look for
+ * \param  replace  The value to construct in place of a match
+ * \param  mem_ctx  Context passed to the instruction constructors
+ *
+ * Returns the mov instruction that now produces the result, or NULL if
+ * instr did not match (in which case nothing is modified).
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx)
+{
+   uint8_t swizzle[4] = { 0, 0, 0, 0 };
+
+   /* Check this before touching dest.dest.ssa below */
+   assert(instr->dest.dest.is_ssa);
+
+   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i)
+      swizzle[i] = i;
+
+   struct match_state state;
+   state.variables_seen = 0;
+
+   if (!match_expression(search, instr, instr->dest.dest.ssa.num_components,
+                         swizzle, &state))
+      return NULL;
+
+   /* Inserting a mov may be unnecessary.  However, it's much easier to
+    * simply let copy propagation clean this up than to try to go through
+    * and rewrite swizzles ourselves.
+    */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = instr->dest.write_mask;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                     instr->dest.dest.ssa.num_components, NULL);
+
+   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
+                                 instr->dest.dest.ssa.num_components, &state,
+                                 &instr->instr, mem_ctx);
+   nir_instr_insert_before(&instr->instr, &mov->instr);
+
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&mov->dest.dest.ssa), mem_ctx);
+
+   /* We know this one has no more uses because we just rewrote them all,
+    * so we can remove it.  The rest of the matched expression, however, we
+    * don't know so much about.  We'll just let dead code clean them up.
+    */
+   nir_instr_remove(&instr->instr);
+
+   return mov;
+}
diff --git a/mesalib/src/glsl/nir/nir_search.h b/mesalib/src/glsl/nir/nir_search.h
new file mode 100644
index 000000000..7d4779294
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_search.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#ifndef _NIR_SEARCH_
+#define _NIR_SEARCH_
+
+#include "nir.h"
+
+#define NIR_SEARCH_MAX_VARIABLES 16
+
+/** Discriminates the kinds of nir_search_value */
+typedef enum {
+   nir_search_value_expression,
+   nir_search_value_variable,
+   nir_search_value_constant,
+} nir_search_value_type;
+
+/** Common header embedded as the `value` member of every search value
+ *
+ * The type field says which of nir_search_expression, nir_search_variable
+ * or nir_search_constant the value belongs to.
+ */
+typedef struct {
+   nir_search_value_type type;
+} nir_search_value;
+
+/** A search value that binds to (or refers back to) an arbitrary source */
+typedef struct {
+   nir_search_value value;
+
+   /** The variable index;  Must be less than NIR_SEARCH_MAX_VARIABLES */
+   unsigned variable;
+
+   /** Indicates that the given variable must be a constant
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match constant values.
+    */
+   bool is_constant;
+
+   /** Indicates that the given variable must have a certain type
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match values that come from an ALU
+    * instruction with the given output type.  A type of nir_type_invalid
+    * means it can match any type.
+    *
+    * Note: A variable that is both constant and has a type other than
+    * nir_type_invalid will never match anything.
+    */
+   nir_alu_type type;
+} nir_search_variable;
+
+/** A search value that stands for a single constant */
+typedef struct {
+   nir_search_value value;
+
+   /** The constant; which member is used depends on the ALU type of the
+    * source it is matched against or constructed for.
+    */
+   union {
+      uint32_t u;
+      int32_t i;
+      float f;
+   } data;
+} nir_search_constant;
+
+/** A search value that stands for an ALU operation on other search values */
+typedef struct {
+   nir_search_value value;
+
+   /** The ALU opcode of the expression */
+   nir_op opcode;
+   /** One search value per input of the opcode */
+   const nir_search_value *srcs[4];
+} nir_search_expression;
+
+/* Helpers that convert a pointer to the embedded nir_search_value into a
+ * pointer to the containing variable, constant or expression.  They are
+ * generated by NIR_DEFINE_CAST, which is defined elsewhere.
+ */
+NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
+                nir_search_variable, value)
+NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
+                nir_search_constant, value)
+NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
+                nir_search_expression, value)
+
+/** Matches instr against search and, on a match, replaces it with replace
+ *
+ * Returns the instruction that now produces the result, or NULL if instr
+ * did not match.
+ */
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx);
+
+#endif /* _NIR_SEARCH_ */
diff --git a/mesalib/src/glsl/nir/nir_split_var_copies.c b/mesalib/src/glsl/nir/nir_split_var_copies.c
new file mode 100644
index 000000000..4d663b51b
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_split_var_copies.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements "copy splitting" which is similar to structure splitting only
+ * it works on copy operations rather than the datatypes themselves.  The
+ * GLSL language allows you to copy one variable to another an entire
+ * structure (which may contain arrays or other structures) at a time.
+ * Normally, in a language such as C this would be handled by a "structure
+ * splitting" pass that breaks up the structures.  Unfortunately for us,
+ * structures used in inputs or outputs can't be split.  Therefore,
+ * regardless of what we do, we have to be able to copy to/from
+ * structures.
+ *
+ * The primary purpose of structure splitting is to allow you to better
+ * optimize variable access and lower things to registers where you can.
+ * The primary issue here is that, if you lower the copy to a bunch of
+ * loads and stores, you lose a lot of information about the copy
+ * operation that you would like to keep around.  To solve this problem, we
+ * have a "copy splitting" pass that, instead of splitting the structures
+ * or lowering the copy into loads and stores, splits the copy operation
+ * into a bunch of copy operations one for each leaf of the structure tree.
+ * If an intermediate array is encountered, it is referenced with a
+ * wildcard reference to indicate that the entire array is to be copied.
+ *
+ * As things become direct, array copies may be able to be losslessly
+ * lowered to having fewer and fewer wildcards.  However, until that
+ * happens we want to keep the information about the arrays intact.
+ *
+ * Prior to the copy splitting pass, there are no wildcard references but
+ * there may be incomplete references where the tail of the deref chain is
+ * an array or a structure and not a specific element.  After the copy
+ * splitting pass has completed, every variable deref will be a full-length
+ * dereference pointing to a single leaf in the structure type tree with
+ * possibly a few wildcard array dereferences.
+ */
+
+/* State handed to the per-block callback of the copy splitting pass */
+struct split_var_copies_state {
+   /* Context from which the new copy instructions and their deref chains
+    * are allocated
+    */
+   void *mem_ctx;
+   /* Scratch context for temporary deref links and for the copy
+    * instructions that were replaced; freed at the end of
+    * split_var_copies_impl()
+    */
+   void *dead_ctx;
+};
+
+/* Follows the child links starting at `deref` and returns the last link of
+ * the chain (the one whose child is NULL).
+ */
+static nir_deref *
+get_deref_tail(nir_deref *deref)
+{
+   nir_deref *tail;
+
+   for (tail = deref; tail->child != NULL; tail = tail->child)
+      ;
+
+   return tail;
+}
+
+/* Recursively constructs deref chains to split a copy instruction into
+ * multiple (if needed) copy instructions with full-length deref chains.
+ * External callers of this function should pass the tail and head of the
+ * deref chains found as the source and destination of the copy instruction
+ * into this function.
+ *
+ * \param  old_copy  The copy instruction we are splitting
+ * \param  dest_head The head of the destination deref chain we are building
+ * \param  src_head  The head of the source deref chain we are building
+ * \param  dest_tail The tail of the destination deref chain we are building
+ * \param  src_tail  The tail of the source deref chain we are building
+ * \param  state     The current split_var_copies_state object
+ */
+static void
+split_var_copy_instr(nir_intrinsic_instr *old_copy,
+                     nir_deref *dest_head, nir_deref *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail,
+                     struct split_var_copies_state *state)
+{
+   assert(src_tail->type == dest_tail->type);
+
+   /* Make sure these really are the tails of the deref chains */
+   assert(dest_tail->child == NULL);
+   assert(src_tail->child == NULL);
+
+   switch (glsl_get_base_type(src_tail->type)) {
+   case GLSL_TYPE_ARRAY: {
+      /* Make a wildcard dereference */
+      nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+      deref->deref.type = glsl_get_array_element(src_tail->type);
+      deref->deref_array_type = nir_deref_array_type_wildcard;
+
+      /* Set the tail of both as the newly created wildcard deref.  It is
+       * safe to use the same wildcard in both places because a) we will be
+       * copying it before we put it in an actual instruction and b)
+       * everything that will potentially add another link in the deref
+       * chain will also add the same thing to both chains.
+       */
+      src_tail->child = &deref->deref;
+      dest_tail->child = &deref->deref;
+
+      split_var_copy_instr(old_copy, dest_head, src_head,
+                           dest_tail->child, src_tail->child, state);
+
+      /* Set it back to the way we found it */
+      src_tail->child = NULL;
+      dest_tail->child = NULL;
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT:
+      /* This is the only part that actually does any interesting
+       * splitting.  For array types, we just use wildcards and resolve
+       * them later.  For structure types, we need to emit one copy
+       * instruction for every structure element.  Because we may have
+       * structs inside structs, we just recurse and let the next level
+       * take care of any additional structures.
+       */
+      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
+         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
+         deref->deref.type = glsl_get_struct_field(src_tail->type, i);
+
+         /* Set the tail of both as the newly created structure deref.  It
+          * is safe to use the same structure deref in both places because
+          * a) we will be copying it before we put it in an actual
+          * instruction and b) everything that will potentially add another
+          * link in the deref chain will also add the same thing to both
+          * chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+      }
+      /* Set it back to the way we found it */
+      src_tail->child = NULL;
+      dest_tail->child = NULL;
+      break;
+
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_matrix(src_tail->type)) {
+         /* A matrix is handled like an array of its columns: add a
+          * wildcard link with the column type and recurse.
+          */
+         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+         deref->deref.type = glsl_get_column_type(src_tail->type);
+         deref->deref_array_type = nir_deref_array_type_wildcard;
+
+         /* Set the tail of both as the newly created wildcard deref.  It
+          * is safe to use the same wildcard in both places because a) we
+          * will be copying it before we put it in an actual instruction
+          * and b) everything that will potentially add another link in the
+          * deref chain will also add the same thing to both chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+
+         /* Set it back to the way we found it */
+         src_tail->child = NULL;
+         dest_tail->child = NULL;
+      } else {
+         /* At this point, we have fully built our deref chains and can
+          * actually add the new copy instruction.
+          */
+         nir_intrinsic_instr *new_copy =
+            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);
+
+         /* We need to make copies because a) this deref chain actually
+          * belongs to the copy instruction and b) the deref chains may
+          * have some of the same links due to the way we constructed them
+          */
+         nir_deref *src = nir_copy_deref(state->mem_ctx, src_head);
+         nir_deref *dest = nir_copy_deref(state->mem_ctx, dest_head);
+
+         /* variables[0] is the destination, variables[1] the source */
+         new_copy->variables[0] = nir_deref_as_var(dest);
+         new_copy->variables[1] = nir_deref_as_var(src);
+
+         /* Emit the copy instruction after the old instruction.  We'll
+          * remove the old one later.
+          */
+         nir_instr_insert_after(&old_copy->instr, &new_copy->instr);
+      }
+      break;
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_INTERFACE:
+   default:
+      unreachable("Cannot copy these types");
+   }
+}
+
+/* Per-block callback of the copy splitting pass.  Every copy_var intrinsic
+ * whose source is an array, a structure or a matrix is split with
+ * split_var_copy_instr(); the original instruction is then removed and
+ * handed to the dead context.  Copies of non-matrix float/int/uint/bool
+ * values are left alone.  Always returns true.
+ */
+static bool
+split_var_copies_block(nir_block *block, void *void_state)
+{
+   struct split_var_copies_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
+      if (copy->intrinsic != nir_intrinsic_copy_var)
+         continue;
+
+      nir_deref *dest_head = &copy->variables[0]->deref;
+      nir_deref *src_head = &copy->variables[1]->deref;
+      nir_deref *dest_tail = get_deref_tail(dest_head);
+      nir_deref *src_tail = get_deref_tail(src_head);
+
+      /* First decide whether this copy has to be split at all ... */
+      bool needs_split = false;
+      switch (glsl_get_base_type(src_tail->type)) {
+      case GLSL_TYPE_ARRAY:
+      case GLSL_TYPE_STRUCT:
+         needs_split = true;
+         break;
+      case GLSL_TYPE_FLOAT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_UINT:
+      case GLSL_TYPE_BOOL:
+         needs_split = glsl_type_is_matrix(src_tail->type);
+         break;
+      default:
+         unreachable("Invalid type");
+         break;
+      }
+
+      /* ... then do the actual work in a single place */
+      if (needs_split) {
+         split_var_copy_instr(copy, dest_head, src_head,
+                              dest_tail, src_tail, state);
+         nir_instr_remove(&copy->instr);
+         ralloc_steal(state->dead_ctx, instr);
+      }
+   }
+
+   return true;
+}
+
+/* Runs copy splitting over every block of `impl`.  New instructions are
+ * allocated from the ralloc parent of impl; everything placed in the
+ * temporary dead context is freed once all blocks have been visited.
+ */
+static void
+split_var_copies_impl(nir_function_impl *impl)
+{
+   void *scratch_ctx = ralloc_context(NULL);
+   struct split_var_copies_state state = {
+      .mem_ctx = ralloc_parent(impl),
+      .dead_ctx = scratch_ctx,
+   };
+
+   nir_foreach_block(impl, split_var_copies_block, &state);
+
+   ralloc_free(scratch_ctx);
+}
+
+/* Entry point of the pass: splits variable copies in every overload of
+ * `shader` that has an implementation.
+ */
+void
+nir_split_var_copies(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      nir_function_impl *impl = overload->impl;
+
+      if (impl != NULL)
+         split_var_copies_impl(impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_to_ssa.c b/mesalib/src/glsl/nir/nir_to_ssa.c
new file mode 100644
index 000000000..47cf45393
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_to_ssa.c
@@ -0,0 +1,535 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Implements the classic to-SSA algorithm described by Cytron et al. in
+ * "Efficiently Computing Static Single Assignment Form and the Control
+ * Dependence Graph."
+ */
+
+/*
+ * Inserts a phi node of the form reg = phi(reg, reg, reg, ...) at the top of
+ * block, with one (non-SSA) source per predecessor of the block. Both the
+ * phi instruction and its sources are allocated out of mem_ctx.
+ */
+
+static void
+insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
+{
+   nir_phi_instr *phi = nir_phi_instr_create(mem_ctx);
+   struct set_entry *pred_entry;
+
+   phi->dest.reg.reg = reg;
+
+   set_foreach(block->predecessors, pred_entry) {
+      nir_phi_src *phi_src = ralloc(mem_ctx, nir_phi_src);
+
+      /* a direct, un-offset read of the very same register */
+      phi_src->src.is_ssa = false;
+      phi_src->src.reg.reg = reg;
+      phi_src->src.reg.base_offset = 0;
+      phi_src->src.reg.indirect = NULL;
+      phi_src->pred = (nir_block *) pred_entry->key;
+
+      exec_list_push_tail(&phi->srcs, &phi_src->node);
+   }
+
+   nir_instr_insert_before_block(block, &phi->instr);
+}
+
+/*
+ * Phi placement step of the to-SSA conversion.
+ *
+ * For every non-array register of impl, a worklist is seeded with the blocks
+ * containing a definition of the register and then grown through the
+ * dominance frontiers; each frontier block reached for the first time gets a
+ * trivial phi node (see insert_trivial_phi). Array registers are skipped.
+ *
+ * The dominance frontiers (block->dom_frontier) must be available when this
+ * is called.
+ */
+static void
+insert_phi_nodes(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_parent(impl);
+
+   /*
+    * Index the blocks before sizing anything from impl->num_blocks: every
+    * array below is indexed by block->index, so the block numbering has to
+    * be settled before the block count is read.
+    */
+   nir_index_blocks(impl);
+
+   /*
+    * Per-block stamps holding the last iteration (register) for which the
+    * block was put on the worklist (work) or received a phi node
+    * (has_already). Comparing against iter_count avoids clearing the arrays
+    * between registers.
+    */
+   unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
+   unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
+
+   /*
+    * Since the work flags already prevent us from inserting a node that has
+    * ever been inserted into W, we don't need to use a set to represent W.
+    * Also, since no block can ever be inserted into W more than once, we know
+    * that the maximum size of W is the number of basic blocks in the
+    * function. So all we need to handle W is an array and a pointer to the
+    * next element to be inserted and the next element to be removed.
+    */
+   nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *));
+   unsigned w_start, w_end;
+
+   unsigned iter_count = 0;
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      if (reg->num_array_elems != 0)
+         continue;
+
+      w_start = w_end = 0;
+      iter_count++;
+
+      /* seed the worklist with every block that defines the register */
+      struct set_entry *entry;
+      set_foreach(reg->defs, entry) {
+         nir_instr *def = (nir_instr *) entry->key;
+         if (work[def->block->index] < iter_count)
+            W[w_end++] = def->block;
+         work[def->block->index] = iter_count;
+      }
+
+      while (w_start != w_end) {
+         nir_block *cur = W[w_start++];
+         set_foreach(cur->dom_frontier, entry) {
+            nir_block *next = (nir_block *) entry->key;
+
+            /*
+             * If there's more than one return statement, then the end block
+             * can be a join point for some definitions. However, there are
+             * no instructions in the end block, so nothing would use those
+             * phi nodes. Of course, we couldn't place those phi nodes
+             * anyways due to the restriction of having no instructions in the
+             * end block...
+             */
+            if (next == impl->end_block)
+               continue;
+
+            if (has_already[next->index] < iter_count) {
+               insert_trivial_phi(reg, next, mem_ctx);
+               has_already[next->index] = iter_count;
+               /* the new phi is itself a definition: propagate from it */
+               if (work[next->index] < iter_count) {
+                  work[next->index] = iter_count;
+                  W[w_end++] = next;
+               }
+            }
+         }
+      }
+   }
+
+   free(work);
+   free(has_already);
+   free(W);
+}
+
+/*
+ * Per-register rewriting state: a stack of the SSA definitions that currently
+ * stand in for the register. A NULL stack marks a register that is left
+ * untouched by the pass.
+ */
+typedef struct {
+   nir_ssa_def **stack; /**< SSA definitions pushed so far */
+   int index; /**< position of the top of the stack, -1 when it is empty */
+   unsigned num_defs; /**< used to add indices to debug names */
+#ifndef NDEBUG
+   unsigned stack_size; /**< capacity of stack, only used by an assert */
+#endif
+} reg_state;
+
+/* State shared by all the callbacks of the renaming walk. */
+typedef struct {
+   /* per-register state, indexed by nir_register::index */
+   reg_state *states;
+   /* context used for the names and instructions created by the pass */
+   void *mem_ctx;
+   /* instruction whose sources/destinations are being rewritten, or NULL
+    * while the condition of parent_if is being rewritten
+    */
+   nir_instr *parent_instr;
+   nir_if *parent_if;
+   nir_function_impl *impl;
+
+   /* map from SSA value -> original register */
+   struct hash_table *ssa_map;
+} rewrite_state;
+
+/*
+ * Returns the SSA definition that currently represents reg, i.e. the top of
+ * its stack. When the stack is empty a fresh undefined SSA value is created
+ * and returned instead.
+ */
+static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
+{
+   reg_state *cur = &state->states[reg->index];
+
+   if (cur->index != -1)
+      return cur->stack[cur->index];
+
+   /*
+    * We're using an undefined register, create a new undefined SSA value
+    * to preserve the information that this source is undefined
+    */
+   nir_ssa_undef_instr *undef =
+      nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components);
+
+   /*
+    * We could just insert the undefined instruction before the instruction
+    * we're rewriting, but we could be rewriting a phi source in which case
+    * we can't do that, so do the next easiest thing - insert it at the
+    * beginning of the program. In the end, it doesn't really matter where
+    * the undefined instructions are because they're going to be ignored
+    * in the backend.
+    */
+   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
+
+   return &undef->def;
+}
+
+/*
+ * Source callback: turns a register source into a use of the SSA definition
+ * currently on top of the register's stack and records the use in that
+ * definition's use set. Sources that already are SSA, or that read a register
+ * which is not being converted (NULL stack), are left alone.
+ *
+ * Always returns true so that the iteration over the sources continues.
+ */
+static bool
+rewrite_use(nir_src *src, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+
+   if (src->is_ssa || state->states[src->reg.reg->index].stack == NULL)
+      return true;
+
+   /* look the definition up before the register reference is overwritten */
+   nir_ssa_def *def = get_ssa_src(src->reg.reg, state);
+
+   src->is_ssa = true;
+   src->ssa = def;
+
+   if (state->parent_instr == NULL)
+      _mesa_set_add(def->if_uses, state->parent_if);
+   else
+      _mesa_set_add(def->uses, state->parent_instr);
+
+   return true;
+}
+
+/*
+ * Destination callback of the forward walk: replaces a register destination
+ * with a new SSA destination, pushes that definition on the register's stack
+ * and remembers in ssa_map which register it came from. SSA destinations and
+ * registers that are not being converted (NULL stack) are skipped.
+ *
+ * Always returns true so that the iteration over the destinations continues.
+ */
+static bool
+rewrite_def_forwards(nir_dest *dest, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+
+   if (dest->is_ssa)
+      return true;
+
+   nir_register *reg = dest->reg.reg;
+   reg_state *cur = &state->states[reg->index];
+
+   if (cur->stack == NULL)
+      return true;
+
+   /* derive "<register name>_<n>" as debug name for named registers */
+   char *name = NULL;
+   if (reg->name)
+      name = ralloc_asprintf(state->mem_ctx, "%s_%u", reg->name,
+                             cur->num_defs);
+
+   nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
+
+   /* push our SSA destination on the stack */
+   cur->index++;
+   assert(cur->index < cur->stack_size);
+   cur->stack[cur->index] = &dest->ssa;
+   cur->num_defs++;
+
+   _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg);
+
+   return true;
+}
+
+/*
+ * Forward rewrite of an ALU instruction.
+ *
+ * The sources are rewritten first. A destination that writes every component
+ * of its register is then simply turned into an SSA destination. A partial
+ * write is handled in two steps: the instruction itself gets a new SSA
+ * destination that only holds the components it produces, and a vecN
+ * instruction is inserted right after it that combines those components with
+ * the untouched components of the register's previous value. That vecN is the
+ * instruction which becomes the new definition of the register.
+ */
+static void
+rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
+{
+   state->parent_instr = &instr->instr;
+
+   nir_foreach_src(&instr->instr, rewrite_use, state);
+
+   if (instr->dest.dest.is_ssa)
+      return;
+
+   nir_register *reg = instr->dest.dest.reg.reg;
+   unsigned index = reg->index;
+
+   /* registers with a NULL stack are not converted */
+   if (state->states[index].stack == NULL)
+      return;
+
+   unsigned write_mask = instr->dest.write_mask;
+   if (write_mask != (1 << instr->dest.dest.reg.reg->num_components) - 1) {
+      /*
+       * Calculate the number of components of the final instruction, which
+       * for per-component things is the number of enabled channels in the
+       * write mask and for non-per-component things is the number of output
+       * components of the instruction.
+       */
+      unsigned num_components;
+      if (nir_op_infos[instr->op].output_size == 0) {
+         /* count the bits set in the low four bits of the write mask */
+         unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5);
+         num_components = (temp & 0x3) + ((temp >> 2) & 0x3);
+      } else {
+         num_components = nir_op_infos[instr->op].output_size;
+      }
+
+      char *name = NULL;
+      if (instr->dest.dest.reg.reg->name)
+         name = ralloc_asprintf(state->mem_ctx, "%s_%u",
+                                reg->name, state->states[index].num_defs);
+
+      /* the instruction now writes all components of a narrower SSA value */
+      instr->dest.write_mask = (1 << num_components) - 1;
+      nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
+
+      if (nir_op_infos[instr->op].output_size == 0) {
+         /*
+          * When we change the output writemask, we need to change the
+          * swizzles for per-component inputs too
+          */
+         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+            if (nir_op_infos[instr->op].input_sizes[i] != 0)
+               continue;
+
+            unsigned new_swizzle[4] = {0, 0, 0, 0};
+
+            /*
+             * We keep two indices:
+             * 1. The index of the original (non-SSA) component
+             * 2. The index of the post-SSA, compacted, component
+             *
+             * We need to map the swizzle component at index 1 to the swizzle
+             * component at index 2.
+             */
+
+            unsigned ssa_index = 0;
+            for (unsigned index = 0; index < 4; index++) {
+               if (!((write_mask >> index) & 1))
+                  continue;
+
+               new_swizzle[ssa_index] = instr->src[i].swizzle[index];
+               ssa_index++;
+            }
+
+            for (unsigned j = 0; j < 4; j++)
+               instr->src[i].swizzle[j] = new_swizzle[j];
+         }
+      }
+
+      /* pick the vecN opcode matching the full width of the register */
+      nir_op op;
+      switch (reg->num_components) {
+      case 2: op = nir_op_vec2; break;
+      case 3: op = nir_op_vec3; break;
+      case 4: op = nir_op_vec4; break;
+      default: unreachable("not reached");
+      }
+
+      nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, op);
+
+      vec->dest.dest.reg.reg = reg;
+      vec->dest.write_mask = (1 << reg->num_components) - 1;
+
+      /* old_src: value of the register before this instruction,
+       * new_src: the components just written by this instruction
+       */
+      nir_ssa_def *old_src = get_ssa_src(reg, state);
+      nir_ssa_def *new_src = &instr->dest.dest.ssa;
+
+      unsigned ssa_index = 0;
+      for (unsigned i = 0; i < reg->num_components; i++) {
+         vec->src[i].src.is_ssa = true;
+         if ((write_mask >> i) & 1) {
+            vec->src[i].src.ssa = new_src;
+            /* per-component results were compacted, others keep their slot */
+            if (nir_op_infos[instr->op].output_size == 0)
+               vec->src[i].swizzle[0] = ssa_index;
+            else
+               vec->src[i].swizzle[0] = i;
+            ssa_index++;
+         } else {
+            vec->src[i].src.ssa = old_src;
+            vec->src[i].swizzle[0] = i;
+         }
+      }
+
+      nir_instr_insert_after(&instr->instr, &vec->instr);
+
+      /* the vecN, not the original instruction, defines the register now */
+      state->parent_instr = &vec->instr;
+      rewrite_def_forwards(&vec->dest.dest, state);
+   } else {
+      rewrite_def_forwards(&instr->dest.dest, state);
+   }
+}
+
+/*
+ * Forward rewrite of a phi instruction: only the destination is converted
+ * here. The sources of phi nodes are rewritten from the predecessor blocks,
+ * see rewrite_phi_sources.
+ */
+static void
+rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state)
+{
+   state->parent_instr = &instr->instr;
+   rewrite_def_forwards(&instr->dest, state);
+}
+
+/*
+ * Forward rewrite of one instruction. ALU and phi instructions have dedicated
+ * handlers; for every other instruction type all sources are rewritten first
+ * and all destinations afterwards.
+ */
+static void
+rewrite_instr_forward(nir_instr *instr, rewrite_state *state)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      rewrite_alu_instr_forward(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      rewrite_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   default:
+      state->parent_instr = instr;
+
+      nir_foreach_src(instr, rewrite_use, state);
+      nir_foreach_dest(instr, rewrite_def_forwards, state);
+      break;
+   }
+}
+
+/*
+ * Rewrites, in every phi node at the start of block, the source that belongs
+ * to the predecessor pred. The walk stops at the first instruction that is
+ * not a phi.
+ */
+static void
+rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      state->parent_instr = instr;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_foreach_phi_src(phi, src) {
+         if (src->pred != pred)
+            continue;
+
+         /* only the first source coming from pred is rewritten */
+         rewrite_use(&src->src, state);
+         break;
+      }
+   }
+}
+
+/*
+ * Destination callback of the backward walk: pops one entry from the stack of
+ * the register that the SSA destination was created from. Destinations that
+ * are not SSA, or that are not recorded in ssa_map, are ignored.
+ *
+ * Always returns true so that the iteration over the destinations continues.
+ */
+static bool
+rewrite_def_backwards(nir_dest *dest, void *_state)
+{
+   rewrite_state *state = (rewrite_state *) _state;
+
+   if (!dest->is_ssa)
+      return true;
+
+   struct hash_entry *mapping =
+      _mesa_hash_table_search(state->ssa_map, &dest->ssa);
+
+   if (mapping == NULL)
+      return true;
+
+   nir_register *orig_reg = (nir_register *) mapping->data;
+   reg_state *cur = &state->states[orig_reg->index];
+
+   cur->index--;
+   assert(cur->index >= -1);
+
+   return true;
+}
+
+/* Undoes the stack pushes made for the destinations of one instruction. */
+static void
+rewrite_instr_backwards(nir_instr *instr, rewrite_state *state)
+{
+   nir_foreach_dest(instr, rewrite_def_backwards, state);
+}
+
+/*
+ * Renaming step for one block and, recursively, for the blocks it dominates:
+ *
+ * 1. rewrite all instructions of the block front to back, pushing the new
+ *    definitions on the register stacks,
+ * 2. rewrite the condition of an if statement directly following the block,
+ * 3. rewrite the matching phi sources in the successor blocks,
+ * 4. recurse into the children of the block in the dominance tree,
+ * 5. walk the block in reverse and pop what step 1 pushed.
+ */
+static void
+rewrite_block(nir_block *block, rewrite_state *state)
+{
+   /* This will skip over any instructions after the current one, which is
+    * what we want because those instructions (vector gather, conditional
+    * select) will already be in SSA form.
+    */
+   nir_foreach_instr_safe(block, instr) {
+      rewrite_instr_forward(instr, state);
+   }
+
+   if (block != state->impl->end_block &&
+       !nir_cf_node_is_last(&block->cf_node) &&
+       nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) {
+      nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node));
+      /* a NULL parent_instr makes rewrite_use record an if-use instead */
+      state->parent_instr = NULL;
+      state->parent_if = if_stmt;
+      rewrite_use(&if_stmt->condition, state);
+   }
+
+   if (block->successors[0])
+      rewrite_phi_sources(block->successors[0], block, state);
+   if (block->successors[1])
+      rewrite_phi_sources(block->successors[1], block, state);
+
+   for (unsigned i = 0; i < block->num_dom_children; i++)
+      rewrite_block(block->dom_children[i], state);
+
+   /* restore the stacks to the state they had on entry to this block */
+   nir_foreach_instr_reverse(block, instr) {
+      rewrite_instr_backwards(instr, state);
+   }
+}
+
+/*
+ * Unlinks from impl->registers every register that took part in the
+ * conversion, i.e. every register that was given a stack. Registers with a
+ * NULL stack stay in the list.
+ */
+static void
+remove_unused_regs(nir_function_impl *impl, rewrite_state *state)
+{
+   foreach_list_typed_safe(nir_register, reg, node, &impl->registers) {
+      if (state->states[reg->index].stack == NULL)
+         continue;
+
+      exec_node_remove(&reg->node);
+   }
+}
+
+/*
+ * Sets up the state of the renaming walk for impl: the SSA value -> register
+ * map and one reg_state per register index below impl->reg_alloc.
+ *
+ * Registers with array elements are not converted and keep a NULL stack;
+ * every other register gets an empty stack sized after its number of
+ * definitions. Must be paired with destroy_rewrite_state.
+ */
+static void
+init_rewrite_state(nir_function_impl *impl, rewrite_state *state)
+{
+   state->impl = impl;
+   state->mem_ctx = ralloc_parent(impl);
+   state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
+   /*
+    * Zero-initialize the table: slots whose index does not belong to a
+    * register of impl->registers are never written by the loop below, and
+    * must read as "not converted" (NULL stack) instead of holding
+    * indeterminate values.
+    */
+   state->states = rzalloc_array(NULL, reg_state, impl->reg_alloc);
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      assert(reg->index < impl->reg_alloc);
+      if (reg->num_array_elems > 0) {
+         state->states[reg->index].stack = NULL;
+      } else {
+         /*
+          * Calculate a conservative estimate of the stack size based on the
+          * number of definitions there are. Note that this function *must* be
+          * called after phi nodes are inserted so we can count phi node
+          * definitions too.
+          */
+         unsigned stack_size = reg->defs->entries;
+
+         state->states[reg->index].stack = ralloc_array(state->states,
+                                                        nir_ssa_def *,
+                                                        stack_size);
+#ifndef NDEBUG
+         state->states[reg->index].stack_size = stack_size;
+#endif
+         /* -1 marks an empty stack */
+         state->states[reg->index].index = -1;
+         state->states[reg->index].num_defs = 0;
+      }
+   }
+}
+
+/*
+ * Releases what init_rewrite_state allocated. The per-register stacks are
+ * ralloc children of state->states and go away with it.
+ */
+static void
+destroy_rewrite_state(rewrite_state *state)
+{
+   _mesa_hash_table_destroy(state->ssa_map, NULL);
+   ralloc_free(state->states);
+}
+
+/*
+ * Converts the non-array registers of one function implementation to SSA
+ * form: phi nodes are placed first, then the blocks are renamed starting from
+ * the start block, and finally the converted registers are removed from the
+ * implementation's register list.
+ */
+void
+nir_convert_to_ssa_impl(nir_function_impl *impl)
+{
+   nir_metadata_require(impl, nir_metadata_dominance);
+
+   insert_phi_nodes(impl);
+
+   /* must come after insert_phi_nodes: the stack sizes are derived from the
+    * number of definitions, which includes the phi nodes just inserted
+    */
+   rewrite_state state;
+   init_rewrite_state(impl, &state);
+
+   rewrite_block(impl->start_block, &state);
+
+   remove_unused_regs(impl, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   destroy_rewrite_state(&state);
+}
+
+/*
+ * Runs the to-SSA conversion on every overload of the shader that has an
+ * implementation attached.
+ */
+void
+nir_convert_to_ssa(nir_shader *shader)
+{
+   nir_foreach_overload(shader, overload) {
+      /* nothing to convert for an overload without an implementation */
+      if (!overload->impl)
+         continue;
+
+      nir_convert_to_ssa_impl(overload->impl);
+   }
+}
diff --git a/mesalib/src/glsl/nir/nir_types.cpp b/mesalib/src/glsl/nir/nir_types.cpp
new file mode 100644
index 000000000..a13c3e12a
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_types.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir_types.h"
+#include "ir.h"
+
+/*
+ * Writes a textual form of type to fp. Arrays are printed as their element
+ * type followed by "[length]"; structures whose name is not a GL identifier
+ * are printed as "name@address"; everything else by name only.
+ */
+void
+glsl_print_type(const glsl_type *type, FILE *fp)
+{
+   if (type->base_type == GLSL_TYPE_ARRAY) {
+      glsl_print_type(type->fields.array, fp);
+      fprintf(fp, "[%u]", type->length);
+      return;
+   }
+
+   const bool tag_with_address =
+      (type->base_type == GLSL_TYPE_STRUCT) && !is_gl_identifier(type->name);
+
+   if (tag_with_address)
+      fprintf(fp, "%s@%p", type->name, (void *) type);
+   else
+      fprintf(fp, "%s", type->name);
+}
+
+/*
+ * Writes the definition of a structure type to fp as "struct { ... }", with
+ * one tab-indented "<type> <name>;" line per field. type must be a structure
+ * type (asserted).
+ */
+void
+glsl_print_struct(const glsl_type *type, FILE *fp)
+{
+   assert(type->base_type == GLSL_TYPE_STRUCT);
+
+   fprintf(fp, "struct {\n");
+   for (unsigned i = 0; i < type->length; i++) {
+      fprintf(fp, "\t");
+      glsl_print_type(type->fields.structure[i].type, fp);
+      fprintf(fp, " %s;\n", type->fields.structure[i].name);
+   }
+   fprintf(fp, "}\n");
+}
+
+/*
+ * Returns the type obtained by indexing into type once: the column type for
+ * a matrix, the array element type (fields.array) otherwise.
+ */
+const glsl_type *
+glsl_get_array_element(const glsl_type* type)
+{
+   return type->is_matrix() ? type->column_type() : type->fields.array;
+}
+
+/* Returns the type of field number index of a structure type. The index is
+ * not range-checked here.
+ */
+const glsl_type *
+glsl_get_struct_field(const glsl_type *type, unsigned index)
+{
+   return type->fields.structure[index].type;
+}
+
+/* C wrapper around glsl_type::column_type(). */
+const struct glsl_type *
+glsl_get_column_type(const struct glsl_type *type)
+{
+   return type->column_type();
+}
+
+/* Returns the base_type member of type. */
+enum glsl_base_type
+glsl_get_base_type(const struct glsl_type *type)
+{
+   return type->base_type;
+}
+
+/* Returns the vector_elements member of type. */
+unsigned
+glsl_get_vector_elements(const struct glsl_type *type)
+{
+   return type->vector_elements;
+}
+
+/* C wrapper around glsl_type::components(). */
+unsigned
+glsl_get_components(const struct glsl_type *type)
+{
+   return type->components();
+}
+
+/* Returns the matrix_columns member of type. */
+unsigned
+glsl_get_matrix_columns(const struct glsl_type *type)
+{
+   return type->matrix_columns;
+}
+
+/* Returns the length member of type (used in this file as the array length
+ * and as the number of structure fields).
+ */
+unsigned
+glsl_get_length(const struct glsl_type *type)
+{
+   return type->length;
+}
+
+/* Returns the name of field number index of a structure type. The index is
+ * not range-checked here.
+ */
+const char *
+glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
+{
+   return type->fields.structure[index].name;
+}
+
+/* C wrapper around glsl_type::is_void(). */
+bool
+glsl_type_is_void(const glsl_type *type)
+{
+   return type->is_void();
+}
+
+/* C wrapper around glsl_type::is_vector(). */
+bool
+glsl_type_is_vector(const struct glsl_type *type)
+{
+   return type->is_vector();
+}
+
+/* C wrapper around glsl_type::is_scalar(). */
+bool
+glsl_type_is_scalar(const struct glsl_type *type)
+{
+   return type->is_scalar();
+}
+
+/* C wrapper around glsl_type::is_matrix(). */
+bool
+glsl_type_is_matrix(const struct glsl_type *type)
+{
+   return type->is_matrix();
+}
+
+/* Returns glsl_type::void_type. */
+const glsl_type *
+glsl_void_type(void)
+{
+   return glsl_type::void_type;
+}
+
+/* Returns glsl_type::vec4_type. */
+const glsl_type *
+glsl_vec4_type(void)
+{
+   return glsl_type::vec4_type;
+}
+
+/* C wrapper around glsl_type::get_array_instance(base, elements). */
+const glsl_type *
+glsl_array_type(const glsl_type *base, unsigned elements)
+{
+   return glsl_type::get_array_instance(base, elements);
+}
diff --git a/mesalib/src/glsl/nir/nir_types.h b/mesalib/src/glsl/nir/nir_types.h
new file mode 100644
index 000000000..494051a67
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_types.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#pragma once
+
+/* C wrapper around glsl_types.h */
+
+#include "../glsl_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#else
+struct glsl_type;
+#endif
+
+#include <stdio.h>
+
+/* Debug printing of a type / of a structure definition to fp. */
+void glsl_print_type(const struct glsl_type *type, FILE *fp);
+void glsl_print_struct(const struct glsl_type *type, FILE *fp);
+
+/* Navigation from a type to the types it is built from. */
+const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type,
+                                              unsigned index);
+
+const struct glsl_type *glsl_get_array_element(const struct glsl_type *type);
+
+const struct glsl_type *glsl_get_column_type(const struct glsl_type *type);
+
+/* Plain accessors for the properties of a type. */
+enum glsl_base_type glsl_get_base_type(const struct glsl_type *type);
+
+unsigned glsl_get_vector_elements(const struct glsl_type *type);
+
+unsigned glsl_get_components(const struct glsl_type *type);
+
+unsigned glsl_get_matrix_columns(const struct glsl_type *type);
+
+unsigned glsl_get_length(const struct glsl_type *type);
+
+const char *glsl_get_struct_elem_name(const struct glsl_type *type,
+                                      unsigned index);
+
+
+/* Type classification predicates. */
+bool glsl_type_is_void(const struct glsl_type *type);
+bool glsl_type_is_vector(const struct glsl_type *type);
+bool glsl_type_is_scalar(const struct glsl_type *type);
+bool glsl_type_is_matrix(const struct glsl_type *type);
+
+/* Access to / construction of types. */
+const struct glsl_type *glsl_void_type(void);
+const struct glsl_type *glsl_vec4_type(void);
+const struct glsl_type *glsl_array_type(const struct glsl_type *base,
+                                        unsigned elements);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mesalib/src/glsl/nir/nir_validate.c b/mesalib/src/glsl/nir/nir_validate.c
new file mode 100644
index 000000000..a3fe9d620
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_validate.c
@@ -0,0 +1,979 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+/*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+/* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+#ifdef DEBUG
+
+/*
+ * Per-register validation state.
+ */
+
+typedef struct {
+   /*
+    * equivalent to the uses and defs in nir_register, but built up by the
+    * validator. At the end, we verify that the sets have the same entries.
+    * uses holds instructions, if_uses holds if statements, defs holds the
+    * instructions writing the register.
+    */
+   struct set *uses, *if_uses, *defs;
+   nir_function_impl *where_defined; /* NULL for global registers */
+} reg_validate_state;
+
+/*
+ * Per-SSA-definition validation state.
+ */
+typedef struct {
+   /*
+    * equivalent to the uses in nir_ssa_def, but built up by the validator.
+    * At the end, we verify that the sets have the same entries.
+    */
+   struct set *uses, *if_uses;
+   /* function implementation that was being validated when the definition
+    * was encountered
+    */
+   nir_function_impl *where_defined;
+} ssa_def_validate_state;
+
+/*
+ * Global state of one validation run.
+ */
+typedef struct {
+   /* map of register -> validation state (reg_validate_state above) */
+   struct hash_table *regs;
+
+   /* the current shader being validated */
+   nir_shader *shader;
+
+   /* the current instruction being validated; NULL while the source being
+    * checked belongs to if_stmt instead
+    */
+   nir_instr *instr;
+
+   /* the current basic block being validated */
+   nir_block *block;
+
+   /* the current if statement being validated */
+   nir_if *if_stmt;
+
+   /* the parent of the current cf node being visited */
+   nir_cf_node *parent_node;
+
+   /* the current function implementation being validated */
+   nir_function_impl *impl;
+
+   /* map of SSA value -> validation state (ssa_def_validate_state above) */
+   struct hash_table *ssa_defs;
+
+   /* bitset of ssa definitions we have found; used to check uniqueness */
+   BITSET_WORD *ssa_defs_found;
+
+   /* bitset of registers we have currently found; used to check uniqueness */
+   BITSET_WORD *regs_found;
+
+   /* map of local variable -> function implementation where it is defined */
+   struct hash_table *var_defs;
+} validate_state;
+
+static void validate_src(nir_src *src, validate_state *state);
+
+/*
+ * Validates a register source: the register must be known to the validator,
+ * the use must be recorded in the register's own use sets, a local register
+ * must belong to the function being validated, a constant array offset must
+ * be in bounds, and an indirect offset is only allowed on array registers
+ * and only with one level of indirection.
+ *
+ * The use is also added to the validator's own use sets for the register.
+ */
+static void
+validate_reg_src(nir_reg_src *src, validate_state *state)
+{
+   assert(src->reg != NULL);
+
+   struct hash_entry *entry;
+   entry = _mesa_hash_table_search(state->regs, src->reg);
+   assert(entry);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+   /* the user is either the current instruction or the current if */
+   if (state->instr) {
+      _mesa_set_add(reg_state->uses, state->instr);
+
+      assert(_mesa_set_search(src->reg->uses, state->instr));
+   } else {
+      assert(state->if_stmt);
+      _mesa_set_add(reg_state->if_uses, state->if_stmt);
+
+      assert(_mesa_set_search(src->reg->if_uses, state->if_stmt));
+   }
+
+   if (!src->reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "using a register declared in a different function");
+   }
+
+   assert((src->reg->num_array_elems == 0 ||
+          src->base_offset < src->reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (src->indirect) {
+      assert(src->reg->num_array_elems != 0);
+      assert((src->indirect->is_ssa || src->indirect->reg.indirect == NULL) &&
+             "only one level of indirection allowed");
+      validate_src(src->indirect, state);
+   }
+}
+
+/*
+ * Validates a use of an SSA definition: the definition must already have
+ * been seen by the validator, must come from the function being validated,
+ * and the use must be recorded in the definition's own use sets.
+ *
+ * The use is also added to the validator's own use sets for the definition.
+ */
+static void
+validate_ssa_src(nir_ssa_def *def, validate_state *state)
+{
+   assert(def != NULL);
+
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+
+   assert(entry);
+
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+   assert(def_state->where_defined == state->impl &&
+          "using an SSA value defined in a different function");
+
+   /* the user is either the current instruction or the current if */
+   if (state->instr) {
+      _mesa_set_add(def_state->uses, state->instr);
+
+      assert(_mesa_set_search(def->uses, state->instr));
+   } else {
+      assert(state->if_stmt);
+      _mesa_set_add(def_state->if_uses, state->if_stmt);
+
+      assert(_mesa_set_search(def->if_uses, state->if_stmt));
+   }
+
+   /* TODO validate that the use is dominated by the definition */
+}
+
+/* Validates a source with the checker matching its kind (SSA or register). */
+static void
+validate_src(nir_src *src, validate_state *state)
+{
+   if (src->is_ssa) {
+      validate_ssa_src(src->ssa, state);
+      return;
+   }
+
+   validate_reg_src(&src->reg, state);
+}
+
+/*
+ * Validates source number index of an ALU instruction: every swizzle entry
+ * must be below 4, and the entries of the channels the instruction actually
+ * uses must stay within the number of components of the value read. The
+ * underlying source is then validated as well.
+ */
+static void
+validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
+{
+   nir_alu_src *alu_src = &instr->src[index];
+   unsigned avail_components;
+
+   if (alu_src->src.is_ssa) {
+      avail_components = alu_src->src.ssa->num_components;
+   } else if (alu_src->src.reg.reg->is_packed) {
+      avail_components = 4; /* can't check anything */
+   } else {
+      avail_components = alu_src->src.reg.reg->num_components;
+   }
+
+   for (unsigned chan = 0; chan < 4; chan++) {
+      assert(alu_src->swizzle[chan] < 4);
+
+      if (nir_alu_instr_channel_used(instr, index, chan))
+         assert(alu_src->swizzle[chan] < avail_components);
+   }
+
+   validate_src(&alu_src->src, state);
+}
+
+/*
+ * Validates a register destination of the current instruction: the
+ * instruction must be recorded in the register's defs set, the register must
+ * be known to the validator, a local register must belong to the function
+ * being validated, a constant array offset must be in bounds, and an
+ * indirect offset is only allowed on array registers and only with one level
+ * of indirection.
+ *
+ * The definition is also added to the validator's own defs set for the
+ * register.
+ */
+static void
+validate_reg_dest(nir_reg_dest *dest, validate_state *state)
+{
+   assert(dest->reg != NULL);
+
+   struct set_entry *entry = _mesa_set_search(dest->reg->defs, state->instr);
+   assert(entry && "definition not in nir_register.defs");
+
+   struct hash_entry *entry2;
+   entry2 = _mesa_hash_table_search(state->regs, dest->reg);
+
+   assert(entry2);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry2->data;
+   _mesa_set_add(reg_state->defs, state->instr);
+
+   if (!dest->reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "writing to a register declared in a different function");
+   }
+
+   assert((dest->reg->num_array_elems == 0 ||
+          dest->base_offset < dest->reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (dest->indirect) {
+      assert(dest->reg->num_array_elems != 0);
+      assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) &&
+             "only one level of indirection allowed");
+      validate_src(dest->indirect, state);
+   }
+}
+
+/*
+ * Validates an SSA definition: its index must be below the implementation's
+ * ssa_alloc and must not have been seen before, and it may have at most four
+ * components. A fresh ssa_def_validate_state with empty use sets is then
+ * registered for it in state->ssa_defs.
+ */
+static void
+validate_ssa_def(nir_ssa_def *def, validate_state *state)
+{
+   assert(def->index < state->impl->ssa_alloc);
+   /* every index may be defined only once */
+   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
+   BITSET_SET(state->ssa_defs_found, def->index);
+
+   assert(def->num_components <= 4);
+
+   /* allocated as a ralloc child of the table; the sets are children of
+    * the state they belong to
+    */
+   ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
+                                              ssa_def_validate_state);
+   def_state->where_defined = state->impl;
+   def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   _mesa_hash_table_insert(state->ssa_defs, def, def_state);
+}
+
+static void
+validate_dest(nir_dest *dest, validate_state *state)
+{
+   if (dest->is_ssa)
+      validate_ssa_def(&dest->ssa, state);
+   else
+      validate_reg_dest(&dest->reg, state);
+}
+
+static void
+validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+{
+   unsigned dest_size =
+      dest->dest.is_ssa ? dest->dest.ssa.num_components
+                        : dest->dest.reg.reg->num_components;
+   bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed;
+   /*
+    * validate that the instruction doesn't write to components not in the
+    * register/SSA value
+    */
+   assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1)));
+
+   /* validate that saturate is only ever used on instructions with
+    * destinations of type float
+    */
+   nir_alu_instr *alu = nir_instr_as_alu(state->instr);
+   assert(nir_op_infos[alu->op].output_type == nir_type_float ||
+          !dest->saturate);
+
+   validate_dest(&dest->dest, state);
+}
+
+static void
+validate_alu_instr(nir_alu_instr *instr, validate_state *state)
+{
+   assert(instr->op < nir_num_opcodes);
+
+   validate_alu_dest(&instr->dest, state);
+
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      validate_alu_src(instr, i, state);
+   }
+}
+
+static void
+validate_deref_chain(nir_deref *deref, validate_state *state)
+{
+   nir_deref *parent = NULL;
+   while (deref != NULL) {
+      switch (deref->deref_type) {
+      case nir_deref_type_array:
+         assert(deref->type == glsl_get_array_element(parent->type));
+         if (nir_deref_as_array(deref)->deref_array_type ==
+             nir_deref_array_type_indirect)
+            validate_src(&nir_deref_as_array(deref)->indirect, state);
+         break;
+
+      case nir_deref_type_struct:
+         assert(deref->type ==
+                glsl_get_struct_field(parent->type,
+                                      nir_deref_as_struct(deref)->index));
+         break;
+
+      case nir_deref_type_var:
+         break;
+
+      default:
+         assert(!"Invalid deref type");
+         break;
+      }
+
+      parent = deref;
+      deref = deref->child;
+   }
+}
+
+static void
+validate_var_use(nir_variable *var, validate_state *state)
+{
+   if (var->data.mode == nir_var_local) {
+      struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var);
+
+      assert(entry);
+      assert((nir_function_impl *) entry->data == state->impl);
+   }
+}
+
+static void
+validate_deref_var(nir_deref_var *deref, validate_state *state)
+{
+   assert(deref != NULL);
+   assert(deref->deref.type == deref->var->type);
+
+   validate_var_use(deref->var, state);
+
+   validate_deref_chain(&deref->deref, state);
+}
+
+static void
+validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
+{
+   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+   for (unsigned i = 0; i < num_srcs; i++) {
+      unsigned components_read =
+         nir_intrinsic_infos[instr->intrinsic].src_components[i];
+      if (components_read == 0)
+         components_read = instr->num_components;
+
+      assert(components_read > 0);
+
+      if (instr->src[i].is_ssa) {
+         assert(components_read <= instr->src[i].ssa->num_components);
+      } else if (!instr->src[i].reg.reg->is_packed) {
+         assert(components_read <= instr->src[i].reg.reg->num_components);
+      }
+
+      validate_src(&instr->src[i], state);
+   }
+
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
+      unsigned components_written =
+         nir_intrinsic_infos[instr->intrinsic].dest_components;
+      if (components_written == 0)
+         components_written = instr->num_components;
+
+      assert(components_written > 0);
+
+      if (instr->dest.is_ssa) {
+         assert(components_written <= instr->dest.ssa.num_components);
+      } else if (!instr->dest.reg.reg->is_packed) {
+         assert(components_written <= instr->dest.reg.reg->num_components);
+      }
+
+      validate_dest(&instr->dest, state);
+   }
+
+   unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+   for (unsigned i = 0; i < num_vars; i++) {
+      validate_deref_var(instr->variables[i], state);
+   }
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
+      break;
+   case nir_intrinsic_store_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+             instr->variables[0]->var->data.mode != nir_var_uniform);
+      break;
+   case nir_intrinsic_copy_var:
+      assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
+             instr->variables[0]->var->data.mode != nir_var_uniform);
+      assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
+      break;
+   default:
+      break;
+   }
+}
+
+static void
+validate_tex_instr(nir_tex_instr *instr, validate_state *state)
+{
+   validate_dest(&instr->dest, state);
+
+   bool src_type_seen[nir_num_tex_src_types];
+   for (unsigned i = 0; i < nir_num_tex_src_types; i++)
+      src_type_seen[i] = false;
+
+   for (unsigned i = 0; i < instr->num_srcs; i++) {
+      assert(!src_type_seen[instr->src[i].src_type]);
+      src_type_seen[instr->src[i].src_type] = true;
+      validate_src(&instr->src[i].src, state);
+   }
+
+   if (instr->sampler != NULL)
+      validate_deref_var(instr->sampler, state);
+}
+
+/* Validates a call: return deref (absent for void callees) and parameters. */
+static void
+validate_call_instr(nir_call_instr *instr, validate_state *state)
+{
+   if (instr->return_deref == NULL) {
+      assert(glsl_type_is_void(instr->callee->return_type));
+   } else {
+      assert(instr->return_deref->deref.type == instr->callee->return_type);
+      validate_deref_var(instr->return_deref, state);
+   }
+
+   assert(instr->num_params == instr->callee->num_params);
+   for (unsigned i = 0; i < instr->num_params; i++) {
+      assert(instr->callee->params[i].type == instr->params[i]->deref.type);
+      validate_deref_var(instr->params[i], state);
+   }
+}
+
+static void
+validate_load_const_instr(nir_load_const_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+static void
+validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state)
+{
+   validate_ssa_def(&instr->def, state);
+}
+
+static void
+validate_phi_instr(nir_phi_instr *instr, validate_state *state)
+{
+   /*
+    * don't validate the sources until we get to them from their predecessor
+    * basic blocks, to avoid validating an SSA use before its definition.
+    */
+
+   validate_dest(&instr->dest, state);
+
+   exec_list_validate(&instr->srcs);
+   assert(exec_list_length(&instr->srcs) ==
+          state->block->predecessors->entries);
+}
+
+/* Dispatches on instr->type; state->instr tracks instr during the check. */
+static void
+validate_instr(nir_instr *instr, validate_state *state)
+{
+   assert(instr->block == state->block);
+   state->instr = instr;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      validate_alu_instr(nir_instr_as_alu(instr), state);
+      break;
+
+   case nir_instr_type_call:
+      validate_call_instr(nir_instr_as_call(instr), state);
+      break;
+
+   case nir_instr_type_intrinsic:
+      validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state);
+      break;
+
+   case nir_instr_type_tex:
+      validate_tex_instr(nir_instr_as_tex(instr), state);
+      break;
+
+   case nir_instr_type_load_const:
+      validate_load_const_instr(nir_instr_as_load_const(instr), state);
+      break;
+
+   case nir_instr_type_phi:
+      validate_phi_instr(nir_instr_as_phi(instr), state);
+      break;
+
+   case nir_instr_type_ssa_undef:
+      validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state);
+      break;
+
+   case nir_instr_type_jump:
+      break;
+
+   default:
+      assert(!"Invalid instruction type");
+      break;
+   }
+
+   state->instr = NULL;
+}
+
+static void
+validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state)
+{
+   state->instr = &instr->instr;
+
+   assert(instr->dest.is_ssa);
+
+   exec_list_validate(&instr->srcs);
+   nir_foreach_phi_src(instr, src) {
+      if (src->pred == pred) {
+         assert(src->src.is_ssa);
+         assert(src->src.ssa->num_components ==
+                instr->dest.ssa.num_components);
+
+         validate_src(&src->src, state);
+         state->instr = NULL;
+         return;
+      }
+   }
+
+   abort();
+}
+
+static void
+validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state)
+{
+   nir_foreach_instr(succ, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      validate_phi_src(nir_instr_as_phi(instr), block, state);
+   }
+}
+
+static void validate_cf_node(nir_cf_node *node, validate_state *state);
+
+static void
+validate_block(nir_block *block, validate_state *state)
+{
+   assert(block->cf_node.parent == state->parent_node);
+
+   state->block = block;
+
+   exec_list_validate(&block->instr_list);
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_phi) {
+         assert(instr == nir_block_first_instr(block) ||
+                nir_instr_prev(instr)->type == nir_instr_type_phi);
+      }
+
+      if (instr->type == nir_instr_type_jump) {
+         assert(instr == nir_block_last_instr(block));
+      }
+
+      validate_instr(instr, state);
+   }
+
+   assert(block->successors[0] != NULL);
+
+   for (unsigned i = 0; i < 2; i++) {
+      if (block->successors[i] != NULL) {
+         struct set_entry *entry =
+            _mesa_set_search(block->successors[i]->predecessors, block);
+         assert(entry);
+
+         validate_phi_srcs(block, block->successors[i], state);
+      }
+   }
+
+   if (!exec_list_is_empty(&block->instr_list) &&
+       nir_block_last_instr(block)->type == nir_instr_type_jump)
+      assert(block->successors[1] == NULL);
+}
+
+static void
+validate_if(nir_if *if_stmt, validate_state *state)
+{
+   state->if_stmt = if_stmt;
+
+   assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node ==
+          nir_if_first_then_node(if_stmt));
+   assert(&prev_block->successors[1]->cf_node ==
+          nir_if_first_else_node(if_stmt));
+
+   assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   validate_src(&if_stmt->condition, state);
+
+   assert(!exec_list_is_empty(&if_stmt->then_list));
+   assert(!exec_list_is_empty(&if_stmt->else_list));
+
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &if_stmt->cf_node;
+
+   exec_list_validate(&if_stmt->then_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) {
+      validate_cf_node(cf_node, state);
+   }
+
+   exec_list_validate(&if_stmt->else_list);
+   foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+   state->if_stmt = NULL;
+}
+
+static void
+validate_loop(nir_loop *loop, validate_state *state)
+{
+   assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev));
+   nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node);
+   assert(prev_node->type == nir_cf_node_block);
+
+   nir_block *prev_block = nir_cf_node_as_block(prev_node);
+   assert(&prev_block->successors[0]->cf_node == nir_loop_first_cf_node(loop));
+   assert(prev_block->successors[1] == NULL);
+
+   assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next));
+   nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node);
+   assert(next_node->type == nir_cf_node_block);
+
+   assert(!exec_list_is_empty(&loop->body));
+
+   nir_cf_node *old_parent = state->parent_node;
+   state->parent_node = &loop->cf_node;
+
+   exec_list_validate(&loop->body);
+   foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) {
+      validate_cf_node(cf_node, state);
+   }
+
+   state->parent_node = old_parent;
+}
+
+/* Validates one control-flow node by dispatching on its kind. */
+static void
+validate_cf_node(nir_cf_node *node, validate_state *state)
+{
+   assert(node->parent == state->parent_node);
+   switch (node->type) {
+   case nir_cf_node_block:
+      validate_block(nir_cf_node_as_block(node), state);
+      break;
+
+   case nir_cf_node_if:
+      validate_if(nir_cf_node_as_if(node), state);
+      break;
+
+   case nir_cf_node_loop:
+      validate_loop(nir_cf_node_as_loop(node), state);
+      break;
+
+   default:
+      assert(!"Invalid CF node type");
+      break;
+   }
+}
+
+static void
+prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state)
+{
+   assert(reg->is_global == is_global);
+
+   if (is_global)
+      assert(reg->index < state->shader->reg_alloc);
+   else
+      assert(reg->index < state->impl->reg_alloc);
+   assert(!BITSET_TEST(state->regs_found, reg->index));
+   BITSET_SET(state->regs_found, reg->index);
+
+   reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state);
+   reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+   reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+
+   reg_state->where_defined = is_global ? NULL : state->impl;
+
+   _mesa_hash_table_insert(state->regs, reg, reg_state);
+}
+
+static void
+postvalidate_reg_decl(nir_register *reg, validate_state *state)
+{
+   struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+   if (reg_state->uses->entries != reg->uses->entries) {
+      printf("extra entries in register uses:\n");
+      struct set_entry *entry;
+      set_foreach(reg->uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (reg_state->if_uses->entries != reg->if_uses->entries) {
+      printf("extra entries in register if_uses:\n");
+      struct set_entry *entry;
+      set_foreach(reg->if_uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->if_uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (reg_state->defs->entries != reg->defs->entries) {
+      printf("extra entries in register defs:\n");
+      struct set_entry *entry;
+      set_foreach(reg->defs, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(reg_state->defs, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+}
+
+static void
+validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
+{
+   assert(is_global != (var->data.mode == nir_var_local));
+
+   /*
+    * TODO validate some things ir_validate.cpp does (requires more GLSL type
+    * support)
+    */
+
+   if (!is_global) {
+      _mesa_hash_table_insert(state->var_defs, var, state->impl);
+   }
+}
+
+/* Callback for nir_foreach_ssa_def: abort() if def's use counts disagree. */
+static bool
+postvalidate_ssa_def(nir_ssa_def *def, void *void_state)
+{
+   validate_state *state = void_state;
+   struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def);
+   ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data;
+
+   if (def_state->uses->entries != def->uses->entries) {
+      printf("extra entries in SSA def uses:\n");
+      struct set_entry *entry;
+      set_foreach(def->uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(def_state->uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   if (def_state->if_uses->entries != def->if_uses->entries) {
+      printf("extra entries in SSA def if_uses:\n");
+      struct set_entry *entry;
+      set_foreach(def->if_uses, entry) {
+         struct set_entry *entry2 =
+            _mesa_set_search(def_state->if_uses, entry->key);
+
+         if (entry2 == NULL) {
+            printf("%p\n", entry->key);
+         }
+      }
+
+      abort();
+   }
+
+   return true;
+}
+
+static bool
+postvalidate_ssa_defs_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr)
+      nir_foreach_ssa_def(instr, postvalidate_ssa_def, state);
+
+   return true;
+}
+
+static void
+validate_function_impl(nir_function_impl *impl, validate_state *state)
+{
+   assert(impl->overload->impl == impl);
+   assert(impl->cf_node.parent == NULL);
+
+   assert(impl->num_params == impl->overload->num_params);
+   for (unsigned i = 0; i < impl->num_params; i++)
+      assert(impl->params[i]->type == impl->overload->params[i].type);
+
+   if (glsl_type_is_void(impl->overload->return_type))
+      assert(impl->return_var == NULL);
+   else
+      assert(impl->return_var->type == impl->overload->return_type);
+
+   assert(exec_list_is_empty(&impl->end_block->instr_list));
+   assert(impl->end_block->successors[0] == NULL);
+   assert(impl->end_block->successors[1] == NULL);
+
+   state->impl = impl;
+   state->parent_node = &impl->cf_node;
+
+   exec_list_validate(&impl->locals);
+   foreach_list_typed(nir_variable, var, node, &impl->locals) {
+      validate_var_decl(var, false, state);
+   }
+
+   state->regs_found = realloc(state->regs_found,
+                               BITSET_WORDS(impl->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) *
+                                sizeof(BITSET_WORD));
+   exec_list_validate(&impl->registers);
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      prevalidate_reg_decl(reg, false, state);
+   }
+
+   state->ssa_defs_found = realloc(state->ssa_defs_found,
+                                   BITSET_WORDS(impl->ssa_alloc) *
+                                   sizeof(BITSET_WORD));
+   memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) *
+                                    sizeof(BITSET_WORD));
+   exec_list_validate(&impl->body);
+   foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+      validate_cf_node(node, state);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &impl->registers) {
+      postvalidate_reg_decl(reg, state);
+   }
+
+   nir_foreach_block(impl, postvalidate_ssa_defs_block, state);
+}
+
+static void
+validate_function_overload(nir_function_overload *overload,
+                           validate_state *state)
+{
+   if (overload->impl != NULL)
+      validate_function_impl(overload->impl, state);
+}
+
+static void
+validate_function(nir_function *func, validate_state *state)
+{
+   exec_list_validate(&func->overload_list);
+   foreach_list_typed(nir_function_overload, overload, node, &func->overload_list) {
+      assert(overload->function == func);
+      validate_function_overload(overload, state);
+   }
+}
+
+static void
+init_validate_state(validate_state *state)
+{
+   state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
+   state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+   state->ssa_defs_found = NULL;
+   state->regs_found = NULL;
+   state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+}
+
+static void
+destroy_validate_state(validate_state *state)
+{
+   _mesa_hash_table_destroy(state->regs, NULL);
+   _mesa_hash_table_destroy(state->ssa_defs, NULL);
+   free(state->ssa_defs_found);
+   free(state->regs_found);
+   _mesa_hash_table_destroy(state->var_defs, NULL);
+}
+
+void
+nir_validate_shader(nir_shader *shader)
+{
+   validate_state state;
+   init_validate_state(&state);
+
+   state.shader = shader;
+
+   struct hash_entry *entry;
+   hash_table_foreach(shader->uniforms, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->inputs, entry) {
+     validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   hash_table_foreach(shader->outputs, entry) {
+      validate_var_decl((nir_variable *) entry->data, true, &state);
+   }
+
+   exec_list_validate(&shader->globals);
+   foreach_list_typed(nir_variable, var, node, &shader->globals) {
+     validate_var_decl(var, true, &state);
+   }
+
+   exec_list_validate(&shader->system_values);
+   foreach_list_typed(nir_variable, var, node, &shader->system_values) {
+     validate_var_decl(var, true, &state);
+   }
+
+   state.regs_found = realloc(state.regs_found,
+                              BITSET_WORDS(shader->reg_alloc) *
+                              sizeof(BITSET_WORD));
+   memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) *
+                               sizeof(BITSET_WORD));
+   exec_list_validate(&shader->registers);
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      prevalidate_reg_decl(reg, true, &state);
+   }
+
+   exec_list_validate(&shader->functions);
+   foreach_list_typed(nir_function, func, node, &shader->functions) {
+      validate_function(func, &state);
+   }
+
+   foreach_list_typed(nir_register, reg, node, &shader->registers) {
+      postvalidate_reg_decl(reg, &state);
+   }
+
+   destroy_validate_state(&state);
+}
+
+#endif /* NDEBUG */
diff --git a/mesalib/src/glsl/nir/nir_worklist.c b/mesalib/src/glsl/nir/nir_worklist.c
new file mode 100644
index 000000000..a8baae937
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_worklist.h"
+
+void
+nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                        void *mem_ctx)
+{
+   w->size = num_blocks;
+   w->count = 0;
+   w->start = 0;
+
+   w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD,
+                                     BITSET_WORDS(num_blocks));
+   w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks);
+}
+
+void
+nir_block_worklist_fini(nir_block_worklist *w)
+{
+   ralloc_free(w->blocks_present);
+   ralloc_free(w->blocks);
+}
+
+static bool
+worklist_add_block(nir_block *block, void *w)
+{
+   nir_block_worklist_push_tail(w, block);
+
+   return true;
+}
+
+void
+nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl)
+{
+   nir_foreach_block(impl, worklist_add_block, w);
+}
+
+void
+nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   if (w->start == 0)
+      w->start = w->size - 1;
+   else
+      w->start--;
+
+   w->count++;
+
+   w->blocks[w->start] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+nir_block *
+nir_block_worklist_peek_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   return w->blocks[w->start];
+}
+
+nir_block *
+nir_block_worklist_pop_head(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned head = w->start;
+
+   w->start = (w->start + 1) % w->size;
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[head]->index);
+   return w->blocks[head];
+}
+
+/* Appends block at the tail; w->start (the head index) is left untouched. */
+void
+nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block)
+{
+   /* Pushing a block we already have is a no-op */
+   if (BITSET_TEST(w->blocks_present, block->index))
+      return;
+
+   assert(w->count < w->size);
+
+   w->count++;
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->blocks[tail] = block;
+   BITSET_SET(w->blocks_present, block->index);
+}
+
+/* Returns the tail block without removing it or modifying the worklist. */
+nir_block *
+nir_block_worklist_peek_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned tail = (w->start + w->count - 1) % w->size;
+   return w->blocks[tail];
+}
+
+/* Removes and returns the tail block; only count shrinks, not w->start. */
+nir_block *
+nir_block_worklist_pop_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned tail = (w->start + w->count - 1) % w->size;
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[tail]->index);
+   return w->blocks[tail];
+}
diff --git a/mesalib/src/glsl/nir/nir_worklist.h b/mesalib/src/glsl/nir/nir_worklist.h
new file mode 100644
index 000000000..d5a8568e4
--- /dev/null
+++ b/mesalib/src/glsl/nir/nir_worklist.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_WORKLIST_
+#define _NIR_WORKLIST_
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Represents a double-ended queue of unique blocks
+ *
+ * The worklist data structure guarantees that each block is in the queue at
+ * most once.  Pushing a block onto either end of the queue is a no-op if
+ * the block is already in the queue.  In order for this to work, the
+ * caller must ensure that the blocks are properly indexed.
+ */
+typedef struct {
+   /* The total size of the worklist */
+   unsigned size;
+
+   /* The number of blocks currently in the worklist */
+   unsigned count;
+
+   /* The offset in the array of blocks at which the list starts */
+   unsigned start;
+
+   /* A bitset of all of the blocks currently present in the worklist */
+   BITSET_WORD *blocks_present;
+
+   /* The actual worklist */
+   nir_block **blocks;
+} nir_block_worklist;
+
+void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                             void *mem_ctx);
+void nir_block_worklist_fini(nir_block_worklist *w);
+
+void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl);
+
+static inline bool
+nir_block_worklist_is_empty(const nir_block_worklist *w)
+{
+   return w->count == 0;
+}
+
+void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_head(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
+
+void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_tail(nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _NIR_WORKLIST_ */
diff --git a/mesalib/src/glsl/opt_algebraic.cpp b/mesalib/src/glsl/opt_algebraic.cpp
index c6f4a9c78..6784242ff 100644
--- a/mesalib/src/glsl/opt_algebraic.cpp
+++ b/mesalib/src/glsl/opt_algebraic.cpp
@@ -119,6 +119,8 @@ is_valid_vec_const(ir_constant *ir)
 static inline bool
 is_less_than_one(ir_constant *ir)
 {
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
    if (!is_valid_vec_const(ir))
       return false;
 
@@ -134,6 +136,8 @@ is_less_than_one(ir_constant *ir)
 static inline bool
 is_greater_than_zero(ir_constant *ir)
 {
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
    if (!is_valid_vec_const(ir))
       return false;
 
@@ -376,6 +380,15 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       }
       break;
 
+   case ir_unop_f2i:
+   case ir_unop_f2u:
+      if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
+         return new(mem_ctx) ir_expression(ir->operation,
+                                           ir->type,
+                                           op_expr[0]->operands[0]);
+      }
+      break;
+
    case ir_unop_logic_not: {
       enum ir_expression_operation new_op = ir_unop_logic_not;
 
@@ -514,10 +527,45 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (op_const[1] && !op_const[0])
 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
 
+      /* Optimizes
+       *
+       *    (mul (floor (add (abs x) 0.5)) (sign x))
+       *
+       * into
+       *
+       *    (trunc (add x (mul (sign x) 0.5)))
+       */
+      for (int i = 0; i < 2; i++) {
+         ir_expression *sign_expr = ir->operands[i]->as_expression();
+         ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
+         if (!sign_expr || sign_expr->operation != ir_unop_sign ||
+             !floor_expr || floor_expr->operation != ir_unop_floor)
+            continue;
+         /* floor's operand may be a non-expression or not an add at all. */
+         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
+         if (!add_expr || add_expr->operation != ir_binop_add)
+            continue;
+
+         for (int j = 0; j < 2; j++) {
+            ir_expression *abs_expr = add_expr->operands[j]->as_expression();
+            if (!abs_expr || abs_expr->operation != ir_unop_abs)
+               continue;
+            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
+            if (!point_five || !point_five->is_value(0.5, 0))
+               continue;
+
+            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
+               return trunc(add(abs_expr->operands[0],
+                                mul(sign_expr, point_five)));
+            }
+         }
+      }
       break;
 
    case ir_binop_div:
-      if (is_vec_one(op_const[0]) && ir->type->base_type == GLSL_TYPE_FLOAT) {
+      if (is_vec_one(op_const[0]) && (
+                ir->type->base_type == GLSL_TYPE_FLOAT ||
+                ir->type->base_type == GLSL_TYPE_DOUBLE)) {
 	 return new(mem_ctx) ir_expression(ir_unop_rcp,
 					   ir->operands[1]->type,
 					   ir->operands[1],
@@ -538,7 +586,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
          unsigned components[4] = { 0 }, count = 0;
 
          for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
-            if (op_const[i]->value.f[c] == 0.0)
+            if (op_const[i]->is_zero())
                continue;
 
             components[count] = c;
@@ -554,7 +602,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
 
          /* Swizzle both operands to remove the channels that were zero. */
          return new(mem_ctx)
-            ir_expression(op, glsl_type::float_type,
+            ir_expression(op, ir->type,
                           new(mem_ctx) ir_swizzle(ir->operands[0],
                                                   components, count),
                           new(mem_ctx) ir_swizzle(ir->operands[1],
@@ -747,6 +795,12 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
       if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
 	 return op_expr[0]->operands[0];
 
+      if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
+                         op_expr[0]->operation == ir_unop_exp)) {
+         return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
+                                           neg(op_expr[0]->operands[0]));
+      }
+
       /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
        * its IR level, so we can always apply this transformation.
        */
@@ -785,7 +839,19 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
          return mul(ir->operands[1], ir->operands[2]);
       } else if (is_vec_zero(op_const[1])) {
          unsigned op2_components = ir->operands[2]->type->vector_elements;
-         ir_constant *one = new(mem_ctx) ir_constant(1.0f, op2_components);
+         ir_constant *one;
+
+         switch (ir->type->base_type) {
+         case GLSL_TYPE_FLOAT:
+            one = new(mem_ctx) ir_constant(1.0f, op2_components);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            one = new(mem_ctx) ir_constant(1.0, op2_components);
+            break;
+         default:
+            unreachable("unexpected type");
+         }
+
          return mul(ir->operands[0], add(one, neg(ir->operands[2])));
       }
       break;
diff --git a/mesalib/src/glsl/opt_constant_propagation.cpp b/mesalib/src/glsl/opt_constant_propagation.cpp
index c334e1276..90cc0c89b 100644
--- a/mesalib/src/glsl/opt_constant_propagation.cpp
+++ b/mesalib/src/glsl/opt_constant_propagation.cpp
@@ -194,6 +194,9 @@ ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
       case GLSL_TYPE_FLOAT:
 	 data.f[i] = found->constant->value.f[rhs_channel];
 	 break;
+      case GLSL_TYPE_DOUBLE:
+	 data.d[i] = found->constant->value.d[rhs_channel];
+	 break;
       case GLSL_TYPE_INT:
 	 data.i[i] = found->constant->value.i[rhs_channel];
 	 break;
diff --git a/mesalib/src/glsl/opt_copy_propagation.cpp b/mesalib/src/glsl/opt_copy_propagation.cpp
index 5c65af66b..806027b28 100644
--- a/mesalib/src/glsl/opt_copy_propagation.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation.cpp
@@ -128,6 +128,9 @@ ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir)
 
    visit_list_elements(this, &ir->body);
 
+   ralloc_free(this->acp);
+   ralloc_free(this->kills);
+
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = orig_killed_all;
@@ -215,7 +218,7 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 
    /* Populate the initial acp with a copy of the original */
    foreach_in_list(acp_entry, a, orig_acp) {
-      this->acp->push_tail(new(this->mem_ctx) acp_entry(a->lhs, a->rhs));
+      this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs));
    }
 
    visit_list_elements(this, instructions);
@@ -226,12 +229,15 @@ ir_copy_propagation_visitor::handle_if_block(exec_list *instructions)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
    foreach_in_list(kill_entry, k, new_kills) {
       kill(k->var);
    }
+
+   ralloc_free(new_kills);
 }
 
 ir_visitor_status
@@ -269,6 +275,7 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -276,6 +283,8 @@ ir_copy_propagation_visitor::visit_enter(ir_loop *ir)
       kill(k->var);
    }
 
+   ralloc_free(new_kills);
+
    /* already descended into the children. */
    return visit_continue_with_parent;
 }
@@ -294,7 +303,7 @@ ir_copy_propagation_visitor::kill(ir_variable *var)
 
    /* Add the LHS variable to the list of killed variables in this block.
     */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var));
+   this->kills->push_tail(new(this->kills) kill_entry(var));
 }
 
 /**
@@ -322,7 +331,7 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
 	 ir->condition = new(ralloc_parent(ir)) ir_constant(false);
 	 this->progress = true;
       } else {
-	 entry = new(this->mem_ctx) acp_entry(lhs_var, rhs_var);
+	 entry = new(this->acp) acp_entry(lhs_var, rhs_var);
 	 this->acp->push_tail(entry);
       }
    }
diff --git a/mesalib/src/glsl/opt_copy_propagation_elements.cpp b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
index c3e55bcd1..353a5c668 100644
--- a/mesalib/src/glsl/opt_copy_propagation_elements.cpp
+++ b/mesalib/src/glsl/opt_copy_propagation_elements.cpp
@@ -156,6 +156,9 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
 
    visit_list_elements(this, &ir->body);
 
+   ralloc_free(this->acp);
+   ralloc_free(this->kills);
+
    this->kills = orig_kills;
    this->acp = orig_acp;
    this->killed_all = orig_killed_all;
@@ -173,9 +176,9 @@ ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
       kill_entry *k;
 
       if (lhs)
-	 k = new(mem_ctx) kill_entry(var, ir->write_mask);
+	 k = new(this->kills) kill_entry(var, ir->write_mask);
       else
-	 k = new(mem_ctx) kill_entry(var, ~0);
+	 k = new(this->kills) kill_entry(var, ~0);
 
       kill(k);
    }
@@ -334,7 +337,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
 
    /* Populate the initial acp with a copy of the original */
    foreach_in_list(acp_entry, a, orig_acp) {
-      this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
+      this->acp->push_tail(new(this->acp) acp_entry(a));
    }
 
    visit_list_elements(this, instructions);
@@ -345,6 +348,7 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -354,6 +358,8 @@ ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
    foreach_in_list_safe(kill_entry, k, new_kills) {
       kill(k);
    }
+
+   ralloc_free(new_kills);
 }
 
 ir_visitor_status
@@ -391,6 +397,7 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
 
    exec_list *new_kills = this->kills;
    this->kills = orig_kills;
+   ralloc_free(this->acp);
    this->acp = orig_acp;
    this->killed_all = this->killed_all || orig_killed_all;
 
@@ -398,6 +405,8 @@ ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
       kill(k);
    }
 
+   ralloc_free(new_kills);
+
    /* already descended into the children. */
    return visit_continue_with_parent;
 }
@@ -423,6 +432,7 @@ ir_copy_propagation_elements_visitor::kill(kill_entry *k)
    if (k->next)
       k->remove();
 
+   ralloc_steal(this->kills, k);
    this->kills->push_tail(k);
 }
 
diff --git a/mesalib/src/glsl/opt_dead_builtin_variables.cpp b/mesalib/src/glsl/opt_dead_builtin_variables.cpp
index 85c75d6f2..0d4e3a8f0 100644
--- a/mesalib/src/glsl/opt_dead_builtin_variables.cpp
+++ b/mesalib/src/glsl/opt_dead_builtin_variables.cpp
@@ -52,7 +52,7 @@ optimize_dead_builtin_variables(exec_list *instructions,
           && var->data.how_declared != ir_var_declared_implicitly)
          continue;
 
-      if (strncmp(var->name, "gl_", 3) != 0)
+      if (!is_gl_identifier(var->name))
          continue;
 
       /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they
diff --git a/mesalib/src/glsl/opt_minmax.cpp b/mesalib/src/glsl/opt_minmax.cpp
index 32fb2d7ea..23d0b109d 100644
--- a/mesalib/src/glsl/opt_minmax.cpp
+++ b/mesalib/src/glsl/opt_minmax.cpp
@@ -133,6 +133,14 @@ compare_components(ir_constant *a, ir_constant *b)
          else
             foundequal = true;
          break;
+      case GLSL_TYPE_DOUBLE:
+         if (a->value.d[c0] < b->value.d[c1])
+            foundless = true;
+         else if (a->value.d[c0] > b->value.d[c1])
+            foundgreater = true;
+         else
+            foundequal = true;
+         break;
       default:
          unreachable("not reached");
       }
@@ -178,6 +186,11 @@ combine_constant(bool ismin, ir_constant *a, ir_constant *b)
              (!ismin && b->value.f[i] > c->value.f[i]))
             c->value.f[i] = b->value.f[i];
          break;
+      case GLSL_TYPE_DOUBLE:
+         if ((ismin && b->value.d[i] < c->value.d[i]) ||
+             (!ismin && b->value.d[i] > c->value.d[i]))
+            c->value.d[i] = b->value.d[i];
+         break;
       default:
          assert(!"not reached");
       }
diff --git a/mesalib/src/glsl/standalone_scaffolding.cpp b/mesalib/src/glsl/standalone_scaffolding.cpp
index 67b0d0c82..ad0d75bf8 100644
--- a/mesalib/src/glsl/standalone_scaffolding.cpp
+++ b/mesalib/src/glsl/standalone_scaffolding.cpp
@@ -127,6 +127,7 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
    ctx->Extensions.ARB_fragment_coord_conventions = true;
    ctx->Extensions.ARB_fragment_layer_viewport = true;
    ctx->Extensions.ARB_gpu_shader5 = true;
+   ctx->Extensions.ARB_gpu_shader_fp64 = true;
    ctx->Extensions.ARB_sample_shading = true;
    ctx->Extensions.ARB_shader_bit_encoding = true;
    ctx->Extensions.ARB_shader_stencil_export = true;
diff --git a/mesalib/src/loader/Makefile.am b/mesalib/src/loader/Makefile.am
index c59663681..36ddba82b 100644
--- a/mesalib/src/loader/Makefile.am
+++ b/mesalib/src/loader/Makefile.am
@@ -35,7 +35,6 @@ libloader_la_CPPFLAGS = \
 libloader_la_SOURCES = $(LOADER_C_FILES)
 libloader_la_LIBADD =
 
-if NEED_OPENGL_COMMON
 if HAVE_DRICOMMON
 libloader_la_CPPFLAGS += \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
@@ -52,7 +51,6 @@ libloader_la_LIBADD += \
 	-lm \
 	$(EXPAT_LIBS)
 endif
-endif
 
 if !HAVE_LIBDRM
 libloader_la_CPPFLAGS += \
diff --git a/mesalib/src/loader/loader.c b/mesalib/src/loader/loader.c
index 94c993ac8..9ff511522 100644
--- a/mesalib/src/loader/loader.c
+++ b/mesalib/src/loader/loader.c
@@ -207,9 +207,12 @@ libudev_get_pci_id_for_fd(int fd, int *vendor_id, int *chip_id)
    }
 
    pci_id = udev_device_get_property_value(parent, "PCI_ID");
-   if (pci_id == NULL ||
-       sscanf(pci_id, "%x:%x", vendor_id, chip_id) != 2) {
-      log_(_LOADER_WARNING, "MESA-LOADER: malformed or no PCI ID\n");
+   if (pci_id == NULL) {
+      log_(_LOADER_INFO, "MESA-LOADER: no PCI ID\n");
+      *chip_id = -1;
+      goto out;
+   } else if (sscanf(pci_id, "%x:%x", vendor_id, chip_id) != 2) {
+      log_(_LOADER_WARNING, "MESA-LOADER: malformed PCI ID\n");
       *chip_id = -1;
       goto out;
    }
diff --git a/mesalib/src/mapi/Makefile.am b/mesalib/src/mapi/Makefile.am
index 024283c55..679468291 100644
--- a/mesalib/src/mapi/Makefile.am
+++ b/mesalib/src/mapi/Makefile.am
@@ -19,11 +19,11 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+AUTOMAKE_OPTIONS = subdir-objects
+
 SUBDIRS =
 TESTS =
 
-TOP = $(top_srcdir)
-
 BUILT_SOURCES =
 CLEANFILES = $(BUILT_SOURCES)
 
@@ -47,10 +47,20 @@ AM_CPPFLAGS =							\
 	-I$(top_srcdir)/src/mapi				\
 	-I$(top_builddir)/src/mapi
 
-GLAPI = $(top_srcdir)/src/mapi/glapi
 include Makefile.sources
-include glapi/Makefile.sources
-include glapi/gen/glapi_gen.mk
+
+glapi_gen_mapi_deps := \
+	mapi_abi.py \
+	$(wildcard glapi/gen/*.xml) \
+	$(wildcard glapi/gen/*.py)
+
+# $(1): path to an XML file
+# $(2): name of the printer
+define glapi_gen_mapi
+@$(MKDIR_P) $(dir $@)
+$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
+	--mode lib --printer $(2) $(1) > $@
+endef
 
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h
@@ -81,7 +91,7 @@ shared_glapi_test_LDADD = \
 	$(top_builddir)/src/gtest/libgtest.la
 endif
 
-shared-glapi/glapi_mapi_tmp.h : $(GLAPI)/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
+shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 	$(call glapi_gen_mapi,$<,shared-glapi)
 
 if HAVE_OPENGL
@@ -89,16 +99,16 @@ noinst_LTLIBRARIES = glapi/libglapi.la
 
 if HAVE_X86_ASM
 if HAVE_X86_64_ASM
-GLAPI_ASM_SOURCES = $(X86_64_API)
+GLAPI_ASM_SOURCES = glapi/glapi_x86-64.S
 else
-GLAPI_ASM_SOURCES = $(X86_API)
+GLAPI_ASM_SOURCES = glapi/glapi_x86.S
 endif
 endif
 if HAVE_SPARC_ASM
-GLAPI_ASM_SOURCES = $(SPARC_API)
+GLAPI_ASM_SOURCES = glapi/glapi_sparc.S
 endif
 
-glapi_libglapi_la_SOURCES = $(GLAPI_UTIL_SOURCES)
+glapi_libglapi_la_SOURCES = glapi/glapi_gentable.c
 glapi_libglapi_la_CPPFLAGS = \
 	$(AM_CPPFLAGS) \
 	-I$(top_srcdir)/src/mapi/glapi \
@@ -113,7 +123,13 @@ else
 glapi_libglapi_la_CPPFLAGS += \
 	-DMAPI_MODE_UTIL
 glapi_libglapi_la_SOURCES += \
-	$(GLAPI_SOURCES) \
+	glapi/glapi_dispatch.c \
+	glapi/glapi_entrypoint.c \
+	glapi/glapi_getproc.c \
+	glapi/glapi_nop.c \
+	glapi/glapi.c \
+	glapi/glapi.h \
+	glapi/glapi_priv.h \
 	$(GLAPI_ASM_SOURCES) \
 	$(MAPI_UTIL_FILES)
 
@@ -214,8 +230,10 @@ endif
 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 	$(call glapi_gen_mapi,$<,es2api)
 
-if HAVE_OPENVG
-SUBDIRS += vgapi
-endif
+# XXX: Inline vgapi's Makefile.am here.
+EXTRA_DIST += vgapi
+# if HAVE_OPENVG
+# SUBDIRS += vgapi
+# endif
 
 include $(top_srcdir)/install-lib-links.mk
diff --git a/mesalib/src/mapi/Makefile.sources b/mesalib/src/mapi/Makefile.sources
index 4ce1afb23..41dbb24c8 100644
--- a/mesalib/src/mapi/Makefile.sources
+++ b/mesalib/src/mapi/Makefile.sources
@@ -15,38 +15,38 @@
 #    this mode, compile MAPI_BRIDGE_FILES with MAPI_MODE_BRIDGE defined.
 
 MAPI_UTIL_FILES = \
-	$(TOP)/src/mapi/u_compiler.h \
-	$(TOP)/src/mapi/u_current.c \
-	$(TOP)/src/mapi/u_current.h \
-	$(TOP)/src/mapi/u_execmem.c \
-	$(TOP)/src/mapi/u_execmem.h \
-	$(TOP)/src/mapi/u_macros.h \
-	$(TOP)/src/mapi/u_thread.h
+	u_compiler.h \
+	u_current.c \
+	u_current.h \
+	u_execmem.c \
+	u_execmem.h \
+	u_macros.h \
+	u_thread.h
 
 MAPI_BRIDGE_FILES = \
-	$(TOP)/src/mapi/entry.c \
-	$(TOP)/src/mapi/entry.h \
-	$(TOP)/src/mapi/entry_x86-64_tls.h \
-	$(TOP)/src/mapi/entry_x86_tls.h \
-	$(TOP)/src/mapi/entry_x86_tsd.h \
-	$(TOP)/src/mapi/mapi_tmp.h
+	entry.c \
+	entry.h \
+	entry_x86-64_tls.h \
+	entry_x86_tls.h \
+	entry_x86_tsd.h \
+	mapi_tmp.h
 
 MAPI_FILES = \
-	$(TOP)/src/mapi/entry.c \
-	$(TOP)/src/mapi/mapi.c \
-	$(TOP)/src/mapi/mapi.h \
-	$(TOP)/src/mapi/stub.c \
-	$(TOP)/src/mapi/stub.h \
-	$(TOP)/src/mapi/table.c \
-	$(TOP)/src/mapi/table.h \
+	entry.c \
+	mapi.c \
+	mapi.h \
+	stub.c \
+	stub.h \
+	table.c \
+	table.h \
 	$(MAPI_UTIL_FILES)
 
 MAPI_GLAPI_FILES = \
-	$(TOP)/src/mapi/entry.c \
-	$(TOP)/src/mapi/mapi_glapi.c \
-	$(TOP)/src/mapi/stub.c \
-	$(TOP)/src/mapi/stub.h \
-	$(TOP)/src/mapi/table.c \
-	$(TOP)/src/mapi/table.h \
+	entry.c \
+	mapi_glapi.c \
+	stub.c \
+	stub.h \
+	table.c \
+	table.h \
 	$(MAPI_UTIL_FILES)
 
diff --git a/mesalib/src/mapi/glapi/Makefile.sources b/mesalib/src/mapi/glapi/Makefile.sources
deleted file mode 100644
index df149a7d8..000000000
--- a/mesalib/src/mapi/glapi/Makefile.sources
+++ /dev/null
@@ -1,22 +0,0 @@
-# src/mapi/glapi/Makefile.sources
-
-GLAPI_UTIL_SOURCES = \
-	$(top_builddir)/src/mapi/glapi/glapi_gentable.c
-
-GLAPI_SOURCES = \
-	$(top_srcdir)/src/mapi/glapi/glapi_dispatch.c \
-	$(top_srcdir)/src/mapi/glapi/glapi_entrypoint.c \
-	$(top_srcdir)/src/mapi/glapi/glapi_getproc.c \
-	$(top_srcdir)/src/mapi/glapi/glapi_nop.c \
-	$(top_srcdir)/src/mapi/glapi/glapi.c \
-	$(top_srcdir)/src/mapi/glapi/glapi.h \
-	$(top_srcdir)/src/mapi/glapi/glapi_priv.h
-
-X86_API =			\
-	$(top_builddir)/src/mapi/glapi/glapi_x86.S
-
-X86_64_API =			\
-	$(top_builddir)/src/mapi/glapi/glapi_x86-64.S
-
-SPARC_API =			\
-	$(top_builddir)/src/mapi/glapi/glapi_sparc.S
diff --git a/mesalib/src/mapi/glapi/gen/ARB_direct_state_access.xml b/mesalib/src/mapi/glapi/gen/ARB_direct_state_access.xml
new file mode 100644
index 000000000..2fe1638fd
--- /dev/null
+++ b/mesalib/src/mapi/glapi/gen/ARB_direct_state_access.xml
@@ -0,0 +1,271 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+<category name="GL_ARB_direct_state_access" number="164">
+   <enum name="TEXTURE_TARGET"  value="0x1006"/>
+   <enum name="QUERY_TARGET"    value="0x82EA"/>
+   <enum name="TEXTURE_BINDING" value="0x82EB"/>
+
+   <!-- Texture object functions -->
+
+   <function name="CreateTextures" offset="assign">
+      <param name="target" type="GLenum" />
+      <param name="n" type="GLsizei" />
+      <param name="textures" type="GLuint *" />
+   </function>
+
+   <function name="TextureBuffer" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="internalformat" type="GLenum" />
+      <param name="buffer" type="GLuint" />
+   </function>
+
+   <function name="TextureStorage1D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="levels" type="GLsizei" />
+      <param name="internalformat" type="GLenum" />
+      <param name="width" type="GLsizei" />
+   </function>
+
+   <function name="TextureStorage2D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="levels" type="GLsizei" />
+      <param name="internalformat" type="GLenum" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+   </function>
+
+   <function name="TextureStorage3D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="levels" type="GLsizei" />
+      <param name="internalformat" type="GLenum" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="depth" type="GLsizei" />
+   </function>
+
+   <function name="TextureStorage2DMultisample" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="samples" type="GLsizei" />
+      <param name="internalformat" type="GLenum" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="fixedsamplelocations" type="GLboolean" />
+   </function>
+
+   <function name="TextureStorage3DMultisample" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="samples" type="GLsizei" />
+      <param name="internalformat" type="GLenum" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="depth" type="GLsizei" />
+      <param name="fixedsamplelocations" type="GLboolean" />
+   </function>
+
+   <function name="TextureSubImage1D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="type" type="GLenum" />
+      <param name="pixels" type="const GLvoid *" />
+   </function>
+
+   <function name="TextureSubImage2D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="type" type="GLenum" />
+      <param name="pixels" type="const GLvoid *" />
+   </function>
+
+   <function name="TextureSubImage3D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="zoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="depth" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="type" type="GLenum" />
+      <param name="pixels" type="const GLvoid *" />
+   </function>
+
+   <function name="CompressedTextureSubImage1D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="imageSize" type="GLsizei" />
+      <param name="data" type="const GLvoid *" />
+   </function>
+
+   <function name="CompressedTextureSubImage2D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="imageSize" type="GLsizei" />
+      <param name="data" type="const GLvoid *" />
+   </function>
+
+   <function name="CompressedTextureSubImage3D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="zoffset" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+      <param name="depth" type="GLsizei" />
+      <param name="format" type="GLenum" />
+      <param name="imageSize" type="GLsizei" />
+      <param name="data" type="const GLvoid *" />
+   </function>
+
+   <function name="CopyTextureSubImage1D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="x" type="GLint" />
+      <param name="y" type="GLint" />
+      <param name="width" type="GLsizei" />
+   </function>
+
+   <function name="CopyTextureSubImage2D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="x" type="GLint" />
+      <param name="y" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+   </function>
+
+   <function name="CopyTextureSubImage3D" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="xoffset" type="GLint" />
+      <param name="yoffset" type="GLint" />
+      <param name="zoffset" type="GLint" />
+      <param name="x" type="GLint" />
+      <param name="y" type="GLint" />
+      <param name="width" type="GLsizei" />
+      <param name="height" type="GLsizei" />
+   </function>
+
+   <function name="TextureParameterf" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="param" type="GLfloat" />
+   </function>
+
+   <function name="TextureParameterfv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="param" type="const GLfloat *" />
+   </function>
+
+   <function name="TextureParameteri" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="param" type="GLint" />
+   </function>
+
+   <function name="TextureParameterIiv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="const GLint *" />
+   </function>
+
+   <function name="TextureParameterIuiv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="const GLuint *" />
+   </function>
+
+   <function name="TextureParameteriv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="param" type="const GLint *" />
+   </function>
+
+   <function name="GenerateTextureMipmap" offset="assign">
+      <param name="texture" type="GLuint" />
+   </function>
+
+   <function name="BindTextureUnit" offset="assign">
+      <param name="unit" type="GLuint" />
+      <param name="texture" type="GLuint" />
+   </function>
+
+   <function name="GetTextureImage" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="format" type="GLenum" />
+      <param name="type" type="GLenum" />
+      <param name="bufSize" type="GLsizei" />
+      <param name="pixels" type="GLvoid *" />
+   </function>
+
+   <function name="GetCompressedTextureImage" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="bufSize" type="GLsizei" />
+      <param name="pixels" type="GLvoid *" />
+   </function>
+
+   <function name="GetTextureLevelParameterfv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLfloat *" />
+   </function>
+
+   <function name="GetTextureLevelParameteriv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="level" type="GLint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLint *" />
+   </function>
+
+   <function name="GetTextureParameterfv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLfloat *" />
+   </function>
+
+   <function name="GetTextureParameterIiv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLint *" />
+   </function>
+
+   <function name="GetTextureParameterIuiv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLuint *" />
+   </function>
+
+   <function name="GetTextureParameteriv" offset="assign">
+      <param name="texture" type="GLuint" />
+      <param name="pname" type="GLenum" />
+      <param name="params" type="GLint *" />
+   </function>
+
+</category>
+</OpenGLAPI>
diff --git a/mesalib/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml b/mesalib/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml
new file mode 100644
index 000000000..4f860ef8c
--- /dev/null
+++ b/mesalib/src/mapi/glapi/gen/ARB_gpu_shader_fp64.xml
@@ -0,0 +1,143 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<OpenGLAPI>
+
+<category name="GL_ARB_gpu_shader_fp64" number="89">
+
+    <function name="Uniform1d" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="x" type="GLdouble"/>
+    </function>
+
+    <function name="Uniform2d" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="x" type="GLdouble"/>
+        <param name="y" type="GLdouble"/>
+    </function>
+
+    <function name="Uniform3d" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="x" type="GLdouble"/>
+        <param name="y" type="GLdouble"/>
+        <param name="z" type="GLdouble"/>
+    </function>
+
+    <function name="Uniform4d" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="x" type="GLdouble"/>
+        <param name="y" type="GLdouble"/>
+        <param name="z" type="GLdouble"/>
+        <param name="w" type="GLdouble"/>
+    </function>
+
+    <function name="Uniform1dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="Uniform2dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="Uniform3dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="Uniform4dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix2dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix3dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix4dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix2x3dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix2x4dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix3x2dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix3x4dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix4x2dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="UniformMatrix4x3dv" offset="assign">
+        <param name="location" type="GLint"/>
+        <param name="count" type="GLsizei"/>
+        <param name="transpose" type="GLboolean"/>
+        <param name="value" type="const GLdouble *"/>
+    </function>
+
+    <function name="GetUniformdv" offset="assign">
+        <param name="program" type="GLuint"/>
+        <param name="location" type="GLint"/>
+        <param name="params" type="GLdouble *"/>
+    </function>
+
+    <enum name="DOUBLE_VEC2"   value="0x8FFC"/>
+    <enum name="DOUBLE_VEC3"   value="0x8FFD"/>
+    <enum name="DOUBLE_VEC4"   value="0x8FFE"/>
+
+    <enum name="DOUBLE_MAT2"   value="0x8F46"/>
+    <enum name="DOUBLE_MAT3"   value="0x8F47"/>
+    <enum name="DOUBLE_MAT4"   value="0x8F48"/>
+    <enum name="DOUBLE_MAT2x3"   value="0x8F49"/>
+    <enum name="DOUBLE_MAT2x4"   value="0x8F4A"/>
+    <enum name="DOUBLE_MAT3x2"   value="0x8F4B"/>
+    <enum name="DOUBLE_MAT3x4"   value="0x8F4C"/>
+    <enum name="DOUBLE_MAT4x2"   value="0x8F4D"/>
+    <enum name="DOUBLE_MAT4x3"   value="0x8F4E"/>
+</category>
+
+</OpenGLAPI>
+
diff --git a/mesalib/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml b/mesalib/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml
new file mode 100644
index 000000000..5e8511783
--- /dev/null
+++ b/mesalib/src/mapi/glapi/gen/ARB_pipeline_statistics_query.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!DOCTYPE OpenGLAPI SYSTEM "gl_API.dtd">
+
+<!-- Note: no GLX protocol info yet. -->
+
+<OpenGLAPI>
+
+<category name="GL_ARB_pipeline_statistics_query" number="171">
+
+  <enum name="VERTICES_SUBMITTED_ARB"                      value="0x82EE"/>
+  <enum name="PRIMITIVES_SUBMITTED_ARB"                    value="0x82EF"/>
+  <enum name="VERTEX_SHADER_INVOCATIONS_ARB"               value="0x82F0"/>
+  <enum name="TESS_CONTROL_SHADER_PATCHES_ARB"             value="0x82F1"/>
+  <enum name="TESS_EVALUATION_SHADER_INVOCATIONS_ARB"      value="0x82F2"/>
+  <!-- <enum name="GEOMETRY_SHADER_INVOCATIONS"             value="0x887F"/> -->
+  <enum name="GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB"      value="0x82F3"/>
+  <enum name="FRAGMENT_SHADER_INVOCATIONS_ARB"             value="0x82F4"/>
+  <enum name="COMPUTE_SHADER_INVOCATIONS_ARB"              value="0x82F5"/>
+  <enum name="CLIPPING_INPUT_PRIMITIVES_ARB"               value="0x82F6"/>
+  <enum name="CLIPPING_OUTPUT_PRIMITIVES_ARB"              value="0x82F7"/>
+
+</category>
+
+</OpenGLAPI>
diff --git a/mesalib/src/mapi/glapi/gen/ARB_separate_shader_objects.xml b/mesalib/src/mapi/glapi/gen/ARB_separate_shader_objects.xml
index d006917af..96ae2b9cb 100644
--- a/mesalib/src/mapi/glapi/gen/ARB_separate_shader_objects.xml
+++ b/mesalib/src/mapi/glapi/gen/ARB_separate_shader_objects.xml
@@ -282,7 +282,6 @@
          <param name="infoLog" type="GLchar *" />
       </function>
 
-      <!-- depends on GL_ARB_gpu_shader_fp64
       <function name="ProgramUniform1d" offset="assign" static_dispatch="false">
          <param name="program" type="GLuint" />
          <param name="location" type="GLint" />
@@ -396,6 +395,5 @@
          <param name="count" type="GLsizei" />
          <param name="value" type="const GLdouble *" />
       </function>
-      -->
    </category>
 </OpenGLAPI>
diff --git a/mesalib/src/mapi/glapi/gen/Makefile.am b/mesalib/src/mapi/glapi/gen/Makefile.am
index e56b46e3d..1c4b86aab 100644
--- a/mesalib/src/mapi/glapi/gen/Makefile.am
+++ b/mesalib/src/mapi/glapi/gen/Makefile.am
@@ -121,6 +121,7 @@ API_XML = \
 	ARB_debug_output.xml \
 	ARB_depth_buffer_float.xml \
 	ARB_depth_clamp.xml \
+	ARB_direct_state_access.xml \
 	ARB_draw_buffers.xml \
 	ARB_draw_buffers_blend.xml \
 	ARB_draw_elements_base_vertex.xml \
@@ -131,12 +132,14 @@ API_XML = \
 	ARB_framebuffer_object.xml \
 	ARB_geometry_shader4.xml \
 	ARB_get_program_binary.xml \
+	ARB_gpu_shader_fp64.xml \
 	ARB_gpu_shader5.xml \
 	ARB_instanced_arrays.xml \
 	ARB_internalformat_query.xml \
 	ARB_invalidate_subdata.xml \
 	ARB_map_buffer_range.xml \
 	ARB_multi_bind.xml \
+	ARB_pipeline_statistics_query.xml \
 	ARB_robustness.xml \
 	ARB_sample_shading.xml \
 	ARB_sampler_objects.xml \
diff --git a/mesalib/src/mapi/glapi/gen/es_EXT.xml b/mesalib/src/mapi/glapi/gen/es_EXT.xml
index e2dc39021..3a2adeb04 100644
--- a/mesalib/src/mapi/glapi/gen/es_EXT.xml
+++ b/mesalib/src/mapi/glapi/gen/es_EXT.xml
@@ -837,4 +837,13 @@
     </function>
 </category>
 
+<!-- 151. GL_EXT_draw_buffers -->
+<category name="GL_EXT_draw_buffers" number="151">
+    <function name="DrawBuffersEXT" alias="DrawBuffers"
+              static_dispatch="false" es2="2.0">
+        <param name="n" type="GLsizei" counter="true"/>
+        <param name="bufs" type="const GLenum *" count="n"/>
+    </function>
+</category>
+
 </OpenGLAPI>
diff --git a/mesalib/src/mapi/glapi/gen/gl_API.xml b/mesalib/src/mapi/glapi/gen/gl_API.xml
index e1b12462e..1ceb60a0f 100644
--- a/mesalib/src/mapi/glapi/gen/gl_API.xml
+++ b/mesalib/src/mapi/glapi/gen/gl_API.xml
@@ -8213,6 +8213,8 @@
 
 <xi:include href="ARB_gpu_shader5.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
+<xi:include href="ARB_gpu_shader_fp64.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
 <category name="GL_ARB_transform_feedback3" number="94">
   <enum name="MAX_TRANSFORM_FEEDBACK_BUFFERS" value="0x8E70"/>
   <enum name="MAX_VERTEX_STREAMS"             value="0x8E71"/>
@@ -8247,7 +8249,11 @@
 
 <xi:include href="ARB_separate_shader_objects.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
-<!-- ARB extensions #98...#108 -->
+<category name="GL_ARB_shader_precision" number="98">
+      <!-- No new functions, types, enums. -->
+</category>
+
+<!-- ARB extensions #99...#108 -->
 
 <xi:include href="ARB_ES2_compatibility.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
@@ -8375,12 +8381,19 @@
     <enum name="QUERY_BY_REGION_NO_WAIT_INVERTED"         value="0x8E1A"/>
 </category>
 
-<!-- ARB extensions 162 - 166 -->
+<!-- ARB extensions 162 - 163 -->
+
+<xi:include href="ARB_direct_state_access.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
+<!-- ARB extensions 165 - 166 -->
 
 <xi:include href="ARB_texture_barrier.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
 <xi:include href="KHR_context_flush_control.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
+<!-- ARB extension 171 -->
+<xi:include href="ARB_pipeline_statistics_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+
 <!-- Non-ARB extensions sorted by extension number. -->
 
 <category name="GL_EXT_blend_color" number="2">
@@ -12848,8 +12861,23 @@
     <enum name="SKIP_DECODE_EXT"                      value="0x8A4A"/>
 </category>
 
+<category name="GL_AMD_pinned_memory" number="411">
+    <enum name="EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD"   value="0x9160"/>
+</category>
+
 <xi:include href="INTEL_performance_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
+<category name="GL_EXT_polygon_offset_clamp" number="460">
+    <enum name="POLYGON_OFFSET_CLAMP_EXT"             value="0x8E1B">
+        <size name="Get" mode="get"/>
+    </enum>
+    <function name="PolygonOffsetClampEXT" offset="assign">
+        <param name="factor" type="GLfloat"/>
+        <param name="units"  type="GLfloat"/>
+        <param name="clamp"  type="GLfloat"/>
+    </function>
+</category>
+
 <!-- Unnumbered extensions sorted by name. -->
 
 <category name="GL_ATI_blend_equation_separate">
diff --git a/mesalib/src/mapi/glapi/gen/glapi_gen.mk b/mesalib/src/mapi/glapi/gen/glapi_gen.mk
deleted file mode 100644
index b8bb2f465..000000000
--- a/mesalib/src/mapi/glapi/gen/glapi_gen.mk
+++ /dev/null
@@ -1,40 +0,0 @@
-# Helpers for glapi header generation
-
-glapi_gen_common_deps := \
-	$(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.xml) \
-	$(wildcard $(top_srcdir)/src/mapi/glapi/gen/*.py)
-
-glapi_gen_mapi_script := $(top_srcdir)/src/mapi/mapi_abi.py
-glapi_gen_mapi_deps := \
-	$(glapi_gen_mapi_script) \
-	$(glapi_gen_common_deps)
-
-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_mapi_script) \
-	--mode lib --printer $(2) $(1) > $@
-endef
-
-glapi_gen_dispatch_script := $(top_srcdir)/src/mapi/glapi/gen/gl_table.py
-glapi_gen_dispatch_deps := $(glapi_gen_common_deps)
-
-# $(1): path to an XML file
-# $(2): empty, es1, or es2 for entry point filtering
-define glapi_gen_dispatch
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_dispatch_script) \
-	-f $(1) -m remap_table $(if $(2),-c $(2),) > $@
-endef
-
-glapi_gen_remap_script := $(top_srcdir)/src/mapi/glapi/gen/remap_helper.py
-glapi_gen_remap_deps := $(glapi_gen_common_deps)
-
-# $(1): path to an XML file
-# $(2): empty, es1, or es2 for entry point filtering
-define glapi_gen_remap
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(glapi_gen_remap_script) \
-	-f $(1) $(if $(2),-c $(2),) > $@
-endef
diff --git a/mesalib/src/mapi/glapi/glapi_dispatch.c b/mesalib/src/mapi/glapi/glapi_dispatch.c
index d2dd9654a..df907ff9d 100644
--- a/mesalib/src/mapi/glapi/glapi_dispatch.c
+++ b/mesalib/src/mapi/glapi/glapi_dispatch.c
@@ -144,6 +144,28 @@ GL_API void GL_APIENTRY glTexParameterxv (GLenum target, GLenum pname, const GLf
 GL_API void GL_APIENTRY glTranslatex (GLfixed x, GLfixed y, GLfixed z);
 GL_API void GL_APIENTRY glPointSizePointerOES (GLenum type, GLsizei stride, const GLvoid *pointer);
 
+/* Enable frame pointer elimination on Windows; otherwise, forgetting to add
+ * APIENTRY to _mesa_* entrypoints will not cause crashes on debug builds, as
+ * the initial ESP value is saved in EBP in the function prologue, then
+ * restored in the epilogue, masking any corruption of the ESP pointer due
+ * to a mismatch in the callee's calling convention.
+ *
+ * On MSVC it's not sufficient to enable /Oy -- other optimizations must be
+ * enabled too, or the frame pointer will be used regardless.
+ *
+ * We don't do this when NDEBUG is defined, since frame pointer omission
+ * compiler flags are already specified on release builds, and because on
+ * profile builds we must have frame pointers or certain profilers might
+ * fail to unwind the stack.
+ */
+#if defined(_WIN32) && !defined(NDEBUG)
+#  if defined(_MSC_VER)
+#    pragma optimize( "gty", on )
+#  elif defined(__GNUC__)
+#    pragma GCC optimize ("omit-frame-pointer")
+#  endif
+#endif
+
 #include "glapi/glapitemp.h"
 
 #endif /* USE_X86_ASM */
diff --git a/mesalib/src/mesa/Android.libmesa_dricore.mk b/mesalib/src/mesa/Android.libmesa_dricore.mk
index 2ab593d3c..e4a52677c 100644
--- a/mesalib/src/mesa/Android.libmesa_dricore.mk
+++ b/mesalib/src/mesa/Android.libmesa_dricore.mk
@@ -32,8 +32,6 @@ LOCAL_PATH := $(call my-dir)
 #     MESA_FILES
 #     X86_FILES
 include $(LOCAL_PATH)/Makefile.sources
-SRCDIR :=
-BUILDDIR :=
 
 include $(CLEAR_VARS)
 
diff --git a/mesalib/src/mesa/Android.libmesa_st_mesa.mk b/mesalib/src/mesa/Android.libmesa_st_mesa.mk
index 618d6bfb2..a08366d53 100644
--- a/mesalib/src/mesa/Android.libmesa_st_mesa.mk
+++ b/mesalib/src/mesa/Android.libmesa_st_mesa.mk
@@ -32,8 +32,6 @@ LOCAL_PATH := $(call my-dir)
 # 	MESA_GALLIUM_FILES.
 # 	X86_FILES
 include $(LOCAL_PATH)/Makefile.sources
-SRCDIR :=
-BUILDDIR :=
 
 include $(CLEAR_VARS)
 
diff --git a/mesalib/src/mesa/Makefile.am b/mesalib/src/mesa/Makefile.am
index 8d8082ae3..b6cb8f111 100644
--- a/mesalib/src/mesa/Makefile.am
+++ b/mesalib/src/mesa/Makefile.am
@@ -19,6 +19,8 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+AUTOMAKE_OPTIONS = subdir-objects
+
 SUBDIRS = . main/tests
 
 if HAVE_X11_DRIVER
@@ -36,8 +38,8 @@ endif
 gldir = $(includedir)/GL
 gl_HEADERS = $(top_srcdir)/include/GL/*.h
 
-.PHONY: $(BUILDDIR)main/git_sha1.h.tmp
-$(BUILDDIR)main/git_sha1.h.tmp:
+.PHONY: main/git_sha1.h.tmp
+main/git_sha1.h.tmp:
 	@touch main/git_sha1.h.tmp
 	@if test -d $(top_srcdir)/.git; then \
 		if which git > /dev/null; then \
@@ -47,7 +49,7 @@ $(BUILDDIR)main/git_sha1.h.tmp:
 		fi \
 	fi
 
-$(BUILDDIR)main/git_sha1.h: $(BUILDDIR)main/git_sha1.h.tmp
+main/git_sha1.h: main/git_sha1.h.tmp
 	@echo "updating main/git_sha1.h"
 	@if ! cmp -s main/git_sha1.h.tmp main/git_sha1.h; then \
 		mv main/git_sha1.h.tmp main/git_sha1.h ;\
@@ -55,11 +57,6 @@ $(BUILDDIR)main/git_sha1.h: $(BUILDDIR)main/git_sha1.h.tmp
 		rm main/git_sha1.h.tmp ;\
 	fi
 
-# include glapi_gen.mk for generating glapi headers for GLES
-GLAPI = $(top_srcdir)/src/mapi/glapi/gen
-include $(GLAPI)/glapi_gen.mk
-
-BUILDDIR = $(builddir)/
 include Makefile.sources
 
 EXTRA_DIST = \
@@ -67,7 +64,9 @@ EXTRA_DIST = \
 	drivers/SConscript \
 	drivers/windows \
 	main/format_info.py \
+	main/format_pack.py \
 	main/format_parser.py \
+	main/format_unpack.py \
 	main/formats.csv \
 	main/get_hash_generator.py \
 	main/get_hash_params.py \
@@ -82,17 +81,19 @@ EXTRA_DIST = \
 BUILT_SOURCES = \
 	main/get_hash.h \
         main/format_info.c \
-	$(BUILDDIR)main/git_sha1.h \
-	$(BUILDDIR)program/program_parse.tab.c \
-	$(BUILDDIR)program/lex.yy.c
+	main/git_sha1.h \
+	main/format_pack.c \
+	main/format_unpack.c \
+	program/program_parse.tab.c \
+	program/lex.yy.c
 CLEANFILES = \
 	$(BUILT_SOURCES) \
-	$(BUILDDIR)program/program_parse.tab.h \
-	$(BUILDDIR)main/git_sha1.h.tmp
+	program/program_parse.tab.h \
+	main/git_sha1.h.tmp
 
 GET_HASH_GEN = main/get_hash_generator.py
 
-main/get_hash.h: $(GLAPI)/gl_and_es_API.xml main/get_hash_params.py 	\
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py 	\
 		 $(GET_HASH_GEN) Makefile
 	$(AM_V_GEN)set -e;						\
 	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)		\
@@ -106,6 +107,22 @@ main/format_info.c: main/formats.csv                                    \
                    $< > $@.tmp;                                         \
 	mv $@.tmp $@;
 
+main/format_pack.c: main/format_pack.py main/formats.csv		\
+                    main/format_parser.py
+	$(AM_V_GEN)set -e;						\
+	$(PYTHON2) $(PYTHON_FLAGS)					\
+			$(srcdir)/main/format_pack.py			\
+			$(srcdir)/main/formats.csv			\
+		| $(INDENT) $(INDENT_FLAGS) > $@;
+
+main/format_unpack.c: main/format_unpack.py main/formats.csv	\
+                      main/format_parser.py
+	$(AM_V_GEN)set -e;						\
+	$(PYTHON2) $(PYTHON_FLAGS)					\
+			$(srcdir)/main/format_unpack.py			\
+			$(srcdir)/main/formats.csv			\
+		| $(INDENT) $(INDENT_FLAGS) > $@;
+
 main/formats.c: main/format_info.c
 
 noinst_LTLIBRARIES = $(ARCH_LIBS)
@@ -177,13 +194,13 @@ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc
 
-$(BUILDDIR)program/lex.yy.c: program/program_lexer.l
+program/lex.yy.c: program/program_lexer.l
 	$(AM_V_at)$(MKDIR_P) program
 	$(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
 
-$(BUILDDIR)program/program_parse.tab.c $(BUILDDIR)program/program_parse.tab.h: program/program_parse.y
+program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
 	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=$(BUILDDIR)program/program_parse.tab.c $<
+	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
 
 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
diff --git a/mesalib/src/mesa/Makefile.sources b/mesalib/src/mesa/Makefile.sources
index 7ac3bbc71..5b4e71253 100644
--- a/mesalib/src/mesa/Makefile.sources
+++ b/mesalib/src/mesa/Makefile.sources
@@ -1,592 +1,582 @@
 ### Lists of source files, included by Makefiles
 
-# This file is among different build systems. SRCDIR must be defined with
-# a trailing slash because the Android build system leaves it undefined.
-
-SRCDIR = $(top_srcdir)/src/mesa/
-BUILDDIR = $(top_builddir)/src/mesa/
-
 # this is part of MAIN_FILES
 MAIN_ES_FILES = \
-	$(SRCDIR)main/es1_conversion.c \
-	$(SRCDIR)main/es1_conversion.h
+	main/es1_conversion.c \
+	main/es1_conversion.h
 
 MAIN_FILES = \
-	$(SRCDIR)main/accum.c \
-	$(SRCDIR)main/accum.h \
-	$(SRCDIR)main/api_arrayelt.c \
-	$(SRCDIR)main/api_arrayelt.h \
-	$(BUILDDIR)main/api_exec.c \
-	$(SRCDIR)main/api_exec.h \
-	$(SRCDIR)main/api_loopback.c \
-	$(SRCDIR)main/api_loopback.h \
-	$(SRCDIR)main/api_validate.c \
-	$(SRCDIR)main/api_validate.h \
-	$(SRCDIR)main/arbprogram.c \
-	$(SRCDIR)main/arbprogram.h \
-	$(SRCDIR)main/arrayobj.c \
-	$(SRCDIR)main/arrayobj.h \
-	$(SRCDIR)main/atifragshader.c \
-	$(SRCDIR)main/atifragshader.h \
-	$(SRCDIR)main/attrib.c \
-	$(SRCDIR)main/attrib.h \
-	$(SRCDIR)main/bitset.h \
-	$(SRCDIR)main/blend.c \
-	$(SRCDIR)main/blend.h \
-	$(SRCDIR)main/blit.c \
-	$(SRCDIR)main/blit.h \
-	$(SRCDIR)main/bufferobj.c \
-	$(SRCDIR)main/bufferobj.h \
-	$(SRCDIR)main/buffers.c \
-	$(SRCDIR)main/buffers.h \
-	$(SRCDIR)main/clear.c \
-	$(SRCDIR)main/clear.h \
-	$(SRCDIR)main/clip.c \
-	$(SRCDIR)main/clip.h \
-	$(SRCDIR)main/colormac.h \
-	$(SRCDIR)main/colortab.c \
-	$(SRCDIR)main/colortab.h \
-	$(SRCDIR)main/compute.c \
-	$(SRCDIR)main/compute.h \
-	$(SRCDIR)main/compiler.h \
-	$(SRCDIR)main/condrender.c \
-	$(SRCDIR)main/condrender.h \
-	$(SRCDIR)main/config.h \
-	$(SRCDIR)main/context.c \
-	$(SRCDIR)main/context.h \
-	$(SRCDIR)main/convolve.c \
-	$(SRCDIR)main/convolve.h \
-	$(SRCDIR)main/copyimage.c \
-	$(SRCDIR)main/copyimage.h \
-	$(SRCDIR)main/core.h \
-	$(SRCDIR)main/cpuinfo.c \
-	$(SRCDIR)main/cpuinfo.h \
-	$(SRCDIR)main/dd.h \
-	$(SRCDIR)main/debug.c \
-	$(SRCDIR)main/debug.h \
-	$(SRCDIR)main/depth.c \
-	$(SRCDIR)main/depth.h \
-	$(SRCDIR)main/dlist.c \
-	$(SRCDIR)main/dlist.h \
-	$(SRCDIR)main/dlopen.h \
-	$(SRCDIR)main/drawpix.c \
-	$(SRCDIR)main/drawpix.h \
-	$(SRCDIR)main/drawtex.c \
-	$(SRCDIR)main/drawtex.h \
-	$(SRCDIR)main/enable.c \
-	$(SRCDIR)main/enable.h \
-	$(BUILDDIR)main/enums.c \
-	$(SRCDIR)main/enums.h \
-	$(SRCDIR)main/errors.c \
-	$(SRCDIR)main/errors.h \
-	$(SRCDIR)main/eval.c \
-	$(SRCDIR)main/eval.h \
-	$(SRCDIR)main/execmem.c \
-	$(SRCDIR)main/extensions.c \
-	$(SRCDIR)main/extensions.h \
-	$(SRCDIR)main/fbobject.c \
-	$(SRCDIR)main/fbobject.h \
-	$(SRCDIR)main/feedback.c \
-	$(SRCDIR)main/feedback.h \
-	$(SRCDIR)main/ff_fragment_shader.cpp \
-	$(SRCDIR)main/ffvertex_prog.c \
-	$(SRCDIR)main/ffvertex_prog.h \
-	$(SRCDIR)main/fog.c \
-	$(SRCDIR)main/fog.h \
-	$(SRCDIR)main/format_pack.c \
-	$(SRCDIR)main/format_pack.h \
-	$(SRCDIR)main/formatquery.c \
-	$(SRCDIR)main/formatquery.h \
-	$(SRCDIR)main/formats.c \
-	$(SRCDIR)main/formats.h \
-	$(SRCDIR)main/format_unpack.c \
-	$(SRCDIR)main/format_unpack.h \
-	$(SRCDIR)main/format_utils.c \
-	$(SRCDIR)main/format_utils.h \
-	$(SRCDIR)main/framebuffer.c \
-	$(SRCDIR)main/framebuffer.h \
-	$(SRCDIR)main/get.c \
-	$(SRCDIR)main/get.h \
-	$(SRCDIR)main/genmipmap.c \
-	$(SRCDIR)main/genmipmap.h \
-	$(SRCDIR)main/getstring.c \
-	$(SRCDIR)main/glformats.c \
-	$(SRCDIR)main/glformats.h \
-	$(SRCDIR)main/glheader.h \
-	$(SRCDIR)main/hash.c \
-	$(SRCDIR)main/hash.h \
-	$(SRCDIR)main/hint.c \
-	$(SRCDIR)main/hint.h \
-	$(SRCDIR)main/histogram.c \
-	$(SRCDIR)main/histogram.h \
-	$(SRCDIR)main/image.c \
-	$(SRCDIR)main/image.h \
-	$(SRCDIR)main/imports.c \
-	$(SRCDIR)main/imports.h \
-	$(SRCDIR)main/light.c \
-	$(SRCDIR)main/light.h \
-	$(SRCDIR)main/lines.c \
-	$(SRCDIR)main/lines.h \
-	$(SRCDIR)main/macros.h \
-	$(SRCDIR)main/matrix.c \
-	$(SRCDIR)main/matrix.h \
-	$(SRCDIR)main/mipmap.c \
-	$(SRCDIR)main/mipmap.h \
-	$(SRCDIR)main/mm.c \
-	$(SRCDIR)main/mm.h \
-	$(SRCDIR)main/mtypes.h \
-	$(SRCDIR)main/multisample.c \
-	$(SRCDIR)main/multisample.h \
-	$(SRCDIR)main/objectlabel.c \
-	$(SRCDIR)main/objectlabel.h \
-	$(SRCDIR)main/pack.c \
-	$(SRCDIR)main/pack.h \
-	$(SRCDIR)main/pack_tmp.h \
-	$(SRCDIR)main/pbo.c \
-	$(SRCDIR)main/pbo.h \
-	$(SRCDIR)main/performance_monitor.c \
-	$(SRCDIR)main/performance_monitor.h \
-	$(SRCDIR)main/pipelineobj.c \
-	$(SRCDIR)main/pipelineobj.h \
-	$(SRCDIR)main/pixel.c \
-	$(SRCDIR)main/pixel.h \
-	$(SRCDIR)main/pixelstore.c \
-	$(SRCDIR)main/pixelstore.h \
-	$(SRCDIR)main/pixeltransfer.c \
-	$(SRCDIR)main/pixeltransfer.h \
-	$(SRCDIR)main/points.c \
-	$(SRCDIR)main/points.h \
-	$(SRCDIR)main/polygon.c \
-	$(SRCDIR)main/polygon.h \
-	$(SRCDIR)main/querymatrix.c \
-	$(SRCDIR)main/querymatrix.h \
-	$(SRCDIR)main/queryobj.c \
-	$(SRCDIR)main/queryobj.h \
-	$(SRCDIR)main/rastpos.c \
-	$(SRCDIR)main/rastpos.h \
-	$(SRCDIR)main/readpix.c \
-	$(SRCDIR)main/readpix.h \
-	$(SRCDIR)main/remap.c \
-	$(SRCDIR)main/remap.h \
-	$(SRCDIR)main/renderbuffer.c \
-	$(SRCDIR)main/renderbuffer.h \
-	$(SRCDIR)main/samplerobj.c \
-	$(SRCDIR)main/samplerobj.h \
-	$(SRCDIR)main/scissor.c \
-	$(SRCDIR)main/scissor.h \
-	$(SRCDIR)main/set.c \
-	$(SRCDIR)main/set.h \
-	$(SRCDIR)main/shaderapi.c \
-	$(SRCDIR)main/shaderapi.h \
-	$(SRCDIR)main/shaderimage.c \
-	$(SRCDIR)main/shaderimage.h \
-	$(SRCDIR)main/shaderobj.c \
-	$(SRCDIR)main/shaderobj.h \
-	$(SRCDIR)main/shader_query.cpp \
-	$(SRCDIR)main/shared.c \
-	$(SRCDIR)main/shared.h \
-	$(SRCDIR)main/simple_list.h \
-	$(SRCDIR)main/state.c \
-	$(SRCDIR)main/state.h \
-	$(SRCDIR)main/stencil.c \
-	$(SRCDIR)main/stencil.h \
-	$(SRCDIR)main/syncobj.c \
-	$(SRCDIR)main/syncobj.h \
-	$(SRCDIR)main/texcompress.c \
-	$(SRCDIR)main/texcompress_bptc.c \
-	$(SRCDIR)main/texcompress_bptc.h \
-	$(SRCDIR)main/texcompress_cpal.c \
-	$(SRCDIR)main/texcompress_cpal.h \
-	$(SRCDIR)main/texcompress_etc.c \
-	$(SRCDIR)main/texcompress_etc.h \
-	$(SRCDIR)main/texcompress_etc_tmp.h \
-	$(SRCDIR)main/texcompress_fxt1.c \
-	$(SRCDIR)main/texcompress_fxt1.h \
-	$(SRCDIR)main/texcompress.h \
-	$(SRCDIR)main/texcompress_rgtc.c \
-	$(SRCDIR)main/texcompress_rgtc.h \
-	$(SRCDIR)main/texcompress_s3tc.c \
-	$(SRCDIR)main/texcompress_s3tc.h \
-	$(SRCDIR)main/texenv.c \
-	$(SRCDIR)main/texenv.h \
-	$(SRCDIR)main/texenvprogram.h \
-	$(SRCDIR)main/texformat.c \
-	$(SRCDIR)main/texformat.h \
-	$(SRCDIR)main/texgen.c \
-	$(SRCDIR)main/texgen.h \
-	$(SRCDIR)main/texgetimage.c \
-	$(SRCDIR)main/texgetimage.h \
-	$(SRCDIR)main/teximage.c \
-	$(SRCDIR)main/teximage.h \
-	$(SRCDIR)main/texobj.c \
-	$(SRCDIR)main/texobj.h \
-	$(SRCDIR)main/texparam.c \
-	$(SRCDIR)main/texparam.h \
-	$(SRCDIR)main/texstate.c \
-	$(SRCDIR)main/texstate.h \
-	$(SRCDIR)main/texstorage.c \
-	$(SRCDIR)main/texstorage.h \
-	$(SRCDIR)main/texstore.c \
-	$(SRCDIR)main/texstore.h \
-	$(SRCDIR)main/textureview.c \
-	$(SRCDIR)main/textureview.h \
-	$(SRCDIR)main/texturebarrier.c \
-	$(SRCDIR)main/texturebarrier.h \
-	$(SRCDIR)main/transformfeedback.c \
-	$(SRCDIR)main/transformfeedback.h \
-	$(SRCDIR)main/uniform_query.cpp \
-	$(SRCDIR)main/uniforms.c \
-	$(SRCDIR)main/uniforms.h \
-	$(SRCDIR)main/varray.c \
-	$(SRCDIR)main/varray.h \
-	$(SRCDIR)main/vdpau.c \
-	$(SRCDIR)main/vdpau.h \
-	$(SRCDIR)main/version.c \
-	$(SRCDIR)main/version.h \
-	$(SRCDIR)main/viewport.c \
-	$(SRCDIR)main/viewport.h \
-	$(SRCDIR)main/vtxfmt.c \
-	$(SRCDIR)main/vtxfmt.h \
+	main/accum.c \
+	main/accum.h \
+	main/api_arrayelt.c \
+	main/api_arrayelt.h \
+	main/api_exec.c \
+	main/api_exec.h \
+	main/api_loopback.c \
+	main/api_loopback.h \
+	main/api_validate.c \
+	main/api_validate.h \
+	main/arbprogram.c \
+	main/arbprogram.h \
+	main/arrayobj.c \
+	main/arrayobj.h \
+	main/atifragshader.c \
+	main/atifragshader.h \
+	main/attrib.c \
+	main/attrib.h \
+	main/blend.c \
+	main/blend.h \
+	main/blit.c \
+	main/blit.h \
+	main/bufferobj.c \
+	main/bufferobj.h \
+	main/buffers.c \
+	main/buffers.h \
+	main/clear.c \
+	main/clear.h \
+	main/clip.c \
+	main/clip.h \
+	main/colormac.h \
+	main/colortab.c \
+	main/colortab.h \
+	main/compute.c \
+	main/compute.h \
+	main/compiler.h \
+	main/condrender.c \
+	main/condrender.h \
+	main/config.h \
+	main/context.c \
+	main/context.h \
+	main/convolve.c \
+	main/convolve.h \
+	main/copyimage.c \
+	main/copyimage.h \
+	main/core.h \
+	main/cpuinfo.c \
+	main/cpuinfo.h \
+	main/dd.h \
+	main/debug.c \
+	main/debug.h \
+	main/depth.c \
+	main/depth.h \
+	main/dlist.c \
+	main/dlist.h \
+	main/dlopen.h \
+	main/drawpix.c \
+	main/drawpix.h \
+	main/drawtex.c \
+	main/drawtex.h \
+	main/enable.c \
+	main/enable.h \
+	main/enums.c \
+	main/enums.h \
+	main/errors.c \
+	main/errors.h \
+	main/eval.c \
+	main/eval.h \
+	main/execmem.c \
+	main/extensions.c \
+	main/extensions.h \
+	main/fbobject.c \
+	main/fbobject.h \
+	main/feedback.c \
+	main/feedback.h \
+	main/ff_fragment_shader.cpp \
+	main/ffvertex_prog.c \
+	main/ffvertex_prog.h \
+	main/fog.c \
+	main/fog.h \
+	main/format_pack.h \
+	main/format_pack.c \
+	main/format_unpack.h \
+	main/format_unpack.c \
+	main/formatquery.c \
+	main/formatquery.h \
+	main/formats.c \
+	main/formats.h \
+	main/format_utils.c \
+	main/format_utils.h \
+	main/framebuffer.c \
+	main/framebuffer.h \
+	main/get.c \
+	main/get.h \
+	main/genmipmap.c \
+	main/genmipmap.h \
+	main/getstring.c \
+	main/glformats.c \
+	main/glformats.h \
+	main/glheader.h \
+	main/hash.c \
+	main/hash.h \
+	main/hint.c \
+	main/hint.h \
+	main/histogram.c \
+	main/histogram.h \
+	main/image.c \
+	main/image.h \
+	main/imports.c \
+	main/imports.h \
+	main/light.c \
+	main/light.h \
+	main/lines.c \
+	main/lines.h \
+	main/macros.h \
+	main/matrix.c \
+	main/matrix.h \
+	main/mipmap.c \
+	main/mipmap.h \
+	main/mm.c \
+	main/mm.h \
+	main/mtypes.h \
+	main/multisample.c \
+	main/multisample.h \
+	main/objectlabel.c \
+	main/objectlabel.h \
+	main/pack.c \
+	main/pack.h \
+	main/pbo.c \
+	main/pbo.h \
+	main/performance_monitor.c \
+	main/performance_monitor.h \
+	main/pipelineobj.c \
+	main/pipelineobj.h \
+	main/pixel.c \
+	main/pixel.h \
+	main/pixelstore.c \
+	main/pixelstore.h \
+	main/pixeltransfer.c \
+	main/pixeltransfer.h \
+	main/points.c \
+	main/points.h \
+	main/polygon.c \
+	main/polygon.h \
+	main/querymatrix.c \
+	main/querymatrix.h \
+	main/queryobj.c \
+	main/queryobj.h \
+	main/rastpos.c \
+	main/rastpos.h \
+	main/readpix.c \
+	main/readpix.h \
+	main/remap.c \
+	main/remap.h \
+	main/renderbuffer.c \
+	main/renderbuffer.h \
+	main/samplerobj.c \
+	main/samplerobj.h \
+	main/scissor.c \
+	main/scissor.h \
+	main/shaderapi.c \
+	main/shaderapi.h \
+	main/shaderimage.c \
+	main/shaderimage.h \
+	main/shaderobj.c \
+	main/shaderobj.h \
+	main/shader_query.cpp \
+	main/shared.c \
+	main/shared.h \
+	main/state.c \
+	main/state.h \
+	main/stencil.c \
+	main/stencil.h \
+	main/syncobj.c \
+	main/syncobj.h \
+	main/texcompress.c \
+	main/texcompress_bptc.c \
+	main/texcompress_bptc.h \
+	main/texcompress_cpal.c \
+	main/texcompress_cpal.h \
+	main/texcompress_etc.c \
+	main/texcompress_etc.h \
+	main/texcompress_etc_tmp.h \
+	main/texcompress_fxt1.c \
+	main/texcompress_fxt1.h \
+	main/texcompress.h \
+	main/texcompress_rgtc.c \
+	main/texcompress_rgtc.h \
+	main/texcompress_s3tc.c \
+	main/texcompress_s3tc.h \
+	main/texenv.c \
+	main/texenv.h \
+	main/texenvprogram.h \
+	main/texformat.c \
+	main/texformat.h \
+	main/texgen.c \
+	main/texgen.h \
+	main/texgetimage.c \
+	main/texgetimage.h \
+	main/teximage.c \
+	main/teximage.h \
+	main/texobj.c \
+	main/texobj.h \
+	main/texparam.c \
+	main/texparam.h \
+	main/texstate.c \
+	main/texstate.h \
+	main/texstorage.c \
+	main/texstorage.h \
+	main/texstore.c \
+	main/texstore.h \
+	main/textureview.c \
+	main/textureview.h \
+	main/texturebarrier.c \
+	main/texturebarrier.h \
+	main/transformfeedback.c \
+	main/transformfeedback.h \
+	main/uniform_query.cpp \
+	main/uniforms.c \
+	main/uniforms.h \
+	main/varray.c \
+	main/varray.h \
+	main/vdpau.c \
+	main/vdpau.h \
+	main/version.c \
+	main/version.h \
+	main/viewport.c \
+	main/viewport.h \
+	main/vtxfmt.c \
+	main/vtxfmt.h \
 	$(MAIN_ES_FILES)
 
 MATH_FILES = \
-	$(SRCDIR)math/m_debug.h \
-	$(SRCDIR)math/m_debug_clip.c \
-	$(SRCDIR)math/m_debug_norm.c \
-	$(SRCDIR)math/m_debug_util.h \
-	$(SRCDIR)math/m_debug_xform.c \
-	$(SRCDIR)math/m_eval.c \
-	$(SRCDIR)math/m_eval.h \
-	$(SRCDIR)math/m_matrix.c \
-	$(SRCDIR)math/m_matrix.h \
-	$(SRCDIR)math/m_trans_tmp.h \
-	$(SRCDIR)math/m_translate.c \
-	$(SRCDIR)math/m_translate.h \
-	$(SRCDIR)math/m_vector.c \
-	$(SRCDIR)math/m_vector.h
+	math/m_debug.h \
+	math/m_debug_clip.c \
+	math/m_debug_norm.c \
+	math/m_debug_util.h \
+	math/m_debug_xform.c \
+	math/m_eval.c \
+	math/m_eval.h \
+	math/m_matrix.c \
+	math/m_matrix.h \
+	math/m_trans_tmp.h \
+	math/m_translate.c \
+	math/m_translate.h \
+	math/m_vector.c \
+	math/m_vector.h
 
 MATH_XFORM_FILES = \
-	$(SRCDIR)math/m_clip_tmp.h \
-	$(SRCDIR)math/m_copy_tmp.h \
-	$(SRCDIR)math/m_dotprod_tmp.h \
-	$(SRCDIR)math/m_norm_tmp.h \
-	$(SRCDIR)math/m_xform.c \
-	$(SRCDIR)math/m_xform.h \
-	$(SRCDIR)math/m_xform_tmp.h
+	math/m_clip_tmp.h \
+	math/m_copy_tmp.h \
+	math/m_dotprod_tmp.h \
+	math/m_norm_tmp.h \
+	math/m_xform.c \
+	math/m_xform.h \
+	math/m_xform_tmp.h
 
 SWRAST_FILES = \
-	$(SRCDIR)swrast/s_aaline.c \
-	$(SRCDIR)swrast/s_aaline.h \
-	$(SRCDIR)swrast/s_aalinetemp.h \
-	$(SRCDIR)swrast/s_aatriangle.c \
-	$(SRCDIR)swrast/s_aatriangle.h \
-	$(SRCDIR)swrast/s_aatritemp.h \
-	$(SRCDIR)swrast/s_alpha.c \
-	$(SRCDIR)swrast/s_alpha.h \
-	$(SRCDIR)swrast/s_atifragshader.c \
-	$(SRCDIR)swrast/s_atifragshader.h \
-	$(SRCDIR)swrast/s_bitmap.c \
-	$(SRCDIR)swrast/s_blend.c \
-	$(SRCDIR)swrast/s_blend.h \
-	$(SRCDIR)swrast/s_blit.c \
-	$(SRCDIR)swrast/s_chan.h \
-	$(SRCDIR)swrast/s_clear.c \
-	$(SRCDIR)swrast/s_context.c \
-	$(SRCDIR)swrast/s_context.h \
-	$(SRCDIR)swrast/s_copypix.c \
-	$(SRCDIR)swrast/s_depth.c \
-	$(SRCDIR)swrast/s_depth.h \
-	$(SRCDIR)swrast/s_drawpix.c \
-	$(SRCDIR)swrast_setup/ss_tritmp.h \
-	$(SRCDIR)swrast_setup/ss_vb.h \
-	$(SRCDIR)swrast_setup/swrast_setup.h \
-	$(SRCDIR)swrast/s_feedback.c \
-	$(SRCDIR)swrast/s_feedback.h \
-	$(SRCDIR)swrast/s_fog.c \
-	$(SRCDIR)swrast/s_fog.h \
-	$(SRCDIR)swrast/s_fragprog.c \
-	$(SRCDIR)swrast/s_fragprog.h \
-	$(SRCDIR)swrast/s_lines.c \
-	$(SRCDIR)swrast/s_lines.h \
-	$(SRCDIR)swrast/s_linetemp.h \
-	$(SRCDIR)swrast/s_logic.c \
-	$(SRCDIR)swrast/s_logic.h \
-	$(SRCDIR)swrast/s_masking.c \
-	$(SRCDIR)swrast/s_masking.h \
-	$(SRCDIR)swrast/s_points.c \
-	$(SRCDIR)swrast/s_points.h \
-	$(SRCDIR)swrast/s_renderbuffer.c \
-	$(SRCDIR)swrast/s_renderbuffer.h \
-	$(SRCDIR)swrast/s_span.c \
-	$(SRCDIR)swrast/s_span.h \
-	$(SRCDIR)swrast/s_stencil.c \
-	$(SRCDIR)swrast/s_stencil.h \
-	$(SRCDIR)swrast/s_texcombine.c \
-	$(SRCDIR)swrast/s_texcombine.h \
-	$(SRCDIR)swrast/s_texfetch.c \
-	$(SRCDIR)swrast/s_texfetch.h \
-	$(SRCDIR)swrast/s_texfetch_tmp.h \
-	$(SRCDIR)swrast/s_texfilter.c \
-	$(SRCDIR)swrast/s_texfilter.h \
-	$(SRCDIR)swrast/s_texrender.c \
-	$(SRCDIR)swrast/s_texture.c \
-	$(SRCDIR)swrast/s_triangle.c \
-	$(SRCDIR)swrast/s_triangle.h \
-	$(SRCDIR)swrast/s_tritemp.h \
-	$(SRCDIR)swrast/swrast.h \
-	$(SRCDIR)swrast/s_zoom.c \
-	$(SRCDIR)swrast/s_zoom.h
+	swrast/s_aaline.c \
+	swrast/s_aaline.h \
+	swrast/s_aalinetemp.h \
+	swrast/s_aatriangle.c \
+	swrast/s_aatriangle.h \
+	swrast/s_aatritemp.h \
+	swrast/s_alpha.c \
+	swrast/s_alpha.h \
+	swrast/s_atifragshader.c \
+	swrast/s_atifragshader.h \
+	swrast/s_bitmap.c \
+	swrast/s_blend.c \
+	swrast/s_blend.h \
+	swrast/s_blit.c \
+	swrast/s_chan.h \
+	swrast/s_clear.c \
+	swrast/s_context.c \
+	swrast/s_context.h \
+	swrast/s_copypix.c \
+	swrast/s_depth.c \
+	swrast/s_depth.h \
+	swrast/s_drawpix.c \
+	swrast_setup/ss_tritmp.h \
+	swrast_setup/ss_vb.h \
+	swrast_setup/swrast_setup.h \
+	swrast/s_feedback.c \
+	swrast/s_feedback.h \
+	swrast/s_fog.c \
+	swrast/s_fog.h \
+	swrast/s_fragprog.c \
+	swrast/s_fragprog.h \
+	swrast/s_lines.c \
+	swrast/s_lines.h \
+	swrast/s_linetemp.h \
+	swrast/s_logic.c \
+	swrast/s_logic.h \
+	swrast/s_masking.c \
+	swrast/s_masking.h \
+	swrast/s_points.c \
+	swrast/s_points.h \
+	swrast/s_renderbuffer.c \
+	swrast/s_renderbuffer.h \
+	swrast/s_span.c \
+	swrast/s_span.h \
+	swrast/s_stencil.c \
+	swrast/s_stencil.h \
+	swrast/s_texcombine.c \
+	swrast/s_texcombine.h \
+	swrast/s_texfetch.c \
+	swrast/s_texfetch.h \
+	swrast/s_texfetch_tmp.h \
+	swrast/s_texfilter.c \
+	swrast/s_texfilter.h \
+	swrast/s_texrender.c \
+	swrast/s_texture.c \
+	swrast/s_triangle.c \
+	swrast/s_triangle.h \
+	swrast/s_tritemp.h \
+	swrast/swrast.h \
+	swrast/s_zoom.c \
+	swrast/s_zoom.h
 
 SWRAST_SETUP_FILES = \
-	$(SRCDIR)swrast_setup/ss_context.c \
-	$(SRCDIR)swrast_setup/ss_context.h \
-	$(SRCDIR)swrast_setup/ss_triangle.c \
-	$(SRCDIR)swrast_setup/ss_triangle.h
+	swrast_setup/ss_context.c \
+	swrast_setup/ss_context.h \
+	swrast_setup/ss_triangle.c \
+	swrast_setup/ss_triangle.h
 
 TNL_FILES = \
-	$(SRCDIR)tnl/t_context.c \
-	$(SRCDIR)tnl/t_context.h \
-	$(SRCDIR)tnl/t_draw.c \
-	$(SRCDIR)tnl/tnl.h \
-	$(SRCDIR)tnl/t_pipeline.c \
-	$(SRCDIR)tnl/t_pipeline.h \
-	$(SRCDIR)tnl/t_rasterpos.c \
-	$(SRCDIR)tnl/t_vb_cliptmp.h \
-	$(SRCDIR)tnl/t_vb_fog.c \
-	$(SRCDIR)tnl/t_vb_light.c \
-	$(SRCDIR)tnl/t_vb_lighttmp.h \
-	$(SRCDIR)tnl/t_vb_normals.c \
-	$(SRCDIR)tnl/t_vb_points.c \
-	$(SRCDIR)tnl/t_vb_program.c \
-	$(SRCDIR)tnl/t_vb_render.c \
-	$(SRCDIR)tnl/t_vb_rendertmp.h \
-	$(SRCDIR)tnl/t_vb_texgen.c \
-	$(SRCDIR)tnl/t_vb_texmat.c \
-	$(SRCDIR)tnl/t_vb_vertex.c \
-	$(SRCDIR)tnl/t_vertex.c \
-	$(SRCDIR)tnl/t_vertex_generic.c \
-	$(SRCDIR)tnl/t_vertex.h \
-	$(SRCDIR)tnl/t_vertex_sse.c \
-	$(SRCDIR)tnl/t_vp_build.c \
-	$(SRCDIR)tnl/t_vp_build.h
+	tnl/t_context.c \
+	tnl/t_context.h \
+	tnl/t_draw.c \
+	tnl/tnl.h \
+	tnl/t_pipeline.c \
+	tnl/t_pipeline.h \
+	tnl/t_rasterpos.c \
+	tnl/t_vb_cliptmp.h \
+	tnl/t_vb_fog.c \
+	tnl/t_vb_light.c \
+	tnl/t_vb_lighttmp.h \
+	tnl/t_vb_normals.c \
+	tnl/t_vb_points.c \
+	tnl/t_vb_program.c \
+	tnl/t_vb_render.c \
+	tnl/t_vb_rendertmp.h \
+	tnl/t_vb_texgen.c \
+	tnl/t_vb_texmat.c \
+	tnl/t_vb_vertex.c \
+	tnl/t_vertex.c \
+	tnl/t_vertex_generic.c \
+	tnl/t_vertex.h \
+	tnl/t_vertex_sse.c \
+	tnl/t_vp_build.c \
+	tnl/t_vp_build.h
 
 VBO_FILES = \
-	$(SRCDIR)vbo/vbo_attrib.h \
-	$(SRCDIR)vbo/vbo_attrib_tmp.h \
-	$(SRCDIR)vbo/vbo_context.c \
-	$(SRCDIR)vbo/vbo_context.h \
-	$(SRCDIR)vbo/vbo_exec_api.c \
-	$(SRCDIR)vbo/vbo_exec_array.c \
-	$(SRCDIR)vbo/vbo_exec.c \
-	$(SRCDIR)vbo/vbo_exec_draw.c \
-	$(SRCDIR)vbo/vbo_exec_eval.c \
-	$(SRCDIR)vbo/vbo_exec.h \
-	$(SRCDIR)vbo/vbo.h \
-	$(SRCDIR)vbo/vbo_noop.c \
-	$(SRCDIR)vbo/vbo_noop.h \
-	$(SRCDIR)vbo/vbo_primitive_restart.c \
-	$(SRCDIR)vbo/vbo_rebase.c \
-	$(SRCDIR)vbo/vbo_save_api.c \
-	$(SRCDIR)vbo/vbo_save.c \
-	$(SRCDIR)vbo/vbo_save_draw.c \
-	$(SRCDIR)vbo/vbo_save.h \
-	$(SRCDIR)vbo/vbo_save_loopback.c \
-	$(SRCDIR)vbo/vbo_split.c \
-	$(SRCDIR)vbo/vbo_split_copy.c \
-	$(SRCDIR)vbo/vbo_split.h \
-	$(SRCDIR)vbo/vbo_split_inplace.c
+	vbo/vbo_attrib.h \
+	vbo/vbo_attrib_tmp.h \
+	vbo/vbo_context.c \
+	vbo/vbo_context.h \
+	vbo/vbo_exec_api.c \
+	vbo/vbo_exec_array.c \
+	vbo/vbo_exec.c \
+	vbo/vbo_exec_draw.c \
+	vbo/vbo_exec_eval.c \
+	vbo/vbo_exec.h \
+	vbo/vbo.h \
+	vbo/vbo_noop.c \
+	vbo/vbo_noop.h \
+	vbo/vbo_primitive_restart.c \
+	vbo/vbo_rebase.c \
+	vbo/vbo_save_api.c \
+	vbo/vbo_save.c \
+	vbo/vbo_save_draw.c \
+	vbo/vbo_save.h \
+	vbo/vbo_save_loopback.c \
+	vbo/vbo_split.c \
+	vbo/vbo_split_copy.c \
+	vbo/vbo_split.h \
+	vbo/vbo_split_inplace.c
 
 STATETRACKER_FILES = \
-	$(SRCDIR)state_tracker/st_atom_array.c \
-	$(SRCDIR)state_tracker/st_atom_blend.c \
-	$(SRCDIR)state_tracker/st_atom.c \
-	$(SRCDIR)state_tracker/st_atom_clip.c \
-	$(SRCDIR)state_tracker/st_atom_constbuf.c \
-	$(SRCDIR)state_tracker/st_atom_constbuf.h \
-	$(SRCDIR)state_tracker/st_atom_depth.c \
-	$(SRCDIR)state_tracker/st_atom_framebuffer.c \
-	$(SRCDIR)state_tracker/st_atom.h \
-	$(SRCDIR)state_tracker/st_atom_msaa.c \
-	$(SRCDIR)state_tracker/st_atom_pixeltransfer.c \
-	$(SRCDIR)state_tracker/st_atom_rasterizer.c \
-	$(SRCDIR)state_tracker/st_atom_sampler.c \
-	$(SRCDIR)state_tracker/st_atom_scissor.c \
-	$(SRCDIR)state_tracker/st_atom_shader.c \
-	$(SRCDIR)state_tracker/st_atom_shader.h \
-	$(SRCDIR)state_tracker/st_atom_stipple.c \
-	$(SRCDIR)state_tracker/st_atom_texture.c \
-	$(SRCDIR)state_tracker/st_atom_viewport.c \
-	$(SRCDIR)state_tracker/st_cache.h \
-	$(SRCDIR)state_tracker/st_cb_bitmap.c \
-	$(SRCDIR)state_tracker/st_cb_bitmap.h \
-	$(SRCDIR)state_tracker/st_cb_blit.c \
-	$(SRCDIR)state_tracker/st_cb_blit.h \
-	$(SRCDIR)state_tracker/st_cb_bufferobjects.c \
-	$(SRCDIR)state_tracker/st_cb_bufferobjects.h \
-	$(SRCDIR)state_tracker/st_cb_clear.c \
-	$(SRCDIR)state_tracker/st_cb_clear.h \
-	$(SRCDIR)state_tracker/st_cb_condrender.c \
-	$(SRCDIR)state_tracker/st_cb_condrender.h \
-	$(SRCDIR)state_tracker/st_cb_drawpixels.c \
-	$(SRCDIR)state_tracker/st_cb_drawpixels.h \
-	$(SRCDIR)state_tracker/st_cb_drawtex.c \
-	$(SRCDIR)state_tracker/st_cb_drawtex.h \
-	$(SRCDIR)state_tracker/st_cb_eglimage.c \
-	$(SRCDIR)state_tracker/st_cb_eglimage.h \
-	$(SRCDIR)state_tracker/st_cb_fbo.c \
-	$(SRCDIR)state_tracker/st_cb_fbo.h \
-	$(SRCDIR)state_tracker/st_cb_feedback.c \
-	$(SRCDIR)state_tracker/st_cb_feedback.h \
-	$(SRCDIR)state_tracker/st_cb_flush.c \
-	$(SRCDIR)state_tracker/st_cb_flush.h \
-	$(SRCDIR)state_tracker/st_cb_msaa.c \
-	$(SRCDIR)state_tracker/st_cb_msaa.h \
-	$(SRCDIR)state_tracker/st_cb_program.c \
-	$(SRCDIR)state_tracker/st_cb_program.h \
-	$(SRCDIR)state_tracker/st_cb_queryobj.c \
-	$(SRCDIR)state_tracker/st_cb_queryobj.h \
-	$(SRCDIR)state_tracker/st_cb_rasterpos.c \
-	$(SRCDIR)state_tracker/st_cb_rasterpos.h \
-	$(SRCDIR)state_tracker/st_cb_readpixels.c \
-	$(SRCDIR)state_tracker/st_cb_readpixels.h \
-	$(SRCDIR)state_tracker/st_cb_strings.c \
-	$(SRCDIR)state_tracker/st_cb_strings.h \
-	$(SRCDIR)state_tracker/st_cb_syncobj.c \
-	$(SRCDIR)state_tracker/st_cb_syncobj.h \
-	$(SRCDIR)state_tracker/st_cb_texturebarrier.c \
-	$(SRCDIR)state_tracker/st_cb_texturebarrier.h \
-	$(SRCDIR)state_tracker/st_cb_texture.c \
-	$(SRCDIR)state_tracker/st_cb_texture.h \
-	$(SRCDIR)state_tracker/st_cb_viewport.c \
-	$(SRCDIR)state_tracker/st_cb_viewport.h \
-	$(SRCDIR)state_tracker/st_cb_xformfb.c \
-	$(SRCDIR)state_tracker/st_cb_xformfb.h \
-	$(SRCDIR)state_tracker/st_context.c \
-	$(SRCDIR)state_tracker/st_context.h \
-	$(SRCDIR)state_tracker/st_debug.c \
-	$(SRCDIR)state_tracker/st_debug.h \
-	$(SRCDIR)state_tracker/st_draw.c \
-	$(SRCDIR)state_tracker/st_draw_feedback.c \
-	$(SRCDIR)state_tracker/st_draw.h \
-	$(SRCDIR)state_tracker/st_extensions.c \
-	$(SRCDIR)state_tracker/st_extensions.h \
-	$(SRCDIR)state_tracker/st_format.c \
-	$(SRCDIR)state_tracker/st_format.h \
-	$(SRCDIR)state_tracker/st_gen_mipmap.c \
-	$(SRCDIR)state_tracker/st_gen_mipmap.h \
-	$(SRCDIR)state_tracker/st_gl_api.h \
-	$(SRCDIR)state_tracker/st_glsl_to_tgsi.cpp \
-	$(SRCDIR)state_tracker/st_glsl_to_tgsi.h \
-	$(SRCDIR)state_tracker/st_manager.c \
-	$(SRCDIR)state_tracker/st_manager.h \
-	$(SRCDIR)state_tracker/st_mesa_to_tgsi.c \
-	$(SRCDIR)state_tracker/st_mesa_to_tgsi.h \
-	$(SRCDIR)state_tracker/st_program.c \
-	$(SRCDIR)state_tracker/st_program.h \
-	$(SRCDIR)state_tracker/st_texture.c \
-	$(SRCDIR)state_tracker/st_texture.h \
-	$(SRCDIR)state_tracker/st_vdpau.c \
-	$(SRCDIR)state_tracker/st_vdpau.h
+	state_tracker/st_atom_array.c \
+	state_tracker/st_atom_blend.c \
+	state_tracker/st_atom.c \
+	state_tracker/st_atom_clip.c \
+	state_tracker/st_atom_constbuf.c \
+	state_tracker/st_atom_constbuf.h \
+	state_tracker/st_atom_depth.c \
+	state_tracker/st_atom_framebuffer.c \
+	state_tracker/st_atom.h \
+	state_tracker/st_atom_msaa.c \
+	state_tracker/st_atom_pixeltransfer.c \
+	state_tracker/st_atom_rasterizer.c \
+	state_tracker/st_atom_sampler.c \
+	state_tracker/st_atom_scissor.c \
+	state_tracker/st_atom_shader.c \
+	state_tracker/st_atom_shader.h \
+	state_tracker/st_atom_stipple.c \
+	state_tracker/st_atom_texture.c \
+	state_tracker/st_atom_viewport.c \
+	state_tracker/st_cache.h \
+	state_tracker/st_cb_bitmap.c \
+	state_tracker/st_cb_bitmap.h \
+	state_tracker/st_cb_blit.c \
+	state_tracker/st_cb_blit.h \
+	state_tracker/st_cb_bufferobjects.c \
+	state_tracker/st_cb_bufferobjects.h \
+	state_tracker/st_cb_clear.c \
+	state_tracker/st_cb_clear.h \
+	state_tracker/st_cb_condrender.c \
+	state_tracker/st_cb_condrender.h \
+	state_tracker/st_cb_drawpixels.c \
+	state_tracker/st_cb_drawpixels.h \
+	state_tracker/st_cb_drawtex.c \
+	state_tracker/st_cb_drawtex.h \
+	state_tracker/st_cb_eglimage.c \
+	state_tracker/st_cb_eglimage.h \
+	state_tracker/st_cb_fbo.c \
+	state_tracker/st_cb_fbo.h \
+	state_tracker/st_cb_feedback.c \
+	state_tracker/st_cb_feedback.h \
+	state_tracker/st_cb_flush.c \
+	state_tracker/st_cb_flush.h \
+	state_tracker/st_cb_msaa.c \
+	state_tracker/st_cb_msaa.h \
+	state_tracker/st_cb_program.c \
+	state_tracker/st_cb_program.h \
+	state_tracker/st_cb_queryobj.c \
+	state_tracker/st_cb_queryobj.h \
+	state_tracker/st_cb_rasterpos.c \
+	state_tracker/st_cb_rasterpos.h \
+	state_tracker/st_cb_readpixels.c \
+	state_tracker/st_cb_readpixels.h \
+	state_tracker/st_cb_strings.c \
+	state_tracker/st_cb_strings.h \
+	state_tracker/st_cb_syncobj.c \
+	state_tracker/st_cb_syncobj.h \
+	state_tracker/st_cb_texturebarrier.c \
+	state_tracker/st_cb_texturebarrier.h \
+	state_tracker/st_cb_texture.c \
+	state_tracker/st_cb_texture.h \
+	state_tracker/st_cb_viewport.c \
+	state_tracker/st_cb_viewport.h \
+	state_tracker/st_cb_xformfb.c \
+	state_tracker/st_cb_xformfb.h \
+	state_tracker/st_context.c \
+	state_tracker/st_context.h \
+	state_tracker/st_debug.c \
+	state_tracker/st_debug.h \
+	state_tracker/st_draw.c \
+	state_tracker/st_draw_feedback.c \
+	state_tracker/st_draw.h \
+	state_tracker/st_extensions.c \
+	state_tracker/st_extensions.h \
+	state_tracker/st_format.c \
+	state_tracker/st_format.h \
+	state_tracker/st_gen_mipmap.c \
+	state_tracker/st_gen_mipmap.h \
+	state_tracker/st_gl_api.h \
+	state_tracker/st_glsl_to_tgsi.cpp \
+	state_tracker/st_glsl_to_tgsi.h \
+	state_tracker/st_manager.c \
+	state_tracker/st_manager.h \
+	state_tracker/st_mesa_to_tgsi.c \
+	state_tracker/st_mesa_to_tgsi.h \
+	state_tracker/st_program.c \
+	state_tracker/st_program.h \
+	state_tracker/st_texture.c \
+	state_tracker/st_texture.h \
+	state_tracker/st_vdpau.c \
+	state_tracker/st_vdpau.h
 
 PROGRAM_FILES = \
-	$(SRCDIR)program/arbprogparse.c \
-	$(SRCDIR)program/arbprogparse.h \
-	$(SRCDIR)program/hash_table.h \
-	$(SRCDIR)program/ir_to_mesa.cpp \
-	$(SRCDIR)program/ir_to_mesa.h \
-	$(BUILDDIR)program/lex.yy.c \
-	$(SRCDIR)program/prog_cache.c \
-	$(SRCDIR)program/prog_cache.h \
-	$(SRCDIR)program/prog_execute.c \
-	$(SRCDIR)program/prog_execute.h \
-	$(SRCDIR)program/prog_hash_table.c \
-	$(SRCDIR)program/prog_instruction.c \
-	$(SRCDIR)program/prog_instruction.h \
-	$(SRCDIR)program/prog_noise.c \
-	$(SRCDIR)program/prog_noise.h \
-	$(SRCDIR)program/prog_opt_constant_fold.c \
-	$(SRCDIR)program/prog_optimize.c \
-	$(SRCDIR)program/prog_optimize.h \
-	$(SRCDIR)program/prog_parameter.c \
-	$(SRCDIR)program/prog_parameter.h \
-	$(SRCDIR)program/prog_parameter_layout.c \
-	$(SRCDIR)program/prog_parameter_layout.h \
-	$(SRCDIR)program/prog_print.c \
-	$(SRCDIR)program/prog_print.h \
-	$(SRCDIR)program/program.c \
-	$(SRCDIR)program/program.h \
-	$(SRCDIR)program/programopt.c \
-	$(SRCDIR)program/programopt.h \
-	$(SRCDIR)program/program_parse_extra.c \
-	$(BUILDDIR)program/program_parse.tab.c \
-	$(BUILDDIR)program/program_parse.tab.h \
-	$(SRCDIR)program/program_parser.h \
-	$(SRCDIR)program/prog_statevars.c \
-	$(SRCDIR)program/prog_statevars.h \
-	$(SRCDIR)program/sampler.cpp \
-	$(SRCDIR)program/sampler.h \
-	$(SRCDIR)program/string_to_uint_map.cpp \
-	$(SRCDIR)program/symbol_table.c \
-	$(SRCDIR)program/symbol_table.h
+	program/arbprogparse.c \
+	program/arbprogparse.h \
+	program/hash_table.h \
+	program/ir_to_mesa.cpp \
+	program/ir_to_mesa.h \
+	program/lex.yy.c \
+	program/prog_cache.c \
+	program/prog_cache.h \
+	program/prog_execute.c \
+	program/prog_execute.h \
+	program/prog_hash_table.c \
+	program/prog_instruction.c \
+	program/prog_instruction.h \
+	program/prog_noise.c \
+	program/prog_noise.h \
+	program/prog_opt_constant_fold.c \
+	program/prog_optimize.c \
+	program/prog_optimize.h \
+	program/prog_parameter.c \
+	program/prog_parameter.h \
+	program/prog_parameter_layout.c \
+	program/prog_parameter_layout.h \
+	program/prog_print.c \
+	program/prog_print.h \
+	program/program.c \
+	program/program.h \
+	program/programopt.c \
+	program/programopt.h \
+	program/program_parse_extra.c \
+	program/program_parse.tab.c \
+	program/program_parse.tab.h \
+	program/program_parser.h \
+	program/prog_statevars.c \
+	program/prog_statevars.h \
+	program/sampler.cpp \
+	program/sampler.h \
+	program/string_to_uint_map.cpp \
+	program/symbol_table.c \
+	program/symbol_table.h
 
 ASM_C_FILES =	\
-	$(SRCDIR)x86/common_x86.c \
-	$(SRCDIR)x86/x86_xform.c \
-	$(SRCDIR)x86/3dnow.c \
-	$(SRCDIR)x86/sse.c \
-	$(SRCDIR)x86/rtasm/x86sse.c \
-	$(SRCDIR)x86/rtasm/x86sse.h \
-	$(SRCDIR)sparc/sparc.c \
-	$(SRCDIR)x86-64/x86-64.c
+	x86/common_x86.c \
+	x86/x86_xform.c \
+	x86/3dnow.c \
+	x86/sse.c \
+	x86/rtasm/x86sse.c \
+	x86/rtasm/x86sse.h \
+	sparc/sparc.c \
+	x86-64/x86-64.c
 
 X86_FILES =			\
-	$(SRCDIR)x86/assyntax.h		\
-	$(SRCDIR)x86/clip_args.h	\
-	$(SRCDIR)x86/norm_args.h	\
-	$(SRCDIR)x86/xform_args.h	\
-	$(SRCDIR)x86/common_x86_asm.S	\
-	$(SRCDIR)x86/common_x86_asm.h	\
-	$(SRCDIR)x86/common_x86_features.h	\
-	$(SRCDIR)x86/x86_xform.h	\
-	$(SRCDIR)x86/x86_xform2.S	\
-	$(SRCDIR)x86/x86_xform3.S	\
-	$(SRCDIR)x86/x86_xform4.S	\
-	$(SRCDIR)x86/x86_cliptest.S	\
-	$(SRCDIR)x86/mmx.h		\
-	$(SRCDIR)x86/mmx_blend.S	\
-	$(SRCDIR)x86/mmx_blendtmp.h	\
-	$(SRCDIR)x86/3dnow.h		\
-	$(SRCDIR)x86/3dnow_xform1.S	\
-	$(SRCDIR)x86/3dnow_xform2.S	\
-	$(SRCDIR)x86/3dnow_xform3.S	\
-	$(SRCDIR)x86/3dnow_xform4.S	\
-	$(SRCDIR)x86/sse.h		\
-	$(SRCDIR)x86/sse_xform1.S	\
-	$(SRCDIR)x86/sse_xform2.S	\
-	$(SRCDIR)x86/sse_xform3.S	\
-	$(SRCDIR)x86/sse_xform4.S	\
-	$(SRCDIR)x86/sse_normal.S	\
-	$(SRCDIR)x86/read_rgba_span_x86.S
+	x86/assyntax.h		\
+	x86/clip_args.h	\
+	x86/norm_args.h	\
+	x86/xform_args.h	\
+	x86/common_x86_asm.S	\
+	x86/common_x86_asm.h	\
+	x86/common_x86_features.h	\
+	x86/x86_xform.h	\
+	x86/x86_xform2.S	\
+	x86/x86_xform3.S	\
+	x86/x86_xform4.S	\
+	x86/x86_cliptest.S	\
+	x86/mmx.h		\
+	x86/mmx_blend.S	\
+	x86/mmx_blendtmp.h	\
+	x86/3dnow.h		\
+	x86/3dnow_xform1.S	\
+	x86/3dnow_xform2.S	\
+	x86/3dnow_xform3.S	\
+	x86/3dnow_xform4.S	\
+	x86/sse.h		\
+	x86/sse_xform1.S	\
+	x86/sse_xform2.S	\
+	x86/sse_xform3.S	\
+	x86/sse_xform4.S	\
+	x86/sse_normal.S	\
+	x86/read_rgba_span_x86.S
 
 X86_64_FILES =		\
-	$(SRCDIR)x86-64/x86-64.h	\
-	$(SRCDIR)x86-64/xform4.S
+	x86-64/x86-64.h	\
+	x86-64/xform4.S
 
 SPARC_FILES =			\
-	$(SRCDIR)sparc/sparc.h		\
-	$(SRCDIR)sparc/sparc_clip.S	\
-	$(SRCDIR)sparc/sparc_matrix.h	\
-	$(SRCDIR)sparc/norm.S		\
-	$(SRCDIR)sparc/xform.S
+	sparc/sparc.h		\
+	sparc/sparc_clip.S	\
+	sparc/sparc_matrix.h	\
+	sparc/norm.S		\
+	sparc/xform.S
 
 COMMON_DRIVER_FILES =			\
-	$(SRCDIR)drivers/common/driverfuncs.c	\
-	$(SRCDIR)drivers/common/driverfuncs.h	\
-	$(SRCDIR)drivers/common/meta_blit.c	\
-	$(SRCDIR)drivers/common/meta_copy_image.c	\
-	$(SRCDIR)drivers/common/meta_generate_mipmap.c	\
-	$(SRCDIR)drivers/common/meta.c \
-	$(SRCDIR)drivers/common/meta.h
+	drivers/common/driverfuncs.c	\
+	drivers/common/driverfuncs.h	\
+	drivers/common/meta_blit.c	\
+	drivers/common/meta_copy_image.c	\
+	drivers/common/meta_generate_mipmap.c	\
+	drivers/common/meta_tex_subimage.c	\
+	drivers/common/meta.c \
+	drivers/common/meta.h
 
 
 # Sources for building non-Gallium drivers
@@ -607,7 +597,7 @@ MESA_GALLIUM_FILES = \
 	$(MATH_FILES)		\
 	$(VBO_FILES)		\
 	$(STATETRACKER_FILES)	\
-	$(SRCDIR)x86/common_x86.c
+	x86/common_x86.c
 
 ### Include directories
 
diff --git a/mesalib/src/mesa/SConscript b/mesalib/src/mesa/SConscript
index e7c4f5ca1..62e81ced1 100644
--- a/mesalib/src/mesa/SConscript
+++ b/mesalib/src/mesa/SConscript
@@ -15,6 +15,8 @@ env.Append(CPPPATH = [
     '#/src/mapi',
     '#/src/glsl',
     '#/src/mesa',
+    '#/src/gallium/include',
+    '#/src/gallium/auxiliary',
     Dir('../mapi'), # src/mapi build path
     Dir('.'), # src/mesa build path
 ])
@@ -66,6 +68,20 @@ format_info = env.CodeGenerate(
       command = python_cmd + ' $SCRIPT ' + ' $SOURCE > $TARGET'
 )
 
+format_pack = env.CodeGenerate(
+      target = 'main/format_pack.c',
+      script = 'main/format_pack.py',
+      source = 'main/formats.csv',
+      command = python_cmd + ' $SCRIPT ' + ' $SOURCE > $TARGET'
+)
+
+format_unpack = env.CodeGenerate(
+      target = 'main/format_unpack.c',
+      script = 'main/format_unpack.py',
+      source = 'main/formats.csv',
+      command = python_cmd + ' $SCRIPT ' + ' $SOURCE > $TARGET'
+)
+
 #
 # Assembly sources
 #
diff --git a/mesalib/src/mesa/drivers/common/driverfuncs.c b/mesalib/src/mesa/drivers/common/driverfuncs.c
index 4f0f7a686..0d094ddf4 100644
--- a/mesalib/src/mesa/drivers/common/driverfuncs.c
+++ b/mesalib/src/mesa/drivers/common/driverfuncs.c
@@ -101,7 +101,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->TestProxyTexImage = _mesa_test_proxy_teximage;
    driver->CompressedTexImage = _mesa_store_compressed_teximage;
    driver->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
-   driver->GetCompressedTexImage = _mesa_get_compressed_teximage;
+   driver->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
    driver->BindTexture = NULL;
    driver->NewTextureObject = _mesa_new_texture_object;
    driver->DeleteTexture = _mesa_delete_texture_object;
@@ -210,7 +210,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
    driver->EndCallList = NULL;
 
    /* GL_ARB_texture_storage */
-   driver->AllocTextureStorage = _mesa_alloc_texture_storage;
+   driver->AllocTextureStorage = _mesa_AllocTextureStorage_sw;
 
    /* GL_ARB_texture_view */
    driver->TextureView = NULL;
diff --git a/mesalib/src/mesa/drivers/common/meta.c b/mesalib/src/mesa/drivers/common/meta.c
index 87532c1df..3636ee83b 100644
--- a/mesalib/src/mesa/drivers/common/meta.c
+++ b/mesalib/src/mesa/drivers/common/meta.c
@@ -243,6 +243,7 @@ _mesa_meta_compile_and_link_program(struct gl_context *ctx,
 void
 _mesa_meta_setup_blit_shader(struct gl_context *ctx,
                              GLenum target,
+                             bool do_depth,
                              struct blit_shader_table *table)
 {
    char *vs_source, *fs_source;
@@ -292,10 +293,11 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
                 "void main()\n"
                 "{\n"
                 "   gl_FragColor = %s(texSampler, %s);\n"
-                "   gl_FragDepth = gl_FragColor.x;\n"
+                "%s"
                 "}\n",
                 fs_preprocess, shader->type, fs_input,
-                shader->func, shader->texcoords);
+                shader->func, shader->texcoords,
+                do_depth ?  "   gl_FragDepth = gl_FragColor.x;\n" : "");
 
    _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
                                        ralloc_asprintf(mem_ctx, "%s blit",
@@ -825,15 +827,18 @@ _mesa_meta_end(struct gl_context *ctx)
    const GLbitfield state = save->SavedState;
    int i;
 
-   /* After starting a new occlusion query, initialize the results to the
-    * values saved previously. The driver will then continue to increment
-    * these values.
-    */
+   /* Grab the result of the old occlusion query before starting it again. The
+    * old result is added to the result of the new query so the driver will
+    * continue adding where it left off. */
    if (state & MESA_META_OCCLUSION_QUERY) {
       if (save->CurrentOcclusionObject) {
-         _mesa_BeginQuery(save->CurrentOcclusionObject->Target,
-                          save->CurrentOcclusionObject->Id);
-         ctx->Query.CurrentOcclusionObject->Result = save->CurrentOcclusionObject->Result;
+         struct gl_query_object *q = save->CurrentOcclusionObject;
+         GLuint64EXT result;
+         if (!q->Ready)
+            ctx->Driver.WaitQuery(ctx, q);
+         result = q->Result;
+         _mesa_BeginQuery(q->Target, q->Id);
+         ctx->Query.CurrentOcclusionObject->Result += result;
       }
    }
 
@@ -1213,16 +1218,6 @@ _mesa_meta_end(struct gl_context *ctx)
 
 
 /**
- * Determine whether Mesa is currently in a meta state.
- */
-GLboolean
-_mesa_meta_in_progress(struct gl_context *ctx)
-{
-   return ctx->Meta->SaveStackDepth != 0;
-}
-
-
-/**
  * Convert Z from a normalized value in the range [0, 1] to an object-space
  * Z coordinate in [-1, +1] so that drawing at the new Z position with the
  * default/identity ortho projection results in the original Z value.
@@ -2801,7 +2796,8 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims,
     * are too strict for CopyTexImage.  We know meta will be fine with format
     * changes.
     */
-   mask = _mesa_meta_BlitFramebuffer(ctx, x, y,
+   mask = _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                     x, y,
                                      x + width, y + height,
                                      xoffset, yoffset,
                                      xoffset + width, yoffset + height,
@@ -3045,7 +3041,7 @@ decompress_texture_image(struct gl_context *ctx,
       _mesa_meta_setup_vertex_objects(&decompress->VAO, &decompress->VBO, true,
                                       2, 4, 0);
 
-      _mesa_meta_setup_blit_shader(ctx, target, &decompress->shaders);
+      _mesa_meta_setup_blit_shader(ctx, target, false, &decompress->shaders);
    } else {
       _mesa_meta_setup_ff_tnl_for_blit(&decompress->VAO, &decompress->VBO, 3);
    }
@@ -3177,7 +3173,7 @@ _mesa_meta_GetTexImage(struct gl_context *ctx,
 {
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       GLuint slice;
-      bool result;
+      bool result = true;
 
       for (slice = 0; slice < texImage->Depth; slice++) {
          void *dst;
@@ -3208,7 +3204,7 @@ _mesa_meta_GetTexImage(struct gl_context *ctx,
          return;
    }
 
-   _mesa_get_teximage(ctx, format, type, pixels, texImage);
+   _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
 }
 
 
diff --git a/mesalib/src/mesa/drivers/common/meta.h b/mesalib/src/mesa/drivers/common/meta.h
index 6ecf3c005..e7d894df1 100644
--- a/mesalib/src/mesa/drivers/common/meta.h
+++ b/mesalib/src/mesa/drivers/common/meta.h
@@ -298,7 +298,8 @@ struct blit_state
 {
    GLuint VAO;
    GLuint VBO;
-   struct blit_shader_table shaders;
+   struct blit_shader_table shaders_with_depth;
+   struct blit_shader_table shaders_without_depth;
    GLuint msaa_shaders[BLIT_MSAA_SHADER_COUNT];
    struct temp_texture depthTex;
    bool no_ctsi_fallback;
@@ -446,8 +447,11 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state);
 extern void
 _mesa_meta_end(struct gl_context *ctx);
 
-extern GLboolean
-_mesa_meta_in_progress(struct gl_context *ctx);
+static inline bool
+_mesa_meta_in_progress(struct gl_context *ctx)
+{
+   return ctx->Meta->SaveStackDepth != 0;
+}
 
 extern void
 _mesa_meta_fb_tex_blit_begin(const struct gl_context *ctx,
@@ -471,12 +475,16 @@ _mesa_meta_setup_sampler(struct gl_context *ctx,
 
 extern GLbitfield
 _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
+                           const struct gl_framebuffer *readFb,
+                           const struct gl_framebuffer *drawFb,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                            GLbitfield mask, GLenum filter);
 
 extern void
 _mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
+                                      struct gl_framebuffer *readFb,
+                                      struct gl_framebuffer *drawFb,
                                       GLint srcX0, GLint srcY0,
                                       GLint srcX1, GLint srcY1,
                                       GLint dstX0, GLint dstY0,
@@ -519,6 +527,23 @@ extern void
 _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
                           struct gl_texture_object *texObj);
 
+extern bool
+_mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
+                           struct gl_texture_image *tex_image,
+                           int xoffset, int yoffset, int zoffset,
+                           int width, int height, int depth,
+                           GLenum format, GLenum type, const void *pixels,
+                           bool allocate_storage, bool create_pbo,
+                           const struct gl_pixelstore_attrib *packing);
+
+extern bool
+_mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
+                              struct gl_texture_image *tex_image,
+                              int xoffset, int yoffset, int zoffset,
+                              int width, int height, int depth,
+                              GLenum format, GLenum type, const void *pixels,
+                              const struct gl_pixelstore_attrib *packing);
+
 extern void
 _mesa_meta_CopyTexSubImage(struct gl_context *ctx, GLuint dims,
                            struct gl_texture_image *texImage,
@@ -612,6 +637,7 @@ _mesa_meta_setup_copypix_texture(struct gl_context *ctx,
 void
 _mesa_meta_setup_blit_shader(struct gl_context *ctx,
                              GLenum target,
+                             bool do_depth,
                              struct blit_shader_table *table);
 
 void
diff --git a/mesalib/src/mesa/drivers/common/meta_blit.c b/mesalib/src/mesa/drivers/common/meta_blit.c
index 01cb532fe..3406be1ed 100644
--- a/mesalib/src/mesa/drivers/common/meta_blit.c
+++ b/mesalib/src/mesa/drivers/common/meta_blit.c
@@ -232,6 +232,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
 static void
 setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                             struct blit_state *blit,
+                            const struct gl_framebuffer *drawFb,
                             struct gl_renderbuffer *src_rb,
                             GLenum target)
 {
@@ -267,7 +268,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
    /* Update the assert if we plan to support more than 16X MSAA. */
    assert(shader_offset >= 0 && shader_offset <= 4);
 
-   if (ctx->DrawBuffer->Visual.samples > 1) {
+   if (drawFb->Visual.samples > 1) {
       /* If you're calling meta_BlitFramebuffer with the destination
        * multisampled, this is the only path that will work -- swrast and
        * CopyTexImage won't work on it either.
@@ -508,9 +509,11 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
 static void
 setup_glsl_blit_framebuffer(struct gl_context *ctx,
                             struct blit_state *blit,
+                            const struct gl_framebuffer *drawFb,
                             struct gl_renderbuffer *src_rb,
                             GLenum target, GLenum filter,
-                            bool is_scaled_blit)
+                            bool is_scaled_blit,
+                            bool do_depth)
 {
    unsigned texcoord_size;
    bool is_target_multisample = target == GL_TEXTURE_2D_MULTISAMPLE ||
@@ -529,9 +532,11 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx,
    if (is_target_multisample && is_filter_scaled_resolve && is_scaled_blit) {
       setup_glsl_msaa_blit_scaled_shader(ctx, blit, src_rb, target, filter);
    } else if (is_target_multisample) {
-      setup_glsl_msaa_blit_shader(ctx, blit, src_rb, target);
+      setup_glsl_msaa_blit_shader(ctx, blit, drawFb, src_rb, target);
    } else {
-      _mesa_meta_setup_blit_shader(ctx, target, &blit->shaders);
+      _mesa_meta_setup_blit_shader(ctx, target, do_depth,
+                                   do_depth ? &blit->shaders_with_depth
+                                            : &blit->shaders_without_depth);
    }
 }
 
@@ -543,12 +548,13 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx,
  */
 static bool
 blitframebuffer_texture(struct gl_context *ctx,
+                        const struct gl_framebuffer *readFb,
+                        const struct gl_framebuffer *drawFb,
                         GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                         GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                         GLenum filter, GLint flipX, GLint flipY,
                         GLboolean glsl_version, GLboolean do_depth)
 {
-   const struct gl_framebuffer *readFb = ctx->ReadBuffer;
    int att_index = do_depth ? BUFFER_DEPTH : readFb->_ColorReadBufferIndex;
    const struct gl_renderbuffer_attachment *readAtt =
       &readFb->Attachment[att_index];
@@ -642,7 +648,8 @@ blitframebuffer_texture(struct gl_context *ctx,
    scaled_blit = dstW != srcW || dstH != srcH;
 
    if (glsl_version) {
-      setup_glsl_blit_framebuffer(ctx, blit, rb, target, filter, scaled_blit);
+      setup_glsl_blit_framebuffer(ctx, blit, drawFb, rb, target, filter, scaled_blit,
+                                  do_depth);
    }
    else {
       _mesa_meta_setup_ff_tnl_for_blit(&ctx->Meta->Blit.VAO,
@@ -677,7 +684,7 @@ blitframebuffer_texture(struct gl_context *ctx,
     */
    if (ctx->Extensions.EXT_texture_sRGB_decode) {
       if (_mesa_get_format_color_encoding(rb->Format) == GL_SRGB &&
-          ctx->DrawBuffer->Visual.sRGBCapable) {
+          drawFb->Visual.sRGBCapable) {
          _mesa_SamplerParameteri(fb_tex_blit.sampler,
                                  GL_TEXTURE_SRGB_DECODE_EXT, GL_DECODE_EXT);
          _mesa_set_framebuffer_srgb(ctx, GL_TRUE);
@@ -701,7 +708,7 @@ blitframebuffer_texture(struct gl_context *ctx,
 
       if (target == GL_TEXTURE_2D) {
          const struct gl_texture_image *texImage
-            = _mesa_select_tex_image(ctx, texObj, target, srcLevel);
+            = _mesa_select_tex_image(texObj, target, srcLevel);
          s0 = srcX0 / (float) texImage->Width;
          s1 = srcX1 / (float) texImage->Width;
          t0 = srcY0 / (float) texImage->Height;
@@ -869,6 +876,8 @@ _mesa_meta_setup_sampler(struct gl_context *ctx,
  */
 GLbitfield
 _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
+                           const struct gl_framebuffer *readFb,
+                           const struct gl_framebuffer *drawFb,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                            GLbitfield mask, GLenum filter)
@@ -890,7 +899,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
                                       ctx->Extensions.ARB_fragment_shader;
 
    /* Multisample texture blit support requires texture multisample. */
-   if (ctx->ReadBuffer->Visual.samples > 0 &&
+   if (readFb->Visual.samples > 0 &&
        !ctx->Extensions.ARB_texture_multisample) {
       return mask;
    }
@@ -898,7 +907,8 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
    /* Clip a copy of the blit coordinates. If these differ from the input
     * coordinates, then we'll set the scissor.
     */
-   if (!_mesa_clip_blit(ctx, &clip.srcX0, &clip.srcY0, &clip.srcX1, &clip.srcY1,
+   if (!_mesa_clip_blit(ctx, readFb, drawFb,
+                        &clip.srcX0, &clip.srcY0, &clip.srcX1, &clip.srcY1,
                         &clip.dstX0, &clip.dstY0, &clip.dstX1, &clip.dstY1)) {
       /* clipped/scissored everything away */
       return 0;
@@ -926,7 +936,8 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
 
    /* Try faster, direct texture approach first */
    if (mask & GL_COLOR_BUFFER_BIT) {
-      if (blitframebuffer_texture(ctx, srcX0, srcY0, srcX1, srcY1,
+      if (blitframebuffer_texture(ctx, readFb, drawFb,
+                                  srcX0, srcY0, srcX1, srcY1,
                                   dstX0, dstY0, dstX1, dstY1,
                                   filter, dstFlipX, dstFlipY,
                                   use_glsl_version, false)) {
@@ -935,7 +946,8 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
    }
 
    if (mask & GL_DEPTH_BUFFER_BIT && use_glsl_version) {
-      if (blitframebuffer_texture(ctx, srcX0, srcY0, srcX1, srcY1,
+      if (blitframebuffer_texture(ctx, readFb, drawFb,
+                                  srcX0, srcY0, srcX1, srcY1,
                                   dstX0, dstY0, dstX1, dstY1,
                                   filter, dstFlipX, dstFlipY,
                                   use_glsl_version, true)) {
@@ -962,7 +974,8 @@ _mesa_meta_glsl_blit_cleanup(struct blit_state *blit)
       blit->VBO = 0;
    }
 
-   _mesa_meta_blit_shader_table_cleanup(&blit->shaders);
+   _mesa_meta_blit_shader_table_cleanup(&blit->shaders_with_depth);
+   _mesa_meta_blit_shader_table_cleanup(&blit->shaders_without_depth);
 
    _mesa_DeleteTextures(1, &blit->depthTex.TexObj);
    blit->depthTex.TexObj = 0;
@@ -970,20 +983,22 @@ _mesa_meta_glsl_blit_cleanup(struct blit_state *blit)
 
 void
 _mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
+                                      struct gl_framebuffer *readFb,
+                                      struct gl_framebuffer *drawFb,
                                       GLint srcX0, GLint srcY0,
                                       GLint srcX1, GLint srcY1,
                                       GLint dstX0, GLint dstY0,
                                       GLint dstX1, GLint dstY1,
                                       GLbitfield mask, GLenum filter)
 {
-   mask = _mesa_meta_BlitFramebuffer(ctx,
+   mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb,
                                      srcX0, srcY0, srcX1, srcY1,
                                      dstX0, dstY0, dstX1, dstY1,
                                      mask, filter);
    if (mask == 0x0)
       return;
 
-   _swrast_BlitFramebuffer(ctx,
+   _swrast_BlitFramebuffer(ctx, readFb, drawFb,
                            srcX0, srcY0, srcX1, srcY1,
                            dstX0, dstY0, dstX1, dstY1,
                            mask, filter);
diff --git a/mesalib/src/mesa/drivers/common/meta_copy_image.c b/mesalib/src/mesa/drivers/common/meta_copy_image.c
index fc0cbaf1b..1729766f7 100644
--- a/mesalib/src/mesa/drivers/common/meta_copy_image.c
+++ b/mesalib/src/mesa/drivers/common/meta_copy_image.c
@@ -189,7 +189,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
     * We have already created views to ensure that the texture formats
     * match.
     */
-   ctx->Driver.BlitFramebuffer(ctx, src_x, src_y,
+   ctx->Driver.BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                               src_x, src_y,
                                src_x + src_width, src_y + src_height,
                                dst_x, dst_y,
                                dst_x + src_width, dst_y + src_height,
diff --git a/mesalib/src/mesa/drivers/common/meta_generate_mipmap.c b/mesalib/src/mesa/drivers/common/meta_generate_mipmap.c
index 8ffd8da3b..c1b6d3c1f 100644
--- a/mesalib/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/mesalib/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -71,7 +71,7 @@ fallback_required(struct gl_context *ctx, GLenum target,
    }
 
    srcLevel = texObj->BaseLevel;
-   baseImage = _mesa_select_tex_image(ctx, texObj, target, srcLevel);
+   baseImage = _mesa_select_tex_image(texObj, target, srcLevel);
    if (!baseImage) {
       _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
                        "glGenerateMipmap() couldn't find base teximage\n");
@@ -193,7 +193,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
    if (use_glsl_version) {
       _mesa_meta_setup_vertex_objects(&mipmap->VAO, &mipmap->VBO, true,
                                       2, 4, 0);
-      _mesa_meta_setup_blit_shader(ctx, target, &mipmap->shaders);
+      _mesa_meta_setup_blit_shader(ctx, target, false, &mipmap->shaders);
    } else {
       _mesa_meta_setup_ff_tnl_for_blit(&mipmap->VAO, &mipmap->VBO, 3);
       _mesa_set_enable(ctx, target, GL_TRUE);
@@ -265,7 +265,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
       GLsizei srcWidth, srcHeight, srcDepth;
       GLsizei dstWidth, dstHeight, dstDepth;
 
-      srcImage = _mesa_select_tex_image(ctx, texObj, faceTarget, srcLevel);
+      srcImage = _mesa_select_tex_image(texObj, faceTarget, srcLevel);
       assert(srcImage->Border == 0);
 
       /* src size */
@@ -304,7 +304,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
           */
          break;
       }
-      dstImage = _mesa_select_tex_image(ctx, texObj, faceTarget, dstLevel);
+      dstImage = _mesa_select_tex_image(texObj, faceTarget, dstLevel);
 
       /* limit minification to src level */
       _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
diff --git a/mesalib/src/mesa/drivers/common/meta_tex_subimage.c b/mesalib/src/mesa/drivers/common/meta_tex_subimage.c
new file mode 100644
index 000000000..68c8273fe
--- /dev/null
+++ b/mesalib/src/mesa/drivers/common/meta_tex_subimage.c
@@ -0,0 +1,361 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2015 Intel Corporation.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand <jason.ekstrand@intel.com>
+ */
+
+#include "bufferobj.h"
+#include "buffers.h"
+#include "fbobject.h"
+#include "glformats.h"
+#include "glheader.h"
+#include "image.h"
+#include "macros.h"
+#include "meta.h"
+#include "pbo.h"
+#include "shaderapi.h"
+#include "state.h"
+#include "teximage.h"
+#include "texobj.h"
+#include "texstate.h"
+#include "uniforms.h"
+#include "varray.h"
+/* Expose the pixel data as a temporary texture backed by a buffer object: the bound packing->BufferObj if there is one, otherwise a new PBO filled from 'pixels'.  Returns the level-0 image, or NULL when this path cannot be used.  The names generated here are stored through tmp_pbo and tmp_tex (the PBO name is 0 when no buffer was created). */
+static struct gl_texture_image *
+create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
+                       GLenum pbo_target, int width, int height, int depth,
+                       GLenum format, GLenum type, const void *pixels,
+                       const struct gl_pixelstore_attrib *packing,
+                       GLuint *tmp_pbo, GLuint *tmp_tex)
+{
+   uint32_t pbo_format;
+   GLenum internal_format;
+   unsigned row_stride;
+   struct gl_buffer_object *buffer_obj;
+   struct gl_texture_object *tex_obj;
+   struct gl_texture_image *tex_image;
+   bool read_only;
+   /* Pixel-store settings this path does not handle: give up with NULL. */
+   if ((packing->ImageHeight != 0 && packing->ImageHeight != height) ||
+       packing->SwapBytes ||
+       packing->LsbFirst ||
+       packing->Invert)
+      return NULL;
+
+   pbo_format = _mesa_format_from_format_and_type(format, type);
+   if (_mesa_format_is_mesa_array_format(pbo_format))
+      pbo_format = _mesa_format_from_array_format(pbo_format);
+   /* A zero format or one not flagged in TextureFormatSupported also means NULL. */
+   if (!pbo_format || !ctx->TextureFormatSupported[pbo_format])
+      return NULL;
+
+   /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
+   pixels = _mesa_image_address3d(packing, pixels,
+                                  width, height, format, type, 0, 0, 0);
+   row_stride = _mesa_image_row_stride(packing, width, format, type);
+
+   if (_mesa_is_bufferobj(packing->BufferObj)) {
+      *tmp_pbo = 0;
+      buffer_obj = packing->BufferObj;
+   } else {
+      assert(create_pbo);
+
+      _mesa_GenBuffers(1, tmp_pbo);
+
+      /* We are not doing this inside meta_begin/end.  However, we know the
+       * client doesn't have the given target bound, so we can go ahead and
+       * squash it.  We'll set it back when we're done.
+       */
+      _mesa_BindBuffer(pbo_target, *tmp_pbo);
+      /* NOTE(review): the size is row_stride * height, i.e. depth is not factored in -- confirm for depth > 1. */
+      _mesa_BufferData(pbo_target, row_stride * height, pixels, GL_STREAM_DRAW);
+
+      buffer_obj = ctx->Unpack.BufferObj;
+      pixels = NULL; /* cast to an integer below, so this becomes 0 */
+
+      _mesa_BindBuffer(pbo_target, 0);
+   }
+
+   _mesa_GenTextures(1, tmp_tex);
+   tex_obj = _mesa_lookup_texture(ctx, *tmp_tex);
+   tex_obj->Target = depth > 1 ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D;
+   tex_obj->Immutable = GL_TRUE;
+   _mesa_initialize_texture_object(ctx, tex_obj, *tmp_tex, GL_TEXTURE_2D);
+   /* NOTE(review): Target and Immutable are assigned before the initialize call above; if that call resets the object they are lost -- confirm the intended order. */
+   internal_format = _mesa_get_format_base_format(pbo_format);
+
+   tex_image = _mesa_get_tex_image(ctx, tex_obj, tex_obj->Target, 0);
+   _mesa_init_teximage_fields(ctx, tex_image, width, height, depth,
+                              0, internal_format, pbo_format);
+
+   read_only = pbo_target == GL_PIXEL_UNPACK_BUFFER; /* true only when called with the unpack target */
+   if (!ctx->Driver.SetTextureStorageForBufferObject(ctx, tex_obj,
+                                                     buffer_obj,
+                                                     (intptr_t)pixels,
+                                                     row_stride,
+                                                     read_only)) {
+      _mesa_DeleteTextures(1, tmp_tex);
+      _mesa_DeleteBuffers(1, tmp_pbo);
+      return NULL;
+   }
+
+   return tex_image;
+}
+/* Upload a texture sub-image by wrapping the source pixels (a bound PBO, or client memory via a temporary PBO when create_pbo is set) in a texture and blitting it into tex_image one layer at a time.  Returns true when the upload was done or a GL error was recorded, false when this path was not used or did not complete. */
+bool
+_mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
+                           struct gl_texture_image *tex_image,
+                           int xoffset, int yoffset, int zoffset,
+                           int width, int height, int depth,
+                           GLenum format, GLenum type, const void *pixels,
+                           bool allocate_storage, bool create_pbo,
+                           const struct gl_pixelstore_attrib *packing)
+{
+   GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
+   struct gl_texture_image *pbo_tex_image;
+   GLenum status;
+   bool success = false;
+   int z;
+
+   /* XXX: This should probably be passed in from somewhere */
+   const char *where = "_mesa_meta_pbo_TexSubImage";
+
+   if (!_mesa_is_bufferobj(packing->BufferObj) && !create_pbo)
+      return false;
+   /* Depth, stencil and color-index data are not handled by this path. */
+   if (format == GL_DEPTH_COMPONENT ||
+       format == GL_DEPTH_STENCIL ||
+       format == GL_STENCIL_INDEX ||
+       format == GL_COLOR_INDEX)
+      return false;
+
+   if (ctx->_ImageTransferState)
+      return false;
+
+   if (!_mesa_validate_pbo_access(dims, packing, width, height, depth,
+                                  format, type, INT_MAX, pixels)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(out of bounds PBO access)", where);
+      return true; /* error already recorded */
+   }
+
+   if (_mesa_check_disallowed_mapping(packing->BufferObj)) {
+      /* buffer is mapped - that's an error */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where);
+      return true; /* error already recorded */
+   }
+
+   pbo_tex_image = create_texture_for_pbo(ctx, create_pbo,
+                                          GL_PIXEL_UNPACK_BUFFER,
+                                          width, height, depth,
+                                          format, type, pixels, packing,
+                                          &pbo, &pbo_tex);
+   if (!pbo_tex_image)
+      return false;
+
+   if (allocate_storage)
+      ctx->Driver.AllocTextureImageBuffer(ctx, tex_image);
+
+   /* Only stash the current FBO */
+   _mesa_meta_begin(ctx, 0);
+
+   _mesa_GenFramebuffers(2, fbos);
+   _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
+   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
+   /* For 1D arrays the incoming height becomes the layer count and each layer is one row high. */
+   if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
+      assert(depth == 1);
+      depth = height;
+      height = 1;
+   }
+
+   _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                             pbo_tex_image, 0);
+   /* If this passes on the first layer it should pass on the others */
+   status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER);
+   if (status != GL_FRAMEBUFFER_COMPLETE)
+      goto fail;
+
+   _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                             tex_image, zoffset);
+   /* If this passes on the first layer it should pass on the others */
+   status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
+   if (status != GL_FRAMEBUFFER_COMPLETE)
+      goto fail;
+
+   _mesa_update_state(ctx);
+   /* First layer: a non-zero result is treated as failure (presumably the mask of buffers left unblitted -- confirm). */
+   if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                  0, 0, width, height,
+                                  xoffset, yoffset,
+                                  xoffset + width, yoffset + height,
+                                  GL_COLOR_BUFFER_BIT, GL_NEAREST))
+      goto fail;
+   /* Remaining layers: rebind layer z of the source and layer zoffset + z of the destination. */
+   for (z = 1; z < depth; z++) {
+      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                pbo_tex_image, z);
+      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                tex_image, zoffset + z);
+
+      _mesa_update_state(ctx);
+      /* NOTE(review): unlike the first layer, the result of this blit is not checked. */
+      _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                 0, 0, width, height,
+                                 xoffset, yoffset,
+                                 xoffset + width, yoffset + height,
+                                 GL_COLOR_BUFFER_BIT, GL_NEAREST);
+   }
+
+   success = true;
+
+fail: /* shared cleanup, also reached on success; 'success' tells the two apart */
+   _mesa_DeleteFramebuffers(2, fbos);
+   _mesa_DeleteTextures(1, &pbo_tex);
+   _mesa_DeleteBuffers(1, &pbo);
+
+   _mesa_meta_end(ctx);
+
+   return success;
+}
+/* Read back pixels into the bound pack PBO by wrapping it in a texture and blitting into it, from tex_image or, when tex_image is NULL, from the currently bound read framebuffer.  Returns true when the readback was done or a GL error was recorded, false when this path was not used or did not complete. */
+bool
+_mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
+                              struct gl_texture_image *tex_image,
+                              int xoffset, int yoffset, int zoffset,
+                              int width, int height, int depth,
+                              GLenum format, GLenum type, const void *pixels,
+                              const struct gl_pixelstore_attrib *packing)
+{
+   GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
+   struct gl_texture_image *pbo_tex_image;
+   GLenum status;
+   bool success = false;
+   int z;
+
+   /* XXX: This should probably be passed in from somewhere */
+   const char *where = "_mesa_meta_pbo_GetTexSubImage";
+   /* This path requires a real buffer object as the destination. */
+   if (!_mesa_is_bufferobj(packing->BufferObj))
+      return false;
+   /* Depth, stencil and color-index data are not handled by this path. */
+   if (format == GL_DEPTH_COMPONENT ||
+       format == GL_DEPTH_STENCIL ||
+       format == GL_STENCIL_INDEX ||
+       format == GL_COLOR_INDEX)
+      return false;
+
+   if (ctx->_ImageTransferState)
+      return false;
+
+   if (!_mesa_validate_pbo_access(dims, packing, width, height, depth,
+                                  format, type, INT_MAX, pixels)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(out of bounds PBO access)", where);
+      return true; /* error already recorded */
+   }
+
+   if (_mesa_check_disallowed_mapping(packing->BufferObj)) {
+      /* buffer is mapped - that's an error */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(PBO is mapped)", where);
+      return true; /* error already recorded */
+   }
+
+   pbo_tex_image = create_texture_for_pbo(ctx, false, GL_PIXEL_PACK_BUFFER,
+                                          width, height, depth,
+                                          format, type, pixels, packing,
+                                          &pbo, &pbo_tex);
+   if (!pbo_tex_image)
+      return false;
+
+   /* Only stash the current FBO */
+   _mesa_meta_begin(ctx, 0);
+
+   _mesa_GenFramebuffers(2, fbos);
+   /* For 1D arrays the incoming height becomes the layer count and each layer is one row high. */
+   if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
+      assert(depth == 1);
+      depth = height;
+      height = 1;
+   }
+
+   /* If we were given a texture, bind it to the read framebuffer.  If not,
+    * we're doing a ReadPixels and we should just use whatever framebuffer
+    * the client has bound.
+    */
+   if (tex_image) {
+      _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
+      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                tex_image, zoffset);
+      /* If this passes on the first layer it should pass on the others */
+      status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER);
+      if (status != GL_FRAMEBUFFER_COMPLETE)
+         goto fail;
+   } else {
+      assert(depth == 1);
+   }
+
+   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);
+   _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                             pbo_tex_image, 0);
+   /* If this passes on the first layer it should pass on the others */
+   status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
+   if (status != GL_FRAMEBUFFER_COMPLETE)
+      goto fail;
+
+   _mesa_update_state(ctx);
+   /* First layer: a non-zero result is treated as failure (presumably the mask of buffers left unblitted -- confirm). */
+   if (_mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                  xoffset, yoffset,
+                                  xoffset + width, yoffset + height,
+                                  0, 0, width, height,
+                                  GL_COLOR_BUFFER_BIT, GL_NEAREST))
+      goto fail;
+   /* Remaining layers; with a NULL tex_image depth was asserted to be 1, so the loop body is not entered. */
+   for (z = 1; z < depth; z++) {
+      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                tex_image, zoffset + z);
+      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                pbo_tex_image, z);
+
+      _mesa_update_state(ctx);
+      /* NOTE(review): unlike the first layer, the result of this blit is not checked. */
+      _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                 xoffset, yoffset,
+                                 xoffset + width, yoffset + height,
+                                 0, 0, width, height,
+                                 GL_COLOR_BUFFER_BIT, GL_NEAREST);
+   }
+
+   success = true;
+
+fail: /* shared cleanup, also reached on success; 'success' tells the two apart */
+   _mesa_DeleteFramebuffers(2, fbos);
+   _mesa_DeleteTextures(1, &pbo_tex);
+   _mesa_DeleteBuffers(1, &pbo);
+
+   _mesa_meta_end(ctx);
+
+   return success;
+}
diff --git a/mesalib/src/mesa/drivers/dri/common/Makefile.am b/mesalib/src/mesa/drivers/dri/common/Makefile.am
index af6f742a0..da8f97a98 100644
--- a/mesalib/src/mesa/drivers/dri/common/Makefile.am
+++ b/mesalib/src/mesa/drivers/dri/common/Makefile.am
@@ -30,6 +30,8 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/ \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gallium/auxiliary \
 	$(DEFINES) \
 	$(EXPAT_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
diff --git a/mesalib/src/mesa/drivers/dri/common/drirc b/mesalib/src/mesa/drivers/dri/common/drirc
index 4b9841bd2..cecd6a953 100644
--- a/mesalib/src/mesa/drivers/dri/common/drirc
+++ b/mesalib/src/mesa/drivers/dri/common/drirc
@@ -87,5 +87,9 @@ TODO: document the other workarounds.
         <application name="Topogun (64-bit)" executable="topogun64">
             <option name="always_have_depth_buffer" value="true" />
         </application>
+
+        <application name="Dead Island" executable="DeadIslandGame">
+            <option name="allow_glsl_extension_directive_midshader" value="true" />
+        </application>
     </device>
 </driconf>
diff --git a/mesalib/src/mesa/drivers/dri/swrast/Makefile.am b/mesalib/src/mesa/drivers/dri/swrast/Makefile.am
index 0837b4518..bfc3c10e3 100644
--- a/mesalib/src/mesa/drivers/dri/swrast/Makefile.am
+++ b/mesalib/src/mesa/drivers/dri/swrast/Makefile.am
@@ -29,6 +29,8 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/ \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_builddir)/src/mesa/drivers/dri/common \
 	$(DEFINES) \
diff --git a/mesalib/src/mesa/drivers/dri/swrast/swrast.c b/mesalib/src/mesa/drivers/dri/swrast/swrast.c
index d62fed30c..8005f7d69 100644
--- a/mesalib/src/mesa/drivers/dri/swrast/swrast.c
+++ b/mesalib/src/mesa/drivers/dri/swrast/swrast.c
@@ -54,6 +54,7 @@
 
 #include "main/teximage.h"
 #include "main/texformat.h"
+#include "main/texobj.h"
 #include "main/texstate.h"
 
 #include "swrast_priv.h"
diff --git a/mesalib/src/mesa/main/.gitignore b/mesalib/src/mesa/main/.gitignore
index fec06291a..e65472d63 100644
--- a/mesalib/src/mesa/main/.gitignore
+++ b/mesalib/src/mesa/main/.gitignore
@@ -9,3 +9,5 @@ remap_helper.h
 get_hash.h
 get_hash.h.tmp
 format_info.c
+format_pack.c
+format_unpack.c
diff --git a/mesalib/src/mesa/main/api_validate.c b/mesalib/src/mesa/main/api_validate.c
index 7d9893385..9c2e29e64 100644
--- a/mesalib/src/mesa/main/api_validate.c
+++ b/mesalib/src/mesa/main/api_validate.c
@@ -36,25 +36,6 @@
 
 
 /**
- * \return  number of bytes in array [count] of type.
- */
-static GLsizei
-index_bytes(GLenum type, GLsizei count)
-{
-   if (type == GL_UNSIGNED_INT) {
-      return count * sizeof(GLuint);
-   }
-   else if (type == GL_UNSIGNED_BYTE) {
-      return count * sizeof(GLubyte);
-   }
-   else {
-      ASSERT(type == GL_UNSIGNED_SHORT);
-      return count * sizeof(GLushort);
-   }
-}
-
-
-/**
  * Check if OK to draw arrays/elements.
  */
 static bool
@@ -67,9 +48,7 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
    switch (ctx->API) {
    case API_OPENGLES2:
       /* For ES2, we can draw if we have a vertex program/shader). */
-      if (!ctx->VertexProgram._Current)
-	 return false;
-      break;
+      return ctx->VertexProgram._Current != NULL;
 
    case API_OPENGLES:
       /* For OpenGL ES, only draw if we have vertex positions
@@ -89,14 +68,21 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
          _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no VAO bound)", function);
          return false;
       }
-      /* fallthrough */
-   case API_OPENGL_COMPAT: {
-      const struct gl_shader_program *const vsProg =
-         ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
-      const bool haveVertexShader = (vsProg && vsProg->LinkStatus);
-      const bool haveVertexProgram = ctx->VertexProgram._Enabled;
-
-      if (haveVertexShader || haveVertexProgram) {
+
+      /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec
+       * says:
+       *
+       *     "If there is no active program for the vertex or fragment shader
+       *     stages, the results of vertex and/or fragment processing will be
+       *     undefined. However, this is not an error."
+       *
+       * The fragment shader is not tested here because other state (e.g.,
+       * GL_RASTERIZER_DISCARD) affects whether or not we actually care.
+       */
+      return ctx->VertexProgram._Current != NULL;
+
+   case API_OPENGL_COMPAT:
+      if (ctx->VertexProgram._Current != NULL) {
          /* Draw regardless of whether or not we have any vertex arrays.
           * (Ex: could draw a point using a constant vertex pos)
           */
@@ -109,7 +95,6 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
                  ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled);
       }
       break;
-   }
 
    default:
       unreachable("Invalid API value in check_valid_to_render()");
@@ -128,27 +113,21 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
 bool
 _mesa_is_valid_prim_mode(struct gl_context *ctx, GLenum mode)
 {
-   switch (mode) {
-   case GL_POINTS:
-   case GL_LINES:
-   case GL_LINE_LOOP:
-   case GL_LINE_STRIP:
-   case GL_TRIANGLES:
-   case GL_TRIANGLE_STRIP:
-   case GL_TRIANGLE_FAN:
+   /* The overwhelmingly common case is (mode <= GL_TRIANGLE_FAN).  Test that
+    * first and exit.  You would think that a switch-statement would be the
+    * right approach, but at least GCC 4.7.2 generates some pretty dire code
+    * for the common case.
+    */
+   if (likely(mode <= GL_TRIANGLE_FAN))
       return true;
-   case GL_QUADS:
-   case GL_QUAD_STRIP:
-   case GL_POLYGON:
+   /* Remaining modes up to GL_POLYGON (GL_QUADS, GL_QUAD_STRIP, GL_POLYGON) are valid only in a compatibility context. */
+   if (mode <= GL_POLYGON)
       return (ctx->API == API_OPENGL_COMPAT);
-   case GL_LINES_ADJACENCY:
-   case GL_LINE_STRIP_ADJACENCY:
-   case GL_TRIANGLES_ADJACENCY:
-   case GL_TRIANGLE_STRIP_ADJACENCY:
+   /* The *_ADJACENCY modes are valid only when geometry shaders are available. */
+   if (mode <= GL_TRIANGLE_STRIP_ADJACENCY)
       return _mesa_has_geometry_shaders(ctx);
-   default:
-      return false;
-   }
+   /* Anything above GL_TRIANGLE_STRIP_ADJACENCY is rejected. */
+   return false;
 }
 
 
@@ -351,20 +330,10 @@ validate_DrawElements_common(struct gl_context *ctx,
    if (!check_valid_to_render(ctx, caller))
       return false;
 
-   /* Vertex buffer object tests */
-   if (_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
-      /* use indices in the buffer object */
-      /* make sure count doesn't go outside buffer bounds */
-      if (index_bytes(type, count) > ctx->Array.VAO->IndexBufferObj->Size) {
-         _mesa_warning(ctx, "%s index out of buffer bounds", caller);
-         return false;
-      }
-   }
-   else {
-      /* not using a VBO */
-      if (!indices)
-         return false;
-   }
+   /* Not using a VBO for indices, so avoid NULL pointer derefs later.
+    */
+   if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj) && indices == NULL)
+      return false;
 
    if (count == 0)
       return false;
@@ -422,21 +391,9 @@ _mesa_validate_MultiDrawElements(struct gl_context *ctx,
    if (!check_valid_to_render(ctx, "glMultiDrawElements"))
       return GL_FALSE;
 
-   /* Vertex buffer object tests */
-   if (_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
-      /* use indices in the buffer object */
-      /* make sure count doesn't go outside buffer bounds */
-      for (i = 0; i < primcount; i++) {
-         if (index_bytes(type, count[i]) >
-             ctx->Array.VAO->IndexBufferObj->Size) {
-            _mesa_warning(ctx,
-                          "glMultiDrawElements index out of buffer bounds");
-            return GL_FALSE;
-         }
-      }
-   }
-   else {
-      /* not using a VBO */
+   /* Not using a VBO for indices, so avoid NULL pointer derefs later.
+    */
+   if (!_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
       for (i = 0; i < primcount; i++) {
          if (!indices[i])
             return GL_FALSE;
diff --git a/mesalib/src/mesa/main/attrib.c b/mesalib/src/mesa/main/attrib.c
index 4684615a8..07934b9bc 100644
--- a/mesalib/src/mesa/main/attrib.c
+++ b/mesalib/src/mesa/main/attrib.c
@@ -1248,8 +1248,10 @@ _mesa_PopAttrib(void)
                _mesa_FrontFace(polygon->FrontFace);
                _mesa_PolygonMode(GL_FRONT, polygon->FrontMode);
                _mesa_PolygonMode(GL_BACK, polygon->BackMode);
-               _mesa_PolygonOffset(polygon->OffsetFactor,
-                                   polygon->OffsetUnits);
+               _mesa_polygon_offset_clamp(ctx,
+                                          polygon->OffsetFactor,
+                                          polygon->OffsetUnits,
+                                          polygon->OffsetClamp);
                _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, polygon->SmoothFlag);
                _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, polygon->StippleFlag);
                _mesa_set_enable(ctx, GL_CULL_FACE, polygon->CullFlag);
diff --git a/mesalib/src/mesa/main/blit.c b/mesalib/src/mesa/main/blit.c
index 0b70a3da4..b97b56479 100644
--- a/mesalib/src/mesa/main/blit.c
+++ b/mesalib/src/mesa/main/blit.c
@@ -506,7 +506,7 @@ _mesa_BlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
    }
 
    ASSERT(ctx->Driver.BlitFramebuffer);
-   ctx->Driver.BlitFramebuffer(ctx,
+   ctx->Driver.BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
                                srcX0, srcY0, srcX1, srcY1,
                                dstX0, dstY0, dstX1, dstY1,
                                mask, filter);
diff --git a/mesalib/src/mesa/main/bufferobj.c b/mesalib/src/mesa/main/bufferobj.c
index 2bae1bc72..b372c68f2 100644
--- a/mesalib/src/mesa/main/bufferobj.c
+++ b/mesalib/src/mesa/main/bufferobj.c
@@ -117,6 +117,11 @@ get_buffer_target(struct gl_context *ctx, GLenum target)
          return &ctx->AtomicBuffer;
       }
       break;
+   case GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD:
+      if (ctx->Extensions.AMD_pinned_memory) {
+         return &ctx->ExternalVirtualMemoryBuffer;
+      }
+      break;
    default:
       return NULL;
    }
@@ -1226,7 +1231,7 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
             }
          }
 
-         if (ctx->UniformBuffer == bufObj) {
+         if (ctx->AtomicBuffer == bufObj) {
             _mesa_BindBuffer( GL_ATOMIC_COUNTER_BUFFER, 0 );
          }
 
@@ -1242,6 +1247,10 @@ _mesa_DeleteBuffers(GLsizei n, const GLuint *ids)
             _mesa_BindBuffer( GL_TEXTURE_BUFFER, 0 );
          }
 
+         if (ctx->ExternalVirtualMemoryBuffer == bufObj) {
+            _mesa_BindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
+         }
+
          /* The ID is immediately freed for re-use */
          _mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
          /* Make sure we do not run into the classic ABA problem on bind.
@@ -1381,7 +1390,16 @@ _mesa_BufferStorage(GLenum target, GLsizeiptr size, const GLvoid *data,
    ASSERT(ctx->Driver.BufferData);
    if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
                                flags, bufObj)) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferStorage()");
+      if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+         /* Even though the interaction between AMD_pinned_memory and
+          * glBufferStorage is not described in the spec, Graham Sellers
+          * said that it should behave the same as glBufferData.
+          */
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferStorage()");
+      }
+      else {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferStorage()");
+      }
    }
 }
 
@@ -1465,7 +1483,18 @@ _mesa_BufferData(GLenum target, GLsizeiptrARB size,
                                GL_MAP_WRITE_BIT |
                                GL_DYNAMIC_STORAGE_BIT,
                                bufObj)) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferDataARB()");
+      if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+         /* From GL_AMD_pinned_memory:
+          *
+          *   INVALID_OPERATION is generated by BufferData if <target> is
+          *   EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, and the store cannot be
+          *   mapped to the GPU address space.
+          */
+         _mesa_error(ctx, GL_INVALID_OPERATION, "glBufferData()");
+      }
+      else {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBufferData()");
+      }
    }
 }
 
@@ -2887,7 +2916,7 @@ static void
 unbind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count)
 {
    struct gl_buffer_object *bufObj = ctx->Shared->NullBufferObj;
-   GLuint i;
+   GLint i;
 
    for (i = 0; i < count; i++)
       set_ubo_binding(ctx, &ctx->UniformBufferBindings[first + i],
@@ -2898,7 +2927,7 @@ static void
 bind_uniform_buffers_base(struct gl_context *ctx, GLuint first, GLsizei count,
                           const GLuint *buffers)
 {
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_uniform_buffers(ctx, first, count, "glBindBuffersBase"))
       return;
@@ -2965,7 +2994,7 @@ bind_uniform_buffers_range(struct gl_context *ctx, GLuint first, GLsizei count,
                            const GLuint *buffers,
                            const GLintptr *offsets, const GLsizeiptr *sizes)
 {
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_uniform_buffers(ctx, first, count,
                                          "glBindBuffersRange"))
@@ -3122,7 +3151,7 @@ unbind_xfb_buffers(struct gl_context *ctx,
                    GLuint first, GLsizei count)
 {
    struct gl_buffer_object * const bufObj = ctx->Shared->NullBufferObj;
-   GLuint i;
+   GLint i;
 
    for (i = 0; i < count; i++)
       _mesa_set_transform_feedback_binding(ctx, tfObj, first + i,
@@ -3136,7 +3165,7 @@ bind_xfb_buffers_base(struct gl_context *ctx,
 {
    struct gl_transform_feedback_object *tfObj =
       ctx->TransformFeedback.CurrentObject;
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_xfb_buffers(ctx, tfObj, first, count,
                                      "glBindBuffersBase"))
@@ -3204,7 +3233,7 @@ bind_xfb_buffers_range(struct gl_context *ctx,
 {
    struct gl_transform_feedback_object *tfObj =
        ctx->TransformFeedback.CurrentObject;
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_xfb_buffers(ctx, tfObj, first, count,
                                      "glBindBuffersRange"))
@@ -3342,7 +3371,7 @@ static void
 unbind_atomic_buffers(struct gl_context *ctx, GLuint first, GLsizei count)
 {
    struct gl_buffer_object * const bufObj = ctx->Shared->NullBufferObj;
-   GLuint i;
+   GLint i;
 
    for (i = 0; i < count; i++)
       set_atomic_buffer_binding(ctx, &ctx->AtomicBufferBindings[first + i],
@@ -3355,7 +3384,7 @@ bind_atomic_buffers_base(struct gl_context *ctx,
                          GLsizei count,
                          const GLuint *buffers)
 {
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_atomic_buffers(ctx, first, count,
                                         "glBindBuffersBase"))
@@ -3422,7 +3451,7 @@ bind_atomic_buffers_range(struct gl_context *ctx,
                           const GLintptr *offsets,
                           const GLsizeiptr *sizes)
 {
-   GLuint i;
+   GLint i;
 
    if (!error_check_bind_atomic_buffers(ctx, first, count,
                                         "glBindBuffersRange"))
diff --git a/mesalib/src/mesa/main/buffers.c b/mesalib/src/mesa/main/buffers.c
index 1ee20098d..e5076e9bb 100644
--- a/mesalib/src/mesa/main/buffers.c
+++ b/mesalib/src/mesa/main/buffers.c
@@ -301,7 +301,7 @@ _mesa_DrawBuffer(GLenum buffer)
 void GLAPIENTRY
 _mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
 {
-   GLint output;
+   GLuint output;
    GLbitfield usedBufferMask, supportedMask;
    GLbitfield destMask[MAX_DRAW_BUFFERS];
    GET_CURRENT_CONTEXT(ctx);
@@ -326,8 +326,9 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
    /* From the ES 3.0 specification, page 180:
     * "If the GL is bound to the default framebuffer, then n must be 1
     *  and the constant must be BACK or NONE."
+    * (same restriction applies with GL_EXT_draw_buffers specification)
     */
-   if (_mesa_is_gles3(ctx) && _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
+   if (ctx->API == API_OPENGLES2 && _mesa_is_winsys_fbo(ctx->DrawBuffer) &&
        (n != 1 || (buffers[0] != GL_NONE && buffers[0] != GL_BACK))) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)");
       return;
@@ -335,6 +336,20 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
 
    /* complicated error checking... */
    for (output = 0; output < n; output++) {
+      /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL ES 3.0
+       * specification says:
+       *
+       *     "Each buffer listed in bufs must be BACK, NONE, or one of the values
+       *      from table 4.3 (NONE, COLOR_ATTACHMENTi)"
+       */
+      if (_mesa_is_gles3(ctx) && buffers[output] != GL_NONE &&
+          buffers[output] != GL_BACK &&
+          (buffers[output] < GL_COLOR_ATTACHMENT0 ||
+           buffers[output] >= GL_COLOR_ATTACHMENT0 + ctx->Const.MaxColorAttachments)) {
+         _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffers(buffer)");
+         return;
+      }
+
       if (buffers[output] == GL_NONE) {
          destMask[output] = 0x0;
       }
@@ -399,8 +414,9 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
          /* ES 3.0 is even more restrictive.  From the ES 3.0 spec, page 180:
           * "If the GL is bound to a framebuffer object, the ith buffer listed
           *  in bufs must be COLOR_ATTACHMENTi or NONE. [...] INVALID_OPERATION."
+          * (same restriction applies with GL_EXT_draw_buffers specification)
           */
-         if (_mesa_is_gles3(ctx) && _mesa_is_user_fbo(ctx->DrawBuffer) &&
+         if (ctx->API == API_OPENGLES2 && _mesa_is_user_fbo(ctx->DrawBuffer) &&
              buffers[output] != GL_NONE &&
              buffers[output] != GL_COLOR_ATTACHMENT0 + output) {
             _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawBuffers(buffer)");
diff --git a/mesalib/src/mesa/main/clear.c b/mesalib/src/mesa/main/clear.c
index 4671ee245..3c4ced8ed 100644
--- a/mesalib/src/mesa/main/clear.c
+++ b/mesalib/src/mesa/main/clear.c
@@ -225,7 +225,7 @@ _mesa_Clear( GLbitfield mask )
 
 
 /** Returned by make_color_buffer_mask() for errors */
-#define INVALID_MASK ~0x0
+#define INVALID_MASK ~0x0U
 
 
 /**
diff --git a/mesalib/src/mesa/main/colormac.h b/mesalib/src/mesa/main/colormac.h
index c8adca6b6..bc69f4673 100644
--- a/mesalib/src/mesa/main/colormac.h
+++ b/mesalib/src/mesa/main/colormac.h
@@ -69,9 +69,6 @@ _mesa_unclamped_float_rgba_to_ubyte(GLubyte dst[4], const GLfloat src[4])
 #define PACK_COLOR_565( X, Y, Z )                                  \
    ((((X) & 0xf8) << 8) | (((Y) & 0xfc) << 3) | (((Z) & 0xf8) >> 3))
 
-#define PACK_COLOR_565_REV( X, Y, Z ) \
-   (((X) & 0xf8) | ((Y) & 0xe0) >> 5 | (((Y) & 0x1c) << 11) | (((Z) & 0xf8) << 5))
-
 #define PACK_COLOR_5551( R, G, B, A )					\
    ((((R) & 0xf8) << 8) | (((G) & 0xf8) << 3) | (((B) & 0xf8) >> 2) |	\
     ((A) >> 7))
diff --git a/mesalib/src/mesa/main/config.h b/mesalib/src/mesa/main/config.h
index 4ec4b7502..5a66a4eec 100644
--- a/mesalib/src/mesa/main/config.h
+++ b/mesalib/src/mesa/main/config.h
@@ -178,7 +178,7 @@
 #define MAX_COMBINED_ATOMIC_BUFFERS    (MAX_UNIFORM_BUFFERS * 6)
 /* Size of an atomic counter in bytes according to ARB_shader_atomic_counters */
 #define ATOMIC_COUNTER_SIZE            4
-#define MAX_IMAGE_UNIFORMS             16
+#define MAX_IMAGE_UNIFORMS             32
 /* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
 #define MAX_IMAGE_UNITS                (MAX_IMAGE_UNIFORMS * 6)
 /*@}*/
@@ -300,6 +300,9 @@
 #define MAX_COMPUTE_IMAGE_UNIFORMS          8
 /*@}*/
 
+/** For GL_ARB_pipeline_statistics_query */
+#define MAX_PIPELINE_STATISTICS             11
+
 /*
  * Color channel component order
  * 
diff --git a/mesalib/src/mesa/main/context.c b/mesalib/src/mesa/main/context.c
index 400c158a7..b186a1fad 100644
--- a/mesalib/src/mesa/main/context.c
+++ b/mesalib/src/mesa/main/context.c
@@ -118,7 +118,7 @@
 #include "scissor.h"
 #include "shared.h"
 #include "shaderobj.h"
-#include "simple_list.h"
+#include "util/simple_list.h"
 #include "state.h"
 #include "stencil.h"
 #include "texcompress_s3tc.h"
@@ -908,6 +908,9 @@ nop_glFlush(void)
 #endif
 
 
+extern void (*__glapi_noop_table[])(void);
+
+
 /**
  * Allocate and initialize a new dispatch table.  All the dispatch
  * function pointers will point at the _mesa_generic_nop() function
@@ -929,7 +932,13 @@ _mesa_alloc_dispatch_table(void)
       _glapi_proc *entry = (_glapi_proc *) table;
       GLint i;
       for (i = 0; i < numEntries; i++) {
+#if defined(_WIN32)
+         /* FIXME: This will not generate an error, but at least it won't
+          * corrupt the stack like _mesa_generic_nop does. */
+         entry[i] = __glapi_noop_table[i];
+#else
          entry[i] = (_glapi_proc) _mesa_generic_nop;
+#endif
       }
 
 #if defined(_WIN32)
@@ -1271,7 +1280,6 @@ _mesa_free_context_data( struct gl_context *ctx )
 
    _mesa_free_attrib_data(ctx);
    _mesa_free_buffer_objects(ctx);
-   _mesa_free_lighting_data( ctx );
    _mesa_free_eval_data( ctx );
    _mesa_free_texture_data( ctx );
    _mesa_free_matrix_data( ctx );
@@ -1903,49 +1911,69 @@ shader_linked_or_absent(struct gl_context *ctx,
 GLboolean
 _mesa_valid_to_render(struct gl_context *ctx, const char *where)
 {
-   bool from_glsl_shader[MESA_SHADER_COMPUTE] = { false };
    unsigned i;
 
    /* This depends on having up to date derived state (shaders) */
    if (ctx->NewState)
       _mesa_update_state(ctx);
 
-   for (i = 0; i < MESA_SHADER_COMPUTE; i++) {
-      if (!shader_linked_or_absent(ctx, ctx->_Shader->CurrentProgram[i],
-                                   &from_glsl_shader[i], where))
-         return GL_FALSE;
-   }
+   if (ctx->API == API_OPENGL_CORE || ctx->API == API_OPENGLES2) {
+      bool from_glsl_shader[MESA_SHADER_COMPUTE] = { false };
 
-   /* Any shader stages that are not supplied by the GLSL shader and have
-    * assembly shaders enabled must now be validated.
-    */
-   if (!from_glsl_shader[MESA_SHADER_VERTEX]
-       && ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-		  "%s(vertex program not valid)", where);
-      return GL_FALSE;
-   }
+      for (i = 0; i < MESA_SHADER_COMPUTE; i++) {
+         if (!shader_linked_or_absent(ctx, ctx->_Shader->CurrentProgram[i],
+                                      &from_glsl_shader[i], where))
+            return GL_FALSE;
+      }
 
-   /* FINISHME: If GL_NV_geometry_program4 is ever supported, the current
-    * FINISHME: geometry program should validated here.
-    */
-   (void) from_glsl_shader[MESA_SHADER_GEOMETRY];
+      /* In OpenGL Core Profile and OpenGL ES 2.0 / 3.0, there are no assembly
+       * shaders.  Don't check state related to those.
+       */
+   } else {
+      bool has_vertex_shader = false;
+      bool has_fragment_shader = false;
+
+      /* In OpenGL Compatibility Profile, there is only vertex shader and
+       * fragment shader.  We take this path also for API_OPENGLES because
+       * optimizing that path would make the other (more common) paths
+       * slightly slower.
+       */
+      if (!shader_linked_or_absent(ctx,
+                                   ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX],
+                                   &has_vertex_shader, where))
+         return GL_FALSE;
 
-   if (!from_glsl_shader[MESA_SHADER_FRAGMENT]) {
-      if (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled) {
-	 _mesa_error(ctx, GL_INVALID_OPERATION,
-		     "%s(fragment program not valid)", where);
-	 return GL_FALSE;
-      }
+      if (!shader_linked_or_absent(ctx,
+                                   ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT],
+                                   &has_fragment_shader, where))
+         return GL_FALSE;
 
-      /* If drawing to integer-valued color buffers, there must be an
-       * active fragment shader (GL_EXT_texture_integer).
+      /* Any shader stages that are not supplied by the GLSL shader and have
+       * assembly shaders enabled must now be validated.
        */
-      if (ctx->DrawBuffer && ctx->DrawBuffer->_IntegerColor) {
+      if (!has_vertex_shader
+          && ctx->VertexProgram.Enabled && !ctx->VertexProgram._Enabled) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s(integer format but no fragment shader)", where);
+                     "%s(vertex program not valid)", where);
          return GL_FALSE;
       }
+
+      if (!has_fragment_shader) {
+         if (ctx->FragmentProgram.Enabled && !ctx->FragmentProgram._Enabled) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "%s(fragment program not valid)", where);
+            return GL_FALSE;
+         }
+
+         /* If drawing to integer-valued color buffers, there must be an
+          * active fragment shader (GL_EXT_texture_integer).
+          */
+         if (ctx->DrawBuffer && ctx->DrawBuffer->_IntegerColor) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "%s(integer format but no fragment shader)", where);
+            return GL_FALSE;
+         }
+      }
    }
 
    /* A pipeline object is bound */
diff --git a/mesalib/src/mesa/main/context.h b/mesalib/src/mesa/main/context.h
index d902ea76e..d5650877e 100644
--- a/mesalib/src/mesa/main/context.h
+++ b/mesalib/src/mesa/main/context.h
@@ -326,6 +326,17 @@ _mesa_has_geometry_shaders(const struct gl_context *ctx)
 }
 
 
+/**
+ * Checks if the context supports compute shaders.
+ */
+static inline bool
+_mesa_has_compute_shaders(const struct gl_context *ctx)
+{
+   return (ctx->API == API_OPENGL_CORE && ctx->Extensions.ARB_compute_shader) ||
+      (ctx->API == API_OPENGLES2 && ctx->Version >= 31);
+}
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/mesalib/src/mesa/main/copyimage.c b/mesalib/src/mesa/main/copyimage.c
index df7d7c272..455929dc2 100644
--- a/mesalib/src/mesa/main/copyimage.c
+++ b/mesalib/src/mesa/main/copyimage.c
@@ -152,7 +152,7 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum *target, int level,
          return false;
       }
 
-      *tex_image = _mesa_select_tex_image(ctx, *tex_obj, *target, level);
+      *tex_image = _mesa_select_tex_image(*tex_obj, *target, level);
       if (!*tex_image) {
          _mesa_error(ctx, GL_INVALID_VALUE,
                      "glCopyImageSubData(%sLevel = %u)", dbg_prefix, level);
diff --git a/mesalib/src/mesa/main/dd.h b/mesalib/src/mesa/main/dd.h
index 2f40915d9..ec8662b30 100644
--- a/mesalib/src/mesa/main/dd.h
+++ b/mesalib/src/mesa/main/dd.h
@@ -415,6 +415,22 @@ struct dd_function_table {
                             struct gl_texture_object *texObj,
                             struct gl_texture_object *origTexObj);
 
+   /** Sets the given buffer object as the texture's storage.  The given
+    * texture must have target GL_TEXTURE_1D, GL_TEXTURE_2D,
+    * GL_TEXTURE_RECTANGLE, or GL_TEXTURE_2D_ARRAY; have only a single
+    * mipmap level; be immutable; and must not have any assigned storage.
+    * The format and dimensions of the gl_texture_object will already be
+    * initialized.
+    *
+    * This function is used by the meta PBO texture upload path.
+    */
+   bool (*SetTextureStorageForBufferObject)(struct gl_context *ctx,
+                                            struct gl_texture_object *texObj,
+                                            struct gl_buffer_object *bufferObj,
+                                            uint32_t buffer_offset,
+                                            uint32_t row_stride,
+                                            bool read_only);
+
    /**
     * Map a renderbuffer into user space.
     * \param mode  bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT and
@@ -563,7 +579,7 @@ struct dd_function_table {
    /** Select a polygon rasterization mode */
    void (*PolygonMode)(struct gl_context *ctx, GLenum face, GLenum mode);
    /** Set the scale and units used to calculate depth values */
-   void (*PolygonOffset)(struct gl_context *ctx, GLfloat factor, GLfloat units);
+   void (*PolygonOffset)(struct gl_context *ctx, GLfloat factor, GLfloat units, GLfloat clamp);
    /** Set the polygon stippling pattern */
    void (*PolygonStipple)(struct gl_context *ctx, const GLubyte *mask );
    /* Specifies the current buffer for reading */
@@ -697,6 +713,8 @@ struct dd_function_table {
                                struct gl_framebuffer *fb);
    /*@}*/
    void (*BlitFramebuffer)(struct gl_context *ctx,
+                           struct gl_framebuffer *readFb,
+                           struct gl_framebuffer *drawFb,
                            GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                            GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                            GLbitfield mask, GLenum filter);
diff --git a/mesalib/src/mesa/main/dlist.c b/mesalib/src/mesa/main/dlist.c
index d297f5120..025f6abd2 100644
--- a/mesalib/src/mesa/main/dlist.c
+++ b/mesalib/src/mesa/main/dlist.c
@@ -484,9 +484,13 @@ typedef enum
    /* ARB_uniform_buffer_object */
    OPCODE_UNIFORM_BLOCK_BINDING,
 
+   /* EXT_polygon_offset_clamp */
+   OPCODE_POLYGON_OFFSET_CLAMP,
+
    /* The following three are meta instructions */
    OPCODE_ERROR,                /* raise compiled-in error */
    OPCODE_CONTINUE,
+   OPCODE_NOP,                  /* No-op (used for 8-byte alignment) */
    OPCODE_END_OF_LIST,
    OPCODE_EXT_0
 } OpCode;
@@ -545,13 +549,13 @@ union pointer
  * Save a 4 or 8-byte pointer at dest (and dest+1).
  */
 static inline void
-save_pointer(union gl_dlist_node *dest, void *src)
+save_pointer(Node *dest, void *src)
 {
    union pointer p;
    unsigned i;
 
    STATIC_ASSERT(POINTER_DWORDS == 1 || POINTER_DWORDS == 2);
-   STATIC_ASSERT(sizeof(union gl_dlist_node) == 4);
+   STATIC_ASSERT(sizeof(Node) == 4);
 
    p.ptr = src;
 
@@ -564,7 +568,7 @@ save_pointer(union gl_dlist_node *dest, void *src)
  * Retrieve a 4 or 8-byte pointer from node (node+1).
  */
 static inline void *
-get_pointer(const union gl_dlist_node *node)
+get_pointer(const Node *node)
 {
    union pointer p;
    unsigned i;
@@ -578,7 +582,7 @@ get_pointer(const union gl_dlist_node *node)
 
 /**
  * Used to store a 64-bit uint in a pair of "Nodes" for the sake of 32-bit
- * environment.  In 64-bit env, sizeof(Node)==8 anyway.
+ * environment.
  */
 union uint64_pair
 {
@@ -957,11 +961,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       /* no PBO */
       GLvoid *image;
 
-      if (type == GL_BITMAP)
-         image = _mesa_unpack_bitmap(width, height, pixels, unpack);
-      else
-         image = _mesa_unpack_image(dimensions, width, height, depth,
-                                    format, type, pixels, unpack);
+      image = _mesa_unpack_image(dimensions, width, height, depth,
+                                 format, type, pixels, unpack);
       if (pixels && !image) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "display list construction");
       }
@@ -983,11 +984,8 @@ unpack_image(struct gl_context *ctx, GLuint dimensions,
       }
 
       src = ADD_POINTERS(map, pixels);
-      if (type == GL_BITMAP)
-         image = _mesa_unpack_bitmap(width, height, src, unpack);
-      else
-         image = _mesa_unpack_image(dimensions, width, height, depth,
-                                    format, type, src, unpack);
+      image = _mesa_unpack_image(dimensions, width, height, depth,
+                                 format, type, src, unpack);
 
       ctx->Driver.UnmapBuffer(ctx, unpack->BufferObj, MAP_INTERNAL);
 
@@ -1018,16 +1016,19 @@ memdup(const void *src, GLsizei bytes)
  * Allocate space for a display list instruction (opcode + payload space).
  * \param opcode  the instruction opcode (OPCODE_* value)
  * \param bytes   instruction payload size (not counting opcode)
- * \return pointer to allocated memory (the opcode space)
+ * \param align8  does the payload need to be 8-byte aligned?
+ *                This is only relevant in 64-bit environments.
+ * \return pointer to allocated memory (the payload will be at pointer+1)
  */
 static Node *
-dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
+dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes, bool align8)
 {
    const GLuint numNodes = 1 + (bytes + sizeof(Node) - 1) / sizeof(Node);
    const GLuint contNodes = 1 + POINTER_DWORDS;  /* size of continue info */
+   GLuint nopNode;
    Node *n;
 
-   if (opcode < (GLuint) OPCODE_EXT_0) {
+   if (opcode < OPCODE_EXT_0) {
       if (InstSize[opcode] == 0) {
          /* save instruction size now */
          InstSize[opcode] = numNodes;
@@ -1038,7 +1039,20 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
       }
    }
 
-   if (ctx->ListState.CurrentPos + numNodes + contNodes > BLOCK_SIZE) {
+   if (sizeof(void *) > sizeof(Node) && align8
+       && ctx->ListState.CurrentPos % 2 == 0) {
+      /* The opcode would get placed at node[0] and the payload would start
+       * at node[1].  But the payload needs to be at an even offset (8-byte
+       * multiple).
+       */
+      nopNode = 1;
+   }
+   else {
+      nopNode = 0;
+   }
+
+   if (ctx->ListState.CurrentPos + nopNode + numNodes + contNodes
+       > BLOCK_SIZE) {
       /* This block is full.  Allocate a new block and chain to it */
       Node *newblock;
       n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
@@ -1048,13 +1062,34 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
          _mesa_error(ctx, GL_OUT_OF_MEMORY, "Building display list");
          return NULL;
       }
+
+      /* a fresh block should be 8-byte aligned on 64-bit systems */
+      assert(((GLintptr) newblock) % sizeof(void *) == 0);
+
       save_pointer(&n[1], newblock);
       ctx->ListState.CurrentBlock = newblock;
       ctx->ListState.CurrentPos = 0;
+
+      /* Display list nodes are always 4 bytes.  If we need 8-byte alignment
+       * we have to insert a NOP so that the payload of the real opcode lands
+       * on an even location:
+       *   node[0] = OPCODE_NOP
+       *   node[1] = OPCODE_x;
+       *   node[2] = start of payload
+       */
+      nopNode = sizeof(void *) > sizeof(Node) && align8;
    }
 
    n = ctx->ListState.CurrentBlock + ctx->ListState.CurrentPos;
-   ctx->ListState.CurrentPos += numNodes;
+   if (nopNode) {
+      assert(ctx->ListState.CurrentPos % 2 == 0); /* even value */
+      n[0].opcode = OPCODE_NOP;
+      n++;
+      /* The "real" opcode will now be at an odd location and the payload
+       * will be at an even location.
+       */
+   }
+   ctx->ListState.CurrentPos += nopNode + numNodes;
 
    n[0].opcode = opcode;
 
@@ -1075,7 +1110,22 @@ dlist_alloc(struct gl_context *ctx, OpCode opcode, GLuint bytes)
 void *
 _mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint bytes)
 {
-   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes);
+   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes, false);
+   if (n)
+      return n + 1;  /* return pointer to payload area, after opcode */
+   else
+      return NULL;
+}
+
+
+/**
+ * Same as _mesa_dlist_alloc(), but return a pointer which is 8-byte
+ * aligned in 64-bit environments, 4-byte aligned otherwise.
+ */
+void *
+_mesa_dlist_alloc_aligned(struct gl_context *ctx, GLuint opcode, GLuint bytes)
+{
+   Node *n = dlist_alloc(ctx, (OpCode) opcode, bytes, true);
    if (n)
       return n + 1;  /* return pointer to payload area, after opcode */
    else
@@ -1125,7 +1175,7 @@ _mesa_dlist_alloc_opcode(struct gl_context *ctx,
 static inline Node *
 alloc_instruction(struct gl_context *ctx, OpCode opcode, GLuint nparams)
 {
-   return dlist_alloc(ctx, opcode, nparams * sizeof(Node));
+   return dlist_alloc(ctx, opcode, nparams * sizeof(Node), false);
 }
 
 
@@ -3144,6 +3194,22 @@ save_PolygonOffsetEXT(GLfloat factor, GLfloat bias)
    save_PolygonOffset(factor, ctx->DrawBuffer->_DepthMaxF * bias);
 }
 
+static void GLAPIENTRY
+save_PolygonOffsetClampEXT(GLfloat factor, GLfloat units, GLfloat clamp)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   Node *n;
+   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
+   n = alloc_instruction(ctx, OPCODE_POLYGON_OFFSET_CLAMP, 3);
+   if (n) {
+      n[1].f = factor;
+      n[2].f = units;
+      n[3].f = clamp;
+   }
+   if (ctx->ExecuteFlag) {
+      CALL_PolygonOffsetClampEXT(ctx->Exec, (factor, units, clamp));
+   }
+}
 
 static void GLAPIENTRY
 save_PopAttrib(void)
@@ -7985,17 +8051,8 @@ execute_list(struct gl_context *ctx, GLuint list)
             CALL_LoadIdentity(ctx->Exec, ());
             break;
          case OPCODE_LOAD_MATRIX:
-            if (sizeof(Node) == sizeof(GLfloat)) {
-               CALL_LoadMatrixf(ctx->Exec, (&n[1].f));
-            }
-            else {
-               GLfloat m[16];
-               GLuint i;
-               for (i = 0; i < 16; i++) {
-                  m[i] = n[1 + i].f;
-               }
-               CALL_LoadMatrixf(ctx->Exec, (m));
-            }
+            STATIC_ASSERT(sizeof(Node) == sizeof(GLfloat));
+            CALL_LoadMatrixf(ctx->Exec, (&n[1].f));
             break;
          case OPCODE_LOAD_NAME:
             CALL_LoadName(ctx->Exec, (n[1].ui));
@@ -8041,17 +8098,7 @@ execute_list(struct gl_context *ctx, GLuint list)
             CALL_MatrixMode(ctx->Exec, (n[1].e));
             break;
          case OPCODE_MULT_MATRIX:
-            if (sizeof(Node) == sizeof(GLfloat)) {
-               CALL_MultMatrixf(ctx->Exec, (&n[1].f));
-            }
-            else {
-               GLfloat m[16];
-               GLuint i;
-               for (i = 0; i < 16; i++) {
-                  m[i] = n[1 + i].f;
-               }
-               CALL_MultMatrixf(ctx->Exec, (m));
-            }
+            CALL_MultMatrixf(ctx->Exec, (&n[1].f));
             break;
          case OPCODE_ORTHO:
             CALL_Ortho(ctx->Exec,
@@ -8096,6 +8143,9 @@ execute_list(struct gl_context *ctx, GLuint list)
          case OPCODE_POLYGON_OFFSET:
             CALL_PolygonOffset(ctx->Exec, (n[1].f, n[2].f));
             break;
+         case OPCODE_POLYGON_OFFSET_CLAMP:
+            CALL_PolygonOffsetClampEXT(ctx->Exec, (n[1].f, n[2].f, n[3].f));
+            break;
          case OPCODE_POP_ATTRIB:
             CALL_PopAttrib(ctx->Exec, ());
             break;
@@ -8648,84 +8698,34 @@ execute_list(struct gl_context *ctx, GLuint list)
             CALL_BindFragmentShaderATI(ctx->Exec, (n[1].i));
             break;
          case OPCODE_SET_FRAGMENT_SHADER_CONSTANTS_ATI:
-            {
-               GLfloat values[4];
-               GLuint i, dst = n[1].ui;
-
-               for (i = 0; i < 4; i++)
-                  values[i] = n[1 + i].f;
-               CALL_SetFragmentShaderConstantATI(ctx->Exec, (dst, values));
-            }
+            CALL_SetFragmentShaderConstantATI(ctx->Exec, (n[1].ui, &n[2].f));
             break;
          case OPCODE_ATTR_1F_NV:
             CALL_VertexAttrib1fNV(ctx->Exec, (n[1].e, n[2].f));
             break;
          case OPCODE_ATTR_2F_NV:
-            /* Really shouldn't have to do this - the Node structure
-             * is convenient, but it would be better to store the data
-             * packed appropriately so that it can be sent directly
-             * on.  With x86_64 becoming common, this will start to
-             * matter more.
-             */
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib2fvNV(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib2fNV(ctx->Exec, (n[1].e, n[2].f, n[3].f));
+            CALL_VertexAttrib2fvNV(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_ATTR_3F_NV:
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib3fvNV(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib3fNV(ctx->Exec, (n[1].e, n[2].f, n[3].f,
-                                                 n[4].f));
+            CALL_VertexAttrib3fvNV(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_ATTR_4F_NV:
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib4fvNV(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib4fNV(ctx->Exec, (n[1].e, n[2].f, n[3].f,
-                                                 n[4].f, n[5].f));
+            CALL_VertexAttrib4fvNV(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_ATTR_1F_ARB:
             CALL_VertexAttrib1fARB(ctx->Exec, (n[1].e, n[2].f));
             break;
          case OPCODE_ATTR_2F_ARB:
-            /* Really shouldn't have to do this - the Node structure
-             * is convenient, but it would be better to store the data
-             * packed appropriately so that it can be sent directly
-             * on.  With x86_64 becoming common, this will start to
-             * matter more.
-             */
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib2fvARB(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib2fARB(ctx->Exec, (n[1].e, n[2].f, n[3].f));
+            CALL_VertexAttrib2fvARB(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_ATTR_3F_ARB:
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib3fvARB(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib3fARB(ctx->Exec, (n[1].e, n[2].f, n[3].f,
-                                                  n[4].f));
+            CALL_VertexAttrib3fvARB(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_ATTR_4F_ARB:
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_VertexAttrib4fvARB(ctx->Exec, (n[1].e, &n[2].f));
-            else
-               CALL_VertexAttrib4fARB(ctx->Exec, (n[1].e, n[2].f, n[3].f,
-                                                  n[4].f, n[5].f));
+            CALL_VertexAttrib4fvARB(ctx->Exec, (n[1].e, &n[2].f));
             break;
          case OPCODE_MATERIAL:
-            if (sizeof(Node) == sizeof(GLfloat))
-               CALL_Materialfv(ctx->Exec, (n[1].e, n[2].e, &n[3].f));
-            else {
-               GLfloat f[4];
-               f[0] = n[3].f;
-               f[1] = n[4].f;
-               f[2] = n[5].f;
-               f[3] = n[6].f;
-               CALL_Materialfv(ctx->Exec, (n[1].e, n[2].e, f));
-            }
+            CALL_Materialfv(ctx->Exec, (n[1].e, n[2].e, &n[3].f));
             break;
          case OPCODE_BEGIN:
             CALL_Begin(ctx->Exec, (n[1].e));
@@ -8903,6 +8903,9 @@ execute_list(struct gl_context *ctx, GLuint list)
          case OPCODE_CONTINUE:
             n = (Node *) get_pointer(&n[1]);
             break;
+         case OPCODE_NOP:
+            /* no-op */
+            break;
          case OPCODE_END_OF_LIST:
             done = GL_TRUE;
             break;
@@ -9702,6 +9705,9 @@ _mesa_initialize_save_table(const struct gl_context *ctx)
    SET_ProgramUniformMatrix4x2fv(table, save_ProgramUniformMatrix4x2fv);
    SET_ProgramUniformMatrix3x4fv(table, save_ProgramUniformMatrix3x4fv);
    SET_ProgramUniformMatrix4x3fv(table, save_ProgramUniformMatrix4x3fv);
+
+   /* GL_EXT_polygon_offset_clamp */
+   SET_PolygonOffsetClampEXT(table, save_PolygonOffsetClampEXT);
 }
 
 
@@ -9953,6 +9959,9 @@ print_list(struct gl_context *ctx, GLuint list, const char *fname)
             fprintf(f, "DISPLAY-LIST-CONTINUE\n");
             n = (Node *) get_pointer(&n[1]);
             break;
+         case OPCODE_NOP:
+            fprintf(f, "NOP\n");
+            break;
          case OPCODE_END_OF_LIST:
             fprintf(f, "END-LIST %u\n", list);
             done = GL_TRUE;
@@ -10103,6 +10112,8 @@ _mesa_init_display_list(struct gl_context *ctx)
    ctx->List.ListBase = 0;
 
    save_vtxfmt_init(&ctx->ListState.ListVtxfmt);
+
+   InstSize[OPCODE_NOP] = 1;
 }
 
 
diff --git a/mesalib/src/mesa/main/dlist.h b/mesalib/src/mesa/main/dlist.h
index c57eb74da..6189632d4 100644
--- a/mesalib/src/mesa/main/dlist.h
+++ b/mesalib/src/mesa/main/dlist.h
@@ -60,6 +60,9 @@ extern void _mesa_compile_error( struct gl_context *ctx, GLenum error, const cha
 
 extern void *_mesa_dlist_alloc(struct gl_context *ctx, GLuint opcode, GLuint sz);
 
+extern void *
+_mesa_dlist_alloc_aligned(struct gl_context *ctx, GLuint opcode, GLuint bytes);
+
 extern GLint _mesa_dlist_alloc_opcode( struct gl_context *ctx, GLuint sz,
                                        void (*execute)( struct gl_context *, void * ),
                                        void (*destroy)( struct gl_context *, void * ),
diff --git a/mesalib/src/mesa/main/enable.c b/mesalib/src/mesa/main/enable.c
index 417548a3c..11365ecc4 100644
--- a/mesalib/src/mesa/main/enable.c
+++ b/mesalib/src/mesa/main/enable.c
@@ -34,7 +34,7 @@
 #include "enable.h"
 #include "errors.h"
 #include "light.h"
-#include "simple_list.h"
+#include "util/simple_list.h"
 #include "mtypes.h"
 #include "enums.h"
 #include "api_arrayelt.h"
diff --git a/mesalib/src/mesa/main/errors.c b/mesalib/src/mesa/main/errors.c
index 4e7853b90..478e4ed33 100644
--- a/mesalib/src/mesa/main/errors.c
+++ b/mesalib/src/mesa/main/errors.c
@@ -134,7 +134,7 @@ static const GLenum debug_severity_enums[] = {
 static enum mesa_debug_source
 gl_enum_to_debug_source(GLenum e)
 {
-   int i;
+   unsigned i;
 
    for (i = 0; i < Elements(debug_source_enums); i++) {
       if (debug_source_enums[i] == e)
@@ -146,7 +146,7 @@ gl_enum_to_debug_source(GLenum e)
 static enum mesa_debug_type
 gl_enum_to_debug_type(GLenum e)
 {
-   int i;
+   unsigned i;
 
    for (i = 0; i < Elements(debug_type_enums); i++) {
       if (debug_type_enums[i] == e)
@@ -158,7 +158,7 @@ gl_enum_to_debug_type(GLenum e)
 static enum mesa_debug_severity
 gl_enum_to_debug_severity(GLenum e)
 {
-   int i;
+   unsigned i;
 
    for (i = 0; i < Elements(debug_severity_enums); i++) {
       if (debug_severity_enums[i] == e)
@@ -633,7 +633,7 @@ debug_fetch_message(const struct gl_debug_state *debug)
  * Delete the oldest debug messages out of the log.
  */
 static void
-debug_delete_messages(struct gl_debug_state *debug, unsigned count)
+debug_delete_messages(struct gl_debug_state *debug, int count)
 {
    struct gl_debug_log *log = &debug->Log;
 
diff --git a/mesalib/src/mesa/main/extensions.c b/mesalib/src/mesa/main/extensions.c
index 0df04c2e6..f21201538 100644
--- a/mesalib/src/mesa/main/extensions.c
+++ b/mesalib/src/mesa/main/extensions.c
@@ -104,6 +104,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_depth_clamp",                         o(ARB_depth_clamp),                         GL,             2003 },
    { "GL_ARB_depth_texture",                       o(ARB_depth_texture),                       GLL,            2001 },
    { "GL_ARB_derivative_control",                  o(ARB_derivative_control),                  GL,             2014 },
+   { "GL_ARB_direct_state_access",                 o(dummy_false),                             GL,             2014 },
    { "GL_ARB_draw_buffers",                        o(dummy_true),                              GL,             2002 },
    { "GL_ARB_draw_buffers_blend",                  o(ARB_draw_buffers_blend),                  GL,             2009 },
    { "GL_ARB_draw_elements_base_vertex",           o(ARB_draw_elements_base_vertex),           GL,             2009 },
@@ -120,6 +121,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_framebuffer_sRGB",                    o(EXT_framebuffer_sRGB),                    GL,             1998 },
    { "GL_ARB_get_program_binary",                  o(dummy_true),                              GL,             2010 },
    { "GL_ARB_gpu_shader5",                         o(ARB_gpu_shader5),                         GLC,            2010 },
+   { "GL_ARB_gpu_shader_fp64",                     o(ARB_gpu_shader_fp64),                     GLC,            2010 },
    { "GL_ARB_half_float_pixel",                    o(dummy_true),                              GL,             2003 },
    { "GL_ARB_half_float_vertex",                   o(ARB_half_float_vertex),                   GL,             2008 },
    { "GL_ARB_instanced_arrays",                    o(ARB_instanced_arrays),                    GL,             2008 },
@@ -133,6 +135,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_multitexture",                        o(dummy_true),                              GLL,            1998 },
    { "GL_ARB_occlusion_query2",                    o(ARB_occlusion_query2),                    GL,             2003 },
    { "GL_ARB_occlusion_query",                     o(ARB_occlusion_query),                     GLL,            2001 },
+   { "GL_ARB_pipeline_statistics_query",           o(ARB_pipeline_statistics_query),           GL,             2014 },
    { "GL_ARB_pixel_buffer_object",                 o(EXT_pixel_buffer_object),                 GL,             2004 },
    { "GL_ARB_point_parameters",                    o(EXT_point_parameters),                    GLL,            1997 },
    { "GL_ARB_point_sprite",                        o(ARB_point_sprite),                        GL,             2003 },
@@ -147,6 +150,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_shader_bit_encoding",                 o(ARB_shader_bit_encoding),                 GL,             2010 },
    { "GL_ARB_shader_image_load_store",             o(ARB_shader_image_load_store),             GL,             2011 },
    { "GL_ARB_shader_objects",                      o(dummy_true),                              GL,             2002 },
+   { "GL_ARB_shader_precision",                    o(ARB_shader_precision),                    GL,             2010 },
    { "GL_ARB_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
    { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },
    { "GL_ARB_shading_language_100",                o(dummy_true),                              GLL,            2003 },
@@ -156,6 +160,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_stencil_texturing",                   o(ARB_stencil_texturing),                   GL,             2012 },
    { "GL_ARB_sync",                                o(ARB_sync),                                GL,             2003 },
    { "GL_ARB_texture_barrier",                     o(NV_texture_barrier),                      GL,             2014 },
+   { "GL_ARB_tessellation_shader",                 o(ARB_tessellation_shader),                 GLC,            2009 },
    { "GL_ARB_texture_border_clamp",                o(ARB_texture_border_clamp),                GLL,            2000 },
    { "GL_ARB_texture_buffer_object",               o(ARB_texture_buffer_object),               GLC,            2008 },
    { "GL_ARB_texture_buffer_object_rgb32",         o(ARB_texture_buffer_object_rgb32),         GLC,            2009 },
@@ -212,6 +217,7 @@ static const struct extension extension_table[] = {
    { "GL_EXT_compiled_vertex_array",               o(dummy_true),                              GLL,            1996 },
    { "GL_EXT_copy_texture",                        o(dummy_true),                              GLL,            1995 },
    { "GL_EXT_depth_bounds_test",                   o(EXT_depth_bounds_test),                   GL,             2002 },
+   { "GL_EXT_draw_buffers",                        o(dummy_true),                                         ES2, 2012 },
    { "GL_EXT_draw_buffers2",                       o(EXT_draw_buffers2),                       GL,             2006 },
    { "GL_EXT_draw_instanced",                      o(ARB_draw_instanced),                      GL,             2006 },
    { "GL_EXT_draw_range_elements",                 o(dummy_true),                              GLL,            1997 },
@@ -231,6 +237,7 @@ static const struct extension extension_table[] = {
    { "GL_EXT_pixel_buffer_object",                 o(EXT_pixel_buffer_object),                 GL,             2004 },
    { "GL_EXT_point_parameters",                    o(EXT_point_parameters),                    GLL,            1997 },
    { "GL_EXT_polygon_offset",                      o(dummy_true),                              GLL,            1995 },
+   { "GL_EXT_polygon_offset_clamp",                o(EXT_polygon_offset_clamp),                GL,             2014 },
    { "GL_EXT_provoking_vertex",                    o(EXT_provoking_vertex),                    GL,             2009 },
    { "GL_EXT_rescale_normal",                      o(dummy_true),                              GLL,            1997 },
    { "GL_EXT_secondary_color",                     o(dummy_true),                              GLL,            1999 },
@@ -314,6 +321,10 @@ static const struct extension extension_table[] = {
    { "GL_OES_texture_3D",                          o(EXT_texture3D),                                      ES2, 2005 },
    { "GL_OES_texture_cube_map",                    o(ARB_texture_cube_map),                         ES1,       2007 },
    { "GL_OES_texture_env_crossbar",                o(ARB_texture_env_crossbar),                     ES1,       2005 },
+   { "GL_OES_texture_float",                       o(OES_texture_float),                                  ES2, 2005 },
+   { "GL_OES_texture_float_linear",                o(OES_texture_float_linear),                           ES2, 2005 },
+   { "GL_OES_texture_half_float",                  o(OES_texture_half_float),                             ES2, 2005 },
+   { "GL_OES_texture_half_float_linear",           o(OES_texture_half_float_linear),                      ES2, 2005 },
    { "GL_OES_texture_mirrored_repeat",             o(dummy_true),                                   ES1,       2005 },
    { "GL_OES_texture_npot",                        o(ARB_texture_non_power_of_two),                 ES1 | ES2, 2005 },
    { "GL_OES_vertex_array_object",                 o(dummy_true),                                   ES1 | ES2, 2010 },
@@ -327,6 +338,7 @@ static const struct extension extension_table[] = {
    { "GL_AMD_conservative_depth",                  o(ARB_conservative_depth),                  GL,             2009 },
    { "GL_AMD_draw_buffers_blend",                  o(ARB_draw_buffers_blend),                  GL,             2009 },
    { "GL_AMD_performance_monitor",                 o(AMD_performance_monitor),                 GL,             2007 },
+   { "GL_AMD_pinned_memory",                       o(AMD_pinned_memory),                       GL,             2013 },
    { "GL_AMD_seamless_cubemap_per_texture",        o(AMD_seamless_cubemap_per_texture),        GL,             2009 },
    { "GL_AMD_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
    { "GL_AMD_shader_trinary_minmax",               o(dummy_true),                              GL,             2012 },
@@ -503,7 +515,6 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
    ctx->Extensions.NV_point_sprite = GL_TRUE;
    ctx->Extensions.NV_texture_env_combine4 = GL_TRUE;
    ctx->Extensions.NV_texture_rectangle = GL_TRUE;
-   ctx->Extensions.NV_fragment_program_option = GL_TRUE;
    ctx->Extensions.EXT_gpu_program_parameters = GL_TRUE;
    ctx->Extensions.OES_standard_derivatives = GL_TRUE;
    ctx->Extensions.TDFX_texture_compression_FXT1 = GL_TRUE;
diff --git a/mesalib/src/mesa/main/fbobject.c b/mesalib/src/mesa/main/fbobject.c
index 4c3c157a4..305362297 100644
--- a/mesalib/src/mesa/main/fbobject.c
+++ b/mesalib/src/mesa/main/fbobject.c
@@ -468,6 +468,7 @@ set_renderbuffer_attachment(struct gl_context *ctx,
    remove_attachment(ctx, att);
    att->Type = GL_RENDERBUFFER_EXT;
    att->Texture = NULL; /* just to be safe */
+   att->Layered = GL_FALSE;
    att->Complete = GL_FALSE;
    _mesa_reference_renderbuffer(&att->Renderbuffer, rb);
 }
@@ -780,6 +781,18 @@ test_attachment_completeness(const struct gl_context *ctx, GLenum format,
             att->Complete = GL_FALSE;
             return;
          }
+
+         /* OES_texture_float allows creation and use of floating point
+          * textures with GL_FLOAT, GL_HALF_FLOAT but it does not allow
+          * these textures to be used as a render target, this is done via
+          * GL_EXT_color_buffer(_half)_float with set of new sized types.
+          */
+         if (_mesa_is_gles(ctx) && (texImage->TexObject->_IsFloat ||
+                                    texImage->TexObject->_IsHalfFloat)) {
+            att_incomplete("bad internal format");
+            att->Complete = GL_FALSE;
+            return;
+         }
       }
       else if (format == GL_DEPTH) {
          if (baseFormat == GL_DEPTH_COMPONENT) {
@@ -886,6 +899,8 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
    GLuint max_layer_count = 0, att_layer_count;
    bool is_layered = false;
    GLenum layer_tex_target = 0;
+   bool has_depth_attachment = false;
+   bool has_stencil_attachment = false;
 
    assert(_mesa_is_user_fbo(fb));
 
@@ -923,6 +938,8 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
             fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT;
             fbo_incomplete(ctx, "depth attachment incomplete", -1);
             return;
+         } else if (att->Type != GL_NONE) {
+            has_depth_attachment = true;
          }
       }
       else if (i == -1) {
@@ -932,6 +949,8 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
             fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT;
             fbo_incomplete(ctx, "stencil attachment incomplete", -1);
             return;
+         } else if (att->Type != GL_NONE) {
+            has_stencil_attachment = true;
          }
       }
       else {
@@ -960,7 +979,7 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
          if (!is_format_color_renderable(ctx, attFormat,
                                          texImg->InternalFormat) &&
              !is_legal_depth_format(ctx, f)) {
-            fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT;
+            fb->_Status = GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT;
             fbo_incomplete(ctx, "texture attachment incomplete", -1);
             return;
          }
@@ -1128,6 +1147,20 @@ _mesa_test_framebuffer_completeness(struct gl_context *ctx,
       }
    }
 
+   /* The OpenGL ES3 spec, in chapter 9.4. FRAMEBUFFER COMPLETENESS, says:
+    *
+    *    "Depth and stencil attachments, if present, are the same image."
+    *
+    * This restriction is not present in the OpenGL ES2 spec.
+    */
+   if (_mesa_is_gles3(ctx) &&
+       has_stencil_attachment && has_depth_attachment &&
+       !_mesa_has_depthstencil_combined(fb)) {
+      fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
+      fbo_incomplete(ctx, "Depth and stencil attachments must be the same image", -1);
+      return;
+   }
+
    /* Provisionally set status = COMPLETE ... */
    fb->_Status = GL_FRAMEBUFFER_COMPLETE_EXT;
 
@@ -1291,6 +1324,11 @@ _mesa_DeleteRenderbuffers(GLsizei n, const GLuint *renderbuffers)
    GLint i;
    GET_CURRENT_CONTEXT(ctx);
 
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteRenderbuffers(n < 0)");
+      return;
+   }
+
    FLUSH_VERTICES(ctx, _NEW_BUFFERS);
 
    for (i = 0; i < n; i++) {
@@ -1430,9 +1468,6 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat)
    case GL_RGB8:
       return GL_RGB;
    case GL_RGB:
-      if (_mesa_is_gles3(ctx))
-         return GL_RGB;
-      /* fallthrough */
    case GL_R3_G3_B2:
    case GL_RGB4:
    case GL_RGB5:
@@ -1447,9 +1482,6 @@ _mesa_base_fbo_format(struct gl_context *ctx, GLenum internalFormat)
    case GL_RGBA8:
       return GL_RGBA;
    case GL_RGBA:
-      if (_mesa_is_gles3(ctx))
-         return GL_RGBA;
-      /* fallthrough */
    case GL_RGBA2:
    case GL_RGBA12:
    case GL_RGBA16:
@@ -2183,6 +2215,11 @@ _mesa_DeleteFramebuffers(GLsizei n, const GLuint *framebuffers)
    GLint i;
    GET_CURRENT_CONTEXT(ctx);
 
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteFramebuffers(n < 0)");
+      return;
+   }
+
    FLUSH_VERTICES(ctx, _NEW_BUFFERS);
 
    for (i = 0; i < n; i++) {
@@ -2323,7 +2360,7 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb,
 static void
 framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
                     GLenum attachment, GLenum textarget, GLuint texture,
-                    GLint level, GLint zoffset, GLboolean layered)
+                    GLint level, GLuint zoffset, GLboolean layered)
 {
    struct gl_renderbuffer_attachment *att;
    struct gl_texture_object *texObj = NULL;
@@ -2417,8 +2454,8 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
       }
 
       if (texObj->Target == GL_TEXTURE_3D) {
-         const GLint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1);
-         if (zoffset < 0 || zoffset >= maxSize) {
+         const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1);
+         if (zoffset >= maxSize) {
             _mesa_error(ctx, GL_INVALID_VALUE,
                         "glFramebufferTexture%s(zoffset)", caller);
             return;
@@ -2428,8 +2465,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
                (texObj->Target == GL_TEXTURE_2D_ARRAY_EXT) ||
                (texObj->Target == GL_TEXTURE_CUBE_MAP_ARRAY) ||
                (texObj->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)) {
-         if (zoffset < 0 ||
-             zoffset >= (GLint) ctx->Const.MaxArrayTextureLayers) {
+         if (zoffset >= ctx->Const.MaxArrayTextureLayers) {
             _mesa_error(ctx, GL_INVALID_VALUE,
                         "glFramebufferTexture%s(layer)", caller);
             return;
@@ -2766,7 +2802,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment,
 
       if (_mesa_is_gles3(ctx) && attachment != GL_BACK &&
           attachment != GL_DEPTH && attachment != GL_STENCIL) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
+         _mesa_error(ctx, GL_INVALID_ENUM,
                      "glGetFramebufferAttachmentParameteriv(attachment)");
          return;
       }
@@ -2869,7 +2905,8 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment,
          _mesa_error(ctx, err,
                      "glGetFramebufferAttachmentParameteriv(pname)");
       } else if (att->Type == GL_TEXTURE) {
-         if (att->Texture && att->Texture->Target == GL_TEXTURE_3D) {
+         if (att->Texture && (att->Texture->Target == GL_TEXTURE_3D ||
+             att->Texture->Target == GL_TEXTURE_2D_ARRAY)) {
             *params = att->Zoffset;
          }
          else {
@@ -2967,7 +3004,7 @@ _mesa_GetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment,
       }
       else if (att->Texture) {
          const struct gl_texture_image *texImage =
-            _mesa_select_tex_image(ctx, att->Texture, att->Texture->Target,
+            _mesa_select_tex_image(att->Texture, att->Texture->Target,
                                    att->TextureLevel);
          if (texImage) {
             *params = get_component_bits(pname, texImage->_BaseFormat,
diff --git a/mesalib/src/mesa/main/feedback.c b/mesalib/src/mesa/main/feedback.c
index 9ea0b92f3..6bc4294f9 100644
--- a/mesalib/src/mesa/main/feedback.c
+++ b/mesalib/src/mesa/main/feedback.c
@@ -89,7 +89,7 @@ _mesa_FeedbackBuffer( GLsizei size, GLenum type, GLfloat *buffer )
    ctx->Feedback.Type = type;
    ctx->Feedback.BufferSize = size;
    ctx->Feedback.Buffer = buffer;
-   ctx->Feedback.Count = 0;	              /* Becaues of this. */
+   ctx->Feedback.Count = 0;	              /* Because of this. */
 }
 
 
diff --git a/mesalib/src/mesa/main/ff_fragment_shader.cpp b/mesalib/src/mesa/main/ff_fragment_shader.cpp
index bc6fdbdd8..c6828925f 100644
--- a/mesalib/src/mesa/main/ff_fragment_shader.cpp
+++ b/mesalib/src/mesa/main/ff_fragment_shader.cpp
@@ -418,7 +418,7 @@ static GLuint make_state_key( struct gl_context *ctx,  struct state_key *key )
          continue;
 
       samp = _mesa_get_samplerobj(ctx, i);
-      format = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+      format = _mesa_texture_base_format(texObj);
 
       key->unit[i].enabled = 1;
       key->enabled_units |= (1<<i);
diff --git a/mesalib/src/mesa/main/ffvertex_prog.c b/mesalib/src/mesa/main/ffvertex_prog.c
index d5afc3d81..c51c20ddf 100644
--- a/mesalib/src/mesa/main/ffvertex_prog.c
+++ b/mesalib/src/mesa/main/ffvertex_prog.c
@@ -302,7 +302,7 @@ struct ureg {
 struct tnl_program {
    const struct state_key *state;
    struct gl_vertex_program *program;
-   GLint max_inst;  /** number of instructions allocated for program */
+   GLuint max_inst;  /** number of instructions allocated for program */
    GLboolean mvp_with_dp4;
 
    GLuint temp_in_use;
@@ -578,7 +578,7 @@ static void emit_op3fn(struct tnl_program *p,
    GLuint nr;
    struct prog_instruction *inst;
 
-   assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
+   assert(p->program->Base.NumInstructions <= p->max_inst);
 
    if (p->program->Base.NumInstructions == p->max_inst) {
       /* need to extend the program's instruction array */
diff --git a/mesalib/src/mesa/main/format_info.py b/mesalib/src/mesa/main/format_info.py
index 7424fe0cd..3bae57e54 100644
--- a/mesalib/src/mesa/main/format_info.py
+++ b/mesalib/src/mesa/main/format_info.py
@@ -58,7 +58,7 @@ def get_gl_base_format(fmat):
    elif fmat.has_channel('i') and fmat.num_channels() == 1:
       return 'GL_INTENSITY'
    else:
-      assert False
+      sys.exit("error, could not determine base format for {0}, check swizzle".format(fmat.name));
 
 def get_gl_data_type(fmat):
    if fmat.is_compressed():
@@ -192,6 +192,22 @@ for fmat in formats:
                                        int(fmat.block_size() / 8))
 
    print '      {{ {0} }},'.format(', '.join(map(str, fmat.swizzle)))
+   if fmat.is_array():
+      chan = fmat.array_element()
+      norm = chan.norm or chan.type == parser.FLOAT
+      print '      MESA_ARRAY_FORMAT({0}),'.format(', '.join([
+         str(chan.size / 8),
+         str(int(chan.sign)),
+         str(int(chan.type == parser.FLOAT)),
+         str(int(norm)),
+         str(len(fmat.channels)),
+         str(fmat.swizzle[0]),
+         str(fmat.swizzle[1]),
+         str(fmat.swizzle[2]),
+         str(fmat.swizzle[3]),
+      ]))
+   else:
+      print '      0,'
    print '   },'
 
 print '};'
diff --git a/mesalib/src/mesa/main/format_pack.c b/mesalib/src/mesa/main/format_pack.c
deleted file mode 100644
index 31c9f7767..000000000
--- a/mesalib/src/mesa/main/format_pack.c
+++ /dev/null
@@ -1,2994 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (c) 2011 VMware, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * Color, depth, stencil packing functions.
- * Used to pack basic color, depth and stencil formats to specific
- * hardware formats.
- *
- * There are both per-pixel and per-row packing functions:
- * - The former will be used by swrast to write values to the color, depth,
- *   stencil buffers when drawing points, lines and masked spans.
- * - The later will be used for image-oriented functions like glDrawPixels,
- *   glAccum, and glTexImage.
- */
-
-
-#include "colormac.h"
-#include "format_pack.h"
-#include "macros.h"
-#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
-#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
-#include "util/format_srgb.h"
-
-
-/** Helper struct for MESA_FORMAT_Z32_FLOAT_S8X24_UINT */
-struct z32f_x24s8
-{
-   float z;
-   uint32_t x24s8;
-};
-
-
-typedef void (*pack_ubyte_rgba_row_func)(GLuint n,
-                                         const GLubyte src[][4], void *dst);
-
-typedef void (*pack_float_rgba_row_func)(GLuint n,
-                                         const GLfloat src[][4], void *dst);
-
-
-/*
- * MESA_FORMAT_A8B8G8R8_UNORM
- */
-
-static void
-pack_ubyte_A8B8G8R8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(src[RCOMP], src[GCOMP], src[BCOMP], src[ACOMP]);
-}
-
-static void
-pack_float_A8B8G8R8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_A8B8G8R8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_A8B8G8R8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(src[i][RCOMP], src[i][GCOMP],
-                             src[i][BCOMP], src[i][ACOMP]);
-   }
-}
-
-static void
-pack_row_float_A8B8G8R8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_A8B8G8R8_UNORM(v, d + i);
-   }
-}
-
-
-
-/*
- * MESA_FORMAT_R8G8B8A8_UNORM
- */
-
-static void
-pack_ubyte_R8G8B8A8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(src[ACOMP], src[BCOMP], src[GCOMP], src[RCOMP]);
-}
-
-static void
-pack_float_R8G8B8A8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_R8G8B8A8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_R8G8B8A8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(src[i][ACOMP], src[i][BCOMP],
-                             src[i][GCOMP], src[i][RCOMP]);
-   }
-}
-
-static void
-pack_row_float_R8G8B8A8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_R8G8B8A8_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_B8G8R8A8_UNORM
- */
-
-static void
-pack_ubyte_B8G8R8A8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(src[ACOMP], src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B8G8R8A8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_B8G8R8A8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_B8G8R8A8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(src[i][ACOMP], src[i][RCOMP],
-                             src[i][GCOMP], src[i][BCOMP]);
-   }
-}
-
-static void
-pack_row_float_B8G8R8A8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_B8G8R8A8_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_A8R8G8B8_UNORM
- */
-
-static void
-pack_ubyte_A8R8G8B8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(src[BCOMP], src[GCOMP], src[RCOMP], src[ACOMP]);
-}
-
-static void
-pack_float_A8R8G8B8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_A8R8G8B8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_A8R8G8B8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(src[i][BCOMP], src[i][GCOMP],
-                             src[i][RCOMP], src[i][ACOMP]);
-   }
-}
-
-static void
-pack_row_float_A8R8G8B8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_A8R8G8B8_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_B8G8R8X8_UNORM
- */
-
-static void
-pack_ubyte_B8G8R8X8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(0x0, src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B8G8R8X8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_B8G8R8X8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_B8G8R8X8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(0, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
-   }
-}
-
-static void
-pack_row_float_B8G8R8X8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_B8G8R8X8_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_X8R8G8B8_UNORM
- */
-
-static void
-pack_ubyte_X8R8G8B8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = PACK_COLOR_8888(src[BCOMP], src[GCOMP], src[RCOMP], 0);
-}
-
-static void
-pack_float_X8R8G8B8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_X8R8G8B8_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_X8R8G8B8_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i] = PACK_COLOR_8888(src[i][BCOMP], src[i][GCOMP], src[i][RCOMP], 0);
-   }
-}
-
-static void
-pack_row_float_X8R8G8B8_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_X8R8G8B8_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_BGR_UNORM8
- */
-
-static void
-pack_ubyte_BGR_UNORM8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   d[2] = src[RCOMP];
-   d[1] = src[GCOMP];
-   d[0] = src[BCOMP];
-}
-
-static void
-pack_float_BGR_UNORM8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[2], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[0], src[BCOMP]);
-}
-
-static void
-pack_row_ubyte_BGR_UNORM8(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i*3+2] = src[i][RCOMP];
-      d[i*3+1] = src[i][GCOMP];
-      d[i*3+0] = src[i][BCOMP];
-   }
-}
-
-static void
-pack_row_float_BGR_UNORM8(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      d[i*3+2] = v[RCOMP];
-      d[i*3+1] = v[GCOMP];
-      d[i*3+0] = v[BCOMP];
-   }
-}
-
-
-/*
- * MESA_FORMAT_RGB_UNORM8
- */
-
-static void
-pack_ubyte_RGB_UNORM8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   d[2] = src[BCOMP];
-   d[1] = src[GCOMP];
-   d[0] = src[RCOMP];
-}
-
-static void
-pack_float_RGB_UNORM8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[2], src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[0], src[RCOMP]);
-}
-
-static void
-pack_row_ubyte_RGB_UNORM8(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      d[i*3+2] = src[i][BCOMP];
-      d[i*3+1] = src[i][GCOMP];
-      d[i*3+0] = src[i][RCOMP];
-   }
-}
-
-static void
-pack_row_float_RGB_UNORM8(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      d[i*3+2] = v[BCOMP];
-      d[i*3+1] = v[GCOMP];
-      d[i*3+0] = v[RCOMP];
-   }
-}
-
-
-/*
- * MESA_FORMAT_B5G6R5_UNORM
- */
-
-static void
-pack_ubyte_B5G6R5_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_565(src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B5G6R5_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[3];
-   UNCLAMPED_FLOAT_TO_UBYTE(v[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[2], src[BCOMP]);
-   pack_ubyte_B5G6R5_UNORM(v, dst);
-}
-
-static void
-pack_row_ubyte_B5G6R5_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      pack_ubyte_B5G6R5_UNORM(src[i], d + i);
-   }
-}
-
-static void
-pack_row_float_B5G6R5_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_B5G6R5_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_R5G6B5_UNORM
- * Warning: these functions do not match the current Mesa definition
- * of MESA_FORMAT_R5G6B5_UNORM.
- */
-
-static void
-pack_ubyte_R5G6B5_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_565_REV(src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_R5G6B5_UNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte r, g, b;
-   UNCLAMPED_FLOAT_TO_UBYTE(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(g, src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(b, src[BCOMP]);
-   *d = PACK_COLOR_565_REV(r, g, b);
-}
-
-static void
-pack_row_ubyte_R5G6B5_UNORM(GLuint n, const GLubyte src[][4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      pack_ubyte_R5G6B5_UNORM(src[i], d + i);
-   }
-}
-
-static void
-pack_row_float_R5G6B5_UNORM(GLuint n, const GLfloat src[][4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLubyte v[4];
-      _mesa_unclamped_float_rgba_to_ubyte(v, src[i]);
-      pack_ubyte_R5G6B5_UNORM(v, d + i);
-   }
-}
-
-
-/*
- * MESA_FORMAT_B4G4R4A4_UNORM
- */
-
-static void
-pack_ubyte_B4G4R4A4_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_4444(src[ACOMP], src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B4G4R4A4_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_B4G4R4A4_UNORM(v, dst);
-}
-
-/* use fallback row packing functions */
-
-
-/*
- * MESA_FORMAT_A4R4G4B4_UNORM
- */
-
-static void
-pack_ubyte_A4R4G4B4_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_4444(src[BCOMP], src[GCOMP], src[RCOMP], src[ACOMP]);
-}
-
-static void
-pack_float_A4R4G4B4_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_A4R4G4B4_UNORM(v, dst);
-}
-
-/* use fallback row packing functions */
-
-
-/*
- * MESA_FORMAT_A1B5G5R5_UNORM
- */
-
-static void
-pack_ubyte_A1B5G5R5_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_5551(src[RCOMP], src[GCOMP], src[BCOMP], src[ACOMP]);
-}
-
-static void
-pack_float_A1B5G5R5_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_A1B5G5R5_UNORM(v, dst);
-}
-
-/* use fallback row packing functions */
-
-
-/*
- * MESA_FORMAT_B5G5R5A1_UNORM
- */
-
-static void
-pack_ubyte_B5G5R5A1_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_1555(src[ACOMP], src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B5G5R5A1_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_B5G5R5A1_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_A1R5G5B5_UNORM
- * Warning: these functions do not match the current Mesa definition
- * of MESA_FORMAT_A1R5G5B5_UNORM.
- */
-
-static void
-pack_ubyte_A1R5G5B5_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst), tmp;
-   tmp = PACK_COLOR_1555(src[ACOMP], src[RCOMP], src[GCOMP], src[BCOMP]);
-   *d = (tmp >> 8) | (tmp << 8);
-}
-
-static void
-pack_float_A1R5G5B5_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_A1R5G5B5_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_L4A4_UNORM */
-
-static void
-pack_ubyte_L4A4_UNORM(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = PACK_COLOR_44(src[ACOMP], src[RCOMP]);
-}
-
-static void
-pack_float_L4A4_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   UNCLAMPED_FLOAT_TO_UBYTE(v[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[3], src[ACOMP]);
-   pack_ubyte_L4A4_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_L8A8_UNORM */
-
-static void
-pack_ubyte_L8A8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_88(src[ACOMP], src[RCOMP]);
-}
-
-static void
-pack_float_L8A8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   UNCLAMPED_FLOAT_TO_UBYTE(v[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[3], src[ACOMP]);
-   pack_ubyte_L8A8_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_A8L8_UNORM */
-
-static void
-pack_ubyte_A8L8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_88(src[RCOMP], src[ACOMP]);
-}
-
-static void
-pack_float_A8L8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   UNCLAMPED_FLOAT_TO_UBYTE(v[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[3], src[ACOMP]);
-   pack_ubyte_A8L8_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_L16A16_UNORM */
-
-static void
-pack_ubyte_L16A16_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort l = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort a = UBYTE_TO_USHORT(src[ACOMP]);
-   *d = PACK_COLOR_1616(a, l);
-}
-
-static void
-pack_float_L16A16_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort l, a;
-   UNCLAMPED_FLOAT_TO_USHORT(l, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(a, src[ACOMP]);
-   *d = PACK_COLOR_1616(a, l);
-}
-
-
-/* MESA_FORMAT_A16L16_UNORM */
-
-static void
-pack_ubyte_A16L16_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort l = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort a = UBYTE_TO_USHORT(src[ACOMP]);
-   *d = PACK_COLOR_1616(l, a);
-}
-
-static void
-pack_float_A16L16_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort l, a;
-   UNCLAMPED_FLOAT_TO_USHORT(l, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(a, src[ACOMP]);
-   *d = PACK_COLOR_1616(l, a);
-}
-
-
-/* MESA_FORMAT_B2G3R3_UNORM */
-
-static void
-pack_ubyte_B2G3R3_UNORM(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = PACK_COLOR_332(src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_B2G3R3_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   UNCLAMPED_FLOAT_TO_UBYTE(v[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(v[2], src[BCOMP]);
-   pack_ubyte_B2G3R3_UNORM(v, dst);
-}
-
-
-/* MESA_FORMAT_A_UNORM8 */
-
-static void
-pack_ubyte_A_UNORM8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = src[ACOMP];
-}
-
-static void
-pack_float_A_UNORM8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[0], src[ACOMP]);
-}
-
-
-/* MESA_FORMAT_A_UNORM16 */
-
-static void
-pack_ubyte_A_UNORM16(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = UBYTE_TO_USHORT(src[ACOMP]);
-}
-
-static void
-pack_float_A_UNORM16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_USHORT(d[0], src[ACOMP]);
-}
-
-
-/* MESA_FORMAT_L_UNORM8 */
-
-static void
-pack_ubyte_L_UNORM8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = src[RCOMP];
-}
-
-static void
-pack_float_L_UNORM8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   UNCLAMPED_FLOAT_TO_UBYTE(d[0], src[RCOMP]);
-}
-
-
-/* MESA_FORMAT_L_UNORM16 */
-
-static void
-pack_ubyte_L_UNORM16(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = UBYTE_TO_USHORT(src[RCOMP]);
-}
-
-static void
-pack_float_L_UNORM16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_USHORT(d[0], src[RCOMP]);
-}
-
-
-/* MESA_FORMAT_YCBCR */
-
-static void
-pack_ubyte_YCBCR(const GLubyte src[4], void *dst)
-{
-   /* todo */
-}
-
-static void
-pack_float_YCBCR(const GLfloat src[4], void *dst)
-{
-   /* todo */
-}
-
-
-/* MESA_FORMAT_YCBCR_REV */
-
-static void
-pack_ubyte_YCBCR_REV(const GLubyte src[4], void *dst)
-{
-   /* todo */
-}
-
-static void
-pack_float_YCBCR_REV(const GLfloat src[4], void *dst)
-{
-   /* todo */
-}
-
-
-/* MESA_FORMAT_R_UNORM8 */
-
-static void
-pack_ubyte_R_UNORM8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = src[RCOMP];
-}
-
-static void
-pack_float_R_UNORM8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLubyte r;
-   UNCLAMPED_FLOAT_TO_UBYTE(r, src[RCOMP]);
-   d[0] = r;
-}
-
-
-/* MESA_FORMAT_R8G8_UNORM */
-
-static void
-pack_ubyte_R8G8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_88(src[GCOMP], src[RCOMP]);
-}
-
-static void
-pack_float_R8G8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte r, g;
-   UNCLAMPED_FLOAT_TO_UBYTE(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(g, src[GCOMP]);
-   *d = PACK_COLOR_88(g, r);
-}
-
-
-/* MESA_FORMAT_G8R8_UNORM */
-
-static void
-pack_ubyte_G8R8_UNORM(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = PACK_COLOR_88(src[RCOMP], src[GCOMP]);
-}
-
-static void
-pack_float_G8R8_UNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte r, g;
-   UNCLAMPED_FLOAT_TO_UBYTE(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(g, src[GCOMP]);
-   *d = PACK_COLOR_88(r, g);
-}
-
-
-/* MESA_FORMAT_R_UNORM16 */
-
-static void
-pack_ubyte_R_UNORM16(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = UBYTE_TO_USHORT(src[RCOMP]);
-}
-
-static void
-pack_float_R_UNORM16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_USHORT(d[0], src[RCOMP]);
-}
-
-
-/* MESA_FORMAT_R16G16_UNORM */
-
-static void
-pack_ubyte_R16G16_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   *d = PACK_COLOR_1616(g, r);
-}
-
-static void
-pack_float_R16G16_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   *d = PACK_COLOR_1616(g, r);
-}
-
-
-/* MESA_FORMAT_G16R16_UNORM */
-
-static void
-pack_ubyte_G16R16_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   *d = PACK_COLOR_1616(r, g);
-}
-
-
-static void
-pack_float_G16R16_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   *d = PACK_COLOR_1616(r, g);
-}
-
-
-/* MESA_FORMAT_B10G10R10A2_UNORM */
-
-static void
-pack_ubyte_B10G10R10A2_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   GLushort b = UBYTE_TO_USHORT(src[BCOMP]);
-   GLushort a = UBYTE_TO_USHORT(src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, r, g, b);
-}
-
-static void
-pack_float_B10G10R10A2_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g, b, a;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(b, src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(a, src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, r, g, b);
-}
-
-
-/* MESA_FORMAT_R10G10B10A2_UINT */
-
-static void
-pack_ubyte_R10G10B10A2_UINT(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   GLushort b = UBYTE_TO_USHORT(src[BCOMP]);
-   GLushort a = UBYTE_TO_USHORT(src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, b, g, r);
-}
-
-static void
-pack_float_R10G10B10A2_UINT(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g, b, a;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(b, src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(a, src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, b, g, r);
-}
-
-
-/* MESA_FORMAT_BGR_SRGB8 */
-
-static void
-pack_ubyte_BGR_SRGB8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   d[2] = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   d[1] = util_format_linear_to_srgb_8unorm(src[GCOMP]);
-   d[0] = util_format_linear_to_srgb_8unorm(src[BCOMP]);
-}
-
-static void
-pack_float_BGR_SRGB8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   d[2] = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   d[1] = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   d[0] = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-}
-
-
-/* MESA_FORMAT_A8B8G8R8_SRGB */
-
-static void
-pack_ubyte_A8B8G8R8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(r, g, b, src[ACOMP]);
-}
-
-static void
-pack_float_A8B8G8R8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r, g, b, a;
-   r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_8888(r, g, b, a);
-}
-
-
-/* MESA_FORMAT_B8G8R8A8_SRGB */
-
-static void
-pack_ubyte_B8G8R8A8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(src[ACOMP], r, g, b);
-}
-
-static void
-pack_float_B8G8R8A8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r, g, b, a;
-   r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_8888(a, r, g, b);
-}
-
-
-/* MESA_FORMAT_A8R8G8B8_SRGB */
-
-static void
-pack_ubyte_A8R8G8B8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(b, g, r, src[ACOMP]);
-}
-
-static void
-pack_float_A8R8G8B8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r, g, b, a;
-   r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_8888(b, g, r, a);
-}
-
-
-/* MESA_FORMAT_R8G8B8A8_SRGB */
-
-static void
-pack_ubyte_R8G8B8A8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(src[ACOMP], b, g, r);
-}
-
-static void
-pack_float_R8G8B8A8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLubyte r, g, b, a;
-   r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_8888(a, b, g, r);
-}
-
-
-/* MESA_FORMAT_L_SRGB8 */
-
-static void
-pack_ubyte_L_SRGB8(const GLubyte src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   *d = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-}
-
-static void
-pack_float_L_SRGB8(const GLfloat src[4], void *dst)
-{
-   GLubyte *d = ((GLubyte *) dst);
-   GLubyte l = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   *d = l;
-}
-
-
-/* MESA_FORMAT_L8A8_SRGB */
-
-static void
-pack_ubyte_L8A8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte l = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   *d = PACK_COLOR_88(src[ACOMP], l);
-}
-
-/* MESA_FORMAT_A8L8_SRGB */
-
-static void
-pack_ubyte_A8L8_SRGB(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte l = util_format_linear_to_srgb_8unorm(src[RCOMP]);
-   *d = PACK_COLOR_88(l, src[ACOMP]);
-}
-
-static void
-pack_float_L8A8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte a, l = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   CLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_88(a, l);
-}
-
-static void
-pack_float_A8L8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   GLubyte a, l = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   CLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
-   *d = PACK_COLOR_88(l, a);
-}
-
-
-/* MESA_FORMAT_RGBA_FLOAT32 */
-
-static void
-pack_ubyte_RGBA_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[0]);
-   d[1] = UBYTE_TO_FLOAT(src[1]);
-   d[2] = UBYTE_TO_FLOAT(src[2]);
-   d[3] = UBYTE_TO_FLOAT(src[3]);
-}
-
-static void
-pack_float_RGBA_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[0];
-   d[1] = src[1];
-   d[2] = src[2];
-   d[3] = src[3];
-}
-
-
-/* MESA_FORMAT_RGBA_FLOAT16 */
-
-static void
-pack_ubyte_RGBA_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[0]));
-   d[1] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[1]));
-   d[2] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[2]));
-   d[3] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[3]));
-}
-
-static void
-pack_float_RGBA_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[0]);
-   d[1] = _mesa_float_to_half(src[1]);
-   d[2] = _mesa_float_to_half(src[2]);
-   d[3] = _mesa_float_to_half(src[3]);
-}
-
-
-/* MESA_FORMAT_RGB_FLOAT32 */
-
-static void
-pack_ubyte_RGB_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[0]);
-   d[1] = UBYTE_TO_FLOAT(src[1]);
-   d[2] = UBYTE_TO_FLOAT(src[2]);
-}
-
-static void
-pack_float_RGB_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[0];
-   d[1] = src[1];
-   d[2] = src[2];
-}
-
-
-/* MESA_FORMAT_RGB_FLOAT16 */
-
-static void
-pack_ubyte_RGB_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[0]));
-   d[1] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[1]));
-   d[2] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[2]));
-}
-
-static void
-pack_float_RGB_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[0]);
-   d[1] = _mesa_float_to_half(src[1]);
-   d[2] = _mesa_float_to_half(src[2]);
-}
-
-
-/* MESA_FORMAT_A_FLOAT32 */
-
-static void
-pack_ubyte_A_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[ACOMP]);
-}
-
-static void
-pack_float_A_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[ACOMP];
-}
-
-
-/* MESA_FORMAT_A_FLOAT16 */
-
-static void
-pack_ubyte_A_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[ACOMP]));
-}
-
-static void
-pack_float_A_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[ACOMP]);
-}
-
-
-/* MESA_FORMAT_L_FLOAT32 (and I_FLOAT32, R_FLOAT32) */
-
-static void
-pack_ubyte_L_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[RCOMP]);
-}
-
-static void
-pack_float_L_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[RCOMP];
-}
-
-
-/* MESA_FORMAT_L_FLOAT16 (and I_FLOAT16, R_FLOAT32) */
-
-static void
-pack_ubyte_L_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[RCOMP]));
-}
-
-static void
-pack_float_L_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[RCOMP]);
-}
-
-
-/* MESA_FORMAT_LA_FLOAT32 */
-
-static void
-pack_ubyte_LA_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[RCOMP]);
-   d[1] = UBYTE_TO_FLOAT(src[ACOMP]);
-}
-
-static void
-pack_float_LA_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[RCOMP];
-   d[1] = src[ACOMP];
-}
-
-
-/* MESA_FORMAT_LA_FLOAT16 */
-
-static void
-pack_ubyte_LA_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[RCOMP]));
-   d[1] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[ACOMP]));
-}
-
-static void
-pack_float_LA_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[RCOMP]);
-   d[1] = _mesa_float_to_half(src[ACOMP]);
-}
-
-
-/* MESA_FORMAT_RG_FLOAT32 */
-
-static void
-pack_ubyte_RG_FLOAT32(const GLubyte src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = UBYTE_TO_FLOAT(src[RCOMP]);
-   d[1] = UBYTE_TO_FLOAT(src[GCOMP]);
-}
-
-static void
-pack_float_RG_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[RCOMP];
-   d[1] = src[GCOMP];
-}
-
-
-/* MESA_FORMAT_RG_FLOAT16 */
-
-static void
-pack_ubyte_RG_FLOAT16(const GLubyte src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[RCOMP]));
-   d[1] = _mesa_float_to_half(UBYTE_TO_FLOAT(src[GCOMP]));
-}
-
-static void
-pack_float_RG_FLOAT16(const GLfloat src[4], void *dst)
-{
-   GLhalfARB *d = ((GLhalfARB *) dst);
-   d[0] = _mesa_float_to_half(src[RCOMP]);
-   d[1] = _mesa_float_to_half(src[GCOMP]);
-}
-
-
-/* MESA_FORMAT_RGBA_UNORM16 */
-
-static void
-pack_ubyte_RGBA_16(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   d[0] = UBYTE_TO_USHORT(src[RCOMP]);
-   d[1] = UBYTE_TO_USHORT(src[GCOMP]);
-   d[2] = UBYTE_TO_USHORT(src[BCOMP]);
-   d[3] = UBYTE_TO_USHORT(src[ACOMP]);
-}
-
-static void
-pack_float_RGBA_16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_USHORT(d[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(d[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(d[2], src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(d[3], src[ACOMP]);
-}
-
-
-
-/*
- * MESA_FORMAT_R_SNORM8
- */
-
-static void
-pack_float_R_SNORM8(const GLfloat src[4], void *dst)
-{
-   GLbyte *d = (GLbyte *) dst;
-   *d = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_R8G8_SNORM
- */
-
-static void
-pack_float_R8G8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = (GLushort *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   *d = (g << 8) | r;
-}
-
-
-/*
- * MESA_FORMAT_X8B8G8R8_SNORM
- */
-
-static void
-pack_float_X8B8G8R8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   GLbyte b = FLOAT_TO_BYTE(CLAMP(src[BCOMP], -1.0f, 1.0f));
-   GLbyte a = 127;
-   *d = PACK_COLOR_8888(r, g, b, a);
-}
-
-
-/*
- * MESA_FORMAT_A8B8G8R8_SNORM
- */
-
-static void
-pack_float_A8B8G8R8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   GLbyte b = FLOAT_TO_BYTE(CLAMP(src[BCOMP], -1.0f, 1.0f));
-   GLbyte a = FLOAT_TO_BYTE(CLAMP(src[ACOMP], -1.0f, 1.0f));
-   *d = PACK_COLOR_8888(r, g, b, a);
-}
-
-
-/*
- * MESA_FORMAT_R8G8B8A8_SNORM
- */
-
-static void
-pack_float_R8G8B8A8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   GLbyte b = FLOAT_TO_BYTE(CLAMP(src[BCOMP], -1.0f, 1.0f));
-   GLbyte a = FLOAT_TO_BYTE(CLAMP(src[ACOMP], -1.0f, 1.0f));
-   *d = PACK_COLOR_8888(a, b, g, r);
-}
-
-
-/*
- * MESA_FORMAT_R_SNORM16
- */
-
-static void
-pack_float_R_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLshort *d = (GLshort *) dst;
-   *d = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_R16G16_SNORM
- */
-
-static void
-pack_float_R16G16_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLshort r = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLshort g = FLOAT_TO_SHORT(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   *d = (g << 16) | (r & 0xffff);
-}
-
-
-/*
- * MESA_FORMAT_RGB_SNORM16
- */
-
-static void
-pack_float_RGB_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLshort *d = (GLshort *) dst;
-   d[0] = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   d[1] = FLOAT_TO_SHORT(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   d[2] = FLOAT_TO_SHORT(CLAMP(src[BCOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_RGBA_SNORM16
- */
-
-static void
-pack_float_RGBA_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLshort *d = (GLshort *) dst;
-   d[0] = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   d[1] = FLOAT_TO_SHORT(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   d[2] = FLOAT_TO_SHORT(CLAMP(src[BCOMP], -1.0f, 1.0f));
-   d[3] = FLOAT_TO_SHORT(CLAMP(src[ACOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_A_SNORM8
- */
-
-static void
-pack_float_A_SNORM8(const GLfloat src[4], void *dst)
-{
-   GLbyte *d = (GLbyte *) dst;
-   *d = FLOAT_TO_BYTE(CLAMP(src[ACOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_L_SNORM8
- */
-
-static void
-pack_float_L_SNORM8(const GLfloat src[4], void *dst)
-{
-   GLbyte *d = (GLbyte *) dst;
-   *d = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_L8A8_SNORM
- */
-
-static void
-pack_float_L8A8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = (GLushort *) dst;
-   GLbyte l = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte a = FLOAT_TO_BYTE(CLAMP(src[ACOMP], -1.0f, 1.0f));
-   *d = (a << 8) | l;
-}
-
-
-/*
- * MESA_FORMAT_A8L8_SNORM
- */
-
-static void
-pack_float_A8L8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = (GLushort *) dst;
-   GLbyte l = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte a = FLOAT_TO_BYTE(CLAMP(src[ACOMP], -1.0f, 1.0f));
-   *d = (l << 8) | a;
-}
-
-
-/*
- * MESA_FORMAT_A_SNORM16
- */
-
-static void
-pack_float_A_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLshort *d = (GLshort *) dst;
-   *d = FLOAT_TO_SHORT(CLAMP(src[ACOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_L_SNORM16
- */
-
-static void
-pack_float_L_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLshort *d = (GLshort *) dst;
-   *d = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-}
-
-
-/*
- * MESA_FORMAT_LA_SNORM16
- */
-
-static void
-pack_float_LA_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLshort l = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLshort a = FLOAT_TO_SHORT(CLAMP(src[ACOMP], -1.0f, 1.0f));
-   *d = PACK_COLOR_1616(a, l);
-}
-
-
-/*
- * MESA_FORMAT_R9G9B9E5_FLOAT;
- */
-
-static void
-pack_float_R9G9B9E5_FLOAT(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   *d = float3_to_rgb9e5(src);
-}
-
-static void
-pack_ubyte_R9G9B9E5_FLOAT(const GLubyte src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLfloat rgb[3];
-   rgb[0] = UBYTE_TO_FLOAT(src[RCOMP]);
-   rgb[1] = UBYTE_TO_FLOAT(src[GCOMP]);
-   rgb[2] = UBYTE_TO_FLOAT(src[BCOMP]);
-   *d = float3_to_rgb9e5(rgb);
-}
-
-
-
-/*
- * MESA_FORMAT_R11G11B10_FLOAT;
- */
-
-static void
-pack_ubyte_R11G11B10_FLOAT(const GLubyte src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLfloat rgb[3];
-   rgb[0] = UBYTE_TO_FLOAT(src[RCOMP]);
-   rgb[1] = UBYTE_TO_FLOAT(src[GCOMP]);
-   rgb[2] = UBYTE_TO_FLOAT(src[BCOMP]);
-   *d = float3_to_r11g11b10f(rgb);
-}
-
-static void
-pack_float_R11G11B10_FLOAT(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   *d = float3_to_r11g11b10f(src);
-}
-
-
-/*
- * MESA_FORMAT_B4G4R4X4_UNORM
- */
-
-static void
-pack_ubyte_XRGB4444_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_4444(255, src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_XRGB4444_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_XRGB4444_UNORM(v, dst);
-}
-
-
-/*
- * MESA_FORMAT_B5G5R5X1_UNORM
- */
-
-static void
-pack_ubyte_XRGB1555_UNORM(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = PACK_COLOR_1555(255, src[RCOMP], src[GCOMP], src[BCOMP]);
-}
-
-static void
-pack_float_XRGB1555_UNORM(const GLfloat src[4], void *dst)
-{
-   GLubyte v[4];
-   _mesa_unclamped_float_rgba_to_ubyte(v, src);
-   pack_ubyte_XRGB1555_UNORM(v, dst);
-}
-
-
-/*
- * MESA_FORMAT_R8G8B8X8_SNORM
- */
-
-static void
-pack_float_XBGR8888_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   GLbyte b = FLOAT_TO_BYTE(CLAMP(src[BCOMP], -1.0f, 1.0f));
-   *d = PACK_COLOR_8888(127, b, g, r);
-}
-
-
-/*
- * MESA_FORMAT_R8G8B8X8_SRGB
- */
-
-static void
-pack_float_R8G8B8X8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLubyte r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(255, b, g, r);
-}
-
-
-/*
- * MESA_FORMAT_X8B8G8R8_SRGB
- */
-
-static void
-pack_float_X8B8G8R8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLubyte r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(r, g, b, 255);
-}
-
-
-/* MESA_FORMAT_B10G10R10X2_UNORM */
-
-static void
-pack_ubyte_B10G10R10X2_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   GLushort b = UBYTE_TO_USHORT(src[BCOMP]);
-   *d = PACK_COLOR_2101010_US(3, r, g, b);
-}
-
-static void
-pack_float_B10G10R10X2_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g, b;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(b, src[BCOMP]);
-   *d = PACK_COLOR_2101010_US(3, r, g, b);
-}
-
-
-/* MESA_FORMAT_RGBX_UNORM16 */
-
-static void
-pack_ubyte_RGBX_UNORM16(const GLubyte src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   d[0] = UBYTE_TO_USHORT(src[RCOMP]);
-   d[1] = UBYTE_TO_USHORT(src[GCOMP]);
-   d[2] = UBYTE_TO_USHORT(src[BCOMP]);
-   d[3] = 65535;
-}
-
-static void
-pack_float_RGBX_UNORM16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_USHORT(d[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(d[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(d[2], src[BCOMP]);
-   d[3] = 65535;
-}
-
-
-/* MESA_FORMAT_RGBX_SNORM16 */
-
-static void
-pack_float_RGBX_SNORM16(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   UNCLAMPED_FLOAT_TO_SHORT(d[0], src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_SHORT(d[1], src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_SHORT(d[2], src[BCOMP]);
-   d[3] = 32767;
-}
-
-
-/* MESA_FORMAT_RGBX_FLOAT16 */
-
-static void
-pack_float_XBGR16161616_FLOAT(const GLfloat src[4], void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   d[0] = _mesa_float_to_half(src[RCOMP]);
-   d[1] = _mesa_float_to_half(src[GCOMP]);
-   d[2] = _mesa_float_to_half(src[BCOMP]);
-   d[3] = _mesa_float_to_half(1.0);
-}
-
-/* MESA_FORMAT_RGBX_FLOAT32 */
-
-static void
-pack_float_RGBX_FLOAT32(const GLfloat src[4], void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[0] = src[RCOMP];
-   d[1] = src[GCOMP];
-   d[2] = src[BCOMP];
-   d[3] = 1.0;
-}
-
-/* MESA_FORMAT_R10G10B10A2_UNORM */
-
-static void
-pack_ubyte_R10G10B10A2_UNORM(const GLubyte src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r = UBYTE_TO_USHORT(src[RCOMP]);
-   GLushort g = UBYTE_TO_USHORT(src[GCOMP]);
-   GLushort b = UBYTE_TO_USHORT(src[BCOMP]);
-   GLushort a = UBYTE_TO_USHORT(src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, b, g, r);
-}
-
-static void
-pack_float_R10G10B10A2_UNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   GLushort r, g, b, a;
-   UNCLAMPED_FLOAT_TO_USHORT(r, src[RCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(g, src[GCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(b, src[BCOMP]);
-   UNCLAMPED_FLOAT_TO_USHORT(a, src[ACOMP]);
-   *d = PACK_COLOR_2101010_US(a, b, g, r);
-}
-
-/*
- * MESA_FORMAT_G8R8_SNORM
- */
-
-static void
-pack_float_G8R8_SNORM(const GLfloat src[4], void *dst)
-{
-   GLushort *d = (GLushort *) dst;
-   GLbyte r = FLOAT_TO_BYTE(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLbyte g = FLOAT_TO_BYTE(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   *d = (r << 8) | (g & 0xff);
-}
-
-/*
- * MESA_FORMAT_G16R16_SNORM
- */
-
-static void
-pack_float_G16R16_SNORM(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLshort r = FLOAT_TO_SHORT(CLAMP(src[RCOMP], -1.0f, 1.0f));
-   GLshort g = FLOAT_TO_SHORT(CLAMP(src[GCOMP], -1.0f, 1.0f));
-   *d = (r << 16) | (g & 0xffff);
-}
-
-/*
- * MESA_FORMAT_B8G8R8X8_SRGB
- */
-
-static void
-pack_float_B8G8R8X8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLubyte r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(127, r, g, b);
-}
-
-/*
- * MESA_FORMAT_X8R8G8B8_SRGB
- */
-
-static void
-pack_float_X8R8G8B8_SRGB(const GLfloat src[4], void *dst)
-{
-   GLuint *d = (GLuint *) dst;
-   GLubyte r = util_format_linear_float_to_srgb_8unorm(src[RCOMP]);
-   GLubyte g = util_format_linear_float_to_srgb_8unorm(src[GCOMP]);
-   GLubyte b = util_format_linear_float_to_srgb_8unorm(src[BCOMP]);
-   *d = PACK_COLOR_8888(b, g, r, 255);
-}
-
-/**
- * Return a function that can pack a GLubyte rgba[4] color.
- */
-gl_pack_ubyte_rgba_func
-_mesa_get_pack_ubyte_rgba_function(mesa_format format)
-{
-   static gl_pack_ubyte_rgba_func table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      memset(table, 0, sizeof(table));
-
-      table[MESA_FORMAT_NONE] = NULL;
-
-      table[MESA_FORMAT_A8B8G8R8_UNORM] = pack_ubyte_A8B8G8R8_UNORM;
-      table[MESA_FORMAT_R8G8B8A8_UNORM] = pack_ubyte_R8G8B8A8_UNORM;
-      table[MESA_FORMAT_B8G8R8A8_UNORM] = pack_ubyte_B8G8R8A8_UNORM;
-      table[MESA_FORMAT_A8R8G8B8_UNORM] = pack_ubyte_A8R8G8B8_UNORM;
-      table[MESA_FORMAT_X8B8G8R8_UNORM] = pack_ubyte_A8B8G8R8_UNORM; /* reused */
-      table[MESA_FORMAT_R8G8B8X8_UNORM] = pack_ubyte_R8G8B8A8_UNORM; /* reused */
-      table[MESA_FORMAT_B8G8R8X8_UNORM] = pack_ubyte_B8G8R8X8_UNORM;
-      table[MESA_FORMAT_X8R8G8B8_UNORM] = pack_ubyte_X8R8G8B8_UNORM;
-      table[MESA_FORMAT_BGR_UNORM8] = pack_ubyte_BGR_UNORM8;
-      table[MESA_FORMAT_RGB_UNORM8] = pack_ubyte_RGB_UNORM8;
-      table[MESA_FORMAT_B5G6R5_UNORM] = pack_ubyte_B5G6R5_UNORM;
-      table[MESA_FORMAT_R5G6B5_UNORM] = pack_ubyte_R5G6B5_UNORM;
-      table[MESA_FORMAT_B4G4R4A4_UNORM] = pack_ubyte_B4G4R4A4_UNORM;
-      table[MESA_FORMAT_A4R4G4B4_UNORM] = pack_ubyte_A4R4G4B4_UNORM;
-      table[MESA_FORMAT_A1B5G5R5_UNORM] = pack_ubyte_A1B5G5R5_UNORM;
-      table[MESA_FORMAT_B5G5R5A1_UNORM] = pack_ubyte_B5G5R5A1_UNORM;
-      table[MESA_FORMAT_A1R5G5B5_UNORM] = pack_ubyte_A1R5G5B5_UNORM;
-      table[MESA_FORMAT_L4A4_UNORM] = pack_ubyte_L4A4_UNORM;
-      table[MESA_FORMAT_L8A8_UNORM] = pack_ubyte_L8A8_UNORM;
-      table[MESA_FORMAT_A8L8_UNORM] = pack_ubyte_A8L8_UNORM;
-      table[MESA_FORMAT_L16A16_UNORM] = pack_ubyte_L16A16_UNORM;
-      table[MESA_FORMAT_A16L16_UNORM] = pack_ubyte_A16L16_UNORM;
-      table[MESA_FORMAT_B2G3R3_UNORM] = pack_ubyte_B2G3R3_UNORM;
-      table[MESA_FORMAT_A_UNORM8] = pack_ubyte_A_UNORM8;
-      table[MESA_FORMAT_A_UNORM16] = pack_ubyte_A_UNORM16;
-      table[MESA_FORMAT_L_UNORM8] = pack_ubyte_L_UNORM8;
-      table[MESA_FORMAT_L_UNORM16] = pack_ubyte_L_UNORM16;
-      table[MESA_FORMAT_I_UNORM8] = pack_ubyte_L_UNORM8; /* reuse pack_ubyte_L_UNORM8 */
-      table[MESA_FORMAT_I_UNORM16] = pack_ubyte_L_UNORM16; /* reuse pack_ubyte_L_UNORM16 */
-      table[MESA_FORMAT_YCBCR] = pack_ubyte_YCBCR;
-      table[MESA_FORMAT_YCBCR_REV] = pack_ubyte_YCBCR_REV;
-      table[MESA_FORMAT_R_UNORM8] = pack_ubyte_R_UNORM8;
-      table[MESA_FORMAT_R8G8_UNORM] = pack_ubyte_R8G8_UNORM;
-      table[MESA_FORMAT_G8R8_UNORM] = pack_ubyte_G8R8_UNORM;
-      table[MESA_FORMAT_R_UNORM16] = pack_ubyte_R_UNORM16;
-      table[MESA_FORMAT_R16G16_UNORM] = pack_ubyte_R16G16_UNORM;
-      table[MESA_FORMAT_G16R16_UNORM] = pack_ubyte_G16R16_UNORM;
-      table[MESA_FORMAT_B10G10R10A2_UNORM] = pack_ubyte_B10G10R10A2_UNORM;
-      table[MESA_FORMAT_R10G10B10A2_UINT] = pack_ubyte_R10G10B10A2_UINT;
-
-      /* should never convert RGBA to these formats */
-      table[MESA_FORMAT_S8_UINT_Z24_UNORM] = NULL;
-      table[MESA_FORMAT_Z24_UNORM_S8_UINT] = NULL;
-      table[MESA_FORMAT_Z_UNORM16] = NULL;
-      table[MESA_FORMAT_Z24_UNORM_X8_UINT] = NULL;
-      table[MESA_FORMAT_X8_UINT_Z24_UNORM] = NULL;
-      table[MESA_FORMAT_Z_UNORM32] = NULL;
-      table[MESA_FORMAT_S_UINT8] = NULL;
-
-      /* sRGB */
-      table[MESA_FORMAT_BGR_SRGB8] = pack_ubyte_BGR_SRGB8;
-      table[MESA_FORMAT_A8B8G8R8_SRGB] = pack_ubyte_A8B8G8R8_SRGB;
-      table[MESA_FORMAT_B8G8R8A8_SRGB] = pack_ubyte_B8G8R8A8_SRGB;
-      table[MESA_FORMAT_A8R8G8B8_SRGB] = pack_ubyte_A8R8G8B8_SRGB;
-      table[MESA_FORMAT_R8G8B8A8_SRGB] = pack_ubyte_R8G8B8A8_SRGB;
-      table[MESA_FORMAT_L_SRGB8] = pack_ubyte_L_SRGB8;
-      table[MESA_FORMAT_L8A8_SRGB] = pack_ubyte_L8A8_SRGB;
-      table[MESA_FORMAT_A8L8_SRGB] = pack_ubyte_A8L8_SRGB;
-      /* n/a */
-      table[MESA_FORMAT_SRGB_DXT1] = NULL; /* pack_ubyte_SRGB_DXT1; */
-      table[MESA_FORMAT_SRGBA_DXT1] = NULL; /* pack_ubyte_SRGBA_DXT1; */
-      table[MESA_FORMAT_SRGBA_DXT3] = NULL; /* pack_ubyte_SRGBA_DXT3; */
-      table[MESA_FORMAT_SRGBA_DXT5] = NULL; /* pack_ubyte_SRGBA_DXT5; */
-
-      table[MESA_FORMAT_RGB_FXT1] = NULL; /* pack_ubyte_RGB_FXT1; */
-      table[MESA_FORMAT_RGBA_FXT1] = NULL; /* pack_ubyte_RGBA_FXT1; */
-      table[MESA_FORMAT_RGB_DXT1] = NULL; /* pack_ubyte_RGB_DXT1; */
-      table[MESA_FORMAT_RGBA_DXT1] = NULL; /* pack_ubyte_RGBA_DXT1; */
-      table[MESA_FORMAT_RGBA_DXT3] = NULL; /* pack_ubyte_RGBA_DXT3; */
-      table[MESA_FORMAT_RGBA_DXT5] = NULL; /* pack_ubyte_RGBA_DXT5; */
-
-      table[MESA_FORMAT_RGBA_FLOAT32] = pack_ubyte_RGBA_FLOAT32;
-      table[MESA_FORMAT_RGBA_FLOAT16] = pack_ubyte_RGBA_FLOAT16;
-      table[MESA_FORMAT_RGB_FLOAT32] = pack_ubyte_RGB_FLOAT32;
-      table[MESA_FORMAT_RGB_FLOAT16] = pack_ubyte_RGB_FLOAT16;
-      table[MESA_FORMAT_A_FLOAT32] = pack_ubyte_A_FLOAT32;
-      table[MESA_FORMAT_A_FLOAT16] = pack_ubyte_A_FLOAT16;
-      table[MESA_FORMAT_L_FLOAT32] = pack_ubyte_L_FLOAT32;
-      table[MESA_FORMAT_L_FLOAT16] = pack_ubyte_L_FLOAT16;
-      table[MESA_FORMAT_LA_FLOAT32] = pack_ubyte_LA_FLOAT32;
-      table[MESA_FORMAT_LA_FLOAT16] = pack_ubyte_LA_FLOAT16;
-      table[MESA_FORMAT_I_FLOAT32] = pack_ubyte_L_FLOAT32;
-      table[MESA_FORMAT_I_FLOAT16] = pack_ubyte_L_FLOAT16;
-      table[MESA_FORMAT_R_FLOAT32] = pack_ubyte_L_FLOAT32;
-      table[MESA_FORMAT_R_FLOAT16] = pack_ubyte_L_FLOAT16;
-      table[MESA_FORMAT_RG_FLOAT32] = pack_ubyte_RG_FLOAT32;
-      table[MESA_FORMAT_RG_FLOAT16] = pack_ubyte_RG_FLOAT16;
-
-      /* n/a */
-      table[MESA_FORMAT_RGBA_SINT8] = NULL; /* pack_ubyte_RGBA_INT8 */
-      table[MESA_FORMAT_RGBA_SINT16] = NULL; /* pack_ubyte_RGBA_INT16 */
-      table[MESA_FORMAT_RGBA_SINT32] = NULL; /* pack_ubyte_RGBA_INT32 */
-      table[MESA_FORMAT_RGBA_UINT8] = NULL; /* pack_ubyte_RGBA_UINT8 */
-      table[MESA_FORMAT_RGBA_UINT16] = NULL; /* pack_ubyte_RGBA_UINT16 */
-      table[MESA_FORMAT_RGBA_UINT32] = NULL; /* pack_ubyte_RGBA_UINT32 */
-
-      table[MESA_FORMAT_RGBA_UNORM16] = pack_ubyte_RGBA_16;
-
-      /* n/a */
-      table[MESA_FORMAT_R_SNORM8] = NULL;
-      table[MESA_FORMAT_R8G8_SNORM] = NULL;
-      table[MESA_FORMAT_X8B8G8R8_SNORM] = NULL;
-      table[MESA_FORMAT_A8B8G8R8_SNORM] = NULL;
-      table[MESA_FORMAT_R8G8B8A8_SNORM] = NULL;
-      table[MESA_FORMAT_R_SNORM16] = NULL;
-      table[MESA_FORMAT_R16G16_SNORM] = NULL;
-      table[MESA_FORMAT_RGB_SNORM16] = NULL;
-      table[MESA_FORMAT_RGBA_SNORM16] = NULL;
-      table[MESA_FORMAT_A_SNORM8] = NULL;
-      table[MESA_FORMAT_L_SNORM8] = NULL;
-      table[MESA_FORMAT_L8A8_SNORM] = NULL;
-      table[MESA_FORMAT_A8L8_SNORM] = NULL;
-      table[MESA_FORMAT_I_SNORM8] = NULL;
-      table[MESA_FORMAT_A_SNORM16] = NULL;
-      table[MESA_FORMAT_L_SNORM16] = NULL;
-      table[MESA_FORMAT_LA_SNORM16] = NULL;
-      table[MESA_FORMAT_I_SNORM16] = NULL;
-
-
-      table[MESA_FORMAT_RGBA_UNORM16] = pack_ubyte_RGBA_16;
-
-      table[MESA_FORMAT_R9G9B9E5_FLOAT] = pack_ubyte_R9G9B9E5_FLOAT;
-      table[MESA_FORMAT_R11G11B10_FLOAT] = pack_ubyte_R11G11B10_FLOAT;
-
-      table[MESA_FORMAT_B4G4R4X4_UNORM] = pack_ubyte_XRGB4444_UNORM;
-      table[MESA_FORMAT_B5G5R5X1_UNORM] = pack_ubyte_XRGB1555_UNORM;
-      table[MESA_FORMAT_R8G8B8X8_SNORM] = NULL;
-      table[MESA_FORMAT_R8G8B8X8_SRGB] = NULL;
-      table[MESA_FORMAT_X8B8G8R8_SRGB] = NULL;
-      table[MESA_FORMAT_RGBX_UINT8] = NULL;
-      table[MESA_FORMAT_RGBX_SINT8] = NULL;
-      table[MESA_FORMAT_B10G10R10X2_UNORM] = pack_ubyte_B10G10R10X2_UNORM;
-      table[MESA_FORMAT_RGBX_UNORM16] = pack_ubyte_RGBX_UNORM16;
-      table[MESA_FORMAT_RGBX_SNORM16] = NULL;
-      table[MESA_FORMAT_RGBX_FLOAT16] = NULL;
-      table[MESA_FORMAT_RGBX_UINT16] = NULL;
-      table[MESA_FORMAT_RGBX_SINT16] = NULL;
-      table[MESA_FORMAT_RGBX_FLOAT32] = NULL;
-      table[MESA_FORMAT_RGBX_UINT32] = NULL;
-      table[MESA_FORMAT_RGBX_SINT32] = NULL;
-
-      table[MESA_FORMAT_R10G10B10A2_UNORM] = pack_ubyte_R10G10B10A2_UNORM;
-
-      table[MESA_FORMAT_B8G8R8X8_SRGB] = NULL;
-      table[MESA_FORMAT_X8R8G8B8_SRGB] = NULL;
-
-      initialized = GL_TRUE;
-   }
-
-   return table[format];
-}
-
-
-
-/**
- * Return a function that can pack a GLfloat rgba[4] color.
- */
-gl_pack_float_rgba_func
-_mesa_get_pack_float_rgba_function(mesa_format format)
-{
-   static gl_pack_float_rgba_func table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      memset(table, 0, sizeof(table));
-
-      table[MESA_FORMAT_NONE] = NULL;
-
-      table[MESA_FORMAT_A8B8G8R8_UNORM] = pack_float_A8B8G8R8_UNORM;
-      table[MESA_FORMAT_R8G8B8A8_UNORM] = pack_float_R8G8B8A8_UNORM;
-      table[MESA_FORMAT_B8G8R8A8_UNORM] = pack_float_B8G8R8A8_UNORM;
-      table[MESA_FORMAT_A8R8G8B8_UNORM] = pack_float_A8R8G8B8_UNORM;
-      table[MESA_FORMAT_X8B8G8R8_UNORM] = pack_float_A8B8G8R8_UNORM; /* reused */
-      table[MESA_FORMAT_R8G8B8X8_UNORM] = pack_float_R8G8B8A8_UNORM; /* reused */
-      table[MESA_FORMAT_B8G8R8X8_UNORM] = pack_float_B8G8R8X8_UNORM;
-      table[MESA_FORMAT_X8R8G8B8_UNORM] = pack_float_X8R8G8B8_UNORM;
-      table[MESA_FORMAT_BGR_UNORM8] = pack_float_BGR_UNORM8;
-      table[MESA_FORMAT_RGB_UNORM8] = pack_float_RGB_UNORM8;
-      table[MESA_FORMAT_B5G6R5_UNORM] = pack_float_B5G6R5_UNORM;
-      table[MESA_FORMAT_R5G6B5_UNORM] = pack_float_R5G6B5_UNORM;
-      table[MESA_FORMAT_B4G4R4A4_UNORM] = pack_float_B4G4R4A4_UNORM;
-      table[MESA_FORMAT_A4R4G4B4_UNORM] = pack_float_A4R4G4B4_UNORM;
-      table[MESA_FORMAT_A1B5G5R5_UNORM] = pack_float_A1B5G5R5_UNORM;
-      table[MESA_FORMAT_B5G5R5A1_UNORM] = pack_float_B5G5R5A1_UNORM;
-      table[MESA_FORMAT_A1R5G5B5_UNORM] = pack_float_A1R5G5B5_UNORM;
-
-      table[MESA_FORMAT_L4A4_UNORM] = pack_float_L4A4_UNORM;
-      table[MESA_FORMAT_L8A8_UNORM] = pack_float_L8A8_UNORM;
-      table[MESA_FORMAT_A8L8_UNORM] = pack_float_A8L8_UNORM;
-      table[MESA_FORMAT_L16A16_UNORM] = pack_float_L16A16_UNORM;
-      table[MESA_FORMAT_A16L16_UNORM] = pack_float_A16L16_UNORM;
-      table[MESA_FORMAT_B2G3R3_UNORM] = pack_float_B2G3R3_UNORM;
-      table[MESA_FORMAT_A_UNORM8] = pack_float_A_UNORM8;
-      table[MESA_FORMAT_A_UNORM16] = pack_float_A_UNORM16;
-      table[MESA_FORMAT_L_UNORM8] = pack_float_L_UNORM8;
-      table[MESA_FORMAT_L_UNORM16] = pack_float_L_UNORM16;
-      table[MESA_FORMAT_I_UNORM8] = pack_float_L_UNORM8; /* reuse pack_float_L_UNORM8 */
-      table[MESA_FORMAT_I_UNORM16] = pack_float_L_UNORM16; /* reuse pack_float_L_UNORM16 */
-      table[MESA_FORMAT_YCBCR] = pack_float_YCBCR;
-      table[MESA_FORMAT_YCBCR_REV] = pack_float_YCBCR_REV;
-      table[MESA_FORMAT_R_UNORM8] = pack_float_R_UNORM8;
-      table[MESA_FORMAT_R8G8_UNORM] = pack_float_R8G8_UNORM;
-      table[MESA_FORMAT_G8R8_UNORM] = pack_float_G8R8_UNORM;
-      table[MESA_FORMAT_R_UNORM16] = pack_float_R_UNORM16;
-      table[MESA_FORMAT_R16G16_UNORM] = pack_float_R16G16_UNORM;
-      table[MESA_FORMAT_G16R16_UNORM] = pack_float_G16R16_UNORM;
-      table[MESA_FORMAT_B10G10R10A2_UNORM] = pack_float_B10G10R10A2_UNORM;
-      table[MESA_FORMAT_R10G10B10A2_UINT] = pack_float_R10G10B10A2_UINT;
-
-      /* should never convert RGBA to these formats */
-      table[MESA_FORMAT_S8_UINT_Z24_UNORM] = NULL;
-      table[MESA_FORMAT_Z24_UNORM_S8_UINT] = NULL;
-      table[MESA_FORMAT_Z_UNORM16] = NULL;
-      table[MESA_FORMAT_Z24_UNORM_X8_UINT] = NULL;
-      table[MESA_FORMAT_X8_UINT_Z24_UNORM] = NULL;
-      table[MESA_FORMAT_Z_UNORM32] = NULL;
-      table[MESA_FORMAT_S_UINT8] = NULL;
-
-      table[MESA_FORMAT_BGR_SRGB8] = pack_float_BGR_SRGB8;
-      table[MESA_FORMAT_A8B8G8R8_SRGB] = pack_float_A8B8G8R8_SRGB;
-      table[MESA_FORMAT_B8G8R8A8_SRGB] = pack_float_B8G8R8A8_SRGB;
-      table[MESA_FORMAT_A8R8G8B8_SRGB] = pack_float_A8R8G8B8_SRGB;
-      table[MESA_FORMAT_R8G8B8A8_SRGB] = pack_float_R8G8B8A8_SRGB;
-      table[MESA_FORMAT_L_SRGB8] = pack_float_L_SRGB8;
-      table[MESA_FORMAT_L8A8_SRGB] = pack_float_L8A8_SRGB;
-      table[MESA_FORMAT_A8L8_SRGB] = pack_float_A8L8_SRGB;
-
-      /* n/a */
-      table[MESA_FORMAT_SRGB_DXT1] = NULL;
-      table[MESA_FORMAT_SRGBA_DXT1] = NULL;
-      table[MESA_FORMAT_SRGBA_DXT3] = NULL;
-      table[MESA_FORMAT_SRGBA_DXT5] = NULL;
-
-      table[MESA_FORMAT_RGB_FXT1] = NULL;
-      table[MESA_FORMAT_RGBA_FXT1] = NULL;
-      table[MESA_FORMAT_RGB_DXT1] = NULL;
-      table[MESA_FORMAT_RGBA_DXT1] = NULL;
-      table[MESA_FORMAT_RGBA_DXT3] = NULL;
-      table[MESA_FORMAT_RGBA_DXT5] = NULL;
-
-      table[MESA_FORMAT_RGBA_FLOAT32] = pack_float_RGBA_FLOAT32;
-      table[MESA_FORMAT_RGBA_FLOAT16] = pack_float_RGBA_FLOAT16;
-      table[MESA_FORMAT_RGB_FLOAT32] = pack_float_RGB_FLOAT32;
-      table[MESA_FORMAT_RGB_FLOAT16] = pack_float_RGB_FLOAT16;
-      table[MESA_FORMAT_A_FLOAT32] = pack_float_A_FLOAT32;
-      table[MESA_FORMAT_A_FLOAT16] = pack_float_A_FLOAT16;
-      table[MESA_FORMAT_L_FLOAT32] = pack_float_L_FLOAT32;
-      table[MESA_FORMAT_L_FLOAT16] = pack_float_L_FLOAT16;
-      table[MESA_FORMAT_LA_FLOAT32] = pack_float_LA_FLOAT32;
-      table[MESA_FORMAT_LA_FLOAT16] = pack_float_LA_FLOAT16;
-
-      table[MESA_FORMAT_I_FLOAT32] = pack_float_L_FLOAT32;
-      table[MESA_FORMAT_I_FLOAT16] = pack_float_L_FLOAT16;
-      table[MESA_FORMAT_R_FLOAT32] = pack_float_L_FLOAT32;
-      table[MESA_FORMAT_R_FLOAT16] = pack_float_L_FLOAT16;
-      table[MESA_FORMAT_RG_FLOAT32] = pack_float_RG_FLOAT32;
-      table[MESA_FORMAT_RG_FLOAT16] = pack_float_RG_FLOAT16;
-
-      /* n/a */
-      table[MESA_FORMAT_RGBA_SINT8] = NULL;
-      table[MESA_FORMAT_RGBA_SINT16] = NULL;
-      table[MESA_FORMAT_RGBA_SINT32] = NULL;
-      table[MESA_FORMAT_RGBA_UINT8] = NULL;
-      table[MESA_FORMAT_RGBA_UINT16] = NULL;
-      table[MESA_FORMAT_RGBA_UINT32] = NULL;
-
-      table[MESA_FORMAT_RGBA_UNORM16] = pack_float_RGBA_16;
-
-      table[MESA_FORMAT_R_SNORM8] = pack_float_R_SNORM8;
-      table[MESA_FORMAT_R8G8_SNORM] = pack_float_R8G8_SNORM;
-      table[MESA_FORMAT_X8B8G8R8_SNORM] = pack_float_X8B8G8R8_SNORM;
-      table[MESA_FORMAT_A8B8G8R8_SNORM] = pack_float_A8B8G8R8_SNORM;
-      table[MESA_FORMAT_R8G8B8A8_SNORM] = pack_float_R8G8B8A8_SNORM;
-      table[MESA_FORMAT_R_SNORM16] = pack_float_R_SNORM16;
-      table[MESA_FORMAT_R16G16_SNORM] = pack_float_R16G16_SNORM;
-      table[MESA_FORMAT_RGB_SNORM16] = pack_float_RGB_SNORM16;
-      table[MESA_FORMAT_RGBA_SNORM16] = pack_float_RGBA_SNORM16;
-      table[MESA_FORMAT_A_SNORM8] = pack_float_A_SNORM8;
-      table[MESA_FORMAT_L_SNORM8] = pack_float_L_SNORM8;
-      table[MESA_FORMAT_L8A8_SNORM] = pack_float_L8A8_SNORM;
-      table[MESA_FORMAT_A8L8_SNORM] = pack_float_A8L8_SNORM;
-      table[MESA_FORMAT_I_SNORM8] = pack_float_L_SNORM8; /* reused */
-      table[MESA_FORMAT_A_SNORM16] = pack_float_A_SNORM16;
-      table[MESA_FORMAT_L_SNORM16] = pack_float_L_SNORM16;
-      table[MESA_FORMAT_LA_SNORM16] = pack_float_LA_SNORM16;
-      table[MESA_FORMAT_I_SNORM16] = pack_float_L_SNORM16; /* reused */
-
-      table[MESA_FORMAT_R9G9B9E5_FLOAT] = pack_float_R9G9B9E5_FLOAT;
-      table[MESA_FORMAT_R11G11B10_FLOAT] = pack_float_R11G11B10_FLOAT;
-
-      table[MESA_FORMAT_B4G4R4X4_UNORM] = pack_float_XRGB4444_UNORM;
-      table[MESA_FORMAT_B5G5R5X1_UNORM] = pack_float_XRGB1555_UNORM;
-      table[MESA_FORMAT_R8G8B8X8_SNORM] = pack_float_XBGR8888_SNORM;
-      table[MESA_FORMAT_R8G8B8X8_SRGB] = pack_float_R8G8B8X8_SRGB;
-      table[MESA_FORMAT_X8B8G8R8_SRGB] = pack_float_X8B8G8R8_SRGB;
-      table[MESA_FORMAT_RGBX_UINT8] = NULL;
-      table[MESA_FORMAT_RGBX_SINT8] = NULL;
-      table[MESA_FORMAT_B10G10R10X2_UNORM] = pack_float_B10G10R10X2_UNORM;
-      table[MESA_FORMAT_RGBX_UNORM16] = pack_float_RGBX_UNORM16;
-      table[MESA_FORMAT_RGBX_SNORM16] = pack_float_RGBX_SNORM16;
-      table[MESA_FORMAT_RGBX_FLOAT16] = pack_float_XBGR16161616_FLOAT;
-      table[MESA_FORMAT_RGBX_UINT16] = NULL;
-      table[MESA_FORMAT_RGBX_SINT16] = NULL;
-      table[MESA_FORMAT_RGBX_FLOAT32] = pack_float_RGBX_FLOAT32;
-      table[MESA_FORMAT_RGBX_UINT32] = NULL;
-      table[MESA_FORMAT_RGBX_SINT32] = NULL;
-
-      table[MESA_FORMAT_R10G10B10A2_UNORM] = pack_float_R10G10B10A2_UNORM;
-
-      table[MESA_FORMAT_G8R8_SNORM] = pack_float_G8R8_SNORM;
-      table[MESA_FORMAT_G16R16_SNORM] = pack_float_G16R16_SNORM;
-
-      table[MESA_FORMAT_B8G8R8X8_SRGB] = pack_float_B8G8R8X8_SRGB;
-      table[MESA_FORMAT_X8R8G8B8_SRGB] = pack_float_X8R8G8B8_SRGB;
-
-      initialized = GL_TRUE;
-   }
-
-   return table[format];
-}
-
-
-
-static pack_float_rgba_row_func
-get_pack_float_rgba_row_function(mesa_format format)
-{
-   static pack_float_rgba_row_func table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      /* We don't need a special row packing function for each format.
-       * There's a generic fallback which uses a per-pixel packing function.
-       */
-      memset(table, 0, sizeof(table));
-
-      table[MESA_FORMAT_A8B8G8R8_UNORM] = pack_row_float_A8B8G8R8_UNORM;
-      table[MESA_FORMAT_R8G8B8A8_UNORM] = pack_row_float_R8G8B8A8_UNORM;
-      table[MESA_FORMAT_B8G8R8A8_UNORM] = pack_row_float_B8G8R8A8_UNORM;
-      table[MESA_FORMAT_A8R8G8B8_UNORM] = pack_row_float_A8R8G8B8_UNORM;
-      table[MESA_FORMAT_X8B8G8R8_UNORM] = pack_row_float_A8B8G8R8_UNORM; /* reused */
-      table[MESA_FORMAT_R8G8B8X8_UNORM] = pack_row_float_R8G8B8A8_UNORM; /* reused */
-      table[MESA_FORMAT_B8G8R8X8_UNORM] = pack_row_float_B8G8R8X8_UNORM;
-      table[MESA_FORMAT_X8R8G8B8_UNORM] = pack_row_float_X8R8G8B8_UNORM;
-      table[MESA_FORMAT_BGR_UNORM8] = pack_row_float_BGR_UNORM8;
-      table[MESA_FORMAT_RGB_UNORM8] = pack_row_float_RGB_UNORM8;
-      table[MESA_FORMAT_B5G6R5_UNORM] = pack_row_float_B5G6R5_UNORM;
-      table[MESA_FORMAT_R5G6B5_UNORM] = pack_row_float_R5G6B5_UNORM;
-
-      initialized = GL_TRUE;
-   }
-
-   return table[format];
-}
-
-
-
-static pack_ubyte_rgba_row_func
-get_pack_ubyte_rgba_row_function(mesa_format format)
-{
-   static pack_ubyte_rgba_row_func table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      /* We don't need a special row packing function for each format.
-       * There's a generic fallback which uses a per-pixel packing function.
-       */
-      memset(table, 0, sizeof(table));
-
-      table[MESA_FORMAT_A8B8G8R8_UNORM] = pack_row_ubyte_A8B8G8R8_UNORM;
-      table[MESA_FORMAT_R8G8B8A8_UNORM] = pack_row_ubyte_R8G8B8A8_UNORM;
-      table[MESA_FORMAT_B8G8R8A8_UNORM] = pack_row_ubyte_B8G8R8A8_UNORM;
-      table[MESA_FORMAT_A8R8G8B8_UNORM] = pack_row_ubyte_A8R8G8B8_UNORM;
-      table[MESA_FORMAT_X8B8G8R8_UNORM] = pack_row_ubyte_A8B8G8R8_UNORM; /* reused */
-      table[MESA_FORMAT_R8G8B8X8_UNORM] = pack_row_ubyte_R8G8B8A8_UNORM; /* reused */
-      table[MESA_FORMAT_B8G8R8X8_UNORM] = pack_row_ubyte_B8G8R8X8_UNORM;
-      table[MESA_FORMAT_X8R8G8B8_UNORM] = pack_row_ubyte_X8R8G8B8_UNORM;
-      table[MESA_FORMAT_BGR_UNORM8] = pack_row_ubyte_BGR_UNORM8;
-      table[MESA_FORMAT_RGB_UNORM8] = pack_row_ubyte_RGB_UNORM8;
-      table[MESA_FORMAT_B5G6R5_UNORM] = pack_row_ubyte_B5G6R5_UNORM;
-      table[MESA_FORMAT_R5G6B5_UNORM] = pack_row_ubyte_R5G6B5_UNORM;
-
-      initialized = GL_TRUE;
-   }
-
-   return table[format];
-}
-
-
-
-/**
- * Pack a row of GLfloat rgba[4] values to the destination.
- */
-void
-_mesa_pack_float_rgba_row(mesa_format format, GLuint n,
-                          const GLfloat src[][4], void *dst)
-{
-   pack_float_rgba_row_func packrow = get_pack_float_rgba_row_function(format);
-   if (packrow) {
-      /* use "fast" function */
-      packrow(n, src, dst);
-   }
-   else {
-      /* slower fallback */
-      gl_pack_float_rgba_func pack = _mesa_get_pack_float_rgba_function(format);
-      GLuint dstStride = _mesa_get_format_bytes(format);
-      GLubyte *dstPtr = (GLubyte *) dst;
-      GLuint i;
-
-      assert(pack);
-      if (!pack)
-         return;
-
-      for (i = 0; i < n; i++) {
-         pack(src[i], dstPtr);
-         dstPtr += dstStride;
-      }
-   }
-}
-
-
-/**
- * Pack a row of GLubyte rgba[4] values to the destination.
- */
-void
-_mesa_pack_ubyte_rgba_row(mesa_format format, GLuint n,
-                          const GLubyte src[][4], void *dst)
-{
-   pack_ubyte_rgba_row_func packrow = get_pack_ubyte_rgba_row_function(format);
-   if (packrow) {
-      /* use "fast" function */
-      packrow(n, src, dst);
-   }
-   else {
-      /* slower fallback */
-      gl_pack_ubyte_rgba_func pack = _mesa_get_pack_ubyte_rgba_function(format);
-      const GLuint stride = _mesa_get_format_bytes(format);
-      GLubyte *d = ((GLubyte *) dst);
-      GLuint i;
-
-      assert(pack);
-      if (!pack)
-         return;
-
-      for (i = 0; i < n; i++) {
-         pack(src[i], d);
-         d += stride;
-      }
-   }
-}
-
-
-/**
- * Pack a 2D image of ubyte RGBA pixels in the given format.
- * \param srcRowStride  source image row stride in bytes
- * \param dstRowStride  destination image row stride in bytes
- */
-void
-_mesa_pack_ubyte_rgba_rect(mesa_format format, GLuint width, GLuint height,
-                           const GLubyte *src, GLint srcRowStride,
-                           void *dst, GLint dstRowStride)
-{
-   pack_ubyte_rgba_row_func packrow = get_pack_ubyte_rgba_row_function(format);
-   GLubyte *dstUB = (GLubyte *) dst;
-   GLuint i;
-
-   if (packrow) {
-      if (srcRowStride == width * 4 * sizeof(GLubyte) &&
-          dstRowStride == _mesa_format_row_stride(format, width)) {
-         /* do whole image at once */
-         packrow(width * height, (const GLubyte (*)[4]) src, dst);
-      }
-      else {
-         /* row by row */
-         for (i = 0; i < height; i++) {
-            packrow(width, (const GLubyte (*)[4]) src, dstUB);
-            src += srcRowStride;
-            dstUB += dstRowStride;
-         }
-      }
-   }
-   else {
-      /* slower fallback */
-      for (i = 0; i < height; i++) {
-         _mesa_pack_ubyte_rgba_row(format, width,
-                                   (const GLubyte (*)[4]) src, dstUB);
-         src += srcRowStride;
-         dstUB += dstRowStride;
-      }
-   }
-}
-
-
-
-/**
- ** Pack float Z pixels
- **/
-
-static void
-pack_float_S8_UINT_Z24_UNORM(const GLfloat *src, void *dst)
-{
-   /* don't disturb the stencil values */
-   GLuint *d = ((GLuint *) dst);
-   const GLdouble scale = (GLdouble) 0xffffff;
-   GLuint s = *d & 0xff;
-   GLuint z = (GLuint) (*src * scale);
-   assert(z <= 0xffffff);
-   *d = (z << 8) | s;
-}
-
-static void
-pack_float_Z24_UNORM_S8_UINT(const GLfloat *src, void *dst)
-{
-   /* don't disturb the stencil values */
-   GLuint *d = ((GLuint *) dst);
-   const GLdouble scale = (GLdouble) 0xffffff;
-   GLuint s = *d & 0xff000000;
-   GLuint z = (GLuint) (*src * scale);
-   assert(z <= 0xffffff);
-   *d = s | z;
-}
-
-static void
-pack_float_Z_UNORM16(const GLfloat *src, void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   const GLfloat scale = (GLfloat) 0xffff;
-   *d = (GLushort) (*src * scale);
-}
-
-static void
-pack_float_Z_UNORM32(const GLfloat *src, void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   const GLdouble scale = (GLdouble) 0xffffffff;
-   *d = (GLuint) (*src * scale);
-}
-
-static void
-pack_float_Z_FLOAT32(const GLfloat *src, void *dst)
-{
-   GLfloat *d = (GLfloat *) dst;
-   *d = *src;
-}
-
-gl_pack_float_z_func
-_mesa_get_pack_float_z_func(mesa_format format)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      return pack_float_S8_UINT_Z24_UNORM;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      return pack_float_Z24_UNORM_S8_UINT;
-   case MESA_FORMAT_Z_UNORM16:
-      return pack_float_Z_UNORM16;
-   case MESA_FORMAT_Z_UNORM32:
-      return pack_float_Z_UNORM32;
-   case MESA_FORMAT_Z_FLOAT32:
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return pack_float_Z_FLOAT32;
-   default:
-      _mesa_problem(NULL,
-                    "unexpected format in _mesa_get_pack_float_z_func()");
-      return NULL;
-   }
-}
-
-
-
-/**
- ** Pack uint Z pixels.  The incoming src value is always in
- ** the range [0, 2^32-1].
- **/
-
-static void
-pack_uint_S8_UINT_Z24_UNORM(const GLuint *src, void *dst)
-{
-   /* don't disturb the stencil values */
-   GLuint *d = ((GLuint *) dst);
-   GLuint s = *d & 0xff;
-   GLuint z = *src & 0xffffff00;
-   *d = z | s;
-}
-
-static void
-pack_uint_Z24_UNORM_S8_UINT(const GLuint *src, void *dst)
-{
-   /* don't disturb the stencil values */
-   GLuint *d = ((GLuint *) dst);
-   GLuint s = *d & 0xff000000;
-   GLuint z = *src >> 8;
-   *d = s | z;
-}
-
-static void
-pack_uint_Z_UNORM16(const GLuint *src, void *dst)
-{
-   GLushort *d = ((GLushort *) dst);
-   *d = *src >> 16;
-}
-
-static void
-pack_uint_Z_UNORM32(const GLuint *src, void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   *d = *src;
-}
-
-static void
-pack_uint_Z_FLOAT32(const GLuint *src, void *dst)
-{
-   GLuint *d = ((GLuint *) dst);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
-   *d = (GLuint) (*src * scale);
-   assert(*d >= 0.0f);
-   assert(*d <= 1.0f);
-}
-
-static void
-pack_uint_Z_FLOAT32_X24S8(const GLuint *src, void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
-   *d = (GLfloat) (*src * scale);
-   assert(*d >= 0.0f);
-   assert(*d <= 1.0f);
-}
-
-gl_pack_uint_z_func
-_mesa_get_pack_uint_z_func(mesa_format format)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      return pack_uint_S8_UINT_Z24_UNORM;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      return pack_uint_Z24_UNORM_S8_UINT;
-   case MESA_FORMAT_Z_UNORM16:
-      return pack_uint_Z_UNORM16;
-   case MESA_FORMAT_Z_UNORM32:
-      return pack_uint_Z_UNORM32;
-   case MESA_FORMAT_Z_FLOAT32:
-      return pack_uint_Z_FLOAT32;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return pack_uint_Z_FLOAT32_X24S8;
-   default:
-      _mesa_problem(NULL, "unexpected format in _mesa_get_pack_uint_z_func()");
-      return NULL;
-   }
-}
-
-
-/**
- ** Pack ubyte stencil pixels
- **/
-
-static void
-pack_ubyte_stencil_Z24_S8(const GLubyte *src, void *dst)
-{
-   /* don't disturb the Z values */
-   GLuint *d = ((GLuint *) dst);
-   GLuint s = *src;
-   GLuint z = *d & 0xffffff00;
-   *d = z | s;
-}
-
-static void
-pack_ubyte_stencil_S8_Z24(const GLubyte *src, void *dst)
-{
-   /* don't disturb the Z values */
-   GLuint *d = ((GLuint *) dst);
-   GLuint s = *src << 24;
-   GLuint z = *d & 0xffffff;
-   *d = s | z;
-}
-
-static void
-pack_ubyte_stencil_S8(const GLubyte *src, void *dst)
-{
-   GLubyte *d = (GLubyte *) dst;
-   *d = *src;
-}
-
-static void
-pack_ubyte_stencil_Z32_FLOAT_X24S8(const GLubyte *src, void *dst)
-{
-   GLfloat *d = ((GLfloat *) dst);
-   d[1] = *src;
-}
-
-
-gl_pack_ubyte_stencil_func
-_mesa_get_pack_ubyte_stencil_func(mesa_format format)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      return pack_ubyte_stencil_Z24_S8;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      return pack_ubyte_stencil_S8_Z24;
-   case MESA_FORMAT_S_UINT8:
-      return pack_ubyte_stencil_S8;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      return pack_ubyte_stencil_Z32_FLOAT_X24S8;
-   default:
-      _mesa_problem(NULL,
-                    "unexpected format in _mesa_pack_ubyte_stencil_func()");
-      return NULL;
-   }
-}
-
-
-
-void
-_mesa_pack_float_z_row(mesa_format format, GLuint n,
-                       const GLfloat *src, void *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      {
-         /* don't disturb the stencil values */
-         GLuint *d = ((GLuint *) dst);
-         const GLdouble scale = (GLdouble) 0xffffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = d[i] & 0xff;
-            GLuint z = (GLuint) (src[i] * scale);
-            assert(z <= 0xffffff);
-            d[i] = (z << 8) | s;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      {
-         /* don't disturb the stencil values */
-         GLuint *d = ((GLuint *) dst);
-         const GLdouble scale = (GLdouble) 0xffffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = d[i] & 0xff000000;
-            GLuint z = (GLuint) (src[i] * scale);
-            assert(z <= 0xffffff);
-            d[i] = s | z;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z_UNORM16:
-      {
-         GLushort *d = ((GLushort *) dst);
-         const GLfloat scale = (GLfloat) 0xffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i] = (GLushort) (src[i] * scale);
-         }
-      }
-      break;
-   case MESA_FORMAT_Z_UNORM32:
-      {
-         GLuint *d = ((GLuint *) dst);
-         const GLdouble scale = (GLdouble) 0xffffffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i] = (GLuint) (src[i] * scale);
-         }
-      }
-      break;
-   case MESA_FORMAT_Z_FLOAT32:
-      memcpy(dst, src, n * sizeof(GLfloat));
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      {
-         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i].z = src[i];
-         }
-      }
-      break;
-   default:
-      _mesa_problem(NULL, "unexpected format in _mesa_pack_float_z_row()");
-   }
-}
-
-
-/**
- * The incoming Z values are always in the range [0, 0xffffffff].
- */
-void
-_mesa_pack_uint_z_row(mesa_format format, GLuint n,
-                      const GLuint *src, void *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      {
-         /* don't disturb the stencil values */
-         GLuint *d = ((GLuint *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = d[i] & 0xff;
-            GLuint z = src[i] & 0xffffff00;
-            d[i] = z | s;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      {
-         /* don't disturb the stencil values */
-         GLuint *d = ((GLuint *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = d[i] & 0xff000000;
-            GLuint z = src[i] >> 8;
-            d[i] = s | z;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z_UNORM16:
-      {
-         GLushort *d = ((GLushort *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i] = src[i] >> 16;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z_UNORM32:
-      memcpy(dst, src, n * sizeof(GLfloat));
-      break;
-   case MESA_FORMAT_Z_FLOAT32:
-      {
-         GLuint *d = ((GLuint *) dst);
-         const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i] = (GLuint) (src[i] * scale);
-            assert(d[i] >= 0.0f);
-            assert(d[i] <= 1.0f);
-         }
-      }
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      {
-         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-         const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i].z = (GLfloat) (src[i] * scale);
-            assert(d[i].z >= 0.0f);
-            assert(d[i].z <= 1.0f);
-         }
-      }
-      break;
-   default:
-      _mesa_problem(NULL, "unexpected format in _mesa_pack_uint_z_row()");
-   }
-}
-
-
-void
-_mesa_pack_ubyte_stencil_row(mesa_format format, GLuint n,
-                             const GLubyte *src, void *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      {
-         /* don't disturb the Z values */
-         GLuint *d = ((GLuint *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = src[i];
-            GLuint z = d[i] & 0xffffff00;
-            d[i] = z | s;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      {
-         /* don't disturb the Z values */
-         GLuint *d = ((GLuint *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = src[i] << 24;
-            GLuint z = d[i] & 0xffffff;
-            d[i] = s | z;
-         }
-      }
-      break;
-   case MESA_FORMAT_S_UINT8:
-      memcpy(dst, src, n * sizeof(GLubyte));
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      {
-         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            d[i].x24s8 = src[i];
-         }
-      }
-      break;
-   default:
-      _mesa_problem(NULL, "unexpected format in _mesa_pack_ubyte_stencil_row()");
-   }
-}
-
-
-/**
- * Incoming Z/stencil values are always in uint_24_8 format.
- */
-void
-_mesa_pack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
-                                       const GLuint *src, void *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      memcpy(dst, src, n * sizeof(GLuint));
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      {
-         GLuint *d = ((GLuint *) dst);
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLuint s = src[i] << 24;
-            GLuint z = src[i] >> 8;
-            d[i] = s | z;
-         }
-      }
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      {
-         const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            GLfloat z = (GLfloat) ((src[i] >> 8) * scale);
-            d[i].z = z;
-            d[i].x24s8 = src[i];
-         }
-      }
-      break;
-   default:
-      _mesa_problem(NULL, "bad format %s in _mesa_pack_ubyte_s_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-}
-
-
-
-/**
- * Convert a boolean color mask to a packed color where each channel of
- * the packed value at dst will be 0 or ~0 depending on the colorMask.
- */
-void
-_mesa_pack_colormask(mesa_format format, const GLubyte colorMask[4], void *dst)
-{
-   GLfloat maskColor[4];
-
-   switch (_mesa_get_format_datatype(format)) {
-   case GL_UNSIGNED_NORMALIZED:
-      /* simple: 1.0 will convert to ~0 in the right bit positions */
-      maskColor[0] = colorMask[0] ? 1.0f : 0.0f;
-      maskColor[1] = colorMask[1] ? 1.0f : 0.0f;
-      maskColor[2] = colorMask[2] ? 1.0f : 0.0f;
-      maskColor[3] = colorMask[3] ? 1.0f : 0.0f;
-      _mesa_pack_float_rgba_row(format, 1,
-                                (const GLfloat (*)[4]) maskColor, dst);
-      break;
-   case GL_SIGNED_NORMALIZED:
-   case GL_FLOAT:
-      /* These formats are harder because it's hard to know the floating
-       * point values that will convert to ~0 for each color channel's bits.
-       * This solution just generates a non-zero value for each color channel
-       * then fixes up the non-zero values to be ~0.
-       * Note: we'll need to add special case code if we ever have to deal
-       * with formats with unequal color channel sizes, like R11_G11_B10.
-       * We issue a warning below for channel sizes other than 8,16,32.
-       */
-      {
-         GLuint bits = _mesa_get_format_max_bits(format); /* bits per chan */
-         GLuint bytes = _mesa_get_format_bytes(format);
-         GLuint i;
-
-         /* this should put non-zero values into the channels of dst */
-         maskColor[0] = colorMask[0] ? -1.0f : 0.0f;
-         maskColor[1] = colorMask[1] ? -1.0f : 0.0f;
-         maskColor[2] = colorMask[2] ? -1.0f : 0.0f;
-         maskColor[3] = colorMask[3] ? -1.0f : 0.0f;
-         _mesa_pack_float_rgba_row(format, 1,
-                                   (const GLfloat (*)[4]) maskColor, dst);
-
-         /* fix-up the dst channels by converting non-zero values to ~0 */
-         if (bits == 8) {
-            GLubyte *d = (GLubyte *) dst;
-            for (i = 0; i < bytes; i++) {
-               d[i] = d[i] ? 0xff : 0x0;
-            }
-         }
-         else if (bits == 16) {
-            GLushort *d = (GLushort *) dst;
-            for (i = 0; i < bytes / 2; i++) {
-               d[i] = d[i] ? 0xffff : 0x0;
-            }
-         }
-         else if (bits == 32) {
-            GLuint *d = (GLuint *) dst;
-            for (i = 0; i < bytes / 4; i++) {
-               d[i] = d[i] ? 0xffffffffU : 0x0;
-            }
-         }
-         else {
-            _mesa_problem(NULL, "unexpected size in _mesa_pack_colormask()");
-            return;
-         }
-      }
-      break;
-   default:
-      _mesa_problem(NULL, "unexpected format data type in gen_color_mask()");
-      return;
-   }
-}
diff --git a/mesalib/src/mesa/main/format_pack.h b/mesalib/src/mesa/main/format_pack.h
index 2577def41..aa7113e9b 100644
--- a/mesalib/src/mesa/main/format_pack.h
+++ b/mesalib/src/mesa/main/format_pack.h
@@ -68,7 +68,6 @@ extern gl_pack_ubyte_stencil_func
 _mesa_get_pack_ubyte_stencil_func(mesa_format format);
 
 
-
 extern void
 _mesa_pack_float_rgba_row(mesa_format format, GLuint n,
                           const GLfloat src[][4], void *dst);
@@ -77,6 +76,9 @@ extern void
 _mesa_pack_ubyte_rgba_row(mesa_format format, GLuint n,
                           const GLubyte src[][4], void *dst);
 
+extern void
+_mesa_pack_uint_rgba_row(mesa_format format, GLuint n,
+                         const GLuint src[][4], void *dst);
 
 extern void
 _mesa_pack_ubyte_rgba_rect(mesa_format format, GLuint width, GLuint height,
diff --git a/mesalib/src/mesa/main/format_pack.py b/mesalib/src/mesa/main/format_pack.py
new file mode 100644
index 000000000..f141da83c
--- /dev/null
+++ b/mesalib/src/mesa/main/format_pack.py
@@ -0,0 +1,1004 @@
+#!/usr/bin/env python
+
+from mako.template import Template
+from sys import argv
+
+string = """/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (c) 2011 VMware, Inc.
+ * Copyright (c) 2014 Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Color, depth, stencil packing functions.
+ * Used to pack basic color, depth and stencil formats to specific
+ * hardware formats.
+ *
+ * There are both per-pixel and per-row packing functions:
+ * - The former will be used by swrast to write values to the color, depth,
+ *   stencil buffers when drawing points, lines and masked spans.
+ * - The latter will be used for image-oriented functions like glDrawPixels,
+ *   glAccum, and glTexImage.
+ */
+
+#include <stdint.h>
+
+#include "colormac.h"
+#include "format_pack.h"
+#include "format_utils.h"
+#include "macros.h"
+#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
+#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
+#include "util/format_srgb.h"
+
+#define UNPACK(SRC, OFFSET, BITS) (((SRC) >> (OFFSET)) & MAX_UINT(BITS))
+#define PACK(SRC, OFFSET, BITS) (((SRC) & MAX_UINT(BITS)) << (OFFSET))
+
+<%
+import format_parser as parser
+
+formats = parser.parse(argv[1])
+
+rgb_formats = []
+for f in formats:
+   if f.name == 'MESA_FORMAT_NONE':
+      continue
+   if f.colorspace not in ('rgb', 'srgb'):
+      continue
+
+   rgb_formats.append(f)
+%>
+
+/* ubyte packing functions */
+
+%for f in rgb_formats:
+   %if f.name in ('MESA_FORMAT_R9G9B9E5_FLOAT', 'MESA_FORMAT_R11G11B10_FLOAT'):
+      <% continue %>
+   %elif f.is_compressed():
+      <% continue %>
+   %endif
+
+static inline void
+pack_ubyte_${f.short_name()}(const GLubyte src[4], void *dst)
+{
+   %for (i, c) in enumerate(f.channels):
+      <% i = f.swizzle.inverse()[i] %>
+      %if c.type == 'x':
+         <% continue %>
+      %endif
+
+      ${c.datatype()} ${c.name} =
+      %if not f.is_normalized() and f.is_int():
+          %if c.type == parser.SIGNED:
+              _mesa_unsigned_to_signed(src[${i}], ${c.size});
+          %else:
+              _mesa_unsigned_to_unsigned(src[${i}], ${c.size});
+          %endif
+      %elif c.type == parser.UNSIGNED:
+         %if f.colorspace == 'srgb' and c.name in 'rgb':
+            <% assert c.size == 8 %>
+            util_format_linear_to_srgb_8unorm(src[${i}]);
+         %else:
+            _mesa_unorm_to_unorm(src[${i}], 8, ${c.size});
+         %endif
+      %elif c.type == parser.SIGNED:
+         _mesa_unorm_to_snorm(src[${i}], 8, ${c.size});
+      %elif c.type == parser.FLOAT:
+         %if c.size == 32:
+            _mesa_unorm_to_float(src[${i}], 8);
+         %elif c.size == 16:
+            _mesa_unorm_to_half(src[${i}], 8);
+         %else:
+            <% assert False %>
+         %endif
+      %else:
+         <% assert False %>
+      %endif
+   %endfor
+
+   %if f.layout == parser.ARRAY:
+      ${f.datatype()} *d = (${f.datatype()} *)dst;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d[${i}] = ${c.name};
+      %endfor
+   %elif f.layout == parser.PACKED:
+      ${f.datatype()} d = 0;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d |= PACK(${c.name}, ${c.shift}, ${c.size});
+      %endfor
+      (*(${f.datatype()} *)dst) = d;
+   %else:
+      <% assert False %>
+   %endif
+}
+%endfor
+
+static inline void
+pack_ubyte_r9g9b9e5_float(const GLubyte src[4], void *dst)
+{
+   GLuint *d = (GLuint *) dst;
+   GLfloat rgb[3];
+   rgb[0] = _mesa_unorm_to_float(src[RCOMP], 8);
+   rgb[1] = _mesa_unorm_to_float(src[GCOMP], 8);
+   rgb[2] = _mesa_unorm_to_float(src[BCOMP], 8);
+   *d = float3_to_rgb9e5(rgb);
+}
+
+static inline void
+pack_ubyte_r11g11b10_float(const GLubyte src[4], void *dst)
+{
+   GLuint *d = (GLuint *) dst;
+   GLfloat rgb[3];
+   rgb[0] = _mesa_unorm_to_float(src[RCOMP], 8);
+   rgb[1] = _mesa_unorm_to_float(src[GCOMP], 8);
+   rgb[2] = _mesa_unorm_to_float(src[BCOMP], 8);
+   *d = float3_to_r11g11b10f(rgb);
+}
+
+/* uint packing functions */
+
+%for f in rgb_formats:
+   %if not f.is_int():
+      <% continue %>
+   %elif f.is_normalized():
+      <% continue %>
+   %elif f.is_compressed():
+      <% continue %>
+   %endif
+
+static inline void
+pack_uint_${f.short_name()}(const GLuint src[4], void *dst)
+{
+   %for (i, c) in enumerate(f.channels):
+      <% i = f.swizzle.inverse()[i] %>
+      %if c.type == 'x':
+         <% continue %>
+      %endif
+
+      ${c.datatype()} ${c.name} =
+      %if c.type == parser.SIGNED:
+         _mesa_signed_to_signed(src[${i}], ${c.size});
+      %elif c.type == parser.UNSIGNED:
+         _mesa_unsigned_to_unsigned(src[${i}], ${c.size});
+      %else:
+         assert(!"Invalid type: only integer types are allowed");
+      %endif
+   %endfor
+
+   %if f.layout == parser.ARRAY:
+      ${f.datatype()} *d = (${f.datatype()} *)dst;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d[${i}] = ${c.name};
+      %endfor
+   %elif f.layout == parser.PACKED:
+      ${f.datatype()} d = 0;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d |= PACK(${c.name}, ${c.shift}, ${c.size});
+      %endfor
+      (*(${f.datatype()} *)dst) = d;
+   %else:
+      <% assert False %>
+   %endif
+}
+%endfor
+
+/* float packing functions */
+
+%for f in rgb_formats:
+   %if f.name in ('MESA_FORMAT_R9G9B9E5_FLOAT', 'MESA_FORMAT_R11G11B10_FLOAT'):
+      <% continue %>
+   %elif f.is_int() and not f.is_normalized():
+      <% continue %>
+   %elif f.is_compressed():
+      <% continue %>
+   %endif
+
+static inline void
+pack_float_${f.short_name()}(const GLfloat src[4], void *dst)
+{
+   %for (i, c) in enumerate(f.channels):
+      <% i = f.swizzle.inverse()[i] %>
+      %if c.type == 'x':
+         <% continue %>
+      %endif
+
+      ${c.datatype()} ${c.name} =
+      %if c.type == parser.UNSIGNED:
+         %if f.colorspace == 'srgb' and c.name in 'rgb':
+            <% assert c.size == 8 %>
+            util_format_linear_float_to_srgb_8unorm(src[${i}]);
+         %else:
+            _mesa_float_to_unorm(src[${i}], ${c.size});
+         %endif
+      %elif c.type == parser.SIGNED:
+         _mesa_float_to_snorm(src[${i}], ${c.size});
+      %elif c.type == parser.FLOAT:
+         %if c.size == 32:
+            src[${i}];
+         %elif c.size == 16:
+            _mesa_float_to_half(src[${i}]);
+         %else:
+            <% assert False %>
+         %endif
+      %else:
+         <% assert False %>
+      %endif
+   %endfor
+
+   %if f.layout == parser.ARRAY:
+      ${f.datatype()} *d = (${f.datatype()} *)dst;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d[${i}] = ${c.name};
+      %endfor
+   %elif f.layout == parser.PACKED:
+      ${f.datatype()} d = 0;
+      %for (i, c) in enumerate(f.channels):
+         %if c.type == 'x':
+            <% continue %>
+         %endif
+         d |= PACK(${c.name}, ${c.shift}, ${c.size});
+      %endfor
+      (*(${f.datatype()} *)dst) = d;
+   %else:
+      <% assert False %>
+   %endif
+}
+%endfor
+
+static inline void
+pack_float_r9g9b9e5_float(const GLfloat src[4], void *dst)
+{
+   GLuint *d = (GLuint *) dst;
+   *d = float3_to_rgb9e5(src);
+}
+
+static inline void
+pack_float_r11g11b10_float(const GLfloat src[4], void *dst)
+{
+   GLuint *d = (GLuint *) dst;
+   *d = float3_to_r11g11b10f(src);
+}
+
+/**
+ * Return a function that can pack a GLubyte rgba[4] color.
+ */
+gl_pack_ubyte_rgba_func
+_mesa_get_pack_ubyte_rgba_function(mesa_format format)
+{
+   switch (format) {
+%for f in rgb_formats:
+   %if f.is_compressed():
+      <% continue %>
+   %endif
+
+   case ${f.name}:
+      return pack_ubyte_${f.short_name()};
+%endfor
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Return a function that can pack a GLfloat rgba[4] color.
+ */
+gl_pack_float_rgba_func
+_mesa_get_pack_float_rgba_function(mesa_format format)
+{
+   switch (format) {
+%for f in rgb_formats:
+   %if f.is_compressed():
+      <% continue %>
+   %elif f.is_int() and not f.is_normalized():
+      <% continue %>
+   %endif
+
+   case ${f.name}:
+      return pack_float_${f.short_name()};
+%endfor
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Pack a row of n GLubyte rgba[4] pixels to dst (block_size()/8 bytes each).
+ */
+void
+_mesa_pack_ubyte_rgba_row(mesa_format format, GLuint n,
+                          const GLubyte src[][4], void *dst)
+{
+   GLuint i;
+   GLubyte *d = dst;
+
+   switch (format) {
+%for f in rgb_formats:
+   %if f.is_compressed():
+      <% continue %>
+   %endif
+
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         pack_ubyte_${f.short_name()}(src[i], d);
+         d += ${f.block_size() // 8};
+      }
+      break;
+%endfor
+   default:
+      assert(!"Invalid format");
+   }
+}
+
+/**
+ * Pack a row of n GLuint rgba[4] pixels to dst (pure integer formats only).
+ */
+void
+_mesa_pack_uint_rgba_row(mesa_format format, GLuint n,
+                         const GLuint src[][4], void *dst)
+{
+   GLuint i;
+   GLubyte *d = dst;
+
+   switch (format) {
+%for f in rgb_formats:
+   %if not f.is_int():
+      <% continue %>
+   %elif f.is_normalized():
+      <% continue %>
+   %elif f.is_compressed():
+      <% continue %>
+   %endif
+
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         pack_uint_${f.short_name()}(src[i], d);
+         d += ${f.block_size() // 8};
+      }
+      break;
+%endfor
+   default:
+      assert(!"Invalid format");
+   }
+}
+
+/**
+ * Pack a row of n GLfloat rgba[4] pixels to dst (not for pure integer formats).
+ */
+void
+_mesa_pack_float_rgba_row(mesa_format format, GLuint n,
+                          const GLfloat src[][4], void *dst)
+{
+   GLuint i;
+   GLubyte *d = dst;
+
+   switch (format) {
+%for f in rgb_formats:
+   %if f.is_compressed():
+      <% continue %>
+   %elif f.is_int() and not f.is_normalized():
+      <% continue %>
+   %endif
+
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         pack_float_${f.short_name()}(src[i], d);
+         d += ${f.block_size() // 8};
+      }
+      break;
+%endfor
+   default:
+      assert(!"Invalid format");
+   }
+}
+
+/**
+ * Pack a 2D image of ubyte RGBA pixels in the given format.
+ * \param srcRowStride  source image row stride in bytes
+ * \param dstRowStride  destination image row stride in bytes
+ */
+void
+_mesa_pack_ubyte_rgba_rect(mesa_format format, GLuint width, GLuint height,
+                           const GLubyte *src, GLint srcRowStride,
+                           void *dst, GLint dstRowStride)
+{
+   GLubyte *dstUB = dst;
+   GLuint i;
+
+   if (srcRowStride == width * 4 * sizeof(GLubyte) &&
+       dstRowStride == _mesa_format_row_stride(format, width)) {
+      /* do whole image at once */
+      _mesa_pack_ubyte_rgba_row(format, width * height,
+                                (const GLubyte (*)[4]) src, dst);
+   }
+   else {
+      /* row by row */
+      for (i = 0; i < height; i++) {
+         _mesa_pack_ubyte_rgba_row(format, width,
+                                   (const GLubyte (*)[4]) src, dstUB);
+         src += srcRowStride;
+         dstUB += dstRowStride;
+      }
+   }
+}
+
+
+/** Helper struct for MESA_FORMAT_Z32_FLOAT_S8X24_UINT */
+struct z32f_x24s8
+{
+   float z;
+   uint32_t x24s8;
+};
+
+
+/**
+ ** Pack float Z pixels
+ **/
+
+static void
+pack_float_S8_UINT_Z24_UNORM(const GLfloat *src, void *dst)
+{
+   /* don't disturb the stencil values */
+   GLuint *d = ((GLuint *) dst);
+   const GLdouble scale = (GLdouble) 0xffffff;
+   GLuint s = *d & 0xff;
+   GLuint z = (GLuint) (*src * scale);
+   assert(z <= 0xffffff);
+   *d = (z << 8) | s;
+}
+
+static void
+pack_float_Z24_UNORM_S8_UINT(const GLfloat *src, void *dst)
+{
+   /* don't disturb the stencil values */
+   GLuint *d = ((GLuint *) dst);
+   const GLdouble scale = (GLdouble) 0xffffff;
+   GLuint s = *d & 0xff000000;
+   GLuint z = (GLuint) (*src * scale);
+   assert(z <= 0xffffff);
+   *d = s | z;
+}
+
+static void
+pack_float_Z_UNORM16(const GLfloat *src, void *dst)
+{
+   GLushort *d = ((GLushort *) dst);
+   const GLfloat scale = (GLfloat) 0xffff;
+   *d = (GLushort) (*src * scale);
+}
+
+static void
+pack_float_Z_UNORM32(const GLfloat *src, void *dst)
+{
+   GLuint *d = ((GLuint *) dst);
+   const GLdouble scale = (GLdouble) 0xffffffff;
+   *d = (GLuint) (*src * scale);
+}
+
+static void
+pack_float_Z_FLOAT32(const GLfloat *src, void *dst)
+{
+   GLfloat *d = (GLfloat *) dst;
+   *d = *src;
+}
+
+gl_pack_float_z_func
+_mesa_get_pack_float_z_func(mesa_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      return pack_float_S8_UINT_Z24_UNORM;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      return pack_float_Z24_UNORM_S8_UINT;
+   case MESA_FORMAT_Z_UNORM16:
+      return pack_float_Z_UNORM16;
+   case MESA_FORMAT_Z_UNORM32:
+      return pack_float_Z_UNORM32;
+   case MESA_FORMAT_Z_FLOAT32:
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return pack_float_Z_FLOAT32;
+   default:
+      _mesa_problem(NULL,
+                    "unexpected format in _mesa_get_pack_float_z_func()");
+      return NULL;
+   }
+}
+
+
+
+/**
+ ** Pack uint Z pixels.  The incoming src value is always in
+ ** the range [0, 2^32-1].
+ **/
+
+static void
+pack_uint_S8_UINT_Z24_UNORM(const GLuint *src, void *dst)
+{
+   /* don't disturb the stencil values */
+   GLuint *d = ((GLuint *) dst);
+   GLuint s = *d & 0xff;
+   GLuint z = *src & 0xffffff00;
+   *d = z | s;
+}
+
+static void
+pack_uint_Z24_UNORM_S8_UINT(const GLuint *src, void *dst)
+{
+   /* don't disturb the stencil values */
+   GLuint *d = ((GLuint *) dst);
+   GLuint s = *d & 0xff000000;
+   GLuint z = *src >> 8;
+   *d = s | z;
+}
+
+static void
+pack_uint_Z_UNORM16(const GLuint *src, void *dst)
+{
+   GLushort *d = ((GLushort *) dst);
+   *d = *src >> 16;
+}
+
+static void
+pack_uint_Z_UNORM32(const GLuint *src, void *dst)
+{
+   GLuint *d = ((GLuint *) dst);
+   *d = *src;
+}
+
+static void
+pack_uint_Z_FLOAT32(const GLuint *src, void *dst)
+{
+   /* dst is a float depth texel: rescale [0, 0xffffffff] to [0.0, 1.0] */
+   GLfloat *d = ((GLfloat *) dst);
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
+   *d = (GLfloat) (*src * scale);
+   assert(*d >= 0.0f && *d <= 1.0f);
+}
+
+static void
+pack_uint_Z_FLOAT32_X24S8(const GLuint *src, void *dst)
+{
+   GLfloat *d = ((GLfloat *) dst);
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
+   *d = (GLfloat) (*src * scale);
+   assert(*d >= 0.0f);
+   assert(*d <= 1.0f);
+}
+
+gl_pack_uint_z_func
+_mesa_get_pack_uint_z_func(mesa_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      return pack_uint_S8_UINT_Z24_UNORM;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      return pack_uint_Z24_UNORM_S8_UINT;
+   case MESA_FORMAT_Z_UNORM16:
+      return pack_uint_Z_UNORM16;
+   case MESA_FORMAT_Z_UNORM32:
+      return pack_uint_Z_UNORM32;
+   case MESA_FORMAT_Z_FLOAT32:
+      return pack_uint_Z_FLOAT32;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return pack_uint_Z_FLOAT32_X24S8;
+   default:
+      _mesa_problem(NULL, "unexpected format in _mesa_get_pack_uint_z_func()");
+      return NULL;
+   }
+}
+
+
+/**
+ ** Pack ubyte stencil pixels
+ **/
+
+static void
+pack_ubyte_stencil_Z24_S8(const GLubyte *src, void *dst)
+{
+   /* don't disturb the Z values */
+   GLuint *d = ((GLuint *) dst);
+   GLuint s = *src;
+   GLuint z = *d & 0xffffff00;
+   *d = z | s;
+}
+
+static void
+pack_ubyte_stencil_S8_Z24(const GLubyte *src, void *dst)
+{
+   /* don't disturb the Z values; widen src so the shift is done unsigned */
+   GLuint *d = ((GLuint *) dst);
+   GLuint s = (GLuint) *src << 24;
+   GLuint z = *d & 0xffffff;
+   *d = s | z;
+}
+
+static void
+pack_ubyte_stencil_S8(const GLubyte *src, void *dst)
+{
+   GLubyte *d = (GLubyte *) dst;
+   *d = *src;
+}
+
+static void
+pack_ubyte_stencil_Z32_FLOAT_X24S8(const GLubyte *src, void *dst)
+{
+   struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+   d->x24s8 = *src;   /* integer stencil word, as in the row packer; Z kept */
+}
+
+/** Return the ubyte stencil pack function for format, or NULL if none. */
+gl_pack_ubyte_stencil_func
+_mesa_get_pack_ubyte_stencil_func(mesa_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      return pack_ubyte_stencil_Z24_S8;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      return pack_ubyte_stencil_S8_Z24;
+   case MESA_FORMAT_S_UINT8:
+      return pack_ubyte_stencil_S8;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      return pack_ubyte_stencil_Z32_FLOAT_X24S8;
+   default:
+      _mesa_problem(NULL,
+                    "unexpected format in _mesa_get_pack_ubyte_stencil_func()");
+      return NULL;
+   }
+}
+
+
+
+void
+_mesa_pack_float_z_row(mesa_format format, GLuint n,
+                       const GLfloat *src, void *dst)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      {
+         /* don't disturb the stencil values */
+         GLuint *d = ((GLuint *) dst);
+         const GLdouble scale = (GLdouble) 0xffffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint s = d[i] & 0xff;
+            GLuint z = (GLuint) (src[i] * scale);
+            assert(z <= 0xffffff);
+            d[i] = (z << 8) | s;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      {
+         /* don't disturb the stencil values */
+         GLuint *d = ((GLuint *) dst);
+         const GLdouble scale = (GLdouble) 0xffffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint s = d[i] & 0xff000000;
+            GLuint z = (GLuint) (src[i] * scale);
+            assert(z <= 0xffffff);
+            d[i] = s | z;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z_UNORM16:
+      {
+         GLushort *d = ((GLushort *) dst);
+         const GLfloat scale = (GLfloat) 0xffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i] = (GLushort) (src[i] * scale);
+         }
+      }
+      break;
+   case MESA_FORMAT_Z_UNORM32:
+      {
+         GLuint *d = ((GLuint *) dst);
+         const GLdouble scale = (GLdouble) 0xffffffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i] = (GLuint) (src[i] * scale);
+         }
+      }
+      break;
+   case MESA_FORMAT_Z_FLOAT32:
+      memcpy(dst, src, n * sizeof(GLfloat));
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      {
+         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i].z = src[i];
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected format in _mesa_pack_float_z_row()");
+   }
+}
+
+
+/**
+ * Pack n uint Z values (range [0, 0xffffffff]) to dst, keeping dst's stencil.
+ */
+void
+_mesa_pack_uint_z_row(mesa_format format, GLuint n,
+                      const GLuint *src, void *dst)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      {
+         /* don't disturb the stencil values */
+         GLuint *d = ((GLuint *) dst);
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint s = d[i] & 0xff;
+            GLuint z = src[i] & 0xffffff00;
+            d[i] = z | s;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      {
+         /* don't disturb the stencil values */
+         GLuint *d = ((GLuint *) dst);
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint s = d[i] & 0xff000000;
+            GLuint z = src[i] >> 8;
+            d[i] = s | z;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z_UNORM16:
+      {
+         GLushort *d = ((GLushort *) dst);
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i] = src[i] >> 16;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z_UNORM32:
+      memcpy(dst, src, n * sizeof(GLuint));
+      break;
+   case MESA_FORMAT_Z_FLOAT32:
+      {
+         /* dst holds floats: rescale to [0.0, 1.0], don't store integers */
+         GLfloat *d = ((GLfloat *) dst);
+         const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i] = (GLfloat) (src[i] * scale);
+            assert(d[i] >= 0.0f && d[i] <= 1.0f);
+         }
+      }
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      {
+         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+         const GLdouble scale = 1.0 / (GLdouble) 0xffffffff;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            d[i].z = (GLfloat) (src[i] * scale);
+            assert(d[i].z >= 0.0f);
+            assert(d[i].z <= 1.0f);
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected format in _mesa_pack_uint_z_row()");
+   }
+}
+
+
+/**
+ * Pack a row of n 8-bit stencil values from src into dst, a row of pixels in
+ * the given format.
+ */
+void
+_mesa_pack_ubyte_stencil_row(mesa_format format, GLuint n,
+                             const GLubyte *src, void *dst)
+{
+   GLuint i;
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      {
+         /* don't disturb the Z values */
+         GLuint *d = ((GLuint *) dst);
+         for (i = 0; i < n; i++) {
+            d[i] = (d[i] & 0xffffff00) | src[i];
+         }
+      }
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      {
+         /* don't disturb the Z values */
+         GLuint *d = ((GLuint *) dst);
+         for (i = 0; i < n; i++) {
+            /* cast first: src[i] promotes to a signed int, and shifting a
+             * value >= 0x80 left by 24 would overflow it */
+            d[i] = ((GLuint) src[i] << 24) | (d[i] & 0xffffff);
+         }
+      }
+      break;
+   case MESA_FORMAT_S_UINT8:
+      memcpy(dst, src, n * sizeof(GLubyte));
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      {
+         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+         for (i = 0; i < n; i++) {
+            d[i].x24s8 = src[i];
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected format in _mesa_pack_ubyte_stencil_row()");
+   }
+}
+
+
+/**
+ * Pack a row of n combined Z/stencil values from src into dst.  Incoming
+ * Z/stencil values are always in uint_24_8 format (Z in the upper 24 bits).
+ */
+void
+_mesa_pack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
+                                       const GLuint *src, void *dst)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      memcpy(dst, src, n * sizeof(GLuint));
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      {
+         GLuint *d = ((GLuint *) dst);
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint s = src[i] << 24;   /* stencil: low byte -> high byte */
+            GLuint z = src[i] >> 8;    /* Z: upper 24 bits -> low 24 bits */
+            d[i] = s | z;
+         }
+      }
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      {
+         const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
+         struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLfloat z = (GLfloat) ((src[i] >> 8) * scale);   /* 24-bit Z -> [0, 1] */
+            d[i].z = z;
+            d[i].x24s8 = src[i];
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "bad format %s in _mesa_pack_uint_24_8_depth_stencil_row",
+                    _mesa_get_format_name(format));
+   }
+}
+
+
+
+/**
+ * Convert a boolean color mask to a packed color where each channel of
+ * the packed value at dst will be 0 or ~0 depending on the colorMask.
+ * colorMask is passed on in the order expected by _mesa_pack_float_rgba_row();
+ * dst receives one pixel of the given format.
+ */
+void
+_mesa_pack_colormask(mesa_format format, const GLubyte colorMask[4], void *dst)
+{
+   GLfloat maskColor[4];
+
+   switch (_mesa_get_format_datatype(format)) {
+   case GL_UNSIGNED_NORMALIZED:
+      /* simple: 1.0 will convert to ~0 in the right bit positions */
+      maskColor[0] = colorMask[0] ? 1.0f : 0.0f;
+      maskColor[1] = colorMask[1] ? 1.0f : 0.0f;
+      maskColor[2] = colorMask[2] ? 1.0f : 0.0f;
+      maskColor[3] = colorMask[3] ? 1.0f : 0.0f;
+      _mesa_pack_float_rgba_row(format, 1,
+                                (const GLfloat (*)[4]) maskColor, dst);
+      break;
+   case GL_SIGNED_NORMALIZED:
+   case GL_FLOAT:
+      /* These formats are harder because it's hard to know the floating
+       * point values that will convert to ~0 for each color channel's bits.
+       * This solution just generates a non-zero value for each color channel
+       * then fixes up the non-zero values to be ~0.
+       * Note: we'll need to add special case code if we ever have to deal
+       * with formats with unequal color channel sizes, like R11_G11_B10.
+       * We issue a warning below for channel sizes other than 8,16,32.
+       */
+      {
+         GLuint bits = _mesa_get_format_max_bits(format); /* bits per chan */
+         GLuint bytes = _mesa_get_format_bytes(format);
+         GLuint i;
+
+         /* this should put non-zero values into the channels of dst */
+         maskColor[0] = colorMask[0] ? -1.0f : 0.0f;
+         maskColor[1] = colorMask[1] ? -1.0f : 0.0f;
+         maskColor[2] = colorMask[2] ? -1.0f : 0.0f;
+         maskColor[3] = colorMask[3] ? -1.0f : 0.0f;
+         _mesa_pack_float_rgba_row(format, 1,
+                                   (const GLfloat (*)[4]) maskColor, dst);
+
+         /* fix-up the dst channels by converting non-zero values to ~0 */
+         if (bits == 8) {
+            GLubyte *d = (GLubyte *) dst;
+            for (i = 0; i < bytes; i++) {
+               d[i] = d[i] ? 0xff : 0x0;
+            }
+         }
+         else if (bits == 16) {
+            GLushort *d = (GLushort *) dst;
+            for (i = 0; i < bytes / 2; i++) {
+               d[i] = d[i] ? 0xffff : 0x0;
+            }
+         }
+         else if (bits == 32) {
+            GLuint *d = (GLuint *) dst;
+            for (i = 0; i < bytes / 4; i++) {
+               d[i] = d[i] ? 0xffffffffU : 0x0;
+            }
+         }
+         else {
+            _mesa_problem(NULL, "unexpected size in _mesa_pack_colormask()");
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "unexpected format data type in _mesa_pack_colormask()");
+   }
+}
+"""
+
+template = Template(string)
+# Render the template and write the resulting text to stdout.
+print(template.render(argv = argv[0:]))
diff --git a/mesalib/src/mesa/main/format_parser.py b/mesalib/src/mesa/main/format_parser.py
index 5e45c74de..11184f78e 100644
--- a/mesalib/src/mesa/main/format_parser.py
+++ b/mesalib/src/mesa/main/format_parser.py
@@ -24,6 +24,8 @@
 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
+import sys
+
 VOID = 'x'
 UNSIGNED = 'u'
 SIGNED = 's'
@@ -102,6 +104,10 @@ class Channel:
       """Returns true if the size of this channel is a power of two."""
       return is_power_of_two(self.size)
 
+   def datatype(self):
+      """Returns the C type name (e.g. 'uint8_t') matching this channel's type and size"""
+      return _get_datatype(self.type, self.size)
+
 class Swizzle:
    """Describes a swizzle operation.
 
@@ -469,6 +475,49 @@ class Format:
             return channel
       return None
 
+   def datatype(self):
+      """Returns the C type name for this format's data.
+
+      PACKED formats use the unsigned integer type whose width equals
+      block_size() (8, 16 or 32); other layouts defer to _get_datatype().
+      """
+      if self.layout != PACKED:
+         return _get_datatype(self.channel_type(), self.channel_size())
+      bits = self.block_size()
+      if bits in (8, 16, 32):
+         return 'uint%d_t' % bits
+      # Explicit raise: a bare "assert False" is stripped by "python -O".
+      raise AssertionError("unsupported packed block size: %r" % bits)
+
+def _get_datatype(type, size):
+   """Returns the name of the C type used to hold one channel.
+
+   type -- channel type constant: FLOAT, UNSIGNED or SIGNED
+   size -- channel size
+
+   A FLOAT of size 32 maps to 'float' and one of size 16 to 'uint16_t'.
+   Integer channels map to the narrowest of the 8-, 16- and 32-bit
+   (u)intN_t types whose width is at least `size`.
+
+   Raises AssertionError for any other type/size combination.
+   """
+   if type == FLOAT:
+      if size == 32:
+         return 'float'
+      if size == 16:
+         return 'uint16_t'
+   elif type in (UNSIGNED, SIGNED):
+      prefix = 'uint' if type == UNSIGNED else 'int'
+      for width in (8, 16, 32):
+         if size <= width:
+            return '%s%d_t' % (prefix, width)
+
+   # Raise explicitly rather than "assert False": assert statements are
+   # stripped when Python runs with -O, and this function would then fall
+   # off the end and return None.
+   raise AssertionError("no C datatype for channel type %r of size %r"
+                        % (type, size))
+
 def _parse_channels(fields, layout, colorspace, swizzle):
    channels = []
    for field in fields:
@@ -515,7 +564,10 @@ def parse(filename):
          block_height = int(fields[3])
          colorspace = fields[9]
 
-         swizzle = Swizzle(fields[8])
+         try:
+            swizzle = Swizzle(fields[8])
+         except:
+            sys.exit("error parsing swizzle for format " + name)
          channels = _parse_channels(fields[4:8], layout, colorspace, swizzle)
 
          yield Format(name, layout, block_width, block_height, channels, swizzle, colorspace)
diff --git a/mesalib/src/mesa/main/format_unpack.c b/mesalib/src/mesa/main/format_unpack.c
deleted file mode 100644
index d5628a9e7..000000000
--- a/mesalib/src/mesa/main/format_unpack.c
+++ /dev/null
@@ -1,4400 +0,0 @@
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (c) 2011 VMware, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-#include "colormac.h"
-#include "format_unpack.h"
-#include "macros.h"
-#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
-#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
-#include "util/format_srgb.h"
-
-
-/** Helper struct for MESA_FORMAT_Z32_FLOAT_S8X24_UINT */
-struct z32f_x24s8
-{
-   float z;
-   uint32_t x24s8;
-};
-
-
-/* Expand 1, 2, 3, 4, 5, 6-bit values to fill 8 bits */
-
-#define EXPAND_1_8(X)  ( (X) ? 0xff : 0x0 )
-
-#define EXPAND_2_8(X)  ( ((X) << 6) | ((X) << 4) | ((X) << 2) | (X) )
-
-#define EXPAND_3_8(X)  ( ((X) << 5) | ((X) << 2) | ((X) >> 1) )
-
-#define EXPAND_4_8(X)  ( ((X) << 4) | (X) )
-
-#define EXPAND_5_8(X)  ( ((X) << 3) | ((X) >> 2) )
-
-#define EXPAND_6_8(X)  ( ((X) << 2) | ((X) >> 4) )
-
-
-/**********************************************************************/
-/*  Unpack, returning GLfloat colors                                  */
-/**********************************************************************/
-
-typedef void (*unpack_rgba_func)(const void *src, GLfloat dst[][4], GLuint n);
-
-
-static void
-unpack_A8B8G8R8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-   }
-}
-
-static void
-unpack_R8G8B8A8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-   }
-}
-
-static void
-unpack_B8G8R8A8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-   }
-}
-
-static void
-unpack_A8R8G8B8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-   }
-}
-
-static void
-unpack_RGBX8888(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_RGBX8888_REV(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_B8G8R8X8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i]      ) & 0xff );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_X8R8G8B8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( (s[i] >>  8) & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( (s[i] >> 24)        );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_BGR_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( s[i*3+2] );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( s[i*3+1] );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( s[i*3+0] );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RGB_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( s[i*3+0] );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( s[i*3+1] );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( s[i*3+2] );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_B5G6R5_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 11) & 0x1f) * (1.0F / 31.0F);
-      dst[i][GCOMP] = ((s[i] >> 5 ) & 0x3f) * (1.0F / 63.0F);
-      dst[i][BCOMP] = ((s[i]      ) & 0x1f) * (1.0F / 31.0F);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R5G6B5_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* Warning: this function does not match the current Mesa definition
-    * of MESA_FORMAT_R5G6B5_UNORM.
-    */
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLuint t = (s[i] >> 8) | (s[i] << 8); /* byte swap */
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( ((t >> 8) & 0xf8) | ((t >> 13) & 0x7) );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( ((t >> 3) & 0xfc) | ((t >>  9) & 0x3) );
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( ((t << 3) & 0xf8) | ((t >>  2) & 0x7) );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_B4G4R4A4_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >>  8) & 0xf) * (1.0F / 15.0F);
-      dst[i][GCOMP] = ((s[i] >>  4) & 0xf) * (1.0F / 15.0F);
-      dst[i][BCOMP] = ((s[i]      ) & 0xf) * (1.0F / 15.0F);
-      dst[i][ACOMP] = ((s[i] >> 12) & 0xf) * (1.0F / 15.0F);
-   }
-}
-
-static void
-unpack_A4R4G4B4_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >>  4) & 0xf) * (1.0F / 15.0F);
-      dst[i][GCOMP] = ((s[i] >>  8) & 0xf) * (1.0F / 15.0F);
-      dst[i][BCOMP] = ((s[i] >> 12) & 0xf) * (1.0F / 15.0F);
-      dst[i][ACOMP] = ((s[i]      ) & 0xf) * (1.0F / 15.0F);
-   }
-}
-
-static void
-unpack_A1B5G5R5_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 11) & 0x1f) * (1.0F / 31.0F);
-      dst[i][GCOMP] = ((s[i] >>  6) & 0x1f) * (1.0F / 31.0F);
-      dst[i][BCOMP] = ((s[i] >>  1) & 0x1f) * (1.0F / 31.0F);
-      dst[i][ACOMP] = ((s[i]      ) & 0x01) * 1.0F;
-   }
-}
-
-static void
-unpack_B5G5R5A1_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 10) & 0x1f) * (1.0F / 31.0F);
-      dst[i][GCOMP] = ((s[i] >>  5) & 0x1f) * (1.0F / 31.0F);
-      dst[i][BCOMP] = ((s[i] >>  0) & 0x1f) * (1.0F / 31.0F);
-      dst[i][ACOMP] = ((s[i] >> 15) & 0x01) * 1.0F;
-   }
-}
-
-static void
-unpack_A1R5G5B5_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* Warning: this function does not match the current Mesa definition
-    * of MESA_FORMAT_A1R5G5B5_UNORM.
-    */
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLushort tmp = (s[i] << 8) | (s[i] >> 8); /* byteswap */
-      dst[i][RCOMP] = ((tmp >> 10) & 0x1f) * (1.0F / 31.0F);
-      dst[i][GCOMP] = ((tmp >>  5) & 0x1f) * (1.0F / 31.0F);
-      dst[i][BCOMP] = ((tmp >>  0) & 0x1f) * (1.0F / 31.0F);
-      dst[i][ACOMP] = ((tmp >> 15) & 0x01) * 1.0F;
-   }
-}
-
-static void
-unpack_L4A4_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (s[i] & 0xf) * (1.0F / 15.0F);
-      dst[i][ACOMP] = ((s[i] >> 4) & 0xf) * (1.0F / 15.0F);
-   }
-}
-
-static void
-unpack_L8A8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 
-      dst[i][GCOMP] = 
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( s[i] & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] >> 8 );
-   }
-}
-
-static void
-unpack_A8L8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 
-      dst[i][GCOMP] = 
-      dst[i][BCOMP] = UBYTE_TO_FLOAT( s[i] >> 8 );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] & 0xff );
-   }
-}
-
-static void
-unpack_L16A16_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = USHORT_TO_FLOAT( s[i] & 0xffff );
-      dst[i][ACOMP] = USHORT_TO_FLOAT( s[i] >> 16 );
-   }
-}
-
-static void
-unpack_A16L16_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = USHORT_TO_FLOAT( s[i] >> 16 );
-      dst[i][ACOMP] = USHORT_TO_FLOAT( s[i] & 0xffff );
-   }
-}
-
-static void
-unpack_B2G3R3_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 5) & 0x7) * (1.0F / 7.0F);
-      dst[i][GCOMP] = ((s[i] >> 2) & 0x7) * (1.0F / 7.0F);
-      dst[i][BCOMP] = ((s[i]     ) & 0x3) * (1.0F / 3.0F);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-
-static void
-unpack_A_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = UBYTE_TO_FLOAT(s[i]);
-   }
-}
-
-static void
-unpack_A_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = USHORT_TO_FLOAT(s[i]);
-   }
-}
-
-static void
-unpack_L_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = UBYTE_TO_FLOAT(s[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_L_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = USHORT_TO_FLOAT(s[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_I_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = UBYTE_TO_FLOAT(s[i]);
-   }
-}
-
-static void
-unpack_I_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = USHORT_TO_FLOAT(s[i]);
-   }
-}
-
-static void
-unpack_YCBCR(const void *src, GLfloat dst[][4], GLuint n)
-{
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      const GLushort *src0 = ((const GLushort *) src) + i * 2; /* even */
-      const GLushort *src1 = src0 + 1;         /* odd */
-      const GLubyte y0 = (*src0 >> 8) & 0xff;  /* luminance */
-      const GLubyte cb = *src0 & 0xff;         /* chroma U */
-      const GLubyte y1 = (*src1 >> 8) & 0xff;  /* luminance */
-      const GLubyte cr = *src1 & 0xff;         /* chroma V */
-      const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
-      GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
-      GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
-      GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
-      r *= (1.0F / 255.0F);
-      g *= (1.0F / 255.0F);
-      b *= (1.0F / 255.0F);
-      dst[i][RCOMP] = CLAMP(r, 0.0F, 1.0F);
-      dst[i][GCOMP] = CLAMP(g, 0.0F, 1.0F);
-      dst[i][BCOMP] = CLAMP(b, 0.0F, 1.0F);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_YCBCR_REV(const void *src, GLfloat dst[][4], GLuint n)
-{
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      const GLushort *src0 = ((const GLushort *) src) + i * 2; /* even */
-      const GLushort *src1 = src0 + 1;         /* odd */
-      const GLubyte y0 = *src0 & 0xff;         /* luminance */
-      const GLubyte cr = (*src0 >> 8) & 0xff;  /* chroma V */
-      const GLubyte y1 = *src1 & 0xff;         /* luminance */
-      const GLubyte cb = (*src1 >> 8) & 0xff;  /* chroma U */
-      const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
-      GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
-      GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
-      GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
-      r *= (1.0F / 255.0F);
-      g *= (1.0F / 255.0F);
-      b *= (1.0F / 255.0F);
-      dst[i][RCOMP] = CLAMP(r, 0.0F, 1.0F);
-      dst[i][GCOMP] = CLAMP(g, 0.0F, 1.0F);
-      dst[i][BCOMP] = CLAMP(b, 0.0F, 1.0F);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R_UNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] = UBYTE_TO_FLOAT(s[i]);
-      dst[i][1] =
-      dst[i][2] = 0.0F;
-      dst[i][3] = 1.0F;
-   }
-}
-
-static void
-unpack_R8G8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( s[i] & 0xff );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( s[i] >> 8 );
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_G8R8_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = UBYTE_TO_FLOAT( s[i] >> 8 );
-      dst[i][GCOMP] = UBYTE_TO_FLOAT( s[i] & 0xff );
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_R_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = USHORT_TO_FLOAT(s[i]);
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_R16G16_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = USHORT_TO_FLOAT( s[i] & 0xffff );
-      dst[i][GCOMP] = USHORT_TO_FLOAT( s[i] >> 16 );
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_G16R16_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = USHORT_TO_FLOAT( s[i] >> 16 );
-      dst[i][GCOMP] = USHORT_TO_FLOAT( s[i] & 0xffff );
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_B10G10R10A2_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 20) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][GCOMP] = ((s[i] >> 10) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][BCOMP] = ((s[i] >>  0) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][ACOMP] = ((s[i] >> 30) &  0x03) * (1.0F / 3.0F);
-   }
-}
-
-
-static void
-unpack_B10G10R10A2_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat)((s[i] >> 20) & 0x3ff);
-      dst[i][GCOMP] = (GLfloat)((s[i] >> 10) & 0x3ff);
-      dst[i][BCOMP] = (GLfloat)((s[i] >>  0) & 0x3ff);
-      dst[i][ACOMP] = (GLfloat)((s[i] >> 30) &  0x03);
-   }
-}
-
-
-static void
-unpack_R10G10B10A2_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat)((s[i] >>  0) & 0x3ff);
-      dst[i][GCOMP] = (GLfloat)((s[i] >> 10) & 0x3ff);
-      dst[i][BCOMP] = (GLfloat)((s[i] >> 20) & 0x3ff);
-      dst[i][ACOMP] = (GLfloat)((s[i] >> 30) &  0x03);
-   }
-}
-
-
-static void
-unpack_S8_UINT_Z24_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = (GLfloat) ((s[i] >> 8) * scale);
-      dst[i][3] = 1.0F;
-      ASSERT(dst[i][0] >= 0.0F);
-      ASSERT(dst[i][0] <= 1.0F);
-   }
-}
-
-static void
-unpack_Z24_UNORM_S8_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = (float) ((s[i] & 0x00ffffff) * scale);
-      dst[i][3] = 1.0F;
-      ASSERT(dst[i][0] >= 0.0F);
-      ASSERT(dst[i][0] <= 1.0F);
-   }
-}
-
-static void
-unpack_Z_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = s[i] * (1.0F / 65535.0F);
-      dst[i][3] = 1.0F;
-   }
-}
-
-static void
-unpack_Z24_UNORM_X8_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   unpack_Z24_UNORM_S8_UINT(src, dst, n);
-}
-
-static void
-unpack_X8_UINT_Z24_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   unpack_S8_UINT_Z24_UNORM(src, dst, n);
-}
-
-static void
-unpack_Z_UNORM32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = s[i] * (1.0F / 0xffffffff);
-      dst[i][3] = 1.0F;
-   }
-}
-
-static void
-unpack_Z32_FLOAT_S8X24_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = s[i].z;
-      dst[i][3] = 1.0F;
-   }
-}
-
-static void
-unpack_Z_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = ((const GLfloat *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = s[i];
-      dst[i][3] = 1.0F;
-   }
-}
-
-
-static void
-unpack_S8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* should never be used */
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] =
-      dst[i][1] =
-      dst[i][2] = 0.0F;
-      dst[i][3] = 1.0F;
-   }
-}
-
-
-static void
-unpack_BGR_SRGB8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float(s[i*3+2]);
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float(s[i*3+1]);
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float(s[i*3+0]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_A8B8G8R8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 24) );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] & 0xff ); /* linear! */
-   }
-}
-
-static void
-unpack_B8G8R8A8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i]      ) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] >> 24 ); /* linear! */
-   }
-}
-
-static void
-unpack_A8R8G8B8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 24) );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] & 0xff ); /* linear! */
-   }
-}
-
-static void
-unpack_R8G8B8A8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i]      ) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][ACOMP] = UBYTE_TO_FLOAT( s[i] >> 24 ); /* linear! */
-   }
-}
-
-static void
-unpack_L_SRGB8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 
-      dst[i][GCOMP] = 
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float(s[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_L8A8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float(s[i] & 0xff);
-      dst[i][ACOMP] = UBYTE_TO_FLOAT(s[i] >> 8); /* linear! */
-   }
-}
-
-static void
-unpack_A8L8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float(s[i] >> 8);
-      dst[i][ACOMP] = UBYTE_TO_FLOAT(s[i] & 0xff); /* linear! */
-   }
-}
-
-static void
-unpack_SRGB_DXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_SRGBA_DXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_SRGBA_DXT3(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_SRGBA_DXT5(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGB_FXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGBA_FXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGB_DXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGBA_DXT1(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGBA_DXT3(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-static void
-unpack_RGBA_DXT5(const void *src, GLfloat dst[][4], GLuint n)
-{
-}
-
-
-static void
-unpack_RGBA_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*4+0];
-      dst[i][GCOMP] = s[i*4+1];
-      dst[i][BCOMP] = s[i*4+2];
-      dst[i][ACOMP] = s[i*4+3];
-   }
-}
-
-static void
-unpack_RGBA_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = _mesa_half_to_float(s[i*4+0]);
-      dst[i][GCOMP] = _mesa_half_to_float(s[i*4+1]);
-      dst[i][BCOMP] = _mesa_half_to_float(s[i*4+2]);
-      dst[i][ACOMP] = _mesa_half_to_float(s[i*4+3]);
-   }
-}
-
-static void
-unpack_RGB_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*3+0];
-      dst[i][GCOMP] = s[i*3+1];
-      dst[i][BCOMP] = s[i*3+2];
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RGB_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = _mesa_half_to_float(s[i*3+0]);
-      dst[i][GCOMP] = _mesa_half_to_float(s[i*3+1]);
-      dst[i][BCOMP] = _mesa_half_to_float(s[i*3+2]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_A_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = s[i];
-   }
-}
-
-static void
-unpack_A_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = _mesa_half_to_float(s[i]);
-   }
-}
-
-static void
-unpack_L_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = s[i];
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_L_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = _mesa_half_to_float(s[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_LA_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = s[i*2+0];
-      dst[i][ACOMP] = s[i*2+1];
-   }
-}
-
-static void
-unpack_LA_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = _mesa_half_to_float(s[i*2+0]);
-      dst[i][ACOMP] = _mesa_half_to_float(s[i*2+1]);
-   }
-}
-
-static void
-unpack_I_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = s[i];
-   }
-}
-
-static void
-unpack_I_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = _mesa_half_to_float(s[i]);
-   }
-}
-
-static void
-unpack_R_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i];
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = _mesa_half_to_float(s[i]);
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RG_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*2+0];
-      dst[i][GCOMP] = s[i*2+1];
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RG_FLOAT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLhalfARB *s = (const GLhalfARB *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = _mesa_half_to_float(s[i*2+0]);
-      dst[i][GCOMP] = _mesa_half_to_float(s[i*2+1]);
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_ALPHA_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_ALPHA_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_ALPHA_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_ALPHA_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_ALPHA_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_ALPHA_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_INTENSITY_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = (GLfloat) s[i];
-   }
-}
-
-static void
-unpack_LUMINANCE_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = dst[i][GCOMP] = dst[i][BCOMP] = (GLfloat) s[i];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_LUMINANCE_ALPHA_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = (GLfloat) s[2*i+0];
-      dst[i][ACOMP] = (GLfloat) s[2*i+1];
-   }
-}
-
-static void
-unpack_R_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBA_INT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBA_INT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-
-static void
-unpack_RGBA_INT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBA_UINT8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBA_UINT16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i];
-      dst[i][GCOMP] = 0.0;
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RG_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*2+0];
-      dst[i][GCOMP] = (GLfloat) s[i*2+1];
-      dst[i][BCOMP] = 0.0;
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGB_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*3+0];
-      dst[i][GCOMP] = (GLfloat) s[i*3+1];
-      dst[i][BCOMP] = (GLfloat) s[i*3+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBA_UINT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = (GLfloat) s[i*4+3];
-   }
-}
-
-static void
-unpack_R_SNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = ((const GLbyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( s[i] );
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R8G8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] & 0xff) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 8) );
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_X8B8G8R8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 24) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 16) );
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >>  8) );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_A8B8G8R8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 24) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 16) );
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >>  8) );
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i]      ) );
-   }
-}
-
-static void
-unpack_R8G8B8A8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i]      ) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >>  8) );
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 16) );
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 24) );
-   }
-}
-
-static void
-unpack_R_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( s[i] );
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R16G16_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( (GLshort) (s[i] & 0xffff) );
-      dst[i][GCOMP] = SHORT_TO_FLOAT_TEX( (GLshort) (s[i] >> 16) );
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RGB_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( s[i*3+0] );
-      dst[i][GCOMP] = SHORT_TO_FLOAT_TEX( s[i*3+1] );
-      dst[i][BCOMP] = SHORT_TO_FLOAT_TEX( s[i*3+2] );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_RGBA_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+0] );
-      dst[i][GCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+1] );
-      dst[i][BCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+2] );
-      dst[i][ACOMP] = SHORT_TO_FLOAT_TEX( s[i*4+3] );
-   }
-}
-
-static void
-unpack_RGBA_16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = USHORT_TO_FLOAT( s[i*4+0] );
-      dst[i][GCOMP] = USHORT_TO_FLOAT( s[i*4+1] );
-      dst[i][BCOMP] = USHORT_TO_FLOAT( s[i*4+2] );
-      dst[i][ACOMP] = USHORT_TO_FLOAT( s[i*4+3] );
-   }
-}
-
-static void
-unpack_RED_RGTC1(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_SIGNED_RED_RGTC1(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_RG_RGTC2(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_SIGNED_RG_RGTC2(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_L_LATC1(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_SIGNED_L_LATC1(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_LA_LATC2(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_SIGNED_LA_LATC2(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC1_RGB8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_RGB8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_SRGB8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_RGBA8_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_SRGB8_ALPHA8_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_R11_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_RG11_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_SIGNED_R11_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_SIGNED_RG11_EAC(const void *src, GLfloat dst[][4], GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_RGB8_PUNCHTHROUGH_ALPHA1(const void *src, GLfloat dst[][4],
-                                      GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1(const void *src, GLfloat dst[][4],
-                                      GLuint n)
-{
-   /* XXX to do */
-}
-
-static void
-unpack_A_SNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = ((const GLbyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 0.0F;
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( s[i] );
-   }
-}
-
-static void
-unpack_L_SNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = ((const GLbyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( s[i] );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_L8A8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] & 0xff) );
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 8) );
-   }
-}
-
-
-static void
-unpack_A8L8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 8) );
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] & 0xff) );
-   }
-}
-
-static void
-unpack_I_SNORM8(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = ((const GLbyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = BYTE_TO_FLOAT_TEX( s[i] );
-   }
-}
-
-static void
-unpack_A_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 0.0F;
-      dst[i][GCOMP] = 0.0F;
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = SHORT_TO_FLOAT_TEX( s[i] );
-   }
-}
-
-static void
-unpack_L_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = SHORT_TO_FLOAT_TEX( s[i] );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_LA_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = SHORT_TO_FLOAT_TEX( s[i*2+0] );
-      dst[i][ACOMP] = SHORT_TO_FLOAT_TEX( s[i*2+1] );
-   }
-}
-
-static void
-unpack_I_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = ((const GLshort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = SHORT_TO_FLOAT_TEX( s[i] );
-   }
-}
-
-static void
-unpack_R9G9B9E5_FLOAT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      rgb9e5_to_float3(s[i], dst[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_R11G11B10_FLOAT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      r11g11b10f_to_float3(s[i], dst[i]);
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_XRGB4444_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >>  8) & 0xf) * (1.0F / 15.0F);
-      dst[i][GCOMP] = ((s[i] >>  4) & 0xf) * (1.0F / 15.0F);
-      dst[i][BCOMP] = ((s[i]      ) & 0xf) * (1.0F / 15.0F);
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XRGB1555_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 10) & 0x1f) * (1.0F / 31.0F);
-      dst[i][GCOMP] = ((s[i] >>  5) & 0x1f) * (1.0F / 31.0F);
-      dst[i][BCOMP] = ((s[i] >>  0) & 0x1f) * (1.0F / 31.0F);
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_R8G8B8X8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i]      ) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >>  8) );
-      dst[i][BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 16) );
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_R8G8B8X8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i]      ) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_X8B8G8R8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 24) );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][ACOMP] = 1.0f;
-   }
-}
-
-static void
-unpack_XBGR8888_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*4+0];
-      dst[i][GCOMP] = s[i*4+1];
-      dst[i][BCOMP] = s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR8888_SINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLbyte *s = (const GLbyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*4+0];
-      dst[i][GCOMP] = s[i*4+1];
-      dst[i][BCOMP] = s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_B10G10R10X2_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 20) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][GCOMP] = ((s[i] >> 10) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][BCOMP] = ((s[i] >>  0) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBX_UNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = USHORT_TO_FLOAT( s[i*4+0] );
-      dst[i][GCOMP] = USHORT_TO_FLOAT( s[i*4+1] );
-      dst[i][BCOMP] = USHORT_TO_FLOAT( s[i*4+2] );
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBX_SNORM16(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+0] );
-      dst[i][GCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+1] );
-      dst[i][BCOMP] = SHORT_TO_FLOAT_TEX( s[i*4+2] );
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR16161616_FLOAT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = _mesa_half_to_float(s[i*4+0]);
-      dst[i][GCOMP] = _mesa_half_to_float(s[i*4+1]);
-      dst[i][BCOMP] = _mesa_half_to_float(s[i*4+2]);
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR16161616_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = (const GLushort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR16161616_SINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLshort *s = (const GLshort *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_RGBX_FLOAT32(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLfloat *s = (const GLfloat *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*4+0];
-      dst[i][GCOMP] = s[i*4+1];
-      dst[i][BCOMP] = s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR32323232_UINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = (const GLuint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_XBGR32323232_SINT(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLint *s = (const GLint *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLfloat) s[i*4+0];
-      dst[i][GCOMP] = (GLfloat) s[i*4+1];
-      dst[i][BCOMP] = (GLfloat) s[i*4+2];
-      dst[i][ACOMP] = 1.0;
-   }
-}
-
-static void
-unpack_R10G10B10A2_UNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = ((s[i] >> 0) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][GCOMP] = ((s[i] >> 10) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][BCOMP] = ((s[i] >> 20) & 0x3ff) * (1.0F / 1023.0F);
-      dst[i][ACOMP] = ((s[i] >> 30) &  0x03) * (1.0F / 3.0F);
-   }
-}
-
-static void
-unpack_G8R8_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] >> 8) );
-      dst[i][GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s[i] & 0xff) );
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_G16R16_SNORM(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = SHORT_TO_FLOAT_TEX( (GLshort) (s[i] >> 16) );
-      dst[i][GCOMP] = SHORT_TO_FLOAT_TEX( (GLshort) (s[i] & 0xffff) );
-      dst[i][BCOMP] = 0.0F;
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_B8G8R8X8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i]      ) & 0xff );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-static void
-unpack_X8R8G8B8_SRGB(const void *src, GLfloat dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >>  8) & 0xff );
-      dst[i][GCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 16) & 0xff );
-      dst[i][BCOMP] = util_format_srgb_8unorm_to_linear_float( (s[i] >> 24) );
-      dst[i][ACOMP] = 1.0F;
-   }
-}
-
-/**
- * Return the unpacker function for the given format.
- */
-static unpack_rgba_func
-get_unpack_rgba_function(mesa_format format)
-{
-   static unpack_rgba_func table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      table[MESA_FORMAT_NONE] = NULL;
-
-      table[MESA_FORMAT_A8B8G8R8_UNORM] = unpack_A8B8G8R8_UNORM;
-      table[MESA_FORMAT_R8G8B8A8_UNORM] = unpack_R8G8B8A8_UNORM;
-      table[MESA_FORMAT_B8G8R8A8_UNORM] = unpack_B8G8R8A8_UNORM;
-      table[MESA_FORMAT_A8R8G8B8_UNORM] = unpack_A8R8G8B8_UNORM;
-      table[MESA_FORMAT_X8B8G8R8_UNORM] = unpack_RGBX8888;
-      table[MESA_FORMAT_R8G8B8X8_UNORM] = unpack_RGBX8888_REV;
-      table[MESA_FORMAT_B8G8R8X8_UNORM] = unpack_B8G8R8X8_UNORM;
-      table[MESA_FORMAT_X8R8G8B8_UNORM] = unpack_X8R8G8B8_UNORM;
-      table[MESA_FORMAT_BGR_UNORM8] = unpack_BGR_UNORM8;
-      table[MESA_FORMAT_RGB_UNORM8] = unpack_RGB_UNORM8;
-      table[MESA_FORMAT_B5G6R5_UNORM] = unpack_B5G6R5_UNORM;
-      table[MESA_FORMAT_R5G6B5_UNORM] = unpack_R5G6B5_UNORM;
-      table[MESA_FORMAT_B4G4R4A4_UNORM] = unpack_B4G4R4A4_UNORM;
-      table[MESA_FORMAT_A4R4G4B4_UNORM] = unpack_A4R4G4B4_UNORM;
-      table[MESA_FORMAT_A1B5G5R5_UNORM] = unpack_A1B5G5R5_UNORM;
-      table[MESA_FORMAT_B5G5R5A1_UNORM] = unpack_B5G5R5A1_UNORM;
-      table[MESA_FORMAT_A1R5G5B5_UNORM] = unpack_A1R5G5B5_UNORM;
-      table[MESA_FORMAT_L4A4_UNORM] = unpack_L4A4_UNORM;
-      table[MESA_FORMAT_L8A8_UNORM] = unpack_L8A8_UNORM;
-      table[MESA_FORMAT_A8L8_UNORM] = unpack_A8L8_UNORM;
-      table[MESA_FORMAT_L16A16_UNORM] = unpack_L16A16_UNORM;
-      table[MESA_FORMAT_A16L16_UNORM] = unpack_A16L16_UNORM;
-      table[MESA_FORMAT_B2G3R3_UNORM] = unpack_B2G3R3_UNORM;
-      table[MESA_FORMAT_A_UNORM8] = unpack_A_UNORM8;
-      table[MESA_FORMAT_A_UNORM16] = unpack_A_UNORM16;
-      table[MESA_FORMAT_L_UNORM8] = unpack_L_UNORM8;
-      table[MESA_FORMAT_L_UNORM16] = unpack_L_UNORM16;
-      table[MESA_FORMAT_I_UNORM8] = unpack_I_UNORM8;
-      table[MESA_FORMAT_I_UNORM16] = unpack_I_UNORM16;
-      table[MESA_FORMAT_YCBCR] = unpack_YCBCR;
-      table[MESA_FORMAT_YCBCR_REV] = unpack_YCBCR_REV;
-      table[MESA_FORMAT_R_UNORM8] = unpack_R_UNORM8;
-      table[MESA_FORMAT_R8G8_UNORM] = unpack_R8G8_UNORM;
-      table[MESA_FORMAT_G8R8_UNORM] = unpack_G8R8_UNORM;
-      table[MESA_FORMAT_R_UNORM16] = unpack_R_UNORM16;
-      table[MESA_FORMAT_R16G16_UNORM] = unpack_R16G16_UNORM;
-      table[MESA_FORMAT_G16R16_UNORM] = unpack_G16R16_UNORM;
-      table[MESA_FORMAT_B10G10R10A2_UNORM] = unpack_B10G10R10A2_UNORM;
-      table[MESA_FORMAT_B10G10R10A2_UINT] = unpack_B10G10R10A2_UINT;
-      table[MESA_FORMAT_R10G10B10A2_UINT] = unpack_R10G10B10A2_UINT;
-      table[MESA_FORMAT_S8_UINT_Z24_UNORM] = unpack_S8_UINT_Z24_UNORM;
-      table[MESA_FORMAT_Z24_UNORM_S8_UINT] = unpack_Z24_UNORM_S8_UINT;
-      table[MESA_FORMAT_Z_UNORM16] = unpack_Z_UNORM16;
-      table[MESA_FORMAT_Z24_UNORM_X8_UINT] = unpack_Z24_UNORM_X8_UINT;
-      table[MESA_FORMAT_X8_UINT_Z24_UNORM] = unpack_X8_UINT_Z24_UNORM;
-      table[MESA_FORMAT_Z_UNORM32] = unpack_Z_UNORM32;
-      table[MESA_FORMAT_S_UINT8] = unpack_S8;
-      table[MESA_FORMAT_BGR_SRGB8] = unpack_BGR_SRGB8;
-      table[MESA_FORMAT_A8B8G8R8_SRGB] = unpack_A8B8G8R8_SRGB;
-      table[MESA_FORMAT_B8G8R8A8_SRGB] = unpack_B8G8R8A8_SRGB;
-      table[MESA_FORMAT_A8R8G8B8_SRGB] = unpack_A8R8G8B8_SRGB;
-      table[MESA_FORMAT_R8G8B8A8_SRGB] = unpack_R8G8B8A8_SRGB;
-      table[MESA_FORMAT_L_SRGB8] = unpack_L_SRGB8;
-      table[MESA_FORMAT_L8A8_SRGB] = unpack_L8A8_SRGB;
-      table[MESA_FORMAT_A8L8_SRGB] = unpack_A8L8_SRGB;
-      table[MESA_FORMAT_SRGB_DXT1] = unpack_SRGB_DXT1;
-      table[MESA_FORMAT_SRGBA_DXT1] = unpack_SRGBA_DXT1;
-      table[MESA_FORMAT_SRGBA_DXT3] = unpack_SRGBA_DXT3;
-      table[MESA_FORMAT_SRGBA_DXT5] = unpack_SRGBA_DXT5;
-
-      table[MESA_FORMAT_RGB_FXT1] = unpack_RGB_FXT1;
-      table[MESA_FORMAT_RGBA_FXT1] = unpack_RGBA_FXT1;
-      table[MESA_FORMAT_RGB_DXT1] = unpack_RGB_DXT1;
-      table[MESA_FORMAT_RGBA_DXT1] = unpack_RGBA_DXT1;
-      table[MESA_FORMAT_RGBA_DXT3] = unpack_RGBA_DXT3;
-      table[MESA_FORMAT_RGBA_DXT5] = unpack_RGBA_DXT5;
-
-      table[MESA_FORMAT_RGBA_FLOAT32] = unpack_RGBA_FLOAT32;
-      table[MESA_FORMAT_RGBA_FLOAT16] = unpack_RGBA_FLOAT16;
-      table[MESA_FORMAT_RGB_FLOAT32] = unpack_RGB_FLOAT32;
-      table[MESA_FORMAT_RGB_FLOAT16] = unpack_RGB_FLOAT16;
-      table[MESA_FORMAT_A_FLOAT32] = unpack_A_FLOAT32;
-      table[MESA_FORMAT_A_FLOAT16] = unpack_A_FLOAT16;
-      table[MESA_FORMAT_L_FLOAT32] = unpack_L_FLOAT32;
-      table[MESA_FORMAT_L_FLOAT16] = unpack_L_FLOAT16;
-      table[MESA_FORMAT_LA_FLOAT32] = unpack_LA_FLOAT32;
-      table[MESA_FORMAT_LA_FLOAT16] = unpack_LA_FLOAT16;
-      table[MESA_FORMAT_I_FLOAT32] = unpack_I_FLOAT32;
-      table[MESA_FORMAT_I_FLOAT16] = unpack_I_FLOAT16;
-      table[MESA_FORMAT_R_FLOAT32] = unpack_R_FLOAT32;
-      table[MESA_FORMAT_R_FLOAT16] = unpack_R_FLOAT16;
-      table[MESA_FORMAT_RG_FLOAT32] = unpack_RG_FLOAT32;
-      table[MESA_FORMAT_RG_FLOAT16] = unpack_RG_FLOAT16;
-
-      table[MESA_FORMAT_A_UINT8] = unpack_ALPHA_UINT8;
-      table[MESA_FORMAT_A_UINT16] = unpack_ALPHA_UINT16;
-      table[MESA_FORMAT_A_UINT32] = unpack_ALPHA_UINT32;
-      table[MESA_FORMAT_A_SINT8] = unpack_ALPHA_INT8;
-      table[MESA_FORMAT_A_SINT16] = unpack_ALPHA_INT16;
-      table[MESA_FORMAT_A_SINT32] = unpack_ALPHA_INT32;
-
-      table[MESA_FORMAT_I_UINT8] = unpack_INTENSITY_UINT8;
-      table[MESA_FORMAT_I_UINT16] = unpack_INTENSITY_UINT16;
-      table[MESA_FORMAT_I_UINT32] = unpack_INTENSITY_UINT32;
-      table[MESA_FORMAT_I_SINT8] = unpack_INTENSITY_INT8;
-      table[MESA_FORMAT_I_SINT16] = unpack_INTENSITY_INT16;
-      table[MESA_FORMAT_I_SINT32] = unpack_INTENSITY_INT32;
-
-      table[MESA_FORMAT_L_UINT8] = unpack_LUMINANCE_UINT8;
-      table[MESA_FORMAT_L_UINT16] = unpack_LUMINANCE_UINT16;
-      table[MESA_FORMAT_L_UINT32] = unpack_LUMINANCE_UINT32;
-      table[MESA_FORMAT_L_SINT8] = unpack_LUMINANCE_INT8;
-      table[MESA_FORMAT_L_SINT16] = unpack_LUMINANCE_INT16;
-      table[MESA_FORMAT_L_SINT32] = unpack_LUMINANCE_INT32;
-
-      table[MESA_FORMAT_LA_UINT8] = unpack_LUMINANCE_ALPHA_UINT8;
-      table[MESA_FORMAT_LA_UINT16] = unpack_LUMINANCE_ALPHA_UINT16;
-      table[MESA_FORMAT_LA_UINT32] = unpack_LUMINANCE_ALPHA_UINT32;
-      table[MESA_FORMAT_LA_SINT8] = unpack_LUMINANCE_ALPHA_INT8;
-      table[MESA_FORMAT_LA_SINT16] = unpack_LUMINANCE_ALPHA_INT16;
-      table[MESA_FORMAT_LA_SINT32] = unpack_LUMINANCE_ALPHA_INT32;
-
-      table[MESA_FORMAT_R_SINT8] = unpack_R_INT8;
-      table[MESA_FORMAT_RG_SINT8] = unpack_RG_INT8;
-      table[MESA_FORMAT_RGB_SINT8] = unpack_RGB_INT8;
-      table[MESA_FORMAT_RGBA_SINT8] = unpack_RGBA_INT8;
-      table[MESA_FORMAT_R_SINT16] = unpack_R_INT16;
-      table[MESA_FORMAT_RG_SINT16] = unpack_RG_INT16;
-      table[MESA_FORMAT_RGB_SINT16] = unpack_RGB_INT16;
-      table[MESA_FORMAT_RGBA_SINT16] = unpack_RGBA_INT16;
-      table[MESA_FORMAT_R_SINT32] = unpack_R_INT32;
-      table[MESA_FORMAT_RG_SINT32] = unpack_RG_INT32;
-      table[MESA_FORMAT_RGB_SINT32] = unpack_RGB_INT32;
-      table[MESA_FORMAT_RGBA_SINT32] = unpack_RGBA_INT32;
-      table[MESA_FORMAT_R_UINT8] = unpack_R_UINT8;
-      table[MESA_FORMAT_RG_UINT8] = unpack_RG_UINT8;
-      table[MESA_FORMAT_RGB_UINT8] = unpack_RGB_UINT8;
-      table[MESA_FORMAT_RGBA_UINT8] = unpack_RGBA_UINT8;
-      table[MESA_FORMAT_R_UINT16] = unpack_R_UINT16;
-      table[MESA_FORMAT_RG_UINT16] = unpack_RG_UINT16;
-      table[MESA_FORMAT_RGB_UINT16] = unpack_RGB_UINT16;
-      table[MESA_FORMAT_RGBA_UINT16] = unpack_RGBA_UINT16;
-      table[MESA_FORMAT_R_UINT32] = unpack_R_UINT32;
-      table[MESA_FORMAT_RG_UINT32] = unpack_RG_UINT32;
-      table[MESA_FORMAT_RGB_UINT32] = unpack_RGB_UINT32;
-      table[MESA_FORMAT_RGBA_UINT32] = unpack_RGBA_UINT32;
-
-      table[MESA_FORMAT_R_SNORM8] = unpack_R_SNORM8;
-      table[MESA_FORMAT_R8G8_SNORM] = unpack_R8G8_SNORM;
-      table[MESA_FORMAT_X8B8G8R8_SNORM] = unpack_X8B8G8R8_SNORM;
-      table[MESA_FORMAT_A8B8G8R8_SNORM] = unpack_A8B8G8R8_SNORM;
-      table[MESA_FORMAT_R8G8B8A8_SNORM] = unpack_R8G8B8A8_SNORM;
-      table[MESA_FORMAT_R_SNORM16] = unpack_R_SNORM16;
-      table[MESA_FORMAT_R16G16_SNORM] = unpack_R16G16_SNORM;
-      table[MESA_FORMAT_RGB_SNORM16] = unpack_RGB_SNORM16;
-      table[MESA_FORMAT_RGBA_SNORM16] = unpack_RGBA_SNORM16;
-      table[MESA_FORMAT_RGBA_UNORM16] = unpack_RGBA_16;
-
-      table[MESA_FORMAT_R_RGTC1_UNORM] = unpack_RED_RGTC1;
-      table[MESA_FORMAT_R_RGTC1_SNORM] = unpack_SIGNED_RED_RGTC1;
-      table[MESA_FORMAT_RG_RGTC2_UNORM] = unpack_RG_RGTC2;
-      table[MESA_FORMAT_RG_RGTC2_SNORM] = unpack_SIGNED_RG_RGTC2;
-
-      table[MESA_FORMAT_L_LATC1_UNORM] = unpack_L_LATC1;
-      table[MESA_FORMAT_L_LATC1_SNORM] = unpack_SIGNED_L_LATC1;
-      table[MESA_FORMAT_LA_LATC2_UNORM] = unpack_LA_LATC2;
-      table[MESA_FORMAT_LA_LATC2_SNORM] = unpack_SIGNED_LA_LATC2;
-
-      table[MESA_FORMAT_ETC1_RGB8] = unpack_ETC1_RGB8;
-      table[MESA_FORMAT_ETC2_RGB8] = unpack_ETC2_RGB8;
-      table[MESA_FORMAT_ETC2_SRGB8] = unpack_ETC2_SRGB8;
-      table[MESA_FORMAT_ETC2_RGBA8_EAC] = unpack_ETC2_RGBA8_EAC;
-      table[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = unpack_ETC2_SRGB8_ALPHA8_EAC;
-      table[MESA_FORMAT_ETC2_R11_EAC] = unpack_ETC2_R11_EAC;
-      table[MESA_FORMAT_ETC2_RG11_EAC] = unpack_ETC2_RG11_EAC;
-      table[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = unpack_ETC2_SIGNED_R11_EAC;
-      table[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = unpack_ETC2_SIGNED_RG11_EAC;
-      table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] =
-         unpack_ETC2_RGB8_PUNCHTHROUGH_ALPHA1;
-      table[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] =
-         unpack_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1;
-      table[MESA_FORMAT_A_SNORM8] = unpack_A_SNORM8;
-      table[MESA_FORMAT_L_SNORM8] = unpack_L_SNORM8;
-      table[MESA_FORMAT_L8A8_SNORM] = unpack_L8A8_SNORM;
-      table[MESA_FORMAT_A8L8_SNORM] = unpack_A8L8_SNORM;
-      table[MESA_FORMAT_I_SNORM8] = unpack_I_SNORM8;
-      table[MESA_FORMAT_A_SNORM16] = unpack_A_SNORM16;
-      table[MESA_FORMAT_L_SNORM16] = unpack_L_SNORM16;
-      table[MESA_FORMAT_LA_SNORM16] = unpack_LA_SNORM16;
-      table[MESA_FORMAT_I_SNORM16] = unpack_I_SNORM16;
-
-      table[MESA_FORMAT_R9G9B9E5_FLOAT] = unpack_R9G9B9E5_FLOAT;
-      table[MESA_FORMAT_R11G11B10_FLOAT] = unpack_R11G11B10_FLOAT;
-
-      table[MESA_FORMAT_Z_FLOAT32] = unpack_Z_FLOAT32;
-      table[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = unpack_Z32_FLOAT_S8X24_UINT;
-
-      table[MESA_FORMAT_B4G4R4X4_UNORM] = unpack_XRGB4444_UNORM;
-      table[MESA_FORMAT_B5G5R5X1_UNORM] = unpack_XRGB1555_UNORM;
-      table[MESA_FORMAT_R8G8B8X8_SNORM] = unpack_R8G8B8X8_SNORM;
-      table[MESA_FORMAT_R8G8B8X8_SRGB] = unpack_R8G8B8X8_SRGB;
-      table[MESA_FORMAT_X8B8G8R8_SRGB] = unpack_X8B8G8R8_SRGB;
-      table[MESA_FORMAT_RGBX_UINT8] = unpack_XBGR8888_UINT;
-      table[MESA_FORMAT_RGBX_SINT8] = unpack_XBGR8888_SINT;
-      table[MESA_FORMAT_B10G10R10X2_UNORM] = unpack_B10G10R10X2_UNORM;
-      table[MESA_FORMAT_RGBX_UNORM16] = unpack_RGBX_UNORM16;
-      table[MESA_FORMAT_RGBX_SNORM16] = unpack_RGBX_SNORM16;
-      table[MESA_FORMAT_RGBX_FLOAT16] = unpack_XBGR16161616_FLOAT;
-      table[MESA_FORMAT_RGBX_UINT16] = unpack_XBGR16161616_UINT;
-      table[MESA_FORMAT_RGBX_SINT16] = unpack_XBGR16161616_SINT;
-      table[MESA_FORMAT_RGBX_FLOAT32] = unpack_RGBX_FLOAT32;
-      table[MESA_FORMAT_RGBX_UINT32] = unpack_XBGR32323232_UINT;
-      table[MESA_FORMAT_RGBX_SINT32] = unpack_XBGR32323232_SINT;
-
-      table[MESA_FORMAT_R10G10B10A2_UNORM] = unpack_R10G10B10A2_UNORM;
-
-      table[MESA_FORMAT_G8R8_SNORM] = unpack_G8R8_SNORM;
-      table[MESA_FORMAT_G16R16_SNORM] = unpack_G16R16_SNORM;
-
-      table[MESA_FORMAT_B8G8R8X8_SRGB] = unpack_B8G8R8X8_SRGB;
-      table[MESA_FORMAT_X8R8G8B8_SRGB] = unpack_X8R8G8B8_SRGB;
-
-      initialized = GL_TRUE;
-   }
-
-   if (table[format] == NULL) {
-      _mesa_problem(NULL, "unsupported unpack for format %s",
-                    _mesa_get_format_name(format));
-   }
-
-   return table[format];
-}
-
-
-/**
- * Unpack rgba colors, returning as GLfloat values.
- */
-void
-_mesa_unpack_rgba_row(mesa_format format, GLuint n,
-                      const void *src, GLfloat dst[][4])
-{
-   unpack_rgba_func unpack = get_unpack_rgba_function(format);
-   unpack(src, dst, n);
-}
-
-
-/**********************************************************************/
-/*  Unpack, returning GLubyte colors                                  */
-/**********************************************************************/
-
-
-static void
-unpack_ubyte_A8B8G8R8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >> 24);
-      dst[i][GCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][BCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][ACOMP] = (s[i]      ) & 0xff;
-   }
-}
-
-static void
-unpack_ubyte_R8G8B8A8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i]      ) & 0xff;
-      dst[i][GCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][BCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][ACOMP] = (s[i] >> 24);
-   }
-}
-
-static void
-unpack_ubyte_B8G8R8A8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][GCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][BCOMP] = (s[i]      ) & 0xff;
-      dst[i][ACOMP] = (s[i] >> 24);
-   }
-}
-
-static void
-unpack_ubyte_A8R8G8B8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][GCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][BCOMP] = (s[i] >> 24);
-      dst[i][ACOMP] = (s[i]      ) & 0xff;
-   }
-}
-
-static void
-unpack_ubyte_RGBX8888(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >> 24);
-      dst[i][GCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][BCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_RGBX8888_REV(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i]      ) & 0xff;
-      dst[i][GCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][BCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_B8G8R8X8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][GCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][BCOMP] = (s[i]      ) & 0xff;
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_X8R8G8B8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (s[i] >>  8) & 0xff;
-      dst[i][GCOMP] = (s[i] >> 16) & 0xff;
-      dst[i][BCOMP] = (s[i] >> 24);
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_BGR_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*3+2];
-      dst[i][GCOMP] = s[i*3+1];
-      dst[i][BCOMP] = s[i*3+0];
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_RGB_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = (const GLubyte *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i*3+0];
-      dst[i][GCOMP] = s[i*3+1];
-      dst[i][BCOMP] = s[i*3+2];
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_B5G6R5_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_5_8((s[i] >> 11) & 0x1f);
-      dst[i][GCOMP] = EXPAND_6_8((s[i] >> 5 ) & 0x3f);
-      dst[i][BCOMP] = EXPAND_5_8( s[i]        & 0x1f);
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_R5G6B5_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   /* Warning: this function does not match the current Mesa definition
-    * of MESA_FORMAT_R5G6B5_UNORM.
-    */
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLuint t = (s[i] >> 8) | (s[i] << 8); /* byte swap */
-      dst[i][RCOMP] = EXPAND_5_8((t >> 11) & 0x1f);
-      dst[i][GCOMP] = EXPAND_6_8((t >> 5 ) & 0x3f);
-      dst[i][BCOMP] = EXPAND_5_8( t        & 0x1f);
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_B4G4R4A4_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_4_8((s[i] >>  8) & 0xf);
-      dst[i][GCOMP] = EXPAND_4_8((s[i] >>  4) & 0xf);
-      dst[i][BCOMP] = EXPAND_4_8((s[i]      ) & 0xf);
-      dst[i][ACOMP] = EXPAND_4_8((s[i] >> 12) & 0xf);
-   }
-}
-
-static void
-unpack_ubyte_A4R4G4B4_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_4_8((s[i] >>  4) & 0xf);
-      dst[i][GCOMP] = EXPAND_4_8((s[i] >>  8) & 0xf);
-      dst[i][BCOMP] = EXPAND_4_8((s[i] >> 12) & 0xf);
-      dst[i][ACOMP] = EXPAND_4_8((s[i]      ) & 0xf);
-   }
-}
-
-static void
-unpack_ubyte_A1B5G5R5_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_5_8((s[i] >> 11) & 0x1f);
-      dst[i][GCOMP] = EXPAND_5_8((s[i] >>  6) & 0x1f);
-      dst[i][BCOMP] = EXPAND_5_8((s[i] >>  1) & 0x1f);
-      dst[i][ACOMP] = EXPAND_1_8((s[i]      ) & 0x01);
-   }
-}
-
-static void
-unpack_ubyte_B5G5R5A1_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_5_8((s[i] >> 10) & 0x1f);
-      dst[i][GCOMP] = EXPAND_5_8((s[i] >>  5) & 0x1f);
-      dst[i][BCOMP] = EXPAND_5_8((s[i] >>  0) & 0x1f);
-      dst[i][ACOMP] = EXPAND_1_8((s[i] >> 15) & 0x01);
-   }
-}
-
-static void
-unpack_ubyte_A1R5G5B5_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   /* Warning: this function does not match the current Mesa definition
-    * of MESA_FORMAT_A1R5G5B5_UNORM.
-    */
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      GLushort tmp = (s[i] << 8) | (s[i] >> 8); /* byteswap */
-      dst[i][RCOMP] = EXPAND_5_8((tmp >> 10) & 0x1f);
-      dst[i][GCOMP] = EXPAND_5_8((tmp >>  5) & 0x1f);
-      dst[i][BCOMP] = EXPAND_5_8((tmp >>  0) & 0x1f);
-      dst[i][ACOMP] = EXPAND_1_8((tmp >> 15) & 0x01);
-   }
-}
-
-static void
-unpack_ubyte_L4A4_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = EXPAND_4_8(s[i] & 0xf);
-      dst[i][ACOMP] = EXPAND_4_8(s[i] >> 4);
-   }
-}
-
-static void
-unpack_ubyte_L8A8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 
-      dst[i][GCOMP] = 
-      dst[i][BCOMP] = EXPAND_4_8(s[i] & 0xff);
-      dst[i][ACOMP] = EXPAND_4_8(s[i] >> 8);
-   }
-}
-
-static void
-unpack_ubyte_A8L8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = 
-      dst[i][GCOMP] = 
-      dst[i][BCOMP] = EXPAND_4_8(s[i] >> 8);
-      dst[i][ACOMP] = EXPAND_4_8(s[i] & 0xff);
-   }
-}
-
-static void
-unpack_ubyte_B2G3R3_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = EXPAND_3_8((s[i] >> 5) & 0x7);
-      dst[i][GCOMP] = EXPAND_3_8((s[i] >> 2) & 0x7);
-      dst[i][BCOMP] = EXPAND_2_8((s[i]     ) & 0x3);
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_A_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = 0;
-      dst[i][ACOMP] = s[i];
-   }
-}
-
-static void
-unpack_ubyte_L_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] = s[i];
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-
-static void
-unpack_ubyte_I_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] =
-      dst[i][GCOMP] =
-      dst[i][BCOMP] =
-      dst[i][ACOMP] = s[i];
-   }
-}
-
-static void
-unpack_ubyte_R_UNORM8(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLubyte *s = ((const GLubyte *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][0] = s[i];
-      dst[i][1] =
-      dst[i][2] = 0;
-      dst[i][3] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_R8G8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i] & 0xff;
-      dst[i][GCOMP] = s[i] >> 8;
-      dst[i][BCOMP] = 0;
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-static void
-unpack_ubyte_G8R8_UNORM(const void *src, GLubyte dst[][4], GLuint n)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = s[i] >> 8;
-      dst[i][GCOMP] = s[i] & 0xff;
-      dst[i][BCOMP] = 0;
-      dst[i][ACOMP] = 0xff;
-   }
-}
-
-
-/**
- * Unpack rgba colors, returning as GLubyte values.  This should usually
- * only be used for unpacking formats that use 8 bits or less per channel.
- */
-void
-_mesa_unpack_ubyte_rgba_row(mesa_format format, GLuint n,
-                            const void *src, GLubyte dst[][4])
-{
-   switch (format) {
-   case MESA_FORMAT_A8B8G8R8_UNORM:
-      unpack_ubyte_A8B8G8R8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_R8G8B8A8_UNORM:
-      unpack_ubyte_R8G8B8A8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_B8G8R8A8_UNORM:
-      unpack_ubyte_B8G8R8A8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A8R8G8B8_UNORM:
-      unpack_ubyte_A8R8G8B8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_X8B8G8R8_UNORM:
-      unpack_ubyte_RGBX8888(src, dst, n);
-      break;
-   case MESA_FORMAT_R8G8B8X8_UNORM:
-      unpack_ubyte_RGBX8888_REV(src, dst, n);
-      break;
-   case MESA_FORMAT_B8G8R8X8_UNORM:
-      unpack_ubyte_B8G8R8X8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_X8R8G8B8_UNORM:
-      unpack_ubyte_X8R8G8B8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_BGR_UNORM8:
-      unpack_ubyte_BGR_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_RGB_UNORM8:
-      unpack_ubyte_RGB_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_B5G6R5_UNORM:
-      unpack_ubyte_B5G6R5_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_R5G6B5_UNORM:
-      unpack_ubyte_R5G6B5_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_B4G4R4A4_UNORM:
-      unpack_ubyte_B4G4R4A4_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A4R4G4B4_UNORM:
-      unpack_ubyte_A4R4G4B4_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A1B5G5R5_UNORM:
-      unpack_ubyte_A1B5G5R5_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_B5G5R5A1_UNORM:
-      unpack_ubyte_B5G5R5A1_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A1R5G5B5_UNORM:
-      unpack_ubyte_A1R5G5B5_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_L4A4_UNORM:
-      unpack_ubyte_L4A4_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_L8A8_UNORM:
-      unpack_ubyte_L8A8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A8L8_UNORM:
-      unpack_ubyte_A8L8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_B2G3R3_UNORM:
-      unpack_ubyte_B2G3R3_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_A_UNORM8:
-      unpack_ubyte_A_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_L_UNORM8:
-      unpack_ubyte_L_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_I_UNORM8:
-      unpack_ubyte_I_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_R_UNORM8:
-      unpack_ubyte_R_UNORM8(src, dst, n);
-      break;
-   case MESA_FORMAT_R8G8_UNORM:
-      unpack_ubyte_R8G8_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_G8R8_UNORM:
-      unpack_ubyte_G8R8_UNORM(src, dst, n);
-      break;
-   default:
-      /* get float values, convert to ubyte */
-      {
-         GLfloat *tmp = malloc(n * 4 * sizeof(GLfloat));
-         if (tmp) {
-            GLuint i;
-            _mesa_unpack_rgba_row(format, n, src, (GLfloat (*)[4]) tmp);
-            for (i = 0; i < n; i++) {
-               UNCLAMPED_FLOAT_TO_UBYTE(dst[i][0], tmp[i*4+0]);
-               UNCLAMPED_FLOAT_TO_UBYTE(dst[i][1], tmp[i*4+1]);
-               UNCLAMPED_FLOAT_TO_UBYTE(dst[i][2], tmp[i*4+2]);
-               UNCLAMPED_FLOAT_TO_UBYTE(dst[i][3], tmp[i*4+3]);
-            }
-            free(tmp);
-         }
-      }
-      break;
-   }
-}
-
-
-/**********************************************************************/
-/*  Unpack, returning GLuint colors                                   */
-/**********************************************************************/
-
-static void
-unpack_int_rgba_RGBA_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   memcpy(dst, src, n * 4 * sizeof(GLuint));
-}
-
-static void
-unpack_int_rgba_RGBA_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = src[i * 4 + 3];
-   }
-}
-
-static void
-unpack_int_rgba_RGBA_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = src[i * 4 + 3];
-   }
-}
-
-static void
-unpack_int_rgba_RGBA_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = src[i * 4 + 3];
-   }
-}
-
-static void
-unpack_int_rgba_RGBA_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = src[i * 4 + 3];
-   }
-}
-
-static void
-unpack_int_rgba_B8G8R8A8_UNORM(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLubyte) src[i * 4 + 2];
-      dst[i][GCOMP] = (GLubyte) src[i * 4 + 1];
-      dst[i][BCOMP] = (GLubyte) src[i * 4 + 0];
-      dst[i][ACOMP] = (GLubyte) src[i * 4 + 3];
-   }
-}
-
-static void
-unpack_int_rgba_B8G8R8X8_UNORM(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][RCOMP] = (GLubyte) src[i * 4 + 2];
-      dst[i][GCOMP] = (GLubyte) src[i * 4 + 1];
-      dst[i][BCOMP] = (GLubyte) src[i * 4 + 0];
-      dst[i][ACOMP] = (GLubyte) 0xff;
-   }
-}
-
-static void
-unpack_int_rgba_RGB_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 3 + 0];
-      dst[i][1] = src[i * 3 + 1];
-      dst[i][2] = src[i * 3 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RGB_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 3 + 0];
-      dst[i][1] = src[i * 3 + 1];
-      dst[i][2] = src[i * 3 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RGB_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 3 + 0];
-      dst[i][1] = src[i * 3 + 1];
-      dst[i][2] = src[i * 3 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RGB_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 3 + 0];
-      dst[i][1] = src[i * 3 + 1];
-      dst[i][2] = src[i * 3 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RGB_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 3 + 0];
-      dst[i][1] = src[i * 3 + 1];
-      dst[i][2] = src[i * 3 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RG_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 2 + 0];
-      dst[i][1] = src[i * 2 + 1];
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RG_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 2 + 0];
-      dst[i][1] = src[i * 2 + 1];
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RG_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 2 + 0];
-      dst[i][1] = src[i * 2 + 1];
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RG_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 2 + 0];
-      dst[i][1] = src[i * 2 + 1];
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_RG_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 2 + 0];
-      dst[i][1] = src[i * 2 + 1];
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i];
-      dst[i][1] = 0;
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i];
-      dst[i][1] = 0;
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i];
-      dst[i][1] = 0;
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i];
-      dst[i][1] = 0;
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i];
-      dst[i][1] = 0;
-      dst[i][2] = 0;
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_ALPHA_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = 0;
-      dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_ALPHA_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = 0;
-      dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_ALPHA_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = 0;
-      dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_ALPHA_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = 0;
-      dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_ALPHA_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = 0;
-      dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i];
-      dst[i][3] = 1;
-   }
-}
-
-
-static void
-unpack_int_rgba_LUMINANCE_ALPHA_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i * 2 + 0];
-      dst[i][3] = src[i * 2 + 1];
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_ALPHA_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i * 2 + 0];
-      dst[i][3] = src[i * 2 + 1];
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_ALPHA_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i * 2 + 0];
-      dst[i][3] = src[i * 2 + 1];
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_ALPHA_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i * 2 + 0];
-      dst[i][3] = src[i * 2 + 1];
-   }
-}
-
-static void
-unpack_int_rgba_LUMINANCE_ALPHA_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = src[i * 2 + 0];
-      dst[i][3] = src[i * 2 + 1];
-   }
-}
-
-static void
-unpack_int_rgba_INTENSITY_UINT32(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_INTENSITY_UINT16(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_INTENSITY_INT16(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_INTENSITY_UINT8(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_INTENSITY_INT8(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = dst[i][1] = dst[i][2] = dst[i][3] = src[i];
-   }
-}
-
-static void
-unpack_int_rgba_B10G10R10A2_UINT(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      GLuint tmp = src[i];
-      dst[i][0] = (tmp >> 20) & 0x3ff;
-      dst[i][1] = (tmp >> 10) & 0x3ff;
-      dst[i][2] = (tmp >> 0) & 0x3ff;
-      dst[i][3] = (tmp >> 30) & 0x3;
-   }
-}
-
-static void
-unpack_int_rgba_R10G10B10A2_UINT(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      GLuint tmp = src[i];
-      dst[i][0] = (tmp >> 0) & 0x3ff;
-      dst[i][1] = (tmp >> 10) & 0x3ff;
-      dst[i][2] = (tmp >> 20) & 0x3ff;
-      dst[i][3] = (tmp >> 30) & 0x3;
-   }
-}
-
-static void
-unpack_int_rgba_B10G10R10A2_UNORM(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      GLuint tmp = src[i];
-      dst[i][0] = (tmp >> 20) & 0x3ff;
-      dst[i][1] = (tmp >> 10) & 0x3ff;
-      dst[i][2] = (tmp >> 0) & 0x3ff;
-      dst[i][3] = (tmp >> 30) & 0x3;
-   }
-}
-
-static void
-unpack_int_rgba_XBGR8888_UINT(const GLubyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_XBGR8888_SINT(const GLbyte *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_XBGR16161616_UINT(const GLushort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_XBGR16161616_SINT(const GLshort *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_XBGR32323232_UINT(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      dst[i][0] = src[i * 4 + 0];
-      dst[i][1] = src[i * 4 + 1];
-      dst[i][2] = src[i * 4 + 2];
-      dst[i][3] = 1;
-   }
-}
-
-static void
-unpack_int_rgba_R10G10B10A2_UNORM(const GLuint *src, GLuint dst[][4], GLuint n)
-{
-   unsigned int i;
-
-   for (i = 0; i < n; i++) {
-      GLuint tmp = src[i];
-      dst[i][0] = (tmp >> 0) & 0x3ff;
-      dst[i][1] = (tmp >> 10) & 0x3ff;
-      dst[i][2] = (tmp >> 20) & 0x3ff;
-      dst[i][3] = (tmp >> 30) & 0x3;
-   }
-}
-
-void
-_mesa_unpack_uint_rgba_row(mesa_format format, GLuint n,
-                           const void *src, GLuint dst[][4])
-{
-   switch (format) {
-      /* Since there won't be any sign extension happening, there's no need to
-       * make separate paths for 32-bit-to-32-bit integer unpack.
-       */
-   case MESA_FORMAT_RGBA_UINT32:
-   case MESA_FORMAT_RGBA_SINT32:
-      unpack_int_rgba_RGBA_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBA_UINT16:
-      unpack_int_rgba_RGBA_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_RGBA_SINT16:
-      unpack_int_rgba_RGBA_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBA_UINT8:
-      unpack_int_rgba_RGBA_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_RGBA_SINT8:
-      unpack_int_rgba_RGBA_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_B8G8R8A8_UNORM:
-      unpack_int_rgba_B8G8R8A8_UNORM(src, dst, n);
-      break;
-
-   case MESA_FORMAT_B8G8R8X8_UNORM:
-      unpack_int_rgba_B8G8R8X8_UNORM(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGB_UINT32:
-   case MESA_FORMAT_RGB_SINT32:
-      unpack_int_rgba_RGB_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGB_UINT16:
-      unpack_int_rgba_RGB_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_RGB_SINT16:
-      unpack_int_rgba_RGB_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGB_UINT8:
-      unpack_int_rgba_RGB_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_RGB_SINT8:
-      unpack_int_rgba_RGB_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RG_UINT32:
-   case MESA_FORMAT_RG_SINT32:
-      unpack_int_rgba_RG_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RG_UINT16:
-      unpack_int_rgba_RG_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_RG_SINT16:
-      unpack_int_rgba_RG_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RG_UINT8:
-      unpack_int_rgba_RG_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_RG_SINT8:
-      unpack_int_rgba_RG_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_R_UINT32:
-   case MESA_FORMAT_R_SINT32:
-      unpack_int_rgba_R_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_R_UINT16:
-      unpack_int_rgba_R_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_R_SINT16:
-      unpack_int_rgba_R_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_R_UINT8:
-      unpack_int_rgba_R_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_R_SINT8:
-      unpack_int_rgba_R_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_A_UINT32:
-   case MESA_FORMAT_A_SINT32:
-      unpack_int_rgba_ALPHA_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_A_UINT16:
-      unpack_int_rgba_ALPHA_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_A_SINT16:
-      unpack_int_rgba_ALPHA_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_A_UINT8:
-      unpack_int_rgba_ALPHA_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_A_SINT8:
-      unpack_int_rgba_ALPHA_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_L_UINT32:
-   case MESA_FORMAT_L_SINT32:
-      unpack_int_rgba_LUMINANCE_UINT32(src, dst, n);
-      break;
-   case MESA_FORMAT_L_UINT16:
-      unpack_int_rgba_LUMINANCE_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_L_SINT16:
-      unpack_int_rgba_LUMINANCE_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_L_UINT8:
-      unpack_int_rgba_LUMINANCE_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_L_SINT8:
-      unpack_int_rgba_LUMINANCE_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_LA_UINT32:
-   case MESA_FORMAT_LA_SINT32:
-      unpack_int_rgba_LUMINANCE_ALPHA_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_LA_UINT16:
-      unpack_int_rgba_LUMINANCE_ALPHA_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_LA_SINT16:
-      unpack_int_rgba_LUMINANCE_ALPHA_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_LA_UINT8:
-      unpack_int_rgba_LUMINANCE_ALPHA_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_LA_SINT8:
-      unpack_int_rgba_LUMINANCE_ALPHA_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_I_UINT32:
-   case MESA_FORMAT_I_SINT32:
-      unpack_int_rgba_INTENSITY_UINT32(src, dst, n);
-      break;
-
-   case MESA_FORMAT_I_UINT16:
-      unpack_int_rgba_INTENSITY_UINT16(src, dst, n);
-      break;
-   case MESA_FORMAT_I_SINT16:
-      unpack_int_rgba_INTENSITY_INT16(src, dst, n);
-      break;
-
-   case MESA_FORMAT_I_UINT8:
-      unpack_int_rgba_INTENSITY_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_I_SINT8:
-      unpack_int_rgba_INTENSITY_INT8(src, dst, n);
-      break;
-
-   case MESA_FORMAT_B10G10R10A2_UINT:
-      unpack_int_rgba_B10G10R10A2_UINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_R10G10B10A2_UINT:
-      unpack_int_rgba_R10G10B10A2_UINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_B10G10R10A2_UNORM:
-      unpack_int_rgba_B10G10R10A2_UNORM(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBX_UINT8:
-      unpack_int_rgba_XBGR8888_UINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBX_SINT8:
-      unpack_int_rgba_XBGR8888_SINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBX_UINT16:
-      unpack_int_rgba_XBGR16161616_UINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBX_SINT16:
-      unpack_int_rgba_XBGR16161616_SINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_RGBX_UINT32:
-   case MESA_FORMAT_RGBX_SINT32:
-      unpack_int_rgba_XBGR32323232_UINT(src, dst, n);
-      break;
-
-   case MESA_FORMAT_R10G10B10A2_UNORM:
-      unpack_int_rgba_R10G10B10A2_UNORM(src, dst, n);
-      break;
-
-   default:
-      _mesa_problem(NULL, "%s: bad format %s", __FUNCTION__,
-                    _mesa_get_format_name(format));
-      return;
-   }
-}
-
-/**
- * Unpack a 2D rect of pixels returning float RGBA colors.
- * \param format  the source image format
- * \param src  start address of the source image
- * \param srcRowStride  source image row stride in bytes
- * \param dst  start address of the dest image
- * \param dstRowStride  dest image row stride in bytes
- * \param x  source image start X pos
- * \param y  source image start Y pos
- * \param width  width of rect region to convert
- * \param height  height of rect region to convert
- */
-void
-_mesa_unpack_rgba_block(mesa_format format,
-                        const void *src, GLint srcRowStride,
-                        GLfloat dst[][4], GLint dstRowStride,
-                        GLuint x, GLuint y, GLuint width, GLuint height)
-{
-   unpack_rgba_func unpack = get_unpack_rgba_function(format);
-   const GLuint srcPixStride = _mesa_get_format_bytes(format);
-   const GLuint dstPixStride = 4 * sizeof(GLfloat);
-   const GLubyte *srcRow;
-   GLubyte *dstRow;
-   GLuint i;
-
-   /* XXX needs to be fixed for compressed formats */
-
-   srcRow = ((const GLubyte *) src) + srcRowStride * y + srcPixStride * x;
-   dstRow = ((GLubyte *) dst) + dstRowStride * y + dstPixStride * x;
-
-   for (i = 0; i < height; i++) {
-      unpack(srcRow, (GLfloat (*)[4]) dstRow, width);
-
-      dstRow += dstRowStride;
-      srcRow += srcRowStride;
-   }
-}
-
-
-
-
-typedef void (*unpack_float_z_func)(GLuint n, const void *src, GLfloat *dst);
-
-static void
-unpack_float_z_X8_UINT_Z24_UNORM(GLuint n, const void *src, GLfloat *dst)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = (GLfloat) ((s[i] >> 8) * scale);
-      ASSERT(dst[i] >= 0.0F);
-      ASSERT(dst[i] <= 1.0F);
-   }
-}
-
-static void
-unpack_float_z_Z24_UNORM_X8_UINT(GLuint n, const void *src, GLfloat *dst)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = (GLfloat) ((s[i] & 0x00ffffff) * scale);
-      ASSERT(dst[i] >= 0.0F);
-      ASSERT(dst[i] <= 1.0F);
-   }
-}
-
-static void
-unpack_float_Z_UNORM16(GLuint n, const void *src, GLfloat *dst)
-{
-   const GLushort *s = ((const GLushort *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = s[i] * (1.0F / 65535.0F);
-   }
-}
-
-static void
-unpack_float_Z_UNORM32(GLuint n, const void *src, GLfloat *dst)
-{
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = s[i] * (1.0F / 0xffffffff);
-   }
-}
-
-static void
-unpack_float_Z_FLOAT32(GLuint n, const void *src, GLfloat *dst)
-{
-   memcpy(dst, src, n * sizeof(float));
-}
-
-static void
-unpack_float_z_Z32X24S8(GLuint n, const void *src, GLfloat *dst)
-{
-   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = s[i].z;
-   }
-}
-
-
-
-/**
- * Unpack Z values.
- * The returned values will always be in the range [0.0, 1.0].
- */
-void
-_mesa_unpack_float_z_row(mesa_format format, GLuint n,
-                         const void *src, GLfloat *dst)
-{
-   unpack_float_z_func unpack;
-
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      unpack = unpack_float_z_X8_UINT_Z24_UNORM;
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      unpack = unpack_float_z_Z24_UNORM_X8_UINT;
-      break;
-   case MESA_FORMAT_Z_UNORM16:
-      unpack = unpack_float_Z_UNORM16;
-      break;
-   case MESA_FORMAT_Z_UNORM32:
-      unpack = unpack_float_Z_UNORM32;
-      break;
-   case MESA_FORMAT_Z_FLOAT32:
-      unpack = unpack_float_Z_FLOAT32;
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      unpack = unpack_float_z_Z32X24S8;
-      break;
-   default:
-      _mesa_problem(NULL, "bad format %s in _mesa_unpack_float_z_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-
-   unpack(n, src, dst);
-}
-
-
-
-typedef void (*unpack_uint_z_func)(const void *src, GLuint *dst, GLuint n);
-
-static void
-unpack_uint_z_X8_UINT_Z24_UNORM(const void *src, GLuint *dst, GLuint n)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = (s[i] & 0xffffff00) | (s[i] >> 24);
-   }
-}
-
-static void
-unpack_uint_z_Z24_UNORM_X8_UINT(const void *src, GLuint *dst, GLuint n)
-{
-   /* only return Z, not stencil data */
-   const GLuint *s = ((const GLuint *) src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = (s[i] << 8) | ((s[i] >> 16) & 0xff);
-   }
-}
-
-static void
-unpack_uint_Z_UNORM16(const void *src, GLuint *dst, GLuint n)
-{
-   const GLushort *s = ((const GLushort *)src);
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = (s[i] << 16) | s[i];
-   }
-}
-
-static void
-unpack_uint_Z_UNORM32(const void *src, GLuint *dst, GLuint n)
-{
-   memcpy(dst, src, n * sizeof(GLuint));
-}
-
-static void
-unpack_uint_Z_FLOAT32(const void *src, GLuint *dst, GLuint n)
-{
-   const float *s = (const float *)src;
-   GLuint i;
-   for (i = 0; i < n; i++) {
-      dst[i] = FLOAT_TO_UINT(CLAMP(s[i], 0.0F, 1.0F));
-   }
-}
-
-static void
-unpack_uint_Z_FLOAT32_X24S8(const void *src, GLuint *dst, GLuint n)
-{
-   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
-   GLuint i;
-
-   for (i = 0; i < n; i++) {
-      dst[i] = FLOAT_TO_UINT(CLAMP(s[i].z, 0.0F, 1.0F));
-   }
-}
-
-
-/**
- * Unpack Z values.
- * The returned values will always be in the range [0, 0xffffffff].
- */
-void
-_mesa_unpack_uint_z_row(mesa_format format, GLuint n,
-                        const void *src, GLuint *dst)
-{
-   unpack_uint_z_func unpack;
-   const GLubyte *srcPtr = (GLubyte *) src;
-
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-   case MESA_FORMAT_X8_UINT_Z24_UNORM:
-      unpack = unpack_uint_z_X8_UINT_Z24_UNORM;
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-   case MESA_FORMAT_Z24_UNORM_X8_UINT:
-      unpack = unpack_uint_z_Z24_UNORM_X8_UINT;
-      break;
-   case MESA_FORMAT_Z_UNORM16:
-      unpack = unpack_uint_Z_UNORM16;
-      break;
-   case MESA_FORMAT_Z_UNORM32:
-      unpack = unpack_uint_Z_UNORM32;
-      break;
-   case MESA_FORMAT_Z_FLOAT32:
-      unpack = unpack_uint_Z_FLOAT32;
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      unpack = unpack_uint_Z_FLOAT32_X24S8;
-      break;
-   default:
-      _mesa_problem(NULL, "bad format %s in _mesa_unpack_uint_z_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-
-   unpack(srcPtr, dst, n);
-}
-
-
-static void
-unpack_ubyte_s_S_UINT8(const void *src, GLubyte *dst, GLuint n)
-{
-   memcpy(dst, src, n);
-}
-
-static void
-unpack_ubyte_s_S8_UINT_Z24_UNORM(const void *src, GLubyte *dst, GLuint n)
-{
-   GLuint i;
-   const GLuint *src32 = src;
-
-   for (i = 0; i < n; i++)
-      dst[i] = src32[i] & 0xff;
-}
-
-static void
-unpack_ubyte_s_Z24_UNORM_S8_UINT(const void *src, GLubyte *dst, GLuint n)
-{
-   GLuint i;
-   const GLuint *src32 = src;
-
-   for (i = 0; i < n; i++)
-      dst[i] = src32[i] >> 24;
-}
-
-static void
-unpack_ubyte_s_Z32_FLOAT_S8X24_UINT(const void *src, GLubyte *dst, GLuint n)
-{
-   GLuint i;
-   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
-
-   for (i = 0; i < n; i++)
-      dst[i] = s[i].x24s8 & 0xff;
-}
-
-void
-_mesa_unpack_ubyte_stencil_row(mesa_format format, GLuint n,
-			       const void *src, GLubyte *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S_UINT8:
-      unpack_ubyte_s_S_UINT8(src, dst, n);
-      break;
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      unpack_ubyte_s_S8_UINT_Z24_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      unpack_ubyte_s_Z24_UNORM_S8_UINT(src, dst, n);
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      unpack_ubyte_s_Z32_FLOAT_S8X24_UINT(src, dst, n);
-      break;
-   default:
-      _mesa_problem(NULL, "bad format %s in _mesa_unpack_ubyte_s_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-}
-
-static void
-unpack_uint_24_8_depth_stencil_Z24_UNORM_S8_UINT(const GLuint *src, GLuint *dst, GLuint n)
-{
-   GLuint i;
-
-   for (i = 0; i < n; i++) {
-      GLuint val = src[i];
-      dst[i] = val >> 24 | val << 8;
-   }
-}
-
-static void
-unpack_uint_24_8_depth_stencil_Z32_S8X24(const GLuint *src,
-                                         GLuint *dst, GLuint n)
-{
-   GLuint i;
-
-   for (i = 0; i < n; i++) {
-      /* 8 bytes per pixel (float + uint32) */
-      GLfloat zf = ((GLfloat *) src)[i * 2 + 0];
-      GLuint z24 = (GLuint) (zf * (GLfloat) 0xffffff);
-      GLuint s = src[i * 2 + 1] & 0xff;
-      dst[i] = (z24 << 8) | s;
-   }
-}
-
-static void
-unpack_uint_24_8_depth_stencil_S8_UINT_Z24_UNORM(const GLuint *src, GLuint *dst, GLuint n)
-{
-   memcpy(dst, src, n * 4);
-}
-
-/**
- * Unpack depth/stencil returning as GL_UNSIGNED_INT_24_8.
- * \param format  the source data format
- */
-void
-_mesa_unpack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
-					 const void *src, GLuint *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      unpack_uint_24_8_depth_stencil_S8_UINT_Z24_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      unpack_uint_24_8_depth_stencil_Z24_UNORM_S8_UINT(src, dst, n);
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      unpack_uint_24_8_depth_stencil_Z32_S8X24(src, dst, n);
-      break;
-   default:
-      _mesa_problem(NULL,
-                    "bad format %s in _mesa_unpack_uint_24_8_depth_stencil_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-}
-
-static void
-unpack_float_32_uint_24_8_Z24_UNORM_S8_UINT(const GLuint *src,
-                                            GLuint *dst, GLuint n)
-{
-   GLuint i;
-   struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-
-   for (i = 0; i < n; i++) {
-      const GLuint z24 = src[i] & 0xffffff;
-      d[i].z = z24 * scale;
-      d[i].x24s8 = src[i] >> 24;
-      assert(d[i].z >= 0.0f);
-      assert(d[i].z <= 1.0f);
-   }
-}
-
-static void
-unpack_float_32_uint_24_8_Z32_FLOAT_S8X24_UINT(const GLuint *src,
-                                               GLuint *dst, GLuint n)
-{
-   memcpy(dst, src, n * sizeof(struct z32f_x24s8));
-}
-
-static void
-unpack_float_32_uint_24_8_S8_UINT_Z24_UNORM(const GLuint *src,
-                                            GLuint *dst, GLuint n)
-{
-   GLuint i;
-   struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-
-   for (i = 0; i < n; i++) {
-      const GLuint z24 = src[i] >> 8;
-      d[i].z = z24 * scale;
-      d[i].x24s8 = src[i] & 0xff;
-      assert(d[i].z >= 0.0f);
-      assert(d[i].z <= 1.0f);
-   }
-}
-
-/**
- * Unpack depth/stencil returning as GL_FLOAT_32_UNSIGNED_INT_24_8_REV.
- * \param format  the source data format
- *
- * In GL_FLOAT_32_UNSIGNED_INT_24_8_REV lower 4 bytes contain float
- * component and higher 4 bytes contain packed 24-bit and 8-bit
- * components.
- *
- *    31 30 29 28 ... 4 3 2 1 0    31 30 29 ... 9 8 7 6 5 ... 2 1 0
- *    +-------------------------+  +--------------------------------+
- *    |    Float Component      |  | Unused         | 8 bit stencil |
- *    +-------------------------+  +--------------------------------+
- *          lower 4 bytes                  higher 4 bytes
- */
-void
-_mesa_unpack_float_32_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
-			                          const void *src, GLuint *dst)
-{
-   switch (format) {
-   case MESA_FORMAT_S8_UINT_Z24_UNORM:
-      unpack_float_32_uint_24_8_S8_UINT_Z24_UNORM(src, dst, n);
-      break;
-   case MESA_FORMAT_Z24_UNORM_S8_UINT:
-      unpack_float_32_uint_24_8_Z24_UNORM_S8_UINT(src, dst, n);
-      break;
-   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
-      unpack_float_32_uint_24_8_Z32_FLOAT_S8X24_UINT(src, dst, n);
-      break;
-   default:
-      _mesa_problem(NULL,
-                    "bad format %s in _mesa_unpack_uint_24_8_depth_stencil_row",
-                    _mesa_get_format_name(format));
-      return;
-   }
-}
-
-/**
- * Unpack depth/stencil
- * \param format  the source data format
- * \param type the destination data type
- */
-void
-_mesa_unpack_depth_stencil_row(mesa_format format, GLuint n,
-	                       const void *src, GLenum type,
-                               GLuint *dst)
-{
-   assert(type == GL_UNSIGNED_INT_24_8 ||
-          type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
-
-   switch (type) {
-   case GL_UNSIGNED_INT_24_8:
-      _mesa_unpack_uint_24_8_depth_stencil_row(format, n, src, dst);
-      break;
-   case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
-      _mesa_unpack_float_32_uint_24_8_depth_stencil_row(format, n, src, dst);
-      break;
-   default:
-      _mesa_problem(NULL,
-                    "bad type 0x%x in _mesa_unpack_depth_stencil_row",
-                    type);
-      return;
-   }
-}
diff --git a/mesalib/src/mesa/main/format_unpack.py b/mesalib/src/mesa/main/format_unpack.py
new file mode 100644
index 000000000..2276a1063
--- /dev/null
+++ b/mesalib/src/mesa/main/format_unpack.py
@@ -0,0 +1,895 @@
+#!/usr/bin/env python
+
+from mako.template import Template
+from sys import argv
+
+string = """/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (c) 2011 VMware, Inc.
+ * Copyright (c) 2014 Intel Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Color, depth, stencil unpacking functions.
+ * Used to unpack specific hardware formats to basic color, depth and
+ * stencil formats.
+ *
+ * There are both per-pixel and per-row unpacking functions:
+ * - The former will be used by swrast to write values to the color, depth,
+ *   stencil buffers when drawing points, lines and masked spans.
+ * - The latter will be used for image-oriented functions like glDrawPixels,
+ *   glAccum, and glTexImage.
+ */
+
+#include <stdint.h>
+
+#include "colormac.h"
+#include "format_unpack.h"
+#include "format_utils.h"
+#include "macros.h"
+#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
+#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
+#include "util/format_srgb.h"
+
+#define UNPACK(SRC, OFFSET, BITS) (((SRC) >> (OFFSET)) & MAX_UINT(BITS))
+
+<%
+import format_parser as parser
+
+formats = parser.parse(argv[1])
+
+rgb_formats = []
+for f in formats:
+   if f.name == 'MESA_FORMAT_NONE':
+      continue
+   if f.colorspace not in ('rgb', 'srgb'):
+      continue
+
+   rgb_formats.append(f)
+%>
+
+/* float unpacking functions */
+
+%for f in rgb_formats:
+   %if f.name in ('MESA_FORMAT_R9G9B9E5_FLOAT', 'MESA_FORMAT_R11G11B10_FLOAT'):
+      <% continue %>
+   %elif f.is_int() and not f.is_normalized():
+      <% continue %>
+   %elif f.is_compressed():
+      <% continue %>
+   %endif
+
+static inline void
+unpack_float_${f.short_name()}(const void *void_src, GLfloat dst[4])
+{
+   ${f.datatype()} *src = (${f.datatype()} *)void_src;
+   %if f.layout == parser.PACKED:
+      %for c in f.channels:
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = UNPACK(*src, ${c.shift}, ${c.size});
+         %endif
+      %endfor
+   %elif f.layout == parser.ARRAY:
+      %for (i, c) in enumerate(f.channels):
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = src[${i}];
+         %endif
+      %endfor
+   %else:
+      <% assert False %>
+   %endif
+
+   %for i in range(4):
+      <% s = f.swizzle[i] %>
+      %if 0 <= s and s <= parser.Swizzle.SWIZZLE_W:
+         <% c = f.channels[s] %>
+         %if c.type == parser.UNSIGNED:
+            %if f.colorspace == 'srgb' and c.name in 'rgb':
+               <% assert c.size == 8 %>
+               dst[${i}] = util_format_srgb_8unorm_to_linear_float(${c.name});
+            %else:
+               dst[${i}] = _mesa_unorm_to_float(${c.name}, ${c.size});
+            %endif
+         %elif c.type == parser.SIGNED:
+            dst[${i}] = _mesa_snorm_to_float(${c.name}, ${c.size});
+         %elif c.type == parser.FLOAT:
+            %if c.size == 32:
+               dst[${i}] = ${c.name};
+            %elif c.size == 16:
+               dst[${i}] = _mesa_half_to_float(${c.name});
+            %else:
+               <% assert False %>
+            %endif
+         %else:
+            <% assert False %>
+         %endif
+      %elif s == parser.Swizzle.SWIZZLE_ZERO:
+         dst[${i}] = 0.0f;
+      %elif s == parser.Swizzle.SWIZZLE_ONE:
+         dst[${i}] = 1.0f;
+      %else:
+         <% assert False %>
+      %endif
+   %endfor
+}
+%endfor
+
+static void
+unpack_float_r9g9b9e5_float(const void *src, GLfloat dst[4])
+{
+   rgb9e5_to_float3(*(const GLuint *)src, dst);
+   dst[3] = 1.0f;
+}
+
+static void
+unpack_float_r11g11b10_float(const void *src, GLfloat dst[4])
+{
+   r11g11b10f_to_float3(*(const GLuint *)src, dst);
+   dst[3] = 1.0f;
+}
+
+static void
+unpack_float_ycbcr(const void *src, GLfloat dst[][4], GLuint n)
+{
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      const GLushort *src0 = ((const GLushort *) src) + i * 2; /* even */
+      const GLushort *src1 = src0 + 1;         /* odd */
+      const GLubyte y0 = (*src0 >> 8) & 0xff;  /* luminance */
+      const GLubyte cb = *src0 & 0xff;         /* chroma U */
+      const GLubyte y1 = (*src1 >> 8) & 0xff;  /* luminance */
+      const GLubyte cr = *src1 & 0xff;         /* chroma V */
+      const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
+      GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
+      GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
+      GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
+      r *= (1.0F / 255.0F);
+      g *= (1.0F / 255.0F);
+      b *= (1.0F / 255.0F);
+      dst[i][0] = CLAMP(r, 0.0F, 1.0F);
+      dst[i][1] = CLAMP(g, 0.0F, 1.0F);
+      dst[i][2] = CLAMP(b, 0.0F, 1.0F);
+      dst[i][3] = 1.0F;
+   }
+}
+
+static void
+unpack_float_ycbcr_rev(const void *src, GLfloat dst[][4], GLuint n)
+{
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      const GLushort *src0 = ((const GLushort *) src) + i * 2; /* even */
+      const GLushort *src1 = src0 + 1;         /* odd */
+      const GLubyte y0 = *src0 & 0xff;         /* luminance */
+      const GLubyte cr = (*src0 >> 8) & 0xff;  /* chroma V */
+      const GLubyte y1 = *src1 & 0xff;         /* luminance */
+      const GLubyte cb = (*src1 >> 8) & 0xff;  /* chroma U */
+      const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
+      GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
+      GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
+      GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
+      r *= (1.0F / 255.0F);
+      g *= (1.0F / 255.0F);
+      b *= (1.0F / 255.0F);
+      dst[i][0] = CLAMP(r, 0.0F, 1.0F);
+      dst[i][1] = CLAMP(g, 0.0F, 1.0F);
+      dst[i][2] = CLAMP(b, 0.0F, 1.0F);
+      dst[i][3] = 1.0F;
+   }
+}
+
+/* ubyte unpacking functions */
+
+%for f in rgb_formats:
+   %if not f.is_normalized():
+      <% continue %>
+   %endif
+
+static inline void
+unpack_ubyte_${f.short_name()}(const void *void_src, GLubyte dst[4])
+{
+   ${f.datatype()} *src = (${f.datatype()} *)void_src;
+   %if f.layout == parser.PACKED:
+      %for c in f.channels:
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = UNPACK(*src, ${c.shift}, ${c.size});
+         %endif
+      %endfor
+   %elif f.layout == parser.ARRAY:
+      %for (i, c) in enumerate(f.channels):
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = src[${i}];
+         %endif
+      %endfor
+   %else:
+      <% assert False %>
+   %endif
+
+   %for i in range(4):
+      <% s = f.swizzle[i] %>
+      %if 0 <= s and s <= parser.Swizzle.SWIZZLE_W:
+         <% c = f.channels[s] %>
+         %if c.type == parser.UNSIGNED:
+            %if f.colorspace == 'srgb' and c.name in 'rgb':
+               <% assert c.size == 8 %>
+               dst[${i}] = util_format_srgb_to_linear_8unorm(${c.name});
+            %else:
+               dst[${i}] = _mesa_unorm_to_unorm(${c.name}, ${c.size}, 8);
+            %endif
+         %elif c.type == parser.SIGNED:
+            dst[${i}] = _mesa_snorm_to_unorm(${c.name}, ${c.size}, 8);
+         %elif c.type == parser.FLOAT:
+            %if c.size == 32:
+               dst[${i}] = _mesa_float_to_unorm(${c.name}, 8);
+            %elif c.size == 16:
+               dst[${i}] = _mesa_half_to_unorm(${c.name}, 8);
+            %else:
+               <% assert False %>
+            %endif
+         %else:
+            <% assert False %>
+         %endif
+      %elif s == parser.Swizzle.SWIZZLE_ZERO:
+         dst[${i}] = 0;
+      %elif s == parser.Swizzle.SWIZZLE_ONE:
+         dst[${i}] = 255;
+      %else:
+         <% assert False %>
+      %endif
+   %endfor
+}
+%endfor
+
+/* integer unpacking functions */
+
+%for f in rgb_formats:
+   %if not f.is_int():
+      <% continue %>
+   %elif f.is_normalized():
+      <% continue %>
+   %endif
+
+static inline void
+unpack_int_${f.short_name()}(const void *void_src, GLuint dst[4])
+{
+   ${f.datatype()} *src = (${f.datatype()} *)void_src;
+   %if f.layout == parser.PACKED:
+      %for c in f.channels:
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = UNPACK(*src, ${c.shift}, ${c.size});
+         %endif
+      %endfor
+   %elif f.layout == parser.ARRAY:
+      %for (i, c) in enumerate(f.channels):
+         %if c.type != 'x':
+            ${c.datatype()} ${c.name} = src[${i}];
+         %endif
+      %endfor
+   %else:
+      <% assert False %>
+   %endif
+
+   %for i in range(4):
+      <% s = f.swizzle[i] %>
+      %if 0 <= s and s <= parser.Swizzle.SWIZZLE_W:
+         dst[${i}] = ${f.channels[s].name};
+      %elif s == parser.Swizzle.SWIZZLE_ZERO:
+         dst[${i}] = 0;
+      %elif s == parser.Swizzle.SWIZZLE_ONE:
+         dst[${i}] = 1;
+      %else:
+         <% assert False %>
+      %endif
+   %endfor
+}
+%endfor
+
+/** Unpack n pixels of the given format from src into float RGBA in dst. */
+void
+_mesa_unpack_rgba_row(mesa_format format, GLuint n,
+                      const void *src, GLfloat dst[][4])
+{
+   const GLubyte *s = (const GLubyte *)src;
+   GLuint i;
+## '//' is floor division on Python 2 and 3, so the byte step stays integral.
+   switch (format) {
+%for f in rgb_formats:
+   %if f.is_compressed():
+      <% continue %>
+   %elif f.is_int() and not f.is_normalized():
+      <% continue %>
+   %endif
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         unpack_float_${f.short_name()}(s, dst[i]);
+         s += ${f.block_size() // 8}; /* advance to the next source pixel */
+      }
+      break;
+%endfor
+   case MESA_FORMAT_YCBCR:
+      unpack_float_ycbcr(src, dst, n);
+      break;
+   case MESA_FORMAT_YCBCR_REV:
+      unpack_float_ycbcr_rev(src, dst, n);
+      break;
+   default:
+      _mesa_problem(NULL, "%s: bad format %s", __FUNCTION__,
+                    _mesa_get_format_name(format));
+      return;
+   }
+}
+
+void
+_mesa_unpack_ubyte_rgba_row(mesa_format format, GLuint n,
+                            const void *src, GLubyte dst[][4])
+{
+   const GLubyte *s = (const GLubyte *)src;
+   GLuint i;
+   /* Unpack n pixels from src into 8-bit unorm RGBA in dst. */
+   switch (format) {
+%for f in rgb_formats:
+   %if not f.is_normalized():
+      <% continue %>
+   %endif
+## '//' is floor division on Python 2 and 3, so the byte step stays integral.
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         unpack_ubyte_${f.short_name()}(s, dst[i]);
+         s += ${f.block_size() // 8}; /* advance to the next source pixel */
+      }
+      break;
+%endfor
+   default:
+      /* get float values, convert to ubyte */
+      {
+         GLfloat *tmp = malloc(n * 4 * sizeof(GLfloat));
+         if (tmp) {
+            /* reuse the outer i; on malloc failure dst is left unmodified */
+            _mesa_unpack_rgba_row(format, n, src, (GLfloat (*)[4]) tmp);
+            for (i = 0; i < n; i++) {
+               dst[i][0] = _mesa_float_to_unorm(tmp[i*4+0], 8);
+               dst[i][1] = _mesa_float_to_unorm(tmp[i*4+1], 8);
+               dst[i][2] = _mesa_float_to_unorm(tmp[i*4+2], 8);
+               dst[i][3] = _mesa_float_to_unorm(tmp[i*4+3], 8);
+            }
+            free(tmp);
+         }
+      }
+      break;
+   }
+}
+
+void
+_mesa_unpack_uint_rgba_row(mesa_format format, GLuint n,
+                           const void *src, GLuint dst[][4])
+{
+   const GLubyte *s = (const GLubyte *)src;
+   GLuint i;
+   /* Unpack n pixels of a non-normalized integer format from src into dst. */
+   switch (format) {
+%for f in rgb_formats:
+   %if not f.is_int():
+      <% continue %>
+   %elif f.is_normalized():
+      <% continue %>
+   %endif
+## '//' is floor division on Python 2 and 3, so the byte step stays integral.
+   case ${f.name}:
+      for (i = 0; i < n; ++i) {
+         unpack_int_${f.short_name()}(s, dst[i]);
+         s += ${f.block_size() // 8}; /* advance to the next source pixel */
+      }
+      break;
+%endfor
+   default:
+      _mesa_problem(NULL, "%s: bad format %s", __FUNCTION__,
+                    _mesa_get_format_name(format));
+      return;
+   }
+}
+
+/**
+ * Unpack a 2D rect of pixels returning float RGBA colors.
+ * \param format  the source image format
+ * \param src  start address of the source image
+ * \param srcRowStride  source image row stride in bytes
+ * \param dst  start address of the dest image
+ * \param dstRowStride  dest image row stride in bytes
+ * \param x  start X pos of the rect, applied to both source and dest
+ * \param y  start Y pos of the rect, applied to both source and dest
+ * \param width  width of rect region to convert
+ * \param height  height of rect region to convert
+ */
+void
+_mesa_unpack_rgba_block(mesa_format format,
+                        const void *src, GLint srcRowStride,
+                        GLfloat dst[][4], GLint dstRowStride,
+                        GLuint x, GLuint y, GLuint width, GLuint height)
+{
+   const GLuint srcPixStride = _mesa_get_format_bytes(format);
+   const GLuint dstPixStride = 4 * sizeof(GLfloat);
+   const GLubyte *srcRow;
+   GLubyte *dstRow;
+   GLuint i;
+
+   /* XXX needs to be fixed for compressed formats (offsets are per-pixel) */
+
+   srcRow = ((const GLubyte *) src) + srcRowStride * y + srcPixStride * x;
+   dstRow = ((GLubyte *) dst) + dstRowStride * y + dstPixStride * x;
+
+   for (i = 0; i < height; i++) {
+      _mesa_unpack_rgba_row(format, width, srcRow, (GLfloat (*)[4]) dstRow);
+
+      dstRow += dstRowStride;
+      srcRow += srcRowStride;
+   }
+}
+
+/** Helper struct for MESA_FORMAT_Z32_FLOAT_S8X24_UINT */
+struct z32f_x24s8
+{
+   float z;
+   uint32_t x24s8;
+};
+
+typedef void (*unpack_float_z_func)(GLuint n, const void *src, GLfloat *dst);
+
+static void
+unpack_float_z_X8_UINT_Z24_UNORM(GLuint n, const void *src, GLfloat *dst)
+{
+   /* only return Z, not stencil data */
+   const GLuint *s = ((const GLuint *) src);
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = (GLfloat) ((s[i] >> 8) * scale);
+      ASSERT(dst[i] >= 0.0F);
+      ASSERT(dst[i] <= 1.0F);
+   }
+}
+
+static void
+unpack_float_z_Z24_UNORM_X8_UINT(GLuint n, const void *src, GLfloat *dst)
+{
+   /* only return Z, not stencil data */
+   const GLuint *s = ((const GLuint *) src);
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = (GLfloat) ((s[i] & 0x00ffffff) * scale);
+      ASSERT(dst[i] >= 0.0F);
+      ASSERT(dst[i] <= 1.0F);
+   }
+}
+
+static void
+unpack_float_Z_UNORM16(GLuint n, const void *src, GLfloat *dst)
+{
+   const GLushort *s = ((const GLushort *) src);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = s[i] * (1.0F / 65535.0F);
+   }
+}
+
+static void
+unpack_float_Z_UNORM32(GLuint n, const void *src, GLfloat *dst)
+{
+   const GLuint *s = ((const GLuint *) src);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = s[i] * (1.0F / 0xffffffff);
+   }
+}
+
+static void
+unpack_float_Z_FLOAT32(GLuint n, const void *src, GLfloat *dst)
+{
+   memcpy(dst, src, n * sizeof(float));
+}
+
+static void
+unpack_float_z_Z32X24S8(GLuint n, const void *src, GLfloat *dst)
+{
+   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = s[i].z;
+   }
+}
+
+
+
+/**
+ * Unpack n Z values to floats; a bad format is reported and dst is untouched.
+ * Unorm sources map to [0.0, 1.0]; float sources are copied without clamping.
+ */
+void
+_mesa_unpack_float_z_row(mesa_format format, GLuint n,
+                         const void *src, GLfloat *dst)
+{
+   unpack_float_z_func unpack;
+
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      unpack = unpack_float_z_X8_UINT_Z24_UNORM;
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      unpack = unpack_float_z_Z24_UNORM_X8_UINT;
+      break;
+   case MESA_FORMAT_Z_UNORM16:
+      unpack = unpack_float_Z_UNORM16;
+      break;
+   case MESA_FORMAT_Z_UNORM32:
+      unpack = unpack_float_Z_UNORM32;
+      break;
+   case MESA_FORMAT_Z_FLOAT32:
+      unpack = unpack_float_Z_FLOAT32;
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack = unpack_float_z_Z32X24S8;
+      break;
+   default:
+      _mesa_problem(NULL, "bad format %s in _mesa_unpack_float_z_row",
+                    _mesa_get_format_name(format));
+      return;
+   }
+
+   unpack(n, src, dst);
+}
+
+
+
+typedef void (*unpack_uint_z_func)(const void *src, GLuint *dst, GLuint n);
+
+static void
+unpack_uint_z_X8_UINT_Z24_UNORM(const void *src, GLuint *dst, GLuint n)
+{
+   /* only return Z, not stencil data: the low byte is refilled with Z's top byte */
+   const GLuint *s = ((const GLuint *) src);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = (s[i] & 0xffffff00) | (s[i] >> 24);
+   }
+}
+
+static void
+unpack_uint_z_Z24_UNORM_X8_UINT(const void *src, GLuint *dst, GLuint n)
+{
+   /* only return Z, not stencil data: Z moves up 8 bits, low byte = Z's top byte */
+   const GLuint *s = ((const GLuint *) src);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = (s[i] << 8) | ((s[i] >> 16) & 0xff);
+   }
+}
+
+static void
+unpack_uint_Z_UNORM16(const void *src, GLuint *dst, GLuint n)
+{
+   const GLushort *s = ((const GLushort *)src);
+   GLuint i;
+   for (i = 0; i < n; i++) { /* replicate the 16-bit Z into both halves */
+      dst[i] = ((GLuint) s[i] << 16) | s[i]; /* cast: shift as unsigned */
+   }
+}
+
+static void
+unpack_uint_Z_UNORM32(const void *src, GLuint *dst, GLuint n)
+{
+   memcpy(dst, src, n * sizeof(GLuint));
+}
+
+static void
+unpack_uint_Z_FLOAT32(const void *src, GLuint *dst, GLuint n)
+{
+   const float *s = (const float *)src;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      dst[i] = FLOAT_TO_UINT(CLAMP(s[i], 0.0F, 1.0F));
+   }
+}
+
+static void
+unpack_uint_Z_FLOAT32_X24S8(const void *src, GLuint *dst, GLuint n)
+{
+   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
+   GLuint i;
+
+   for (i = 0; i < n; i++) {
+      dst[i] = FLOAT_TO_UINT(CLAMP(s[i].z, 0.0F, 1.0F));
+   }
+}
+
+
+/**
+ * Unpack n Z values to uints; a bad format is reported and dst is untouched.
+ * The returned values will always be in the range [0, 0xffffffff].
+ */
+void
+_mesa_unpack_uint_z_row(mesa_format format, GLuint n,
+                        const void *src, GLuint *dst)
+{
+   unpack_uint_z_func unpack;
+   const GLubyte *srcPtr = (GLubyte *) src;
+
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+   case MESA_FORMAT_X8_UINT_Z24_UNORM:
+      unpack = unpack_uint_z_X8_UINT_Z24_UNORM;
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+   case MESA_FORMAT_Z24_UNORM_X8_UINT:
+      unpack = unpack_uint_z_Z24_UNORM_X8_UINT;
+      break;
+   case MESA_FORMAT_Z_UNORM16:
+      unpack = unpack_uint_Z_UNORM16;
+      break;
+   case MESA_FORMAT_Z_UNORM32:
+      unpack = unpack_uint_Z_UNORM32;
+      break;
+   case MESA_FORMAT_Z_FLOAT32:
+      unpack = unpack_uint_Z_FLOAT32;
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack = unpack_uint_Z_FLOAT32_X24S8;
+      break;
+   default:
+      _mesa_problem(NULL, "bad format %s in _mesa_unpack_uint_z_row",
+                    _mesa_get_format_name(format));
+      return;
+   }
+
+   unpack(srcPtr, dst, n);
+}
+
+
+static void
+unpack_ubyte_s_S_UINT8(const void *src, GLubyte *dst, GLuint n)
+{
+   memcpy(dst, src, n);
+}
+
+static void
+unpack_ubyte_s_S8_UINT_Z24_UNORM(const void *src, GLubyte *dst, GLuint n)
+{
+   GLuint i;
+   const GLuint *src32 = src;
+
+   for (i = 0; i < n; i++)
+      dst[i] = src32[i] & 0xff;
+}
+
+static void
+unpack_ubyte_s_Z24_UNORM_S8_UINT(const void *src, GLubyte *dst, GLuint n)
+{
+   GLuint i;
+   const GLuint *src32 = src;
+
+   for (i = 0; i < n; i++)
+      dst[i] = src32[i] >> 24;
+}
+
+static void
+unpack_ubyte_s_Z32_FLOAT_S8X24_UINT(const void *src, GLubyte *dst, GLuint n)
+{
+   GLuint i;
+   const struct z32f_x24s8 *s = (const struct z32f_x24s8 *) src;
+
+   for (i = 0; i < n; i++)
+      dst[i] = s[i].x24s8 & 0xff;
+}
+
+void
+_mesa_unpack_ubyte_stencil_row(mesa_format format, GLuint n,
+                               const void *src, GLubyte *dst)
+{
+   switch (format) { /* each handled format writes n stencil bytes to dst */
+   case MESA_FORMAT_S_UINT8:
+      unpack_ubyte_s_S_UINT8(src, dst, n);
+      break;
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      unpack_ubyte_s_S8_UINT_Z24_UNORM(src, dst, n);
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      unpack_ubyte_s_Z24_UNORM_S8_UINT(src, dst, n);
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack_ubyte_s_Z32_FLOAT_S8X24_UINT(src, dst, n);
+      break;
+   default:
+      _mesa_problem(NULL, "bad format %s in _mesa_unpack_ubyte_stencil_row",
+                    _mesa_get_format_name(format));
+      return;
+   }
+}
+
+static void
+unpack_uint_24_8_depth_stencil_Z24_UNORM_S8_UINT(const GLuint *src, GLuint *dst, GLuint n)
+{
+   GLuint i;
+
+   for (i = 0; i < n; i++) {
+      GLuint val = src[i];
+      dst[i] = val >> 24 | val << 8;
+   }
+}
+
+static void
+unpack_uint_24_8_depth_stencil_Z32_S8X24(const GLuint *src,
+                                         GLuint *dst, GLuint n)
+{
+   GLuint i;
+   /* Source is 8 bytes per pixel: a float Z, then a uint32 holding stencil. */
+   for (i = 0; i < n; i++) {
+      /* clamp like the uint Z unpackers so z24 cannot overflow its 24 bits */
+      GLfloat zf = CLAMP(((const GLfloat *) src)[i * 2 + 0], 0.0F, 1.0F);
+      GLuint z24 = (GLuint) (zf * (GLfloat) 0xffffff);
+      GLuint s = src[i * 2 + 1] & 0xff;
+      dst[i] = (z24 << 8) | s;
+   }
+}
+
+static void
+unpack_uint_24_8_depth_stencil_S8_UINT_Z24_UNORM(const GLuint *src, GLuint *dst, GLuint n)
+{
+   memcpy(dst, src, n * 4);
+}
+
+/**
+ * Unpack depth/stencil returning as GL_UNSIGNED_INT_24_8.
+ * \param format  the source data format
+ */
+void
+_mesa_unpack_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
+					 const void *src, GLuint *dst)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      unpack_uint_24_8_depth_stencil_S8_UINT_Z24_UNORM(src, dst, n);
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      unpack_uint_24_8_depth_stencil_Z24_UNORM_S8_UINT(src, dst, n);
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack_uint_24_8_depth_stencil_Z32_S8X24(src, dst, n);
+      break;
+   default:
+      _mesa_problem(NULL,
+                    "bad format %s in _mesa_unpack_uint_24_8_depth_stencil_row",
+                    _mesa_get_format_name(format));
+      return;
+   }
+}
+
+static void
+unpack_float_32_uint_24_8_Z24_UNORM_S8_UINT(const GLuint *src,
+                                            GLuint *dst, GLuint n)
+{
+   GLuint i;
+   struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
+
+   for (i = 0; i < n; i++) {
+      const GLuint z24 = src[i] & 0xffffff;
+      d[i].z = z24 * scale;
+      d[i].x24s8 = src[i] >> 24;
+      assert(d[i].z >= 0.0f);
+      assert(d[i].z <= 1.0f);
+   }
+}
+
+static void
+unpack_float_32_uint_24_8_Z32_FLOAT_S8X24_UINT(const GLuint *src,
+                                               GLuint *dst, GLuint n)
+{
+   memcpy(dst, src, n * sizeof(struct z32f_x24s8));
+}
+
+static void
+unpack_float_32_uint_24_8_S8_UINT_Z24_UNORM(const GLuint *src,
+                                            GLuint *dst, GLuint n)
+{
+   GLuint i;
+   struct z32f_x24s8 *d = (struct z32f_x24s8 *) dst;
+   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
+
+   for (i = 0; i < n; i++) {
+      const GLuint z24 = src[i] >> 8;
+      d[i].z = z24 * scale;
+      d[i].x24s8 = src[i] & 0xff;
+      assert(d[i].z >= 0.0f);
+      assert(d[i].z <= 1.0f);
+   }
+}
+
+/**
+ * Unpack depth/stencil returning as GL_FLOAT_32_UNSIGNED_INT_24_8_REV.
+ * \param format  the source data format
+ * \param n  number of pixels to unpack from src into dst
+ *
+ * Each output pixel is 8 bytes: the lower 4 bytes hold the float depth and
+ * the higher 4 bytes hold the stencil value in their low 8 bits.
+ *
+ *    31 30 29 28 ... 4 3 2 1 0    31 30 29 ... 9 8 7 6 5 ... 2 1 0
+ *    +-------------------------+  +--------------------------------+
+ *    |    Float Component      |  | Unused         | 8 bit stencil |
+ *    +-------------------------+  +--------------------------------+
+ *          lower 4 bytes                  higher 4 bytes
+ */
+void
+_mesa_unpack_float_32_uint_24_8_depth_stencil_row(mesa_format format, GLuint n,
+                                                  const void *src, GLuint *dst)
+{
+   switch (format) {
+   case MESA_FORMAT_S8_UINT_Z24_UNORM:
+      unpack_float_32_uint_24_8_S8_UINT_Z24_UNORM(src, dst, n);
+      break;
+   case MESA_FORMAT_Z24_UNORM_S8_UINT:
+      unpack_float_32_uint_24_8_Z24_UNORM_S8_UINT(src, dst, n);
+      break;
+   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
+      unpack_float_32_uint_24_8_Z32_FLOAT_S8X24_UINT(src, dst, n);
+      break;
+   default:
+      _mesa_problem(NULL, "bad format %s in "
+                    "_mesa_unpack_float_32_uint_24_8_depth_stencil_row",
+                    _mesa_get_format_name(format));
+      return;
+   }
+}
+
+/**
+ * Unpack depth/stencil
+ * \param format  the source data format
+ * \param type the destination data type
+ */
+void
+_mesa_unpack_depth_stencil_row(mesa_format format, GLuint n,
+	                       const void *src, GLenum type,
+                               GLuint *dst)
+{
+   assert(type == GL_UNSIGNED_INT_24_8 ||
+          type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
+
+   switch (type) {
+   case GL_UNSIGNED_INT_24_8:
+      _mesa_unpack_uint_24_8_depth_stencil_row(format, n, src, dst);
+      break;
+   case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+      _mesa_unpack_float_32_uint_24_8_depth_stencil_row(format, n, src, dst);
+      break;
+   default:
+      _mesa_problem(NULL,
+                    "bad type 0x%x in _mesa_unpack_depth_stencil_row",
+                    type);
+      return;
+   }
+}
+"""
+
+template = Template(string)
+# Render the generated C to stdout; one-argument print() works on Python 2 and 3.
+print(template.render(argv = argv[0:]))
diff --git a/mesalib/src/mesa/main/format_utils.c b/mesalib/src/mesa/main/format_utils.c
index 93a0ceac0..810bb1634 100644
--- a/mesalib/src/mesa/main/format_utils.c
+++ b/mesalib/src/mesa/main/format_utils.c
@@ -24,6 +24,549 @@
 
 #include "format_utils.h"
 #include "glformats.h"
+#include "format_pack.h"
+#include "format_unpack.h"
+
+const mesa_array_format RGBA32_FLOAT =
+   MESA_ARRAY_FORMAT(4, 1, 1, 1, 4, 0, 1, 2, 3); /* fast paths use float[4] pixels */
+
+const mesa_array_format RGBA8_UBYTE =
+   MESA_ARRAY_FORMAT(1, 0, 0, 1, 4, 0, 1, 2, 3); /* fast paths use uint8_t[4] pixels */
+
+const mesa_array_format RGBA32_UINT =
+   MESA_ARRAY_FORMAT(4, 0, 0, 0, 4, 0, 1, 2, 3); /* fast paths use uint32_t[4] pixels */
+
+const mesa_array_format RGBA32_INT =
+   MESA_ARRAY_FORMAT(4, 1, 0, 0, 4, 0, 1, 2, 3);
+
+static void
+invert_swizzle(uint8_t dst[4], const uint8_t src[4])
+{
+   int i, j; /* i: value searched for in src; j: position within src */
+
+   dst[0] = MESA_FORMAT_SWIZZLE_NONE; /* entries stay NONE when no src[j] == i */
+   dst[1] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[2] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[3] = MESA_FORMAT_SWIZZLE_NONE;
+
+   for (i = 0; i < 4; ++i)
+      for (j = 0; j < 4; ++j)
+         if (src[j] == i && dst[i] == MESA_FORMAT_SWIZZLE_NONE) /* fill only while still NONE */
+            dst[i] = j; /* dst[i] records where value i sits in src */
+}
+
+/* Takes a src to RGBA swizzle and applies a rebase swizzle to it. This
+ * is used when we need to rebase a format to match a different
+ * base internal format.
+ *
+ * The rebase swizzle can be NULL, which means that no rebase is necessary,
+ * in which case the src to RGBA swizzle is copied to the output without
+ * changes.
+ *
+ * The resulting rebased swizzle as well as the input swizzles are all
+ * 4-element swizzles. Rebase entries greater than MESA_FORMAT_SWIZZLE_W
+ * are copied to the output as-is rather than used to index src2rgba.
+ */
+static void
+compute_rebased_rgba_component_mapping(uint8_t *src2rgba,
+                                       uint8_t *rebase_swizzle,
+                                       uint8_t *rebased_src2rgba)
+{
+   int i;
+
+   if (rebase_swizzle) {
+      for (i = 0; i < 4; i++) {
+         if (rebase_swizzle[i] > MESA_FORMAT_SWIZZLE_W)
+            rebased_src2rgba[i] = rebase_swizzle[i]; /* not an index: pass through */
+         else
+            rebased_src2rgba[i] = src2rgba[rebase_swizzle[i]]; /* compose the two */
+      }
+   } else {
+      /* No rebase needed, so src2rgba is all that we need */
+      memcpy(rebased_src2rgba, src2rgba, 4 * sizeof(uint8_t));
+   }
+}
+
+/* Computes the final swizzle transform to apply from src to dst in a
+ * conversion that might involve a rebase swizzle.
+ *
+ * This is used to compute the swizzle transform to apply in conversions
+ * between array formats where we have a src2rgba swizzle, a rgba2dst swizzle
+ * and possibly, a rebase swizzle (NULL when there is none).
+ *
+ * The final swizzle transform to apply (src2dst) when a rebase swizzle is
+ * involved is: src -> rgba -> base -> rgba -> dst
+ */
+static void
+compute_src2dst_component_mapping(uint8_t *src2rgba, uint8_t *rgba2dst,
+                                  uint8_t *rebase_swizzle, uint8_t *src2dst)
+{
+   int i;
+
+   if (!rebase_swizzle) {
+      for (i = 0; i < 4; i++) {
+         if (rgba2dst[i] > MESA_FORMAT_SWIZZLE_W) { /* not an index: copy unchanged */
+            src2dst[i] = rgba2dst[i];
+         } else {
+            src2dst[i] = src2rgba[rgba2dst[i]];
+         }
+      }
+   } else {
+      for (i = 0; i < 4; i++) {
+         if (rgba2dst[i] > MESA_FORMAT_SWIZZLE_W) { /* not an index: copy unchanged */
+            src2dst[i] = rgba2dst[i];
+         } else if (rebase_swizzle[rgba2dst[i]] > MESA_FORMAT_SWIZZLE_W) {
+            src2dst[i] = rebase_swizzle[rgba2dst[i]]; /* rebase entry is not an index */
+         } else {
+            src2dst[i] = src2rgba[rebase_swizzle[rgba2dst[i]]]; /* full composition */
+         }
+      }
+   }
+}
+
+/**
+ * This function is used by clients of _mesa_format_convert to obtain
+ * the rebase swizzle to use in a format conversion based on the base
+ * format involved.
+ *
+ * \param baseFormat  the base internal format involved in the conversion.
+ * \param map  output; receives the 4-entry rebase swizzle when one is needed
+ *
+ * This function computes 'map' as rgba -> baseformat -> rgba and returns true
+ * if the resulting swizzle transform is not the identity transform (thus, a
+ * rebase is needed). If the function returns false then a rebase swizzle
+ * is not necessary and the value of 'map' is undefined. In this situation
+ * clients of _mesa_format_convert should pass NULL in the 'rebase_swizzle'
+ * parameter.
+ */
+bool
+_mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map)
+{
+   uint8_t rgba2base[6], base2rgba[6]; /* only base2rgba[0..3] is read below */
+   int i;
+
+   switch (baseFormat) {
+   case GL_ALPHA:
+   case GL_RED:
+   case GL_GREEN:
+   case GL_BLUE:
+   case GL_RG:
+   case GL_RGB:
+   case GL_BGR:
+   case GL_RGBA:
+   case GL_BGRA:
+   case GL_ABGR_EXT:
+   case GL_LUMINANCE:
+   case GL_INTENSITY:
+   case GL_LUMINANCE_ALPHA:
+      {
+         bool needRebase = false;
+         _mesa_compute_component_mapping(GL_RGBA, baseFormat, rgba2base);
+         _mesa_compute_component_mapping(baseFormat, GL_RGBA, base2rgba);
+         for (i = 0; i < 4; i++) {
+            if (base2rgba[i] > MESA_FORMAT_SWIZZLE_W) {
+               map[i] = base2rgba[i]; /* not an index: copy unchanged */
+            } else {
+               map[i] = rgba2base[base2rgba[i]];
+            }
+            if (map[i] != i) /* any non-identity entry means a rebase is needed */
+               needRebase = true;
+         }
+         return needRebase;
+      }
+   default:
+      unreachable("Unexpected base format");
+   }
+}
+
+/**
+ * This can be used to convert between most color formats.
+ *
+ * Limitations:
+ * - This function doesn't handle GL_COLOR_INDEX or YCBCR formats.
+ * - This function doesn't handle byte-swapping or transferOps, these should
+ *   be handled by the caller.
+ *
+ * \param void_dst  The address where converted color data will be stored.
+ *                  The caller must ensure that the buffer is large enough
+ *                  to hold the converted pixel data.
+ * \param dst_format  The destination color format. It can be a mesa_format
+ *                    or a mesa_array_format represented as an uint32_t.
+ * \param dst_stride  The stride of the destination format in bytes.
+ * \param void_src  The address of the source color data to convert.
+ * \param src_format  The source color format. It can be a mesa_format
+ *                    or a mesa_array_format represented as an uint32_t.
+ * \param src_stride  The stride of the source format in bytes.
+ * \param width  The width, in pixels, of the source image to convert.
+ * \param height  The height, in pixels, of the source image to convert.
+ * \param rebase_swizzle  A swizzle transform to apply during the conversion,
+ *                        typically used to match a different internal base
+ *                        format involved. NULL if no rebase transform is needed
+ *                        (i.e. the internal base format and the base format of
+ *                        the dst or the src -depending on whether we are doing
+ *                        an upload or a download respectively- are the same).
+ */
+void
+_mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
+                     void *void_src, uint32_t src_format, size_t src_stride,
+                     size_t width, size_t height, uint8_t *rebase_swizzle)
+{
+   uint8_t *dst = (uint8_t *)void_dst;
+   uint8_t *src = (uint8_t *)void_src;
+   mesa_array_format src_array_format, dst_array_format;
+   bool src_format_is_mesa_array_format, dst_format_is_mesa_array_format;
+   uint8_t src2dst[4], src2rgba[4], rgba2dst[4], dst2rgba[4];
+   uint8_t rebased_src2rgba[4];
+   enum mesa_array_format_datatype src_type = 0, dst_type = 0, common_type;
+   bool normalized, dst_integer, src_integer, is_signed;
+   int src_num_channels = 0, dst_num_channels = 0;
+   uint8_t (*tmp_ubyte)[4];
+   float (*tmp_float)[4];
+   uint32_t (*tmp_uint)[4];
+   int bits;
+   size_t row;
+
+   if (_mesa_format_is_mesa_array_format(src_format)) {
+      src_format_is_mesa_array_format = true;
+      src_array_format = src_format;
+   } else {
+      assert(_mesa_is_format_color_format(src_format));
+      src_format_is_mesa_array_format = false;
+      src_array_format = _mesa_format_to_array_format(src_format);
+   }
+
+   if (_mesa_format_is_mesa_array_format(dst_format)) {
+      dst_format_is_mesa_array_format = true;
+      dst_array_format = dst_format;
+   } else {
+      assert(_mesa_is_format_color_format(dst_format));
+      dst_format_is_mesa_array_format = false;
+      dst_array_format = _mesa_format_to_array_format(dst_format);
+   }
+
+   /* First we see if we can implement the conversion with a direct pack
+    * or unpack.
+    *
+    * In this case we want to be careful when we need to apply a swizzle to
+    * match an internal base format, since in these cases a simple pack/unpack
+    * to the dst format from the src format may not match the requirements
+    * of the internal base format. For now we decide to be safe and
+    * avoid this path in these scenarios but in the future we may want to
+    * enable it for specific combinations that are known to work.
+    */
+   if (!rebase_swizzle) {
+      /* Handle the cases where we can directly unpack */
+      if (!src_format_is_mesa_array_format) {
+         if (dst_array_format == RGBA32_FLOAT) {
+            for (row = 0; row < height; ++row) {
+               _mesa_unpack_rgba_row(src_format, width,
+                                     src, (float (*)[4])dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         } else if (dst_array_format == RGBA8_UBYTE) {
+            assert(!_mesa_is_format_integer_color(src_format));
+            for (row = 0; row < height; ++row) {
+               _mesa_unpack_ubyte_rgba_row(src_format, width,
+                                           src, (uint8_t (*)[4])dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         } else if (dst_array_format == RGBA32_UINT &&
+                    _mesa_is_format_unsigned(src_format)) {
+            assert(_mesa_is_format_integer_color(src_format));
+            for (row = 0; row < height; ++row) {
+               _mesa_unpack_uint_rgba_row(src_format, width,
+                                          src, (uint32_t (*)[4])dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         }
+      }
+
+      /* Handle the cases where we can directly pack */
+      if (!dst_format_is_mesa_array_format) {
+         if (src_array_format == RGBA32_FLOAT) {
+            for (row = 0; row < height; ++row) {
+               _mesa_pack_float_rgba_row(dst_format, width,
+                                         (const float (*)[4])src, dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         } else if (src_array_format == RGBA8_UBYTE) {
+            assert(!_mesa_is_format_integer_color(dst_format));
+            for (row = 0; row < height; ++row) {
+               _mesa_pack_ubyte_rgba_row(dst_format, width,
+                                         (const uint8_t (*)[4])src, dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         } else if (src_array_format == RGBA32_UINT &&
+                    _mesa_is_format_unsigned(dst_format)) {
+            assert(_mesa_is_format_integer_color(dst_format));
+            for (row = 0; row < height; ++row) {
+               _mesa_pack_uint_rgba_row(dst_format, width,
+                                        (const uint32_t (*)[4])src, dst);
+               src += src_stride;
+               dst += dst_stride;
+            }
+            return;
+         }
+      }
+   }
+
+   /* Handle conversions between array formats */
+   normalized = false;
+   if (src_array_format) {
+      src_type = _mesa_array_format_get_datatype(src_array_format);
+
+      src_num_channels = _mesa_array_format_get_num_channels(src_array_format);
+
+      _mesa_array_format_get_swizzle(src_array_format, src2rgba);
+
+      normalized = _mesa_array_format_is_normalized(src_array_format);
+   }
+
+   if (dst_array_format) {
+      dst_type = _mesa_array_format_get_datatype(dst_array_format);
+
+      dst_num_channels = _mesa_array_format_get_num_channels(dst_array_format);
+
+      _mesa_array_format_get_swizzle(dst_array_format, dst2rgba);
+      invert_swizzle(rgba2dst, dst2rgba);
+
+      normalized |= _mesa_array_format_is_normalized(dst_array_format);
+   }
+
+   if (src_array_format && dst_array_format) {
+      assert(_mesa_array_format_is_normalized(src_array_format) ==
+             _mesa_array_format_is_normalized(dst_array_format));
+
+      compute_src2dst_component_mapping(src2rgba, rgba2dst, rebase_swizzle,
+                                        src2dst);
+
+      for (row = 0; row < height; ++row) {
+         _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
+                                   src, src_type, src_num_channels,
+                                   src2dst, normalized, width);
+         src += src_stride;
+         dst += dst_stride;
+      }
+      return;
+   }
+
+   /* At this point, we're fresh out of fast-paths and we need to convert
+    * to float, uint32, or, if we're lucky, uint8.
+    */
+   dst_integer = false;
+   src_integer = false;
+
+   if (src_array_format) {
+      if (!_mesa_array_format_is_float(src_array_format) &&
+          !_mesa_array_format_is_normalized(src_array_format))
+         src_integer = true;
+   } else {
+      switch (_mesa_get_format_datatype(src_format)) {
+      case GL_UNSIGNED_INT:
+      case GL_INT:
+         src_integer = true;
+         break;
+      }
+   }
+
+   /* If the destination format is signed but the source is unsigned, then we
+    * don't lose any data by converting to a signed intermediate format above
+    * and beyond the precision that we lose in the conversion itself. If the
+    * destination is unsigned then, by using an unsigned intermediate format,
+    * we make the conversion function that converts from the source to the
+    * intermediate format take care of truncating at zero. The exception here
+    * is if the intermediate format is float, in which case the first
+    * conversion will leave it signed and the second conversion will truncate
+    * at zero.
+    */
+   is_signed = false;
+   if (dst_array_format) {
+      if (!_mesa_array_format_is_float(dst_array_format) &&
+          !_mesa_array_format_is_normalized(dst_array_format))
+         dst_integer = true;
+      is_signed = _mesa_array_format_is_signed(dst_array_format);
+      bits = 8 * _mesa_array_format_get_type_size(dst_array_format);
+   } else {
+      switch (_mesa_get_format_datatype(dst_format)) {
+      case GL_UNSIGNED_NORMALIZED:
+         is_signed = false;
+         break;
+      case GL_SIGNED_NORMALIZED:
+         is_signed = true;
+         break;
+      case GL_FLOAT:
+         is_signed = true;
+         break;
+      case GL_UNSIGNED_INT:
+         is_signed = false;
+         dst_integer = true;
+         break;
+      case GL_INT:
+         is_signed = true;
+         dst_integer = true;
+         break;
+      }
+      bits = _mesa_get_format_max_bits(dst_format);
+   }
+
+   assert(src_integer == dst_integer);
+
+   if (src_integer && dst_integer) {
+      tmp_uint = malloc(width * height * sizeof(*tmp_uint)); /* NOTE(review): no NULL or size-overflow check */
+
+      /* The [un]packing functions for unsigned datatypes treat the 32-bit
+       * integer array as signed for signed formats and as unsigned for
+       * unsigned formats. This is a bit of a problem if we ever convert from
+       * a signed to an unsigned format because the unsigned packing function
+       * doesn't know that the input is signed and will treat it as unsigned
+       * and not do the truncation. The thing that saves us here is that all
+       * of the packed formats are unsigned, so we can just always use
+       * _mesa_swizzle_and_convert for signed formats, which is aware of the
+       * truncation problem.
+       */
+      common_type = is_signed ? MESA_ARRAY_FORMAT_TYPE_INT :
+                                MESA_ARRAY_FORMAT_TYPE_UINT;
+      if (src_array_format) {
+         compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
+                                                rebased_src2rgba);
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 4,
+                                      src, src_type, src_num_channels,
+                                      rebased_src2rgba, normalized, width);
+            src += src_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_unpack_uint_rgba_row(src_format, width,
+                                       src, tmp_uint + row * width);
+            if (rebase_swizzle)
+               _mesa_swizzle_and_convert(tmp_uint + row * width, common_type, 4,
+                                         tmp_uint + row * width, common_type, 4,
+                                         rebase_swizzle, false, width);
+            src += src_stride;
+         }
+      }
+
+      /* At this point, we have already done the truncation if the source is
+       * signed but the destination is unsigned, so no need to force the
+       * _mesa_swizzle_and_convert path.
+       */
+      if (dst_format_is_mesa_array_format) {
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
+                                      tmp_uint + row * width, common_type, 4,
+                                      rgba2dst, normalized, width);
+            dst += dst_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_pack_uint_rgba_row(dst_format, width,
+                                     (const uint32_t (*)[4])tmp_uint + row * width, dst);
+            dst += dst_stride;
+         }
+      }
+
+      free(tmp_uint);
+   } else if (is_signed || bits > 8) {
+      tmp_float = malloc(width * height * sizeof(*tmp_float)); /* NOTE(review): no NULL or size-overflow check */
+
+      if (src_format_is_mesa_array_format) {
+         compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
+                                                rebased_src2rgba);
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(tmp_float + row * width,
+                                      MESA_ARRAY_FORMAT_TYPE_FLOAT, 4,
+                                      src, src_type, src_num_channels,
+                                      rebased_src2rgba, normalized, width);
+            src += src_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_unpack_rgba_row(src_format, width,
+                                  src, tmp_float + row * width);
+            if (rebase_swizzle)
+               _mesa_swizzle_and_convert(tmp_float + row * width,
+                                         MESA_ARRAY_FORMAT_TYPE_FLOAT, 4,
+                                         tmp_float + row * width,
+                                         MESA_ARRAY_FORMAT_TYPE_FLOAT, 4,
+                                         rebase_swizzle, normalized, width);
+            src += src_stride;
+         }
+      }
+
+      if (dst_format_is_mesa_array_format) {
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
+                                      tmp_float + row * width,
+                                      MESA_ARRAY_FORMAT_TYPE_FLOAT, 4,
+                                      rgba2dst, normalized, width);
+            dst += dst_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_pack_float_rgba_row(dst_format, width,
+                                      (const float (*)[4])tmp_float + row * width, dst);
+            dst += dst_stride;
+         }
+      }
+
+      free(tmp_float);
+   } else {
+      tmp_ubyte = malloc(width * height * sizeof(*tmp_ubyte)); /* NOTE(review): no NULL or size-overflow check */
+
+      if (src_format_is_mesa_array_format) {
+         compute_rebased_rgba_component_mapping(src2rgba, rebase_swizzle,
+                                                rebased_src2rgba);
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(tmp_ubyte + row * width,
+                                      MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                      src, src_type, src_num_channels,
+                                      rebased_src2rgba, normalized, width);
+            src += src_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_unpack_ubyte_rgba_row(src_format, width,
+                                        src, tmp_ubyte + row * width);
+            if (rebase_swizzle)
+               _mesa_swizzle_and_convert(tmp_ubyte + row * width,
+                                         MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                         tmp_ubyte + row * width,
+                                         MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                         rebase_swizzle, normalized, width);
+            src += src_stride;
+         }
+      }
+
+      if (dst_format_is_mesa_array_format) {
+         for (row = 0; row < height; ++row) {
+            _mesa_swizzle_and_convert(dst, dst_type, dst_num_channels,
+                                      tmp_ubyte + row * width,
+                                      MESA_ARRAY_FORMAT_TYPE_UBYTE, 4,
+                                      rgba2dst, normalized, width);
+            dst += dst_stride;
+         }
+      } else {
+         for (row = 0; row < height; ++row) {
+            _mesa_pack_ubyte_rgba_row(dst_format, width,
+                                      (const uint8_t (*)[4])tmp_ubyte + row * width, dst);
+            dst += dst_stride;
+         }
+      }
+
+      free(tmp_ubyte);
+   }
+}
 
 static const uint8_t map_identity[7] = { 0, 1, 2, 3, 4, 5, 6 };
 static const uint8_t map_3210[7] = { 3, 2, 1, 0, 4, 5, 6 };
@@ -132,131 +675,6 @@ _mesa_format_to_array(mesa_format format, GLenum *type, int *num_components,
    }
 }
 
-/* A bunch of format conversion macros and helper functions used below */
-
-/* Only guaranteed to work for BITS <= 32 */
-#define MAX_UINT(BITS) ((BITS) == 32 ? UINT32_MAX : ((1u << (BITS)) - 1))
-#define MAX_INT(BITS) ((int)MAX_UINT((BITS) - 1))
-
-/* Extends an integer of size SRC_BITS to one of size DST_BITS linearly */
-#define EXTEND_NORMALIZED_INT(X, SRC_BITS, DST_BITS) \
-      (((X) * (int)(MAX_UINT(DST_BITS) / MAX_UINT(SRC_BITS))) + \
-       ((DST_BITS % SRC_BITS) ? ((X) >> (SRC_BITS - DST_BITS % SRC_BITS)) : 0))
-
-static inline float
-unorm_to_float(unsigned x, unsigned src_bits)
-{
-   return x * (1.0f / (float)MAX_UINT(src_bits));
-}
-
-static inline float
-snorm_to_float(int x, unsigned src_bits)
-{
-   if (x == -MAX_INT(src_bits))
-      return -1.0f;
-   else
-      return x * (1.0f / (float)MAX_INT(src_bits));
-}
-
-static inline uint16_t
-unorm_to_half(unsigned x, unsigned src_bits)
-{
-   return _mesa_float_to_half(unorm_to_float(x, src_bits));
-}
-
-static inline uint16_t
-snorm_to_half(int x, unsigned src_bits)
-{
-   return _mesa_float_to_half(snorm_to_float(x, src_bits));
-}
-
-static inline unsigned
-float_to_unorm(float x, unsigned dst_bits)
-{
-   if (x < 0.0f)
-      return 0;
-   else if (x > 1.0f)
-      return MAX_UINT(dst_bits);
-   else
-      return F_TO_I(x * MAX_UINT(dst_bits));
-}
-
-static inline unsigned
-half_to_unorm(uint16_t x, unsigned dst_bits)
-{
-   return float_to_unorm(_mesa_half_to_float(x), dst_bits);
-}
-
-static inline unsigned
-unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
-{
-   if (src_bits < dst_bits)
-      return EXTEND_NORMALIZED_INT(x, src_bits, dst_bits);
-   else
-      return x >> (src_bits - dst_bits);
-}
-
-static inline unsigned
-snorm_to_unorm(int x, unsigned src_bits, unsigned dst_bits)
-{
-   if (x < 0)
-      return 0;
-   else
-      return unorm_to_unorm(x, src_bits - 1, dst_bits);
-}
-
-static inline int
-float_to_snorm(float x, unsigned dst_bits)
-{
-   if (x < -1.0f)
-      return -MAX_INT(dst_bits);
-   else if (x > 1.0f)
-      return MAX_INT(dst_bits);
-   else
-      return F_TO_I(x * MAX_INT(dst_bits));
-}
-
-static inline int
-half_to_snorm(uint16_t x, unsigned dst_bits)
-{
-   return float_to_snorm(_mesa_half_to_float(x), dst_bits);
-}
-
-static inline int
-unorm_to_snorm(unsigned x, unsigned src_bits, unsigned dst_bits)
-{
-   return unorm_to_unorm(x, src_bits, dst_bits - 1);
-}
-
-static inline int
-snorm_to_snorm(int x, unsigned src_bits, unsigned dst_bits)
-{
-   if (x < -MAX_INT(src_bits))
-      return -MAX_INT(dst_bits);
-   else if (src_bits < dst_bits)
-      return EXTEND_NORMALIZED_INT(x, src_bits - 1, dst_bits - 1);
-   else
-      return x >> (src_bits - dst_bits);
-}
-
-static inline unsigned
-float_to_uint(float x)
-{
-   if (x < 0.0f)
-      return 0;
-   else
-      return x;
-}
-
-static inline unsigned
-half_to_uint(uint16_t x)
-{
-   if (_mesa_half_is_negative(x))
-      return 0;
-   else
-      return _mesa_float_to_half(x);
-}
-
 /**
  * Attempts to perform the given swizzle-and-convert operation with memcpy
  *
@@ -270,8 +688,12 @@ half_to_uint(uint16_t x)
  *          operation with memcpy, false otherwise
  */
 static bool
-swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
-                           const void *src, GLenum src_type, int num_src_channels,
+swizzle_convert_try_memcpy(void *dst,
+                           enum mesa_array_format_datatype dst_type,
+                           int num_dst_channels,
+                           const void *src,
+                           enum mesa_array_format_datatype src_type,
+                           int num_src_channels,
                            const uint8_t swizzle[4], bool normalized, int count)
 {
    int i;
@@ -285,7 +707,8 @@ swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
       if (swizzle[i] != i && swizzle[i] != MESA_FORMAT_SWIZZLE_NONE)
          return false;
 
-   memcpy(dst, src, count * num_src_channels * _mesa_sizeof_type(src_type));
+   memcpy(dst, src, count * num_src_channels *
+          _mesa_array_format_datatype_get_size(src_type));
 
    return true;
 }
@@ -441,50 +864,50 @@ convert_float(void *void_dst, int num_dst_channels,
    const float one = 1.0f;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       SWIZZLE_CONVERT(float, float, src);
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       SWIZZLE_CONVERT(float, uint16_t, _mesa_half_to_float(src));
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(float, uint8_t, unorm_to_float(src, 8));
+         SWIZZLE_CONVERT(float, uint8_t, _mesa_unorm_to_float(src, 8));
       } else {
          SWIZZLE_CONVERT(float, uint8_t, src);
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(float, int8_t, snorm_to_float(src, 8));
+         SWIZZLE_CONVERT(float, int8_t, _mesa_snorm_to_float(src, 8));
       } else {
          SWIZZLE_CONVERT(float, int8_t, src);
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(float, uint16_t, unorm_to_float(src, 16));
+         SWIZZLE_CONVERT(float, uint16_t, _mesa_unorm_to_float(src, 16));
       } else {
          SWIZZLE_CONVERT(float, uint16_t, src);
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(float, int16_t, snorm_to_float(src, 16));
+         SWIZZLE_CONVERT(float, int16_t, _mesa_snorm_to_float(src, 16));
       } else {
          SWIZZLE_CONVERT(float, int16_t, src);
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(float, uint32_t, unorm_to_float(src, 32));
+         SWIZZLE_CONVERT(float, uint32_t, _mesa_unorm_to_float(src, 32));
       } else {
          SWIZZLE_CONVERT(float, uint32_t, src);
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(float, int32_t, snorm_to_float(src, 32));
+         SWIZZLE_CONVERT(float, int32_t, _mesa_snorm_to_float(src, 32));
       } else {
          SWIZZLE_CONVERT(float, int32_t, src);
       }
@@ -503,50 +926,50 @@ convert_half_float(void *void_dst, int num_dst_channels,
    const uint16_t one = _mesa_float_to_half(1.0f);
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       SWIZZLE_CONVERT(uint16_t, float, _mesa_float_to_half(src));
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       SWIZZLE_CONVERT(uint16_t, uint16_t, src);
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint8_t, unorm_to_half(src, 8));
+         SWIZZLE_CONVERT(uint16_t, uint8_t, _mesa_unorm_to_half(src, 8));
       } else {
          SWIZZLE_CONVERT(uint16_t, uint8_t, _mesa_float_to_half(src));
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int8_t, snorm_to_half(src, 8));
+         SWIZZLE_CONVERT(uint16_t, int8_t, _mesa_snorm_to_half(src, 8));
       } else {
          SWIZZLE_CONVERT(uint16_t, int8_t, _mesa_float_to_half(src));
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint16_t, unorm_to_half(src, 16));
+         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_unorm_to_half(src, 16));
       } else {
          SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_float_to_half(src));
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int16_t, snorm_to_half(src, 16));
+         SWIZZLE_CONVERT(uint16_t, int16_t, _mesa_snorm_to_half(src, 16));
       } else {
          SWIZZLE_CONVERT(uint16_t, int16_t, _mesa_float_to_half(src));
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint32_t, unorm_to_half(src, 32));
+         SWIZZLE_CONVERT(uint16_t, uint32_t, _mesa_unorm_to_half(src, 32));
       } else {
          SWIZZLE_CONVERT(uint16_t, uint32_t, _mesa_float_to_half(src));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int32_t, snorm_to_half(src, 32));
+         SWIZZLE_CONVERT(uint16_t, int32_t, _mesa_snorm_to_half(src, 32));
       } else {
          SWIZZLE_CONVERT(uint16_t, int32_t, _mesa_float_to_half(src));
       }
@@ -556,7 +979,6 @@ convert_half_float(void *void_dst, int num_dst_channels,
    }
 }
 
-
 static void
 convert_ubyte(void *void_dst, int num_dst_channels,
               const void *void_src, GLenum src_type, int num_src_channels,
@@ -565,56 +987,56 @@ convert_ubyte(void *void_dst, int num_dst_channels,
    const uint8_t one = normalized ? UINT8_MAX : 1;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, float, float_to_unorm(src, 8));
+         SWIZZLE_CONVERT(uint8_t, float, _mesa_float_to_unorm(src, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, float, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint8_t, float, _mesa_float_to_unsigned(src, 8));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, half_to_unorm(src, 8));
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_half_to_unorm(src, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, half_to_uint(src));
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_half_to_unsigned(src, 8));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       SWIZZLE_CONVERT(uint8_t, uint8_t, src);
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, int8_t, snorm_to_unorm(src, 8, 8));
+         SWIZZLE_CONVERT(uint8_t, int8_t, _mesa_snorm_to_unorm(src, 8, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, int8_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint8_t, int8_t, _mesa_signed_to_unsigned(src, 8));
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, unorm_to_unorm(src, 16, 8));
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_unorm_to_unorm(src, 16, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, src);
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_unsigned_to_unsigned(src, 8));
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, int16_t, snorm_to_unorm(src, 16, 8));
+         SWIZZLE_CONVERT(uint8_t, int16_t, _mesa_snorm_to_unorm(src, 16, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, int16_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint8_t, int16_t, _mesa_signed_to_unsigned(src, 8));
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, uint32_t, unorm_to_unorm(src, 32, 8));
+         SWIZZLE_CONVERT(uint8_t, uint32_t, _mesa_unorm_to_unorm(src, 32, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, uint32_t, src);
+         SWIZZLE_CONVERT(uint8_t, uint32_t, _mesa_unsigned_to_unsigned(src, 8));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, int32_t, snorm_to_unorm(src, 32, 8));
+         SWIZZLE_CONVERT(uint8_t, int32_t, _mesa_snorm_to_unorm(src, 32, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, int32_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint8_t, int32_t, _mesa_signed_to_unsigned(src, 8));
       }
       break;
    default:
@@ -631,56 +1053,56 @@ convert_byte(void *void_dst, int num_dst_channels,
    const int8_t one = normalized ? INT8_MAX : 1;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, float, float_to_snorm(src, 8));
+         SWIZZLE_CONVERT(uint8_t, float, _mesa_float_to_snorm(src, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, float, src);
+         SWIZZLE_CONVERT(uint8_t, float, _mesa_float_to_signed(src, 8));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, half_to_snorm(src, 8));
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_half_to_snorm(src, 8));
       } else {
-         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_half_to_float(src));
+         SWIZZLE_CONVERT(uint8_t, uint16_t, _mesa_half_to_signed(src, 8));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(int8_t, uint8_t, unorm_to_snorm(src, 8, 8));
+         SWIZZLE_CONVERT(int8_t, uint8_t, _mesa_unorm_to_snorm(src, 8, 8));
       } else {
-         SWIZZLE_CONVERT(int8_t, uint8_t, src);
+         SWIZZLE_CONVERT(int8_t, uint8_t, _mesa_unsigned_to_signed(src, 8));
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       SWIZZLE_CONVERT(int8_t, int8_t, src);
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(int8_t, uint16_t, unorm_to_snorm(src, 16, 8));
+         SWIZZLE_CONVERT(int8_t, uint16_t, _mesa_unorm_to_snorm(src, 16, 8));
       } else {
-         SWIZZLE_CONVERT(int8_t, uint16_t, src);
+         SWIZZLE_CONVERT(int8_t, uint16_t, _mesa_unsigned_to_signed(src, 8));
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(int8_t, int16_t, snorm_to_snorm(src, 16, 8));
+         SWIZZLE_CONVERT(int8_t, int16_t, _mesa_snorm_to_snorm(src, 16, 8));
       } else {
-         SWIZZLE_CONVERT(int8_t, int16_t, src);
+         SWIZZLE_CONVERT(int8_t, int16_t, _mesa_signed_to_signed(src, 8));
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(int8_t, uint32_t, unorm_to_snorm(src, 32, 8));
+         SWIZZLE_CONVERT(int8_t, uint32_t, _mesa_unorm_to_snorm(src, 32, 8));
       } else {
-         SWIZZLE_CONVERT(int8_t, uint32_t, src);
+         SWIZZLE_CONVERT(int8_t, uint32_t, _mesa_unsigned_to_signed(src, 8));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(int8_t, int32_t, snorm_to_snorm(src, 32, 8));
+         SWIZZLE_CONVERT(int8_t, int32_t, _mesa_snorm_to_snorm(src, 32, 8));
       } else {
-         SWIZZLE_CONVERT(int8_t, int32_t, src);
+         SWIZZLE_CONVERT(int8_t, int32_t, _mesa_signed_to_signed(src, 8));
       }
       break;
    default:
@@ -697,56 +1119,56 @@ convert_ushort(void *void_dst, int num_dst_channels,
    const uint16_t one = normalized ? UINT16_MAX : 1;
    
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, float, float_to_unorm(src, 16));
+         SWIZZLE_CONVERT(uint16_t, float, _mesa_float_to_unorm(src, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, float, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint16_t, float, _mesa_float_to_unsigned(src, 16));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint16_t, half_to_unorm(src, 16));
+         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_half_to_unorm(src, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, uint16_t, half_to_uint(src));
+         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_half_to_unsigned(src, 16));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint8_t, unorm_to_unorm(src, 8, 16));
+         SWIZZLE_CONVERT(uint16_t, uint8_t, _mesa_unorm_to_unorm(src, 8, 16));
       } else {
          SWIZZLE_CONVERT(uint16_t, uint8_t, src);
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int8_t, snorm_to_unorm(src, 8, 16));
+         SWIZZLE_CONVERT(uint16_t, int8_t, _mesa_snorm_to_unorm(src, 8, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, int8_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint16_t, int8_t, _mesa_signed_to_unsigned(src, 16));
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       SWIZZLE_CONVERT(uint16_t, uint16_t, src);
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int16_t, snorm_to_unorm(src, 16, 16));
+         SWIZZLE_CONVERT(uint16_t, int16_t, _mesa_snorm_to_unorm(src, 16, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, int16_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint16_t, int16_t, _mesa_signed_to_unsigned(src, 16));
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint32_t, unorm_to_unorm(src, 32, 16));
+         SWIZZLE_CONVERT(uint16_t, uint32_t, _mesa_unorm_to_unorm(src, 32, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, uint32_t, src);
+         SWIZZLE_CONVERT(uint16_t, uint32_t, _mesa_unsigned_to_unsigned(src, 16));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, int32_t, snorm_to_unorm(src, 32, 16));
+         SWIZZLE_CONVERT(uint16_t, int32_t, _mesa_snorm_to_unorm(src, 32, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, int32_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint16_t, int32_t, _mesa_signed_to_unsigned(src, 16));
       }
       break;
    default:
@@ -763,56 +1185,56 @@ convert_short(void *void_dst, int num_dst_channels,
    const int16_t one = normalized ? INT16_MAX : 1;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, float, float_to_snorm(src, 16));
+         SWIZZLE_CONVERT(uint16_t, float, _mesa_float_to_snorm(src, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, float, src);
+         SWIZZLE_CONVERT(uint16_t, float, _mesa_float_to_signed(src, 16));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint16_t, uint16_t, half_to_snorm(src, 16));
+         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_half_to_snorm(src, 16));
       } else {
-         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_half_to_float(src));
+         SWIZZLE_CONVERT(uint16_t, uint16_t, _mesa_half_to_signed(src, 16));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(int16_t, uint8_t, unorm_to_snorm(src, 8, 16));
+         SWIZZLE_CONVERT(int16_t, uint8_t, _mesa_unorm_to_snorm(src, 8, 16));
       } else {
          SWIZZLE_CONVERT(int16_t, uint8_t, src);
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(int16_t, int8_t, snorm_to_snorm(src, 8, 16));
+         SWIZZLE_CONVERT(int16_t, int8_t, _mesa_snorm_to_snorm(src, 8, 16));
       } else {
          SWIZZLE_CONVERT(int16_t, int8_t, src);
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(int16_t, uint16_t, unorm_to_snorm(src, 16, 16));
+         SWIZZLE_CONVERT(int16_t, uint16_t, _mesa_unorm_to_snorm(src, 16, 16));
       } else {
-         SWIZZLE_CONVERT(int16_t, uint16_t, src);
+         SWIZZLE_CONVERT(int16_t, uint16_t, _mesa_unsigned_to_signed(src, 16));
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       SWIZZLE_CONVERT(int16_t, int16_t, src);
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(int16_t, uint32_t, unorm_to_snorm(src, 32, 16));
+         SWIZZLE_CONVERT(int16_t, uint32_t, _mesa_unorm_to_snorm(src, 32, 16));
       } else {
-         SWIZZLE_CONVERT(int16_t, uint32_t, src);
+         SWIZZLE_CONVERT(int16_t, uint32_t, _mesa_unsigned_to_signed(src, 16));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(int16_t, int32_t, snorm_to_snorm(src, 32, 16));
+         SWIZZLE_CONVERT(int16_t, int32_t, _mesa_snorm_to_snorm(src, 32, 16));
       } else {
-         SWIZZLE_CONVERT(int16_t, int32_t, src);
+         SWIZZLE_CONVERT(int16_t, int32_t, _mesa_signed_to_signed(src, 16));
       }
       break;
    default:
@@ -828,56 +1250,56 @@ convert_uint(void *void_dst, int num_dst_channels,
    const uint32_t one = normalized ? UINT32_MAX : 1;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, float, float_to_unorm(src, 32));
+         SWIZZLE_CONVERT(uint32_t, float, _mesa_float_to_unorm(src, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, float, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint32_t, float, _mesa_float_to_unsigned(src, 32));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, uint16_t, half_to_unorm(src, 32));
+         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_half_to_unorm(src, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, uint16_t, half_to_uint(src));
+         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_half_to_unsigned(src, 32));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, uint8_t, unorm_to_unorm(src, 8, 32));
+         SWIZZLE_CONVERT(uint32_t, uint8_t, _mesa_unorm_to_unorm(src, 8, 32));
       } else {
          SWIZZLE_CONVERT(uint32_t, uint8_t, src);
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, int8_t, snorm_to_unorm(src, 8, 32));
+         SWIZZLE_CONVERT(uint32_t, int8_t, _mesa_snorm_to_unorm(src, 8, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, int8_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint32_t, int8_t, _mesa_signed_to_unsigned(src, 32));
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, uint16_t, unorm_to_unorm(src, 16, 32));
+         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_unorm_to_unorm(src, 16, 32));
       } else {
          SWIZZLE_CONVERT(uint32_t, uint16_t, src);
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, int16_t, snorm_to_unorm(src, 16, 32));
+         SWIZZLE_CONVERT(uint32_t, int16_t, _mesa_snorm_to_unorm(src, 16, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, int16_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint32_t, int16_t, _mesa_signed_to_unsigned(src, 32));
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       SWIZZLE_CONVERT(uint32_t, uint32_t, src);
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, int32_t, snorm_to_unorm(src, 32, 32));
+         SWIZZLE_CONVERT(uint32_t, int32_t, _mesa_snorm_to_unorm(src, 32, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, int32_t, (src < 0) ? 0 : src);
+         SWIZZLE_CONVERT(uint32_t, int32_t, _mesa_signed_to_unsigned(src, 32));
       }
       break;
    default:
@@ -891,59 +1313,59 @@ convert_int(void *void_dst, int num_dst_channels,
             const void *void_src, GLenum src_type, int num_src_channels,
             const uint8_t swizzle[4], bool normalized, int count)
 {
-   const int32_t one = normalized ? INT32_MAX : 12;
+   const int32_t one = normalized ? INT32_MAX : 1;
 
    switch (src_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, float, float_to_snorm(src, 32));
+         SWIZZLE_CONVERT(uint32_t, float, _mesa_float_to_snorm(src, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, float, src);
+         SWIZZLE_CONVERT(uint32_t, float, _mesa_float_to_signed(src, 32));
       }
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       if (normalized) {
-         SWIZZLE_CONVERT(uint32_t, uint16_t, half_to_snorm(src, 32));
+         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_half_to_snorm(src, 32));
       } else {
-         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_half_to_float(src));
+         SWIZZLE_CONVERT(uint32_t, uint16_t, _mesa_half_to_signed(src, 32));
       }
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(int32_t, uint8_t, unorm_to_snorm(src, 8, 32));
+         SWIZZLE_CONVERT(int32_t, uint8_t, _mesa_unorm_to_snorm(src, 8, 32));
       } else {
          SWIZZLE_CONVERT(int32_t, uint8_t, src);
       }
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       if (normalized) {
-         SWIZZLE_CONVERT(int32_t, int8_t, snorm_to_snorm(src, 8, 32));
+         SWIZZLE_CONVERT(int32_t, int8_t, _mesa_snorm_to_snorm(src, 8, 32));
       } else {
          SWIZZLE_CONVERT(int32_t, int8_t, src);
       }
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(int32_t, uint16_t, unorm_to_snorm(src, 16, 32));
+         SWIZZLE_CONVERT(int32_t, uint16_t, _mesa_unorm_to_snorm(src, 16, 32));
       } else {
          SWIZZLE_CONVERT(int32_t, uint16_t, src);
       }
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       if (normalized) {
-         SWIZZLE_CONVERT(int32_t, int16_t, snorm_to_snorm(src, 16, 32));
+         SWIZZLE_CONVERT(int32_t, int16_t, _mesa_snorm_to_snorm(src, 16, 32));
       } else {
          SWIZZLE_CONVERT(int32_t, int16_t, src);
       }
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       if (normalized) {
-         SWIZZLE_CONVERT(int32_t, uint32_t, unorm_to_snorm(src, 32, 32));
+         SWIZZLE_CONVERT(int32_t, uint32_t, _mesa_unorm_to_snorm(src, 32, 32));
       } else {
-         SWIZZLE_CONVERT(int32_t, uint32_t, src);
+         SWIZZLE_CONVERT(int32_t, uint32_t, _mesa_unsigned_to_signed(src, 32));
       }
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       SWIZZLE_CONVERT(int32_t, int32_t, src);
       break;
    default:
@@ -1001,8 +1423,8 @@ convert_int(void *void_dst, int num_dst_channels,
  * \param[in]  count             the number of pixels to convert
  */
 void
-_mesa_swizzle_and_convert(void *void_dst, GLenum dst_type, int num_dst_channels,
-                          const void *void_src, GLenum src_type, int num_src_channels,
+_mesa_swizzle_and_convert(void *void_dst, enum mesa_array_format_datatype dst_type, int num_dst_channels,
+                          const void *void_src, enum mesa_array_format_datatype src_type, int num_src_channels,
                           const uint8_t swizzle[4], bool normalized, int count)
 {
    if (swizzle_convert_try_memcpy(void_dst, dst_type, num_dst_channels,
@@ -1011,35 +1433,35 @@ _mesa_swizzle_and_convert(void *void_dst, GLenum dst_type, int num_dst_channels,
       return;
 
    switch (dst_type) {
-   case GL_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_FLOAT:
       convert_float(void_dst, num_dst_channels, void_src, src_type,
                     num_src_channels, swizzle, normalized, count);
       break;
-   case GL_HALF_FLOAT:
+   case MESA_ARRAY_FORMAT_TYPE_HALF:
       convert_half_float(void_dst, num_dst_channels, void_src, src_type,
                     num_src_channels, swizzle, normalized, count);
       break;
-   case GL_UNSIGNED_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_UBYTE:
       convert_ubyte(void_dst, num_dst_channels, void_src, src_type,
                     num_src_channels, swizzle, normalized, count);
       break;
-   case GL_BYTE:
+   case MESA_ARRAY_FORMAT_TYPE_BYTE:
       convert_byte(void_dst, num_dst_channels, void_src, src_type,
                    num_src_channels, swizzle, normalized, count);
       break;
-   case GL_UNSIGNED_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_USHORT:
       convert_ushort(void_dst, num_dst_channels, void_src, src_type,
                      num_src_channels, swizzle, normalized, count);
       break;
-   case GL_SHORT:
+   case MESA_ARRAY_FORMAT_TYPE_SHORT:
       convert_short(void_dst, num_dst_channels, void_src, src_type,
                     num_src_channels, swizzle, normalized, count);
       break;
-   case GL_UNSIGNED_INT:
+   case MESA_ARRAY_FORMAT_TYPE_UINT:
       convert_uint(void_dst, num_dst_channels, void_src, src_type,
                    num_src_channels, swizzle, normalized, count);
       break;
-   case GL_INT:
+   case MESA_ARRAY_FORMAT_TYPE_INT:
       convert_int(void_dst, num_dst_channels, void_src, src_type,
                   num_src_channels, swizzle, normalized, count);
       break;
diff --git a/mesalib/src/mesa/main/format_utils.h b/mesalib/src/mesa/main/format_utils.h
index 9f778e377..7f500ec78 100644
--- a/mesalib/src/mesa/main/format_utils.h
+++ b/mesalib/src/mesa/main/format_utils.h
@@ -32,14 +32,205 @@
 #define FORMAT_UTILS_H
 
 #include "imports.h"
+#include "macros.h"
+
+extern const mesa_array_format RGBA32_FLOAT;
+extern const mesa_array_format RGBA8_UBYTE;
+extern const mesa_array_format RGBA32_UINT;
+extern const mesa_array_format RGBA32_INT;
+
+/* Only guaranteed to work for BITS <= 32; arguments may be expressions */
+#define MAX_UINT(BITS) ((BITS) == 32 ? UINT32_MAX : ((1u << (BITS)) - 1))
+#define MAX_INT(BITS) ((int)MAX_UINT((BITS) - 1))
+#define MIN_INT(BITS) ((BITS) == 32 ? INT32_MIN : (-(1 << ((BITS) - 1))))
+
+/* Extends an integer of size SRC_BITS to one of size DST_BITS linearly */
+#define EXTEND_NORMALIZED_INT(X, SRC_BITS, DST_BITS) \
+   (((X) * (int)(MAX_UINT(DST_BITS) / MAX_UINT(SRC_BITS))) + \
+    (((DST_BITS) % (SRC_BITS)) ? ((X) >> ((SRC_BITS) - (DST_BITS) % (SRC_BITS))) : 0))
+
+/* Unsigned normalized integer to float: x times 1 / MAX_UINT(src_bits). */
+static inline float _mesa_unorm_to_float(unsigned x, unsigned src_bits)
+{
+   return (1.0f / (float)MAX_UINT(src_bits)) * x;
+}
+
+/* Convert a signed normalized integer of src_bits bits to a float.
+ * Anything at or below -MAX_INT(src_bits) is pinned to -1.0f. */
+static inline float _mesa_snorm_to_float(int x, unsigned src_bits)
+{
+   const int max_mag = MAX_INT(src_bits);
+
+   return (x > -max_mag) ? x * (1.0f / (float)max_mag) : -1.0f;
+}
+
+/* Unsigned normalized integer to half-float bits, by way of a float. */
+static inline uint16_t _mesa_unorm_to_half(unsigned x, unsigned src_bits)
+{
+   return _mesa_float_to_half(_mesa_unorm_to_float(x, src_bits));
+}
+
+/* Signed normalized integer to half-float bits, by way of a float. */
+static inline uint16_t _mesa_snorm_to_half(int x, unsigned src_bits)
+{
+   return _mesa_float_to_half(_mesa_snorm_to_float(x, src_bits));
+}
+
+/* Float to unsigned normalized integer of dst_bits bits: below 0 gives 0,
+ * above 1 gives MAX_UINT(dst_bits), else F_TO_I(x * MAX_UINT(dst_bits)). */
+static inline unsigned _mesa_float_to_unorm(float x, unsigned dst_bits)
+{
+   const unsigned top = MAX_UINT(dst_bits);
+
+   if (x > 1.0f)
+      return top;
+   return (x < 0.0f) ? 0 : F_TO_I(x * top);
+}
+
+/* Half-float bits to unsigned normalized integer, by way of a float. */
+static inline unsigned _mesa_half_to_unorm(uint16_t x, unsigned dst_bits)
+{
+   return _mesa_float_to_unorm(_mesa_half_to_float(x), dst_bits);
+}
+
+/* Rescale an unsigned normalized value from src_bits to dst_bits bits. */
+static inline unsigned
+_mesa_unorm_to_unorm(unsigned x, unsigned src_bits, unsigned dst_bits)
+{
+   if (src_bits < dst_bits) {
+      return EXTEND_NORMALIZED_INT(x, src_bits, dst_bits);
+   } else {
+      /* 1u, not 1: for src_bits == 32 a signed 1 << 31 overflows int. */
+      unsigned src_half = (1u << (src_bits - 1)) - 1;
+      if (src_bits + dst_bits > sizeof(x) * 8) {
+         assert(src_bits + dst_bits <= sizeof(uint64_t) * 8);
+         return (((uint64_t) x * MAX_UINT(dst_bits) + src_half) /
+                 MAX_UINT(src_bits));
+      }
+      return (x * MAX_UINT(dst_bits) + src_half) / MAX_UINT(src_bits);
+   }
+}
+
+/* Convert a signed normalized value to an unsigned normalized one.
+ * Negative inputs give 0; the rest are rescaled as unorms of
+ * src_bits - 1 bits. */
+static inline unsigned
+_mesa_snorm_to_unorm(int x, unsigned src_bits, unsigned dst_bits)
+{
+   return (x >= 0) ? _mesa_unorm_to_unorm(x, src_bits - 1, dst_bits) : 0;
+}
+
+/* Convert a float to a signed normalized integer of dst_bits bits.  Inputs
+ * outside [-1, 1] saturate to -MAX_INT(dst_bits) or MAX_INT(dst_bits). */
+static inline int
+_mesa_float_to_snorm(float x, unsigned dst_bits)
+{
+   const int limit = MAX_INT(dst_bits);
+   if (x > 1.0f)
+      return limit;
+   return (x < -1.0f) ? -limit : F_TO_I(x * limit);
+}
+
+/* Half-float bits to signed normalized integer, by way of a float. */
+static inline int _mesa_half_to_snorm(uint16_t x, unsigned dst_bits)
+{
+   return _mesa_float_to_snorm(_mesa_half_to_float(x), dst_bits);
+}
+
+/* Unorm to snorm: rescale x as an unorm of dst_bits - 1 bits. */
+static inline int _mesa_unorm_to_snorm(unsigned x, unsigned src_bits, unsigned dst_bits)
+{
+   return _mesa_unorm_to_unorm(x, src_bits, dst_bits - 1);
+}
+
+/* Rescale a signed normalized value from src_bits to dst_bits bits.  The
+ * macro arguments are parenthesized because the macro does not do it. */
+static inline int
+_mesa_snorm_to_snorm(int x, unsigned src_bits, unsigned dst_bits)
+{
+   if (x < -MAX_INT(src_bits))
+      return -MAX_INT(dst_bits);
+   return (src_bits >= dst_bits) ? (x >> (src_bits - dst_bits)) :
+      EXTEND_NORMALIZED_INT(x, (src_bits - 1), (dst_bits - 1));
+}
+
+/* Limit an unsigned value to MAX_UINT(dst_size) by way of MIN2. */
+static inline unsigned _mesa_unsigned_to_unsigned(unsigned src, unsigned dst_size)
+{
+   return MIN2(src, MAX_UINT(dst_size));
+}
+
+/* Limit an unsigned value to MAX_INT(dst_size) by way of MIN2. */
+static inline int _mesa_unsigned_to_signed(unsigned src, unsigned dst_size)
+{
+   return MIN2(src, (unsigned)MAX_INT(dst_size));
+}
+
+/* CLAMP a signed value into [MIN_INT(dst_size), MAX_INT(dst_size)]. */
+static inline int _mesa_signed_to_signed(int src, unsigned dst_size)
+{
+   return CLAMP(src, MIN_INT(dst_size), MAX_INT(dst_size));
+}
+
+/* CLAMP a signed value into [0, MAX_UINT(dst_size)]. */
+static inline unsigned _mesa_signed_to_unsigned(int src, unsigned dst_size)
+{
+   return CLAMP(src, 0, MAX_UINT(dst_size));
+}
+
+/* Truncate a float to an unsigned integer clamped to [0, MAX_UINT(dst_bits)].
+ * The cast goes straight to unsigned, not through int, so 32-bit results
+ * above INT_MAX survive; ">=" keeps 2^32 itself away from the cast. */
+static inline unsigned _mesa_float_to_unsigned(float src, unsigned dst_bits)
+{
+   if (!(src > 0.0f)) /* negative, zero or NaN */
+      return 0;
+   return src >= (float)MAX_UINT(dst_bits) ? MAX_UINT(dst_bits) : (unsigned)src;
+}
+
+/* Truncate a float to a signed integer clamped to +/-MAX_INT(dst_bits).  The
+ * upper test is ">=": for 32 bits (float)MAX_INT is 2^31, too big for int. */
+static inline unsigned _mesa_float_to_signed(float src, unsigned dst_bits)
+{
+   if (src < (float)(-MAX_INT(dst_bits)))
+      return -MAX_INT(dst_bits);
+   return src >= (float)MAX_INT(dst_bits) ? MAX_INT(dst_bits) :
+          _mesa_signed_to_signed(src, dst_bits);
+}
+
+/* src holds half-float bits, so decode with _mesa_half_to_float() first. */
+static inline unsigned _mesa_half_to_unsigned(uint16_t src, unsigned dst_bits)
+{
+   if (_mesa_half_is_negative(src))
+      return 0;
+   return _mesa_float_to_unsigned(_mesa_half_to_float(src), dst_bits);
+}
+
+/* Half-float bits to a signed integer of dst_bits bits, by way of a float. */
+static inline unsigned _mesa_half_to_signed(uint16_t src, unsigned dst_bits)
+{
+   return _mesa_float_to_signed(_mesa_half_to_float(src), dst_bits);
+}
 
 bool
 _mesa_format_to_array(mesa_format, GLenum *type, int *num_components,
                       uint8_t swizzle[4], bool *normalized);
 
 void
-_mesa_swizzle_and_convert(void *dst, GLenum dst_type, int num_dst_channels,
-                          const void *src, GLenum src_type, int num_src_channels,
+_mesa_swizzle_and_convert(void *dst,
+                          enum mesa_array_format_datatype dst_type,
+                          int num_dst_channels,
+                          const void *src,
+                          enum mesa_array_format_datatype src_type,
+                          int num_src_channels,
                           const uint8_t swizzle[4], bool normalized, int count);
 
+bool
+_mesa_compute_rgba2base2rgba_component_mapping(GLenum baseFormat, uint8_t *map);
+
+void
+_mesa_format_convert(void *void_dst, uint32_t dst_format, size_t dst_stride,
+                     void *void_src, uint32_t src_format, size_t src_stride,
+                     size_t width, size_t height, uint8_t *rebase_swizzle);
+
 #endif
diff --git a/mesalib/src/mesa/main/formatquery.c b/mesalib/src/mesa/main/formatquery.c
index f6274fe30..7741cabad 100644
--- a/mesalib/src/mesa/main/formatquery.c
+++ b/mesalib/src/mesa/main/formatquery.c
@@ -89,8 +89,22 @@ _mesa_GetInternalformativ(GLenum target, GLenum internalformat, GLenum pname,
     *     "If the <internalformat> parameter to GetInternalformativ is not
     *     color-, depth- or stencil-renderable, then an INVALID_ENUM error is
     *     generated."
+    *
+    * Page 243 of the GLES 3.0.4 spec says this for GetInternalformativ:
+    *
+    *     "internalformat must be color-renderable, depth-renderable or
+    *     stencil-renderable (as defined in section 4.4.4)."
+    *
+    * Section 4.4.4 on page 212 of the same spec says:
+    *
+    *     "An internal format is color-renderable if it is one of the
+    *     formats from table 3.13 noted as color-renderable or if it
+    *     is unsized format RGBA or RGB."
+    *
+    * Therefore, we must accept GL_RGB and GL_RGBA here.
     */
-   if (_mesa_base_fbo_format(ctx, internalformat) == 0) {
+   if (internalformat != GL_RGB && internalformat != GL_RGBA &&
+       _mesa_base_fbo_format(ctx, internalformat) == 0) {
       _mesa_error(ctx, GL_INVALID_ENUM,
                   "glGetInternalformativ(internalformat=%s)",
                   _mesa_lookup_enum_by_nr(internalformat));
diff --git a/mesalib/src/mesa/main/formats.c b/mesalib/src/mesa/main/formats.c
index 58c32e23b..958d6f245 100644
--- a/mesalib/src/mesa/main/formats.c
+++ b/mesalib/src/mesa/main/formats.c
@@ -28,7 +28,8 @@
 #include "formats.h"
 #include "macros.h"
 #include "glformats.h"
-
+#include "c11/threads.h"
+#include "util/hash_table.h"
 
 /**
  * Information about texture formats.
@@ -71,6 +72,7 @@ struct gl_format_info
    GLubyte BytesPerBlock;
 
    uint8_t Swizzle[4];
+   mesa_array_format ArrayFormat;
 };
 
 #include "format_info.c"
@@ -213,17 +215,87 @@ _mesa_get_format_datatype(mesa_format format)
    return info->DataType;
 }
 
+/**
+ * Work out the GL base format of an array format from its channel count
+ * and swizzle alone.
+ *
+ * Four and three channels map straight to GL_RGBA and GL_RGB.  For two and
+ * one channels the swizzle is compared against the fixed patterns below.
+ * Combinations that match nothing end up in unreachable().
+ *
+ * NOTE(review): the raw values 4 and 5 in the patterns are presumably
+ * MESA_FORMAT_SWIZZLE_ZERO and MESA_FORMAT_SWIZZLE_ONE; confirm in formats.h.
+ */
+static GLenum
+get_base_format_for_array_format(mesa_array_format format)
+{
+   /* Swizzles recognised for two-channel formats. */
+   static const uint8_t la_swz[4] = { 0, 0, 0, 1 };
+   static const uint8_t al_swz[4] = { 1, 1, 1, 0 };
+   static const uint8_t rg_swz[4] = { 0, 1, 4, 5 };
+   static const uint8_t gr_swz[4] = { 1, 0, 4, 5 };
+
+   /* Swizzles recognised for one-channel formats. */
+   static const uint8_t lum_swz[4] = { 0, 0, 0, 5 };
+   static const uint8_t int_swz[4] = { 0, 0, 0, 0 };
+
+   /* One-channel fallback, indexed by swizzle position. */
+   static const GLenum single[4] = { GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA };
+   uint8_t swz[4];
+   int i;
+
+   _mesa_array_format_get_swizzle(format, swz);
+
+   switch (_mesa_array_format_get_num_channels(format)) {
+   case 4:
+      /* FIXME: RGBX formats have 4 channels, but their base format is GL_RGB.
+       * This is not really a problem for now because we only create array
+       * formats from GL format/type combinations, and these cannot specify
+       * RGBX formats.
+       */
+      return GL_RGBA;
+   case 3:
+      return GL_RGB;
+   case 2:
+      if (memcmp(swz, la_swz, sizeof(swz)) == 0 ||
+          memcmp(swz, al_swz, sizeof(swz)) == 0)
+         return GL_LUMINANCE_ALPHA;
+      if (memcmp(swz, rg_swz, sizeof(swz)) == 0 ||
+          memcmp(swz, gr_swz, sizeof(swz)) == 0)
+         return GL_RG;
+      break;
+   case 1:
+      if (memcmp(swz, lum_swz, sizeof(swz)) == 0)
+         return GL_LUMINANCE;
+      if (memcmp(swz, int_swz, sizeof(swz)) == 0)
+         return GL_INTENSITY;
+      /* Otherwise the first position holding a value up to
+       * MESA_FORMAT_SWIZZLE_W decides the result. */
+      for (i = 0; i < 4; i++) {
+         if (swz[i] <= MESA_FORMAT_SWIZZLE_W)
+            return single[i];
+      }
+      break;
+   }
+
+   unreachable("Unsupported format");
+}
 
 /**
  * Return the basic format for the given type.  The result will be one of
  * GL_RGB, GL_RGBA, GL_ALPHA, GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY,
  * GL_YCBCR_MESA, GL_DEPTH_COMPONENT, GL_STENCIL_INDEX, GL_DEPTH_STENCIL.
+ * This function accepts a mesa_format or a mesa_array_format.
  */
 GLenum
-_mesa_get_format_base_format(mesa_format format)
+_mesa_get_format_base_format(uint32_t format)
 {
-   const struct gl_format_info *info = _mesa_get_format_info(format);
-   return info->BaseFormat;
+   if (!_mesa_format_is_mesa_array_format(format)) { /* plain mesa_format */
+      const struct gl_format_info *info = _mesa_get_format_info(format);
+      return info->BaseFormat;
+   } else { /* array format: derived from channel count and swizzle */
+      return get_base_format_for_array_format(format);
+   }
 }
 
 
@@ -269,6 +341,105 @@ _mesa_get_format_swizzle(mesa_format format, uint8_t swizzle_out[4])
    memcpy(swizzle_out, info->Swizzle, sizeof(info->Swizzle));
 }
 
+/**
+ * Return the array format describing the same texel data with its channels
+ * stored in the opposite order (the caller uses this on big-endian hosts).
+ *
+ * Reversing the channel order moves source channel c to index
+ * (num_channels - 1 - c), so each swizzle entry that names a source channel
+ * is replaced by its mirrored index.  Swizzle values of 4 and above are not
+ * channel indices (4 and 5 select constants) and are left untouched.
+ *
+ * Single-channel formats are returned unchanged.  Only 1, 2 and 4 channel
+ * formats are supported; any other count hits unreachable().
+ */
+mesa_array_format
+_mesa_array_format_flip_channels(mesa_array_format format)
+{
+   /* Lookup tables indexed by a 3-bit swizzle value. */
+   static const uint8_t flip_2[8] = { 1, 0, 2, 3, 4, 5, 6, 7 };
+   static const uint8_t flip_4[8] = { 3, 2, 1, 0, 4, 5, 6, 7 };
+   const uint8_t *flip;
+   uint8_t swizzle[4];
+   int num_channels;
+
+   num_channels = _mesa_array_format_get_num_channels(format);
+
+   if (num_channels == 1)
+      return format;
+
+   if (num_channels != 2 && num_channels != 4)
+      unreachable("Invalid array format");
+
+   flip = (num_channels == 2) ? flip_2 : flip_4;
+
+   _mesa_array_format_get_swizzle(format, swizzle);
+
+   /* Clear the old swizzle bits explicitly: OR-ing the new values on top of
+    * the old ones would merge the two swizzles.
+    */
+   format &= ~(mesa_array_format)(MESA_ARRAY_FORMAT_SWIZZLE_X_MASK |
+                                  MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK |
+                                  MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK |
+                                  MESA_ARRAY_FORMAT_SWIZZLE_W_MASK);
+   _mesa_array_format_set_swizzle(&format,
+                                  flip[swizzle[0]], flip[swizzle[1]],
+                                  flip[swizzle[2]], flip[swizzle[3]]);
+   return format;
+}
+
+/**
+ * Return the array format stored for \p format, or 0 when the format has
+ * none.  On big-endian hosts the stored value is passed through
+ * _mesa_array_format_flip_channels() first.
+ *
+ * NOTE(review): the flip is applied to every non-zero ArrayFormat; confirm
+ * whether formats that are not packed should be flipped at all.
+ */
+uint32_t
+_mesa_format_to_array_format(mesa_format format)
+{
+   const struct gl_format_info *info = _mesa_get_format_info(format);
+
+   /* A zero ArrayFormat means "no array format"; it has no channels to flip
+    * and would trip unreachable() in the flip helper.
+    */
+   if (!info->ArrayFormat || _mesa_little_endian())
+      return info->ArrayFormat;
+
+   return _mesa_array_format_flip_channels(info->ArrayFormat);
+}
+
+/* Lookup table from array format to mesa_format.  It is created on first
+ * use through call_once() and remains NULL if its creation failed.
+ */
+static struct hash_table *format_array_format_table;
+static once_flag format_array_format_table_exists = ONCE_FLAG_INIT;
+
+/* Keys are array format values stored directly in the pointer, so comparing
+ * the pointer values compares the formats.
+ */
+static bool
+array_formats_equal(const void *a, const void *b)
+{
+   return (intptr_t)a == (intptr_t)b;
+}
+
+/**
+ * Fill format_array_format_table with one entry per distinct array format,
+ * mapping it to the first mesa_format (in enum order, starting at 1) that
+ * uses it.  Formats whose ArrayFormat is zero are skipped.  On big-endian
+ * hosts the channel-flipped array format is used as the key.
+ */
+static void
+format_array_format_table_init(void)
+{
+   const struct gl_format_info *info;
+   mesa_array_format array_format;
+   unsigned f;
+
+   format_array_format_table = _mesa_hash_table_create(NULL, NULL,
+                                                       array_formats_equal);
+
+   /* Without a table there is nothing to fill in; lookups then report
+    * MESA_FORMAT_NONE instead of searching a NULL table.
+    */
+   if (!format_array_format_table)
+      return;
+
+   for (f = 1; f < MESA_FORMAT_COUNT; ++f) {
+      info = _mesa_get_format_info(f);
+      if (!info->ArrayFormat)
+         continue;
+
+      if (_mesa_little_endian()) {
+         array_format = info->ArrayFormat;
+      } else {
+         array_format = _mesa_array_format_flip_channels(info->ArrayFormat);
+      }
+
+      /* This can happen and does for some of the BGR formats.  Let's take
+       * the first one in the list.
+       */
+      if (_mesa_hash_table_search_pre_hashed(format_array_format_table,
+                                             array_format,
+                                             (void *)(intptr_t)array_format))
+         continue;
+
+      /* The array format value doubles as its own hash and as the key. */
+      _mesa_hash_table_insert_pre_hashed(format_array_format_table,
+                                         array_format,
+                                         (void *)(intptr_t)array_format,
+                                         (void *)(intptr_t)f);
+   }
+}
+
+/**
+ * Return the mesa_format registered for \p array_format, or
+ * MESA_FORMAT_NONE when no format uses it or the lookup table could not be
+ * created.  \p array_format must have MESA_ARRAY_FORMAT_BIT set.
+ */
+mesa_format
+_mesa_format_from_array_format(uint32_t array_format)
+{
+   struct hash_entry *entry;
+
+   assert(_mesa_format_is_mesa_array_format(array_format));
+
+   call_once(&format_array_format_table_exists, format_array_format_table_init);
+
+   /* Table creation can fail; do not search a NULL table. */
+   if (!format_array_format_table)
+      return MESA_FORMAT_NONE;
+
+   entry = _mesa_hash_table_search_pre_hashed(format_array_format_table,
+                                              array_format,
+                                              (void *)(intptr_t)array_format);
+   if (entry)
+      return (intptr_t)entry->data;
+   else
+      return MESA_FORMAT_NONE;
+}
 
 /** Is the given format a compressed format? */
 GLboolean
@@ -345,6 +516,25 @@ _mesa_is_format_integer(mesa_format format)
    return (info->DataType == GL_INT || info->DataType == GL_UNSIGNED_INT);
 }
 
+
+/**
+ * Tell whether the given format is a color format, i.e. its base format is
+ * none of GL_DEPTH_COMPONENT, GL_STENCIL_INDEX and GL_DEPTH_STENCIL.
+ *
+ * \return true for color formats, false otherwise.  The declared return
+ * type is GLenum, but only these two values are ever returned.
+ */
+GLenum
+_mesa_is_format_color_format(mesa_format format)
+{
+   const GLenum base = _mesa_get_format_info(format)->BaseFormat;
+
+   if (base == GL_DEPTH_COMPONENT ||
+       base == GL_STENCIL_INDEX ||
+       base == GL_DEPTH_STENCIL)
+      return false;
+
+   return true;
+}
+
+
 /**
  * Return color encoding for given format.
  * \return GL_LINEAR or GL_SRGB
@@ -864,6 +1054,34 @@ _mesa_format_to_type_and_comps(mesa_format format,
       *comps = 1;
       return;
 
+   case MESA_FORMAT_R3G3B2_UNORM:
+      *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
+      *comps = 3;
+      return;
+   case MESA_FORMAT_A4B4G4R4_UNORM:
+      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
+      *comps = 4;
+      return;
+
+   case MESA_FORMAT_R4G4B4A4_UNORM:
+      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
+      *comps = 4;
+      return;
+   case MESA_FORMAT_R5G5B5A1_UNORM:
+      *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
+      *comps = 4;
+      return;
+   case MESA_FORMAT_A2B10G10R10_UNORM:
+   case MESA_FORMAT_A2B10G10R10_UINT:
+      *datatype = GL_UNSIGNED_INT_10_10_10_2;
+      *comps = 4;
+      return;
+   case MESA_FORMAT_A2R10G10B10_UNORM:
+   case MESA_FORMAT_A2R10G10B10_UINT:
+      *datatype = GL_UNSIGNED_INT_10_10_10_2;
+      *comps = 4;
+      return;
+
    case MESA_FORMAT_B2G3R3_UNORM:
       *datatype = GL_UNSIGNED_BYTE_3_3_2;
       *comps = 3;
@@ -1265,6 +1483,7 @@ _mesa_format_to_type_and_comps(mesa_format format,
       return;
 
    case MESA_FORMAT_B10G10R10X2_UNORM:
+   case MESA_FORMAT_R10G10B10X2_UNORM:
       *datatype = GL_UNSIGNED_INT_2_10_10_10_REV;
       *comps = 4;
       return;
@@ -1462,14 +1681,14 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
       return format == GL_RGB && type == GL_UNSIGNED_BYTE && littleEndian;
 
    case MESA_FORMAT_B5G6R5_UNORM:
-      return format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 && !swapBytes;
+      return ((format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) ||
+              (format == GL_BGR && type == GL_UNSIGNED_SHORT_5_6_5_REV)) &&
+              !swapBytes;
 
    case MESA_FORMAT_R5G6B5_UNORM:
-      /* Some of the 16-bit MESA_FORMATs that would seem to correspond to
-       * GL_UNSIGNED_SHORT_* are byte-swapped instead of channel-reversed,
-       * according to formats.h, so they can't be matched.
-       */
-      return GL_FALSE;
+      return ((format == GL_BGR && type == GL_UNSIGNED_SHORT_5_6_5) ||
+              (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5_REV)) &&
+              !swapBytes;
 
    case MESA_FORMAT_B4G4R4A4_UNORM:
       return format == GL_BGRA && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
@@ -1487,7 +1706,8 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
          !swapBytes;
 
    case MESA_FORMAT_A1R5G5B5_UNORM:
-      return GL_FALSE;
+      return format == GL_BGRA && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
 
    case MESA_FORMAT_L4A4_UNORM:
       return GL_FALSE;
@@ -1506,6 +1726,54 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
    case MESA_FORMAT_B2G3R3_UNORM:
       return format == GL_RGB && type == GL_UNSIGNED_BYTE_3_3_2;
 
+   case MESA_FORMAT_R3G3B2_UNORM:
+      return format == GL_RGB && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UNORM:
+      if (format == GL_RGBA && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_ABGR_EXT && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_ABGR_EXT && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UNORM:
+      if (format == GL_ABGR_EXT && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_ABGR_EXT && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R5G5B5A1_UNORM:
+      return format == GL_RGBA && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
+   case MESA_FORMAT_A2B10G10R10_UNORM:
+      return format == GL_RGBA && type == GL_UNSIGNED_INT_10_10_10_2;
+
+   case MESA_FORMAT_A2B10G10R10_UINT:
+      return format == GL_RGBA_INTEGER_EXT && type == GL_UNSIGNED_INT_10_10_10_2;
+
+   case MESA_FORMAT_A2R10G10B10_UNORM:
+      return format == GL_BGRA && type == GL_UNSIGNED_INT_10_10_10_2;
+
+   case MESA_FORMAT_A2R10G10B10_UINT:
+      return format == GL_BGRA_INTEGER_EXT && type == GL_UNSIGNED_INT_10_10_10_2;
+
    case MESA_FORMAT_A_UNORM8:
       return format == GL_ALPHA && type == GL_UNSIGNED_BYTE;
    case MESA_FORMAT_A_UNORM16:
@@ -1867,6 +2135,7 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
    case MESA_FORMAT_RGBX_UINT8:
    case MESA_FORMAT_RGBX_SINT8:
    case MESA_FORMAT_B10G10R10X2_UNORM:
+   case MESA_FORMAT_R10G10B10X2_UNORM:
    case MESA_FORMAT_RGBX_UNORM16:
    case MESA_FORMAT_RGBX_SNORM16:
    case MESA_FORMAT_RGBX_FLOAT16:
diff --git a/mesalib/src/mesa/main/formats.csv b/mesalib/src/mesa/main/formats.csv
index 39bcdbdd2..e159e7dd6 100644
--- a/mesalib/src/mesa/main/formats.csv
+++ b/mesalib/src/mesa/main/formats.csv
@@ -82,12 +82,20 @@ MESA_FORMAT_G16R16_UNORM                  , packed, 1, 1, un16, un16,     ,
 MESA_FORMAT_B10G10R10A2_UNORM             , packed, 1, 1, un10, un10, un10, un2 , zyxw, rgb
 MESA_FORMAT_B10G10R10X2_UNORM             , packed, 1, 1, un10, un10, un10, x2  , zyx1, rgb
 MESA_FORMAT_R10G10B10A2_UNORM             , packed, 1, 1, un10, un10, un10, un2 , xyzw, rgb
+MESA_FORMAT_R10G10B10X2_UNORM             , packed, 1, 1, un10, un10, un10, x2  , xyz1, rgb
 
 MESA_FORMAT_S8_UINT_Z24_UNORM             , packed, 1, 1, un24, u8  ,     ,     , xy__, zs
 MESA_FORMAT_X8_UINT_Z24_UNORM             , packed, 1, 1, un24, x8  ,     ,     , x___, zs
 MESA_FORMAT_Z24_UNORM_S8_UINT             , packed, 1, 1, u8  , un24,     ,     , yx__, zs
 MESA_FORMAT_Z24_UNORM_X8_UINT             , packed, 1, 1, x8  , un24,     ,     , y___, zs
 
+MESA_FORMAT_R3G3B2_UNORM                  , packed, 1, 1, un3 , un3 , un2 ,     , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UNORM                , packed, 1, 1, un4 , un4 , un4 , un4 , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UNORM                , packed, 1, 1, un4 , un4 , un4 , un4 , xyzw, rgb
+MESA_FORMAT_R5G5B5A1_UNORM                , packed, 1, 1, un5 , un5 , un5 , un1 , xyzw, rgb
+MESA_FORMAT_A2B10G10R10_UNORM             , packed, 1, 1, un2 , un10, un10, un10, wzyx, rgb
+MESA_FORMAT_A2R10G10B10_UNORM             , packed, 1, 1, un2 , un10, un10, un10, yzwx, rgb
+
 MESA_FORMAT_YCBCR                         , other , 1, 1, x16 ,     ,     ,     , xyzw, yuv
 MESA_FORMAT_YCBCR_REV                     , other , 1, 1, x16 ,     ,     ,     , xyzw, yuv
 
@@ -180,6 +188,8 @@ MESA_FORMAT_Z_FLOAT32                     , array , 1, 1, f32 ,     ,     ,
 # Packed signed/unsigned non-normalized integer formats
 MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , zyxw, rgb
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
+MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
+MESA_FORMAT_A2R10G10B10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , yzwx, rgb
 
 # Array signed/unsigned non-normalized integer formats
 MESA_FORMAT_A_UINT8                       , array , 1, 1, u8  ,     ,     ,     , 000x, rgb
diff --git a/mesalib/src/mesa/main/formats.h b/mesalib/src/mesa/main/formats.h
index 213ab563d..7e451caf0 100644
--- a/mesalib/src/mesa/main/formats.h
+++ b/mesalib/src/mesa/main/formats.h
@@ -36,7 +36,7 @@
 #include <GL/gl.h>
 #include <stdbool.h>
 #include <stdint.h>
-
+#include "compiler.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -82,6 +82,132 @@ enum {
 };
 
 /**
+ * A uint32_t that encodes the information necessary to represent an
+ * array format
+ */
+typedef uint32_t mesa_array_format;
+
+/**
+ * Encoding for valid array format data types
+ *
+ * The two low bits hold log2 of the size in bytes; bit 2
+ * (MESA_ARRAY_FORMAT_TYPE_IS_SIGNED) and bit 3
+ * (MESA_ARRAY_FORMAT_TYPE_IS_FLOAT) are set for signed and floating-point
+ * types respectively.
+ */
+enum mesa_array_format_datatype {
+   MESA_ARRAY_FORMAT_TYPE_UBYTE = 0x0,   /* 1 byte, unsigned */
+   MESA_ARRAY_FORMAT_TYPE_USHORT = 0x1,  /* 2 bytes, unsigned */
+   MESA_ARRAY_FORMAT_TYPE_UINT = 0x2,    /* 4 bytes, unsigned */
+   MESA_ARRAY_FORMAT_TYPE_BYTE = 0x4,    /* 1 byte, signed */
+   MESA_ARRAY_FORMAT_TYPE_SHORT = 0x5,   /* 2 bytes, signed */
+   MESA_ARRAY_FORMAT_TYPE_INT = 0x6,     /* 4 bytes, signed */
+   MESA_ARRAY_FORMAT_TYPE_HALF = 0xd,    /* 2 bytes, signed + float bits */
+   MESA_ARRAY_FORMAT_TYPE_FLOAT = 0xe,   /* 4 bytes, signed + float bits */
+};
+
+/**
+ * An enum useful to encode/decode information stored in a mesa_array_format
+ *
+ * Bit layout, from the values below: bits 0-1 size, bit 2 signed, bit 3
+ * float, bit 4 normalized, bits 5-7 channel count, bits 8-19 four 3-bit
+ * swizzle entries (X, Y, Z, W), bit 31 array-format marker.
+ */
+enum {
+   MESA_ARRAY_FORMAT_TYPE_IS_SIGNED = 0x4,     /* bit 2 */
+   MESA_ARRAY_FORMAT_TYPE_IS_FLOAT = 0x8,      /* bit 3 */
+   MESA_ARRAY_FORMAT_TYPE_NORMALIZED = 0x10,   /* bit 4 */
+   MESA_ARRAY_FORMAT_DATATYPE_MASK = 0xf,      /* size + signed + float */
+   MESA_ARRAY_FORMAT_TYPE_MASK = 0x1f,         /* datatype + normalized */
+   MESA_ARRAY_FORMAT_TYPE_SIZE_MASK = 0x3,     /* log2 of size in bytes */
+   MESA_ARRAY_FORMAT_NUM_CHANS_MASK = 0xe0,    /* bits 5-7 */
+   MESA_ARRAY_FORMAT_SWIZZLE_X_MASK = 0x00700, /* bits 8-10 */
+   MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK = 0x03800, /* bits 11-13 */
+   MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK = 0x1c000, /* bits 14-16 */
+   MESA_ARRAY_FORMAT_SWIZZLE_W_MASK = 0xe0000, /* bits 17-19 */
+   MESA_ARRAY_FORMAT_BIT = 0x80000000          /* marks a mesa_array_format */
+};
+
+/**
+ * Build a mesa_array_format value from its fields.
+ *
+ * SIZE is the channel size in bytes; only 1, 2 and 4 encode correctly
+ * because (SIZE >> 1) is stored in the two size bits.  SIGNED, IS_FLOAT and
+ * NORM are 0/1 flags, NUM_CHANS is the channel count and SWIZZLE_* are
+ * 3-bit swizzle values.  MESA_ARRAY_FORMAT_BIT is always set in the result.
+ *
+ * Every argument is parenthesized so that expressions can be passed safely.
+ */
+#define MESA_ARRAY_FORMAT(SIZE, SIGNED, IS_FLOAT, NORM, NUM_CHANS, \
+      SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W) (                \
+   (((SIZE)      >> 1 ) & MESA_ARRAY_FORMAT_TYPE_SIZE_MASK) |      \
+   (((SIGNED)    << 2 ) & MESA_ARRAY_FORMAT_TYPE_IS_SIGNED) |      \
+   (((IS_FLOAT)  << 3 ) & MESA_ARRAY_FORMAT_TYPE_IS_FLOAT) |       \
+   (((NORM)      << 4 ) & MESA_ARRAY_FORMAT_TYPE_NORMALIZED) |     \
+   (((NUM_CHANS) << 5 ) & MESA_ARRAY_FORMAT_NUM_CHANS_MASK) |      \
+   (((SWIZZLE_X) << 8 ) & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) |      \
+   (((SWIZZLE_Y) << 11) & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) |      \
+   (((SWIZZLE_Z) << 14) & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) |      \
+   (((SWIZZLE_W) << 17) & MESA_ARRAY_FORMAT_SWIZZLE_W_MASK) |      \
+   MESA_ARRAY_FORMAT_BIT)
+
+/**
+ * Various helpers to access the data encoded in a mesa_array_format
+ */
+
+/** True when the signed bit of the type field is set. */
+static inline bool
+_mesa_array_format_is_signed(mesa_array_format f)
+{
+   const uint32_t signed_bit = f & MESA_ARRAY_FORMAT_TYPE_IS_SIGNED;
+
+   return signed_bit != 0;
+}
+
+/** True when the float bit of the type field is set. */
+static inline bool
+_mesa_array_format_is_float(mesa_array_format f)
+{
+   const uint32_t float_bit = f & MESA_ARRAY_FORMAT_TYPE_IS_FLOAT;
+
+   return float_bit != 0;
+}
+
+/** True when the normalized bit of the type field is set. */
+static inline bool
+_mesa_array_format_is_normalized(mesa_array_format f)
+{
+   const uint32_t norm_bit = f & MESA_ARRAY_FORMAT_TYPE_NORMALIZED;
+
+   return norm_bit != 0;
+}
+
+/** Extract the datatype bits (size, signed and float) of an array format. */
+static inline enum mesa_array_format_datatype
+_mesa_array_format_get_datatype(mesa_array_format f)
+{
+   const uint32_t datatype_bits = f & MESA_ARRAY_FORMAT_DATATYPE_MASK;
+
+   return (enum mesa_array_format_datatype)datatype_bits;
+}
+
+/** Size in bytes of one value of the given datatype: 1 << (size bits). */
+static inline int
+_mesa_array_format_datatype_get_size(enum mesa_array_format_datatype type)
+{
+   const int log2_size = type & MESA_ARRAY_FORMAT_TYPE_SIZE_MASK;
+
+   return 1 << log2_size;
+}
+
+/** Size in bytes of one channel of the array format: 1 << (size bits). */
+static inline int
+_mesa_array_format_get_type_size(mesa_array_format f)
+{
+   const uint32_t log2_size = f & MESA_ARRAY_FORMAT_TYPE_SIZE_MASK;
+
+   return 1 << log2_size;
+}
+
+/** Number of channels stored in bits 5-7 of the array format. */
+static inline int
+_mesa_array_format_get_num_channels(mesa_array_format f)
+{
+   const uint32_t chans_field = f & MESA_ARRAY_FORMAT_NUM_CHANS_MASK;
+
+   return chans_field >> 5;
+}
+
+/**
+ * Unpack the four 3-bit swizzle entries of \p f into swizzle[0..3]
+ * (X, Y, Z, W order).
+ */
+static inline void
+_mesa_array_format_get_swizzle(mesa_array_format f, uint8_t *swizzle)
+{
+   const uint32_t x = (f & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) >> 8;
+   const uint32_t y = (f & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) >> 11;
+   const uint32_t z = (f & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) >> 14;
+   const uint32_t w = (f & MESA_ARRAY_FORMAT_SWIZZLE_W_MASK) >> 17;
+
+   swizzle[0] = x;
+   swizzle[1] = y;
+   swizzle[2] = z;
+   swizzle[3] = w;
+}
+
+/**
+ * Replace the swizzle stored in \p *f with (x, y, z, w).
+ *
+ * The previous swizzle bits are cleared first; OR-ing alone would merge the
+ * new swizzle with whatever the format already carried.  All other fields
+ * of the format are preserved.  Each value is truncated to 3 bits.
+ */
+static inline void
+_mesa_array_format_set_swizzle(mesa_array_format *f,
+                               int32_t x, int32_t y, int32_t z, int32_t w)
+{
+   *f &= ~(mesa_array_format)(MESA_ARRAY_FORMAT_SWIZZLE_X_MASK |
+                              MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK |
+                              MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK |
+                              MESA_ARRAY_FORMAT_SWIZZLE_W_MASK);
+
+   *f |= ((x << 8 ) & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) |
+         ((y << 11) & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) |
+         ((z << 14) & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) |
+         ((w << 17) & MESA_ARRAY_FORMAT_SWIZZLE_W_MASK);
+}
+
+/**
+ * Tell whether the value stored in a uint32_t is a mesa_array_format
+ * (MESA_ARRAY_FORMAT_BIT set) rather than a mesa_format.
+ */
+static inline bool
+_mesa_format_is_mesa_array_format(uint32_t f)
+{
+   const uint32_t marker = f & MESA_ARRAY_FORMAT_BIT;
+
+   return marker != 0;
+}
+
+/**
  * Mesa texture/renderbuffer image formats.
  */
 typedef enum
@@ -139,9 +265,9 @@ typedef enum
     *   ** when type applies to all components
     *
     *  examples:                   msb <------ TEXEL BITS -----------> lsb
-    *  MESA_FORMAT_A8B8G8R8_UNORM, AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR
-    *  MESA_FORMAT_R5G6B5_UNORM                        RRRR RGGG GGGB BBBB
-    *  MESA_FORMAT_B4G4R4X4_UNORM                      BBBB GGGG RRRR XXXX
+    *  MESA_FORMAT_A8B8G8R8_UNORM, RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA
+    *  MESA_FORMAT_R5G6B5_UNORM                        BBBB BGGG GGGR RRRR
+    *  MESA_FORMAT_B4G4R4X4_UNORM                      XXXX RRRR GGGG BBBB
     *  MESA_FORMAT_Z32_FLOAT_S8X24_UINT
     *  MESA_FORMAT_R10G10B10A2_UINT
     *  MESA_FORMAT_R9G9B9E5_FLOAT
@@ -226,12 +352,21 @@ typedef enum
    MESA_FORMAT_B10G10R10A2_UNORM,/* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_B10G10R10X2_UNORM,/* xxRR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_R10G10B10A2_UNORM,/* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
+   MESA_FORMAT_R10G10B10X2_UNORM,/* xxBB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
 
    MESA_FORMAT_S8_UINT_Z24_UNORM,/* ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ SSSS SSSS */
    MESA_FORMAT_X8_UINT_Z24_UNORM,/* ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ xxxx xxxx */
    MESA_FORMAT_Z24_UNORM_S8_UINT,/* SSSS SSSS ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ */
    MESA_FORMAT_Z24_UNORM_X8_UINT,/* xxxx xxxx ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ ZZZZ */
 
+   /* Other formats */
+   MESA_FORMAT_R3G3B2_UNORM,                                   /* BBGG GRRR */
+   MESA_FORMAT_A4B4G4R4_UNORM,                       /* RRRR GGGG BBBB AAAA */
+   MESA_FORMAT_R4G4B4A4_UNORM,                       /* AAAA BBBB GGGG RRRR */
+   MESA_FORMAT_R5G5B5A1_UNORM,                       /* ABBB BBGG GGGR RRRR */
+   MESA_FORMAT_A2B10G10R10_UNORM,/* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
+   MESA_FORMAT_A2R10G10B10_UNORM,/* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+
    MESA_FORMAT_YCBCR,            /*                     YYYY YYYY UorV UorV */
    MESA_FORMAT_YCBCR_REV,        /*                     UorV UorV YYYY YYYY */
 
@@ -326,6 +461,8 @@ typedef enum
    /* Packed signed/unsigned non-normalized integer formats */
    MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
    MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
+   MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
+   MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
 
    /* Array signed/unsigned non-normalized integer formats */
    MESA_FORMAT_A_UINT8,
@@ -461,14 +598,23 @@ extern GLenum
 _mesa_get_format_datatype(mesa_format format);
 
 extern GLenum
-_mesa_get_format_base_format(mesa_format format);
+_mesa_get_format_base_format(uint32_t format);
 
 extern void
 _mesa_get_format_block_size(mesa_format format, GLuint *bw, GLuint *bh);
 
+extern mesa_array_format
+_mesa_array_format_flip_channels(mesa_array_format format);
+
 extern void
 _mesa_get_format_swizzle(mesa_format format, uint8_t swizzle_out[4]);
 
+extern uint32_t
+_mesa_format_to_array_format(mesa_format format);
+
+extern mesa_format
+_mesa_format_from_array_format(uint32_t array_format);
+
 extern GLboolean
 _mesa_is_format_compressed(mesa_format format);
 
@@ -490,6 +636,9 @@ _mesa_is_format_integer(mesa_format format);
 extern bool
 _mesa_is_format_etc2(mesa_format format);
 
+GLenum
+_mesa_is_format_color_format(mesa_format format);
+
 extern GLenum
 _mesa_get_format_color_encoding(mesa_format format);
 
diff --git a/mesalib/src/mesa/main/framebuffer.c b/mesalib/src/mesa/main/framebuffer.c
index 7416bb118..dc0386d23 100644
--- a/mesalib/src/mesa/main/framebuffer.c
+++ b/mesalib/src/mesa/main/framebuffer.c
@@ -345,7 +345,7 @@ update_framebuffer_size(struct gl_context *ctx, struct gl_framebuffer *fb)
       }
    }
 
-   if (minWidth != ~0) {
+   if (minWidth != ~0U) {
       fb->Width = minWidth;
       fb->Height = minHeight;
    }
diff --git a/mesalib/src/mesa/main/genmipmap.c b/mesalib/src/mesa/main/genmipmap.c
index 9d111cab2..9aef09019 100644
--- a/mesalib/src/mesa/main/genmipmap.c
+++ b/mesalib/src/mesa/main/genmipmap.c
@@ -36,21 +36,20 @@
 #include "mtypes.h"
 #include "teximage.h"
 #include "texobj.h"
-
+#include "hash.h"
 
 /**
- * Generate all the mipmap levels below the base level.
- * Note: this GL function would be more useful if one could specify a
- * cube face, a set of array slices, etc.
+ * Implements glGenerateMipmap and glGenerateTextureMipmap.
+ * Generates all the mipmap levels below the base level.
  */
-void GLAPIENTRY
-_mesa_GenerateMipmap(GLenum target)
+void
+_mesa_generate_texture_mipmap(struct gl_context *ctx,
+                              struct gl_texture_object *texObj, GLenum target,
+                              bool dsa)
 {
    struct gl_texture_image *srcImage;
-   struct gl_texture_object *texObj;
    GLboolean error;
-
-   GET_CURRENT_CONTEXT(ctx);
+   const char *suffix = dsa ? "Texture" : "";
 
    FLUSH_VERTICES(ctx, 0);
 
@@ -83,13 +82,11 @@ _mesa_GenerateMipmap(GLenum target)
    }
 
    if (error) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmapEXT(target=%s)",
-                  _mesa_lookup_enum_by_nr(target));
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerate%sMipmap(target=%s)",
+                  suffix, _mesa_lookup_enum_by_nr(target));
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
    if (texObj->BaseLevel >= texObj->MaxLevel) {
       /* nothing to do */
       return;
@@ -98,17 +95,17 @@ _mesa_GenerateMipmap(GLenum target)
    if (texObj->Target == GL_TEXTURE_CUBE_MAP &&
        !_mesa_cube_complete(texObj)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGenerateMipmap(incomplete cube map)");
+                  "glGenerate%sMipmap(incomplete cube map)", suffix);
       return;
    }
 
    _mesa_lock_texture(ctx, texObj);
 
-   srcImage = _mesa_select_tex_image(ctx, texObj, target, texObj->BaseLevel);
+   srcImage = _mesa_select_tex_image(texObj, target, texObj->BaseLevel);
    if (!srcImage) {
       _mesa_unlock_texture(ctx, texObj);
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGenerateMipmap(zero size base image)");
+                  "glGenerate%sMipmap(zero size base image)", suffix);
       return;
    }
 
@@ -117,19 +114,53 @@ _mesa_GenerateMipmap(GLenum target)
        _mesa_is_stencil_format(srcImage->InternalFormat)) {
       _mesa_unlock_texture(ctx, texObj);
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGenerateMipmap(invalid internal format)");
+                  "glGenerate%sMipmap(invalid internal format)", suffix);
       return;
    }
 
    if (target == GL_TEXTURE_CUBE_MAP) {
       GLuint face;
-      for (face = 0; face < 6; face++)
-	 ctx->Driver.GenerateMipmap(ctx,
-				    GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face,
-				    texObj);
+      for (face = 0; face < 6; face++) {
+         ctx->Driver.GenerateMipmap(ctx,
+                      GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB + face, texObj);
+      }
    }
    else {
       ctx->Driver.GenerateMipmap(ctx, target, texObj);
    }
    _mesa_unlock_texture(ctx, texObj);
 }
+
+/**
+ * Generate all the mipmap levels below the base level of the texture
+ * currently bound to \p target.
+ * Note: this GL function would be more useful if one could specify a
+ * cube face, a set of array slices, etc.
+ */
+void GLAPIENTRY
+_mesa_GenerateMipmap(GLenum target)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj) {
+      /* No texture object could be looked up for this target.  Raise the
+       * same error the target check in _mesa_generate_texture_mipmap()
+       * produces instead of returning without any GL error.
+       * NOTE(review): assumes a NULL result always means an unsupported
+       * target -- confirm against _mesa_get_current_tex_object().
+       */
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGenerateMipmap(target=%s)",
+                  _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
+   _mesa_generate_texture_mipmap(ctx, texObj, target, false);
+}
+
+/**
+ * Generate all the mipmap levels below the base level for the texture
+ * object named \p texture, using that object's own target.
+ * Nothing is generated when the texture lookup fails.
+ */
+void GLAPIENTRY
+_mesa_GenerateTextureMipmap(GLuint texture)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_texture_object *const obj =
+      _mesa_lookup_texture_err(ctx, texture, "glGenerateTextureMipmap");
+
+   if (obj)
+      _mesa_generate_texture_mipmap(ctx, obj, obj->Target, true);
+}
diff --git a/mesalib/src/mesa/main/genmipmap.h b/mesalib/src/mesa/main/genmipmap.h
index d546a8d7b..f4ef85951 100644
--- a/mesalib/src/mesa/main/genmipmap.h
+++ b/mesalib/src/mesa/main/genmipmap.h
@@ -28,9 +28,15 @@
 
 #include "glheader.h"
 
+extern void
+_mesa_generate_texture_mipmap(struct gl_context *ctx,
+                              struct gl_texture_object *texObj, GLenum target,
+                              bool dsa);
 
 extern void GLAPIENTRY
 _mesa_GenerateMipmap(GLenum target);
 
+extern void GLAPIENTRY
+_mesa_GenerateTextureMipmap(GLuint texture);
 
 #endif /* GENMIPMAP_H */
diff --git a/mesalib/src/mesa/main/get.c b/mesalib/src/mesa/main/get.c
index 6091efc7f..3f9d74516 100644
--- a/mesalib/src/mesa/main/get.c
+++ b/mesalib/src/mesa/main/get.c
@@ -392,6 +392,7 @@ EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
 EXTRA_EXT(INTEL_performance_query);
 EXTRA_EXT(ARB_explicit_uniform_location);
 EXTRA_EXT(ARB_clip_control);
+EXTRA_EXT(EXT_polygon_offset_clamp);
 
 static const int
 extra_ARB_color_buffer_float_or_glcore[] = {
diff --git a/mesalib/src/mesa/main/get_hash_params.py b/mesalib/src/mesa/main/get_hash_params.py
index 09a61acc1..41cb2c17b 100644
--- a/mesalib/src/mesa/main/get_hash_params.py
+++ b/mesalib/src/mesa/main/get_hash_params.py
@@ -343,6 +343,7 @@ descriptor=[
 
 # GL_ARB_ES3_compatibility
   [ "MAX_ELEMENT_INDEX", "CONTEXT_INT64(Const.MaxElementIndex), extra_ARB_ES3_compatibility_api_es3"],
+  [ "PRIMITIVE_RESTART_FIXED_INDEX", "CONTEXT_BOOL(Array.PrimitiveRestartFixedIndex), extra_ARB_ES3_compatibility_api_es3" ],
 
 # GL_ARB_fragment_shader
   [ "MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents), extra_ARB_fragment_shader" ],
@@ -403,6 +404,11 @@ descriptor=[
   [ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ],
 ]},
 
+{ "apis": ["GL", "GL_CORE", "GLES3"], "params": [
+# GL_ARB_sampler_objects / GL 3.3 / GLES 3.0
+  [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
+]},
+
 # Remaining enums are only in OpenGL
 { "apis": ["GL", "GL_CORE"], "params": [
   [ "ACCUM_RED_BITS", "BUFFER_INT(Visual.accumRedBits), NO_EXTRA" ],
@@ -695,10 +701,6 @@ descriptor=[
   [ "SAMPLE_MASK", "CONTEXT_BOOL(Multisample.SampleMask), extra_ARB_texture_multisample" ],
   [ "MAX_SAMPLE_MASK_WORDS", "CONST(1), extra_ARB_texture_multisample" ],
 
-
-# GL_ARB_sampler_objects / GL 3.3
-  [ "SAMPLER_BINDING", "LOC_CUSTOM, TYPE_INT, GL_SAMPLER_BINDING, NO_EXTRA" ],
-
 # GL 3.0
   [ "CONTEXT_FLAGS", "CONTEXT_INT(Const.ContextFlags), extra_version_30" ],
 
@@ -811,6 +813,9 @@ descriptor=[
   [ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ],
   [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ],
   [ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array" ],
+
+# GL_EXT_polygon_offset_clamp
+  [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
 ]}
 
 ]
diff --git a/mesalib/src/mesa/main/glformats.c b/mesalib/src/mesa/main/glformats.c
index 00478f989..4e05229cf 100644
--- a/mesalib/src/mesa/main/glformats.c
+++ b/mesalib/src/mesa/main/glformats.c
@@ -27,7 +27,205 @@
 
 #include "context.h"
 #include "glformats.h"
+#include "formats.h"
+#include "enums.h"
+
+/* Indices of the two constant entries at the end of every 6-entry component
+ * map built by MAP4(): entry 4 holds ZERO and entry 5 holds ONE.
+ */
+enum {
+   ZERO = 4,
+   ONE = 5
+};
+
+/* Row indices into the mappings[] table, one per image format family
+ * returned by get_map_idx().  MAX_IDX is the number of rows.
+ */
+enum {
+   IDX_LUMINANCE = 0,
+   IDX_ALPHA,
+   IDX_INTENSITY,
+   IDX_LUMINANCE_ALPHA,
+   IDX_RGB,
+   IDX_RGBA,
+   IDX_RED,
+   IDX_GREEN,
+   IDX_BLUE,
+   IDX_BGR,
+   IDX_BGRA,
+   IDX_ABGR,
+   IDX_RG,
+   MAX_IDX
+};
+
+/* Initializers for a 6-entry component map.  MAP1/MAP2/MAP3 pad the
+ * components they do not take with ZERO; MAP4 appends the ZERO and ONE
+ * entries so that indices 4 and 5 map to themselves.
+ */
+#define MAP1(x)       MAP4(x, ZERO, ZERO, ZERO)
+#define MAP2(x,y)     MAP4(x, y, ZERO, ZERO)
+#define MAP3(x,y,z)   MAP4(x, y, z, ZERO)
+#define MAP4(x,y,z,w) { x, y, z, w, ZERO, ONE }
+
+/* Component maps for each IDX_* format family.  The rows are listed in
+ * IDX_* order because the table is indexed directly by those values.
+ *
+ * to_rgba[i]   - which component of this format (or ZERO/ONE) supplies
+ *                RGBA component i.
+ * from_rgba[i] - which RGBA component (or ZERO/ONE) supplies component i
+ *                of this format.
+ */
+static const struct {
+   GLubyte format_idx;
+   GLubyte to_rgba[6];
+   GLubyte from_rgba[6];
+} mappings[MAX_IDX] =
+{
+   {
+      IDX_LUMINANCE,
+      MAP4(0,0,0,ONE),
+      MAP1(0)
+   },
+
+   {
+      IDX_ALPHA,
+      MAP4(ZERO, ZERO, ZERO, 0),
+      MAP1(3)
+   },
+
+   {
+      IDX_INTENSITY,
+      MAP4(0, 0, 0, 0),
+      MAP1(0),
+   },
+
+   {
+      IDX_LUMINANCE_ALPHA,
+      MAP4(0,0,0,1),
+      MAP2(0,3)
+   },
+
+   {
+      IDX_RGB,
+      MAP4(0,1,2,ONE),
+      MAP3(0,1,2)
+   },
+
+   {
+      IDX_RGBA,
+      MAP4(0,1,2,3),
+      MAP4(0,1,2,3),
+   },
+
+   {
+      IDX_RED,
+      MAP4(0, ZERO, ZERO, ONE),
+      MAP1(0),
+   },
+
+   {
+      IDX_GREEN,
+      MAP4(ZERO, 0, ZERO, ONE),
+      MAP1(1),
+   },
+
+   {
+      IDX_BLUE,
+      MAP4(ZERO, ZERO, 0, ONE),
+      MAP1(2),
+   },
+
+   {
+      IDX_BGR,
+      MAP4(2,1,0,ONE),
+      MAP3(2,1,0)
+   },
+
+   {
+      IDX_BGRA,
+      MAP4(2,1,0,3),
+      MAP4(2,1,0,3)
+   },
+
+   {
+      IDX_ABGR,
+      MAP4(3,2,1,0),
+      MAP4(3,2,1,0)
+   },
+
+   {
+      IDX_RG,
+      MAP4(0, 1, ZERO, ONE),
+      MAP2(0, 1)
+   },
+};
+
+/**
+ * Translate a GL image format enum into the IDX_* row of mappings[] that
+ * describes it.  Integer variants share the row of their non-integer
+ * counterpart.  An unrecognized enum is reported through _mesa_problem()
+ * and yields 0.
+ */
+static int
+get_map_idx(GLenum value)
+{
+   int idx = 0;
+
+   switch (value) {
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_INTEGER_EXT:
+      idx = IDX_LUMINANCE;
+      break;
+   case GL_ALPHA:
+   case GL_ALPHA_INTEGER:
+      idx = IDX_ALPHA;
+      break;
+   case GL_INTENSITY:
+      idx = IDX_INTENSITY;
+      break;
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+      idx = IDX_LUMINANCE_ALPHA;
+      break;
+   case GL_RGB:
+   case GL_RGB_INTEGER:
+      idx = IDX_RGB;
+      break;
+   case GL_RGBA:
+   case GL_RGBA_INTEGER:
+      idx = IDX_RGBA;
+      break;
+   case GL_RED:
+   case GL_RED_INTEGER:
+      idx = IDX_RED;
+      break;
+   case GL_GREEN:
+      idx = IDX_GREEN;
+      break;
+   case GL_BLUE:
+      idx = IDX_BLUE;
+      break;
+   case GL_BGR:
+   case GL_BGR_INTEGER:
+      idx = IDX_BGR;
+      break;
+   case GL_BGRA:
+   case GL_BGRA_INTEGER:
+      idx = IDX_BGRA;
+      break;
+   case GL_ABGR_EXT:
+      idx = IDX_ABGR;
+      break;
+   case GL_RG:
+   case GL_RG_INTEGER:
+      idx = IDX_RG;
+      break;
+   default:
+      _mesa_problem(NULL, "Unexpected inFormat %s",
+                    _mesa_lookup_enum_by_nr(value));
+      idx = 0;
+      break;
+   }
+
+   return idx;
+}
 
+/**
+ * Compute, for a texture promoted from \p inFormat to \p outFormat, which
+ * source component each destination component comes from.
+ *
+ * \param inFormat  the incoming format of the texture
+ * \param outFormat  the final texture format
+ * \param map  receives the full 6-component map; the ZERO and ONE entries
+ *             always map to themselves
+ */
+void
+_mesa_compute_component_mapping(GLenum inFormat, GLenum outFormat, GLubyte *map)
+{
+   const int src_idx = get_map_idx(inFormat);
+   const int dst_idx = get_map_idx(outFormat);
+   const GLubyte *src_to_rgba = mappings[src_idx].to_rgba;
+   const GLubyte *rgba_to_dst = mappings[dst_idx].from_rgba;
+
+   /* Destination component -> RGBA component -> source component. */
+   map[0] = src_to_rgba[rgba_to_dst[0]];
+   map[1] = src_to_rgba[rgba_to_dst[1]];
+   map[2] = src_to_rgba[rgba_to_dst[2]];
+   map[3] = src_to_rgba[rgba_to_dst[3]];
+
+   map[ZERO] = ZERO;
+   map[ONE] = ONE;
+
+#if 0
+   printf("from %x/%s to %x/%s map %d %d %d %d %d %d\n",
+	  inFormat, _mesa_lookup_enum_by_nr(inFormat),
+	  outFormat, _mesa_lookup_enum_by_nr(outFormat),
+	  map[0],
+	  map[1],
+	  map[2],
+	  map[3],
+	  map[4],
+	  map[5]);
+#endif
+}
 
 /**
  * \return GL_TRUE if type is packed pixel type, GL_FALSE otherwise.
@@ -93,6 +291,7 @@ _mesa_sizeof_type(GLenum type)
    case GL_DOUBLE:
       return sizeof(GLdouble);
    case GL_HALF_FLOAT_ARB:
+   case GL_HALF_FLOAT_OES:
       return sizeof(GLhalfARB);
    case GL_FIXED:
       return sizeof(GLfixed);
@@ -125,6 +324,7 @@ _mesa_sizeof_packed_type(GLenum type)
    case GL_INT:
       return sizeof(GLint);
    case GL_HALF_FLOAT_ARB:
+   case GL_HALF_FLOAT_OES:
       return sizeof(GLhalfARB);
    case GL_FLOAT:
       return sizeof(GLfloat);
@@ -241,6 +441,7 @@ _mesa_bytes_per_pixel(GLenum format, GLenum type)
    case GL_FLOAT:
       return comps * sizeof(GLfloat);
    case GL_HALF_FLOAT_ARB:
+   case GL_HALF_FLOAT_OES:
       return comps * sizeof(GLhalfARB);
    case GL_UNSIGNED_BYTE_3_3_2:
    case GL_UNSIGNED_BYTE_2_3_3_REV:
@@ -258,18 +459,29 @@ _mesa_bytes_per_pixel(GLenum format, GLenum type)
          return -1;  /* error */
    case GL_UNSIGNED_SHORT_4_4_4_4:
    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+      if (format == GL_RGBA || format == GL_BGRA || format == GL_ABGR_EXT ||
+          format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT)
+         return sizeof(GLushort);
+      else
+         return -1;
    case GL_UNSIGNED_SHORT_5_5_5_1:
    case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-      if (format == GL_RGBA || format == GL_BGRA || format == GL_ABGR_EXT ||
+      if (format == GL_RGBA || format == GL_BGRA ||
           format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT)
          return sizeof(GLushort);
       else
          return -1;
    case GL_UNSIGNED_INT_8_8_8_8:
    case GL_UNSIGNED_INT_8_8_8_8_REV:
+      if (format == GL_RGBA || format == GL_BGRA || format == GL_ABGR_EXT ||
+          format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT ||
+          format == GL_RGB)
+         return sizeof(GLuint);
+      else
+         return -1;
    case GL_UNSIGNED_INT_10_10_10_2:
    case GL_UNSIGNED_INT_2_10_10_10_REV:
-      if (format == GL_RGBA || format == GL_BGRA || format == GL_ABGR_EXT ||
+      if (format == GL_RGBA || format == GL_BGRA ||
           format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT ||
           format == GL_RGB)
          return sizeof(GLuint);
@@ -1403,12 +1615,8 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
 
    case GL_UNSIGNED_SHORT_4_4_4_4:
    case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-   case GL_UNSIGNED_SHORT_5_5_5_1:
-   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
    case GL_UNSIGNED_INT_8_8_8_8:
    case GL_UNSIGNED_INT_8_8_8_8_REV:
-   case GL_UNSIGNED_INT_10_10_10_2:
-   case GL_UNSIGNED_INT_2_10_10_10_REV:
       if (format == GL_RGBA ||
           format == GL_BGRA ||
           format == GL_ABGR_EXT) {
@@ -1418,6 +1626,20 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
           ctx->Extensions.ARB_texture_rgb10_a2ui) {
          break; /* OK */
       }
+      return GL_INVALID_OPERATION;
+
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+   case GL_UNSIGNED_INT_10_10_10_2:
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      if (format == GL_RGBA ||
+          format == GL_BGRA) {
+         break; /* OK */
+      }
+      if ((format == GL_RGBA_INTEGER_EXT || format == GL_BGRA_INTEGER_EXT) &&
+          ctx->Extensions.ARB_texture_rgb10_a2ui) {
+         break; /* OK */
+      }
       if (type == GL_UNSIGNED_INT_2_10_10_10_REV && format == GL_RGB &&
           ctx->API == API_OPENGLES2) {
          break; /* OK by GL_EXT_texture_type_2_10_10_10_REV */
@@ -1448,6 +1670,18 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
       }
       return GL_NO_ERROR;
 
+   case GL_HALF_FLOAT_OES:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+         return GL_NO_ERROR;
+      default:
+         return GL_INVALID_OPERATION;
+      }
+
    default:
       ; /* fall-through */
    }
@@ -1561,7 +1795,6 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
 
       case GL_RGBA:
       case GL_BGRA:
-      case GL_ABGR_EXT:
          switch (type) {
             case GL_BYTE:
             case GL_UNSIGNED_BYTE:
@@ -1584,6 +1817,25 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
                return GL_INVALID_ENUM;
          }
 
+      case GL_ABGR_EXT:
+         switch (type) {
+            case GL_BYTE:
+            case GL_UNSIGNED_BYTE:
+            case GL_SHORT:
+            case GL_UNSIGNED_SHORT:
+            case GL_INT:
+            case GL_UNSIGNED_INT:
+            case GL_FLOAT:
+            case GL_UNSIGNED_SHORT_4_4_4_4:
+            case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+            case GL_UNSIGNED_INT_8_8_8_8:
+            case GL_UNSIGNED_INT_8_8_8_8_REV:
+            case GL_HALF_FLOAT:
+               return GL_NO_ERROR;
+            default:
+               return GL_INVALID_ENUM;
+         }
+
       case GL_YCBCR_MESA:
          if (!ctx->Extensions.MESA_ycbcr_texture)
             return GL_INVALID_ENUM;
@@ -1782,7 +2034,8 @@ _mesa_es_error_check_format_and_type(GLenum format, GLenum type,
  * \return error code, or GL_NO_ERROR.
  */
 GLenum
-_mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
+_mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
+                                      GLenum format, GLenum type,
                                       GLenum internalFormat)
 {
    switch (format) {
@@ -1847,11 +2100,17 @@ _mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
          case GL_RGBA16F:
          case GL_RGBA32F:
             break;
+         case GL_RGBA:
+            if (ctx->Extensions.OES_texture_float && internalFormat == format)
+               break;
          default:
             return GL_INVALID_OPERATION;
          }
          break;
 
+      case GL_HALF_FLOAT_OES:
+         if (ctx->Extensions.OES_texture_half_float && internalFormat == format)
+            break;
       default:
          return GL_INVALID_OPERATION;
       }
@@ -1956,11 +2215,19 @@ _mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
          case GL_R11F_G11F_B10F:
          case GL_RGB9_E5:
             break;
+         case GL_RGB:
+            if (ctx->Extensions.OES_texture_float && internalFormat == format)
+               break;
          default:
             return GL_INVALID_OPERATION;
          }
          break;
 
+      case GL_HALF_FLOAT_OES:
+         if (!ctx->Extensions.OES_texture_half_float || internalFormat != format)
+            return GL_INVALID_OPERATION;
+         break;
+
       case GL_UNSIGNED_INT_2_10_10_10_REV:
          switch (internalFormat) {
          case GL_RGB: /* GL_EXT_texture_type_2_10_10_10_REV */
@@ -2200,10 +2467,289 @@ _mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
    case GL_ALPHA:
    case GL_LUMINANCE:
    case GL_LUMINANCE_ALPHA:
-      if (type != GL_UNSIGNED_BYTE || format != internalFormat)
-         return GL_INVALID_OPERATION;
-      break;
+      switch (type) {
+      case GL_FLOAT:
+         if (ctx->Extensions.OES_texture_float && internalFormat == format)
+            break;
+      case GL_HALF_FLOAT_OES:
+         if (ctx->Extensions.OES_texture_half_float && internalFormat == format)
+            break;
+      default:
+         if (type != GL_UNSIGNED_BYTE || format != internalFormat)
+            return GL_INVALID_OPERATION;
+      }
    }
 
    return GL_NO_ERROR;
 }
+
+static void
+set_swizzle(uint8_t *swizzle, int x, int y, int z, int w)
+{
+   swizzle[MESA_FORMAT_SWIZZLE_X] = x;
+   swizzle[MESA_FORMAT_SWIZZLE_Y] = y;
+   swizzle[MESA_FORMAT_SWIZZLE_Z] = z;
+   swizzle[MESA_FORMAT_SWIZZLE_W] = w;
+}
+
+static bool
+get_swizzle_from_gl_format(GLenum format, uint8_t *swizzle)
+{
+   switch (format) {
+   case GL_RGBA:
+   case GL_RGBA_INTEGER_EXT:
+      set_swizzle(swizzle, 0, 1, 2, 3);
+      return true;
+   case GL_BGRA:
+   case GL_BGRA_INTEGER_EXT:
+      set_swizzle(swizzle, 2, 1, 0, 3);
+      return true;
+   case GL_ABGR_EXT:
+      set_swizzle(swizzle, 3, 2, 1, 0);
+      return true;
+   case GL_RGB:
+   case GL_RGB_INTEGER_EXT:
+      set_swizzle(swizzle, 0, 1, 2, 5);
+      return true;
+   case GL_BGR:
+   case GL_BGR_INTEGER_EXT:
+      set_swizzle(swizzle, 2, 1, 0, 5);
+      return true;
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+      set_swizzle(swizzle, 0, 0, 0, 1);
+      return true;
+   case GL_RG:
+   case GL_RG_INTEGER:
+      set_swizzle(swizzle, 0, 1, 4, 5);
+      return true;
+   case GL_RED:
+   case GL_RED_INTEGER_EXT:
+      set_swizzle(swizzle, 0, 4, 4, 5);
+      return true;
+   case GL_GREEN:
+   case GL_GREEN_INTEGER_EXT:
+      set_swizzle(swizzle, 4, 0, 4, 5);
+      return true;
+   case GL_BLUE:
+   case GL_BLUE_INTEGER_EXT:
+      set_swizzle(swizzle, 4, 4, 0, 5);
+      return true;
+   case GL_ALPHA:
+   case GL_ALPHA_INTEGER_EXT:
+      set_swizzle(swizzle, 4, 4, 4, 0);
+      return true;
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_INTEGER_EXT:
+      set_swizzle(swizzle, 0, 0, 0, 5);
+      return true;
+   case GL_INTENSITY:
+      set_swizzle(swizzle, 0, 0, 0, 0);
+      return true;
+   default:
+      return false;
+   }
+}
+
+/**
+* Take an OpenGL format (GL_RGB, GL_RGBA, etc), OpenGL data type (GL_INT,
+* GL_FLOAT, etc) and return a matching mesa_array_format or a mesa_format
+* otherwise (for non-array formats).
+*
+* This function will typically be used to compute a mesa format from a GL type
+* so we can then call _mesa_format_convert. This function does
+* not consider byte swapping, so it returns types assuming that no byte
+* swapping is involved. If byte swapping is involved then clients are supposed
+* to handle that on their side before calling _mesa_format_convert.
+*
+* This function returns a uint32_t that can pack a mesa_format or a
+* mesa_array_format. Clients must check the mesa array format bit
+* (MESA_ARRAY_FORMAT_BIT) on the return value to know if the returned
+* format is a mesa_array_format or a mesa_format.
+*/
+uint32_t
+_mesa_format_from_format_and_type(GLenum format, GLenum type)
+{
+   mesa_array_format array_format;
+
+   bool is_array_format = true;
+   uint8_t swizzle[4];
+   bool normalized = false, is_float = false, is_signed = false;
+   int num_channels = 0, type_size = 0;
+
+   /* Extract array format type information from the OpenGL data type */
+   switch (type) {
+   case GL_UNSIGNED_BYTE:
+      type_size = 1;
+      break;
+   case GL_BYTE:
+      type_size = 1;
+      is_signed = true;
+      break;
+   case GL_UNSIGNED_SHORT:
+      type_size = 2;
+      break;
+   case GL_SHORT:
+      type_size = 2;
+      is_signed = true;
+      break;
+   case GL_UNSIGNED_INT:
+      type_size = 4;
+      break;
+   case GL_INT:
+      type_size = 4;
+      is_signed = true;
+      break;
+   case GL_HALF_FLOAT:
+   case GL_HALF_FLOAT_OES:
+      type_size = 2;
+      is_signed = true;
+      is_float = true;
+      break;
+   case GL_FLOAT:
+      type_size = 4;
+      is_signed = true;
+      is_float = true;
+      break;
+   default:
+      is_array_format = false;
+      break;
+   }
+
+   /* Extract array format swizzle information from the OpenGL format */
+   if (is_array_format)
+      is_array_format = get_swizzle_from_gl_format(format, swizzle);
+
+   /* If this is an array format type after checking data type and format,
+    * create the array format
+    */
+   if (is_array_format) {
+      normalized = !_mesa_is_enum_format_integer(format);
+      num_channels = _mesa_components_in_format(format);
+
+      array_format =
+         MESA_ARRAY_FORMAT(type_size, is_signed, is_float,
+                           normalized, num_channels,
+                           swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+      if (!_mesa_little_endian())
+         array_format = _mesa_array_format_flip_channels(array_format);
+
+      return array_format;
+   }
+
+   /* Otherwise this is not an array format, so return the mesa_format
+    * matching the OpenGL format and data type
+    */
+   switch (type) {
+   case GL_UNSIGNED_SHORT_5_6_5:
+     if (format == GL_RGB)
+         return MESA_FORMAT_B5G6R5_UNORM;
+      else if (format == GL_BGR)
+         return MESA_FORMAT_R5G6B5_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_5_6_5_REV:
+      if (format == GL_RGB)
+         return MESA_FORMAT_R5G6B5_UNORM;
+      else if (format == GL_BGR)
+         return MESA_FORMAT_B5G6R5_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_4_4_4_4:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_A4B4G4R4_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_A4R4G4B4_UNORM;
+      else if (format == GL_ABGR_EXT)
+         return MESA_FORMAT_R4G4B4A4_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_R4G4B4A4_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_B4G4R4A4_UNORM;
+      else if (format == GL_ABGR_EXT)
+         return MESA_FORMAT_A4B4G4R4_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_A1B5G5R5_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_A1R5G5B5_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_R5G5B5A1_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_B5G5R5A1_UNORM;
+      break;
+   case GL_UNSIGNED_BYTE_3_3_2:
+      if (format == GL_RGB)
+         return MESA_FORMAT_B2G3R3_UNORM;
+      break;
+   case GL_UNSIGNED_BYTE_2_3_3_REV:
+      if (format == GL_RGB)
+         return MESA_FORMAT_R3G3B2_UNORM;
+      break;
+   case GL_UNSIGNED_INT_5_9_9_9_REV:
+      if (format == GL_RGB)
+         return MESA_FORMAT_R9G9B9E5_FLOAT;
+      break;
+   case GL_UNSIGNED_INT_10_10_10_2:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_A2B10G10R10_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A2B10G10R10_UINT;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_A2R10G10B10_UNORM;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A2R10G10B10_UINT;
+      break;
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      if (format == GL_RGB)
+         return MESA_FORMAT_R10G10B10X2_UNORM;
+      if (format == GL_RGBA)
+         return MESA_FORMAT_R10G10B10A2_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R10G10B10A2_UINT;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_B10G10R10A2_UNORM;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B10G10R10A2_UINT;
+      break;
+   case GL_UNSIGNED_INT_8_8_8_8:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_A8B8G8R8_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_A8R8G8B8_UNORM;
+      else if (format == GL_ABGR_EXT)
+         return MESA_FORMAT_R8G8B8A8_UNORM;
+      break;
+   case GL_UNSIGNED_INT_8_8_8_8_REV:
+      if (format == GL_RGBA)
+         return MESA_FORMAT_R8G8B8A8_UNORM;
+      else if (format == GL_BGRA)
+         return MESA_FORMAT_B8G8R8A8_UNORM;
+      else if (format == GL_ABGR_EXT)
+         return MESA_FORMAT_A8B8G8R8_UNORM;
+      break;
+   case GL_UNSIGNED_SHORT_8_8_MESA:
+      if (format == GL_YCBCR_MESA)
+         return MESA_FORMAT_YCBCR;
+      break;
+   case GL_UNSIGNED_SHORT_8_8_REV_MESA:
+      if (format == GL_YCBCR_MESA)
+         return MESA_FORMAT_YCBCR_REV;
+      break;
+   case GL_UNSIGNED_INT_10F_11F_11F_REV:
+      if (format == GL_RGB)
+         return MESA_FORMAT_R11G11B10_FLOAT;
+   default:
+      break;
+   }
+
+   /* If we got here it means that we could not find a Mesa format that
+    * matches the GL format/type provided. We may need to add a new Mesa
+    * format in that case.
+    */
+   unreachable("Unsupported format");
+}
diff --git a/mesalib/src/mesa/main/glformats.h b/mesalib/src/mesa/main/glformats.h
index 7b0321570..e1ecd64d5 100644
--- a/mesalib/src/mesa/main/glformats.h
+++ b/mesalib/src/mesa/main/glformats.h
@@ -35,6 +35,9 @@
 extern "C" {
 #endif
 
+extern void
+_mesa_compute_component_mapping(GLenum inFormat, GLenum outFormat, GLubyte *map);
+
 extern GLboolean
 _mesa_type_is_packed(GLenum type);
 
@@ -122,9 +125,12 @@ _mesa_es_error_check_format_and_type(GLenum format, GLenum type,
                                      unsigned dimensions);
 
 extern GLenum
-_mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
+_mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
+                                      GLenum format, GLenum type,
                                       GLenum internalFormat);
 
+extern uint32_t
+_mesa_format_from_format_and_type(GLenum format, GLenum type);
 
 #ifdef __cplusplus
 }
diff --git a/mesalib/src/mesa/main/hash.c b/mesalib/src/mesa/main/hash.c
index a8c796b9a..1a152ec34 100644
--- a/mesalib/src/mesa/main/hash.c
+++ b/mesalib/src/mesa/main/hash.c
@@ -277,7 +277,7 @@ _mesa_HashInsert_unlocked(struct _mesa_HashTable *table, GLuint key, void *data)
       if (entry) {
          entry->data = data;
       } else {
-         _mesa_hash_table_insert_with_hash(table->ht, hash, uint_key(key), data);
+         _mesa_hash_table_insert_pre_hashed(table->ht, hash, uint_key(key), data);
       }
    }
 }
diff --git a/mesalib/src/mesa/main/image.c b/mesalib/src/mesa/main/image.c
index 4ea5f04c9..e97b006e0 100644
--- a/mesalib/src/mesa/main/image.c
+++ b/mesalib/src/mesa/main/image.c
@@ -41,36 +41,45 @@
 
 
 /**
- * Flip the order of the 2 bytes in each word in the given array.
+ * Flip the order of the 2 bytes in each word in the given array (src) and
+ * store the result in another array (dst). For in-place byte-swapping this
+ * function can be called with the same array for src and dst.
  *
- * \param p array.
+ * \param dst the array where byte-swapped data will be stored.
+ * \param src the array with the source data we want to byte-swap.
  * \param n number of words.
  */
 void
-_mesa_swap2( GLushort *p, GLuint n )
+_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 {
    GLuint i;
    for (i = 0; i < n; i++) {
-      p[i] = (p[i] >> 8) | ((p[i] << 8) & 0xff00);
+      dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00);
    }
 }
 
 
 
 /*
- * Flip the order of the 4 bytes in each word in the given array.
+ * Flip the order of the 4 bytes in each word in the given array (src) and
+ * store the result in another array (dst). For in-place byte-swapping this
+ * function can be called with the same array for src and dst.
+ *
+ * \param dst the array where byte-swapped data will be stored.
+ * \param src the array with the source data we want to byte-swap.
+ * \param n number of words.
  */
 void
-_mesa_swap4( GLuint *p, GLuint n )
+_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
 {
    GLuint i, a, b;
    for (i = 0; i < n; i++) {
-      b = p[i];
+      b = src[i];
       a =  (b >> 24)
 	| ((b >> 8) & 0xff00)
 	| ((b << 8) & 0xff0000)
 	| ((b << 24) & 0xff000000);
-      p[i] = a;
+      dst[i] = a;
    }
 }
 
@@ -142,7 +151,7 @@ _mesa_image_offset( GLuint dimensions,
       assert(format == GL_COLOR_INDEX || format == GL_STENCIL_INDEX);
 
       bytes_per_row = alignment
-                    * CEILING( comp_per_pixel*pixels_per_row, 8*alignment );
+                    * DIV_ROUND_UP( comp_per_pixel*pixels_per_row, 8*alignment );
 
       bytes_per_image = bytes_per_row * rows_per_image;
 
@@ -852,19 +861,21 @@ clip_left_or_bottom(GLint *srcX0, GLint *srcX1,
  */
 GLboolean
 _mesa_clip_blit(struct gl_context *ctx,
+                const struct gl_framebuffer *readFb,
+                const struct gl_framebuffer *drawFb,
                 GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                 GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1)
 {
    const GLint srcXmin = 0;
-   const GLint srcXmax = ctx->ReadBuffer->Width;
+   const GLint srcXmax = readFb->Width;
    const GLint srcYmin = 0;
-   const GLint srcYmax = ctx->ReadBuffer->Height;
+   const GLint srcYmax = readFb->Height;
 
    /* these include scissor bounds */
-   const GLint dstXmin = ctx->DrawBuffer->_Xmin;
-   const GLint dstXmax = ctx->DrawBuffer->_Xmax;
-   const GLint dstYmin = ctx->DrawBuffer->_Ymin;
-   const GLint dstYmax = ctx->DrawBuffer->_Ymax;
+   const GLint dstXmin = drawFb->_Xmin;
+   const GLint dstXmax = drawFb->_Xmax;
+   const GLint dstYmin = drawFb->_Ymin;
+   const GLint dstYmax = drawFb->_Ymax;
 
    /*
    printf("PreClipX:  src: %d .. %d  dst: %d .. %d\n",
diff --git a/mesalib/src/mesa/main/image.h b/mesalib/src/mesa/main/image.h
index abd84bf2f..501586bfb 100644
--- a/mesalib/src/mesa/main/image.h
+++ b/mesalib/src/mesa/main/image.h
@@ -28,15 +28,29 @@
 
 
 #include "glheader.h"
+#include "compiler.h"
 
 struct gl_context;
 struct gl_pixelstore_attrib;
+struct gl_framebuffer;
 
 extern void
-_mesa_swap2( GLushort *p, GLuint n );
+_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
 
 extern void
-_mesa_swap4( GLuint *p, GLuint n );
+_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+
+static inline void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+   _mesa_swap2_copy(p, p, n);
+}
+
+static inline void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+   _mesa_swap4_copy(p, p, n);
+}
 
 extern GLintptr
 _mesa_image_offset( GLuint dimensions,
@@ -127,6 +141,8 @@ _mesa_clip_to_region(GLint xmin, GLint ymin,
 
 extern GLboolean
 _mesa_clip_blit(struct gl_context *ctx,
+                const struct gl_framebuffer *readFb,
+                const struct gl_framebuffer *drawFb,
                 GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                 GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);
 
diff --git a/mesalib/src/mesa/main/imports.c b/mesalib/src/mesa/main/imports.c
index 6945c2f62..4f5a2d11f 100644
--- a/mesalib/src/mesa/main/imports.c
+++ b/mesalib/src/mesa/main/imports.c
@@ -94,7 +94,7 @@ _mesa_align_malloc(size_t bytes, unsigned long alignment)
 
    ASSERT( alignment > 0 );
 
-   ptr = malloc(bytes + alignment + sizeof(void *));
+   ptr = (uintptr_t)malloc(bytes + alignment + sizeof(void *));
    if (!ptr)
       return NULL;
 
@@ -143,7 +143,7 @@ _mesa_align_calloc(size_t bytes, unsigned long alignment)
 
    ASSERT( alignment > 0 );
 
-   ptr = calloc(1, bytes + alignment + sizeof(void *));
+   ptr = (uintptr_t)calloc(1, bytes + alignment + sizeof(void *));
    if (!ptr)
       return NULL;
 
diff --git a/mesalib/src/mesa/main/light.c b/mesalib/src/mesa/main/light.c
index d8ef8f258..e483b826e 100644
--- a/mesalib/src/mesa/main/light.c
+++ b/mesalib/src/mesa/main/light.c
@@ -30,7 +30,7 @@
 #include "enums.h"
 #include "light.h"
 #include "macros.h"
-#include "simple_list.h"
+#include "util/simple_list.h"
 #include "mtypes.h"
 #include "math/m_matrix.h"
 
@@ -1207,12 +1207,3 @@ _mesa_init_lighting( struct gl_context *ctx )
    ctx->_ForceEyeCoords = GL_FALSE;
    ctx->_ModelViewInvScale = 1.0;
 }
-
-
-/**
- * Deallocate malloc'd lighting state attached to given context.
- */
-void
-_mesa_free_lighting_data( struct gl_context *ctx )
-{
-}
diff --git a/mesalib/src/mesa/main/light.h b/mesalib/src/mesa/main/light.h
index c6fba2ea3..d009aa175 100644
--- a/mesalib/src/mesa/main/light.h
+++ b/mesalib/src/mesa/main/light.h
@@ -102,8 +102,6 @@ extern void _mesa_update_color_material( struct gl_context *ctx,
 
 extern void _mesa_init_lighting( struct gl_context *ctx );
 
-extern void _mesa_free_lighting_data( struct gl_context *ctx );
-
 extern void _mesa_allow_light_in_model( struct gl_context *ctx, GLboolean flag );
 
 #endif
diff --git a/mesalib/src/mesa/main/macros.h b/mesalib/src/mesa/main/macros.h
index cd5f2d6f2..cf1f0e9c9 100644
--- a/mesalib/src/mesa/main/macros.h
+++ b/mesalib/src/mesa/main/macros.h
@@ -31,6 +31,7 @@
 #ifndef MACROS_H
 #define MACROS_H
 
+#include "util/u_math.h"
 #include "imports.h"
 
 
@@ -274,14 +275,6 @@ COPY_4UBV(GLubyte dst[4], const GLubyte src[4])
 #endif
 }
 
-/** Copy a 4-element float vector */
-static inline void
-COPY_4FV(GLfloat dst[4], const GLfloat src[4])
-{
-   /* memcpy seems to be most efficient */
-   memcpy(dst, src, sizeof(GLfloat) * 4);
-}
-
 /** Copy \p SZ elements into a 4-element vector */
 #define COPY_SZ_4V(DST, SZ, SRC)  \
 do {                              \
@@ -373,15 +366,6 @@ do {                                   \
       (DST)[3] *= S;                   \
 } while (0)
 
-/** Assignment */
-#define ASSIGN_4V( V, V0, V1, V2, V3 )  \
-do {                                    \
-    V[0] = V0;                          \
-    V[1] = V1;                          \
-    V[2] = V2;                          \
-    V[3] = V3;                          \
-} while(0)
-
 /*@}*/
 
 
@@ -808,7 +792,7 @@ DIFFERENT_SIGNS(GLfloat x, GLfloat y)
 
 
 /** Compute ceiling of integer quotient of A divided by B. */
-#define CEILING( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
+#define DIV_ROUND_UP( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
 
 
 /** casts to silence warnings with some compilers */
diff --git a/mesalib/src/mesa/main/matrix.c b/mesalib/src/mesa/main/matrix.c
index 99a501321..0539caa47 100644
--- a/mesalib/src/mesa/main/matrix.c
+++ b/mesalib/src/mesa/main/matrix.c
@@ -690,7 +690,7 @@ free_matrix_stack( struct gl_matrix_stack *stack )
  */
 void _mesa_init_matrix( struct gl_context * ctx )
 {
-   GLint i;
+   GLuint i;
 
    /* Initialize matrix stacks */
    init_matrix_stack(&ctx->ModelviewMatrixStack, MAX_MODELVIEW_STACK_DEPTH,
@@ -701,7 +701,7 @@ void _mesa_init_matrix( struct gl_context * ctx )
       init_matrix_stack(&ctx->TextureMatrixStack[i], MAX_TEXTURE_STACK_DEPTH,
                         _NEW_TEXTURE_MATRIX);
    for (i = 0; i < Elements(ctx->ProgramMatrixStack); i++)
-      init_matrix_stack(&ctx->ProgramMatrixStack[i], 
+      init_matrix_stack(&ctx->ProgramMatrixStack[i],
 		        MAX_PROGRAM_MATRIX_STACK_DEPTH, _NEW_TRACK_MATRIX);
    ctx->CurrentStack = &ctx->ModelviewMatrixStack;
 
@@ -720,7 +720,7 @@ void _mesa_init_matrix( struct gl_context * ctx )
  */
 void _mesa_free_matrix_data( struct gl_context *ctx )
 {
-   GLint i;
+   GLuint i;
 
    free_matrix_stack(&ctx->ModelviewMatrixStack);
    free_matrix_stack(&ctx->ProjectionMatrixStack);
diff --git a/mesalib/src/mesa/main/mipmap.c b/mesalib/src/mesa/main/mipmap.c
index fdaa68282..75a12cd16 100644
--- a/mesalib/src/mesa/main/mipmap.c
+++ b/mesalib/src/mesa/main/mipmap.c
@@ -1901,7 +1901,7 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
       GLboolean success = GL_TRUE;
 
       /* get src image parameters */
-      srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      srcImage = _mesa_select_tex_image(texObj, target, level);
       ASSERT(srcImage);
       srcWidth = srcImage->Width;
       srcHeight = srcImage->Height;
@@ -2093,10 +2093,10 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
       GLint border;
       GLboolean nextLevel;
       GLuint temp_dst_row_stride, temp_dst_img_stride; /* in bytes */
-      GLuint i;
+      GLint i;
 
       /* get src image parameters */
-      srcImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      srcImage = _mesa_select_tex_image(texObj, target, level);
       ASSERT(srcImage);
       srcWidth = srcImage->Width;
       srcHeight = srcImage->Height;
@@ -2193,7 +2193,7 @@ _mesa_generate_mipmap(struct gl_context *ctx, GLenum target,
    GLint maxLevel;
 
    ASSERT(texObj);
-   srcImage = _mesa_select_tex_image(ctx, texObj, target, texObj->BaseLevel);
+   srcImage = _mesa_select_tex_image(texObj, target, texObj->BaseLevel);
    ASSERT(srcImage);
 
    maxLevel = _mesa_max_texture_levels(ctx, texObj->Target) - 1;
diff --git a/mesalib/src/mesa/main/mtypes.h b/mesalib/src/mesa/main/mtypes.h
index b95dfb9f7..6e9977309 100644
--- a/mesalib/src/mesa/main/mtypes.h
+++ b/mesalib/src/mesa/main/mtypes.h
@@ -41,7 +41,7 @@
 #include "main/config.h"
 #include "glapi/glapi.h"
 #include "math/m_matrix.h"	/* GLmatrix */
-#include "main/simple_list.h"	/* struct simple_node */
+#include "util/simple_list.h"	/* struct simple_node */
 #include "main/formats.h"       /* MESA_FORMAT_COUNT */
 
 
@@ -1004,6 +1004,7 @@ struct gl_polygon_attrib
    GLenum CullFaceMode;		/**< Culling mode GL_FRONT or GL_BACK */
    GLfloat OffsetFactor;	/**< Polygon offset factor, from user */
    GLfloat OffsetUnits;		/**< Polygon offset units, from user */
+   GLfloat OffsetClamp;		/**< Polygon offset clamp, from user */
    GLboolean OffsetPoint;	/**< Offset in GL_POINT mode */
    GLboolean OffsetLine;	/**< Offset in GL_LINE mode */
    GLboolean OffsetFill;	/**< Offset in GL_FILL mode */
@@ -1220,6 +1221,8 @@ struct gl_texture_object
    GLboolean Purgeable;        /**< Is the buffer purgeable under memory
                                     pressure? */
    GLboolean Immutable;        /**< GL_ARB_texture_storage */
+   GLboolean _IsFloat;         /**< GL_OES_texture_float */
+   GLboolean _IsHalfFloat;     /**< GL_OES_texture_half_float */
 
    GLuint MinLevel;            /**< GL_ARB_texture_view */
    GLuint MinLayer;            /**< GL_ARB_texture_view */
@@ -3031,6 +3034,8 @@ struct gl_shader_compiler_options
    GLboolean OptimizeForAOS;
 
    struct gl_sl_pragmas DefaultPragmas; /**< Default #pragma settings */
+
+   struct nir_shader_compiler_options *NirOptions;
 };
 
 
@@ -3069,6 +3074,9 @@ struct gl_query_state
    /** GL_ARB_timer_query */
    struct gl_query_object *TimeElapsed;
 
+   /** GL_ARB_pipeline_statistics_query */
+   struct gl_query_object *pipeline_stats[MAX_PIPELINE_STATISTICS];
+
    GLenum CondRenderMode;
 };
 
@@ -3455,6 +3463,17 @@ struct gl_constants
       GLuint Timestamp;
       GLuint PrimitivesGenerated;
       GLuint PrimitivesWritten;
+      GLuint VerticesSubmitted;
+      GLuint PrimitivesSubmitted;
+      GLuint VsInvocations;
+      GLuint TessPatches;
+      GLuint TessInvocations;
+      GLuint GsInvocations;
+      GLuint GsPrimitives;
+      GLuint FsInvocations;
+      GLuint ComputeInvocations;
+      GLuint ClInPrimitives;
+      GLuint ClOutPrimitives;
    } QueryCounterBits;
 
    GLuint MaxDrawBuffers;    /**< GL_ARB_draw_buffers */
@@ -3745,18 +3764,21 @@ struct gl_extensions
    GLboolean ARB_explicit_uniform_location;
    GLboolean ARB_geometry_shader4;
    GLboolean ARB_gpu_shader5;
+   GLboolean ARB_gpu_shader_fp64;
    GLboolean ARB_half_float_vertex;
    GLboolean ARB_instanced_arrays;
    GLboolean ARB_internalformat_query;
    GLboolean ARB_map_buffer_range;
    GLboolean ARB_occlusion_query;
    GLboolean ARB_occlusion_query2;
+   GLboolean ARB_pipeline_statistics_query;
    GLboolean ARB_point_sprite;
    GLboolean ARB_sample_shading;
    GLboolean ARB_seamless_cube_map;
    GLboolean ARB_shader_atomic_counters;
    GLboolean ARB_shader_bit_encoding;
    GLboolean ARB_shader_image_load_store;
+   GLboolean ARB_shader_precision;
    GLboolean ARB_shader_stencil_export;
    GLboolean ARB_shader_texture_lod;
    GLboolean ARB_shading_language_packing;
@@ -3764,6 +3786,7 @@ struct gl_extensions
    GLboolean ARB_shadow;
    GLboolean ARB_stencil_texturing;
    GLboolean ARB_sync;
+   GLboolean ARB_tessellation_shader;
    GLboolean ARB_texture_border_clamp;
    GLboolean ARB_texture_buffer_object;
    GLboolean ARB_texture_buffer_object_rgb32;
@@ -3810,6 +3833,7 @@ struct gl_extensions
    GLboolean EXT_packed_float;
    GLboolean EXT_pixel_buffer_object;
    GLboolean EXT_point_parameters;
+   GLboolean EXT_polygon_offset_clamp;
    GLboolean EXT_provoking_vertex;
    GLboolean EXT_shader_integer_mix;
    GLboolean EXT_stencil_two_side;
@@ -3832,6 +3856,7 @@ struct gl_extensions
    GLboolean OES_standard_derivatives;
    /* vendor extensions */
    GLboolean AMD_performance_monitor;
+   GLboolean AMD_pinned_memory;
    GLboolean AMD_seamless_cubemap_per_texture;
    GLboolean AMD_vertex_shader_layer;
    GLboolean AMD_vertex_shader_viewport_index;
@@ -3858,6 +3883,10 @@ struct gl_extensions
    GLboolean OES_draw_texture;
    GLboolean OES_depth_texture_cube_map;
    GLboolean OES_EGL_image_external;
+   GLboolean OES_texture_float;
+   GLboolean OES_texture_float_linear;
+   GLboolean OES_texture_half_float;
+   GLboolean OES_texture_half_float_linear;
    GLboolean OES_compressed_ETC1_RGB8_texture;
    GLboolean extension_sentinel;
    /** The extension string */
@@ -4369,6 +4398,12 @@ struct gl_context
    struct gl_buffer_object *AtomicBuffer;
 
    /**
+    * Object currently associated w/ the GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
+    * target.
+    */
+   struct gl_buffer_object *ExternalVirtualMemoryBuffer;
+
+   /**
     * Array of atomic counter buffer binding points.
     */
    struct gl_atomic_buffer_binding
@@ -4477,9 +4512,6 @@ extern int MESA_DEBUG_FLAGS;
 # define MESA_VERBOSE 0
 # define MESA_DEBUG_FLAGS 0
 # define MESA_FUNCTION "a function"
-# ifndef NDEBUG
-#  define NDEBUG
-# endif
 #endif
 
 
diff --git a/mesalib/src/mesa/main/multisample.c b/mesalib/src/mesa/main/multisample.c
index 1f3fa0c15..b696de9f2 100644
--- a/mesalib/src/mesa/main/multisample.c
+++ b/mesalib/src/mesa/main/multisample.c
@@ -150,6 +150,16 @@ GLenum
 _mesa_check_sample_count(struct gl_context *ctx, GLenum target,
                          GLenum internalFormat, GLsizei samples)
 {
+   /* Section 4.4 (Framebuffer objects) of the OpenGL ES 3.0 specification says:
+    *
+    *     "If internalformat is a signed or unsigned integer format and samples
+    *     is greater than zero, then the error INVALID_OPERATION is generated."
+    */
+   if (_mesa_is_gles3(ctx) && _mesa_is_enum_format_integer(internalFormat)) {
+      return GL_INVALID_OPERATION;
+   }
+
+
    /* If ARB_internalformat_query is supported, then treat its highest
     * returned sample count as the absolute maximum for this format; it is
     * allowed to exceed MAX_SAMPLES.
diff --git a/mesalib/src/mesa/main/pack.c b/mesalib/src/mesa/main/pack.c
index 649a74cce..2111a7604 100644
--- a/mesalib/src/mesa/main/pack.c
+++ b/mesalib/src/mesa/main/pack.c
@@ -53,8 +53,8 @@
 #include "pixeltransfer.h"
 #include "imports.h"
 #include "glformats.h"
-#include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
-#include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
+#include "format_utils.h"
+#include "format_pack.h"
 
 
 /**
@@ -98,7 +98,8 @@ void
 _mesa_unpack_polygon_stipple( const GLubyte *pattern, GLuint dest[32],
                               const struct gl_pixelstore_attrib *unpacking )
 {
-   GLubyte *ptrn = (GLubyte *) _mesa_unpack_bitmap(32, 32, pattern, unpacking);
+   GLubyte *ptrn = (GLubyte *) _mesa_unpack_image(2, 32, 32, 1, GL_COLOR_INDEX,
+                                                  GL_BITMAP, pattern, unpacking);
    if (ptrn) {
       /* Convert pattern from GLubytes to GLuints and handle big/little
        * endian differences
@@ -142,108 +143,6 @@ _mesa_pack_polygon_stipple( const GLuint pattern[32], GLubyte *dest,
 
 
 /*
- * Unpack bitmap data.  Resulting data will be in most-significant-bit-first
- * order with row alignment = 1 byte.
- */
-GLvoid *
-_mesa_unpack_bitmap( GLint width, GLint height, const GLubyte *pixels,
-                     const struct gl_pixelstore_attrib *packing )
-{
-   GLint bytes, row, width_in_bytes;
-   GLubyte *buffer, *dst;
-
-   if (!pixels)
-      return NULL;
-
-   /* Alloc dest storage */
-   bytes = ((width + 7) / 8 * height);
-   buffer = malloc( bytes );
-   if (!buffer)
-      return NULL;
-
-   width_in_bytes = CEILING( width, 8 );
-   dst = buffer;
-   for (row = 0; row < height; row++) {
-      const GLubyte *src = (const GLubyte *)
-         _mesa_image_address2d(packing, pixels, width, height,
-                               GL_COLOR_INDEX, GL_BITMAP, row, 0);
-      if (!src) {
-         free(buffer);
-         return NULL;
-      }
-
-      if ((packing->SkipPixels & 7) == 0) {
-         memcpy( dst, src, width_in_bytes );
-         if (packing->LsbFirst) {
-            flip_bytes( dst, width_in_bytes );
-         }
-      }
-      else {
-         /* handling SkipPixels is a bit tricky (no pun intended!) */
-         GLint i;
-         if (packing->LsbFirst) {
-            GLubyte srcMask = 1 << (packing->SkipPixels & 0x7);
-            GLubyte dstMask = 128;
-            const GLubyte *s = src;
-            GLubyte *d = dst;
-            *d = 0;
-            for (i = 0; i < width; i++) {
-               if (*s & srcMask) {
-                  *d |= dstMask;
-               }
-               if (srcMask == 128) {
-                  srcMask = 1;
-                  s++;
-               }
-               else {
-                  srcMask = srcMask << 1;
-               }
-               if (dstMask == 1) {
-                  dstMask = 128;
-                  d++;
-                  *d = 0;
-               }
-               else {
-                  dstMask = dstMask >> 1;
-               }
-            }
-         }
-         else {
-            GLubyte srcMask = 128 >> (packing->SkipPixels & 0x7);
-            GLubyte dstMask = 128;
-            const GLubyte *s = src;
-            GLubyte *d = dst;
-            *d = 0;
-            for (i = 0; i < width; i++) {
-               if (*s & srcMask) {
-                  *d |= dstMask;
-               }
-               if (srcMask == 1) {
-                  srcMask = 128;
-                  s++;
-               }
-               else {
-                  srcMask = srcMask >> 1;
-               }
-               if (dstMask == 1) {
-                  dstMask = 128;
-                  d++;
-                  *d = 0;
-               }
-               else {
-                  dstMask = dstMask >> 1;
-               }
-            }
-         }
-      }
-      dst += width_in_bytes;
-   }
-
-   return buffer;
-}
-
-
-/*
  * Pack bitmap data.
  */
 void
@@ -256,7 +155,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source,
    if (!source)
       return;
 
-   width_in_bytes = CEILING( width, 8 );
+   width_in_bytes = DIV_ROUND_UP( width, 8 );
    src = source;
    for (row = 0; row < height; row++) {
       GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, dest,
@@ -333,2361 +232,6 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source,
 }
 
 
-/**
- * Get indexes of color components for a basic color format, such as
- * GL_RGBA, GL_RED, GL_LUMINANCE_ALPHA, etc.  Return -1 for indexes
- * that do not apply.
- */
-static void
-get_component_indexes(GLenum format,
-                      GLint *redIndex,
-                      GLint *greenIndex,
-                      GLint *blueIndex,
-                      GLint *alphaIndex,
-                      GLint *luminanceIndex,
-                      GLint *intensityIndex)
-{
-   *redIndex = -1;
-   *greenIndex = -1;
-   *blueIndex = -1;
-   *alphaIndex = -1;
-   *luminanceIndex = -1;
-   *intensityIndex = -1;
-
-   switch (format) {
-   case GL_LUMINANCE:
-   case GL_LUMINANCE_INTEGER_EXT:
-      *luminanceIndex = 0;
-      break;
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-      *luminanceIndex = 0;
-      *alphaIndex = 1;
-      break;
-   case GL_INTENSITY:
-      *intensityIndex = 0;
-      break;
-   case GL_RED:
-   case GL_RED_INTEGER_EXT:
-      *redIndex = 0;
-      break;
-   case GL_GREEN:
-   case GL_GREEN_INTEGER_EXT:
-      *greenIndex = 0;
-      break;
-   case GL_BLUE:
-   case GL_BLUE_INTEGER_EXT:
-      *blueIndex = 0;
-      break;
-   case GL_ALPHA:
-   case GL_ALPHA_INTEGER_EXT:
-      *alphaIndex = 0;
-      break;
-   case GL_RG:
-   case GL_RG_INTEGER:
-      *redIndex = 0;
-      *greenIndex = 1;
-      break;
-   case GL_RGB:
-   case GL_RGB_INTEGER_EXT:
-      *redIndex = 0;
-      *greenIndex = 1;
-      *blueIndex = 2;
-      break;
-   case GL_BGR:
-   case GL_BGR_INTEGER_EXT:
-      *blueIndex = 0;
-      *greenIndex = 1;
-      *redIndex = 2;
-      break;
-   case GL_RGBA:
-   case GL_RGBA_INTEGER_EXT:
-      *redIndex = 0;
-      *greenIndex = 1;
-      *blueIndex = 2;
-      *alphaIndex = 3;
-      break;
-   case GL_BGRA:
-   case GL_BGRA_INTEGER:
-      *redIndex = 2;
-      *greenIndex = 1;
-      *blueIndex = 0;
-      *alphaIndex = 3;
-      break;
-   case GL_ABGR_EXT:
-      *redIndex = 3;
-      *greenIndex = 2;
-      *blueIndex = 1;
-      *alphaIndex = 0;
-      break;
-   default:
-      assert(0 && "bad format in get_component_indexes()");
-   }
-}
-
-
-
-/**
- * For small integer types, return the min and max possible values.
- * Used for clamping floats to unscaled integer types.
- * \return GL_TRUE if type is handled, GL_FALSE otherwise.
- */
-static GLboolean
-get_type_min_max(GLenum type, GLfloat *min, GLfloat *max)
-{
-   switch (type) {
-   case GL_BYTE:
-      *min = -128.0;
-      *max = 127.0;
-      return GL_TRUE;
-   case GL_UNSIGNED_BYTE:
-      *min = 0.0;
-      *max = 255.0;
-      return GL_TRUE;
-   case GL_SHORT:
-      *min = -32768.0;
-      *max = 32767.0;
-      return GL_TRUE;
-   case GL_UNSIGNED_SHORT:
-      *min = 0.0;
-      *max = 65535.0;
-      return GL_TRUE;
-   default:
-      return GL_FALSE;
-   }
-}
-
-/* Customization of unsigned integer packing.
- */
-#define SRC_TYPE GLuint
-
-#define DST_TYPE GLuint
-#define SRC_CONVERT(x) (x)
-#define FN_NAME pack_uint_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLint
-#define SRC_CONVERT(x) MIN2(x, 0x7fffffff)
-#define FN_NAME pack_int_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLushort
-#define SRC_CONVERT(x) MIN2(x, 0xffff)
-#define FN_NAME pack_ushort_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLshort
-#define SRC_CONVERT(x) CLAMP((int)x, -32768, 32767)
-#define FN_NAME pack_short_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLubyte
-#define SRC_CONVERT(x) MIN2(x, 0xff)
-#define FN_NAME pack_ubyte_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLbyte
-#define SRC_CONVERT(x) CLAMP((int)x, -128, 127)
-#define FN_NAME pack_byte_from_uint_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#undef SRC_TYPE
-
-static void
-_pack_rgba_span_from_uints_problem(struct gl_context *ctx,
-                                   GLenum dstFormat, GLenum dstType)
-{
-   _mesa_problem(ctx,
-                 "Unsupported type (%s) / format (%s) "
-                 "in _mesa_pack_rgba_span_from_uints",
-                 _mesa_lookup_enum_by_nr(dstType),
-                 _mesa_lookup_enum_by_nr(dstFormat));
-}
-
-void
-_mesa_pack_rgba_span_from_uints(struct gl_context *ctx, GLuint n, GLuint rgba[][4],
-                                GLenum dstFormat, GLenum dstType,
-                                GLvoid *dstAddr)
-{
-   GLuint i;
-
-   switch(dstType) {
-   case GL_UNSIGNED_INT:
-      pack_uint_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_INT:
-      pack_int_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_UNSIGNED_SHORT:
-      pack_ushort_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_SHORT:
-      pack_short_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_UNSIGNED_BYTE:
-      pack_ubyte_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_BYTE:
-      pack_byte_from_uint_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_UNSIGNED_BYTE_3_3_2:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLubyte *dst = (GLubyte *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 7) << 5)
-                   | (MIN2(rgba[i][GCOMP], 7) << 2)
-                   | (MIN2(rgba[i][BCOMP], 3)     );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_BYTE_2_3_3_REV:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLubyte *dst = (GLubyte *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 7)     )
-                   | (MIN2(rgba[i][GCOMP], 7) << 3)
-                   | (MIN2(rgba[i][BCOMP], 3) << 6);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_6_5:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 31) << 11)
-                   | (MIN2(rgba[i][GCOMP], 63) <<  5)
-                   | (MIN2(rgba[i][BCOMP], 31)      );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_6_5_REV:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 31)      )
-                   | (MIN2(rgba[i][GCOMP], 63) <<  5)
-                   | (MIN2(rgba[i][BCOMP], 31) << 11);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_4_4_4_4:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 15) << 12)
-                   | (MIN2(rgba[i][GCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][BCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][ACOMP], 15)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 15) << 12)
-                   | (MIN2(rgba[i][GCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][RCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][ACOMP], 15)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 15) << 12)
-                   | (MIN2(rgba[i][BCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][GCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][RCOMP], 15)      );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 15)      )
-                   | (MIN2(rgba[i][GCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][BCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][ACOMP], 15) << 12);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 15)      )
-                   | (MIN2(rgba[i][GCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][RCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][ACOMP], 15) << 12);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 15)      )
-                   | (MIN2(rgba[i][BCOMP], 15) <<  4)
-                   | (MIN2(rgba[i][GCOMP], 15) <<  8)
-                   | (MIN2(rgba[i][RCOMP], 15) << 12);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_5_5_1:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 31) << 11)
-                   | (MIN2(rgba[i][GCOMP], 31) <<  6)
-                   | (MIN2(rgba[i][BCOMP], 31) <<  1)
-                   | (MIN2(rgba[i][ACOMP],  1)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 31) << 11)
-                   | (MIN2(rgba[i][GCOMP], 31) <<  6)
-                   | (MIN2(rgba[i][RCOMP], 31) <<  1)
-                   | (MIN2(rgba[i][ACOMP],  1)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 31) << 11)
-                   | (MIN2(rgba[i][BCOMP], 31) <<  6)
-                   | (MIN2(rgba[i][GCOMP], 31) <<  1)
-                   | (MIN2(rgba[i][RCOMP],  1)      );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 31)      )
-                   | (MIN2(rgba[i][GCOMP], 31) <<  5)
-                   | (MIN2(rgba[i][BCOMP], 31) << 10)
-                   | (MIN2(rgba[i][ACOMP],  1) << 15);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 31)      )
-                   | (MIN2(rgba[i][GCOMP], 31) <<  5)
-                   | (MIN2(rgba[i][RCOMP], 31) << 10)
-                   | (MIN2(rgba[i][ACOMP],  1) << 15);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 31)      )
-                   | (MIN2(rgba[i][BCOMP], 31) <<  5)
-                   | (MIN2(rgba[i][GCOMP], 31) << 10)
-                   | (MIN2(rgba[i][RCOMP],  1) << 15);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 255) << 24)
-                   | (MIN2(rgba[i][GCOMP], 255) << 16)
-                   | (MIN2(rgba[i][BCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][ACOMP], 255)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 255) << 24)
-                   | (MIN2(rgba[i][GCOMP], 255) << 16)
-                   | (MIN2(rgba[i][RCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][ACOMP], 255)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 255) << 24)
-                   | (MIN2(rgba[i][BCOMP], 255) << 16)
-                   | (MIN2(rgba[i][GCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][RCOMP], 255)      );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 255)      )
-                   | (MIN2(rgba[i][GCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][BCOMP], 255) << 16)
-                   | (MIN2(rgba[i][ACOMP], 255) << 24);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 255)      )
-                   | (MIN2(rgba[i][GCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][RCOMP], 255) << 16)
-                   | (MIN2(rgba[i][ACOMP], 255) << 24);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 255)      )
-                   | (MIN2(rgba[i][BCOMP], 255) <<  8)
-                   | (MIN2(rgba[i][GCOMP], 255) << 16)
-                   | (MIN2(rgba[i][RCOMP], 255) << 24);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_10_10_10_2:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 1023) << 22)
-                   | (MIN2(rgba[i][GCOMP], 1023) << 12)
-                   | (MIN2(rgba[i][BCOMP], 1023) <<  2)
-                   | (MIN2(rgba[i][ACOMP],    3)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 1023) << 22)
-                   | (MIN2(rgba[i][GCOMP], 1023) << 12)
-                   | (MIN2(rgba[i][RCOMP], 1023) <<  2)
-                   | (MIN2(rgba[i][ACOMP],    3)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 1023) << 22)
-                   | (MIN2(rgba[i][BCOMP], 1023) << 12)
-                   | (MIN2(rgba[i][GCOMP], 1023) <<  2)
-                   | (MIN2(rgba[i][RCOMP],    3)      );
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_2_10_10_10_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][RCOMP], 1023)      )
-                   | (MIN2(rgba[i][GCOMP], 1023) << 10)
-                   | (MIN2(rgba[i][BCOMP], 1023) << 20)
-                   | (MIN2(rgba[i][ACOMP],    3) << 30);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][BCOMP], 1023)      )
-                   | (MIN2(rgba[i][GCOMP], 1023) << 10)
-                   | (MIN2(rgba[i][RCOMP], 1023) << 20)
-                   | (MIN2(rgba[i][ACOMP],    3) << 30);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (MIN2(rgba[i][ACOMP], 1023)      )
-                   | (MIN2(rgba[i][BCOMP], 1023) << 10)
-                   | (MIN2(rgba[i][GCOMP], 1023) << 20)
-                   | (MIN2(rgba[i][RCOMP],    3) << 30);
-         }
-      } else {
-         _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   default:
-      _pack_rgba_span_from_uints_problem(ctx, dstFormat, dstType);
-      return;
-   }
-}
-
-
-/* Customization of signed integer packing.
- */
-#define SRC_TYPE GLint
-
-#define DST_TYPE GLuint
-#define SRC_CONVERT(x) MAX2(x, 0)
-#define FN_NAME pack_uint_from_int_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLushort
-#define SRC_CONVERT(x) MAX2(x, 0)
-#define FN_NAME pack_ushort_from_int_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLshort
-#define SRC_CONVERT(x) CLAMP(x, -0x8000, 0x7fff)
-#define FN_NAME pack_short_from_int_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLubyte
-#define SRC_CONVERT(x) MAX2(x, 0)
-#define FN_NAME pack_ubyte_from_int_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#define DST_TYPE GLbyte
-#define SRC_CONVERT(x) CLAMP(x, -0x80, 0x7f)
-#define FN_NAME pack_byte_from_int_rgba
-#include "pack_tmp.h"
-#undef DST_TYPE
-#undef SRC_CONVERT
-#undef FN_NAME
-
-#undef SRC_TYPE
-
-static void
-_pack_rgba_span_from_ints_problem(struct gl_context *ctx,
-                                   GLenum dstFormat, GLenum dstType)
-{
-   _mesa_problem(ctx,
-                 "Unsupported type (%s) / format (%s) "
-                 "in _mesa_pack_rgba_span_from_ints",
-                 _mesa_lookup_enum_by_nr(dstType),
-                 _mesa_lookup_enum_by_nr(dstFormat));
-}
-
-void
-_mesa_pack_rgba_span_from_ints(struct gl_context *ctx, GLuint n, GLint rgba[][4],
-                               GLenum dstFormat, GLenum dstType,
-                               GLvoid *dstAddr)
-{
-   GLuint i;
-
-   switch(dstType) {
-   case GL_UNSIGNED_INT:
-      pack_uint_from_int_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_INT:
-      /* No conversion necessary. */
-      pack_uint_from_uint_rgba(ctx, dstAddr, dstFormat, (GLuint (*)[4]) rgba, n);
-      break;
-   case GL_UNSIGNED_SHORT:
-      pack_ushort_from_int_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_SHORT:
-      pack_short_from_int_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_UNSIGNED_BYTE:
-      pack_ubyte_from_int_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_BYTE:
-      pack_byte_from_int_rgba(ctx, dstAddr, dstFormat, rgba, n);
-      break;
-   case GL_UNSIGNED_BYTE_3_3_2:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLubyte *dst = (GLubyte *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 7) << 5)
-                   | (CLAMP(rgba[i][GCOMP], 0, 7) << 2)
-                   | (CLAMP(rgba[i][BCOMP], 0, 3)     );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_BYTE_2_3_3_REV:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLubyte *dst = (GLubyte *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 7)     )
-                   | (CLAMP(rgba[i][GCOMP], 0, 7) << 3)
-                   | (CLAMP(rgba[i][BCOMP], 0, 3) << 6);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_6_5:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 31) << 11)
-                   | (CLAMP(rgba[i][GCOMP], 0, 63) <<  5)
-                   | (CLAMP(rgba[i][BCOMP], 0, 31)      );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_6_5_REV:
-      if ((dstFormat == GL_RGB) || (dstFormat == GL_RGB_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 31)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 63) <<  5)
-                   | (CLAMP(rgba[i][BCOMP], 0, 31) << 11);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_4_4_4_4:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 15) << 12)
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][BCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][ACOMP], 0, 15)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 15) << 12)
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][RCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][ACOMP], 0, 15)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 15) << 12)
-                   | (CLAMP(rgba[i][BCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][RCOMP], 0, 15)      );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 15)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][BCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][ACOMP], 0, 15) << 12);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 15)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][RCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][ACOMP], 0, 15) << 12);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 15)      )
-                   | (CLAMP(rgba[i][BCOMP], 0, 15) <<  4)
-                   | (CLAMP(rgba[i][GCOMP], 0, 15) <<  8)
-                   | (CLAMP(rgba[i][RCOMP], 0, 15) << 12);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_5_5_5_1:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 31) << 11)
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) <<  6)
-                   | (CLAMP(rgba[i][BCOMP], 0, 31) <<  1)
-                   | (CLAMP(rgba[i][ACOMP], 0,  1)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 31) << 11)
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) <<  6)
-                   | (CLAMP(rgba[i][RCOMP], 0, 31) <<  1)
-                   | (CLAMP(rgba[i][ACOMP], 0,  1)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 31) << 11)
-                   | (CLAMP(rgba[i][BCOMP], 0, 31) <<  6)
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) <<  1)
-                   | (CLAMP(rgba[i][RCOMP], 0,  1)      );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 31)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) <<  5)
-                   | (CLAMP(rgba[i][BCOMP], 0, 31) << 10)
-                   | (CLAMP(rgba[i][ACOMP], 0,  1) << 15);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 31)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) <<  5)
-                   | (CLAMP(rgba[i][RCOMP], 0, 31) << 10)
-                   | (CLAMP(rgba[i][ACOMP], 0,  1) << 15);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLushort *dst = (GLushort *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 31)      )
-                   | (CLAMP(rgba[i][BCOMP], 0, 31) <<  5)
-                   | (CLAMP(rgba[i][GCOMP], 0, 31) << 10)
-                   | (CLAMP(rgba[i][RCOMP], 0,  1) << 15);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 255) << 24)
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][BCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][ACOMP], 0, 255)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 255) << 24)
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][RCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][ACOMP], 0, 255)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 255) << 24)
-                   | (CLAMP(rgba[i][BCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][RCOMP], 0, 255)      );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 255)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][BCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][ACOMP], 0, 255) << 24);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 255)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][RCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][ACOMP], 0, 255) << 24);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 255)      )
-                   | (CLAMP(rgba[i][BCOMP], 0, 255) <<  8)
-                   | (CLAMP(rgba[i][GCOMP], 0, 255) << 16)
-                   | (CLAMP(rgba[i][RCOMP], 0, 255) << 24);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_10_10_10_2:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 1023) << 22)
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) << 12)
-                   | (CLAMP(rgba[i][BCOMP], 0, 1023) <<  2)
-                   | (CLAMP(rgba[i][ACOMP], 0,    3)      );
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 1023) << 22)
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) << 12)
-                   | (CLAMP(rgba[i][RCOMP], 0, 1023) <<  2)
-                   | (CLAMP(rgba[i][ACOMP], 0,    3)      );
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 1023) << 22)
-                   | (CLAMP(rgba[i][BCOMP], 0, 1023) << 12)
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) <<  2)
-                   | (CLAMP(rgba[i][RCOMP], 0,    3)      );
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   case GL_UNSIGNED_INT_2_10_10_10_REV:
-      if ((dstFormat == GL_RGBA) || (dstFormat == GL_RGBA_INTEGER_EXT)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][RCOMP], 0, 1023)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) << 10)
-                   | (CLAMP(rgba[i][BCOMP], 0, 1023) << 20)
-                   | (CLAMP(rgba[i][ACOMP], 0,    3) << 30);
-         }
-      }
-      else if ((dstFormat == GL_BGRA) || (dstFormat == GL_BGRA_INTEGER)) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][BCOMP], 0, 1023)      )
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) << 10)
-                   | (CLAMP(rgba[i][RCOMP], 0, 1023) << 20)
-                   | (CLAMP(rgba[i][ACOMP], 0,    3) << 30);
-         }
-      }
-      else if (dstFormat == GL_ABGR_EXT) {
-         GLuint *dst = (GLuint *) dstAddr;
-         for (i=0;i<n;i++) {
-            dst[i] = (CLAMP(rgba[i][ACOMP], 0, 1023)      )
-                   | (CLAMP(rgba[i][BCOMP], 0, 1023) << 10)
-                   | (CLAMP(rgba[i][GCOMP], 0, 1023) << 20)
-                   | (CLAMP(rgba[i][RCOMP], 0,    3) << 30);
-         }
-      } else {
-         _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      }
-      break;
-   default:
-      _pack_rgba_span_from_ints_problem(ctx, dstFormat, dstType);
-      return;
-   }
-}
-
-
-/**
- * Used to pack an array [][4] of RGBA float colors as specified
- * by the dstFormat, dstType and dstPacking.  Used by glReadPixels.
- * Historically, the RGBA values were in [0,1] and rescaled to fit
- * into GLubytes, etc.  But with new integer formats, the RGBA values
- * may have any value and we don't always rescale when converting to
- * integers.
- *
- * Note: the rgba values will be modified by this function when any pixel
- * transfer ops are enabled.
- */
-void
-_mesa_pack_rgba_span_float(struct gl_context *ctx, GLuint n, GLfloat rgba[][4],
-                           GLenum dstFormat, GLenum dstType,
-                           GLvoid *dstAddr,
-                           const struct gl_pixelstore_attrib *dstPacking,
-                           GLbitfield transferOps)
-{
-   GLfloat *luminance;
-   const GLint comps = _mesa_components_in_format(dstFormat);
-   const GLboolean intDstFormat = _mesa_is_enum_format_integer(dstFormat);
-   GLuint i;
-
-   if (dstFormat == GL_LUMINANCE ||
-       dstFormat == GL_LUMINANCE_ALPHA ||
-       dstFormat == GL_LUMINANCE_INTEGER_EXT ||
-       dstFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT) {
-      luminance = malloc(n * sizeof(GLfloat));
-      if (!luminance) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing");
-         return;
-      }
-   }
-   else {
-      luminance = NULL;
-   }
-
-   /* EXT_texture_integer specifies no transfer ops on integer
-    * types in the resolved issues section. Just set them to 0
-    * for integer surfaces.
-    */
-   if (intDstFormat)
-      transferOps = 0;
-
-   if (transferOps) {
-      _mesa_apply_rgba_transfer_ops(ctx, transferOps, n, rgba);
-   }
-
-   /*
-    * Component clamping (besides clamping to [0,1] in
-    * _mesa_apply_rgba_transfer_ops()).
-    */
-   if (intDstFormat) {
-      /* clamping to dest type's min/max values */
-      GLfloat min, max;
-      if (get_type_min_max(dstType, &min, &max)) {
-         for (i = 0; i < n; i++) {
-            rgba[i][RCOMP] = CLAMP(rgba[i][RCOMP], min, max);
-            rgba[i][GCOMP] = CLAMP(rgba[i][GCOMP], min, max);
-            rgba[i][BCOMP] = CLAMP(rgba[i][BCOMP], min, max);
-            rgba[i][ACOMP] = CLAMP(rgba[i][ACOMP], min, max);
-         }
-      }
-   }
-   else if (dstFormat == GL_LUMINANCE || dstFormat == GL_LUMINANCE_ALPHA) {
-      /* compute luminance values */
-      if (transferOps & IMAGE_CLAMP_BIT) {
-         for (i = 0; i < n; i++) {
-            GLfloat sum = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
-            luminance[i] = CLAMP(sum, 0.0F, 1.0F);
-         }
-      }
-      else {
-         for (i = 0; i < n; i++) {
-            luminance[i] = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
-         }
-      }
-   }
-
-   /*
-    * Pack/store the pixels.  Ugh!  Lots of cases!!!
-    */
-   switch (dstType) {
-      case GL_UNSIGNED_BYTE:
-         {
-            GLubyte *dst = (GLubyte *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UBYTE(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UBYTE(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_UBYTE(luminance[i]);
-                     dst[i*2+1] = FLOAT_TO_UBYTE(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                     dst[i*2+1] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                     dst[i*3+1] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                     dst[i*4+1] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UBYTE(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                     dst[i*3+1] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                     dst[i*4+1] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UBYTE(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UBYTE(rgba[i][ACOMP]);
-                     dst[i*4+1] = FLOAT_TO_UBYTE(rgba[i][BCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UBYTE(rgba[i][GCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UBYTE(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLubyte) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLubyte) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLubyte) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLubyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLubyte) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLubyte) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLubyte) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLubyte) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLubyte) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLubyte) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLubyte) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLubyte) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLubyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLubyte) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLubyte) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLubyte) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLubyte) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLubyte) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLubyte) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLubyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLubyte) (rgba[i][RCOMP] +
-                                             rgba[i][GCOMP] +
-                                             rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLubyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLubyte) (rgba[i][RCOMP] +
-                                         rgba[i][GCOMP] +
-                                         rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_BYTE:
-         {
-            GLbyte *dst = (GLbyte *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_BYTE_TEX(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_BYTE_TEX(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_BYTE_TEX(luminance[i]);
-                     dst[i*2+1] = FLOAT_TO_BYTE_TEX(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                     dst[i*2+1] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                     dst[i*3+1] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                     dst[i*4+1] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                     dst[i*4+3] = FLOAT_TO_BYTE_TEX(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                     dst[i*3+1] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                     dst[i*4+1] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                     dst[i*4+3] = FLOAT_TO_BYTE_TEX(rgba[i][ACOMP]);
-                  }
-		  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_BYTE_TEX(rgba[i][ACOMP]);
-                     dst[i*4+1] = FLOAT_TO_BYTE_TEX(rgba[i][BCOMP]);
-                     dst[i*4+2] = FLOAT_TO_BYTE_TEX(rgba[i][GCOMP]);
-                     dst[i*4+3] = FLOAT_TO_BYTE_TEX(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLbyte) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLbyte) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLbyte) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLbyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLbyte) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLbyte) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLbyte) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLbyte) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLbyte) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLbyte) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLbyte) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLbyte) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLbyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLbyte) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLbyte) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLbyte) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLbyte) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLbyte) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLbyte) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLbyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLbyte) (rgba[i][RCOMP] +
-                                            rgba[i][GCOMP] +
-                                            rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLbyte) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLbyte) (rgba[i][RCOMP] +
-                                        rgba[i][GCOMP] +
-                                        rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT:
-         {
-            GLushort *dst = (GLushort *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     CLAMPED_FLOAT_TO_USHORT(dst[i], rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     CLAMPED_FLOAT_TO_USHORT(dst[i], rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     CLAMPED_FLOAT_TO_USHORT(dst[i], rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     CLAMPED_FLOAT_TO_USHORT(dst[i], rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     UNCLAMPED_FLOAT_TO_USHORT(dst[i], luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     UNCLAMPED_FLOAT_TO_USHORT(dst[i*2+0], luminance[i]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*2+1], rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*2+0], rgba[i][RCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*2+1], rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+0], rgba[i][RCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+1], rgba[i][GCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+2], rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+0], rgba[i][RCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+1], rgba[i][GCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+2], rgba[i][BCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+3], rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+0], rgba[i][BCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+1], rgba[i][GCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*3+2], rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+0], rgba[i][BCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+1], rgba[i][GCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+2], rgba[i][RCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+3], rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+0], rgba[i][ACOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+1], rgba[i][BCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+2], rgba[i][GCOMP]);
-                     CLAMPED_FLOAT_TO_USHORT(dst[i*4+3], rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLushort) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLushort) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLushort) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLushort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLushort) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLushort) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLushort) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLushort) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLushort) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLushort) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLushort) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLushort) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLushort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLushort) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLushort) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLushort) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLushort) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLushort) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLushort) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLushort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLushort) (rgba[i][RCOMP] +
-                                              rgba[i][GCOMP] +
-                                              rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLushort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLushort) (rgba[i][RCOMP] +
-                                          rgba[i][GCOMP] +
-                                          rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_SHORT:
-         {
-            GLshort *dst = (GLshort *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_SHORT_TEX(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_SHORT_TEX(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_SHORT_TEX(luminance[i]);
-                     dst[i*2+1] = FLOAT_TO_SHORT_TEX(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                     dst[i*2+1] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                     dst[i*3+1] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                     dst[i*4+1] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                     dst[i*4+3] = FLOAT_TO_SHORT_TEX(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                     dst[i*3+1] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                     dst[i*4+1] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                     dst[i*4+3] = FLOAT_TO_SHORT_TEX(rgba[i][ACOMP]);
-                  }
-		  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_SHORT_TEX(rgba[i][ACOMP]);
-                     dst[i*4+1] = FLOAT_TO_SHORT_TEX(rgba[i][BCOMP]);
-                     dst[i*4+2] = FLOAT_TO_SHORT_TEX(rgba[i][GCOMP]);
-                     dst[i*4+3] = FLOAT_TO_SHORT_TEX(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLshort) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLshort) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLshort) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLshort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLshort) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLshort) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLshort) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLshort) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLshort) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLshort) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLshort) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLshort) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLshort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLshort) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLshort) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLshort) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLshort) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLshort) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLshort) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLshort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLshort) (rgba[i][RCOMP] +
-                                             rgba[i][GCOMP] +
-                                             rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLshort) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLshort) (rgba[i][RCOMP] +
-                                         rgba[i][GCOMP] +
-                                         rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT:
-         {
-            GLuint *dst = (GLuint *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UINT(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_UINT(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_UINT(luminance[i]);
-                     dst[i*2+1] = FLOAT_TO_UINT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                     dst[i*2+1] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                     dst[i*3+1] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                     dst[i*4+1] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UINT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                     dst[i*3+1] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                     dst[i*4+1] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UINT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_UINT(rgba[i][ACOMP]);
-                     dst[i*4+1] = FLOAT_TO_UINT(rgba[i][BCOMP]);
-                     dst[i*4+2] = FLOAT_TO_UINT(rgba[i][GCOMP]);
-                     dst[i*4+3] = FLOAT_TO_UINT(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLuint) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLuint) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLuint) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLuint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLuint) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLuint) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLuint) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLuint) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLuint) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLuint) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLuint) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLuint) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLuint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLuint) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLuint) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLuint) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLuint) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLuint) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLuint) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLuint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLuint) (rgba[i][RCOMP] +
-                                            rgba[i][GCOMP] +
-                                            rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLuint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLuint) (rgba[i][RCOMP] +
-                                        rgba[i][GCOMP] +
-                                        rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_INT:
-         {
-            GLint *dst = (GLint *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_INT(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = FLOAT_TO_INT(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_INT(luminance[i]);
-                     dst[i*2+1] = FLOAT_TO_INT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                     dst[i*2+1] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                     dst[i*3+1] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                     dst[i*4+1] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                     dst[i*4+3] = FLOAT_TO_INT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                     dst[i*3+1] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                     dst[i*3+2] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                     dst[i*4+1] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                     dst[i*4+2] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                     dst[i*4+3] = FLOAT_TO_INT(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = FLOAT_TO_INT(rgba[i][ACOMP]);
-                     dst[i*4+1] = FLOAT_TO_INT(rgba[i][BCOMP]);
-                     dst[i*4+2] = FLOAT_TO_INT(rgba[i][GCOMP]);
-                     dst[i*4+3] = FLOAT_TO_INT(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_RED_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLint) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_GREEN_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLint) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_BLUE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLint) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG_INTEGER:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLint) rgba[i][RCOMP];
-                     dst[i*2+1] = (GLint) rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLint) rgba[i][RCOMP];
-                     dst[i*3+1] = (GLint) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLint) rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLint) rgba[i][RCOMP];
-                     dst[i*4+1] = (GLint) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLint) rgba[i][BCOMP];
-                     dst[i*4+3] = (GLint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = (GLint) rgba[i][BCOMP];
-                     dst[i*3+1] = (GLint) rgba[i][GCOMP];
-                     dst[i*3+2] = (GLint) rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = (GLint) rgba[i][BCOMP];
-                     dst[i*4+1] = (GLint) rgba[i][GCOMP];
-                     dst[i*4+2] = (GLint) rgba[i][RCOMP];
-                     dst[i*4+3] = (GLint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = (GLint) (rgba[i][RCOMP] +
-                                           rgba[i][GCOMP] +
-                                           rgba[i][BCOMP]);
-                     dst[i*2+1] = (GLint) rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i] = (GLint) (rgba[i][RCOMP] +
-                                       rgba[i][GCOMP] +
-                                       rgba[i][BCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_FLOAT:
-         {
-            GLfloat *dst = (GLfloat *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = rgba[i][RCOMP];
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = rgba[i][GCOMP];
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = rgba[i][BCOMP];
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = rgba[i][ACOMP];
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = luminance[i];
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = luminance[i];
-                     dst[i*2+1] = rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = rgba[i][RCOMP];
-                     dst[i*2+1] = rgba[i][GCOMP];
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = rgba[i][RCOMP];
-                     dst[i*3+1] = rgba[i][GCOMP];
-                     dst[i*3+2] = rgba[i][BCOMP];
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = rgba[i][RCOMP];
-                     dst[i*4+1] = rgba[i][GCOMP];
-                     dst[i*4+2] = rgba[i][BCOMP];
-                     dst[i*4+3] = rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = rgba[i][BCOMP];
-                     dst[i*3+1] = rgba[i][GCOMP];
-                     dst[i*3+2] = rgba[i][RCOMP];
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = rgba[i][BCOMP];
-                     dst[i*4+1] = rgba[i][GCOMP];
-                     dst[i*4+2] = rgba[i][RCOMP];
-                     dst[i*4+3] = rgba[i][ACOMP];
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = rgba[i][ACOMP];
-                     dst[i*4+1] = rgba[i][BCOMP];
-                     dst[i*4+2] = rgba[i][GCOMP];
-                     dst[i*4+3] = rgba[i][RCOMP];
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_HALF_FLOAT_ARB:
-         {
-            GLhalfARB *dst = (GLhalfARB *) dstAddr;
-            switch (dstFormat) {
-               case GL_RED:
-                  for (i=0;i<n;i++)
-                     dst[i] = _mesa_float_to_half(rgba[i][RCOMP]);
-                  break;
-               case GL_GREEN:
-                  for (i=0;i<n;i++)
-                     dst[i] = _mesa_float_to_half(rgba[i][GCOMP]);
-                  break;
-               case GL_BLUE:
-                  for (i=0;i<n;i++)
-                     dst[i] = _mesa_float_to_half(rgba[i][BCOMP]);
-                  break;
-               case GL_ALPHA:
-                  for (i=0;i<n;i++)
-                     dst[i] = _mesa_float_to_half(rgba[i][ACOMP]);
-                  break;
-               case GL_LUMINANCE:
-                  for (i=0;i<n;i++)
-                     dst[i] = _mesa_float_to_half(luminance[i]);
-                  break;
-               case GL_LUMINANCE_ALPHA:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = _mesa_float_to_half(luminance[i]);
-                     dst[i*2+1] = _mesa_float_to_half(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_RG:
-                  for (i=0;i<n;i++) {
-                     dst[i*2+0] = _mesa_float_to_half(rgba[i][RCOMP]);
-                     dst[i*2+1] = _mesa_float_to_half(rgba[i][GCOMP]);
-                  }
-                  break;
-               case GL_RGB:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = _mesa_float_to_half(rgba[i][RCOMP]);
-                     dst[i*3+1] = _mesa_float_to_half(rgba[i][GCOMP]);
-                     dst[i*3+2] = _mesa_float_to_half(rgba[i][BCOMP]);
-                  }
-                  break;
-               case GL_RGBA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = _mesa_float_to_half(rgba[i][RCOMP]);
-                     dst[i*4+1] = _mesa_float_to_half(rgba[i][GCOMP]);
-                     dst[i*4+2] = _mesa_float_to_half(rgba[i][BCOMP]);
-                     dst[i*4+3] = _mesa_float_to_half(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_BGR:
-                  for (i=0;i<n;i++) {
-                     dst[i*3+0] = _mesa_float_to_half(rgba[i][BCOMP]);
-                     dst[i*3+1] = _mesa_float_to_half(rgba[i][GCOMP]);
-                     dst[i*3+2] = _mesa_float_to_half(rgba[i][RCOMP]);
-                  }
-                  break;
-               case GL_BGRA:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = _mesa_float_to_half(rgba[i][BCOMP]);
-                     dst[i*4+1] = _mesa_float_to_half(rgba[i][GCOMP]);
-                     dst[i*4+2] = _mesa_float_to_half(rgba[i][RCOMP]);
-                     dst[i*4+3] = _mesa_float_to_half(rgba[i][ACOMP]);
-                  }
-                  break;
-               case GL_ABGR_EXT:
-                  for (i=0;i<n;i++) {
-                     dst[i*4+0] = _mesa_float_to_half(rgba[i][ACOMP]);
-                     dst[i*4+1] = _mesa_float_to_half(rgba[i][BCOMP]);
-                     dst[i*4+2] = _mesa_float_to_half(rgba[i][GCOMP]);
-                     dst[i*4+3] = _mesa_float_to_half(rgba[i][RCOMP]);
-                  }
-                  break;
-               default:
-                  _mesa_problem(ctx, "bad format in _mesa_pack_rgba_span\n");
-            }
-         }
-         break;
-      case GL_UNSIGNED_BYTE_3_3_2:
-         if (dstFormat == GL_RGB) {
-            GLubyte *dst = (GLubyte *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 7.0F) << 5)
-                      | (F_TO_I(rgba[i][GCOMP] * 7.0F) << 2)
-                      | (F_TO_I(rgba[i][BCOMP] * 3.0F)     );
-            }
-         }
-         break;
-      case GL_UNSIGNED_BYTE_2_3_3_REV:
-         if (dstFormat == GL_RGB) {
-            GLubyte *dst = (GLubyte *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 7.0F)     )
-                      | (F_TO_I(rgba[i][GCOMP] * 7.0F) << 3)
-                      | (F_TO_I(rgba[i][BCOMP] * 3.0F) << 6);
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5:
-         if (dstFormat == GL_RGB) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 31.0F) << 11)
-                      | (F_TO_I(rgba[i][GCOMP] * 63.0F) <<  5)
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-         if (dstFormat == GL_RGB) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 31.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 63.0F) <<  5)
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F) << 11);
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4:
-         if (dstFormat == GL_RGBA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 15.0F) << 12)
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][BCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][ACOMP] * 15.0F)      );
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 15.0F) << 12)
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][RCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][ACOMP] * 15.0F)      );
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 15.0F) << 12)
-                      | (F_TO_I(rgba[i][BCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][RCOMP] * 15.0F)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-         if (dstFormat == GL_RGBA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 15.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][BCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][ACOMP] * 15.0F) << 12);
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 15.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][RCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][ACOMP] * 15.0F) << 12);
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 15.0F)      )
-                      | (F_TO_I(rgba[i][BCOMP] * 15.0F) <<  4)
-                      | (F_TO_I(rgba[i][GCOMP] * 15.0F) <<  8)
-                      | (F_TO_I(rgba[i][RCOMP] * 15.0F) << 12);
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_5_5_1:
-         if (dstFormat == GL_RGBA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 31.0F) << 11)
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) <<  6)
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F) <<  1)
-                      | (F_TO_I(rgba[i][ACOMP] *  1.0F)      );
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 31.0F) << 11)
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) <<  6)
-                      | (F_TO_I(rgba[i][RCOMP] * 31.0F) <<  1)
-                      | (F_TO_I(rgba[i][ACOMP] *  1.0F)      );
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 31.0F) << 11)
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F) <<  6)
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) <<  1)
-                      | (F_TO_I(rgba[i][RCOMP] *  1.0F)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-         if (dstFormat == GL_RGBA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 31.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) <<  5)
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F) << 10)
-                      | (F_TO_I(rgba[i][ACOMP] *  1.0F) << 15);
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 31.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) <<  5)
-                      | (F_TO_I(rgba[i][RCOMP] * 31.0F) << 10)
-                      | (F_TO_I(rgba[i][ACOMP] *  1.0F) << 15);
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLushort *dst = (GLushort *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 31.0F)      )
-                      | (F_TO_I(rgba[i][BCOMP] * 31.0F) <<  5)
-                      | (F_TO_I(rgba[i][GCOMP] * 31.0F) << 10)
-                      | (F_TO_I(rgba[i][RCOMP] *  1.0F) << 15);
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8:
-         if (dstFormat == GL_RGBA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 255.F) << 24)
-                      | (F_TO_I(rgba[i][GCOMP] * 255.F) << 16)
-                      | (F_TO_I(rgba[i][BCOMP] * 255.F) <<  8)
-                      | (F_TO_I(rgba[i][ACOMP] * 255.F)      );
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 255.F) << 24)
-                      | (F_TO_I(rgba[i][GCOMP] * 255.F) << 16)
-                      | (F_TO_I(rgba[i][RCOMP] * 255.F) <<  8)
-                      | (F_TO_I(rgba[i][ACOMP] * 255.F)      );
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 255.F) << 24)
-                      | (F_TO_I(rgba[i][BCOMP] * 255.F) << 16)
-                      | (F_TO_I(rgba[i][GCOMP] * 255.F) <<  8)
-                      | (F_TO_I(rgba[i][RCOMP] * 255.F)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-         if (dstFormat == GL_RGBA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 255.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 255.0F) <<  8)
-                      | (F_TO_I(rgba[i][BCOMP] * 255.0F) << 16)
-                      | (F_TO_I(rgba[i][ACOMP] * 255.0F) << 24);
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 255.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 255.0F) <<  8)
-                      | (F_TO_I(rgba[i][RCOMP] * 255.0F) << 16)
-                      | (F_TO_I(rgba[i][ACOMP] * 255.0F) << 24);
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 255.0F)      )
-                      | (F_TO_I(rgba[i][BCOMP] * 255.0F) <<  8)
-                      | (F_TO_I(rgba[i][GCOMP] * 255.0F) << 16)
-                      | (F_TO_I(rgba[i][RCOMP] * 255.0F) << 24);
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10_10_10_2:
-         if (dstFormat == GL_RGBA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 1023.0F) << 22)
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) << 12)
-                      | (F_TO_I(rgba[i][BCOMP] * 1023.0F) <<  2)
-                      | (F_TO_I(rgba[i][ACOMP] *    3.0F)      );
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 1023.0F) << 22)
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) << 12)
-                      | (F_TO_I(rgba[i][RCOMP] * 1023.0F) <<  2)
-                      | (F_TO_I(rgba[i][ACOMP] *    3.0F)      );
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 1023.0F) << 22)
-                      | (F_TO_I(rgba[i][BCOMP] * 1023.0F) << 12)
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) <<  2)
-                      | (F_TO_I(rgba[i][RCOMP] *    3.0F)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_2_10_10_10_REV:
-         if (dstFormat == GL_RGBA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][RCOMP] * 1023.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) << 10)
-                      | (F_TO_I(rgba[i][BCOMP] * 1023.0F) << 20)
-                      | (F_TO_I(rgba[i][ACOMP] *    3.0F) << 30);
-            }
-         }
-         else if (dstFormat == GL_BGRA) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][BCOMP] * 1023.0F)      )
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) << 10)
-                      | (F_TO_I(rgba[i][RCOMP] * 1023.0F) << 20)
-                      | (F_TO_I(rgba[i][ACOMP] *    3.0F) << 30);
-            }
-         }
-         else if (dstFormat == GL_ABGR_EXT) {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i=0;i<n;i++) {
-               dst[i] = (F_TO_I(rgba[i][ACOMP] * 1023.0F)      )
-                      | (F_TO_I(rgba[i][BCOMP] * 1023.0F) << 10)
-                      | (F_TO_I(rgba[i][GCOMP] * 1023.0F) << 20)
-                      | (F_TO_I(rgba[i][RCOMP] *    3.0F) << 30);
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_5_9_9_9_REV:
-         {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i = 0; i < n; i++) {
-               dst[i] = float3_to_rgb9e5(rgba[i]);
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10F_11F_11F_REV:
-         {
-            GLuint *dst = (GLuint *) dstAddr;
-            for (i = 0; i < n; i++) {
-               dst[i] = float3_to_r11g11b10f(rgba[i]);
-            }
-         }
-         break;
-      default:
-         _mesa_problem(ctx, "bad type in _mesa_pack_rgba_span_float");
-         free(luminance);
-         return;
-   }
-
-   if (dstPacking->SwapBytes) {
-      GLint swapSize = _mesa_sizeof_packed_type(dstType);
-      if (swapSize == 2) {
-         _mesa_swap2((GLushort *) dstAddr, n * comps);
-      }
-      else if (swapSize == 4) {
-         _mesa_swap4((GLuint *) dstAddr, n * comps);
-      }
-   }
-
-   free(luminance);
-}
-
-
-
 #define SWAP2BYTE(VALUE)			\
    {						\
       GLubyte *bytes = (GLubyte *) &(VALUE);	\
@@ -2724,6 +268,7 @@ extract_uint_indexes(GLuint n, GLuint indexes[],
           srcType == GL_INT ||
           srcType == GL_UNSIGNED_INT_24_8_EXT ||
           srcType == GL_HALF_FLOAT_ARB ||
+          srcType == GL_HALF_FLOAT_OES ||
           srcType == GL_FLOAT ||
           srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
 
@@ -2863,6 +408,7 @@ extract_uint_indexes(GLuint n, GLuint indexes[],
          }
          break;
       case GL_HALF_FLOAT_ARB:
+      case GL_HALF_FLOAT_OES:
          {
             GLuint i;
             const GLhalfARB *s = (const GLhalfARB *) src;
@@ -2921,743 +467,6 @@ extract_uint_indexes(GLuint n, GLuint indexes[],
 }
 
 
-/**
- * Return source/dest RGBA indexes for unpacking pixels.
- */
-static void
-get_component_mapping(GLenum format,
-                      GLint *rSrc,
-                      GLint *gSrc,
-                      GLint *bSrc,
-                      GLint *aSrc,
-                      GLint *rDst,
-                      GLint *gDst,
-                      GLint *bDst,
-                      GLint *aDst)
-{
-   switch (format) {
-   case GL_RED:
-   case GL_RED_INTEGER_EXT:
-      *rSrc = 0;
-      *gSrc = *bSrc = *aSrc = -1;
-      break;
-   case GL_GREEN:
-   case GL_GREEN_INTEGER_EXT:
-      *gSrc = 0;
-      *rSrc = *bSrc = *aSrc = -1;
-      break;
-   case GL_BLUE:
-   case GL_BLUE_INTEGER_EXT:
-      *bSrc = 0;
-      *rSrc = *gSrc = *aSrc = -1;
-      break;
-   case GL_ALPHA:
-   case GL_ALPHA_INTEGER_EXT:
-      *rSrc = *gSrc = *bSrc = -1;
-      *aSrc = 0;
-      break;
-   case GL_LUMINANCE:
-   case GL_LUMINANCE_INTEGER_EXT:
-      *rSrc = *gSrc = *bSrc = 0;
-      *aSrc = -1;
-      break;
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-      *rSrc = *gSrc = *bSrc = 0;
-      *aSrc = 1;
-      break;
-   case GL_INTENSITY:
-      *rSrc = *gSrc = *bSrc = *aSrc = 0;
-      break;
-   case GL_RG:
-   case GL_RG_INTEGER:
-      *rSrc = 0;
-      *gSrc = 1;
-      *bSrc = -1;
-      *aSrc = -1;
-      *rDst = 0;
-      *gDst = 1;
-      *bDst = 2;
-      *aDst = 3;
-      break;
-   case GL_RGB:
-   case GL_RGB_INTEGER:
-      *rSrc = 0;
-      *gSrc = 1;
-      *bSrc = 2;
-      *aSrc = -1;
-      *rDst = 0;
-      *gDst = 1;
-      *bDst = 2;
-      *aDst = 3;
-      break;
-   case GL_BGR:
-   case GL_BGR_INTEGER:
-      *rSrc = 2;
-      *gSrc = 1;
-      *bSrc = 0;
-      *aSrc = -1;
-      *rDst = 2;
-      *gDst = 1;
-      *bDst = 0;
-      *aDst = 3;
-      break;
-   case GL_RGBA:
-   case GL_RGBA_INTEGER:
-      *rSrc = 0;
-      *gSrc = 1;
-      *bSrc = 2;
-      *aSrc = 3;
-      *rDst = 0;
-      *gDst = 1;
-      *bDst = 2;
-      *aDst = 3;
-      break;
-   case GL_BGRA:
-   case GL_BGRA_INTEGER:
-      *rSrc = 2;
-      *gSrc = 1;
-      *bSrc = 0;
-      *aSrc = 3;
-      *rDst = 2;
-      *gDst = 1;
-      *bDst = 0;
-      *aDst = 3;
-      break;
-   case GL_ABGR_EXT:
-      *rSrc = 3;
-      *gSrc = 2;
-      *bSrc = 1;
-      *aSrc = 0;
-      *rDst = 3;
-      *gDst = 2;
-      *bDst = 1;
-      *aDst = 0;
-      break;
-   default:
-      _mesa_problem(NULL, "bad srcFormat %s in get_component_mapping",
-                    _mesa_lookup_enum_by_nr(format));
-      return;
-   }
-}
-
-
-
-/*
- * This function extracts floating point RGBA values from arbitrary
- * image data.  srcFormat and srcType are the format and type parameters
- * passed to glDrawPixels, glTexImage[123]D, glTexSubImage[123]D, etc.
- *
- * Refering to section 3.6.4 of the OpenGL 1.2 spec, this function
- * implements the "Conversion to floating point", "Conversion to RGB",
- * and "Final Expansion to RGBA" operations.
- *
- * Args:  n - number of pixels
- *        rgba - output colors
- *        srcFormat - format of incoming data
- *        srcType - data type of incoming data
- *        src - source data pointer
- *        swapBytes - perform byteswapping of incoming data?
- */
-static void
-extract_float_rgba(GLuint n, GLfloat rgba[][4],
-                   GLenum srcFormat, GLenum srcType, const GLvoid *src,
-                   GLboolean swapBytes)
-{
-   GLint rSrc, gSrc, bSrc, aSrc;
-   GLint stride;
-   GLint rDst, bDst, gDst, aDst;
-   GLboolean intFormat;
-   GLfloat rs = 1.0f, gs = 1.0f, bs = 1.0f, as = 1.0f; /* scale factors */
-
-   ASSERT(srcFormat == GL_RED ||
-          srcFormat == GL_GREEN ||
-          srcFormat == GL_BLUE ||
-          srcFormat == GL_ALPHA ||
-          srcFormat == GL_LUMINANCE ||
-          srcFormat == GL_LUMINANCE_ALPHA ||
-          srcFormat == GL_INTENSITY ||
-          srcFormat == GL_RG ||
-          srcFormat == GL_RGB ||
-          srcFormat == GL_BGR ||
-          srcFormat == GL_RGBA ||
-          srcFormat == GL_BGRA ||
-          srcFormat == GL_ABGR_EXT ||
-          srcFormat == GL_RED_INTEGER_EXT ||
-          srcFormat == GL_GREEN_INTEGER_EXT ||
-          srcFormat == GL_BLUE_INTEGER_EXT ||
-          srcFormat == GL_ALPHA_INTEGER_EXT ||
-          srcFormat == GL_RG_INTEGER ||
-          srcFormat == GL_RGB_INTEGER_EXT ||
-          srcFormat == GL_RGBA_INTEGER_EXT ||
-          srcFormat == GL_BGR_INTEGER_EXT ||
-          srcFormat == GL_BGRA_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT);
-
-   ASSERT(srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT ||
-          srcType == GL_UNSIGNED_BYTE_3_3_2 ||
-          srcType == GL_UNSIGNED_BYTE_2_3_3_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5 ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5_REV ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4 ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_5_5_1 ||
-          srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8 ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
-          srcType == GL_UNSIGNED_INT_10_10_10_2 ||
-          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
-          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
-          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
-
-   get_component_mapping(srcFormat,
-                         &rSrc, &gSrc, &bSrc, &aSrc,
-                         &rDst, &gDst, &bDst, &aDst);
-
-   stride = _mesa_components_in_format(srcFormat);
-
-   intFormat = _mesa_is_enum_format_integer(srcFormat);
-
-#define PROCESS(SRC_INDEX, DST_INDEX, DEFAULT_FLT, DEFAULT_INT, TYPE, CONVERSION) \
-   if ((SRC_INDEX) < 0) {						\
-      GLuint i;								\
-      if (intFormat) {							\
-         for (i = 0; i < n; i++) {					\
-            rgba[i][DST_INDEX] = DEFAULT_INT;				\
-         }								\
-      }									\
-      else {								\
-         for (i = 0; i < n; i++) {					\
-            rgba[i][DST_INDEX] = DEFAULT_FLT;				\
-         }								\
-      }									\
-   }									\
-   else if (swapBytes) {						\
-      const TYPE *s = (const TYPE *) src;				\
-      GLuint i;								\
-      for (i = 0; i < n; i++) {						\
-         TYPE value = s[SRC_INDEX];					\
-         if (sizeof(TYPE) == 2) {					\
-            SWAP2BYTE(value);						\
-         }								\
-         else if (sizeof(TYPE) == 4) {					\
-            SWAP4BYTE(value);						\
-         }								\
-         if (intFormat)							\
-            rgba[i][DST_INDEX] = (GLfloat) value;			\
-         else								\
-            rgba[i][DST_INDEX] = (GLfloat) CONVERSION(value);		\
-         s += stride;							\
-      }									\
-   }									\
-   else {								\
-      const TYPE *s = (const TYPE *) src;				\
-      GLuint i;								\
-      if (intFormat) {							\
-         for (i = 0; i < n; i++) {					\
-            rgba[i][DST_INDEX] = (GLfloat) s[SRC_INDEX];		\
-            s += stride;						\
-         }								\
-      }									\
-      else {								\
-         for (i = 0; i < n; i++) {					\
-            rgba[i][DST_INDEX] = (GLfloat) CONVERSION(s[SRC_INDEX]);	\
-            s += stride;						\
-         }								\
-      }									\
-   }
-
-   switch (srcType) {
-      case GL_UNSIGNED_BYTE:
-         PROCESS(rSrc, RCOMP, 0.0F,   0, GLubyte, UBYTE_TO_FLOAT);
-         PROCESS(gSrc, GCOMP, 0.0F,   0, GLubyte, UBYTE_TO_FLOAT);
-         PROCESS(bSrc, BCOMP, 0.0F,   0, GLubyte, UBYTE_TO_FLOAT);
-         PROCESS(aSrc, ACOMP, 1.0F, 255, GLubyte, UBYTE_TO_FLOAT);
-         break;
-      case GL_BYTE:
-         PROCESS(rSrc, RCOMP, 0.0F,   0, GLbyte, BYTE_TO_FLOAT_TEX);
-         PROCESS(gSrc, GCOMP, 0.0F,   0, GLbyte, BYTE_TO_FLOAT_TEX);
-         PROCESS(bSrc, BCOMP, 0.0F,   0, GLbyte, BYTE_TO_FLOAT_TEX);
-         PROCESS(aSrc, ACOMP, 1.0F, 127, GLbyte, BYTE_TO_FLOAT_TEX);
-         break;
-      case GL_UNSIGNED_SHORT:
-         PROCESS(rSrc, RCOMP, 0.0F,      0, GLushort, USHORT_TO_FLOAT);
-         PROCESS(gSrc, GCOMP, 0.0F,      0, GLushort, USHORT_TO_FLOAT);
-         PROCESS(bSrc, BCOMP, 0.0F,      0, GLushort, USHORT_TO_FLOAT);
-         PROCESS(aSrc, ACOMP, 1.0F, 0xffff, GLushort, USHORT_TO_FLOAT);
-         break;
-      case GL_SHORT:
-         PROCESS(rSrc, RCOMP, 0.0F,     0, GLshort, SHORT_TO_FLOAT_TEX);
-         PROCESS(gSrc, GCOMP, 0.0F,     0, GLshort, SHORT_TO_FLOAT_TEX);
-         PROCESS(bSrc, BCOMP, 0.0F,     0, GLshort, SHORT_TO_FLOAT_TEX);
-         PROCESS(aSrc, ACOMP, 1.0F, 32767, GLshort, SHORT_TO_FLOAT_TEX);
-         break;
-      case GL_UNSIGNED_INT:
-         PROCESS(rSrc, RCOMP, 0.0F,          0, GLuint, UINT_TO_FLOAT);
-         PROCESS(gSrc, GCOMP, 0.0F,          0, GLuint, UINT_TO_FLOAT);
-         PROCESS(bSrc, BCOMP, 0.0F,          0, GLuint, UINT_TO_FLOAT);
-         PROCESS(aSrc, ACOMP, 1.0F, 0xffffffff, GLuint, UINT_TO_FLOAT);
-         break;
-      case GL_INT:
-         PROCESS(rSrc, RCOMP, 0.0F,          0, GLint, INT_TO_FLOAT);
-         PROCESS(gSrc, GCOMP, 0.0F,          0, GLint, INT_TO_FLOAT);
-         PROCESS(bSrc, BCOMP, 0.0F,          0, GLint, INT_TO_FLOAT);
-         PROCESS(aSrc, ACOMP, 1.0F, 2147483647, GLint, INT_TO_FLOAT);
-         break;
-      case GL_FLOAT:
-         PROCESS(rSrc, RCOMP, 0.0F, 0.0F, GLfloat, (GLfloat));
-         PROCESS(gSrc, GCOMP, 0.0F, 0.0F, GLfloat, (GLfloat));
-         PROCESS(bSrc, BCOMP, 0.0F, 0.0F, GLfloat, (GLfloat));
-         PROCESS(aSrc, ACOMP, 1.0F, 1.0F, GLfloat, (GLfloat));
-         break;
-      case GL_HALF_FLOAT_ARB:
-         PROCESS(rSrc, RCOMP, 0.0F, 0.0F, GLhalfARB, _mesa_half_to_float);
-         PROCESS(gSrc, GCOMP, 0.0F, 0.0F, GLhalfARB, _mesa_half_to_float);
-         PROCESS(bSrc, BCOMP, 0.0F, 0.0F, GLhalfARB, _mesa_half_to_float);
-         PROCESS(aSrc, ACOMP, 1.0F, 1.0F, GLhalfARB, _mesa_half_to_float);
-         break;
-      case GL_UNSIGNED_BYTE_3_3_2:
-         {
-            const GLubyte *ubsrc = (const GLubyte *) src;
-            GLuint i;
-            if (!intFormat) {
-               rs = 1.0F / 7.0F;
-               gs = 1.0F / 7.0F;
-               bs = 1.0F / 3.0F;
-            }
-            for (i = 0; i < n; i ++) {
-               GLubyte p = ubsrc[i];
-               rgba[i][rDst] = ((p >> 5)      ) * rs;
-               rgba[i][gDst] = ((p >> 2) & 0x7) * gs;
-               rgba[i][bDst] = ((p     ) & 0x3) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      case GL_UNSIGNED_BYTE_2_3_3_REV:
-         {
-            const GLubyte *ubsrc = (const GLubyte *) src;
-            GLuint i;
-            if (!intFormat) {
-               rs = 1.0F / 7.0F;
-               gs = 1.0F / 7.0F;
-               bs = 1.0F / 3.0F;
-            }
-            for (i = 0; i < n; i ++) {
-               GLubyte p = ubsrc[i];
-               rgba[i][rDst] = ((p     ) & 0x7) * rs;
-               rgba[i][gDst] = ((p >> 3) & 0x7) * gs;
-               rgba[i][bDst] = ((p >> 6)      ) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5:
-         if (!intFormat) {
-            rs = 1.0F / 31.0F;
-            gs = 1.0F / 63.0F;
-            bs = 1.0F / 31.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 11)       ) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x3f) * gs;
-               rgba[i][bDst] = ((p      ) & 0x1f) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 11)       ) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x3f) * gs;
-               rgba[i][bDst] = ((p      ) & 0x1f) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-         if (!intFormat) {
-            rs = 1.0F / 31.0F;
-            gs = 1.0F / 63.0F;
-            bs = 1.0F / 31.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x1f) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x3f) * gs;
-               rgba[i][bDst] = ((p >> 11)       ) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0x1f) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x3f) * gs;
-               rgba[i][bDst] = ((p >> 11)       ) * bs;
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4:
-         if (!intFormat) {
-            rs = gs = bs = as = 1.0F / 15.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 12)      ) * rs;
-               rgba[i][gDst] = ((p >>  8) & 0xf) * gs;
-               rgba[i][bDst] = ((p >>  4) & 0xf) * bs;
-               rgba[i][aDst] = ((p      ) & 0xf) * as;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 12)      ) * rs;
-               rgba[i][gDst] = ((p >>  8) & 0xf) * gs;
-               rgba[i][bDst] = ((p >>  4) & 0xf) * bs;
-               rgba[i][aDst] = ((p      ) & 0xf) * as;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-         if (!intFormat) {
-            rs = gs = bs = as = 1.0F / 15.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0xf) * rs;
-               rgba[i][gDst] = ((p >>  4) & 0xf) * gs;
-               rgba[i][bDst] = ((p >>  8) & 0xf) * bs;
-               rgba[i][aDst] = ((p >> 12)      ) * as;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0xf) * rs;
-               rgba[i][gDst] = ((p >>  4) & 0xf) * gs;
-               rgba[i][bDst] = ((p >>  8) & 0xf) * bs;
-               rgba[i][aDst] = ((p >> 12)      ) * as;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_5_5_1:
-         if (!intFormat) {
-            rs = gs = bs = 1.0F / 31.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 11)       ) * rs;
-               rgba[i][gDst] = ((p >>  6) & 0x1f) * gs;
-               rgba[i][bDst] = ((p >>  1) & 0x1f) * bs;
-               rgba[i][aDst] = ((p      ) & 0x1)  * as;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 11)       ) * rs;
-               rgba[i][gDst] = ((p >>  6) & 0x1f) * gs;
-               rgba[i][bDst] = ((p >>  1) & 0x1f) * bs;
-               rgba[i][aDst] = ((p      ) & 0x1)  * as;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-         if (!intFormat) {
-            rs = gs = bs = 1.0F / 31.0F;
-         }
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x1f) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x1f) * gs;
-               rgba[i][bDst] = ((p >> 10) & 0x1f) * bs;
-               rgba[i][aDst] = ((p >> 15)       ) * as;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0x1f) * rs;
-               rgba[i][gDst] = ((p >>  5) & 0x1f) * gs;
-               rgba[i][bDst] = ((p >> 10) & 0x1f) * bs;
-               rgba[i][aDst] = ((p >> 15)       ) * as;
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            if (intFormat) {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = (GLfloat) ((p      ) & 0xff);
-                  rgba[i][gDst] = (GLfloat) ((p >>  8) & 0xff);
-                  rgba[i][bDst] = (GLfloat) ((p >> 16) & 0xff);
-                  rgba[i][aDst] = (GLfloat) ((p >> 24)       );
-               }
-            }
-            else {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = UBYTE_TO_FLOAT((p      ) & 0xff);
-                  rgba[i][gDst] = UBYTE_TO_FLOAT((p >>  8) & 0xff);
-                  rgba[i][bDst] = UBYTE_TO_FLOAT((p >> 16) & 0xff);
-                  rgba[i][aDst] = UBYTE_TO_FLOAT((p >> 24)       );
-               }
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            if (intFormat) {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = (GLfloat) ((p >> 24)       );
-                  rgba[i][gDst] = (GLfloat) ((p >> 16) & 0xff);
-                  rgba[i][bDst] = (GLfloat) ((p >>  8) & 0xff);
-                  rgba[i][aDst] = (GLfloat) ((p      ) & 0xff);
-               }
-            }
-            else {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = UBYTE_TO_FLOAT((p >> 24)       );
-                  rgba[i][gDst] = UBYTE_TO_FLOAT((p >> 16) & 0xff);
-                  rgba[i][bDst] = UBYTE_TO_FLOAT((p >>  8) & 0xff);
-                  rgba[i][aDst] = UBYTE_TO_FLOAT((p      ) & 0xff);
-               }
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            if (intFormat) {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = (GLfloat) ((p >> 24)       );
-                  rgba[i][gDst] = (GLfloat) ((p >> 16) & 0xff);
-                  rgba[i][bDst] = (GLfloat) ((p >>  8) & 0xff);
-                  rgba[i][aDst] = (GLfloat) ((p      ) & 0xff);
-               }
-            }
-            else {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = UBYTE_TO_FLOAT((p >> 24)       );
-                  rgba[i][gDst] = UBYTE_TO_FLOAT((p >> 16) & 0xff);
-                  rgba[i][bDst] = UBYTE_TO_FLOAT((p >>  8) & 0xff);
-                  rgba[i][aDst] = UBYTE_TO_FLOAT((p      ) & 0xff);
-               }
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            if (intFormat) {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = (GLfloat) ((p      ) & 0xff);
-                  rgba[i][gDst] = (GLfloat) ((p >>  8) & 0xff);
-                  rgba[i][bDst] = (GLfloat) ((p >> 16) & 0xff);
-                  rgba[i][aDst] = (GLfloat) ((p >> 24)       );
-               }
-            }
-            else {
-               for (i = 0; i < n; i ++) {
-                  GLuint p = uisrc[i];
-                  rgba[i][rDst] = UBYTE_TO_FLOAT((p      ) & 0xff);
-                  rgba[i][gDst] = UBYTE_TO_FLOAT((p >>  8) & 0xff);
-                  rgba[i][bDst] = UBYTE_TO_FLOAT((p >> 16) & 0xff);
-                  rgba[i][aDst] = UBYTE_TO_FLOAT((p >> 24)       );
-               }
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10_10_10_2:
-         if (!intFormat) {
-            rs = 1.0F / 1023.0F;
-            gs = 1.0F / 1023.0F;
-            bs = 1.0F / 1023.0F;
-            as = 1.0F / 3.0F;
-         }
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgba[i][rDst] = ((p >> 22)        ) * rs;
-               rgba[i][gDst] = ((p >> 12) & 0x3ff) * gs;
-               rgba[i][bDst] = ((p >>  2) & 0x3ff) * bs;
-               rgba[i][aDst] = ((p      ) & 0x3  ) * as;
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p >> 22)        ) * rs;
-               rgba[i][gDst] = ((p >> 12) & 0x3ff) * gs;
-               rgba[i][bDst] = ((p >>  2) & 0x3ff) * bs;
-               rgba[i][aDst] = ((p      ) & 0x3  ) * as;
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_2_10_10_10_REV:
-         if (!intFormat) {
-            rs = 1.0F / 1023.0F;
-            gs = 1.0F / 1023.0F;
-            bs = 1.0F / 1023.0F;
-            as = 1.0F / 3.0F;
-         }
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x3ff) * rs;
-               rgba[i][gDst] = ((p >> 10) & 0x3ff) * gs;
-               rgba[i][bDst] = ((p >> 20) & 0x3ff) * bs;
-               if (aSrc < 0) {
-                  rgba[i][aDst] = 1.0F;
-               } else {
-                  rgba[i][aDst] = (p >> 30) * as;
-               }
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p      ) & 0x3ff) * rs;
-               rgba[i][gDst] = ((p >> 10) & 0x3ff) * gs;
-               rgba[i][bDst] = ((p >> 20) & 0x3ff) * bs;
-               if (aSrc < 0) {
-                  rgba[i][aDst] = 1.0F;
-               } else {
-                  rgba[i][aDst] = (p >> 30) * as;
-               }
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_5_9_9_9_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            GLfloat f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgb9e5_to_float3(p, f);
-               rgba[i][rDst] = f[0];
-               rgba[i][gDst] = f[1];
-               rgba[i][bDst] = f[2];
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            GLfloat f[3];
-            for (i = 0; i < n; i ++) {
-               rgb9e5_to_float3(uisrc[i], f);
-               rgba[i][rDst] = f[0];
-               rgba[i][gDst] = f[1];
-               rgba[i][bDst] = f[2];
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10F_11F_11F_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            GLfloat f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               r11g11b10f_to_float3(p, f);
-               rgba[i][rDst] = f[0];
-               rgba[i][gDst] = f[1];
-               rgba[i][bDst] = f[2];
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            GLfloat f[3];
-            for (i = 0; i < n; i ++) {
-               r11g11b10f_to_float3(uisrc[i], f);
-               rgba[i][rDst] = f[0];
-               rgba[i][gDst] = f[1];
-               rgba[i][bDst] = f[2];
-               rgba[i][aDst] = 1.0F;
-            }
-         }
-         break;
-      default:
-         _mesa_problem(NULL, "bad srcType in extract float data");
-         break;
-   }
-#undef PROCESS
-}
-
-
 static inline GLuint
 clamp_float_to_uint(GLfloat f)
 {
@@ -3673,1326 +482,6 @@ clamp_half_to_uint(GLhalfARB h)
 }
 
 
-/**
- * \sa extract_float_rgba()
- */
-static void
-extract_uint_rgba(GLuint n, GLuint rgba[][4],
-                  GLenum srcFormat, GLenum srcType, const GLvoid *src,
-                  GLboolean swapBytes)
-{
-   GLint rSrc, gSrc, bSrc, aSrc;
-   GLint stride;
-   GLint rDst, bDst, gDst, aDst;
-
-   ASSERT(srcFormat == GL_RED ||
-          srcFormat == GL_GREEN ||
-          srcFormat == GL_BLUE ||
-          srcFormat == GL_ALPHA ||
-          srcFormat == GL_LUMINANCE ||
-          srcFormat == GL_LUMINANCE_ALPHA ||
-          srcFormat == GL_INTENSITY ||
-          srcFormat == GL_RG ||
-          srcFormat == GL_RGB ||
-          srcFormat == GL_BGR ||
-          srcFormat == GL_RGBA ||
-          srcFormat == GL_BGRA ||
-          srcFormat == GL_ABGR_EXT ||
-          srcFormat == GL_RED_INTEGER_EXT ||
-          srcFormat == GL_RG_INTEGER ||
-          srcFormat == GL_GREEN_INTEGER_EXT ||
-          srcFormat == GL_BLUE_INTEGER_EXT ||
-          srcFormat == GL_ALPHA_INTEGER_EXT ||
-          srcFormat == GL_RGB_INTEGER_EXT ||
-          srcFormat == GL_RGBA_INTEGER_EXT ||
-          srcFormat == GL_BGR_INTEGER_EXT ||
-          srcFormat == GL_BGRA_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT);
-
-   ASSERT(srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT ||
-          srcType == GL_UNSIGNED_BYTE_3_3_2 ||
-          srcType == GL_UNSIGNED_BYTE_2_3_3_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5 ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5_REV ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4 ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_5_5_1 ||
-          srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8 ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
-          srcType == GL_UNSIGNED_INT_10_10_10_2 ||
-          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
-          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
-          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
-
-   get_component_mapping(srcFormat,
-                         &rSrc, &gSrc, &bSrc, &aSrc,
-                         &rDst, &gDst, &bDst, &aDst);
-
-   stride = _mesa_components_in_format(srcFormat);
-
-#define PROCESS(SRC_INDEX, DST_INDEX, DEFAULT, TYPE, CONVERSION)	\
-   if ((SRC_INDEX) < 0) {						\
-      GLuint i;								\
-      for (i = 0; i < n; i++) {						\
-         rgba[i][DST_INDEX] = DEFAULT;					\
-      }									\
-   }									\
-   else if (swapBytes) {						\
-      const TYPE *s = (const TYPE *) src;				\
-      GLuint i;								\
-      for (i = 0; i < n; i++) {						\
-         TYPE value = s[SRC_INDEX];					\
-         if (sizeof(TYPE) == 2) {					\
-            SWAP2BYTE(value);						\
-         }								\
-         else if (sizeof(TYPE) == 4) {					\
-            SWAP4BYTE(value);						\
-         }								\
-         rgba[i][DST_INDEX] = CONVERSION(value);                        \
-         s += stride;							\
-      }									\
-   }									\
-   else {								\
-      const TYPE *s = (const TYPE *) src;				\
-      GLuint i;								\
-      for (i = 0; i < n; i++) {						\
-         rgba[i][DST_INDEX] = CONVERSION(s[SRC_INDEX]);			\
-         s += stride;							\
-      }									\
-   }
-
-   switch (srcType) {
-      case GL_UNSIGNED_BYTE:
-         PROCESS(rSrc, RCOMP, 0, GLubyte, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLubyte, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLubyte, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLubyte, (GLuint));
-         break;
-      case GL_BYTE:
-         PROCESS(rSrc, RCOMP, 0, GLbyte, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLbyte, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLbyte, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLbyte, (GLuint));
-         break;
-      case GL_UNSIGNED_SHORT:
-         PROCESS(rSrc, RCOMP, 0, GLushort, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLushort, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLushort, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLushort, (GLuint));
-         break;
-      case GL_SHORT:
-         PROCESS(rSrc, RCOMP, 0, GLshort, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLshort, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLshort, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLshort, (GLuint));
-         break;
-      case GL_UNSIGNED_INT:
-         PROCESS(rSrc, RCOMP, 0, GLuint, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLuint, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLuint, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLuint, (GLuint));
-         break;
-      case GL_INT:
-         PROCESS(rSrc, RCOMP, 0, GLint, (GLuint));
-         PROCESS(gSrc, GCOMP, 0, GLint, (GLuint));
-         PROCESS(bSrc, BCOMP, 0, GLint, (GLuint));
-         PROCESS(aSrc, ACOMP, 1, GLint, (GLuint));
-         break;
-      case GL_FLOAT:
-         PROCESS(rSrc, RCOMP, 0, GLfloat, clamp_float_to_uint);
-         PROCESS(gSrc, GCOMP, 0, GLfloat, clamp_float_to_uint);
-         PROCESS(bSrc, BCOMP, 0, GLfloat, clamp_float_to_uint);
-         PROCESS(aSrc, ACOMP, 1, GLfloat, clamp_float_to_uint);
-         break;
-      case GL_HALF_FLOAT_ARB:
-         PROCESS(rSrc, RCOMP, 0, GLhalfARB, clamp_half_to_uint);
-         PROCESS(gSrc, GCOMP, 0, GLhalfARB, clamp_half_to_uint);
-         PROCESS(bSrc, BCOMP, 0, GLhalfARB, clamp_half_to_uint);
-         PROCESS(aSrc, ACOMP, 1, GLhalfARB, clamp_half_to_uint);
-         break;
-      case GL_UNSIGNED_BYTE_3_3_2:
-         {
-            const GLubyte *ubsrc = (const GLubyte *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLubyte p = ubsrc[i];
-               rgba[i][rDst] = ((p >> 5)      );
-               rgba[i][gDst] = ((p >> 2) & 0x7);
-               rgba[i][bDst] = ((p     ) & 0x3);
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      case GL_UNSIGNED_BYTE_2_3_3_REV:
-         {
-            const GLubyte *ubsrc = (const GLubyte *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLubyte p = ubsrc[i];
-               rgba[i][rDst] = ((p     ) & 0x7);
-               rgba[i][gDst] = ((p >> 3) & 0x7);
-               rgba[i][bDst] = ((p >> 6)      );
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 11)       );
-               rgba[i][gDst] = ((p >>  5) & 0x3f);
-               rgba[i][bDst] = ((p      ) & 0x1f);
-               rgba[i][aDst] = 1;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 11)       );
-               rgba[i][gDst] = ((p >>  5) & 0x3f);
-               rgba[i][bDst] = ((p      ) & 0x1f);
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_6_5_REV:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x1f);
-               rgba[i][gDst] = ((p >>  5) & 0x3f);
-               rgba[i][bDst] = ((p >> 11)       );
-               rgba[i][aDst] = 1;
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0x1f);
-               rgba[i][gDst] = ((p >>  5) & 0x3f);
-               rgba[i][bDst] = ((p >> 11)       );
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 12)      );
-               rgba[i][gDst] = ((p >>  8) & 0xf);
-               rgba[i][bDst] = ((p >>  4) & 0xf);
-               rgba[i][aDst] = ((p      ) & 0xf);
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 12)      );
-               rgba[i][gDst] = ((p >>  8) & 0xf);
-               rgba[i][bDst] = ((p >>  4) & 0xf);
-               rgba[i][aDst] = ((p      ) & 0xf);
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0xf);
-               rgba[i][gDst] = ((p >>  4) & 0xf);
-               rgba[i][bDst] = ((p >>  8) & 0xf);
-               rgba[i][aDst] = ((p >> 12)      );
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0xf);
-               rgba[i][gDst] = ((p >>  4) & 0xf);
-               rgba[i][bDst] = ((p >>  8) & 0xf);
-               rgba[i][aDst] = ((p >> 12)      );
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_5_5_5_1:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p >> 11)       );
-               rgba[i][gDst] = ((p >>  6) & 0x1f);
-               rgba[i][bDst] = ((p >>  1) & 0x1f);
-               rgba[i][aDst] = ((p      ) & 0x1 );
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p >> 11)       );
-               rgba[i][gDst] = ((p >>  6) & 0x1f);
-               rgba[i][bDst] = ((p >>  1) & 0x1f);
-               rgba[i][aDst] = ((p      ) & 0x1 );
-            }
-         }
-         break;
-      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
-         if (swapBytes) {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               SWAP2BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x1f);
-               rgba[i][gDst] = ((p >>  5) & 0x1f);
-               rgba[i][bDst] = ((p >> 10) & 0x1f);
-               rgba[i][aDst] = ((p >> 15)       );
-            }
-         }
-         else {
-            const GLushort *ussrc = (const GLushort *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLushort p = ussrc[i];
-               rgba[i][rDst] = ((p      ) & 0x1f);
-               rgba[i][gDst] = ((p >>  5) & 0x1f);
-               rgba[i][bDst] = ((p >> 10) & 0x1f);
-               rgba[i][aDst] = ((p >> 15)       );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p      ) & 0xff);
-               rgba[i][gDst] = ((p >>  8) & 0xff);
-               rgba[i][bDst] = ((p >> 16) & 0xff);
-               rgba[i][aDst] = ((p >> 24)       );
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p >> 24)       );
-               rgba[i][gDst] = ((p >> 16) & 0xff);
-               rgba[i][bDst] = ((p >>  8) & 0xff);
-               rgba[i][aDst] = ((p      ) & 0xff);
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_8_8_8_8_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p >> 24)       );
-               rgba[i][gDst] = ((p >> 16) & 0xff);
-               rgba[i][bDst] = ((p >>  8) & 0xff);
-               rgba[i][aDst] = ((p      ) & 0xff);
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p      ) & 0xff);
-               rgba[i][gDst] = ((p >>  8) & 0xff);
-               rgba[i][bDst] = ((p >> 16) & 0xff);
-               rgba[i][aDst] = ((p >> 24)       );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10_10_10_2:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgba[i][rDst] = ((p >> 22)        );
-               rgba[i][gDst] = ((p >> 12) & 0x3ff);
-               rgba[i][bDst] = ((p >>  2) & 0x3ff);
-               rgba[i][aDst] = ((p      ) & 0x3  );
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p >> 22)        );
-               rgba[i][gDst] = ((p >> 12) & 0x3ff);
-               rgba[i][bDst] = ((p >>  2) & 0x3ff);
-               rgba[i][aDst] = ((p      ) & 0x3  );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_2_10_10_10_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgba[i][rDst] = ((p      ) & 0x3ff);
-               rgba[i][gDst] = ((p >> 10) & 0x3ff);
-               rgba[i][bDst] = ((p >> 20) & 0x3ff);
-               rgba[i][aDst] = ((p >> 30)        );
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgba[i][rDst] = ((p      ) & 0x3ff);
-               rgba[i][gDst] = ((p >> 10) & 0x3ff);
-               rgba[i][bDst] = ((p >> 20) & 0x3ff);
-               rgba[i][aDst] = ((p >> 30)        );
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_5_9_9_9_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            float f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               rgb9e5_to_float3(p, f);
-               rgba[i][rDst] = clamp_float_to_uint(f[0]);
-               rgba[i][gDst] = clamp_float_to_uint(f[1]);
-               rgba[i][bDst] = clamp_float_to_uint(f[2]);
-               rgba[i][aDst] = 1;
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            float f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               rgb9e5_to_float3(p, f);
-               rgba[i][rDst] = clamp_float_to_uint(f[0]);
-               rgba[i][gDst] = clamp_float_to_uint(f[1]);
-               rgba[i][bDst] = clamp_float_to_uint(f[2]);
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      case GL_UNSIGNED_INT_10F_11F_11F_REV:
-         if (swapBytes) {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            float f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               SWAP4BYTE(p);
-               r11g11b10f_to_float3(p, f);
-               rgba[i][rDst] = clamp_float_to_uint(f[0]);
-               rgba[i][gDst] = clamp_float_to_uint(f[1]);
-               rgba[i][bDst] = clamp_float_to_uint(f[2]);
-               rgba[i][aDst] = 1;
-            }
-         }
-         else {
-            const GLuint *uisrc = (const GLuint *) src;
-            GLuint i;
-            float f[3];
-            for (i = 0; i < n; i ++) {
-               GLuint p = uisrc[i];
-               r11g11b10f_to_float3(p, f);
-               rgba[i][rDst] = clamp_float_to_uint(f[0]);
-               rgba[i][gDst] = clamp_float_to_uint(f[1]);
-               rgba[i][bDst] = clamp_float_to_uint(f[2]);
-               rgba[i][aDst] = 1;
-            }
-         }
-         break;
-      default:
-         _mesa_problem(NULL, "bad srcType in extract uint data");
-         break;
-   }
-#undef PROCESS
-}
-
-
-
-/*
- * Unpack a row of color image data from a client buffer according to
- * the pixel unpacking parameters.
- * Return GLubyte values in the specified dest image format.
- * This is used by glDrawPixels and glTexImage?D().
- * \param ctx - the context
- *         n - number of pixels in the span
- *         dstFormat - format of destination color array
- *         dest - the destination color array
- *         srcFormat - source image format
- *         srcType - source image  data type
- *         source - source image pointer
- *         srcPacking - pixel unpacking parameters
- *         transferOps - bitmask of IMAGE_*_BIT values of operations to apply
- *
- * XXX perhaps expand this to process whole images someday.
- */
-void
-_mesa_unpack_color_span_ubyte(struct gl_context *ctx,
-                              GLuint n, GLenum dstFormat, GLubyte dest[],
-                              GLenum srcFormat, GLenum srcType,
-                              const GLvoid *source,
-                              const struct gl_pixelstore_attrib *srcPacking,
-                              GLbitfield transferOps )
-{
-   GLboolean intFormat = _mesa_is_enum_format_integer(srcFormat);
-   ASSERT(dstFormat == GL_ALPHA ||
-          dstFormat == GL_LUMINANCE ||
-          dstFormat == GL_LUMINANCE_ALPHA ||
-          dstFormat == GL_INTENSITY ||
-          dstFormat == GL_RED ||
-          dstFormat == GL_RG ||
-          dstFormat == GL_RGB ||
-          dstFormat == GL_RGBA);
-
-   ASSERT(srcFormat == GL_RED ||
-          srcFormat == GL_GREEN ||
-          srcFormat == GL_BLUE ||
-          srcFormat == GL_ALPHA ||
-          srcFormat == GL_LUMINANCE ||
-          srcFormat == GL_LUMINANCE_ALPHA ||
-          srcFormat == GL_INTENSITY ||
-          srcFormat == GL_RG ||
-          srcFormat == GL_RGB ||
-          srcFormat == GL_BGR ||
-          srcFormat == GL_RGBA ||
-          srcFormat == GL_BGRA ||
-          srcFormat == GL_ABGR_EXT ||
-          srcFormat == GL_COLOR_INDEX);
-
-   ASSERT(srcType == GL_BITMAP ||
-          srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT ||
-          srcType == GL_UNSIGNED_BYTE_3_3_2 ||
-          srcType == GL_UNSIGNED_BYTE_2_3_3_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5 ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5_REV ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4 ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_5_5_1 ||
-          srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8 ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
-          srcType == GL_UNSIGNED_INT_10_10_10_2 ||
-          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
-          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
-          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
-
-   /* EXT_texture_integer specifies no transfer ops on integer
-    * types in the resolved issues section. Just set them to 0
-    * for integer surfaces.
-    */
-   if (intFormat)
-      transferOps = 0;
-
-   /* Try simple cases first */
-   if (transferOps == 0) {
-      if (srcType == GL_UNSIGNED_BYTE) {
-         if (dstFormat == GL_RGBA) {
-            if (srcFormat == GL_RGBA) {
-               memcpy( dest, source, n * 4 * sizeof(GLubyte) );
-               return;
-            }
-            else if (srcFormat == GL_RGB) {
-               GLuint i;
-               const GLubyte *src = (const GLubyte *) source;
-               GLubyte *dst = dest;
-               for (i = 0; i < n; i++) {
-                  dst[0] = src[0];
-                  dst[1] = src[1];
-                  dst[2] = src[2];
-                  dst[3] = 255;
-                  src += 3;
-                  dst += 4;
-               }
-               return;
-            }
-         }
-         else if (dstFormat == GL_RGB) {
-            if (srcFormat == GL_RGB) {
-               memcpy( dest, source, n * 3 * sizeof(GLubyte) );
-               return;
-            }
-            else if (srcFormat == GL_RGBA) {
-               GLuint i;
-               const GLubyte *src = (const GLubyte *) source;
-               GLubyte *dst = dest;
-               for (i = 0; i < n; i++) {
-                  dst[0] = src[0];
-                  dst[1] = src[1];
-                  dst[2] = src[2];
-                  src += 4;
-                  dst += 3;
-               }
-               return;
-            }
-         }
-         else if (dstFormat == srcFormat) {
-            GLint comps = _mesa_components_in_format(srcFormat);
-            assert(comps > 0);
-            memcpy( dest, source, n * comps * sizeof(GLubyte) );
-            return;
-         }
-      }
-   }
-
-
-   /* general solution begins here */
-   {
-      GLint dstComponents;
-      GLint rDst, gDst, bDst, aDst, lDst, iDst;
-      GLfloat (*rgba)[4] = malloc(4 * n * sizeof(GLfloat));
-
-      if (!rgba) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-         return;
-      }
-
-      dstComponents = _mesa_components_in_format( dstFormat );
-      /* source & dest image formats should have been error checked by now */
-      assert(dstComponents > 0);
-
-      /*
-       * Extract image data and convert to RGBA floats
-       */
-      if (srcFormat == GL_COLOR_INDEX) {
-         GLuint *indexes = malloc(n * sizeof(GLuint));
-
-         if (!indexes) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-            free(rgba);
-            return;
-         }
-
-         extract_uint_indexes(n, indexes, srcFormat, srcType, source,
-                              srcPacking);
-
-	 /* Convert indexes to RGBA */
-	 if (transferOps & IMAGE_SHIFT_OFFSET_BIT) {
-	    _mesa_shift_and_offset_ci(ctx, n, indexes);
-	 }
-	 _mesa_map_ci_to_rgba(ctx, n, indexes, rgba);
-
-         /* Don't do RGBA scale/bias or RGBA->RGBA mapping if starting
-          * with color indexes.
-          */
-         transferOps &= ~(IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT);
-
-         free(indexes);
-      }
-      else {
-         /* non-color index data */
-         extract_float_rgba(n, rgba, srcFormat, srcType, source,
-                            srcPacking->SwapBytes);
-      }
-
-      /* Need to clamp if returning GLubytes */
-      transferOps |= IMAGE_CLAMP_BIT;
-
-      if (transferOps) {
-         _mesa_apply_rgba_transfer_ops(ctx, transferOps, n, rgba);
-      }
-
-      get_component_indexes(dstFormat,
-                            &rDst, &gDst, &bDst, &aDst, &lDst, &iDst);
-
-      /* Now return the GLubyte data in the requested dstFormat */
-      if (rDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            CLAMPED_FLOAT_TO_UBYTE(dst[rDst], rgba[i][RCOMP]);
-            dst += dstComponents;
-         }
-      }
-
-      if (gDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            CLAMPED_FLOAT_TO_UBYTE(dst[gDst], rgba[i][GCOMP]);
-            dst += dstComponents;
-         }
-      }
-
-      if (bDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            CLAMPED_FLOAT_TO_UBYTE(dst[bDst], rgba[i][BCOMP]);
-            dst += dstComponents;
-         }
-      }
-
-      if (aDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            CLAMPED_FLOAT_TO_UBYTE(dst[aDst], rgba[i][ACOMP]);
-            dst += dstComponents;
-         }
-      }
-
-      if (iDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         assert(iDst == 0);
-         assert(dstComponents == 1);
-         for (i = 0; i < n; i++) {
-            /* Intensity comes from red channel */
-            CLAMPED_FLOAT_TO_UBYTE(dst[i], rgba[i][RCOMP]);
-         }
-      }
-
-      if (lDst >= 0) {
-         GLubyte *dst = dest;
-         GLuint i;
-         assert(lDst == 0);
-         for (i = 0; i < n; i++) {
-            /* Luminance comes from red channel */
-            CLAMPED_FLOAT_TO_UBYTE(dst[0], rgba[i][RCOMP]);
-            dst += dstComponents;
-         }
-      }
-
-      free(rgba);
-   }
-}
-
-
-/**
- * Same as _mesa_unpack_color_span_ubyte(), but return GLfloat data
- * instead of GLubyte.
- */
-void
-_mesa_unpack_color_span_float( struct gl_context *ctx,
-                               GLuint n, GLenum dstFormat, GLfloat dest[],
-                               GLenum srcFormat, GLenum srcType,
-                               const GLvoid *source,
-                               const struct gl_pixelstore_attrib *srcPacking,
-                               GLbitfield transferOps )
-{
-   ASSERT(dstFormat == GL_ALPHA ||
-          dstFormat == GL_LUMINANCE ||
-          dstFormat == GL_LUMINANCE_ALPHA ||
-          dstFormat == GL_INTENSITY ||
-          dstFormat == GL_RED ||
-          dstFormat == GL_RG ||
-          dstFormat == GL_RGB ||
-          dstFormat == GL_RGBA);
-
-   ASSERT(srcFormat == GL_RED ||
-          srcFormat == GL_GREEN ||
-          srcFormat == GL_BLUE ||
-          srcFormat == GL_ALPHA ||
-          srcFormat == GL_LUMINANCE ||
-          srcFormat == GL_LUMINANCE_ALPHA ||
-          srcFormat == GL_INTENSITY ||
-          srcFormat == GL_RG ||
-          srcFormat == GL_RGB ||
-          srcFormat == GL_BGR ||
-          srcFormat == GL_RGBA ||
-          srcFormat == GL_BGRA ||
-          srcFormat == GL_ABGR_EXT ||
-          srcFormat == GL_RED_INTEGER_EXT ||
-          srcFormat == GL_GREEN_INTEGER_EXT ||
-          srcFormat == GL_BLUE_INTEGER_EXT ||
-          srcFormat == GL_ALPHA_INTEGER_EXT ||
-          srcFormat == GL_RG_INTEGER ||
-          srcFormat == GL_RGB_INTEGER_EXT ||
-          srcFormat == GL_RGBA_INTEGER_EXT ||
-          srcFormat == GL_BGR_INTEGER_EXT ||
-          srcFormat == GL_BGRA_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT ||
-          srcFormat == GL_COLOR_INDEX);
-
-   ASSERT(srcType == GL_BITMAP ||
-          srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT ||
-          srcType == GL_UNSIGNED_BYTE_3_3_2 ||
-          srcType == GL_UNSIGNED_BYTE_2_3_3_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5 ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5_REV ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4 ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_5_5_1 ||
-          srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8 ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
-          srcType == GL_UNSIGNED_INT_10_10_10_2 ||
-          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
-          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
-          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
-
-   /* general solution, no special cases, yet */
-   {
-      GLint dstComponents;
-      GLint rDst, gDst, bDst, aDst, lDst, iDst;
-      GLfloat (*rgba)[4] = malloc(4 * n * sizeof(GLfloat));
-      GLboolean intFormat = _mesa_is_enum_format_integer(srcFormat);
-
-      if (!rgba) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-         return;
-      }
-
-      dstComponents = _mesa_components_in_format( dstFormat );
-      /* source & dest image formats should have been error checked by now */
-      assert(dstComponents > 0);
-
-      /* EXT_texture_integer specifies no transfer ops on integer
-       * types in the resolved issues section. Just set them to 0
-       * for integer surfaces.
-       */
-      if (intFormat)
-         transferOps = 0;
-
-      /*
-       * Extract image data and convert to RGBA floats
-       */
-      if (srcFormat == GL_COLOR_INDEX) {
-         GLuint *indexes = malloc(n * sizeof(GLuint));
-
-         if (!indexes) {
-            _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-            free(rgba);
-            return;
-         }
-
-         extract_uint_indexes(n, indexes, srcFormat, srcType, source,
-                              srcPacking);
-
-	 /* Convert indexes to RGBA */
-	 if (transferOps & IMAGE_SHIFT_OFFSET_BIT) {
-	    _mesa_shift_and_offset_ci(ctx, n, indexes);
-	 }
-	 _mesa_map_ci_to_rgba(ctx, n, indexes, rgba);
-
-         /* Don't do RGBA scale/bias or RGBA->RGBA mapping if starting
-          * with color indexes.
-          */
-         transferOps &= ~(IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT);
-
-         free(indexes);
-      }
-      else {
-         /* non-color index data */
-         extract_float_rgba(n, rgba, srcFormat, srcType, source,
-                            srcPacking->SwapBytes);
-      }
-
-      if (transferOps) {
-         _mesa_apply_rgba_transfer_ops(ctx, transferOps, n, rgba);
-      }
-
-      get_component_indexes(dstFormat,
-                            &rDst, &gDst, &bDst, &aDst, &lDst, &iDst);
-
-      /* Now pack results in the requested dstFormat */
-      if (rDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[rDst] = rgba[i][RCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (gDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[gDst] = rgba[i][GCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (bDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[bDst] = rgba[i][BCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (aDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[aDst] = rgba[i][ACOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (iDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         assert(iDst == 0);
-         assert(dstComponents == 1);
-         for (i = 0; i < n; i++) {
-            /* Intensity comes from red channel */
-            dst[i] = rgba[i][RCOMP];
-         }
-      }
-
-      if (lDst >= 0) {
-         GLfloat *dst = dest;
-         GLuint i;
-         assert(lDst == 0);
-         for (i = 0; i < n; i++) {
-            /* Luminance comes from red channel */
-            dst[0] = rgba[i][RCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      free(rgba);
-   }
-}
-
-
-/**
- * Same as _mesa_unpack_color_span_ubyte(), but return GLuint data
- * instead of GLubyte.
- * No pixel transfer ops are applied.
- */
-void
-_mesa_unpack_color_span_uint(struct gl_context *ctx,
-                             GLuint n, GLenum dstFormat, GLuint *dest,
-                             GLenum srcFormat, GLenum srcType,
-                             const GLvoid *source,
-                             const struct gl_pixelstore_attrib *srcPacking)
-{
-   GLuint (*rgba)[4] = malloc(n * 4 * sizeof(GLfloat));
-
-   if (!rgba) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-      return;
-   }
-
-   ASSERT(dstFormat == GL_ALPHA ||
-          dstFormat == GL_LUMINANCE ||
-          dstFormat == GL_LUMINANCE_ALPHA ||
-          dstFormat == GL_INTENSITY ||
-          dstFormat == GL_RED ||
-          dstFormat == GL_RG ||
-          dstFormat == GL_RGB ||
-          dstFormat == GL_RGBA);
-
-   ASSERT(srcFormat == GL_RED ||
-          srcFormat == GL_GREEN ||
-          srcFormat == GL_BLUE ||
-          srcFormat == GL_ALPHA ||
-          srcFormat == GL_LUMINANCE ||
-          srcFormat == GL_LUMINANCE_ALPHA ||
-          srcFormat == GL_INTENSITY ||
-          srcFormat == GL_RG ||
-          srcFormat == GL_RGB ||
-          srcFormat == GL_BGR ||
-          srcFormat == GL_RGBA ||
-          srcFormat == GL_BGRA ||
-          srcFormat == GL_ABGR_EXT ||
-          srcFormat == GL_RED_INTEGER_EXT ||
-          srcFormat == GL_GREEN_INTEGER_EXT ||
-          srcFormat == GL_BLUE_INTEGER_EXT ||
-          srcFormat == GL_ALPHA_INTEGER_EXT ||
-          srcFormat == GL_RG_INTEGER ||
-          srcFormat == GL_RGB_INTEGER_EXT ||
-          srcFormat == GL_RGBA_INTEGER_EXT ||
-          srcFormat == GL_BGR_INTEGER_EXT ||
-          srcFormat == GL_BGRA_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_INTEGER_EXT ||
-          srcFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT);
-
-   ASSERT(srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT ||
-          srcType == GL_UNSIGNED_BYTE_3_3_2 ||
-          srcType == GL_UNSIGNED_BYTE_2_3_3_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5 ||
-          srcType == GL_UNSIGNED_SHORT_5_6_5_REV ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4 ||
-          srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ||
-          srcType == GL_UNSIGNED_SHORT_5_5_5_1 ||
-          srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8 ||
-          srcType == GL_UNSIGNED_INT_8_8_8_8_REV ||
-          srcType == GL_UNSIGNED_INT_10_10_10_2 ||
-          srcType == GL_UNSIGNED_INT_2_10_10_10_REV ||
-          srcType == GL_UNSIGNED_INT_5_9_9_9_REV ||
-          srcType == GL_UNSIGNED_INT_10F_11F_11F_REV);
-
-
-   /* Extract image data as uint[4] pixels */
-   extract_uint_rgba(n, rgba, srcFormat, srcType, source,
-                     srcPacking->SwapBytes);
-
-   if (dstFormat == GL_RGBA) {
-      /* simple case */
-      memcpy(dest, rgba, 4 * sizeof(GLuint) * n);
-   }
-   else {
-      /* general case */
-      GLint rDst, gDst, bDst, aDst, lDst, iDst;
-      GLint dstComponents = _mesa_components_in_format( dstFormat );
-
-      assert(dstComponents > 0);
-
-      get_component_indexes(dstFormat,
-                            &rDst, &gDst, &bDst, &aDst, &lDst, &iDst);
-
-      /* Now pack values in the requested dest format */
-      if (rDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[rDst] = rgba[i][RCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (gDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[gDst] = rgba[i][GCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (bDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[bDst] = rgba[i][BCOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (aDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[aDst] = rgba[i][ACOMP];
-            dst += dstComponents;
-         }
-      }
-
-      if (iDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         assert(iDst == 0);
-         assert(dstComponents == 1);
-         for (i = 0; i < n; i++) {
-            /* Intensity comes from red channel */
-            dst[i] = rgba[i][RCOMP];
-         }
-      }
-
-      if (lDst >= 0) {
-         GLuint *dst = dest;
-         GLuint i;
-         assert(lDst == 0);
-         for (i = 0; i < n; i++) {
-            /* Luminance comes from red channel */
-            dst[0] = rgba[i][RCOMP];
-            dst += dstComponents;
-         }
-      }
-   }
-
-   free(rgba);
-}
-
-
-/*
- * Unpack a row of color index data from a client buffer according to
- * the pixel unpacking parameters.
- * This is (or will be) used by glDrawPixels, glTexImage[123]D, etc.
- *
- * Args:  ctx - the context
- *        n - number of pixels
- *        dstType - destination data type
- *        dest - destination array
- *        srcType - source pixel type
- *        source - source data pointer
- *        srcPacking - pixel unpacking parameters
- *        transferOps - the pixel transfer operations to apply
- */
-void
-_mesa_unpack_index_span( struct gl_context *ctx, GLuint n,
-                         GLenum dstType, GLvoid *dest,
-                         GLenum srcType, const GLvoid *source,
-                         const struct gl_pixelstore_attrib *srcPacking,
-                         GLbitfield transferOps )
-{
-   ASSERT(srcType == GL_BITMAP ||
-          srcType == GL_UNSIGNED_BYTE ||
-          srcType == GL_BYTE ||
-          srcType == GL_UNSIGNED_SHORT ||
-          srcType == GL_SHORT ||
-          srcType == GL_UNSIGNED_INT ||
-          srcType == GL_INT ||
-          srcType == GL_HALF_FLOAT_ARB ||
-          srcType == GL_FLOAT);
-
-   ASSERT(dstType == GL_UNSIGNED_BYTE ||
-          dstType == GL_UNSIGNED_SHORT ||
-          dstType == GL_UNSIGNED_INT);
-
-
-   transferOps &= (IMAGE_MAP_COLOR_BIT | IMAGE_SHIFT_OFFSET_BIT);
-
-   /*
-    * Try simple cases first
-    */
-   if (transferOps == 0 && srcType == GL_UNSIGNED_BYTE
-       && dstType == GL_UNSIGNED_BYTE) {
-      memcpy(dest, source, n * sizeof(GLubyte));
-   }
-   else if (transferOps == 0 && srcType == GL_UNSIGNED_INT
-            && dstType == GL_UNSIGNED_INT && !srcPacking->SwapBytes) {
-      memcpy(dest, source, n * sizeof(GLuint));
-   }
-   else {
-      /*
-       * general solution
-       */
-      GLuint *indexes = malloc(n * sizeof(GLuint));
-
-      if (!indexes) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
-         return;
-      }
-
-      extract_uint_indexes(n, indexes, GL_COLOR_INDEX, srcType, source,
-                           srcPacking);
-
-      if (transferOps)
-         _mesa_apply_ci_transfer_ops(ctx, transferOps, n, indexes);
-
-      /* convert to dest type */
-      switch (dstType) {
-         case GL_UNSIGNED_BYTE:
-            {
-               GLubyte *dst = (GLubyte *) dest;
-               GLuint i;
-               for (i = 0; i < n; i++) {
-                  dst[i] = (GLubyte) (indexes[i] & 0xff);
-               }
-            }
-            break;
-         case GL_UNSIGNED_SHORT:
-            {
-               GLuint *dst = (GLuint *) dest;
-               GLuint i;
-               for (i = 0; i < n; i++) {
-                  dst[i] = (GLushort) (indexes[i] & 0xffff);
-               }
-            }
-            break;
-         case GL_UNSIGNED_INT:
-            memcpy(dest, indexes, n * sizeof(GLuint));
-            break;
-         default:
-            _mesa_problem(ctx, "bad dstType in _mesa_unpack_index_span");
-      }
-
-      free(indexes);
-   }
-}
-
-
-void
-_mesa_pack_index_span( struct gl_context *ctx, GLuint n,
-                       GLenum dstType, GLvoid *dest, const GLuint *source,
-                       const struct gl_pixelstore_attrib *dstPacking,
-                       GLbitfield transferOps )
-{
-   GLuint *indexes = malloc(n * sizeof(GLuint));
-
-   if (!indexes) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel packing");
-      return;
-   }
-
-   transferOps &= (IMAGE_MAP_COLOR_BIT | IMAGE_SHIFT_OFFSET_BIT);
-
-   if (transferOps & (IMAGE_MAP_COLOR_BIT | IMAGE_SHIFT_OFFSET_BIT)) {
-      /* make a copy of input */
-      memcpy(indexes, source, n * sizeof(GLuint));
-      _mesa_apply_ci_transfer_ops(ctx, transferOps, n, indexes);
-      source = indexes;
-   }
-
-   switch (dstType) {
-   case GL_UNSIGNED_BYTE:
-      {
-         GLubyte *dst = (GLubyte *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            *dst++ = (GLubyte) source[i];
-         }
-      }
-      break;
-   case GL_BYTE:
-      {
-         GLbyte *dst = (GLbyte *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLbyte) source[i];
-         }
-      }
-      break;
-   case GL_UNSIGNED_SHORT:
-      {
-         GLushort *dst = (GLushort *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLushort) source[i];
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap2( (GLushort *) dst, n );
-         }
-      }
-      break;
-   case GL_SHORT:
-      {
-         GLshort *dst = (GLshort *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLshort) source[i];
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap2( (GLushort *) dst, n );
-         }
-      }
-      break;
-   case GL_UNSIGNED_INT:
-      {
-         GLuint *dst = (GLuint *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLuint) source[i];
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap4( (GLuint *) dst, n );
-         }
-      }
-      break;
-   case GL_INT:
-      {
-         GLint *dst = (GLint *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLint) source[i];
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap4( (GLuint *) dst, n );
-         }
-      }
-      break;
-   case GL_FLOAT:
-      {
-         GLfloat *dst = (GLfloat *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = (GLfloat) source[i];
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap4( (GLuint *) dst, n );
-         }
-      }
-      break;
-   case GL_HALF_FLOAT_ARB:
-      {
-         GLhalfARB *dst = (GLhalfARB *) dest;
-         GLuint i;
-         for (i = 0; i < n; i++) {
-            dst[i] = _mesa_float_to_half((GLfloat) source[i]);
-         }
-         if (dstPacking->SwapBytes) {
-            _mesa_swap2( (GLushort *) dst, n );
-         }
-      }
-      break;
-   default:
-      _mesa_problem(ctx, "bad type in _mesa_pack_index_span");
-   }
-
-   free(indexes);
-}
-
-
 /*
  * Unpack a row of stencil data from a client buffer according to
  * the pixel unpacking parameters.
@@ -5023,6 +512,7 @@ _mesa_unpack_stencil_span( struct gl_context *ctx, GLuint n,
           srcType == GL_INT ||
           srcType == GL_UNSIGNED_INT_24_8_EXT ||
           srcType == GL_HALF_FLOAT_ARB ||
+          srcType == GL_HALF_FLOAT_OES ||
           srcType == GL_FLOAT ||
           srcType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
 
@@ -5213,6 +703,7 @@ _mesa_pack_stencil_span( struct gl_context *ctx, GLuint n,
       }
       break;
    case GL_HALF_FLOAT_ARB:
+   case GL_HALF_FLOAT_OES:
       {
          GLhalfARB *dst = (GLhalfARB *) dest;
          GLuint i;
@@ -5430,6 +921,7 @@ _mesa_unpack_depth_span( struct gl_context *ctx, GLuint n,
          needClamp = GL_TRUE;
          break;
       case GL_HALF_FLOAT_ARB:
+      case GL_HALF_FLOAT_OES:
          {
             GLuint i;
             const GLhalfARB *src = (const GLhalfARB *) source;
@@ -5619,6 +1111,7 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
       }
       break;
    case GL_HALF_FLOAT_ARB:
+   case GL_HALF_FLOAT_OES:
       {
          GLhalfARB *dst = (GLhalfARB *) dest;
          GLuint i;
@@ -5699,7 +1192,6 @@ _mesa_pack_depth_stencil_span(struct gl_context *ctx,GLuint n,
 
 
 
-
 /**
  * Unpack image data.  Apply byte swapping, byte flipping (bitmap).
  * Return all image data in a contiguous block.  This is used when we
@@ -5839,130 +1331,303 @@ _mesa_unpack_image( GLuint dimensions,
    }
 }
 
-
-
-/**
- * If we unpack colors from a luminance surface, we'll get pixel colors
- * such as (l, l, l, a).
- * When we call _mesa_pack_rgba_span_float(format=GL_LUMINANCE), that
- * function will compute L=R+G+B before packing.  The net effect is we'll
- * accidentally store luminance values = 3*l.
- * This function compensates for that by converting (aka rebasing) (l,l,l,a)
- * to be (l,0,0,a).
- * It's a similar story for other formats such as LUMINANCE_ALPHA, ALPHA
- * and INTENSITY.
- *
- * Finally, we also need to do this when the actual surface format does
- * not match the logical surface format.  For example, suppose the user
- * requests a GL_LUMINANCE texture but the driver stores it as RGBA.
- * Again, we'll get pixel values like (l,l,l,a).
- */
 void
-_mesa_rebase_rgba_float(GLuint n, GLfloat rgba[][4], GLenum baseFormat)
+_mesa_pack_luminance_from_rgba_float(GLuint n, GLfloat rgba[][4],
+                                     GLvoid *dstAddr, GLenum dst_format,
+                                     GLbitfield transferOps) /* only IMAGE_CLAMP_BIT is examined */
 {
-   GLuint i;
+   int i; /* pixel index; n pixels are packed with luminance = R + G + B */
+   GLfloat *dst = (GLfloat *) dstAddr; /* output is always written as GLfloat */
 
-   switch (baseFormat) {
-   case GL_ALPHA:
-      for (i = 0; i < n; i++) {
-         rgba[i][RCOMP] = 0.0F;
-         rgba[i][GCOMP] = 0.0F;
-         rgba[i][BCOMP] = 0.0F;
-      }
-      break;
-   case GL_INTENSITY:
-      /* fall-through */
+   switch (dst_format) {
    case GL_LUMINANCE:
-      for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0.0F;
-         rgba[i][BCOMP] = 0.0F;
-         rgba[i][ACOMP] = 1.0F;
+      if (transferOps & IMAGE_CLAMP_BIT) { /* one float per pixel, limited to [0, 1] */
+         for (i = 0; i < n; i++) {
+            GLfloat sum = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
+            dst[i] = CLAMP(sum, 0.0F, 1.0F);
+         }
+      } else { /* one float per pixel, sum stored as-is */
+         for (i = 0; i < n; i++) {
+            dst[i] = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
+         }
       }
-      break;
+      return;
    case GL_LUMINANCE_ALPHA:
-      for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0.0F;
-         rgba[i][BCOMP] = 0.0F;
-      }
-      break;
-   case GL_RGB:
-      for (i = 0; i < n; i++) {
-         rgba[i][ACOMP] = 1.0F;
-      }
-      break;
-   case GL_RG:
-      for (i = 0; i < n; i++) {
-         rgba[i][BCOMP] = 0.0F;
-         rgba[i][ACOMP] = 1.0F;
-      }
-      break;
-   case GL_RED:
-      for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0.0F;
-         rgba[i][BCOMP] = 0.0F;
-         rgba[i][ACOMP] = 1.0F;
+      if (transferOps & IMAGE_CLAMP_BIT) { /* two floats per pixel: L at 2*i, A at 2*i+1 */
+         for (i = 0; i < n; i++) {
+            GLfloat sum = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
+            dst[2*i] = CLAMP(sum, 0.0F, 1.0F); /* only luminance is clamped here */
+            dst[2*i+1] = rgba[i][ACOMP]; /* alpha is copied unchanged */
+         }
+      } else {
+         for (i = 0; i < n; i++) {
+            dst[2*i] = rgba[i][RCOMP] + rgba[i][GCOMP] + rgba[i][BCOMP];
+            dst[2*i+1] = rgba[i][ACOMP];
+         }
       }
-      break;
-
+      return;
    default:
-      /* no-op */
-      ;
+      assert(!"Unsupported format"); /* only the two luminance formats above are handled */
    }
 }
 
+/* Clamp a signed 64-bit value to the range [INT32_MIN, INT32_MAX]. */
+static int32_t clamp_sint64_to_sint32(int64_t src)
+{
+   return CLAMP(src, INT32_MIN, INT32_MAX);
+}
+
+/* Clamp a signed 64-bit value to [0, UINT32_MAX] (returned as int32_t). */
+static int32_t clamp_sint64_to_uint32(int64_t src)
+{
+   return CLAMP(src, 0, UINT32_MAX);
+}
+
+/* Limit an unsigned 64-bit value to UINT32_MAX (returned as int32_t). */
+static int32_t clamp_uint64_to_uint32(uint64_t src)
+{
+   return MIN2(src, UINT32_MAX);
+}
+
+/* Limit an unsigned 64-bit value to INT32_MAX. */
+static int32_t clamp_uint64_to_sint32(uint64_t src)
+{
+   return MIN2(src, INT32_MAX);
+}
+
+/**
+ * Narrow a 64-bit luminance sum to a component of 'bits' bits.
+ *
+ * src_is_signed / dst_is_signed select which clamp_*64_to_*32() helper and
+ * which _mesa_*_to_*() conversion is applied.
+ */
+static int32_t
+convert_integer_luminance64(int64_t src64, int bits,
+                            bool dst_is_signed, bool src_is_signed)
+{
+   int32_t clamped;
+
+   /* First bring the value down to 32 bits, honouring both signednesses. */
+   if (src_is_signed) {
+      clamped = dst_is_signed ? clamp_sint64_to_sint32(src64)
+                              : clamp_sint64_to_uint32(src64);
+   } else {
+      clamped = dst_is_signed ? clamp_uint64_to_sint32(src64)
+                              : clamp_uint64_to_uint32(src64);
+   }
+
+   /* Any size other than 32 bits gets one more conversion. */
+   if (bits == 32)
+      return clamped;
+   if (dst_is_signed)
+      return _mesa_signed_to_signed(clamped, bits);
+   return _mesa_unsigned_to_unsigned(clamped, bits);
+}
+
+/* Resize src to 'bits' bits with the _mesa_*_to_*() helper for this sign pair */
+static int32_t
+convert_integer(int32_t src, int bits, bool dst_is_signed, bool src_is_signed)
+{
+   if (src_is_signed && dst_is_signed)
+      return _mesa_signed_to_signed(src, bits);
+   if (src_is_signed)
+      return _mesa_signed_to_unsigned(src, bits);
+   if (dst_is_signed)
+      return _mesa_unsigned_to_signed(src, bits);
+   return _mesa_unsigned_to_unsigned(src, bits);
+}
 
-/**
- * As above, but GLuint components.
- */
 void
-_mesa_rebase_rgba_uint(GLuint n, GLuint rgba[][4], GLenum baseFormat)
+_mesa_pack_luminance_from_rgba_integer(GLuint n,
+                                       GLuint rgba[][4], bool rgba_is_signed,
+                                       GLvoid *dstAddr,
+                                       GLenum dst_format,
+                                       GLenum dst_type)
 {
-   GLuint i;
+   int i;
+   int64_t lum64;
+   int32_t lum32, alpha;
+   bool dst_is_signed;
+   int dst_bits;
+
+   assert(dst_format == GL_LUMINANCE_INTEGER_EXT ||
+          dst_format == GL_LUMINANCE_ALPHA_INTEGER_EXT);
+
+   /* We first compute luminance values as a 64-bit addition of the
+    * 32-bit R,G,B components, then we clamp the result to the dst type size.
+    *
+    * Notice that this operation involves casting the 32-bit R,G,B components
+    * to 64-bit before the addition. Since rgba is defined as a GLuint array
+    * we need to be careful when rgba packs signed data and make sure
+    * that we cast to a 32-bit signed integer values before casting them to
+    * 64-bit signed integers.
+    */
+   dst_is_signed = (dst_type == GL_BYTE || dst_type == GL_SHORT ||
+                    dst_type == GL_INT);
 
-   switch (baseFormat) {
-   case GL_ALPHA:
-      for (i = 0; i < n; i++) {
-         rgba[i][RCOMP] = 0;
-         rgba[i][GCOMP] = 0;
-         rgba[i][BCOMP] = 0;
-      }
-      break;
-   case GL_INTENSITY:
-      /* fall-through */
-   case GL_LUMINANCE:
-      for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0;
-         rgba[i][BCOMP] = 0;
-         rgba[i][ACOMP] = 1;
-      }
-      break;
-   case GL_LUMINANCE_ALPHA:
-      for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0;
-         rgba[i][BCOMP] = 0;
-      }
-      break;
-   case GL_RGB:
-      for (i = 0; i < n; i++) {
-         rgba[i][ACOMP] = 1;
-      }
-      break;
-   case GL_RG:
+   dst_bits = _mesa_sizeof_type(dst_type) * 8;
+   assert(dst_bits > 0);
+
+   switch (dst_format) {
+   case GL_LUMINANCE_INTEGER_EXT:
       for (i = 0; i < n; i++) {
-         rgba[i][BCOMP] = 0;
-         rgba[i][ACOMP] = 1;
+         if (!rgba_is_signed) {
+            lum64 = (uint64_t) rgba[i][RCOMP] +
+                    (uint64_t) rgba[i][GCOMP] +
+                    (uint64_t) rgba[i][BCOMP];
+         } else {
+            lum64 = (int64_t) ((int32_t) rgba[i][RCOMP]) +
+                    (int64_t) ((int32_t) rgba[i][GCOMP]) +
+                    (int64_t) ((int32_t) rgba[i][BCOMP]);
+         }
+         lum32 = convert_integer_luminance64(lum64, dst_bits,
+                                             dst_is_signed, rgba_is_signed);
+         switch (dst_type) {
+         case GL_BYTE:
+         case GL_UNSIGNED_BYTE: {
+            GLbyte *dst = (GLbyte *) dstAddr;
+            dst[i] = lum32;
+         }
+         break;
+         case GL_SHORT:
+         case GL_UNSIGNED_SHORT: {
+            GLshort *dst = (GLshort *) dstAddr;
+            dst[i] = lum32;
+         }
+         break;
+         case GL_INT:
+         case GL_UNSIGNED_INT: {
+            GLint *dst = (GLint *) dstAddr;
+            dst[i] = lum32;
+         }
+         break;
+         }
       }
-      break;
-   case GL_RED:
+      return;
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
       for (i = 0; i < n; i++) {
-         rgba[i][GCOMP] = 0;
-         rgba[i][BCOMP] = 0;
-         rgba[i][ACOMP] = 1;
+         if (!rgba_is_signed) {
+            lum64 = (uint64_t) rgba[i][RCOMP] +
+                    (uint64_t) rgba[i][GCOMP] +
+                    (uint64_t) rgba[i][BCOMP];
+         } else {
+            lum64 = (int64_t) ((int32_t) rgba[i][RCOMP]) +
+                    (int64_t) ((int32_t) rgba[i][GCOMP]) +
+                    (int64_t) ((int32_t) rgba[i][BCOMP]);
+         }
+         lum32 = convert_integer_luminance64(lum64, dst_bits,
+                                             dst_is_signed, rgba_is_signed);
+         alpha = convert_integer(rgba[i][ACOMP], dst_bits,
+                                 dst_is_signed, rgba_is_signed);
+         switch (dst_type) {
+         case GL_BYTE:
+         case GL_UNSIGNED_BYTE: {
+            GLbyte *dst = (GLbyte *) dstAddr;
+            dst[2*i] = lum32;
+            dst[2*i+1] = alpha;
+         }
+         case GL_SHORT:
+         case GL_UNSIGNED_SHORT: {
+            GLshort *dst = (GLshort *) dstAddr;
+            dst[i] = lum32;
+            dst[2*i+1] = alpha;
+         }
+         break;
+         case GL_INT:
+         case GL_UNSIGNED_INT: {
+            GLint *dst = (GLint *) dstAddr;
+            dst[i] = lum32;
+            dst[2*i+1] = alpha;
+         }
+         break;
+         }
       }
-   default:
-      /* no-op */
-      ;
+      return;
+   }
+}
+
+/**
+ * Unpack srcDepth color-index images of srcWidth x srcHeight pixels to RGBA
+ * floats: indexes are extracted, optionally shifted/offset, mapped to RGBA
+ * and passed through the remaining RGBA transfer ops.  Returns a malloc'ed
+ * buffer (4 floats per pixel), or NULL after reporting GL_OUT_OF_MEMORY.
+ */
+GLfloat *
+_mesa_unpack_color_index_to_rgba_float(struct gl_context *ctx, GLuint dims,
+                                       const void *src, GLenum srcFormat, GLenum srcType,
+                                       int srcWidth, int srcHeight, int srcDepth,
+                                       const struct gl_pixelstore_attrib *srcPacking,
+                                       GLbitfield transferOps)
+{
+   int count, img;
+   GLuint *indexes;
+   GLfloat *rgba, *dstPtr;
+
+   count = srcWidth * srcHeight;
+   indexes = malloc(count * sizeof(GLuint));
+   if (!indexes) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
+      return NULL;
+   }
+
+   rgba = malloc(sizeof(GLfloat) * 4 * count * srcDepth); /* size_t math */
+   if (!rgba) {
+      free(indexes);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
+      return NULL;
    }
+
+   /* Convert indexes to RGBA float, one image at a time */
+   dstPtr = rgba;
+   for (img = 0; img < srcDepth; img++) {
+      const GLubyte *srcPtr =
+         (const GLubyte *) _mesa_image_address(dims, srcPacking, src,
+                                               srcWidth, srcHeight,
+                                               srcFormat, srcType, img, 0, 0);
+      extract_uint_indexes(count, indexes, srcFormat, srcType, srcPtr, srcPacking);
+      if (transferOps & IMAGE_SHIFT_OFFSET_BIT)
+         _mesa_shift_and_offset_ci(ctx, count, indexes);
+      _mesa_map_ci_to_rgba(ctx, count, indexes, (float (*)[4])dstPtr);
+
+      /* Don't do RGBA scale/bias or RGBA->RGBA mapping if starting
+       * with color indexes.
+       */
+      transferOps &= ~(IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT);
+      _mesa_apply_rgba_transfer_ops(ctx, transferOps, count, (float (*)[4])dstPtr);
+      dstPtr += count * 4;
+   }
+
+   free(indexes);
+   return rgba;
 }
 
+/* As _mesa_unpack_color_index_to_rgba_float(), but clamped and converted to
+ * GLubyte.  Returns a malloc'ed buffer, or NULL if an allocation failed. */
+GLubyte *
+_mesa_unpack_color_index_to_rgba_ubyte(struct gl_context *ctx, GLuint dims,
+                                       const void *src, GLenum srcFormat, GLenum srcType,
+                                       int srcWidth, int srcHeight, int srcDepth,
+                                       const struct gl_pixelstore_attrib *srcPacking,
+                                       GLbitfield transferOps)
+{
+   GLfloat *rgba;
+   GLubyte *dst;
+   int i, count = srcWidth * srcHeight * srcDepth;
+
+   rgba = _mesa_unpack_color_index_to_rgba_float(
+      ctx, dims, src, srcFormat, srcType, srcWidth, srcHeight, srcDepth,
+      srcPacking, transferOps | IMAGE_CLAMP_BIT);
+   if (!rgba)
+      return NULL;   /* the float unpacker has already reported the error */
+   dst = malloc(count * 4 * sizeof(GLubyte));
+   if (dst) {
+      for (i = 0; i < count * 4; i++) {
+         CLAMPED_FLOAT_TO_UBYTE(dst[i], rgba[i]);
+      }
+   } else {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "pixel unpacking");
+   }
+   free(rgba);
 
+   return dst;
+}
diff --git a/mesalib/src/mesa/main/pack.h b/mesalib/src/mesa/main/pack.h
index 2173b652e..ac0a099e3 100644
--- a/mesalib/src/mesa/main/pack.h
+++ b/mesalib/src/mesa/main/pack.h
@@ -41,63 +41,12 @@ _mesa_pack_polygon_stipple(const GLuint pattern[32], GLubyte *dest,
                            const struct gl_pixelstore_attrib *packing);
 
 
-extern GLvoid *
-_mesa_unpack_bitmap(GLint width, GLint height, const GLubyte *pixels,
-                    const struct gl_pixelstore_attrib *packing);
-
 extern void
 _mesa_pack_bitmap(GLint width, GLint height, const GLubyte *source,
                   GLubyte *dest, const struct gl_pixelstore_attrib *packing);
 
 
 extern void
-_mesa_pack_rgba_span_float(struct gl_context *ctx, GLuint n,
-                           GLfloat rgba[][4],
-                           GLenum dstFormat, GLenum dstType, GLvoid *dstAddr,
-                           const struct gl_pixelstore_attrib *dstPacking,
-                           GLbitfield transferOps);
-
-
-extern void
-_mesa_unpack_color_span_ubyte(struct gl_context *ctx,
-                             GLuint n, GLenum dstFormat, GLubyte dest[],
-                             GLenum srcFormat, GLenum srcType,
-                             const GLvoid *source,
-                             const struct gl_pixelstore_attrib *srcPacking,
-                             GLbitfield transferOps);
-
-
-extern void
-_mesa_unpack_color_span_float(struct gl_context *ctx,
-                              GLuint n, GLenum dstFormat, GLfloat dest[],
-                              GLenum srcFormat, GLenum srcType,
-                              const GLvoid *source,
-                              const struct gl_pixelstore_attrib *srcPacking,
-                              GLbitfield transferOps);
-
-extern void
-_mesa_unpack_color_span_uint(struct gl_context *ctx,
-                             GLuint n, GLenum dstFormat, GLuint *dest,
-                             GLenum srcFormat, GLenum srcType,
-                             const GLvoid *source,
-                             const struct gl_pixelstore_attrib *srcPacking);
-
-extern void
-_mesa_unpack_index_span(struct gl_context *ctx, GLuint n,
-                        GLenum dstType, GLvoid *dest,
-                        GLenum srcType, const GLvoid *source,
-                        const struct gl_pixelstore_attrib *srcPacking,
-                        GLbitfield transferOps);
-
-
-extern void
-_mesa_pack_index_span(struct gl_context *ctx, GLuint n,
-                      GLenum dstType, GLvoid *dest, const GLuint *source,
-                      const struct gl_pixelstore_attrib *dstPacking,
-                      GLbitfield transferOps);
-
-
-extern void
 _mesa_unpack_stencil_span(struct gl_context *ctx, GLuint n,
                           GLenum dstType, GLvoid *dest,
                           GLenum srcType, const GLvoid *source,
@@ -136,23 +85,28 @@ _mesa_unpack_image(GLuint dimensions,
                    GLenum format, GLenum type, const GLvoid *pixels,
                    const struct gl_pixelstore_attrib *unpack);
 
-
-void
-_mesa_pack_rgba_span_from_uints(struct gl_context *ctx, GLuint n, GLuint rgba[][4],
-                                GLenum dstFormat, GLenum dstType,
-                                GLvoid *dstAddr);
-
-
-void
-_mesa_pack_rgba_span_from_ints(struct gl_context *ctx, GLuint n, GLint rgba[][4],
-                               GLenum dstFormat, GLenum dstType,
-                               GLvoid *dstAddr);
-
-
 extern void
-_mesa_rebase_rgba_float(GLuint n, GLfloat rgba[][4], GLenum baseFormat);
+_mesa_pack_luminance_from_rgba_float(GLuint n, GLfloat rgba[][4],
+                                     GLvoid *dstAddr, GLenum dst_format,
+                                     GLbitfield transferOps);
 
 extern void
-_mesa_rebase_rgba_uint(GLuint n, GLuint rgba[][4], GLenum baseFormat);
+_mesa_pack_luminance_from_rgba_integer(GLuint n, GLuint rgba[][4], bool rgba_is_signed,
+                                       GLvoid *dstAddr, GLenum dst_format,
+                                       GLenum dst_type);
+
+extern GLfloat *
+_mesa_unpack_color_index_to_rgba_float(struct gl_context *ctx, GLuint dims,
+                                       const void *src, GLenum srcFormat, GLenum srcType,
+                                       int srcWidth, int srcHeight, int srcDepth,
+                                       const struct gl_pixelstore_attrib *srcPacking,
+                                       GLbitfield transferOps);
+
+extern GLubyte *
+_mesa_unpack_color_index_to_rgba_ubyte(struct gl_context *ctx, GLuint dims,
+                                       const void *src, GLenum srcFormat, GLenum srcType,
+                                       int srcWidth, int srcHeight, int srcDepth,
+                                       const struct gl_pixelstore_attrib *srcPacking,
+                                       GLbitfield transferOps);
 
 #endif
diff --git a/mesalib/src/mesa/main/pack_tmp.h b/mesalib/src/mesa/main/pack_tmp.h
deleted file mode 100644
index 0d4eb387d..000000000
--- a/mesalib/src/mesa/main/pack_tmp.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-static void
-FN_NAME(struct gl_context *ctx,
-	DST_TYPE *dst,
-	GLenum dstFormat,
-	SRC_TYPE rgba[][4],
-	int n)
-{
-   int i;
-
-   switch (dstFormat) {
-   case GL_RED_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i] = SRC_CONVERT(rgba[i][RCOMP]);
-      }
-      break;
-
-   case GL_GREEN_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i] = SRC_CONVERT(rgba[i][GCOMP]);
-      }
-      break;
-
-   case GL_BLUE_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i] = SRC_CONVERT(rgba[i][BCOMP]);
-      };
-      break;
-
-   case GL_ALPHA_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i] = SRC_CONVERT(rgba[i][ACOMP]);
-      }
-      break;
-
-   case GL_RG_INTEGER:
-      for (i=0;i<n;i++) {
-	 dst[i*2+0] = SRC_CONVERT(rgba[i][RCOMP]);
-	 dst[i*2+1] = SRC_CONVERT(rgba[i][GCOMP]);
-      }
-      break;
-
-   case GL_RGB_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i*3+0] = SRC_CONVERT(rgba[i][RCOMP]);
-	 dst[i*3+1] = SRC_CONVERT(rgba[i][GCOMP]);
-	 dst[i*3+2] = SRC_CONVERT(rgba[i][BCOMP]);
-      }
-      break;
-
-   case GL_RGBA_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i*4+0] = SRC_CONVERT(rgba[i][RCOMP]);
-	 dst[i*4+1] = SRC_CONVERT(rgba[i][GCOMP]);
-	 dst[i*4+2] = SRC_CONVERT(rgba[i][BCOMP]);
-	 dst[i*4+3] = SRC_CONVERT(rgba[i][ACOMP]);
-      }
-      break;
-
-   case GL_BGR_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i*3+0] = SRC_CONVERT(rgba[i][BCOMP]);
-	 dst[i*3+1] = SRC_CONVERT(rgba[i][GCOMP]);
-	 dst[i*3+2] = SRC_CONVERT(rgba[i][RCOMP]);
-      }
-      break;
-
-   case GL_BGRA_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i*4+0] = SRC_CONVERT(rgba[i][BCOMP]);
-	 dst[i*4+1] = SRC_CONVERT(rgba[i][GCOMP]);
-	 dst[i*4+2] = SRC_CONVERT(rgba[i][RCOMP]);
-	 dst[i*4+3] = SRC_CONVERT(rgba[i][ACOMP]);
-      }
-      break;
-
-   case GL_LUMINANCE_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i] = SRC_CONVERT(rgba[i][RCOMP] +
-			      rgba[i][GCOMP] +
-			      rgba[i][BCOMP]);
-      }
-      break;
-
-   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-      for (i=0;i<n;i++) {
-	 dst[i*2+0] = SRC_CONVERT(rgba[i][RCOMP] +
-				  rgba[i][GCOMP] +
-				  rgba[i][BCOMP]);
-	 dst[i*2+1] = SRC_CONVERT(rgba[i][ACOMP]);
-      }
-      break;
-
-   default:
-      _mesa_problem(ctx,
-         "Unsupported format (%s)",
-         _mesa_lookup_enum_by_nr(dstFormat));
-      break;
-   }
-}
diff --git a/mesalib/src/mesa/main/performance_monitor.c b/mesalib/src/mesa/main/performance_monitor.c
index c02910e31..2d740daf0 100644
--- a/mesalib/src/mesa/main/performance_monitor.c
+++ b/mesalib/src/mesa/main/performance_monitor.c
@@ -42,7 +42,7 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "performance_monitor.h"
-#include "bitset.h"
+#include "util/bitset.h"
 #include "util/ralloc.h"
 
 void
diff --git a/mesalib/src/mesa/main/polygon.c b/mesalib/src/mesa/main/polygon.c
index cdaa24483..a1f0aa02d 100644
--- a/mesalib/src/mesa/main/polygon.c
+++ b/mesalib/src/mesa/main/polygon.c
@@ -235,25 +235,33 @@ _mesa_GetPolygonStipple( GLubyte *dest )
    _mesa_GetnPolygonStippleARB(INT_MAX, dest);
 }
 
-
-void GLAPIENTRY
-_mesa_PolygonOffset( GLfloat factor, GLfloat units )
+void
+_mesa_polygon_offset_clamp(struct gl_context *ctx,
+                           GLfloat factor, GLfloat units, GLfloat clamp)
 {
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (MESA_VERBOSE&VERBOSE_API)
-      _mesa_debug(ctx, "glPolygonOffset %f %f\n", factor, units);
-
    if (ctx->Polygon.OffsetFactor == factor &&
-       ctx->Polygon.OffsetUnits == units)
+       ctx->Polygon.OffsetUnits == units &&
+       ctx->Polygon.OffsetClamp == clamp)
       return;
 
    FLUSH_VERTICES(ctx, _NEW_POLYGON);
    ctx->Polygon.OffsetFactor = factor;
    ctx->Polygon.OffsetUnits = units;
+   ctx->Polygon.OffsetClamp = clamp;
 
    if (ctx->Driver.PolygonOffset)
-      ctx->Driver.PolygonOffset( ctx, factor, units );
+      ctx->Driver.PolygonOffset( ctx, factor, units, clamp );
+}
+
+void GLAPIENTRY
+_mesa_PolygonOffset( GLfloat factor, GLfloat units )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (MESA_VERBOSE&VERBOSE_API)
+      _mesa_debug(ctx, "glPolygonOffset %f %f\n", factor, units);
+
+   _mesa_polygon_offset_clamp(ctx, factor, units, 0.0);
 }
 
 
@@ -265,6 +273,23 @@ _mesa_PolygonOffsetEXT( GLfloat factor, GLfloat bias )
    _mesa_PolygonOffset(factor, bias * ctx->DrawBuffer->_DepthMaxF );
 }
 
+/* glPolygonOffsetClampEXT entry point; needs EXT_polygon_offset_clamp. */
+void GLAPIENTRY
+_mesa_PolygonOffsetClampEXT( GLfloat factor, GLfloat units, GLfloat clamp )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (ctx->Extensions.EXT_polygon_offset_clamp) {
+      if (MESA_VERBOSE & VERBOSE_API)
+         _mesa_debug(ctx, "glPolygonOffsetClampEXT %f %f %f\n",
+                     factor, units, clamp);
+      _mesa_polygon_offset_clamp(ctx, factor, units, clamp);
+   } else {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "unsupported function (glPolygonOffsetClampEXT) called");
+   }
+}
+
 
 
 /**********************************************************************/
@@ -292,6 +317,7 @@ void _mesa_init_polygon( struct gl_context * ctx )
    ctx->Polygon.StippleFlag = GL_FALSE;
    ctx->Polygon.OffsetFactor = 0.0F;
    ctx->Polygon.OffsetUnits = 0.0F;
+   ctx->Polygon.OffsetClamp = 0.0F;
    ctx->Polygon.OffsetPoint = GL_FALSE;
    ctx->Polygon.OffsetLine = GL_FALSE;
    ctx->Polygon.OffsetFill = GL_FALSE;
diff --git a/mesalib/src/mesa/main/polygon.h b/mesalib/src/mesa/main/polygon.h
index 530adba4c..41344a2ef 100644
--- a/mesalib/src/mesa/main/polygon.h
+++ b/mesalib/src/mesa/main/polygon.h
@@ -55,12 +55,18 @@ extern void GLAPIENTRY
 _mesa_PolygonOffsetEXT( GLfloat factor, GLfloat bias );
 
 extern void GLAPIENTRY
+_mesa_PolygonOffsetClampEXT( GLfloat factor, GLfloat units, GLfloat clamp );
+
+extern void GLAPIENTRY
 _mesa_PolygonStipple( const GLubyte *mask );
 
 extern void GLAPIENTRY
 _mesa_GetPolygonStipple( GLubyte *mask );
 
-extern void 
+extern void
+_mesa_polygon_offset_clamp(struct gl_context *ctx,
+                           GLfloat factor, GLfloat units, GLfloat clamp);
+extern void
 _mesa_init_polygon( struct gl_context * ctx );
 
 #endif
diff --git a/mesalib/src/mesa/main/querymatrix.c b/mesalib/src/mesa/main/querymatrix.c
index eb36c7604..ef8517571 100644
--- a/mesalib/src/mesa/main/querymatrix.c
+++ b/mesalib/src/mesa/main/querymatrix.c
@@ -37,8 +37,12 @@
 #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16))
 #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0))
 
-#if defined(_MSC_VER)
-#if _MSC_VER < 1800  /* Not required on VS2013 and above. */
+#if defined(fpclassify)
+/* ISO C99 says that fpclassify is a macro.  Assume that any implementation
+ * of fpclassify, whether it's in a C99 compiler or not, will be a macro.
+ */
+#elif defined(_MSC_VER)
+/* Not required on VS2013 and above. */
 /* Oddly, the fpclassify() function doesn't exist in such a form
  * on MSVC.  This is an implementation using slightly different
  * lower-level Windows functions.
@@ -71,16 +75,8 @@ fpclassify(double x)
             return FP_NAN;
     }
 }
-#endif  /* _MSC_VER < 1800 */
-
-#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
-     defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
-     (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) || \
-     (defined(__sun) && defined(__GNUC__)) || defined(ANDROID) || defined(__HAIKU__)
-
-/* fpclassify is available. */
 
-#elif !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600
+#else
 
 enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL}
 fpclassify(double x)
diff --git a/mesalib/src/mesa/main/queryobj.c b/mesalib/src/mesa/main/queryobj.c
index 932359c4e..1b19afe4b 100644
--- a/mesalib/src/mesa/main/queryobj.c
+++ b/mesalib/src/mesa/main/queryobj.c
@@ -142,6 +142,18 @@ _mesa_init_query_object_functions(struct dd_function_table *driver)
    driver->CheckQuery = _mesa_check_query;
 }
 
+/* Return the binding point for a pipeline statistics target, or NULL. */
+static struct gl_query_object **
+get_pipe_stats_binding_point(struct gl_context *ctx, GLenum target)
+{
+   const int which = target - GL_VERTICES_SUBMITTED_ARB;
+
+   if (!_mesa_is_desktop_gl(ctx) ||
+       !ctx->Extensions.ARB_pipeline_statistics_query)
+      return NULL;
+   assert(which >= 0 && which < MAX_PIPELINE_STATISTICS);
+   return &ctx->Query.pipeline_stats[which];
+}
 
 /**
  * Return pointer to the query object binding point for the given target and
@@ -183,6 +195,38 @@ get_query_binding_point(struct gl_context *ctx, GLenum target, GLuint index)
          return &ctx->Query.PrimitivesWritten[index];
       else
          return NULL;
+
+   case GL_VERTICES_SUBMITTED_ARB:
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+         return get_pipe_stats_binding_point(ctx, target);
+
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+      /* GL_GEOMETRY_SHADER_INVOCATIONS is defined in a non-sequential order */
+      target = GL_VERTICES_SUBMITTED_ARB + MAX_PIPELINE_STATISTICS - 1;
+      /* fallthrough */
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+      if (_mesa_has_geometry_shaders(ctx))
+         return get_pipe_stats_binding_point(ctx, target);
+      else
+         return NULL;
+
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+      if (ctx->Extensions.ARB_tessellation_shader)
+         return get_pipe_stats_binding_point(ctx, target);
+      else
+         return NULL;
+
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+      if (_mesa_has_compute_shaders(ctx))
+         return get_pipe_stats_binding_point(ctx, target);
+      else
+         return NULL;
+
    default:
       return NULL;
    }
@@ -553,6 +597,39 @@ _mesa_GetQueryIndexediv(GLenum target, GLuint index, GLenum pname,
          case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
             *params = ctx->Const.QueryCounterBits.PrimitivesWritten;
             break;
+         case GL_VERTICES_SUBMITTED_ARB:
+            *params = ctx->Const.QueryCounterBits.VerticesSubmitted;
+            break;
+         case GL_PRIMITIVES_SUBMITTED_ARB:
+            *params = ctx->Const.QueryCounterBits.PrimitivesSubmitted;
+            break;
+         case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+            *params = ctx->Const.QueryCounterBits.VsInvocations;
+            break;
+         case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+            *params = ctx->Const.QueryCounterBits.TessPatches;
+            break;
+         case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+            *params = ctx->Const.QueryCounterBits.TessInvocations;
+            break;
+         case GL_GEOMETRY_SHADER_INVOCATIONS:
+            *params = ctx->Const.QueryCounterBits.GsInvocations;
+            break;
+         case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+            *params = ctx->Const.QueryCounterBits.GsPrimitives;
+            break;
+         case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+            *params = ctx->Const.QueryCounterBits.FsInvocations;
+            break;
+         case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+            *params = ctx->Const.QueryCounterBits.ComputeInvocations;
+            break;
+         case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+            *params = ctx->Const.QueryCounterBits.ClInPrimitives;
+            break;
+         case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+            *params = ctx->Const.QueryCounterBits.ClOutPrimitives;
+            break;
          default:
             _mesa_problem(ctx,
                           "Unknown target in glGetQueryIndexediv(target = %s)",
@@ -771,6 +848,18 @@ _mesa_init_queryobj(struct gl_context *ctx)
    ctx->Const.QueryCounterBits.Timestamp = 64;
    ctx->Const.QueryCounterBits.PrimitivesGenerated = 64;
    ctx->Const.QueryCounterBits.PrimitivesWritten = 64;
+
+   ctx->Const.QueryCounterBits.VerticesSubmitted = 64;
+   ctx->Const.QueryCounterBits.PrimitivesSubmitted = 64;
+   ctx->Const.QueryCounterBits.VsInvocations = 64;
+   ctx->Const.QueryCounterBits.TessPatches = 64;
+   ctx->Const.QueryCounterBits.TessInvocations = 64;
+   ctx->Const.QueryCounterBits.GsInvocations = 64;
+   ctx->Const.QueryCounterBits.GsPrimitives = 64;
+   ctx->Const.QueryCounterBits.FsInvocations = 64;
+   ctx->Const.QueryCounterBits.ComputeInvocations = 64;
+   ctx->Const.QueryCounterBits.ClInPrimitives = 64;
+   ctx->Const.QueryCounterBits.ClOutPrimitives = 64;
 }
 
 
diff --git a/mesalib/src/mesa/main/rastpos.c b/mesalib/src/mesa/main/rastpos.c
index a9a6ceec0..2027a9bd0 100644
--- a/mesalib/src/mesa/main/rastpos.c
+++ b/mesalib/src/mesa/main/rastpos.c
@@ -490,7 +490,7 @@ void glWindowPos4fMESA( GLfloat x, GLfloat y, GLfloat z, GLfloat w )
  */
 void _mesa_init_rastpos( struct gl_context * ctx )
 {
-   int i;
+   unsigned i;
 
    ASSIGN_4V( ctx->Current.RasterPos, 0.0, 0.0, 0.0, 1.0 );
    ctx->Current.RasterDistance = 0.0;
diff --git a/mesalib/src/mesa/main/readpix.c b/mesalib/src/mesa/main/readpix.c
index b09cf5499..ca4b9431b 100644
--- a/mesalib/src/mesa/main/readpix.c
+++ b/mesalib/src/mesa/main/readpix.c
@@ -39,6 +39,8 @@
 #include "state.h"
 #include "glformats.h"
 #include "fbobject.h"
+#include "format_utils.h"
+#include "pixeltransfer.h"
 
 
 /**
@@ -405,174 +407,217 @@ read_stencil_pixels( struct gl_context *ctx,
    ctx->Driver.UnmapRenderbuffer(ctx, rb);
 }
 
-
-/**
- * Try to do glReadPixels of RGBA data using swizzle.
- * \return GL_TRUE if successful, GL_FALSE otherwise (use the slow path)
+/*
+ * Read R, G, B, A, RGB, L, or LA pixels.
  */
-static GLboolean
-read_rgba_pixels_swizzle(struct gl_context *ctx,
-                         GLint x, GLint y,
-                         GLsizei width, GLsizei height,
-                         GLenum format, GLenum type,
-                         GLvoid *pixels,
-                         const struct gl_pixelstore_attrib *packing)
+static void
+read_rgba_pixels( struct gl_context *ctx,
+                  GLint x, GLint y,
+                  GLsizei width, GLsizei height,
+                  GLenum format, GLenum type, GLvoid *pixels,
+                  const struct gl_pixelstore_attrib *packing )
 {
-   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+   GLbitfield transferOps;
+   bool dst_is_integer, dst_is_luminance, needs_rebase;
+   int dst_stride, src_stride, rb_stride;
+   uint32_t dst_format, src_format;
    GLubyte *dst, *map;
-   int dstStride, stride, j;
-   GLboolean swizzle_rb = GL_FALSE, copy_xrgb = GL_FALSE;
-
-   /* XXX we could check for other swizzle/special cases here as needed */
-   if (rb->Format == MESA_FORMAT_R8G8B8A8_UNORM &&
-       format == GL_BGRA &&
-       type == GL_UNSIGNED_INT_8_8_8_8_REV &&
-       !ctx->Pack.SwapBytes) {
-      swizzle_rb = GL_TRUE;
-   }
-   else if (rb->Format == MESA_FORMAT_B8G8R8X8_UNORM &&
-       format == GL_BGRA &&
-       type == GL_UNSIGNED_INT_8_8_8_8_REV &&
-       !ctx->Pack.SwapBytes) {
-      copy_xrgb = GL_TRUE;
-   }
-   else {
-      return GL_FALSE;
-   }
+   mesa_format rb_format;
+   bool needs_rgba;
+   void *rgba, *src;
+   bool src_is_uint = false;
+   uint8_t rebase_swizzle[4];
+   struct gl_framebuffer *fb = ctx->ReadBuffer;
+   struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
 
-   dstStride = _mesa_image_row_stride(packing, width, format, type);
+   if (!rb)
+      return;
+
+   transferOps = get_readpixels_transfer_ops(ctx, rb->Format, format, type,
+                                             GL_FALSE);
+   /* Describe the dst format */
+   dst_is_integer = _mesa_is_enum_format_integer(format);
+   dst_stride = _mesa_image_row_stride(packing, width, format, type);
+   dst_format = _mesa_format_from_format_and_type(format, type);
+   dst_is_luminance = format == GL_LUMINANCE ||
+                      format == GL_LUMINANCE_ALPHA ||
+                      format == GL_LUMINANCE_INTEGER_EXT ||
+                      format == GL_LUMINANCE_ALPHA_INTEGER_EXT;
    dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-					   format, type, 0, 0);
+                                           format, type, 0, 0);
 
+   /* Map the source render buffer */
    ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
-			       &map, &stride);
+                               &map, &rb_stride);
    if (!map) {
       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
-      return GL_TRUE;  /* don't bother trying the slow path */
+      return;
    }
+   rb_format = _mesa_get_srgb_format_linear(rb->Format);
 
-   if (swizzle_rb) {
-      /* swap R/B */
-      for (j = 0; j < height; j++) {
-         int i;
-         for (i = 0; i < width; i++) {
-            GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map;
-            GLuint pixel = map4[i];
-            dst4[i] = (pixel & 0xff00ff00)
-                   | ((pixel & 0x00ff0000) >> 16)
-                   | ((pixel & 0x000000ff) << 16);
-         }
-         dst += dstStride;
-         map += stride;
-      }
-   } else if (copy_xrgb) {
-      /* convert xrgb -> argb */
-      for (j = 0; j < height; j++) {
-         GLuint *dst4 = (GLuint *) dst, *map4 = (GLuint *) map;
-         int i;
-         for (i = 0; i < width; i++) {
-            dst4[i] = map4[i] | 0xff000000;  /* set A=0xff */
-         }
-         dst += dstStride;
-         map += stride;
-      }
+   /*
+    * Depending on the base formats involved in the conversion we might need to
+    * rebase some values, so for these formats we compute a rebase swizzle.
+    */
+   if (rb->_BaseFormat == GL_LUMINANCE || rb->_BaseFormat == GL_INTENSITY) {
+      needs_rebase = true;
+      rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
+   } else if (rb->_BaseFormat == GL_LUMINANCE_ALPHA) {
+      needs_rebase = true;
+      rebase_swizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebase_swizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebase_swizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebase_swizzle[3] = MESA_FORMAT_SWIZZLE_W;
+   } else if (_mesa_get_format_base_format(rb_format) != rb->_BaseFormat) {
+      needs_rebase =
+         _mesa_compute_rgba2base2rgba_component_mapping(rb->_BaseFormat,
+                                                        rebase_swizzle);
+   } else {
+      needs_rebase = false;
    }
 
-   ctx->Driver.UnmapRenderbuffer(ctx, rb);
-
-   return GL_TRUE;
-}
-
-static void
-slow_read_rgba_pixels( struct gl_context *ctx,
-		       GLint x, GLint y,
-		       GLsizei width, GLsizei height,
-		       GLenum format, GLenum type,
-		       GLvoid *pixels,
-		       const struct gl_pixelstore_attrib *packing,
-		       GLbitfield transferOps )
-{
-   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
-   const mesa_format rbFormat = _mesa_get_srgb_format_linear(rb->Format);
-   void *rgba;
-   GLubyte *dst, *map;
-   int dstStride, stride, j;
-   GLboolean dst_is_integer = _mesa_is_enum_format_integer(format);
-   GLboolean dst_is_uint = _mesa_is_format_unsigned(rbFormat);
-
-   dstStride = _mesa_image_row_stride(packing, width, format, type);
-   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
-					   format, type, 0, 0);
-
-   ctx->Driver.MapRenderbuffer(ctx, rb, x, y, width, height, GL_MAP_READ_BIT,
-			       &map, &stride);
-   if (!map) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
-      return;
-   }
+   /* Since _mesa_format_convert does not handle transferOps we need to handle
+    * them before we call the function. This requires converting to RGBA float
+    * first so we can call _mesa_apply_rgba_transfer_ops. If the dst format is
+    * integer, transferOps do not apply.
+    *
+    * Converting to luminance also requires converting to RGBA first, so we can
+    * then compute luminance values as L=R+G+B. Notice that this is different
+    * from GetTexImage, where we compute L=R.
+    */
+   assert(!transferOps || (transferOps && !dst_is_integer));
 
-   rgba = malloc(width * MAX_PIXEL_BYTES);
-   if (!rgba)
-      goto done;
+   needs_rgba = transferOps || dst_is_luminance;
+   rgba = NULL;
+   if (needs_rgba) {
+      uint32_t rgba_format;
+      int rgba_stride;
+      bool need_convert;
 
-   for (j = 0; j < height; j++) {
+      /* Convert to RGBA float or int/uint depending on the type of the src */
       if (dst_is_integer) {
-	 _mesa_unpack_uint_rgba_row(rbFormat, width, map, (GLuint (*)[4]) rgba);
-         _mesa_rebase_rgba_uint(width, (GLuint (*)[4]) rgba,
-                                rb->_BaseFormat);
-         if (dst_is_uint) {
-            _mesa_pack_rgba_span_from_uints(ctx, width, (GLuint (*)[4]) rgba, format,
-                                            type, dst);
+         src_is_uint = _mesa_is_format_unsigned(rb_format);
+         if (src_is_uint) {
+            rgba_format = RGBA32_UINT;
+            rgba_stride = width * 4 * sizeof(GLuint);
          } else {
-            _mesa_pack_rgba_span_from_ints(ctx, width, (GLint (*)[4]) rgba, format,
-                                           type, dst);
+            rgba_format = RGBA32_INT;
+            rgba_stride = width * 4 * sizeof(GLint);
          }
       } else {
-	 _mesa_unpack_rgba_row(rbFormat, width, map, (GLfloat (*)[4]) rgba);
-         _mesa_rebase_rgba_float(width, (GLfloat (*)[4]) rgba,
-                                 rb->_BaseFormat);
-	 _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
-                                    type, dst, packing, transferOps);
+         rgba_format = RGBA32_FLOAT;
+         rgba_stride = width * 4 * sizeof(GLfloat);
       }
-      dst += dstStride;
-      map += stride;
-   }
-
-   free(rgba);
-
-done:
-   ctx->Driver.UnmapRenderbuffer(ctx, rb);
-}
 
-/*
- * Read R, G, B, A, RGB, L, or LA pixels.
- */
-static void
-read_rgba_pixels( struct gl_context *ctx,
-                  GLint x, GLint y,
-                  GLsizei width, GLsizei height,
-                  GLenum format, GLenum type, GLvoid *pixels,
-                  const struct gl_pixelstore_attrib *packing )
-{
-   GLbitfield transferOps;
-   struct gl_framebuffer *fb = ctx->ReadBuffer;
-   struct gl_renderbuffer *rb = fb->_ColorReadBuffer;
+      /* If we are lucky and the dst format matches the RGBA format we need to
+       * convert to, then we can convert directly into the dst buffer and avoid
+       * the final conversion/copy from the rgba buffer to the dst buffer.
+       */
+      if (dst_format == rgba_format) {
+         need_convert = false;
+         rgba = dst;
+      } else {
+         need_convert = true;
+         rgba = malloc(height * rgba_stride);
+         if (!rgba) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
+            goto done_unmap;
+         }
+      }
 
-   if (!rb)
-      return;
+      /* Convert to RGBA now */
+      _mesa_format_convert(rgba, rgba_format, rgba_stride,
+                           map, rb_format, rb_stride,
+                           width, height,
+                           needs_rebase ? rebase_swizzle : NULL);
+
+      /* Handle transfer ops if necessary */
+      if (transferOps)
+         _mesa_apply_rgba_transfer_ops(ctx, transferOps, width * height, rgba);
+
+      /* If we had to rebase, we have already taken care of that */
+      needs_rebase = false;
+
+      /* If we were lucky and our RGBA conversion matches the dst format, then
+       * we are done.
+       */
+      if (!need_convert)
+         goto done_swap;
+
+      /* Otherwise, we need to convert from RGBA to dst next */
+      src = rgba;
+      src_format = rgba_format;
+      src_stride = rgba_stride;
+   } else {
+      /* No RGBA conversion needed, convert directly to dst */
+      src = map;
+      src_format = rb_format;
+      src_stride = rb_stride;
+   }
 
-   transferOps = get_readpixels_transfer_ops(ctx, rb->Format, format, type,
-                                             GL_FALSE);
+   /* Do the conversion.
+    *
+    * If the dst format is Luminance, we need to do the conversion by computing
+    * L=R+G+B values.
+    */
+   if (!dst_is_luminance) {
+      _mesa_format_convert(dst, dst_format, dst_stride,
+                           src, src_format, src_stride,
+                           width, height,
+                           needs_rebase ? rebase_swizzle : NULL);
+   } else if (!dst_is_integer) {
+      /* Compute float Luminance values from RGBA float */
+      int luminance_stride, luminance_bytes;
+      void *luminance;
+      uint32_t luminance_format;
+
+      luminance_stride = width * sizeof(GLfloat);
+      if (format == GL_LUMINANCE_ALPHA)
+         luminance_stride *= 2;
+      luminance_bytes = height * luminance_stride;
+      luminance = malloc(luminance_bytes);
+      if (!luminance) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glReadPixels");
+         free(rgba);
+         goto done_unmap;
+      }
+      _mesa_pack_luminance_from_rgba_float(width * height, src,
+                                           luminance, format, transferOps);
+
+      /* Convert luminance float to dst; this also handles any type
+       * conversion from float to the type of dst. */
+      luminance_format = _mesa_format_from_format_and_type(format, GL_FLOAT);
+      _mesa_format_convert(dst, dst_format, dst_stride,
+                           luminance, luminance_format, luminance_stride,
+                           width, height, NULL);
+      free(luminance); /* temporary buffer, only needed for the conversion */
+   } else {
+      _mesa_pack_luminance_from_rgba_integer(width * height, src, !src_is_uint,
+                                             dst, format, type);
+   }
 
-   /* Try the optimized paths first. */
-   if (!transferOps &&
-       read_rgba_pixels_swizzle(ctx, x, y, width, height,
-                                    format, type, pixels, packing)) {
-      return;
+   if (rgba)
+      free(rgba);
+
+done_swap:
+   /* Handle byte swapping if required */
+   if (packing->SwapBytes) {
+      GLint swapSize = _mesa_sizeof_packed_type(type);
+      if (swapSize == 2 || swapSize == 4) {
+         int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+         assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+         if (swapSize == 2)
+            _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
+         else if (swapSize == 4)
+            _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
+      }
    }
 
-   slow_read_rgba_pixels(ctx, x, y, width, height,
-			 format, type, pixels, packing, transferOps);
+done_unmap:
+   ctx->Driver.UnmapRenderbuffer(ctx, rb);
 }
 
 /**
diff --git a/mesalib/src/mesa/main/renderbuffer.c b/mesalib/src/mesa/main/renderbuffer.c
index 0bc7f2b96..98f3c13b5 100644
--- a/mesalib/src/mesa/main/renderbuffer.c
+++ b/mesalib/src/mesa/main/renderbuffer.c
@@ -38,6 +38,8 @@
 void
 _mesa_init_renderbuffer(struct gl_renderbuffer *rb, GLuint name)
 {
+   GET_CURRENT_CONTEXT(ctx);
+
    mtx_init(&rb->Mutex, mtx_plain);
 
    rb->ClassID = 0;
@@ -53,7 +55,23 @@ _mesa_init_renderbuffer(struct gl_renderbuffer *rb, GLuint name)
    rb->Width = 0;
    rb->Height = 0;
    rb->Depth = 0;
-   rb->InternalFormat = GL_RGBA;
+
+   /* In GL 3, the initial format is GL_RGBA according to Table 6.26
+    * on page 302 of the GL 3.3 spec.
+    *
+    * In GLES 3, the initial format is GL_RGBA4 according to Table 6.15
+    * on page 258 of the GLES 3.0.4 spec.
+    *
+    * If the context is current, set the initial format based on the
+    * specs. If the context is not current, we cannot determine the
+    * API, so default to GL_RGBA.
+    */
+   if (ctx && _mesa_is_gles3(ctx)) {
+      rb->InternalFormat = GL_RGBA4;
+   } else {
+      rb->InternalFormat = GL_RGBA;
+   }
+
    rb->Format = MESA_FORMAT_NONE;
 }
 
diff --git a/mesalib/src/mesa/main/samplerobj.c b/mesalib/src/mesa/main/samplerobj.c
index 18a14d89a..d62a06bf1 100644
--- a/mesalib/src/mesa/main/samplerobj.c
+++ b/mesalib/src/mesa/main/samplerobj.c
@@ -732,8 +732,16 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum pname, GLint param)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameteri(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, an GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glSamplerParameteri(sampler %u)", sampler);
       return;
    }
 
@@ -817,8 +825,16 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum pname, GLfloat param)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameterf(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, an GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glSamplerParameterf(sampler %u)", sampler);
       return;
    }
 
@@ -901,8 +917,16 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum pname, const GLint *params)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameteriv(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, an GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glSamplerParameteriv(sampler %u)", sampler);
       return;
    }
 
@@ -993,8 +1017,16 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum pname, const GLfloat *params)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glSamplerParameterfv(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, a GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glSamplerParameterfv(sampler %u)", sampler);
       return;
    }
 
@@ -1249,8 +1281,16 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetSamplerParameteriv(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, a GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glGetSamplerParameteriv(sampler %u)", sampler);
       return;
    }
 
@@ -1271,13 +1311,22 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
       *params = sampObj->MagFilter;
       break;
    case GL_TEXTURE_MIN_LOD:
-      *params = (GLint) sampObj->MinLod;
+      /* GL spec 'Data Conversions' section: a floating-point value returned
+       * by an integer Get function is rounded to the nearest integer.
+       */
+      *params = IROUND(sampObj->MinLod);
       break;
    case GL_TEXTURE_MAX_LOD:
-      *params = (GLint) sampObj->MaxLod;
+      /* GL spec 'Data Conversions' section: a floating-point value returned
+       * by an integer Get function is rounded to the nearest integer.
+       */
+      *params = IROUND(sampObj->MaxLod);
       break;
    case GL_TEXTURE_LOD_BIAS:
-      *params = (GLint) sampObj->LodBias;
+      /* GL spec 'Data Conversions' section: a floating-point value returned
+       * by an integer Get function is rounded to the nearest integer.
+       */
+      *params = IROUND(sampObj->LodBias);
       break;
    case GL_TEXTURE_COMPARE_MODE:
       if (!ctx->Extensions.ARB_shadow)
@@ -1290,7 +1339,10 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
       *params = sampObj->CompareFunc;
       break;
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
-      *params = (GLint) sampObj->MaxAnisotropy;
+      /* GL spec 'Data Conversions' section: a floating-point value returned
+       * by an integer Get function is rounded to the nearest integer.
+       */
+      *params = IROUND(sampObj->MaxAnisotropy);
       break;
    case GL_TEXTURE_BORDER_COLOR:
       params[0] = FLOAT_TO_INT(sampObj->BorderColor.f[0]);
@@ -1327,8 +1379,16 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params)
 
    sampObj = _mesa_lookup_samplerobj(ctx, sampler);
    if (!sampObj) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glGetSamplerParameterfv(sampler %u)",
-                  sampler);
+      /* '3.8.2 Sampler Objects' section of the GL-ES 3.0 specification states:
+       *
+       *     "An INVALID_OPERATION error is generated if sampler is not the name
+       *     of a sampler object previously returned from a call to GenSamplers."
+       *
+       * In desktop GL, a GL_INVALID_VALUE is returned instead.
+       */
+      _mesa_error(ctx, (_mesa_is_gles(ctx) ?
+                        GL_INVALID_OPERATION : GL_INVALID_VALUE),
+                  "glGetSamplerParameterfv(sampler %u)", sampler);
       return;
    }
 
diff --git a/mesalib/src/mesa/main/shader_query.cpp b/mesalib/src/mesa/main/shader_query.cpp
index 766ad2965..df9081b73 100644
--- a/mesalib/src/mesa/main/shader_query.cpp
+++ b/mesalib/src/mesa/main/shader_query.cpp
@@ -109,6 +109,11 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index,
    GET_CURRENT_CONTEXT(ctx);
    struct gl_shader_program *shProg;
 
+   if (maxLength < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveAttrib(maxLength < 0)");
+      return;
+   }
+
    shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveAttrib");
    if (!shProg)
       return;
diff --git a/mesalib/src/mesa/main/shaderapi.c b/mesalib/src/mesa/main/shaderapi.c
index 6d831f762..52eab4655 100644
--- a/mesalib/src/mesa/main/shaderapi.c
+++ b/mesalib/src/mesa/main/shaderapi.c
@@ -307,7 +307,7 @@ create_shader(struct gl_context *ctx, GLenum type)
 }
 
 
-static GLuint 
+static GLuint
 create_shader_program(struct gl_context *ctx)
 {
    GLuint name;
@@ -326,8 +326,9 @@ create_shader_program(struct gl_context *ctx)
 
 
 /**
- * Named w/ "2" to indicate OpenGL 2.x vs GL_ARB_fragment_programs's
- * DeleteProgramARB.
+ * Delete a shader program.  Actually, just decrement the program's
+ * reference count and mark it as DeletePending.
+ * Used to implement glDeleteProgram() and glDeleteObjectARB().
  */
 static void
 delete_shader_program(struct gl_context *ctx, GLuint name)
@@ -430,9 +431,7 @@ detach_shader(struct gl_context *ctx, GLuint program, GLuint shader)
    /* not found */
    {
       GLenum err;
-      if (is_shader(ctx, shader))
-         err = GL_INVALID_OPERATION;
-      else if (is_program(ctx, shader))
+      if (is_shader(ctx, shader) || is_program(ctx, shader))
          err = GL_INVALID_OPERATION;
       else
          err = GL_INVALID_VALUE;
@@ -449,8 +448,16 @@ static void
 get_attached_shaders(struct gl_context *ctx, GLuint program, GLsizei maxCount,
                      GLsizei *count, GLuint *obj)
 {
-   struct gl_shader_program *shProg =
+   struct gl_shader_program *shProg;
+
+   if (maxCount < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetAttachedShaders(maxCount < 0)");
+      return;
+   }
+
+   shProg =
       _mesa_lookup_shader_program_err(ctx, program, "glGetAttachedShaders");
+
    if (shProg) {
       GLuint i;
       for (i = 0; i < (GLuint) maxCount && i < shProg->NumShaders; i++) {
@@ -512,7 +519,8 @@ check_gs_query(struct gl_context *ctx, const struct gl_shader_program *shProg)
  * programs (see glGetProgramivARB).
  */
 static void
-get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, GLint *params)
+get_programiv(struct gl_context *ctx, GLuint program, GLenum pname,
+              GLint *params)
 {
    struct gl_shader_program *shProg
       = _mesa_lookup_shader_program(ctx, program);
@@ -532,7 +540,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, GLint *param
    /* Are uniform buffer objects available in this context?
     */
    const bool has_ubo =
-      (ctx->API == API_OPENGL_COMPAT && ctx->Extensions.ARB_uniform_buffer_object)
+      (ctx->API == API_OPENGL_COMPAT &&
+       ctx->Extensions.ARB_uniform_buffer_object)
       || ctx->API == API_OPENGL_CORE
       || _mesa_is_gles3(ctx);
 
@@ -600,7 +609,8 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, GLint *param
       for (i = 0; i < shProg->TransformFeedback.NumVarying; i++) {
          /* Add one for the terminating NUL character.
           */
-         const GLint len = strlen(shProg->TransformFeedback.VaryingNames[i]) + 1;
+         const GLint len =
+            strlen(shProg->TransformFeedback.VaryingNames[i]) + 1;
 
          if (len > max_len)
             max_len = len;
@@ -754,8 +764,7 @@ static void
 get_program_info_log(struct gl_context *ctx, GLuint program, GLsizei bufSize,
                      GLsizei *length, GLchar *infoLog)
 {
-   struct gl_shader_program *shProg
-      = _mesa_lookup_shader_program(ctx, program);
+   struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, program);
    if (!shProg) {
       _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramInfoLog(program)");
       return;
@@ -785,6 +794,12 @@ get_shader_source(struct gl_context *ctx, GLuint shader, GLsizei maxLength,
                   GLsizei *length, GLchar *sourceOut)
 {
    struct gl_shader *sh;
+
+   if (maxLength < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetShaderSource(bufSize < 0)");
+      return;
+   }
+
    sh = _mesa_lookup_shader_err(ctx, shader, "glGetShaderSource");
    if (!sh) {
       return;
@@ -870,7 +885,6 @@ compile_shader(struct gl_context *ctx, GLuint shaderObj)
          }
          fflush(stderr);
       }
-
    }
 
    if (!sh->CompileStatus) {
@@ -917,7 +931,7 @@ link_program(struct gl_context *ctx, GLuint program)
 
    _mesa_glsl_link_shader(ctx, shProg);
 
-   if (shProg->LinkStatus == GL_FALSE && 
+   if (shProg->LinkStatus == GL_FALSE &&
        (ctx->_Shader->Flags & GLSL_REPORT_ERRORS)) {
       _mesa_debug(ctx, "Error linking program %u:\n%s\n",
                   shProg->Name, shProg->InfoLog);
@@ -985,8 +999,7 @@ _mesa_active_program(struct gl_context *ctx, struct gl_shader_program *shProg,
    }
 }
 
-/**
- */
+
 static void
 use_shader_program(struct gl_context *ctx, GLenum type,
                    struct gl_shader_program *shProg,
@@ -1033,6 +1046,7 @@ use_shader_program(struct gl_context *ctx, GLenum type,
    }
 }
 
+
 /**
  * Use the named shader program for subsequent rendering.
  */
@@ -1070,7 +1084,7 @@ validate_shader_program(const struct gl_shader_program *shProg,
 
      any active sampler in the current program object refers to a texture
      image unit where fixed-function fragment processing accesses a
-     texture target that does not match the sampler type, or 
+     texture target that does not match the sampler type, or
 
      the sum of the number of active samplers in the program and the
      number of texture image units enabled for fixed-function fragment
@@ -1078,7 +1092,6 @@ validate_shader_program(const struct gl_shader_program *shProg,
      image units allowed.
    */
 
-
    /*
     * Check: any two active samplers in the current program object are of
     * different types, but refer to the same texture image unit,
@@ -1672,7 +1685,7 @@ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat,
    (void) binaryformat;
    (void) binary;
    (void) length;
-   _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+   _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderBinary");
 }
 
 
@@ -1681,30 +1694,46 @@ _mesa_GetProgramBinary(GLuint program, GLsizei bufSize, GLsizei *length,
                        GLenum *binaryFormat, GLvoid *binary)
 {
    struct gl_shader_program *shProg;
+   GLsizei length_dummy;
    GET_CURRENT_CONTEXT(ctx);
 
-   shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetProgramBinary");
-   if (!shProg)
-      return;
-
-   if (!shProg->LinkStatus) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetProgramBinary(program %u not linked)",
-                  shProg->Name);
-      return;
-   }
-
    if (bufSize < 0){
       _mesa_error(ctx, GL_INVALID_VALUE, "glGetProgramBinary(bufSize < 0)");
       return;
    }
 
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetProgramBinary");
+   if (!shProg)
+      return;
+
    /* The ARB_get_program_binary spec says:
     *
     *     "If <length> is NULL, then no length is returned."
+    *
+    * Ensure that length always points to valid storage to avoid multiple NULL
+    * pointer checks below.
     */
-   if (length != NULL)
+   if (length == NULL)
+      length = &length_dummy;
+
+
+   /* The ARB_get_program_binary spec says:
+    *
+    *     "When a program object's LINK_STATUS is FALSE, its program binary
+    *     length is zero, and a call to GetProgramBinary will generate an
+    *     INVALID_OPERATION error."
+    */
+   if (!shProg->LinkStatus) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetProgramBinary(program %u not linked)",
+                  shProg->Name);
       *length = 0;
+      return;
+   }
+
+   *length = 0;
+   _mesa_error(ctx, GL_INVALID_OPERATION,
+               "glGetProgramBinary(driver supports zero binary formats)");
 
    (void) binaryFormat;
    (void) binary;
@@ -1723,8 +1752,31 @@ _mesa_ProgramBinary(GLuint program, GLenum binaryFormat,
 
    (void) binaryFormat;
    (void) binary;
-   (void) length;
-   _mesa_error(ctx, GL_INVALID_OPERATION, __FUNCTION__);
+
+   /* Section 2.3.1 (Errors) of the OpenGL 4.5 spec says:
+    *
+    *     "If a negative number is provided where an argument of type sizei or
+    *     sizeiptr is specified, an INVALID_VALUE error is generated."
+    */
+   if (length < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glProgramBinary(length < 0)");
+      return;
+   }
+
+   /* The ARB_get_program_binary spec says:
+    *
+    *     "<binaryFormat> and <binary> must be those returned by a previous
+    *     call to GetProgramBinary, and <length> must be the length of the
+    *     program binary as returned by GetProgramBinary or GetProgramiv with
+    *     <pname> PROGRAM_BINARY_LENGTH. Loading the program binary will fail,
+    *     setting the LINK_STATUS of <program> to FALSE, if these conditions
+    *     are not met."
+    *
+    * Since any value of binaryFormat passed "is not one of those specified as
+    * allowable for [this] command, an INVALID_ENUM error is generated."
+    */
+   shProg->LinkStatus = GL_FALSE;
+   _mesa_error(ctx, GL_INVALID_ENUM, "glProgramBinary");
 }
 
 
@@ -1755,12 +1807,7 @@ _mesa_ProgramParameteri(GLuint program, GLenum pname, GLint value)
        *     ProgramParameteri is not TRUE or FALSE."
        */
       if (value != GL_TRUE && value != GL_FALSE) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glProgramParameteri(pname=%s, value=%d): "
-                     "value must be 0 or 1.",
-                     _mesa_lookup_enum_by_nr(pname),
-                     value);
-         return;
+         goto invalid_value;
       }
 
       /* No need to notify the driver.  Any changes will actually take effect
@@ -1791,24 +1838,26 @@ _mesa_ProgramParameteri(GLuint program, GLenum pname, GLint value)
        * Chapter 7.3 Program Objects
        */
       if (value != GL_TRUE && value != GL_FALSE) {
-         _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glProgramParameteri(pname=%s, value=%d): "
-                     "value must be 0 or 1.",
-                     _mesa_lookup_enum_by_nr(pname),
-                     value);
-         return;
+         goto invalid_value;
       }
       shProg->SeparateShader = value;
       return;
 
    default:
-      break;
+      _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteri(pname=%s)",
+                  _mesa_lookup_enum_by_nr(pname));
+      return;
    }
 
-   _mesa_error(ctx, GL_INVALID_ENUM, "glProgramParameteri(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+invalid_value:
+   _mesa_error(ctx, GL_INVALID_VALUE,
+               "glProgramParameteri(pname=%s, value=%d): "
+               "value must be 0 or 1.",
+               _mesa_lookup_enum_by_nr(pname),
+               value);
 }
 
+
 void
 _mesa_use_shader_program(struct gl_context *ctx, GLenum type,
                          struct gl_shader_program *shProg,
diff --git a/mesalib/src/mesa/main/shaderobj.c b/mesalib/src/mesa/main/shaderobj.c
index 81bd7829d..02ccf450b 100644
--- a/mesalib/src/mesa/main/shaderobj.c
+++ b/mesalib/src/mesa/main/shaderobj.c
@@ -193,9 +193,9 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint name, const char *caller)
  * Then set ptr to point to shProg, incrementing its refcount.
  */
 void
-_mesa_reference_shader_program(struct gl_context *ctx,
-                               struct gl_shader_program **ptr,
-                               struct gl_shader_program *shProg)
+_mesa_reference_shader_program_(struct gl_context *ctx,
+                                struct gl_shader_program **ptr,
+                                struct gl_shader_program *shProg)
 {
    assert(ptr);
    if (*ptr == shProg) {
diff --git a/mesalib/src/mesa/main/shaderobj.h b/mesalib/src/mesa/main/shaderobj.h
index 05ddfeb50..92f7a33ee 100644
--- a/mesalib/src/mesa/main/shaderobj.h
+++ b/mesalib/src/mesa/main/shaderobj.h
@@ -62,9 +62,20 @@ _mesa_lookup_shader_err(struct gl_context *ctx, GLuint name, const char *caller)
 
 
 extern void
-_mesa_reference_shader_program(struct gl_context *ctx,
+_mesa_reference_shader_program_(struct gl_context *ctx,
                                struct gl_shader_program **ptr,
                                struct gl_shader_program *shProg);
+
+static inline void
+_mesa_reference_shader_program(struct gl_context *ctx,
+                               struct gl_shader_program **ptr,
+                               struct gl_shader_program *shProg)
+{
+   if (*ptr != shProg)
+      _mesa_reference_shader_program_(ctx, ptr, shProg);
+}
+
+
 extern void
 _mesa_init_shader(struct gl_context *ctx, struct gl_shader *shader);
 
diff --git a/mesalib/src/mesa/main/shared.c b/mesalib/src/mesa/main/shared.c
index f74a8232f..ccf5355f3 100644
--- a/mesalib/src/mesa/main/shared.c
+++ b/mesalib/src/mesa/main/shared.c
@@ -36,12 +36,12 @@
 #include "program/program.h"
 #include "dlist.h"
 #include "samplerobj.h"
-#include "set.h"
 #include "shaderapi.h"
 #include "shaderobj.h"
 #include "syncobj.h"
 
 #include "util/hash_table.h"
+#include "util/set.h"
 
 /**
  * Allocate and initialize a shared context state structure.
@@ -119,7 +119,8 @@ _mesa_alloc_shared_state(struct gl_context *ctx)
    shared->FrameBuffers = _mesa_NewHashTable();
    shared->RenderBuffers = _mesa_NewHashTable();
 
-   shared->SyncObjects = _mesa_set_create(NULL, _mesa_key_pointer_equal);
+   shared->SyncObjects = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                          _mesa_key_pointer_equal);
 
    return shared;
 }
diff --git a/mesalib/src/mesa/main/stencil.c b/mesalib/src/mesa/main/stencil.c
index f65116abe..2a19a17b8 100644
--- a/mesalib/src/mesa/main/stencil.c
+++ b/mesalib/src/mesa/main/stencil.c
@@ -573,12 +573,24 @@ _mesa_init_stencil(struct gl_context *ctx)
    ctx->Stencil.Ref[0] = 0;
    ctx->Stencil.Ref[1] = 0;
    ctx->Stencil.Ref[2] = 0;
-   ctx->Stencil.ValueMask[0] = ~0U;
-   ctx->Stencil.ValueMask[1] = ~0U;
-   ctx->Stencil.ValueMask[2] = ~0U;
-   ctx->Stencil.WriteMask[0] = ~0U;
-   ctx->Stencil.WriteMask[1] = ~0U;
-   ctx->Stencil.WriteMask[2] = ~0U;
+
+   /* 4.1.4 Stencil Test section of the GL-ES 3.0 specification says:
+    *
+    *     "In the initial state, [...] the front and back stencil mask are both
+    *     set to the value 2^s - 1, where s is greater than or equal to the
+    *     number of bits in the deepest stencil buffer supported by the GL
+    *     implementation."
+    *
+    * Since the maximum supported precision for stencil buffers is 8 bits,
+    * mask values should be initialized to 2^8 - 1 = 0xFF.
+    */
+   ctx->Stencil.ValueMask[0] = 0xFF;
+   ctx->Stencil.ValueMask[1] = 0xFF;
+   ctx->Stencil.ValueMask[2] = 0xFF;
+   ctx->Stencil.WriteMask[0] = 0xFF;
+   ctx->Stencil.WriteMask[1] = 0xFF;
+   ctx->Stencil.WriteMask[2] = 0xFF;
+
    ctx->Stencil.Clear = 0;
    ctx->Stencil._BackFace = 1;
 }
diff --git a/mesalib/src/mesa/main/syncobj.c b/mesalib/src/mesa/main/syncobj.c
index 225399eda..c1b2d3bed 100644
--- a/mesalib/src/mesa/main/syncobj.c
+++ b/mesalib/src/mesa/main/syncobj.c
@@ -63,8 +63,8 @@
 #include "get.h"
 #include "dispatch.h"
 #include "mtypes.h"
-#include "set.h"
 #include "util/hash_table.h"
+#include "util/set.h"
 
 #include "syncobj.h"
 
@@ -173,9 +173,7 @@ _mesa_validate_sync(struct gl_context *ctx,
                     const struct gl_sync_object *syncObj)
 {
    return (syncObj != NULL)
-      && _mesa_set_search(ctx->Shared->SyncObjects,
-                          _mesa_hash_pointer(syncObj),
-                          syncObj) != NULL
+      && _mesa_set_search(ctx->Shared->SyncObjects, syncObj) != NULL
       && (syncObj->Type == GL_SYNC_FENCE)
       && !syncObj->DeletePending;
 }
@@ -198,9 +196,7 @@ _mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
    mtx_lock(&ctx->Shared->Mutex);
    syncObj->RefCount--;
    if (syncObj->RefCount == 0) {
-      entry = _mesa_set_search(ctx->Shared->SyncObjects,
-                               _mesa_hash_pointer(syncObj),
-                               syncObj);
+      entry = _mesa_set_search(ctx->Shared->SyncObjects, syncObj);
       assert (entry != NULL);
       _mesa_set_remove(ctx->Shared->SyncObjects, entry);
       mtx_unlock(&ctx->Shared->Mutex);
@@ -289,9 +285,7 @@ _mesa_FenceSync(GLenum condition, GLbitfield flags)
       ctx->Driver.FenceSync(ctx, syncObj, condition, flags);
 
       mtx_lock(&ctx->Shared->Mutex);
-      _mesa_set_add(ctx->Shared->SyncObjects,
-                    _mesa_hash_pointer(syncObj),
-                    syncObj);
+      _mesa_set_add(ctx->Shared->SyncObjects, syncObj);
       mtx_unlock(&ctx->Shared->Mutex);
 
       return (GLsync) syncObj;
diff --git a/mesalib/src/mesa/main/texcompress_bptc.c b/mesalib/src/mesa/main/texcompress_bptc.c
index 9204f123e..c944ac26f 100644
--- a/mesalib/src/mesa/main/texcompress_bptc.c
+++ b/mesalib/src/mesa/main/texcompress_bptc.c
@@ -1276,7 +1276,6 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
 {
    const GLubyte *pixels;
    const GLubyte *tempImage = NULL;
-   GLenum baseFormat;
    int rowstride;
 
    if (srcFormat != GL_RGBA ||
@@ -1284,15 +1283,19 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
        ctx->_ImageTransferState ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/ubyte */
-      baseFormat = _mesa_get_format_base_format(dstFormat);
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                              baseInternalFormat,
-                                              baseFormat,
-                                              srcWidth, srcHeight, srcDepth,
-                                              srcFormat, srcType, srcAddr,
-                                              srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     rgbaRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
 
       pixels = tempImage;
       rowstride = srcWidth * 4;
@@ -1584,7 +1587,6 @@ texstore_bptc_rgb_float(TEXSTORE_PARAMS,
 {
    const float *pixels;
    const float *tempImage = NULL;
-   GLenum baseFormat;
    int rowstride;
 
    if (srcFormat != GL_RGB ||
@@ -1592,16 +1594,19 @@ texstore_bptc_rgb_float(TEXSTORE_PARAMS,
        ctx->_ImageTransferState ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/float */
-      baseFormat = _mesa_get_format_base_format(dstFormat);
-      tempImage = _mesa_make_temp_float_image(ctx, dims,
-                                              baseInternalFormat,
-                                              baseFormat,
-                                              srcWidth, srcHeight, srcDepth,
-                                              srcFormat, srcType, srcAddr,
-                                              srcPacking,
-                                              ctx->_ImageTransferState);
+      GLfloat *tempImageSlices[1];
+      int rgbRowStride = 3 * srcWidth * sizeof(GLfloat);
+      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLfloat));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLfloat *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_RGB_FLOAT32,
+                     rgbRowStride, (GLubyte **)tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
 
       pixels = tempImage;
       rowstride = srcWidth * sizeof(float) * 3;
diff --git a/mesalib/src/mesa/main/texcompress_fxt1.c b/mesalib/src/mesa/main/texcompress_fxt1.c
index 61b01c6b4..7b25e1039 100644
--- a/mesalib/src/mesa/main/texcompress_fxt1.c
+++ b/mesalib/src/mesa/main/texcompress_fxt1.c
@@ -69,14 +69,19 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
        srcPacking->RowLength != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                             _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_RGB_UNORM8,
+                     rgbRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
       srcRowStride = 3 * srcWidth;
       srcFormat = GL_RGB;
@@ -118,14 +123,19 @@ _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
        ctx->_ImageTransferState ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                             _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     rgbaRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
       srcRowStride = 4 * srcWidth;
       srcFormat = GL_RGBA;
diff --git a/mesalib/src/mesa/main/texcompress_rgtc.c b/mesalib/src/mesa/main/texcompress_rgtc.c
index f7ee24d47..e3042011a 100644
--- a/mesalib/src/mesa/main/texcompress_rgtc.c
+++ b/mesalib/src/mesa/main/texcompress_rgtc.c
@@ -83,18 +83,24 @@ _mesa_texstore_red_rgtc1(TEXSTORE_PARAMS)
    const GLubyte *srcaddr;
    GLubyte srcpixels[4][4];
    GLubyte *blkaddr;
-   GLint dstRowDiff;
+   GLint dstRowDiff, redRowStride;
+   GLubyte *tempImageSlices[1];
+
    ASSERT(dstFormat == MESA_FORMAT_R_RGTC1_UNORM ||
           dstFormat == MESA_FORMAT_L_LATC1_UNORM);
 
-   tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-					  baseInternalFormat,
-					  _mesa_get_format_base_format(dstFormat),
-					  srcWidth, srcHeight, srcDepth,
-					  srcFormat, srcType, srcAddr,
-					  srcPacking);
+   tempImage = malloc(srcWidth * srcHeight * 1 * sizeof(GLubyte));
    if (!tempImage)
       return GL_FALSE; /* out of memory */
+   redRowStride = 1 * srcWidth * sizeof(GLubyte);
+   tempImageSlices[0] = (GLubyte *) tempImage;
+   _mesa_texstore(ctx, dims,
+                  baseInternalFormat,
+                  MESA_FORMAT_R_UNORM8,
+                  redRowStride, tempImageSlices,
+                  srcWidth, srcHeight, srcDepth,
+                  srcFormat, srcType, srcAddr,
+                  srcPacking);
 
    dst = dstSlices[0];
 
@@ -130,18 +136,24 @@ _mesa_texstore_signed_red_rgtc1(TEXSTORE_PARAMS)
    const GLfloat *srcaddr;
    GLbyte srcpixels[4][4];
    GLbyte *blkaddr;
-   GLint dstRowDiff;
+   GLint dstRowDiff, redRowStride;
+   GLfloat *tempImageSlices[1];
+
    ASSERT(dstFormat == MESA_FORMAT_R_RGTC1_SNORM ||
           dstFormat == MESA_FORMAT_L_LATC1_SNORM);
 
-   tempImage = _mesa_make_temp_float_image(ctx, dims,
-					   baseInternalFormat,
-					   _mesa_get_format_base_format(dstFormat),
-					   srcWidth, srcHeight, srcDepth,
-					   srcFormat, srcType, srcAddr,
-					   srcPacking, 0x0);
+   redRowStride = 1 * srcWidth * sizeof(GLfloat);
+   tempImage = malloc(srcWidth * srcHeight * 1 * sizeof(GLfloat));
    if (!tempImage)
       return GL_FALSE; /* out of memory */
+   tempImageSlices[0] = (GLfloat *) tempImage;
+   _mesa_texstore(ctx, dims,
+                  baseInternalFormat,
+                  MESA_FORMAT_R_FLOAT32,
+                  redRowStride, (GLubyte **)tempImageSlices,
+                  srcWidth, srcHeight, srcDepth,
+                  srcFormat, srcType, srcAddr,
+                  srcPacking);
 
    dst = (GLbyte *) dstSlices[0];
 
@@ -177,19 +189,30 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
    const GLubyte *srcaddr;
    GLubyte srcpixels[4][4];
    GLubyte *blkaddr;
-   GLint dstRowDiff;
+   GLint dstRowDiff, rgRowStride;
+   mesa_format tempFormat;
+   GLubyte *tempImageSlices[1];
 
    ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2_UNORM ||
           dstFormat == MESA_FORMAT_LA_LATC2_UNORM);
 
-   tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-					  baseInternalFormat,
-					  _mesa_get_format_base_format(dstFormat),
-					  srcWidth, srcHeight, srcDepth,
-					  srcFormat, srcType, srcAddr,
-					  srcPacking);
+   if (baseInternalFormat == GL_RG)
+      tempFormat = MESA_FORMAT_R8G8_UNORM;
+   else
+      tempFormat = MESA_FORMAT_L8A8_UNORM;
+
+   rgRowStride = 2 * srcWidth * sizeof(GLubyte);
+   tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
    if (!tempImage)
       return GL_FALSE; /* out of memory */
+   tempImageSlices[0] = (GLubyte *) tempImage;
+   _mesa_texstore(ctx, dims,
+                  baseInternalFormat,
+                  tempFormat,
+                  rgRowStride, tempImageSlices,
+                  srcWidth, srcHeight, srcDepth,
+                  srcFormat, srcType, srcAddr,
+                  srcPacking);
 
    dst = dstSlices[0];
 
@@ -231,19 +254,30 @@ _mesa_texstore_signed_rg_rgtc2(TEXSTORE_PARAMS)
    const GLfloat *srcaddr;
    GLbyte srcpixels[4][4];
    GLbyte *blkaddr;
-   GLint dstRowDiff;
+   GLint dstRowDiff, rgRowStride;
+   mesa_format tempFormat;
+   GLfloat *tempImageSlices[1];
 
    ASSERT(dstFormat == MESA_FORMAT_RG_RGTC2_SNORM ||
           dstFormat == MESA_FORMAT_LA_LATC2_SNORM);
 
-   tempImage = _mesa_make_temp_float_image(ctx, dims,
-					   baseInternalFormat,
-					   _mesa_get_format_base_format(dstFormat),
-					   srcWidth, srcHeight, srcDepth,
-					   srcFormat, srcType, srcAddr,
-					   srcPacking, 0x0);
+   if (baseInternalFormat == GL_RG)
+      tempFormat = MESA_FORMAT_RG_FLOAT32;
+   else
+      tempFormat = MESA_FORMAT_LA_FLOAT32;
+
+   rgRowStride = 2 * srcWidth * sizeof(GLfloat);
+   tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLfloat));
    if (!tempImage)
       return GL_FALSE; /* out of memory */
+   tempImageSlices[0] = (GLfloat *) tempImage;
+   _mesa_texstore(ctx, dims,
+                  baseInternalFormat,
+                  tempFormat,
+                  rgRowStride, (GLubyte **)tempImageSlices,
+                  srcWidth, srcHeight, srcDepth,
+                  srcFormat, srcType, srcAddr,
+                  srcPacking);
 
    dst = (GLbyte *) dstSlices[0];
 
diff --git a/mesalib/src/mesa/main/texcompress_s3tc.c b/mesalib/src/mesa/main/texcompress_s3tc.c
index 254f84ef7..bfb53dce4 100644
--- a/mesalib/src/mesa/main/texcompress_s3tc.c
+++ b/mesalib/src/mesa/main/texcompress_s3tc.c
@@ -142,14 +142,19 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
        srcPacking->RowLength != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGB/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                             _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_RGB_UNORM8,
+                     rgbRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
       srcFormat = GL_RGB;
    }
@@ -194,14 +199,19 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
        srcPacking->RowLength != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                             _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     rgbaRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
       srcFormat = GL_RGBA;
    }
@@ -246,14 +256,19 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
        srcPacking->RowLength != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                             _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     rgbaRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
    }
    else {
@@ -297,14 +312,19 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
        srcPacking->RowLength != srcWidth ||
        srcPacking->SwapBytes) {
       /* convert image to RGBA/GLubyte */
-      tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                             baseInternalFormat,
-                                   	     _mesa_get_format_base_format(dstFormat),
-                                             srcWidth, srcHeight, srcDepth,
-                                             srcFormat, srcType, srcAddr,
-                                             srcPacking);
+      GLubyte *tempImageSlices[1];
+      int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
+      tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
       if (!tempImage)
          return GL_FALSE; /* out of memory */
+      tempImageSlices[0] = (GLubyte *) tempImage;
+      _mesa_texstore(ctx, dims,
+                     baseInternalFormat,
+                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     rgbaRowStride, tempImageSlices,
+                     srcWidth, srcHeight, srcDepth,
+                     srcFormat, srcType, srcAddr,
+                     srcPacking);
       pixels = tempImage;
    }
    else {
diff --git a/mesalib/src/mesa/main/texgetimage.c b/mesalib/src/mesa/main/texgetimage.c
index cb5f7936c..24df5b6f8 100644
--- a/mesalib/src/mesa/main/texgetimage.c
+++ b/mesalib/src/mesa/main/texgetimage.c
@@ -44,9 +44,10 @@
 #include "texcompress.h"
 #include "texgetimage.h"
 #include "teximage.h"
+#include "texobj.h"
 #include "texstore.h"
-
-
+#include "format_utils.h"
+#include "pixeltransfer.h"
 
 /**
  * Can the given type represent negative values?
@@ -241,13 +242,15 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
    const mesa_format texFormat =
       _mesa_get_srgb_format_linear(texImage->TexFormat);
    const GLenum baseFormat = _mesa_get_format_base_format(texFormat);
-   const GLenum destBaseFormat = _mesa_base_tex_format(ctx, format);
-   GLenum rebaseFormat = GL_NONE;
    const GLuint width = texImage->Width;
    const GLuint height = texImage->Height;
    const GLuint depth = texImage->Depth;
-   GLfloat *tempImage, *tempSlice, *srcRow;
-   GLuint row, slice;
+   GLfloat *tempImage, *tempSlice;
+   GLuint slice;
+   int srcStride, dstStride;
+   uint32_t dstFormat;
+   bool needsRebase;
+   uint8_t rebaseSwizzle[4];
 
    /* Decompress into temp float buffer, then pack into user buffer */
    tempImage = malloc(width * height * depth
@@ -281,46 +284,39 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
       }
    }
 
+   /* Depending on the base format involved we may need to apply a rebase
+    * transform (for example: if we download to a Luminance format we want
+    * G=0 and B=0).
+    */
    if (baseFormat == GL_LUMINANCE ||
-       baseFormat == GL_INTENSITY ||
-       baseFormat == GL_LUMINANCE_ALPHA) {
-      /* If a luminance (or intensity) texture is read back as RGB(A), the
-       * returned value should be (L,0,0,1), not (L,L,L,1).  Set rebaseFormat
-       * here to get G=B=0.
-       */
-      rebaseFormat = texImage->_BaseFormat;
-   }
-   else if ((baseFormat == GL_RGBA ||
-             baseFormat == GL_RGB  ||
-             baseFormat == GL_RG) &&
-            (destBaseFormat == GL_LUMINANCE ||
-             destBaseFormat == GL_LUMINANCE_ALPHA ||
-             destBaseFormat == GL_LUMINANCE_INTEGER_EXT ||
-             destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT)) {
-      /* If we're reading back an RGB(A) texture as luminance then we need
-       * to return L=tex(R).  Note, that's different from glReadPixels which
-       * returns L=R+G+B.
-       */
-      rebaseFormat = GL_LUMINANCE_ALPHA; /* this covers GL_LUMINANCE too */
-   }
-
-   if (rebaseFormat) {
-      _mesa_rebase_rgba_float(width * height, (GLfloat (*)[4]) tempImage,
-                              rebaseFormat);
+       baseFormat == GL_INTENSITY) {
+      needsRebase = true;
+      rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
+   } else if (baseFormat == GL_LUMINANCE_ALPHA) {
+      needsRebase = true;
+      rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
+   } else {
+      needsRebase = false;
    }
 
+   srcStride = 4 * width * sizeof(GLfloat);
+   dstStride = _mesa_image_row_stride(&ctx->Pack, width, format, type);
+   dstFormat = _mesa_format_from_format_and_type(format, type);
    tempSlice = tempImage;
    for (slice = 0; slice < depth; slice++) {
-      srcRow = tempSlice;
-      for (row = 0; row < height; row++) {
-         void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels,
-                                          width, height, format, type,
-                                          slice, row, 0);
-
-         _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) srcRow,
-                                    format, type, dest, &ctx->Pack, transferOps);
-         srcRow += 4 * width;
-      }
+      void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels,
+                                       width, height, format, type,
+                                       slice, 0, 0);
+      _mesa_format_convert(dest, dstFormat, dstStride,
+                           tempSlice, RGBA32_FLOAT, srcStride,
+                           width, height,
+                           needsRebase ? rebaseSwizzle : NULL);
       tempSlice += 4 * width * height;
    }
 
@@ -376,145 +372,162 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
    const mesa_format texFormat =
       _mesa_get_srgb_format_linear(texImage->TexFormat);
    const GLuint width = texImage->Width;
-   GLenum destBaseFormat = _mesa_base_pack_format(format);
-   GLenum rebaseFormat = GL_NONE;
    GLuint height = texImage->Height;
    GLuint depth = texImage->Depth;
-   GLuint img, row;
-   GLfloat (*rgba)[4];
-   GLuint (*rgba_uint)[4];
-   GLboolean tex_is_integer = _mesa_is_format_integer_color(texImage->TexFormat);
-   GLboolean tex_is_uint = _mesa_is_format_unsigned(texImage->TexFormat);
-   GLenum texBaseFormat = _mesa_get_format_base_format(texImage->TexFormat);
-
-   /* Allocate buffer for one row of texels */
-   rgba = malloc(4 * width * sizeof(GLfloat));
-   rgba_uint = (GLuint (*)[4]) rgba;
-   if (!rgba) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
-      return;
-   }
+   GLuint img;
+   GLboolean dst_is_integer = _mesa_is_enum_format_integer(format);
+   uint32_t dst_format;
+   int dst_stride;
+   uint8_t rebaseSwizzle[4];
+   bool needsRebase;
+   void *rgba = NULL;
 
    if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
       depth = height;
       height = 1;
    }
 
+   /* Depending on the base format involved we may need to apply a rebase
+    * transform (for example: if we download to a Luminance format we want
+    * G=0 and B=0).
+    */
    if (texImage->_BaseFormat == GL_LUMINANCE ||
-       texImage->_BaseFormat == GL_INTENSITY ||
-       texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
-      /* If a luminance (or intensity) texture is read back as RGB(A), the
-       * returned value should be (L,0,0,1), not (L,L,L,1).  Set rebaseFormat
-       * here to get G=B=0.
-       */
-      rebaseFormat = texImage->_BaseFormat;
-   }
-   else if ((texImage->_BaseFormat == GL_RGBA ||
-             texImage->_BaseFormat == GL_RGB ||
-             texImage->_BaseFormat == GL_RG) &&
-            (destBaseFormat == GL_LUMINANCE ||
-             destBaseFormat == GL_LUMINANCE_ALPHA ||
-             destBaseFormat == GL_LUMINANCE_INTEGER_EXT ||
-             destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT)) {
-      /* If we're reading back an RGB(A) texture as luminance then we need
-       * to return L=tex(R).  Note, that's different from glReadPixels which
-       * returns L=R+G+B.
-       */
-      rebaseFormat = GL_LUMINANCE_ALPHA; /* this covers GL_LUMINANCE too */
-   }
-   else if (texImage->_BaseFormat != texBaseFormat) {
-      /* The internal format and the real format differ, so we can't rely
-       * on the unpack functions setting the correct constant values.
-       * (e.g. reading back GL_RGB8 which is actually RGBA won't set alpha=1)
-       */
-      switch (texImage->_BaseFormat) {
-      case GL_RED:
-         if ((texBaseFormat == GL_RGBA ||
-              texBaseFormat == GL_RGB ||
-              texBaseFormat == GL_RG) &&
-             (destBaseFormat == GL_RGBA ||
-              destBaseFormat == GL_RGB ||
-              destBaseFormat == GL_RG ||
-              destBaseFormat == GL_GREEN)) {
-            rebaseFormat = texImage->_BaseFormat;
-            break;
-         }
-         /* fall through */
-      case GL_RG:
-         if ((texBaseFormat == GL_RGBA ||
-              texBaseFormat == GL_RGB) &&
-             (destBaseFormat == GL_RGBA ||
-              destBaseFormat == GL_RGB ||
-              destBaseFormat == GL_BLUE)) {
-            rebaseFormat = texImage->_BaseFormat;
-            break;
-         }
-         /* fall through */
-      case GL_RGB:
-         if (texBaseFormat == GL_RGBA &&
-             (destBaseFormat == GL_RGBA ||
-              destBaseFormat == GL_ALPHA ||
-              destBaseFormat == GL_LUMINANCE_ALPHA)) {
-            rebaseFormat = texImage->_BaseFormat;
-         }
-         break;
-
-      case GL_ALPHA:
-         if (destBaseFormat != GL_ALPHA) {
-            rebaseFormat = texImage->_BaseFormat;
-         }
-         break;
-      }
-   }
+       texImage->_BaseFormat == GL_INTENSITY) {
+      needsRebase = true;
+      rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_ONE;
+   } else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
+      needsRebase = true;
+      rebaseSwizzle[0] = MESA_FORMAT_SWIZZLE_X;
+      rebaseSwizzle[1] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[2] = MESA_FORMAT_SWIZZLE_ZERO;
+      rebaseSwizzle[3] = MESA_FORMAT_SWIZZLE_W;
+    } else if (texImage->_BaseFormat != _mesa_get_format_base_format(texFormat)) {
+      needsRebase =
+         _mesa_compute_rgba2base2rgba_component_mapping(texImage->_BaseFormat,
+                                                        rebaseSwizzle);
+    } else {
+      needsRebase = false;
+    }
+
+   /* Describe the dst format */
+   dst_is_integer = _mesa_is_enum_format_integer(format);
+   dst_format = _mesa_format_from_format_and_type(format, type);
+   dst_stride = _mesa_image_row_stride(&ctx->Pack, width, format, type);
+
+   /* Since _mesa_format_convert does not handle transferOps we need to handle
+    * them before we call the function. This requires to convert to RGBA float
+    * first so we can call _mesa_apply_rgba_transfer_ops. If the dst format is
+    * integer then transferOps do not apply.
+    */
+   assert(!transferOps || (transferOps && !dst_is_integer));
 
    for (img = 0; img < depth; img++) {
       GLubyte *srcMap;
       GLint rowstride;
+      GLubyte *img_src;
+      void *dest;
+      void *src;
+      int src_stride;
+      uint32_t src_format;
 
       /* map src texture buffer */
       ctx->Driver.MapTextureImage(ctx, texImage, img,
                                   0, 0, width, height, GL_MAP_READ_BIT,
                                   &srcMap, &rowstride);
-      if (srcMap) {
-         for (row = 0; row < height; row++) {
-            const GLubyte *src = srcMap + row * rowstride;
-            void *dest = _mesa_image_address(dimensions, &ctx->Pack, pixels,
-                                             width, height, format, type,
-                                             img, row, 0);
+      if (!srcMap) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
+         goto done;
+      }
 
-	    if (tex_is_integer) {
-	       _mesa_unpack_uint_rgba_row(texFormat, width, src, rgba_uint);
-               if (rebaseFormat)
-                  _mesa_rebase_rgba_uint(width, rgba_uint, rebaseFormat);
-               if (tex_is_uint) {
-                  _mesa_pack_rgba_span_from_uints(ctx, width,
-                                                  (GLuint (*)[4]) rgba_uint,
-                                                  format, type, dest);
-               } else {
-                  _mesa_pack_rgba_span_from_ints(ctx, width,
-                                                 (GLint (*)[4]) rgba_uint,
-                                                 format, type, dest);
-               }
-	    } else {
-	       _mesa_unpack_rgba_row(texFormat, width, src, rgba);
-               if (rebaseFormat)
-                  _mesa_rebase_rgba_float(width, rgba, rebaseFormat);
-	       _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba,
-					  format, type, dest,
-					  &ctx->Pack, transferOps);
-	    }
-	 }
-
-         /* Unmap the src texture buffer */
-         ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+      img_src = srcMap;
+      dest = _mesa_image_address(dimensions, &ctx->Pack, pixels,
+                                 width, height, format, type,
+                                 img, 0, 0);
+
+      if (transferOps) {
+         uint32_t rgba_format;
+         int rgba_stride;
+         bool need_convert = false;
+
+         /* We will convert to RGBA float */
+         rgba_format = RGBA32_FLOAT;
+         rgba_stride = width * 4 * sizeof(GLfloat);
+
+         /* If we are lucky and the dst format matches the RGBA format we need
+          * to convert to, then we can convert directly into the dst buffer
+          * and avoid the final conversion/copy from the rgba buffer to the dst
+          * buffer.
+          */
+         if (format == rgba_format) {
+            rgba = dest;
+         } else if (rgba == NULL) { /* Allocate the RGBA buffer only once */
+            need_convert = true;
+            rgba = malloc(height * rgba_stride);
+            if (!rgba) {
+               _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()");
+               ctx->Driver.UnmapTextureImage(ctx, texImage, img);
+               return;
+            }
+         }
+
+         _mesa_format_convert(rgba, rgba_format, rgba_stride,
+                              img_src, texFormat, rowstride,
+                              width, height,
+                              needsRebase ? rebaseSwizzle : NULL);
+
+         /* Handle transfer ops now */
+         _mesa_apply_rgba_transfer_ops(ctx, transferOps, width * height, rgba);
+
+         /* If we had to rebase, we have already handled that */
+         needsRebase = false;
+
+         /* If we were lucky and our RGBA conversion matches the dst format, then
+          * we are done.
+          */
+         if (!need_convert)
+            goto do_swap;
+
+         /* Otherwise, we need to convert from RGBA to dst next */
+         src = rgba;
+         src_format = rgba_format;
+         src_stride = rgba_stride;
+      } else {
+         /* No RGBA conversion needed, convert directly to dst */
+         src = img_src;
+         src_format = texFormat;
+         src_stride = rowstride;
       }
-      else {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage");
-         break;
+
+      /* Do the conversion to destination format */
+      _mesa_format_convert(dest, dst_format, dst_stride,
+                           src, src_format, src_stride,
+                           width, height,
+                           needsRebase ? rebaseSwizzle : NULL);
+
+   do_swap:
+      /* Handle byte swapping if required */
+      if (ctx->Pack.SwapBytes) {
+         GLint swapSize = _mesa_sizeof_packed_type(type);
+         if (swapSize == 2 || swapSize == 4) {
+            int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+            assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+            if (swapSize == 2)
+               _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
+            else if (swapSize == 4)
+               _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
+         }
       }
+
+      /* Unmap the src texture buffer */
+      ctx->Driver.UnmapTextureImage(ctx, texImage, img);
    }
 
-   free(rgba);
+done:
+   if (rgba)
+      free(rgba);
 }
 
 
@@ -585,7 +598,7 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
 
    if (memCopy) {
       const GLuint bpp = _mesa_get_format_bytes(texImage->TexFormat);
-      const GLuint bytesPerRow = texImage->Width * bpp;
+      const GLint bytesPerRow = texImage->Width * bpp;
       GLubyte *dst =
          _mesa_image_address2d(&ctx->Pack, pixels, texImage->Width,
                                texImage->Height, format, type, 0, 0);
@@ -631,9 +644,9 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type,
  * unmap with ctx->Driver.UnmapTextureImage().
  */
 void
-_mesa_get_teximage(struct gl_context *ctx,
-                   GLenum format, GLenum type, GLvoid *pixels,
-                   struct gl_texture_image *texImage)
+_mesa_GetTexImage_sw(struct gl_context *ctx,
+                     GLenum format, GLenum type, GLvoid *pixels,
+                     struct gl_texture_image *texImage)
 {
    const GLuint dimensions =
       _mesa_get_texture_dimensions(texImage->TexObject->Target);
@@ -689,14 +702,14 @@ _mesa_get_teximage(struct gl_context *ctx,
  * All error checking will have been done before this routine is called.
  */
 void
-_mesa_get_compressed_teximage(struct gl_context *ctx,
-                              struct gl_texture_image *texImage,
-                              GLvoid *img)
+_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
+                               struct gl_texture_image *texImage,
+                               GLvoid *img)
 {
    const GLuint dimensions =
       _mesa_get_texture_dimensions(texImage->TexObject->Target);
    struct compressed_pixelstore store;
-   GLuint i, slice;
+   GLint slice;
    GLubyte *dest;
 
    _mesa_compute_compressed_pixelstore(dimensions, texImage->TexFormat,
@@ -729,19 +742,19 @@ _mesa_get_compressed_teximage(struct gl_context *ctx,
       GLubyte *src;
 
       /* map src texture buffer */
-      ctx->Driver.MapTextureImage(ctx, texImage, 0,
+      ctx->Driver.MapTextureImage(ctx, texImage, slice,
                                   0, 0, texImage->Width, texImage->Height,
                                   GL_MAP_READ_BIT, &src, &srcRowStride);
 
       if (src) {
-
+         GLint i;
          for (i = 0; i < store.CopyRowsPerSlice; i++) {
             memcpy(dest, src, store.CopyBytesPerRow);
             dest += store.TotalBytesPerRow;
             src += srcRowStride;
          }
 
-         ctx->Driver.UnmapTextureImage(ctx, texImage, 0);
+         ctx->Driver.UnmapTextureImage(ctx, texImage, slice);
 
          /* Advance to next slice */
          dest += store.TotalBytesPerRow * (store.TotalRowsPerSlice - store.CopyRowsPerSlice);
@@ -758,24 +771,17 @@ _mesa_get_compressed_teximage(struct gl_context *ctx,
 
 
 /**
- * Validate the texture target enum supplied to glTexImage or
- * glCompressedTexImage.
+ * Validate the texture target enum supplied to glGetTex(ture)Image or
+ * glGetCompressedTex(ture)Image.
  */
 static GLboolean
-legal_getteximage_target(struct gl_context *ctx, GLenum target)
+legal_getteximage_target(struct gl_context *ctx, GLenum target, bool dsa)
 {
    switch (target) {
    case GL_TEXTURE_1D:
    case GL_TEXTURE_2D:
    case GL_TEXTURE_3D:
       return GL_TRUE;
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      return ctx->Extensions.ARB_texture_cube_map;
    case GL_TEXTURE_RECTANGLE_NV:
       return ctx->Extensions.NV_texture_rectangle;
    case GL_TEXTURE_1D_ARRAY_EXT:
@@ -783,6 +789,24 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target)
       return ctx->Extensions.EXT_texture_array;
    case GL_TEXTURE_CUBE_MAP_ARRAY:
       return ctx->Extensions.ARB_texture_cube_map_array;
+
+   /* Section 8.11 (Texture Queries) of the OpenGL 4.5 core profile spec
+    * (30.10.2014) says:
+    *    "An INVALID_ENUM error is generated if the effective target is not
+    *    one of TEXTURE_1D, TEXTURE_2D, TEXTURE_3D, TEXTURE_1D_ARRAY,
+    *    TEXTURE_2D_ARRAY, TEXTURE_CUBE_MAP_ARRAY, TEXTURE_RECTANGLE, one of
+    *    the targets from table 8.19 (for GetTexImage and GetnTexImage *only*),
+    *    or TEXTURE_CUBE_MAP (for GetTextureImage *only*)." (Emphasis added.)
+    */
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+      return dsa ? GL_FALSE : ctx->Extensions.ARB_texture_cube_map;
+   case GL_TEXTURE_CUBE_MAP:
+      return dsa ? GL_TRUE : GL_FALSE;
    default:
       return GL_FALSE;
    }
@@ -790,84 +814,74 @@ legal_getteximage_target(struct gl_context *ctx, GLenum target)
 
 
 /**
- * Do error checking for a glGetTexImage() call.
+ * Do error checking for a glGetTex(ture)Image() call.
  * \return GL_TRUE if any error, GL_FALSE if no errors.
  */
 static GLboolean
-getteximage_error_check(struct gl_context *ctx, GLenum target, GLint level,
+getteximage_error_check(struct gl_context *ctx,
+                        struct gl_texture_image *texImage,
+                        GLenum target, GLint level,
                         GLenum format, GLenum type, GLsizei clientMemSize,
-                        GLvoid *pixels )
+                        GLvoid *pixels, bool dsa)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
    const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
    const GLuint dimensions = (target == GL_TEXTURE_3D) ? 3 : 2;
-   GLenum baseFormat, err;
-
-   if (!legal_getteximage_target(ctx, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexImage(target=0x%x)", target);
-      return GL_TRUE;
-   }
+   GLenum baseFormat;
+   const char *suffix = dsa ? "ture" : "";
 
+   assert(texImage);
    assert(maxLevels != 0);
    if (level < 0 || level >= maxLevels) {
-      _mesa_error( ctx, GL_INVALID_VALUE, "glGetTexImage(level)" );
-      return GL_TRUE;
-   }
-
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err, "glGetTexImage(format/type)");
-      return GL_TRUE;
-   }
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
-   if (!texObj) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexImage(target)");
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetTex%sImage(level out of range)", suffix);
       return GL_TRUE;
    }
 
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
-   if (!texImage) {
-      /* non-existant texture image */
-      return GL_TRUE;
-   }
+   /*
+    * Format and type checking has been moved up to GetnTexImage and
+    * GetTextureImage so that it happens before getting the texImage object.
+    */
 
    baseFormat = _mesa_get_format_base_format(texImage->TexFormat);
-      
+
    /* Make sure the requested image format is compatible with the
     * texture's format.
     */
    if (_mesa_is_color_format(format)
        && !_mesa_is_color_format(baseFormat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexImage(format mismatch)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
       return GL_TRUE;
    }
    else if (_mesa_is_depth_format(format)
             && !_mesa_is_depth_format(baseFormat)
             && !_mesa_is_depthstencil_format(baseFormat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexImage(format mismatch)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
       return GL_TRUE;
    }
    else if (_mesa_is_stencil_format(format)
             && !ctx->Extensions.ARB_texture_stencil8) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexImage(format=GL_STENCIL_INDEX)");
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix);
       return GL_TRUE;
    }
    else if (_mesa_is_ycbcr_format(format)
             && !_mesa_is_ycbcr_format(baseFormat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexImage(format mismatch)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
       return GL_TRUE;
    }
    else if (_mesa_is_depthstencil_format(format)
             && !_mesa_is_depthstencil_format(baseFormat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexImage(format mismatch)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
       return GL_TRUE;
    }
    else if (_mesa_is_enum_format_integer(format) !=
             _mesa_is_format_integer(texImage->TexFormat)) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glGetTexImage(format mismatch)");
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
       return GL_TRUE;
    }
 
@@ -876,11 +890,13 @@ getteximage_error_check(struct gl_context *ctx, GLenum target, GLint level,
                                   format, type, clientMemSize, pixels)) {
       if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTexImage(out of bounds PBO access)");
+                     "glGetTex%sImage(out of bounds PBO access)", suffix);
       } else {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetnTexImageARB(out of bounds access:"
-                     " bufSize (%d) is too small)", clientMemSize);
+                     "%s(out of bounds access:"
+                     " bufSize (%d) is too small)",
+                     dsa ? "glGetTextureImage" : "glGetnTexImageARB",
+                     clientMemSize);
       }
       return GL_TRUE;
    }
@@ -889,7 +905,7 @@ getteximage_error_check(struct gl_context *ctx, GLenum target, GLint level,
       /* PBO should not be mapped */
       if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTexImage(PBO is mapped)");
+                     "glGetTex%sImage(PBO is mapped)", suffix);
          return GL_TRUE;
       }
    }
@@ -898,9 +914,12 @@ getteximage_error_check(struct gl_context *ctx, GLenum target, GLint level,
 }
 
 
-
 /**
- * Get texture image.  Called by glGetTexImage.
+ * This is the implementation for glGetnTexImageARB, glGetTextureImage,
+ * and glGetTexImage.
+ *
+ * Requires caller to pass in texImage object because _mesa_GetTextureImage
+ * must handle the GL_TEXTURE_CUBE_MAP target.
  *
  * \param target texture target.
  * \param level image level.
@@ -908,19 +927,29 @@ getteximage_error_check(struct gl_context *ctx, GLenum target, GLint level,
  * \param type pixel data type for returned image.
  * \param bufSize size of the pixels data buffer.
  * \param pixels returned pixel data.
+ * \param dsa True when the caller is an ARB_direct_state_access function,
+ *            false otherwise
  */
-void GLAPIENTRY
-_mesa_GetnTexImageARB( GLenum target, GLint level, GLenum format,
-                       GLenum type, GLsizei bufSize, GLvoid *pixels )
+void
+_mesa_get_texture_image(struct gl_context *ctx,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage, GLenum target,
+                        GLint level, GLenum format, GLenum type,
+                        GLsizei bufSize, GLvoid *pixels, bool dsa)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GET_CURRENT_CONTEXT(ctx);
+   assert(texObj);
+   assert(texImage);
 
    FLUSH_VERTICES(ctx, 0);
 
-   if (getteximage_error_check(ctx, target, level, format, type,
-                               bufSize, pixels)) {
+   /*
+    * Legal target checking has been moved up to GetnTexImage and
+    * GetTextureImage so that it can be caught before receiving a NULL
+    * texImage object and exiting.
+    */
+
+   if (getteximage_error_check(ctx, texImage, target, level, format,
+                               type, bufSize, pixels, dsa)) {
       return;
    }
 
@@ -929,15 +958,13 @@ _mesa_GetnTexImageARB( GLenum target, GLint level, GLenum format,
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
-
    if (_mesa_is_zero_size_texture(texImage))
       return;
 
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
-      _mesa_debug(ctx, "glGetTexImage(tex %u) format = %s, w=%d, h=%d,"
+      _mesa_debug(ctx, "glGetTex%sImage(tex %u) format = %s, w=%d, h=%d,"
                   " dstFmt=0x%x, dstType=0x%x\n",
+                  dsa ? "ture": "",
                   texObj->Name,
                   _mesa_get_format_name(texImage->TexFormat),
                   texImage->Width, texImage->Height,
@@ -951,6 +978,58 @@ _mesa_GetnTexImageARB( GLenum target, GLint level, GLenum format,
    _mesa_unlock_texture(ctx, texObj);
 }
 
+/**
+ * Get texture image.  Called by glGetnTexImageARB and glGetTexImage.
+ *
+ * \param target texture target.
+ * \param level image level.
+ * \param format pixel data format for returned image.
+ * \param type pixel data type for returned image.
+ * \param bufSize size of the pixels data buffer.
+ * \param pixels returned pixel data.
+ */
+void GLAPIENTRY
+_mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format,
+                      GLenum type, GLsizei bufSize, GLvoid *pixels)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GLenum err;
+   GET_CURRENT_CONTEXT(ctx);
+
+   /*
+    * This has been moved here because a format/type mismatch can cause a NULL
+    * texImage object, which in turn causes the mismatch error to be
+    * ignored.
+    */
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      _mesa_error(ctx, err, "glGetnTexImage(format/type)");
+      return;
+   }
+
+   /*
+    * Legal target checking has been moved here to prevent exiting with a NULL
+    * texImage object.
+    */
+   if (!legal_getteximage_target(ctx, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetnTexImage(target=0x%x)",
+                  target);
+      return;
+   }
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   texImage = _mesa_select_tex_image(texObj, target, level);
+   if (!texImage)
+      return;
+
+   _mesa_get_texture_image(ctx, texObj, texImage, target, level, format, type,
+                           bufSize, pixels, false);
+}
+
 
 void GLAPIENTRY
 _mesa_GetTexImage( GLenum target, GLint level, GLenum format,
@@ -959,51 +1038,162 @@ _mesa_GetTexImage( GLenum target, GLint level, GLenum format,
    _mesa_GetnTexImageARB(target, level, format, type, INT_MAX, pixels);
 }
 
+/**
+ * Get texture image.
+ *
+ * \param texture texture name.
+ * \param level image level.
+ * \param format pixel data format for returned image.
+ * \param type pixel data type for returned image.
+ * \param bufSize size of the pixels data buffer.
+ * \param pixels returned pixel data.
+ */
+void GLAPIENTRY
+_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
+                      GLenum type, GLsizei bufSize, GLvoid *pixels)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   int i;
+   GLint image_stride;
+   GLenum err;
+   GET_CURRENT_CONTEXT(ctx);
+
+   /*
+    * This has been moved here because a format/type mismatch can cause a NULL
+    * texImage object, which in turn causes the mismatch error to be
+    * ignored.
+    */
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      _mesa_error(ctx, err, "glGetTextureImage(format/type)");
+      return;
+   }
+
+   texObj = _mesa_lookup_texture_err(ctx, texture, "glGetTextureImage");
+   if (!texObj)
+      return;
+
+   /*
+    * Legal target checking has been moved here to prevent exiting with a NULL
+    * texImage object.
+    */
+   if (!legal_getteximage_target(ctx, texObj->Target, true)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glGetTextureImage(target=%s)",
+                  _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
+   /* Must handle special case GL_TEXTURE_CUBE_MAP. */
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+
+      /* Error checking */
+      if (texObj->NumLayers < 6) {
+         /* Not enough image planes for a cube map.  The spec does not say
+          * what should happen in this case because the user has always
+          * specified each cube face separately (using
+          * GL_TEXTURE_CUBE_MAP_POSITIVE_X+i) in previous GL versions.
+          * This is addressed in Khronos Bug 13223.
+          */
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glGetTextureImage(insufficient cube map storage)");
+         return;
+      }
+
+      /*
+       * What do we do if the user created a texture with the following code
+       * and then called this function with its handle?
+       *
+       *    GLuint tex;
+       *    glCreateTextures(GL_TEXTURE_CUBE_MAP, 1, &tex);
+       *    glBindTexture(GL_TEXTURE_CUBE_MAP, tex);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, ...);
+       *    // Note: GL_TEXTURE_CUBE_MAP_NEGATIVE_Y not set, or given the
+       *    // wrong format, or given the wrong size, etc.
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, ...);
+       *
+       * A bug has been filed against the spec for this case.  In the
+       * meantime, we will check for cube completeness.
+       *
+       * According to Section 8.17 Texture Completeness in the OpenGL 4.5
+       * Core Profile spec (30.10.2014):
+       *    "[A] cube map texture is cube complete if the
+       *    following conditions all hold true: The [base level] texture
+       *    images of each of the six cube map faces have identical, positive,
+       *    and square dimensions. The [base level] images were each specified
+       *    with the same internal format."
+       *
+       * It seems reasonable to check for cube completeness of an arbitrary
+       * level here so that the returned data has a consistent format and size
+       * and therefore fits in the user's buffer.
+       */
+      if (!_mesa_cube_level_complete(texObj, level)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glGetTextureImage(cube map incomplete)");
+         return;
+      }
+
+      /* Copy each face. */
+      for (i = 0; i < 6; ++i) {
+         texImage = texObj->Image[i][level];
+         _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
+                                 format, type, bufSize, pixels, true);
+
+         image_stride = _mesa_image_image_stride(&ctx->Pack, texImage->Width,
+                                                 texImage->Height, format,
+                                                 type);
+         pixels = (GLubyte *) pixels + image_stride;
+         bufSize -= image_stride;
+      }
+   }
+   else {
+      texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
+      if (!texImage)
+         return;
+
+      _mesa_get_texture_image(ctx, texObj, texImage, texObj->Target, level,
+                              format, type, bufSize, pixels, true);
+   }
+}
 
 /**
  * Do error checking for a glGetCompressedTexImage() call.
  * \return GL_TRUE if any error, GL_FALSE if no errors.
  */
 static GLboolean
-getcompressedteximage_error_check(struct gl_context *ctx, GLenum target,
-                                  GLint level, GLsizei clientMemSize, GLvoid *img)
+getcompressedteximage_error_check(struct gl_context *ctx,
+                                  struct gl_texture_image *texImage,
+                                  GLenum target,
+                                  GLint level, GLsizei clientMemSize,
+                                  GLvoid *img, bool dsa)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
    const GLint maxLevels = _mesa_max_texture_levels(ctx, target);
    GLuint compressedSize, dimensions;
+   const char *suffix = dsa ? "ture" : "";
 
-   if (!legal_getteximage_target(ctx, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetCompressedTexImage(target=0x%x)",
-                  target);
+   assert(texImage);
+
+   if (!legal_getteximage_target(ctx, target, dsa)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glGetCompressedTex%sImage(target=%s)", suffix,
+                  _mesa_lookup_enum_by_nr(target));
       return GL_TRUE;
    }
 
    assert(maxLevels != 0);
    if (level < 0 || level >= maxLevels) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetCompressedTexImageARB(bad level = %d)", level);
-      return GL_TRUE;
-   }
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   if (!texObj) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glGetCompressedTexImageARB(target)");
-      return GL_TRUE;
-   }
-
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
-
-   if (!texImage) {
-      /* probably invalid mipmap level */
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetCompressedTexImageARB(level)");
+                  "glGetCompressedTex%sImage(bad level = %d)", suffix, level);
       return GL_TRUE;
    }
 
    if (!_mesa_is_format_compressed(texImage->TexFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetCompressedTexImageARB(texture is not compressed)");
+                  "glGetCompressedTex%sImage(texture is not compressed)",
+                  suffix);
       return GL_TRUE;
    }
 
@@ -1015,8 +1205,9 @@ getcompressedteximage_error_check(struct gl_context *ctx, GLenum target,
    /* Check for invalid pixel storage modes */
    dimensions = _mesa_get_texture_dimensions(texImage->TexObject->Target);
    if (!_mesa_compressed_pixel_storage_error_check(ctx, dimensions,
-                                              &ctx->Pack,
-                                              "glGetCompressedTexImageARB")) {
+                                              &ctx->Pack, dsa ?
+                                              "glGetCompressedTextureImage":
+                                              "glGetCompressedTexImage")) {
       return GL_TRUE;
    }
 
@@ -1024,8 +1215,9 @@ getcompressedteximage_error_check(struct gl_context *ctx, GLenum target,
       /* do bounds checking on writing to client memory */
       if (clientMemSize < (GLsizei) compressedSize) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetnCompressedTexImageARB(out of bounds access:"
-                     " bufSize (%d) is too small)", clientMemSize);
+                     "%s(out of bounds access: bufSize (%d) is too small)",
+                     dsa ? "glGetCompressedTextureImage" :
+                     "glGetnCompressedTexImageARB", clientMemSize);
          return GL_TRUE;
       }
    } else {
@@ -1033,14 +1225,15 @@ getcompressedteximage_error_check(struct gl_context *ctx, GLenum target,
       if ((const GLubyte *) img + compressedSize >
           (const GLubyte *) ctx->Pack.BufferObj->Size) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetCompressedTexImage(out of bounds PBO access)");
+                     "glGetCompressedTex%sImage(out of bounds PBO access)",
+                     suffix);
          return GL_TRUE;
       }
 
       /* make sure PBO is not mapped */
       if (_mesa_check_disallowed_mapping(ctx->Pack.BufferObj)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetCompressedTexImage(PBO is mapped)");
+                     "glGetCompressedTex%sImage(PBO is mapped)", suffix);
          return GL_TRUE;
       }
    }
@@ -1048,49 +1241,132 @@ getcompressedteximage_error_check(struct gl_context *ctx, GLenum target,
    return GL_FALSE;
 }
 
-
-void GLAPIENTRY
-_mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize,
-                                GLvoid *img)
+/** Implements glGetnCompressedTexImageARB, glGetCompressedTexImage, and
+ * glGetCompressedTextureImage.
+ *
+ * texImage must be passed in because glGetCompressedTextureImage must handle
+ * the target GL_TEXTURE_CUBE_MAP.
+ */
+void
+_mesa_get_compressed_texture_image(struct gl_context *ctx,
+                                   struct gl_texture_object *texObj,
+                                   struct gl_texture_image *texImage,
+                                   GLenum target, GLint level,
+                                   GLsizei bufSize, GLvoid *pixels,
+                                   bool dsa)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-   GET_CURRENT_CONTEXT(ctx);
+   assert(texObj);
+   assert(texImage);
 
    FLUSH_VERTICES(ctx, 0);
 
-   if (getcompressedteximage_error_check(ctx, target, level, bufSize, img)) {
+   if (getcompressedteximage_error_check(ctx, texImage, target, level,
+                                         bufSize, pixels, dsa)) {
       return;
    }
 
-   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !img) {
+   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) && !pixels) {
       /* not an error, do nothing */
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
-
    if (_mesa_is_zero_size_texture(texImage))
       return;
 
    if (MESA_VERBOSE & (VERBOSE_API | VERBOSE_TEXTURE)) {
       _mesa_debug(ctx,
-                  "glGetCompressedTexImage(tex %u) format = %s, w=%d, h=%d\n",
-                  texObj->Name,
+                  "glGetCompressedTex%sImage(tex %u) format = %s, w=%d, h=%d\n",
+                  dsa ? "ture" : "", texObj->Name,
                   _mesa_get_format_name(texImage->TexFormat),
                   texImage->Width, texImage->Height);
    }
 
    _mesa_lock_texture(ctx, texObj);
    {
-      ctx->Driver.GetCompressedTexImage(ctx, texImage, img);
+      ctx->Driver.GetCompressedTexImage(ctx, texImage, pixels);
    }
    _mesa_unlock_texture(ctx, texObj);
 }
 
 void GLAPIENTRY
+_mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize,
+                                GLvoid *img)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   texImage = _mesa_select_tex_image(texObj, target, level);
+   if (!texImage)
+      return;
+
+   _mesa_get_compressed_texture_image(ctx, texObj, texImage, target, level,
+                                      bufSize, img, false);
+}
+
+void GLAPIENTRY
 _mesa_GetCompressedTexImage(GLenum target, GLint level, GLvoid *img)
 {
    _mesa_GetnCompressedTexImageARB(target, level, INT_MAX, img);
 }
+
+/**
+ * Get compressed texture image.
+ *
+ * \param texture texture name.
+ * \param level image level.
+ * \param bufSize size of the pixels data buffer.
+ * \param pixels returned pixel data.
+ */
+void GLAPIENTRY
+_mesa_GetCompressedTextureImage(GLuint texture, GLint level,
+                                GLsizei bufSize, GLvoid *pixels)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   int i;
+   GLint image_stride;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glGetCompressedTextureImage");
+   if (!texObj)
+      return;
+
+   /* Must handle special case GL_TEXTURE_CUBE_MAP. */
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+      assert(texObj->NumLayers >= 6);
+
+      /* Copy each face. */
+      for (i = 0; i < 6; ++i) {
+         texImage = texObj->Image[i][level];
+         if (!texImage)
+            return;
+
+         _mesa_get_compressed_texture_image(ctx, texObj, texImage,
+                                            texObj->Target, level,
+                                            bufSize, pixels, true);
+
+         /* Compressed images don't have a client format */
+         image_stride = _mesa_format_image_size(texImage->TexFormat,
+                                                texImage->Width,
+                                                texImage->Height, 1);
+
+         pixels = (GLubyte *) pixels + image_stride;
+         bufSize -= image_stride;
+      }
+   }
+   else {
+      texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
+      if (!texImage)
+         return;
+
+      _mesa_get_compressed_texture_image(ctx, texObj, texImage,
+                                         texObj->Target, level, bufSize,
+                                         pixels, true);
+   }
+}
diff --git a/mesalib/src/mesa/main/texgetimage.h b/mesalib/src/mesa/main/texgetimage.h
index a292fabc0..1fa2f59dc 100644
--- a/mesalib/src/mesa/main/texgetimage.h
+++ b/mesalib/src/mesa/main/texgetimage.h
@@ -37,16 +37,30 @@ extern GLenum
 _mesa_base_pack_format(GLenum format);
 
 extern void
-_mesa_get_teximage(struct gl_context *ctx,
-                   GLenum format, GLenum type, GLvoid *pixels,
-                   struct gl_texture_image *texImage);
+_mesa_GetTexImage_sw(struct gl_context *ctx,
+                     GLenum format, GLenum type, GLvoid *pixels,
+                     struct gl_texture_image *texImage);
 
 
 extern void
-_mesa_get_compressed_teximage(struct gl_context *ctx,
-                              struct gl_texture_image *texImage,
-                              GLvoid *data);
+_mesa_GetCompressedTexImage_sw(struct gl_context *ctx,
+                               struct gl_texture_image *texImage,
+                               GLvoid *data);
 
+extern void
+_mesa_get_texture_image(struct gl_context *ctx,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage, GLenum target,
+                        GLint level, GLenum format, GLenum type,
+                        GLsizei bufSize, GLvoid *pixels, bool dsa);
+
+extern void
+_mesa_get_compressed_texture_image( struct gl_context *ctx,
+                                    struct gl_texture_object *texObj,
+                                    struct gl_texture_image *texImage,
+                                    GLenum target, GLint level,
+                                    GLsizei bufSize, GLvoid *pixels,
+                                    bool dsa );
 
 
 extern void GLAPIENTRY
@@ -55,6 +69,9 @@ _mesa_GetTexImage( GLenum target, GLint level,
 extern void GLAPIENTRY
 _mesa_GetnTexImageARB( GLenum target, GLint level, GLenum format,
                        GLenum type, GLsizei bufSize, GLvoid *pixels );
+extern void GLAPIENTRY
+_mesa_GetTextureImage(GLuint texture, GLint level, GLenum format,
+                      GLenum type, GLsizei bufSize, GLvoid *pixels);
 
 extern void GLAPIENTRY
 _mesa_GetCompressedTexImage(GLenum target, GLint lod, GLvoid *img);
@@ -63,4 +80,8 @@ extern void GLAPIENTRY
 _mesa_GetnCompressedTexImageARB(GLenum target, GLint level, GLsizei bufSize,
                                 GLvoid *img);
 
+extern void GLAPIENTRY
+_mesa_GetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize,
+                                GLvoid *pixels);
+
 #endif /* TEXGETIMAGE_H */
diff --git a/mesalib/src/mesa/main/teximage.c b/mesalib/src/mesa/main/teximage.c
index 7766904c9..29c325bf2 100644
--- a/mesalib/src/mesa/main/teximage.c
+++ b/mesalib/src/mesa/main/teximage.c
@@ -62,7 +62,58 @@
  */
 #define NEW_COPY_TEX_STATE (_NEW_BUFFERS | _NEW_PIXEL)
 
+/**
+ * Returns a corresponding internal floating point format for a given base
+ * format as specified by OES_texture_float. In case of GL_FLOAT, the internal
+ * format needs to be a 32 bit component and in case of GL_HALF_FLOAT_OES it
+ * needs to be a 16 bit component.
+ *
+ * For example, given base format GL_RGBA and type GL_FLOAT, return GL_RGBA32F.
+ */
+static GLenum
+adjust_for_oes_float_texture(GLenum format, GLenum type)
+{
+   switch (type) {
+   case GL_FLOAT:
+      switch (format) {
+      case GL_RGBA:
+         return GL_RGBA32F;
+      case GL_RGB:
+         return GL_RGB32F;
+      case GL_ALPHA:
+         return GL_ALPHA32F_ARB;
+      case GL_LUMINANCE:
+         return GL_LUMINANCE32F_ARB;
+      case GL_LUMINANCE_ALPHA:
+         return GL_LUMINANCE_ALPHA32F_ARB;
+      default:
+         break;
+      }
+      break;
 
+   case GL_HALF_FLOAT_OES:
+      switch (format) {
+      case GL_RGBA:
+         return GL_RGBA16F;
+      case GL_RGB:
+         return GL_RGB16F;
+      case GL_ALPHA:
+         return GL_ALPHA16F_ARB;
+      case GL_LUMINANCE:
+         return GL_LUMINANCE16F_ARB;
+      case GL_LUMINANCE_ALPHA:
+         return GL_LUMINANCE_ALPHA16F_ARB;
+      default:
+         break;
+      }
+      break;
+
+   default:
+      break;
+   }
+
+   return format;
+}
 
 /**
  * Return the simple base format for a given internal texture format.
@@ -81,92 +132,102 @@ GLint
 _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
 {
    switch (internalFormat) {
-      case GL_ALPHA:
-      case GL_ALPHA4:
-      case GL_ALPHA8:
-      case GL_ALPHA12:
-      case GL_ALPHA16:
-         return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
-      case 1:
-      case GL_LUMINANCE:
-      case GL_LUMINANCE4:
-      case GL_LUMINANCE8:
-      case GL_LUMINANCE12:
-      case GL_LUMINANCE16:
-         return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
-      case 2:
-      case GL_LUMINANCE_ALPHA:
-      case GL_LUMINANCE4_ALPHA4:
-      case GL_LUMINANCE6_ALPHA2:
-      case GL_LUMINANCE8_ALPHA8:
-      case GL_LUMINANCE12_ALPHA4:
-      case GL_LUMINANCE12_ALPHA12:
-      case GL_LUMINANCE16_ALPHA16:
-         return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
-      case GL_INTENSITY:
-      case GL_INTENSITY4:
-      case GL_INTENSITY8:
-      case GL_INTENSITY12:
-      case GL_INTENSITY16:
-         return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
-      case 3:
-         return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
-      case GL_RGB:
-      case GL_R3_G3_B2:
-      case GL_RGB4:
-      case GL_RGB5:
-      case GL_RGB8:
-      case GL_RGB10:
-      case GL_RGB12:
-      case GL_RGB16:
-         return GL_RGB;
-      case 4:
-         return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
-      case GL_RGBA:
-      case GL_RGBA2:
-      case GL_RGBA4:
-      case GL_RGB5_A1:
-      case GL_RGBA8:
-      case GL_RGB10_A2:
-      case GL_RGBA12:
-      case GL_RGBA16:
-         return GL_RGBA;
-      default:
-         ; /* fallthrough */
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
+   case 3:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return GL_RGB;
+   case 4:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
    }
 
    /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
     */
    if (_mesa_is_gles(ctx)) {
       switch (internalFormat) {
-         case GL_BGRA:
-            return GL_RGBA;
-         default:
-            ; /* fallthrough */
+      case GL_BGRA:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
       }
    }
 
    if (ctx->Extensions.ARB_ES2_compatibility) {
       switch (internalFormat) {
-         case GL_RGB565:
-            return GL_RGB;
-         default:
-            ; /* fallthrough */
+      case GL_RGB565:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
       }
    }
 
    if (ctx->Extensions.ARB_depth_texture) {
       switch (internalFormat) {
-         case GL_DEPTH_COMPONENT:
-         case GL_DEPTH_COMPONENT16:
-         case GL_DEPTH_COMPONENT24:
-         case GL_DEPTH_COMPONENT32:
-            return GL_DEPTH_COMPONENT;
-         case GL_DEPTH_STENCIL:
-         case GL_DEPTH24_STENCIL8:
-            return GL_DEPTH_STENCIL;
-         default:
-            ; /* fallthrough */
+      case GL_DEPTH_COMPONENT:
+      case GL_DEPTH_COMPONENT16:
+      case GL_DEPTH_COMPONENT24:
+      case GL_DEPTH_COMPONENT32:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH_STENCIL:
+      case GL_DEPTH24_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_stencil_texturing) {
+      switch (internalFormat) {
+      case GL_STENCIL_INDEX:
+      case GL_STENCIL_INDEX8:
+         return GL_STENCIL_INDEX;
+      default:
+         ; /* fallthrough */
       }
    }
 
@@ -189,12 +250,12 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
 
    if (ctx->Extensions.TDFX_texture_compression_FXT1) {
       switch (internalFormat) {
-         case GL_COMPRESSED_RGB_FXT1_3DFX:
-            return GL_RGB;
-         case GL_COMPRESSED_RGBA_FXT1_3DFX:
-            return GL_RGBA;
-         default:
-            ; /* fallthrough */
+      case GL_COMPRESSED_RGB_FXT1_3DFX:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_FXT1_3DFX:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
       }
    }
 
@@ -202,28 +263,28 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
     */
    if (ctx->Extensions.ANGLE_texture_compression_dxt) {
       switch (internalFormat) {
-         case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-            return GL_RGB;
-         case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-         case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-         case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-            return GL_RGBA;
-         default:
-            ; /* fallthrough */
+      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
       }
    }
 
    if (_mesa_is_desktop_gl(ctx)
        && ctx->Extensions.ANGLE_texture_compression_dxt) {
       switch (internalFormat) {
-         case GL_RGB_S3TC:
-         case GL_RGB4_S3TC:
-            return GL_RGB;
-         case GL_RGBA_S3TC:
-         case GL_RGBA4_S3TC:
-            return GL_RGBA;
-         default:
-            ; /* fallthrough */
+      case GL_RGB_S3TC:
+      case GL_RGB4_S3TC:
+         return GL_RGB;
+      case GL_RGBA_S3TC:
+      case GL_RGBA4_S3TC:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
       }
    }
 
@@ -234,65 +295,65 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
 
    if (ctx->Extensions.ARB_texture_float) {
       switch (internalFormat) {
-         case GL_ALPHA16F_ARB:
-         case GL_ALPHA32F_ARB:
-            return GL_ALPHA;
-         case GL_RGBA16F_ARB:
-         case GL_RGBA32F_ARB:
-            return GL_RGBA;
-         case GL_RGB16F_ARB:
-         case GL_RGB32F_ARB:
-            return GL_RGB;
-         case GL_INTENSITY16F_ARB:
-         case GL_INTENSITY32F_ARB:
-            return GL_INTENSITY;
-         case GL_LUMINANCE16F_ARB:
-         case GL_LUMINANCE32F_ARB:
-            return GL_LUMINANCE;
-         case GL_LUMINANCE_ALPHA16F_ARB:
-         case GL_LUMINANCE_ALPHA32F_ARB:
-            return GL_LUMINANCE_ALPHA;
-         default:
-            ; /* fallthrough */
+      case GL_ALPHA16F_ARB:
+      case GL_ALPHA32F_ARB:
+         return GL_ALPHA;
+      case GL_RGBA16F_ARB:
+      case GL_RGBA32F_ARB:
+         return GL_RGBA;
+      case GL_RGB16F_ARB:
+      case GL_RGB32F_ARB:
+         return GL_RGB;
+      case GL_INTENSITY16F_ARB:
+      case GL_INTENSITY32F_ARB:
+         return GL_INTENSITY;
+      case GL_LUMINANCE16F_ARB:
+      case GL_LUMINANCE32F_ARB:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA16F_ARB:
+      case GL_LUMINANCE_ALPHA32F_ARB:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
       }
    }
 
    if (ctx->Extensions.EXT_texture_snorm) {
       switch (internalFormat) {
-         case GL_RED_SNORM:
-         case GL_R8_SNORM:
-         case GL_R16_SNORM:
-            return GL_RED;
-         case GL_RG_SNORM:
-         case GL_RG8_SNORM:
-         case GL_RG16_SNORM:
-            return GL_RG;
-         case GL_RGB_SNORM:
-         case GL_RGB8_SNORM:
-         case GL_RGB16_SNORM:
-            return GL_RGB;
-         case GL_RGBA_SNORM:
-         case GL_RGBA8_SNORM:
-         case GL_RGBA16_SNORM:
-            return GL_RGBA;
-         case GL_ALPHA_SNORM:
-         case GL_ALPHA8_SNORM:
-         case GL_ALPHA16_SNORM:
-            return GL_ALPHA;
-         case GL_LUMINANCE_SNORM:
-         case GL_LUMINANCE8_SNORM:
-         case GL_LUMINANCE16_SNORM:
-            return GL_LUMINANCE;
-         case GL_LUMINANCE_ALPHA_SNORM:
-         case GL_LUMINANCE8_ALPHA8_SNORM:
-         case GL_LUMINANCE16_ALPHA16_SNORM:
-            return GL_LUMINANCE_ALPHA;
-         case GL_INTENSITY_SNORM:
-         case GL_INTENSITY8_SNORM:
-         case GL_INTENSITY16_SNORM:
-            return GL_INTENSITY;
-         default:
-            ; /* fallthrough */
+      case GL_RED_SNORM:
+      case GL_R8_SNORM:
+      case GL_R16_SNORM:
+         return GL_RED;
+      case GL_RG_SNORM:
+      case GL_RG8_SNORM:
+      case GL_RG16_SNORM:
+         return GL_RG;
+      case GL_RGB_SNORM:
+      case GL_RGB8_SNORM:
+      case GL_RGB16_SNORM:
+         return GL_RGB;
+      case GL_RGBA_SNORM:
+      case GL_RGBA8_SNORM:
+      case GL_RGBA16_SNORM:
+         return GL_RGBA;
+      case GL_ALPHA_SNORM:
+      case GL_ALPHA8_SNORM:
+      case GL_ALPHA16_SNORM:
+         return GL_ALPHA;
+      case GL_LUMINANCE_SNORM:
+      case GL_LUMINANCE8_SNORM:
+      case GL_LUMINANCE16_SNORM:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA_SNORM:
+      case GL_LUMINANCE8_ALPHA8_SNORM:
+      case GL_LUMINANCE16_ALPHA16_SNORM:
+         return GL_LUMINANCE_ALPHA;
+      case GL_INTENSITY_SNORM:
+      case GL_INTENSITY8_SNORM:
+      case GL_INTENSITY16_SNORM:
+         return GL_INTENSITY;
+      default:
+         ; /* fallthrough */
       }
    }
 
@@ -727,87 +788,6 @@ proxy_target(GLenum target)
 }
 
 
-/**
- * Return a pointer to the current texture object for the given target
- * on the current texture unit.
- * Note: all <target> error checking should have been done by this point.
- */
-struct gl_texture_object *
-_mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
-{
-   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
-   const GLboolean arrayTex = ctx->Extensions.EXT_texture_array;
-
-   switch (target) {
-      case GL_TEXTURE_1D:
-         return texUnit->CurrentTex[TEXTURE_1D_INDEX];
-      case GL_PROXY_TEXTURE_1D:
-         return ctx->Texture.ProxyTex[TEXTURE_1D_INDEX];
-      case GL_TEXTURE_2D:
-         return texUnit->CurrentTex[TEXTURE_2D_INDEX];
-      case GL_PROXY_TEXTURE_2D:
-         return ctx->Texture.ProxyTex[TEXTURE_2D_INDEX];
-      case GL_TEXTURE_3D:
-         return texUnit->CurrentTex[TEXTURE_3D_INDEX];
-      case GL_PROXY_TEXTURE_3D:
-         return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX];
-      case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
-      case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
-      case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
-      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
-      case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
-      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
-      case GL_TEXTURE_CUBE_MAP_ARB:
-         return ctx->Extensions.ARB_texture_cube_map
-                ? texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
-         return ctx->Extensions.ARB_texture_cube_map
-                ? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL;
-      case GL_TEXTURE_CUBE_MAP_ARRAY:
-         return ctx->Extensions.ARB_texture_cube_map_array
-                ? texUnit->CurrentTex[TEXTURE_CUBE_ARRAY_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
-         return ctx->Extensions.ARB_texture_cube_map_array
-                ? ctx->Texture.ProxyTex[TEXTURE_CUBE_ARRAY_INDEX] : NULL;
-      case GL_TEXTURE_RECTANGLE_NV:
-         return ctx->Extensions.NV_texture_rectangle
-                ? texUnit->CurrentTex[TEXTURE_RECT_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_RECTANGLE_NV:
-         return ctx->Extensions.NV_texture_rectangle
-                ? ctx->Texture.ProxyTex[TEXTURE_RECT_INDEX] : NULL;
-      case GL_TEXTURE_1D_ARRAY_EXT:
-         return arrayTex ? texUnit->CurrentTex[TEXTURE_1D_ARRAY_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_1D_ARRAY_EXT:
-         return arrayTex ? ctx->Texture.ProxyTex[TEXTURE_1D_ARRAY_INDEX] : NULL;
-      case GL_TEXTURE_2D_ARRAY_EXT:
-         return arrayTex ? texUnit->CurrentTex[TEXTURE_2D_ARRAY_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_2D_ARRAY_EXT:
-         return arrayTex ? ctx->Texture.ProxyTex[TEXTURE_2D_ARRAY_INDEX] : NULL;
-      case GL_TEXTURE_BUFFER:
-         return ctx->API == API_OPENGL_CORE &&
-                ctx->Extensions.ARB_texture_buffer_object ?
-                texUnit->CurrentTex[TEXTURE_BUFFER_INDEX] : NULL;
-      case GL_TEXTURE_EXTERNAL_OES:
-         return _mesa_is_gles(ctx) && ctx->Extensions.OES_EGL_image_external
-            ? texUnit->CurrentTex[TEXTURE_EXTERNAL_INDEX] : NULL;
-      case GL_TEXTURE_2D_MULTISAMPLE:
-         return ctx->Extensions.ARB_texture_multisample
-            ? texUnit->CurrentTex[TEXTURE_2D_MULTISAMPLE_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
-         return ctx->Extensions.ARB_texture_multisample
-            ? ctx->Texture.ProxyTex[TEXTURE_2D_MULTISAMPLE_INDEX] : NULL;
-      case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-         return ctx->Extensions.ARB_texture_multisample
-            ? texUnit->CurrentTex[TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX] : NULL;
-      case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
-         return ctx->Extensions.ARB_texture_multisample
-            ? ctx->Texture.ProxyTex[TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX] : NULL;
-      default:
-         _mesa_problem(NULL, "bad target in _mesa_get_current_tex_object()");
-         return NULL;
-   }
-}
-
 
 
 /**
@@ -815,7 +795,6 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
  * target and mipmap level.  The target and level parameters should
  * have already been error-checked.
  *
- * \param ctx GL context.
  * \param texObj texture unit.
  * \param target texture target.
  * \param level image level.
@@ -823,9 +802,8 @@ _mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
  * \return pointer to the texture image structure, or NULL on failure.
  */
 struct gl_texture_image *
-_mesa_select_tex_image(struct gl_context *ctx,
-                       const struct gl_texture_object *texObj,
-		       GLenum target, GLint level)
+_mesa_select_tex_image(const struct gl_texture_object *texObj,
+		                 GLenum target, GLint level)
 {
    const GLuint face = _mesa_tex_target_to_face(target);
 
@@ -851,7 +829,7 @@ _mesa_get_tex_image(struct gl_context *ctx, struct gl_texture_object *texObj,
    if (!texObj)
       return NULL;
 
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+   texImage = _mesa_select_tex_image(texObj, target, level);
    if (!texImage) {
       texImage = ctx->Driver.NewTextureImage(ctx);
       if (!texImage) {
@@ -1313,7 +1291,7 @@ init_teximage_fields_ms(struct gl_context *ctx,
 
    target = img->TexObject->Target;
    img->_BaseFormat = _mesa_base_tex_format( ctx, internalFormat );
-   ASSERT(img->_BaseFormat > 0);
+   ASSERT(img->_BaseFormat != -1);
    img->InternalFormat = internalFormat;
    img->Border = border;
    img->Width = width;
@@ -1604,12 +1582,11 @@ _mesa_legal_texture_dimensions(struct gl_context *ctx, GLenum target,
  * \return GL_TRUE if error found, GL_FALSE otherwise.
  */
 static GLboolean
-error_check_subtexture_dimensions(struct gl_context *ctx,
-                                  const char *function, GLuint dims,
+error_check_subtexture_dimensions(struct gl_context *ctx, GLuint dims,
                                   const struct gl_texture_image *destImage,
                                   GLint xoffset, GLint yoffset, GLint zoffset,
                                   GLsizei subWidth, GLsizei subHeight,
-                                  GLsizei subDepth)
+                                  GLsizei subDepth, const char *func)
 {
    const GLenum target = destImage->TexObject->Target;
    GLuint bw, bh;
@@ -1617,32 +1594,32 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
    /* Check size */
    if (subWidth < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s%dD(width=%d)", function, dims, subWidth);
+                  "%s%dD(width=%d)", func, dims, subWidth);
       return GL_TRUE;
    }
 
    if (dims > 1 && subHeight < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s%dD(height=%d)", function, dims, subHeight);
+                  "%s%dD(height=%d)", func, dims, subHeight);
       return GL_TRUE;
    }
 
    if (dims > 2 && subDepth < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "%s%dD(depth=%d)", function, dims, subDepth);
+                  "%s%dD(depth=%d)", func, dims, subDepth);
       return GL_TRUE;
    }
 
    /* check xoffset and width */
    if (xoffset < - (GLint) destImage->Border) {
       _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset)",
-                  function, dims);
+                  func, dims);
       return GL_TRUE;
    }
 
    if (xoffset + subWidth > (GLint) destImage->Width) {
       _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset+width)",
-                  function, dims);
+                  func, dims);
       return GL_TRUE;
    }
 
@@ -1651,28 +1628,33 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
       GLint yBorder = (target == GL_TEXTURE_1D_ARRAY) ? 0 : destImage->Border;
       if (yoffset < -yBorder) {
          _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(yoffset)",
-                     function, dims);
+                     func, dims);
          return GL_TRUE;
       }
       if (yoffset + subHeight > (GLint) destImage->Height) {
          _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(yoffset+height)",
-                     function, dims);
+                     func, dims);
          return GL_TRUE;
       }
    }
 
    /* check zoffset and depth */
    if (dims > 2) {
+      GLint depth;
       GLint zBorder = (target == GL_TEXTURE_2D_ARRAY ||
                        target == GL_TEXTURE_CUBE_MAP_ARRAY) ?
                          0 : destImage->Border;
 
       if (zoffset < -zBorder) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset)", function);
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset)", func);
          return GL_TRUE;
       }
-      if (zoffset + subDepth  > (GLint) destImage->Depth) {
-         _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset+depth)", function);
+
+      depth = (GLint) destImage->Depth;
+      if (target == GL_TEXTURE_CUBE_MAP)
+         depth = 6;
+      if (zoffset + subDepth  > depth) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "%s3D(zoffset+depth)", func);
          return GL_TRUE;
       }
    }
@@ -1691,7 +1673,7 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
       if ((xoffset % bw != 0) || (yoffset % bh != 0)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
                      "%s%dD(xoffset = %d, yoffset = %d)",
-                     function, dims, xoffset, yoffset);
+                     func, dims, xoffset, yoffset);
          return GL_TRUE;
       }
 
@@ -1703,14 +1685,14 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
       if ((subWidth % bw != 0) &&
           (xoffset + subWidth != (GLint) destImage->Width)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s%dD(width = %d)", function, dims, subWidth);
+                     "%s%dD(width = %d)", func, dims, subWidth);
          return GL_TRUE;
       }
 
       if ((subHeight % bh != 0) &&
           (yoffset + subHeight != (GLint) destImage->Height)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s%dD(height = %d)", function, dims, subHeight);
+                     "%s%dD(height = %d)", func, dims, subHeight);
          return GL_TRUE;
       }
    }
@@ -1889,7 +1871,8 @@ legal_teximage_target(struct gl_context *ctx, GLuint dims, GLenum target)
  * proxy targets are not supported.
  */
 static GLboolean
-legal_texsubimage_target(struct gl_context *ctx, GLuint dims, GLenum target)
+legal_texsubimage_target(struct gl_context *ctx, GLuint dims, GLenum target,
+                         bool dsa)
 {
    switch (dims) {
    case 1:
@@ -1923,6 +1906,13 @@ legal_texsubimage_target(struct gl_context *ctx, GLuint dims, GLenum target)
       case GL_TEXTURE_CUBE_MAP_ARRAY:
       case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
          return ctx->Extensions.ARB_texture_cube_map_array;
+
+      /* Table 8.15 of the OpenGL 4.5 core profile spec
+       * (20141030) says that TEXTURE_CUBE_MAP is valid for TextureSubImage3D
+       * and CopyTextureSubImage3D.
+       */
+      case GL_TEXTURE_CUBE_MAP:
+         return dsa;
       default:
          return GL_FALSE;
       }
@@ -1942,6 +1932,9 @@ static GLboolean
 mutable_tex_object(struct gl_context *ctx, GLenum target)
 {
    struct gl_texture_object *texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return GL_FALSE;
+
    return !texObj->Immutable;
 }
 
@@ -2137,7 +2130,7 @@ texture_error_check( struct gl_context *ctx,
 
    if (_mesa_is_gles(ctx)) {
       if (_mesa_is_gles3(ctx)) {
-         err = _mesa_es3_error_check_format_and_type(format, type,
+         err = _mesa_es3_error_check_format_and_type(ctx, format, type,
                                                      internalFormat);
       } else {
          if (format != internalFormat) {
@@ -2317,14 +2310,14 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions,
    case GL_PALETTE8_RGB5_A1_OES:
       /* check level (note that level should be zero or less!) */
       if (level > 0 || level < -maxLevels) {
-	 reason = "level";
-	 error = GL_INVALID_VALUE;
+         reason = "level";
+         error = GL_INVALID_VALUE;
          goto error;
       }
 
       if (dimensions != 2) {
-	 reason = "compressed paletted textures must be 2D";
-	 error = GL_INVALID_OPERATION;
+         reason = "compressed paletted textures must be 2D";
+         error = GL_INVALID_OPERATION;
          goto error;
       }
 
@@ -2332,7 +2325,7 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions,
        * checked against the actual size later.
        */
       expectedSize = _mesa_cpal_compressed_size(level, internalFormat,
-						width, height);
+                                                width, height);
 
       /* This is for the benefit of the TestProxyTexImage below.  It expects
        * level to be non-negative.  OES_compressed_paletted_texture uses a
@@ -2347,8 +2340,8 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions,
    default:
       /* check level */
       if (level < 0 || level >= maxLevels) {
-	 reason = "level";
-	 error = GL_INVALID_VALUE;
+         reason = "level";
+         error = GL_INVALID_VALUE;
          goto error;
       }
 
@@ -2401,7 +2394,8 @@ compressed_texture_error_check(struct gl_context *ctx, GLint dimensions,
 
 error:
    /* Note: not all error paths exit through here. */
-   _mesa_error(ctx, error, "glCompressedTexImage%dD(%s)", dimensions, reason);
+   _mesa_error(ctx, error, "glCompressedTexImage%dD(%s)",
+               dimensions, reason);
    return GL_TRUE;
 }
 
@@ -2431,26 +2425,34 @@ error:
  */
 static GLboolean
 texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
+                        struct gl_texture_object *texObj,
                         GLenum target, GLint level,
                         GLint xoffset, GLint yoffset, GLint zoffset,
                         GLint width, GLint height, GLint depth,
-                        GLenum format, GLenum type)
+                        GLenum format, GLenum type, bool dsa)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
    GLenum err;
+   const char* suffix = dsa ? "ture" : "";
+
+   if (!texObj) {
+      /* must be out of memory */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex%sSubImage%dD()",
+                  suffix, dimensions);
+      return GL_TRUE;
+   }
 
    /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dimensions, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)",
-                  dimensions, _mesa_lookup_enum_by_nr(target));
+   if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)",
+                  suffix, dimensions, _mesa_lookup_enum_by_nr(target));
       return GL_TRUE;
    }
 
    /* level check */
    if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glTexSubImage%uD(level=%d)",
-                  dimensions, level);
+      _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sSubImage%uD(level=%d)",
+                  suffix, dimensions, level);
       return GL_TRUE;
    }
 
@@ -2463,9 +2465,8 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
       err = _mesa_es_error_check_format_and_type(format, type, dimensions);
       if (err != GL_NO_ERROR) {
          _mesa_error(ctx, err,
-                     "glTexSubImage%dD(format = %s, type = %s)",
-                     dimensions,
-                     _mesa_lookup_enum_by_nr(format),
+                     "glTex%sSubImage%dD(format = %s, type = %s)",
+                     suffix, dimensions, _mesa_lookup_enum_by_nr(format),
                      _mesa_lookup_enum_by_nr(type));
          return GL_TRUE;
       }
@@ -2474,38 +2475,34 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
    err = _mesa_error_check_format_and_type(ctx, format, type);
    if (err != GL_NO_ERROR) {
       _mesa_error(ctx, err,
-                  "glTexSubImage%dD(incompatible format = %s, type = %s)",
-                  dimensions, _mesa_lookup_enum_by_nr(format),
+                  "glTex%sSubImage%dD(incompatible format = %s, type = %s)",
+                  suffix, dimensions, _mesa_lookup_enum_by_nr(format),
                   _mesa_lookup_enum_by_nr(type));
       return GL_TRUE;
    }
 
-   /* Get dest texture object / image pointers */
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   if (!texObj) {
-      /* must be out of memory */
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage%dD()", dimensions);
-      return GL_TRUE;
-   }
-
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+   texImage = _mesa_select_tex_image(texObj, target, level);
    if (!texImage) {
       /* non-existant texture level */
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glTexSubImage%dD(invalid texture image)", dimensions);
+                  "glTex%sSubImage%dD(invalid texture image)", suffix,
+                  dimensions);
       return GL_TRUE;
    }
 
-   if (error_check_subtexture_dimensions(ctx, "glTexSubImage", dimensions,
+   if (error_check_subtexture_dimensions(ctx, dimensions,
                                          texImage, xoffset, yoffset, 0,
-                                         width, height, 1)) {
+                                         width, height, 1,
+                                         dsa ? "glTextureSubImage" :
+                                         "glTexSubImage")) {
       return GL_TRUE;
    }
 
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       if (compressedteximage_only_format(ctx, texImage->InternalFormat)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-               "glTexSubImage%dD(no compression for format)", dimensions);
+               "glTex%sSubImage%dD(no compression for format)",
+               suffix, dimensions);
          return GL_TRUE;
       }
    }
@@ -2515,8 +2512,8 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
       if (_mesa_is_format_integer_color(texImage->TexFormat) !=
           _mesa_is_enum_format_integer(format)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glTexSubImage%dD(integer/non-integer format mismatch)",
-                     dimensions);
+                     "glTex%sSubImage%dD(integer/non-integer format mismatch)",
+                     suffix, dimensions);
          return GL_TRUE;
       }
    }
@@ -2554,7 +2551,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
    GLenum rb_internal_format;
 
    /* check target */
-   if (!legal_texsubimage_target(ctx, dimensions, target)) {
+   if (!legal_texsubimage_target(ctx, dimensions, target, false)) {
       _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexImage%uD(target=%s)",
                   dimensions, _mesa_lookup_enum_by_nr(target));
       return GL_TRUE;
@@ -2579,10 +2576,9 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
       }
 
       if (ctx->ReadBuffer->Visual.samples > 0) {
-	 _mesa_error(ctx, GL_INVALID_OPERATION,
-		     "glCopyTexImage%dD(multisample FBO)",
-		     dimensions);
-	 return GL_TRUE;
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glCopyTexImage%dD(multisample FBO)", dimensions);
+         return GL_TRUE;
       }
    }
 
@@ -2791,12 +2787,13 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
  */
 static GLboolean
 copytexsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
+                            const struct gl_texture_object *texObj,
                             GLenum target, GLint level,
                             GLint xoffset, GLint yoffset, GLint zoffset,
-                            GLint width, GLint height)
+                            GLint width, GLint height, bool dsa)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
+   const char *suffix = dsa ? "ture" : "";
 
    /* Check that the source buffer is complete */
    if (_mesa_is_user_fbo(ctx->ReadBuffer)) {
@@ -2805,89 +2802,97 @@ copytexsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
       }
       if (ctx->ReadBuffer->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
          _mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT,
-                     "glCopyTexImage%dD(invalid readbuffer)", dimensions);
+                     "glCopyTex%sSubImage%dD(invalid readbuffer)",
+                     suffix, dimensions);
          return GL_TRUE;
       }
 
       if (ctx->ReadBuffer->Visual.samples > 0) {
-	 _mesa_error(ctx, GL_INVALID_OPERATION,
-		     "glCopyTexSubImage%dD(multisample FBO)",
-		     dimensions);
-	 return GL_TRUE;
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                "glCopyTex%sSubImage%dD(multisample FBO)", suffix,
+                dimensions);
+         return GL_TRUE;
       }
    }
 
    /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dimensions, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTexSubImage%uD(target=%s)",
-                  dimensions, _mesa_lookup_enum_by_nr(target));
+   if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTex%sSubImage%uD(target=%s)",
+                  suffix, dimensions,
+                  _mesa_lookup_enum_by_nr(target));
       return GL_TRUE;
    }
 
    /* Check level */
    if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glCopyTexSubImage%dD(level=%d)", dimensions, level);
+                  "glCopyTex%sSubImage%dD(level=%d)", suffix,
+                  dimensions, level);
       return GL_TRUE;
    }
 
-   /* Get dest texture object / image pointers */
-   texObj = _mesa_get_current_tex_object(ctx, target);
+   /* Get dest image pointers */
    if (!texObj) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage%dD()", dimensions);
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTex%sSubImage%dD()",
+                  suffix, dimensions);
       return GL_TRUE;
    }
 
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+   texImage = _mesa_select_tex_image(texObj, target, level);
    if (!texImage) {
       /* destination image does not exist */
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCopyTexSubImage%dD(invalid texture image)", dimensions);
+                  "glCopyTex%sSubImage%dD(invalid texture image)",
+                  suffix, dimensions);
       return GL_TRUE;
    }
 
-   if (error_check_subtexture_dimensions(ctx, "glCopyTexSubImage",
-                                         dimensions, texImage,
+   if (error_check_subtexture_dimensions(ctx, dimensions, texImage,
                                          xoffset, yoffset, zoffset,
-                                         width, height, 1)) {
+                                         width, height, 1, dsa ?
+                                         "glCopyTextureSubImage" :
+                                         "glCopyTexSubImage")) {
       return GL_TRUE;
    }
 
    if (_mesa_is_format_compressed(texImage->TexFormat)) {
       if (compressedteximage_only_format(ctx, texImage->InternalFormat)) {
          _mesa_error(ctx, GL_INVALID_OPERATION,
-               "glCopyTexSubImage%dD(no compression for format)", dimensions);
+               "glCopyTex%sSubImage%dD(no compression for format)",
+               suffix, dimensions);
          return GL_TRUE;
       }
    }
 
    if (texImage->InternalFormat == GL_YCBCR_MESA) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glCopyTexSubImage2D");
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glCopyTex%sSubImage2D", suffix);
       return GL_TRUE;
    }
 
    if (!_mesa_source_buffer_exists(ctx, texImage->_BaseFormat)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCopyTexSubImage%dD(missing readbuffer, format=0x%x)",
-                  dimensions, texImage->_BaseFormat);
+                  "glCopyTex%sSubImage%dD(missing readbuffer, format=0x%x)",
+                  suffix, dimensions, texImage->_BaseFormat);
       return GL_TRUE;
    }
 
    /* From the EXT_texture_integer spec:
     *
-    *     "INVALID_OPERATION is generated by CopyTexImage* and CopyTexSubImage*
-    *      if the texture internalformat is an integer format and the read color
-    *      buffer is not an integer format, or if the internalformat is not an
-    *      integer format and the read color buffer is an integer format."
+    *     "INVALID_OPERATION is generated by CopyTexImage* and
+    *     CopyTexSubImage* if the texture internalformat is an integer format
+    *     and the read color buffer is not an integer format, or if the
+    *     internalformat is not an integer format and the read color buffer
+    *     is an integer format."
     */
    if (_mesa_is_color_format(texImage->InternalFormat)) {
       struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
 
       if (_mesa_is_format_integer_color(rb->Format) !=
-	  _mesa_is_format_integer_color(texImage->TexFormat)) {
-	 _mesa_error(ctx, GL_INVALID_OPERATION,
-		     "glCopyTexImage%dD(integer vs non-integer)", dimensions);
-	 return GL_TRUE;
+          _mesa_is_format_integer_color(texImage->TexFormat)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glCopyTex%sSubImage%dD(integer vs non-integer)",
+                     suffix, dimensions);
+         return GL_TRUE;
       }
    }
 
@@ -2969,7 +2974,6 @@ static inline void
 check_gen_mipmap(struct gl_context *ctx, GLenum target,
                  struct gl_texture_object *texObj, GLint level)
 {
-   ASSERT(target != GL_TEXTURE_CUBE_MAP);
    if (texObj->GenerateMipmap &&
        level == texObj->BaseLevel &&
        level < texObj->MaxLevel) {
@@ -3042,7 +3046,7 @@ _mesa_choose_texture_format(struct gl_context *ctx,
    /* see if we've already chosen a format for the previous level */
    if (level > 0) {
       struct gl_texture_image *prevImage =
-	 _mesa_select_tex_image(ctx, texObj, target, level - 1);
+	 _mesa_select_tex_image(texObj, target, level - 1);
       /* See if the prev level is defined and has an internal format which
        * matches the new internal format.
        */
@@ -3239,6 +3243,19 @@ teximage(struct gl_context *ctx, GLboolean compressed, GLuint dims,
       texFormat = _mesa_glenum_to_compressed_format(internalFormat);
    }
    else {
+      /* In case of HALF_FLOAT_OES or FLOAT_OES, find corresponding sized
+       * internal floating point format for the given base format.
+       */
+      if (_mesa_is_gles(ctx) && format == internalFormat) {
+         if (type == GL_FLOAT) {
+            texObj->_IsFloat = GL_TRUE;
+         } else if (type == GL_HALF_FLOAT_OES || type == GL_HALF_FLOAT) {
+            texObj->_IsHalfFloat = GL_TRUE;
+         }
+
+         internalFormat = adjust_for_oes_float_texture(format, type);
+      }
+
       texFormat = _mesa_choose_texture_format(ctx, texObj, target, level,
                                               internalFormat, format, type);
    }
@@ -3419,13 +3436,13 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
 
    if (!valid_target) {
       _mesa_error(ctx, GL_INVALID_ENUM,
-		  "glEGLImageTargetTexture2D(target=%d)", target);
+                  "glEGLImageTargetTexture2D(target=%d)", target);
       return;
    }
 
    if (!image) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-		  "glEGLImageTargetTexture2D(image=%p)", image);
+                  "glEGLImageTargetTexture2D(image=%p)", image);
       return;
    }
 
@@ -3433,11 +3450,14 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
       _mesa_update_state(ctx);
 
    texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
    _mesa_lock_texture(ctx, texObj);
 
    if (texObj->Immutable) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-		  "glEGLImageTargetTexture2D(texture is immutable)");
+                  "glEGLImageTargetTexture2D(texture is immutable)");
       _mesa_unlock_texture(ctx, texObj);
       return;
    }
@@ -3458,32 +3478,26 @@ _mesa_EGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image)
 }
 
 
-
 /**
- * Implement all the glTexSubImage1/2/3D() functions.
+ * Helper that implements the glTexSubImage1/2/3D()
+ * and glTextureSubImage1/2/3D() functions.
  */
-static void
-texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
-            GLint xoffset, GLint yoffset, GLint zoffset,
-            GLsizei width, GLsizei height, GLsizei depth,
-            GLenum format, GLenum type, const GLvoid *pixels )
+void
+_mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage,
+                        GLenum target, GLint level,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type, const GLvoid *pixels,
+                        bool dsa)
 {
-   struct gl_texture_object *texObj;
-   struct gl_texture_image *texImage;
-
    FLUSH_VERTICES(ctx, 0);
 
-   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-      _mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n",
-                  dims,
-                  _mesa_lookup_enum_by_nr(target), level,
-                  xoffset, yoffset, zoffset, width, height, depth,
-                  _mesa_lookup_enum_by_nr(format),
-                  _mesa_lookup_enum_by_nr(type), pixels);
-
    /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dims, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)",
+   if (!legal_texsubimage_target(ctx, dims, target, dsa)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)",
+                  dsa ? "ture" : "",
                   dims, _mesa_lookup_enum_by_nr(target));
       return;
    }
@@ -3491,18 +3505,8 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
    if (ctx->NewState & _NEW_PIXEL)
       _mesa_update_state(ctx);
 
-   if (texsubimage_error_check(ctx, dims, target, level,
-                               xoffset, yoffset, zoffset,
-                               width, height, depth, format, type)) {
-      return;   /* error was detected */
-   }
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
    _mesa_lock_texture(ctx, texObj);
    {
-      texImage = _mesa_select_tex_image(ctx, texObj, target, level);
-
       if (width > 0 && height > 0 && depth > 0) {
          /* If we have a border, offset=-1 is legal.  Bias by border width. */
          switch (dims) {
@@ -3533,6 +3537,162 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
    _mesa_unlock_texture(ctx, texObj);
 }
 
+/**
+ * Implement all the glTexSubImage1/2/3D() functions.
+ * Must split this out this way because of GL_TEXTURE_CUBE_MAP.
+ */
+static void
+texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
+            GLint xoffset, GLint yoffset, GLint zoffset,
+            GLsizei width, GLsizei height, GLsizei depth,
+            GLenum format, GLenum type, const GLvoid *pixels)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   if (texsubimage_error_check(ctx, dims, texObj, target, level,
+                               xoffset, yoffset, zoffset,
+                               width, height, depth, format, type, false)) {
+      return;   /* error was detected */
+   }
+
+   texImage = _mesa_select_tex_image(texObj, target, level);
+   /* texsubimage_error_check ensures that texImage is not NULL */
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx, "glTexSubImage%uD %s %d %d %d %d %d %d %d %s %s %p\n",
+                  dims,
+                  _mesa_lookup_enum_by_nr(target), level,
+                  xoffset, yoffset, zoffset, width, height, depth,
+                  _mesa_lookup_enum_by_nr(format),
+                  _mesa_lookup_enum_by_nr(type), pixels);
+
+   _mesa_texture_sub_image(ctx, dims, texObj, texImage, target, level,
+                           xoffset, yoffset, zoffset, width, height, depth,
+                           format, type, pixels, false);
+}
+
+
+/**
+ * Implement all the glTextureSubImage1/2/3D() functions.
+ * Must split this out this way because of GL_TEXTURE_CUBE_MAP.
+ */
+static void
+texturesubimage(struct gl_context *ctx, GLuint dims,
+                GLuint texture, GLint level,
+                GLint xoffset, GLint yoffset, GLint zoffset,
+                GLsizei width, GLsizei height, GLsizei depth,
+                GLenum format, GLenum type, const GLvoid *pixels)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   int i;
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx,
+                  "glTextureSubImage%uD %u %d %d %d %d %d %d %d %s %s %p\n",
+                  dims, texture, level,
+                  xoffset, yoffset, zoffset, width, height, depth,
+                  _mesa_lookup_enum_by_nr(format),
+                  _mesa_lookup_enum_by_nr(type), pixels);
+
+   /* Get the texture object by Name. */
+   texObj = _mesa_lookup_texture(ctx, texture);
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureSubImage%uD(texture)",
+                  dims);
+      return;
+   }
+
+   if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level,
+                               xoffset, yoffset, zoffset,
+                               width, height, depth, format, type, true)) {
+      return;   /* error was detected */
+   }
+
+
+   /* Must handle special case GL_TEXTURE_CUBE_MAP. */
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+      GLint rowStride;
+
+      /* Error checking */
+      if (texObj->NumLayers < 6) {
+         /* Not enough image planes for a cube map.  The spec does not say
+          * what should happen in this case because the user has always
+          * specified each cube face separately (using
+          * GL_TEXTURE_CUBE_MAP_POSITIVE_X+i) in previous GL versions.
+          * This is addressed in Khronos Bug 13223.
+          */
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glTextureSubImage%uD(insufficient cube map storage)",
+                     dims);
+         return;
+      }
+
+      /*
+       * What do we do if the user created a texture with the following code
+       * and then called this function with its handle?
+       *
+       *    GLuint tex;
+       *    glCreateTextures(GL_TEXTURE_CUBE_MAP, 1, &tex);
+       *    glBindTexture(GL_TEXTURE_CUBE_MAP, tex);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, ...);
+       *    // Note: GL_TEXTURE_CUBE_MAP_NEGATIVE_Y not set, or given the
+       *    // wrong format, or given the wrong size, etc.
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, ...);
+       *    glTexImage2D(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, ...);
+       *
+       * A bug has been filed against the spec for this case.  In the
+       * meantime, we will check for cube completeness.
+       *
+       * According to Section 8.17 Texture Completeness in the OpenGL 4.5
+       * Core Profile spec (30.10.2014):
+       *    "[A] cube map texture is cube complete if the
+       *    following conditions all hold true: The [base level] texture
+       *    images of each of the six cube map faces have identical, positive,
+       *    and square dimensions. The [base level] images were each specified
+       *    with the same internal format."
+       *
+       * It seems reasonable to check for cube completeness of an arbitrary
+       * level here so that the image data has a consistent format and size.
+       */
+      if (!_mesa_cube_level_complete(texObj, level)) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glTextureSubImage%uD(cube map incomplete)",
+                     dims);
+         return;
+      }
+
+      rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height,
+                                           format, type);
+      /* Copy in each face. */
+      for (i = 0; i < 6; ++i) {
+         texImage = texObj->Image[i][level];
+         _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target,
+                                 level, xoffset, yoffset, zoffset,
+                                 width, height, 1, format,
+                                 type, pixels, true);
+         pixels = (GLubyte *) pixels + rowStride;
+      }
+   }
+   else {
+      texImage = _mesa_select_tex_image(texObj, texObj->Target, level);
+      if (!texImage)
+         return;
+
+      _mesa_texture_sub_image(ctx, dims, texObj, texImage, texObj->Target,
+                              level, xoffset, yoffset, zoffset,
+                              width, height, depth, format,
+                              type, pixels, true);
+   }
+}
+
 
 void GLAPIENTRY
 _mesa_TexSubImage1D( GLenum target, GLint level,
@@ -3578,6 +3738,48 @@ _mesa_TexSubImage3D( GLenum target, GLint level,
                format, type, pixels);
 }
 
+void GLAPIENTRY
+_mesa_TextureSubImage1D(GLuint texture, GLint level,
+                        GLint xoffset, GLsizei width,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   texturesubimage(ctx, 1, texture, level,
+                   xoffset, 0, 0,
+                   width, 1, 1,
+                   format, type, pixels);
+}
+
+
+void GLAPIENTRY
+_mesa_TextureSubImage2D(GLuint texture, GLint level,
+                        GLint xoffset, GLint yoffset,
+                        GLsizei width, GLsizei height,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   texturesubimage(ctx, 2, texture, level,
+                   xoffset, yoffset, 0,
+                   width, height, 1,
+                   format, type, pixels);
+}
+
+
+void GLAPIENTRY
+_mesa_TextureSubImage3D(GLuint texture, GLint level,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   texturesubimage(ctx, 3, texture, level,
+                   xoffset, yoffset, zoffset,
+                   width, height, depth,
+                   format, type, pixels);
+}
 
 
 /**
@@ -3811,40 +4013,40 @@ _mesa_CopyTexImage2D( GLenum target, GLint level, GLenum internalFormat,
                 x, y, width, height, border);
 }
 
-
-
 /**
- * Implementation for glCopyTexSubImage1/2/3D() functions.
+ * Implementation for glCopyTex(ture)SubImage1/2/3D() functions.
  */
-static void
-copytexsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
-                GLint xoffset, GLint yoffset, GLint zoffset,
-                GLint x, GLint y, GLsizei width, GLsizei height)
+void
+_mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
+                             struct gl_texture_object *texObj,
+                             GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height,
+                             bool dsa)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
 
    FLUSH_VERTICES(ctx, 0);
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-      _mesa_debug(ctx, "glCopyTexSubImage%uD %s %d %d %d %d %d %d %d %d\n",
-                  dims,
+      _mesa_debug(ctx, "glCopyTex%sSubImage%uD %s %d %d %d %d %d %d %d %d\n",
+                  dsa ? "ture" : "", dims,
                   _mesa_lookup_enum_by_nr(target),
                   level, xoffset, yoffset, zoffset, x, y, width, height);
 
    if (ctx->NewState & NEW_COPY_TEX_STATE)
       _mesa_update_state(ctx);
 
-   if (copytexsubimage_error_check(ctx, dims, target, level,
-                                   xoffset, yoffset, zoffset, width, height)) {
+   if (copytexsubimage_error_check(ctx, dims, texObj, target, level,
+                                   xoffset, yoffset, zoffset,
+                                   width, height, dsa)) {
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
    _mesa_lock_texture(ctx, texObj);
    {
-      texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      texImage = _mesa_select_tex_image(texObj, target, level);
 
       /* If we have a border, offset=-1 is legal.  Bias by border width. */
       switch (dims) {
@@ -3879,13 +4081,19 @@ copytexsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
    _mesa_unlock_texture(ctx, texObj);
 }
 
-
 void GLAPIENTRY
 _mesa_CopyTexSubImage1D( GLenum target, GLint level,
                          GLint xoffset, GLint x, GLint y, GLsizei width )
 {
+   struct gl_texture_object* texObj;
    GET_CURRENT_CONTEXT(ctx);
-   copytexsubimage(ctx, 1, target, level, xoffset, 0, 0, x, y, width, 1);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 1, texObj, target, level, xoffset, 0, 0,
+                                x, y, width, 1, false);
 }
 
 
@@ -3895,9 +4103,16 @@ _mesa_CopyTexSubImage2D( GLenum target, GLint level,
                          GLint xoffset, GLint yoffset,
                          GLint x, GLint y, GLsizei width, GLsizei height )
 {
+   struct gl_texture_object* texObj;
    GET_CURRENT_CONTEXT(ctx);
-   copytexsubimage(ctx, 2, target, level, xoffset, yoffset, 0, x, y,
-                   width, height);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 2, texObj, target, level,
+                                xoffset, yoffset, 0,
+                                x, y, width, height, false);
 }
 
 
@@ -3907,9 +4122,67 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level,
                          GLint xoffset, GLint yoffset, GLint zoffset,
                          GLint x, GLint y, GLsizei width, GLsizei height )
 {
+   struct gl_texture_object* texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 3, texObj, target, level,
+                                xoffset, yoffset, zoffset,
+                                x, y, width, height, false);
+}
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage1D(GLuint texture, GLint level,
+                            GLint xoffset, GLint x, GLint y, GLsizei width)
+{
+   struct gl_texture_object* texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture, "glCopyTextureSubImage1D");
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 1, texObj, texObj->Target, level,
+                                xoffset, 0, 0, x, y, width, 1, true);
+}
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage2D(GLuint texture, GLint level,
+                            GLint xoffset, GLint yoffset,
+                            GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   struct gl_texture_object* texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture, "glCopyTextureSubImage2D");
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 2, texObj, texObj->Target, level,
+                                xoffset, yoffset, 0,
+                                x, y, width, height, true);
+}
+
+
+
+void GLAPIENTRY
+_mesa_CopyTextureSubImage3D(GLuint texture, GLint level,
+                            GLint xoffset, GLint yoffset, GLint zoffset,
+                            GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   struct gl_texture_object* texObj;
    GET_CURRENT_CONTEXT(ctx);
-   copytexsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset,
-                   x, y, width, height);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture, "glCopyTextureSubImage3D");
+   if (!texObj)
+      return;
+
+   _mesa_copy_texture_sub_image(ctx, 3, texObj, texObj->Target, level,
+                                xoffset, yoffset, zoffset,
+                                x, y, width, height, true);
 }
 
 static bool
@@ -4031,7 +4304,7 @@ get_tex_images_for_clear(struct gl_context *ctx,
       for (i = 0; i < MAX_FACES; i++) {
          target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + i;
 
-         texImages[i] = _mesa_select_tex_image(ctx, texObj, target, level);
+         texImages[i] = _mesa_select_tex_image(texObj, target, level);
          if (texImages[i] == NULL) {
             _mesa_error(ctx, GL_INVALID_OPERATION,
                         "%s(invalid level)", function);
@@ -4042,7 +4315,7 @@ get_tex_images_for_clear(struct gl_context *ctx,
       return MAX_FACES;
    }
 
-   texImages[0] = _mesa_select_tex_image(ctx, texObj, texObj->Target, level);
+   texImages[0] = _mesa_select_tex_image(texObj, texObj->Target, level);
 
    if (texImages[0] == NULL) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid level)", function);
@@ -4186,15 +4459,22 @@ out:
  */
 static GLboolean
 compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
+                                  const struct gl_texture_object *texObj,
                                   GLenum target, GLint level,
                                   GLint xoffset, GLint yoffset, GLint zoffset,
                                   GLsizei width, GLsizei height, GLsizei depth,
-                                  GLenum format, GLsizei imageSize)
+                                  GLenum format, GLsizei imageSize, bool dsa)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
    GLint expectedSize;
    GLboolean targetOK;
+   const char *suffix = dsa ? "ture" : "";
+
+   if (dsa && target == GL_TEXTURE_RECTANGLE) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glCompressedTex%sSubImage%dD(target)", suffix, dims);
+      return GL_TRUE;
+   }
 
    switch (dims) {
    case 2:
@@ -4214,7 +4494,52 @@ compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
       }
       break;
    case 3:
-      targetOK = (target == GL_TEXTURE_2D_ARRAY);
+      targetOK = (target == GL_TEXTURE_3D) ||
+                 (target == GL_TEXTURE_2D_ARRAY) ||
+                 (target == GL_TEXTURE_CUBE_MAP_ARRAY) ||
+                 (target == GL_TEXTURE_CUBE_MAP && dsa);
+
+      /* OpenGL 4.5 spec (30.10.2014) says in Section 8.7 Compressed Texture
+       * Images:
+       *    "An INVALID_OPERATION error is generated by
+       *    CompressedTex*SubImage3D if the internal format of the texture is
+       *    one of the EAC, ETC2, or RGTC formats and either border is
+       *    non-zero, or the effective target for the texture is not
+       *    TEXTURE_2D_ARRAY."
+       */
+      if (target != GL_TEXTURE_2D_ARRAY) {
+         bool invalidformat;
+         switch (format) {
+            /* These came from _mesa_is_compressed_format in glformats.c. */
+            /* EAC formats */
+            case GL_COMPRESSED_RGBA8_ETC2_EAC:
+            case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+            case GL_COMPRESSED_R11_EAC:
+            case GL_COMPRESSED_RG11_EAC:
+            case GL_COMPRESSED_SIGNED_R11_EAC:
+            case GL_COMPRESSED_SIGNED_RG11_EAC:
+            /* ETC2 formats */
+            case GL_COMPRESSED_RGB8_ETC2:
+            case GL_COMPRESSED_SRGB8_ETC2:
+            case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+            case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+            /* RGTC formats */
+            case GL_COMPRESSED_RED_RGTC1:
+            case GL_COMPRESSED_SIGNED_RED_RGTC1:
+            case GL_COMPRESSED_RG_RGTC2:
+            case GL_COMPRESSED_SIGNED_RG_RGTC2:
+               invalidformat = true;
+               break;
+            default:
+               invalidformat = false;
+         }
+         if (invalidformat) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glCompressedTex%sSubImage%uD(target)", suffix, dims);
+            return GL_TRUE;
+         }
+      }
+
       break;
    default:
       assert(dims == 1);
@@ -4224,68 +4549,67 @@ compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
    }
 
    if (!targetOK) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexSubImage%uD(target)",
-                  dims);
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCompressedTex%sSubImage%uD(target)", suffix, dims);
       return GL_TRUE;
    }
 
    /* this will catch any invalid compressed format token */
    if (!_mesa_is_compressed_format(ctx, format)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glCompressedTexImage%uD(format)",
-                  dims);
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCompressedTex%sSubImage%uD(format)", suffix, dims);
       return GL_TRUE;
    }
 
    if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glCompressedTexSubImage%uD(level=%d)",
-                  dims, level);
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCompressedTex%sSubImage%uD(level=%d)",
+                  suffix, dims, level);
       return GL_TRUE;
    }
 
    /* Check for invalid pixel storage modes */
    if (!_mesa_compressed_pixel_storage_error_check(ctx, dims,
-                                                &ctx->Unpack,
-                                                "glCompressedTexSubImage")) {
+               &ctx->Unpack,
+               dsa ? "glCompressedTextureSubImage" :
+               "glCompressedTexSubImage")) {
       return GL_TRUE;
    }
 
    expectedSize = compressed_tex_size(width, height, depth, format);
    if (expectedSize != imageSize) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glCompressedTexSubImage%uD(size=%d)",
-                  dims, imageSize);
-      return GL_TRUE;
-   }
-
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   if (!texObj) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY,
-                  "glCompressedTexSubImage%uD()", dims);
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCompressedTex%sSubImage%uD(size=%d)",
+                  suffix, dims, imageSize);
       return GL_TRUE;
    }
 
-   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+   texImage = _mesa_select_tex_image(texObj, target, level);
    if (!texImage) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCompressedTexSubImage%uD(invalid texture image)", dims);
+                  "glCompressedTex%sSubImage%uD(invalid texture image)",
+                  suffix, dims);
       return GL_TRUE;
    }
 
    if ((GLint) format != texImage->InternalFormat) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCompressedTexSubImage%uD(format=0x%x)", dims, format);
+                  "glCompressedTex%sSubImage%uD(format=0x%x)",
+                  suffix, dims, format);
       return GL_TRUE;
    }
 
    if (compressedteximage_only_format(ctx, format)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCompressedTexSubImage%uD(format=0x%x cannot be updated)"
-                  , dims, format);
+               "glCompressedTex%sSubImage%uD(format=0x%x cannot be updated)",
+               suffix, dims, format);
       return GL_TRUE;
    }
 
-   if (error_check_subtexture_dimensions(ctx, "glCompressedTexSubImage", dims,
+   if (error_check_subtexture_dimensions(ctx, dims,
                                          texImage, xoffset, yoffset, zoffset,
-                                         width, height, depth)) {
+                                         width, height, depth, dsa ?
+               "glCompressedTextureSubImage" : "glCompressedTexSubImage")) {
       return GL_TRUE;
    }
 
@@ -4330,31 +4654,34 @@ _mesa_CompressedTexImage3D(GLenum target, GLint level,
 
 
 /**
- * Common helper for glCompressedTexSubImage1/2/3D().
+ * Common helper for glCompressedTexSubImage1/2/3D() and
+ * glCompressedTextureSubImage1/2/3D().
  */
-static void
-compressed_tex_sub_image(GLuint dims, GLenum target, GLint level,
-                         GLint xoffset, GLint yoffset, GLint zoffset,
-                         GLsizei width, GLsizei height, GLsizei depth,
-                         GLenum format, GLsizei imageSize, const GLvoid *data)
+void
+_mesa_compressed_texture_sub_image(struct gl_context *ctx, GLuint dims,
+                                   struct gl_texture_object *texObj,
+                                   GLenum target, GLint level,
+                                   GLint xoffset, GLint yoffset,
+                                   GLint zoffset,
+                                   GLsizei width, GLsizei height,
+                                   GLsizei depth,
+                                   GLenum format, GLsizei imageSize,
+                                   const GLvoid *data, bool dsa)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
-   GET_CURRENT_CONTEXT(ctx);
-   FLUSH_VERTICES(ctx, 0);
 
-   if (compressed_subtexture_error_check(ctx, dims, target, level,
-                                         xoffset, yoffset, zoffset,
+   if (compressed_subtexture_error_check(ctx, dims, texObj, target,
+                                         level, xoffset, yoffset, zoffset,
                                          width, height, depth,
-                                         format, imageSize)) {
+                                         format, imageSize, dsa)) {
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
+   FLUSH_VERTICES(ctx, 0);
 
    _mesa_lock_texture(ctx, texObj);
    {
-      texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      texImage = _mesa_select_tex_image(texObj, target, level);
       assert(texImage);
 
       if (width > 0 && height > 0 && depth > 0) {
@@ -4376,33 +4703,116 @@ compressed_tex_sub_image(GLuint dims, GLenum target, GLint level,
 
 void GLAPIENTRY
 _mesa_CompressedTexSubImage1D(GLenum target, GLint level, GLint xoffset,
-                                 GLsizei width, GLenum format,
-                                 GLsizei imageSize, const GLvoid *data)
+                              GLsizei width, GLenum format,
+                              GLsizei imageSize, const GLvoid *data)
 {
-   compressed_tex_sub_image(1, target, level, xoffset, 0, 0, width, 1, 1,
-                            format, imageSize, data);
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 1, texObj, target, level,
+                                      xoffset, 0, 0, width, 1, 1,
+                                      format, imageSize, data, false);
+}
+
+void GLAPIENTRY
+_mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset,
+                                  GLsizei width, GLenum format,
+                                  GLsizei imageSize, const GLvoid *data)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glCompressedTextureSubImage1D");
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 1, texObj, texObj->Target, level,
+                                      xoffset, 0, 0, width, 1, 1,
+                                      format, imageSize, data, true);
 }
 
 
 void GLAPIENTRY
 _mesa_CompressedTexSubImage2D(GLenum target, GLint level, GLint xoffset,
-                                 GLint yoffset, GLsizei width, GLsizei height,
-                                 GLenum format, GLsizei imageSize,
-                                 const GLvoid *data)
+                              GLint yoffset, GLsizei width, GLsizei height,
+                              GLenum format, GLsizei imageSize,
+                              const GLvoid *data)
 {
-   compressed_tex_sub_image(2, target, level, xoffset, yoffset, 0,
-                            width, height, 1, format, imageSize, data);
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 2, texObj, target, level,
+                                      xoffset, yoffset, 0, width, height, 1,
+                                      format, imageSize, data, false);
 }
 
+void GLAPIENTRY
+_mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset,
+                                  GLint yoffset,
+                                  GLsizei width, GLsizei height,
+                                  GLenum format, GLsizei imageSize,
+                                  const GLvoid *data)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                 "glCompressedTextureSubImage2D");
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 2, texObj, texObj->Target, level,
+                                      xoffset, yoffset, 0, width, height, 1,
+                                      format, imageSize, data, true);
+}
 
 void GLAPIENTRY
 _mesa_CompressedTexSubImage3D(GLenum target, GLint level, GLint xoffset,
-                                 GLint yoffset, GLint zoffset, GLsizei width,
-                                 GLsizei height, GLsizei depth, GLenum format,
-                                 GLsizei imageSize, const GLvoid *data)
+                              GLint yoffset, GLint zoffset, GLsizei width,
+                              GLsizei height, GLsizei depth, GLenum format,
+                              GLsizei imageSize, const GLvoid *data)
 {
-   compressed_tex_sub_image(3, target, level, xoffset, yoffset, zoffset,
-                            width, height, depth, format, imageSize, data);
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 3, texObj, target, level,
+                                      xoffset, yoffset, zoffset,
+                                      width, height, depth,
+                                      format, imageSize, data, false);
+}
+
+void GLAPIENTRY
+_mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset,
+                                  GLint yoffset, GLint zoffset, GLsizei width,
+                                  GLsizei height, GLsizei depth,
+                                  GLenum format, GLsizei imageSize,
+                                  const GLvoid *data)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glCompressedTextureSubImage3D");
+   if (!texObj)
+      return;
+
+   _mesa_compressed_texture_sub_image(ctx, 3, texObj, texObj->Target, level,
+                                      xoffset, yoffset, zoffset,
+                                      width, height, depth,
+                                      format, imageSize, data, true);
 }
 
 static mesa_format
@@ -4461,9 +4871,9 @@ get_texbuffer_format(const struct gl_context *ctx, GLenum internalFormat)
       case GL_LUMINANCE_ALPHA8I_EXT:
          return MESA_FORMAT_LA_SINT8;
       case GL_LUMINANCE_ALPHA16I_EXT:
-         return MESA_FORMAT_LA_SINT8;
-      case GL_LUMINANCE_ALPHA32I_EXT:
          return MESA_FORMAT_LA_SINT16;
+      case GL_LUMINANCE_ALPHA32I_EXT:
+         return MESA_FORMAT_LA_SINT32;
       case GL_LUMINANCE_ALPHA8UI_EXT:
          return MESA_FORMAT_LA_UINT8;
       case GL_LUMINANCE_ALPHA16UI_EXT:
@@ -4619,30 +5029,26 @@ _mesa_validate_texbuffer_format(const struct gl_context *ctx,
 }
 
 
-static void
-texbufferrange(struct gl_context *ctx, GLenum target, GLenum internalFormat,
-               struct gl_buffer_object *bufObj,
-               GLintptr offset, GLsizeiptr size)
+void
+_mesa_texture_buffer_range(struct gl_context *ctx,
+                           struct gl_texture_object *texObj, GLenum target,
+                           GLenum internalFormat,
+                           struct gl_buffer_object *bufObj,
+                           GLintptr offset, GLsizeiptr size, bool range,
+                           bool dsa)
 {
-   struct gl_texture_object *texObj;
    mesa_format format;
 
    FLUSH_VERTICES(ctx, 0);
 
-   if (target != GL_TEXTURE_BUFFER_ARB) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTexBuffer(target)");
-      return;
-   }
-
    format = _mesa_validate_texbuffer_format(ctx, internalFormat);
    if (format == MESA_FORMAT_NONE) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTexBuffer(internalFormat 0x%x)",
-                  internalFormat);
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glTex%sBuffer%s(internalFormat 0x%x)", dsa ? "ture" : "",
+                  range ? "Range" : "", internalFormat);
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
    _mesa_lock_texture(ctx, texObj);
    {
       _mesa_reference_buffer_object(ctx, &texObj->BufferObject, bufObj);
@@ -4665,10 +5071,17 @@ texbufferrange(struct gl_context *ctx, GLenum target, GLenum internalFormat,
 void GLAPIENTRY
 _mesa_TexBuffer(GLenum target, GLenum internalFormat, GLuint buffer)
 {
+   struct gl_texture_object *texObj;
    struct gl_buffer_object *bufObj;
 
    GET_CURRENT_CONTEXT(ctx);
 
+   /* Need to catch this before it gets to _mesa_get_current_tex_object */
+   if (target != GL_TEXTURE_BUFFER_ARB) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTexBuffer(target)");
+      return;
+   }
+
    /* NOTE: ARB_texture_buffer_object has interactions with
     * the compatibility profile that are not implemented.
     */
@@ -4684,7 +5097,12 @@ _mesa_TexBuffer(GLenum target, GLenum internalFormat, GLuint buffer)
       return;
    }
 
-   texbufferrange(ctx, target, internalFormat, bufObj, 0, buffer ? -1 : 0);
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_buffer_range(ctx, texObj, target, internalFormat, bufObj, 0,
+                              buffer ? -1 : 0, false, false);
 }
 
 
@@ -4693,10 +5111,17 @@ void GLAPIENTRY
 _mesa_TexBufferRange(GLenum target, GLenum internalFormat, GLuint buffer,
                      GLintptr offset, GLsizeiptr size)
 {
+   struct gl_texture_object *texObj;
    struct gl_buffer_object *bufObj;
 
    GET_CURRENT_CONTEXT(ctx);
 
+   /* Need to catch this before it gets to _mesa_get_current_tex_object */
+   if (target != GL_TEXTURE_BUFFER_ARB) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTexBufferRange(target)");
+      return;
+   }
+
    if (!(ctx->API == API_OPENGL_CORE &&
          ctx->Extensions.ARB_texture_buffer_range)) {
       _mesa_error(ctx, GL_INVALID_OPERATION, "glTexBufferRange");
@@ -4725,9 +5150,52 @@ _mesa_TexBufferRange(GLenum target, GLenum internalFormat, GLuint buffer,
       size = 0;
    }
 
-   texbufferrange(ctx, target, internalFormat, bufObj, offset, size);
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_buffer_range(ctx, texObj, target, internalFormat, bufObj,
+                              offset, size, true, false);
 }
 
+/** DSA form of glTexBuffer: attach \p buffer to the buffer texture \p texture. */
+void GLAPIENTRY
+_mesa_TextureBuffer(GLuint texture, GLenum internalFormat, GLuint buffer)
+{
+   struct gl_texture_object *texObj;
+   struct gl_buffer_object *bufObj;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* NOTE: ARB_texture_buffer_object has interactions with
+    * the compatibility profile that are not implemented.
+    */
+   if (!(ctx->API == API_OPENGL_CORE &&
+         ctx->Extensions.ARB_texture_buffer_object)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureBuffer");
+      return;
+   }
+
+   bufObj = _mesa_lookup_bufferobj(ctx, buffer);
+   if (!bufObj && buffer) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureBuffer(buffer %u)",
+                  buffer);
+      return;
+   }
+
+   /* The helper appends "(texture)" itself, so pass the bare function name. */
+   texObj = _mesa_lookup_texture_err(ctx, texture, "glTextureBuffer");
+   if (!texObj)
+      return;
+
+   if (texObj->Target != GL_TEXTURE_BUFFER_ARB) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTextureBuffer(target)");
+      return;
+   }
+
+   _mesa_texture_buffer_range(ctx, texObj, texObj->Target, internalFormat,
+                              bufObj, 0, buffer ? -1 : 0, false, true);
+}
 
 static GLboolean
 is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
@@ -4742,16 +5210,18 @@ is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
 
 /** GL_ARB_texture_multisample */
 static GLboolean
-check_multisample_target(GLuint dims, GLenum target)
+check_multisample_target(GLuint dims, GLenum target, bool dsa)
 {
    switch(target) {
    case GL_TEXTURE_2D_MULTISAMPLE:
-   case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
       return dims == 2;
+   case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
+      return dims == 2 && !dsa;
 
    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-   case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return dims == 3;
+   case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
+      return dims == 3 && !dsa;
 
    default:
       return GL_FALSE;
@@ -4759,19 +5229,20 @@ check_multisample_target(GLuint dims, GLenum target)
 }
 
 
-static void
-teximagemultisample(GLuint dims, GLenum target, GLsizei samples,
-                    GLint internalformat, GLsizei width, GLsizei height,
-                    GLsizei depth, GLboolean fixedsamplelocations,
-                    GLboolean immutable, const char *func)
+void
+_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
+                                struct gl_texture_object *texObj,
+                                GLenum target, GLsizei samples,
+                                GLint internalformat, GLsizei width,
+                                GLsizei height, GLsizei depth,
+                                GLboolean fixedsamplelocations,
+                                GLboolean immutable, const char *func)
 {
-   struct gl_texture_object *texObj;
    struct gl_texture_image *texImage;
    GLboolean sizeOK, dimensionsOK, samplesOK;
    mesa_format texFormat;
    GLenum sample_count_error;
-
-   GET_CURRENT_CONTEXT(ctx);
+   bool dsa = strstr(func, "ture") ? true : false;
 
    if (!(ctx->Extensions.ARB_texture_multisample
       && _mesa_is_desktop_gl(ctx))) {
@@ -4779,9 +5250,15 @@ teximagemultisample(GLuint dims, GLenum target, GLsizei samples,
       return;
    }
 
-   if (!check_multisample_target(dims, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", func);
-      return;
+   if (!check_multisample_target(dims, target, dsa)) {
+      if (dsa) {
+         _mesa_error(ctx, GL_INVALID_OPERATION, "%s(target)", func);
+         return;
+      }
+      else {
+         _mesa_error(ctx, GL_INVALID_ENUM, "%s(target)", func);
+         return;
+      }
    }
 
    /* check that the specified internalformat is color/depth/stencil-renderable;
@@ -4819,8 +5296,6 @@ teximagemultisample(GLuint dims, GLenum target, GLsizei samples,
       return;
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-
    if (immutable && (!texObj || (texObj->Name == 0))) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
             "%s(texture object 0)",
@@ -4893,7 +5368,7 @@ teximagemultisample(GLuint dims, GLenum target, GLsizei samples,
          }
       }
 
-      texObj->Immutable = immutable;
+      texObj->Immutable |= immutable;
 
       if (immutable) {
          _mesa_set_texture_view_state(ctx, texObj, target, 1);
@@ -4909,9 +5384,17 @@ _mesa_TexImage2DMultisample(GLenum target, GLsizei samples,
                             GLenum internalformat, GLsizei width,
                             GLsizei height, GLboolean fixedsamplelocations)
 {
-   teximagemultisample(2, target, samples, internalformat,
-                       width, height, 1, fixedsamplelocations, GL_FALSE,
-                       "glTexImage2DMultisample");
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_image_multisample(ctx, 2, texObj, target, samples,
+                                   internalformat, width, height, 1,
+                                   fixedsamplelocations, GL_FALSE,
+                                   "glTexImage2DMultisample");
 }
 
 
@@ -4921,9 +5404,17 @@ _mesa_TexImage3DMultisample(GLenum target, GLsizei samples,
                             GLsizei height, GLsizei depth,
                             GLboolean fixedsamplelocations)
 {
-   teximagemultisample(3, target, samples, internalformat,
-                       width, height, depth, fixedsamplelocations, GL_FALSE,
-                       "glTexImage3DMultisample");
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_image_multisample(ctx, 3, texObj, target, samples,
+                                   internalformat, width, height, depth,
+                                   fixedsamplelocations, GL_FALSE,
+                                   "glTexImage3DMultisample");
 }
 
 
@@ -4932,9 +5423,17 @@ _mesa_TexStorage2DMultisample(GLenum target, GLsizei samples,
                               GLenum internalformat, GLsizei width,
                               GLsizei height, GLboolean fixedsamplelocations)
 {
-   teximagemultisample(2, target, samples, internalformat,
-                       width, height, 1, fixedsamplelocations, GL_TRUE,
-                       "glTexStorage2DMultisample");
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_image_multisample(ctx, 2, texObj, target, samples,
+                                   internalformat, width, height, 1,
+                                   fixedsamplelocations, GL_TRUE,
+                                   "glTexStorage2DMultisample");
 }
 
 void GLAPIENTRY
@@ -4943,7 +5442,56 @@ _mesa_TexStorage3DMultisample(GLenum target, GLsizei samples,
                               GLsizei height, GLsizei depth,
                               GLboolean fixedsamplelocations)
 {
-   teximagemultisample(3, target, samples, internalformat,
-                       width, height, depth, fixedsamplelocations, GL_TRUE,
-                       "glTexStorage3DMultisample");
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_image_multisample(ctx, 3, texObj, target, samples,
+                                   internalformat, width, height, depth,
+                                   fixedsamplelocations, GL_TRUE,
+                                   "glTexStorage3DMultisample");
+}
+
+void GLAPIENTRY
+_mesa_TextureStorage2DMultisample(GLuint texture, GLsizei samples,
+                                  GLenum internalformat, GLsizei width,
+                                  GLsizei height,
+                                  GLboolean fixedsamplelocations)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glTextureStorage2DMultisample");
+   if (!texObj)
+      return; /* _mesa_lookup_texture_err has already recorded the error */
+
+   _mesa_texture_image_multisample(ctx, 2, texObj, texObj->Target, samples,
+                                   internalformat, width, height, 1, /* depth */
+                                   fixedsamplelocations, GL_TRUE, /* immutable */
+                                   "glTextureStorage2DMultisample");
+}
+
+void GLAPIENTRY
+_mesa_TextureStorage3DMultisample(GLuint texture, GLsizei samples,
+                                  GLenum internalformat, GLsizei width,
+                                  GLsizei height, GLsizei depth,
+                                  GLboolean fixedsamplelocations)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* Look the texture up by name; a miss is reported by the helper itself. */
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glTextureStorage3DMultisample");
+   if (!texObj)
+      return;
+
+   _mesa_texture_image_multisample(ctx, 3, texObj, texObj->Target, samples,
+                                   internalformat, width, height, depth,
+                                   fixedsamplelocations, GL_TRUE, /* immutable */
+                                   "glTextureStorage3DMultisample");
 }
diff --git a/mesalib/src/mesa/main/teximage.h b/mesalib/src/mesa/main/teximage.h
index 4b27381a0..02b0eda38 100644
--- a/mesalib/src/mesa/main/teximage.h
+++ b/mesalib/src/mesa/main/teximage.h
@@ -47,7 +47,7 @@ _mesa_is_cube_face(GLenum target)
            target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB);
 }
 
-/** Is any of the dimensions of given texture equal to zero? */
+/** Are any of the dimensions of given texture equal to zero? */
 static inline GLboolean
 _mesa_is_zero_size_texture(const struct gl_texture_image *texImage)
 {
@@ -99,13 +99,8 @@ _mesa_clear_texture_image(struct gl_context *ctx,
                           struct gl_texture_image *texImage);
 
 
-extern struct gl_texture_object *
-_mesa_get_current_tex_object(struct gl_context *ctx, GLenum target);
-
-
 extern struct gl_texture_image *
-_mesa_select_tex_image(struct gl_context *ctx,
-                       const struct gl_texture_object *texObj,
+_mesa_select_tex_image(const struct gl_texture_object *texObj,
                        GLenum target, GLint level);
 
 
@@ -114,6 +109,16 @@ _mesa_get_tex_image(struct gl_context *ctx, struct gl_texture_object *texObj,
                     GLenum target, GLint level);
 
 
+/**
+ * Return the image stored for face 0 at the object's BaseLevel mipmap level.
+ */
+static inline const struct gl_texture_image *
+_mesa_base_tex_image(const struct gl_texture_object *texObj)
+{
+   return texObj->Image[0][texObj->BaseLevel]; /* BaseLevel is not checked */
+}
+
+
 extern GLint
 _mesa_max_texture_levels(struct gl_context *ctx, GLenum target);
 
@@ -160,24 +165,51 @@ _mesa_legal_texture_base_format_for_target(struct gl_context *ctx,
                                            unsigned dimensions,
                                            const char *caller);
 
-/**
- * Lock a texture for updating.  See also _mesa_lock_context_textures().
- */
-static inline void
-_mesa_lock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
-{
-   mtx_lock(&ctx->Shared->TexMutex);
-   ctx->Shared->TextureStateStamp++;
-   (void) texObj;
-}
+extern void
+_mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
+                        struct gl_texture_object *texObj,
+                        struct gl_texture_image *texImage,
+                        GLenum target, GLint level,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type, const GLvoid *pixels,
+                        bool dsa);
 
-static inline void
-_mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
-{
-   (void) texObj;
-   mtx_unlock(&ctx->Shared->TexMutex);
-}
+extern void
+_mesa_compressed_texture_sub_image(struct gl_context *ctx, GLuint dims, 
+                                   struct gl_texture_object *texObj, 
+                                   GLenum target, GLint level,
+                                   GLint xoffset, GLint yoffset,
+                                   GLint zoffset,
+                                   GLsizei width, GLsizei height,
+                                   GLsizei depth,
+                                   GLenum format, GLsizei imageSize,
+                                   const GLvoid *data, bool dsa);
+
+extern void
+_mesa_copy_texture_sub_image(struct gl_context *ctx, GLuint dims,
+                             struct gl_texture_object *texObj,
+                             GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height, bool dsa);
+
+extern void
+_mesa_texture_image_multisample(struct gl_context *ctx, GLuint dims,
+                                struct gl_texture_object *texObj,
+                                GLenum target, GLsizei samples,
+                                GLint internalformat, GLsizei width,
+                                GLsizei height, GLsizei depth,
+                                GLboolean fixedsamplelocations,
+                                GLboolean immutable, const char *func);
 
+extern void
+_mesa_texture_buffer_range(struct gl_context *ctx,
+                           struct gl_texture_object *texObj, GLenum target, 
+                           GLenum internalFormat,
+                           struct gl_buffer_object *bufObj,
+                           GLintptr offset, GLsizeiptr size, bool range,
+                           bool dsa);
 /*@}*/
 
 
@@ -233,10 +265,31 @@ _mesa_TexSubImage3D( GLenum target, GLint level,
                      GLenum format, GLenum type,
                      const GLvoid *pixels );
 
+extern void GLAPIENTRY
+_mesa_TextureSubImage1D(GLuint texture, GLint level, GLint xoffset,
+                        GLsizei width,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels);
+
 
 extern void GLAPIENTRY
-_mesa_CopyTexImage1D( GLenum target, GLint level, GLenum internalformat,
-                      GLint x, GLint y, GLsizei width, GLint border );
+_mesa_TextureSubImage2D(GLuint texture, GLint level,
+                        GLint xoffset, GLint yoffset,
+                        GLsizei width, GLsizei height,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels);
+
+extern void GLAPIENTRY
+_mesa_TextureSubImage3D(GLuint texture, GLint level,
+                        GLint xoffset, GLint yoffset, GLint zoffset,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type,
+                        const GLvoid *pixels);
+
+
+extern void GLAPIENTRY
+_mesa_CopyTexImage1D(GLenum target, GLint level, GLenum internalformat,
+                     GLint x, GLint y, GLsizei width, GLint border);
 
 
 extern void GLAPIENTRY
@@ -261,7 +314,21 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level,
                          GLint xoffset, GLint yoffset, GLint zoffset,
                          GLint x, GLint y, GLsizei width, GLsizei height );
 
+extern void GLAPIENTRY
+_mesa_CopyTextureSubImage1D(GLuint texture, GLint level,
+                            GLint xoffset, GLint x, GLint y, GLsizei width);
 
+extern void GLAPIENTRY
+_mesa_CopyTextureSubImage2D(GLuint texture, GLint level,
+                            GLint xoffset, GLint yoffset,
+                            GLint x, GLint y,
+                            GLsizei width, GLsizei height);
+
+extern void GLAPIENTRY
+_mesa_CopyTextureSubImage3D(GLuint texture, GLint level,
+                            GLint xoffset, GLint yoffset, GLint zoffset,
+                            GLint x, GLint y,
+                            GLsizei width, GLsizei height);
 
 extern void GLAPIENTRY
 _mesa_ClearTexSubImage( GLuint texture, GLint level,
@@ -297,17 +364,36 @@ _mesa_CompressedTexSubImage1D(GLenum target, GLint level, GLint xoffset,
                                  GLsizei imageSize, const GLvoid *data);
 
 extern void GLAPIENTRY
+_mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset,
+                                  GLsizei width, GLenum format,
+                                  GLsizei imageSize, const GLvoid *data);
+
+extern void GLAPIENTRY
 _mesa_CompressedTexSubImage2D(GLenum target, GLint level, GLint xoffset,
                                  GLint yoffset, GLsizei width, GLsizei height,
                                  GLenum format, GLsizei imageSize,
                                  const GLvoid *data);
 
 extern void GLAPIENTRY
+_mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset,
+                                  GLint yoffset,
+                                  GLsizei width, GLsizei height,
+                                  GLenum format, GLsizei imageSize,
+                                  const GLvoid *data);
+
+extern void GLAPIENTRY
 _mesa_CompressedTexSubImage3D(GLenum target, GLint level, GLint xoffset,
                                  GLint yoffset, GLint zoffset, GLsizei width,
                                  GLsizei height, GLsizei depth, GLenum format,
                                  GLsizei imageSize, const GLvoid *data);
 
+extern void GLAPIENTRY
+_mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset,
+                                  GLint yoffset, GLint zoffset,
+                                  GLsizei width, GLsizei height,
+                                  GLsizei depth,
+                                  GLenum format, GLsizei imageSize,
+                                  const GLvoid *data);
 
 extern void GLAPIENTRY
 _mesa_TexBuffer(GLenum target, GLenum internalFormat, GLuint buffer);
@@ -316,6 +402,9 @@ extern void GLAPIENTRY
 _mesa_TexBufferRange(GLenum target, GLenum internalFormat, GLuint buffer,
                      GLintptr offset, GLsizeiptr size);
 
+extern void GLAPIENTRY
+_mesa_TextureBuffer(GLuint texture, GLenum internalFormat, GLuint buffer);
+
 
 extern void GLAPIENTRY
 _mesa_TexImage2DMultisample(GLenum target, GLsizei samples,
@@ -339,6 +428,17 @@ _mesa_TexStorage3DMultisample(GLenum target, GLsizei samples,
                               GLsizei height, GLsizei depth,
                               GLboolean fixedsamplelocations);
 
+void GLAPIENTRY
+_mesa_TextureStorage2DMultisample(GLuint texture, GLsizei samples,
+                                  GLenum internalformat, GLsizei width,
+                                  GLsizei height,
+                                  GLboolean fixedsamplelocations);
+
+void GLAPIENTRY
+_mesa_TextureStorage3DMultisample(GLuint texture, GLsizei samples,
+                                  GLenum internalformat, GLsizei width,
+                                  GLsizei height, GLsizei depth,
+                                  GLboolean fixedsamplelocations);
 /*@}*/
 
 #ifdef __cplusplus
diff --git a/mesalib/src/mesa/main/texobj.c b/mesalib/src/mesa/main/texobj.c
index f0ff605fc..59090db4e 100644
--- a/mesalib/src/mesa/main/texobj.c
+++ b/mesalib/src/mesa/main/texobj.c
@@ -49,6 +49,54 @@
 /** \name Internal functions */
 /*@{*/
 
+/**
+ * Check the object's Min and Mag filters against its float-ness: a texture
+ * flagged _IsHalfFloat needs OES_texture_half_float_linear, and one flagged
+ * _IsFloat needs OES_texture_float_linear, before any linear filter may be
+ * used. That covers a GL_LINEAR mag filter and the GL_LINEAR,
+ * GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR_MIPMAP_NEAREST and
+ * GL_LINEAR_MIPMAP_LINEAR min filters. GL_NEAREST and
+ * GL_NEAREST_MIPMAP_NEAREST are always accepted.
+ *
+ * Returns true if both filters are usable for this texture, false otherwise.
+ */
+static bool
+valid_filter_for_float(const struct gl_context *ctx,
+                       const struct gl_texture_object *obj)
+{
+   switch (obj->Sampler.MagFilter) {
+   case GL_LINEAR:
+      if (obj->_IsHalfFloat && !ctx->Extensions.OES_texture_half_float_linear) {
+         return false;
+      } else if (obj->_IsFloat && !ctx->Extensions.OES_texture_float_linear) {
+         return false;
+      } /* fallthrough: linear filtering is allowed */
+   case GL_NEAREST:
+   case GL_NEAREST_MIPMAP_NEAREST:
+      break;
+   default:
+      unreachable("Invalid mag filter");
+   }
+
+   switch (obj->Sampler.MinFilter) {
+   case GL_LINEAR:
+   case GL_NEAREST_MIPMAP_LINEAR:
+   case GL_LINEAR_MIPMAP_NEAREST:
+   case GL_LINEAR_MIPMAP_LINEAR:
+      if (obj->_IsHalfFloat && !ctx->Extensions.OES_texture_half_float_linear) {
+         return false;
+      } else if (obj->_IsFloat && !ctx->Extensions.OES_texture_float_linear) {
+         return false;
+      } /* fallthrough: linear filtering is allowed */
+   case GL_NEAREST:
+   case GL_NEAREST_MIPMAP_NEAREST:
+      break;
+   default:
+      unreachable("Invalid min filter");
+   }
+
+   return true;
+}
 
 /**
  * Return the gl_texture_object for a given ID.
@@ -60,6 +108,22 @@ _mesa_lookup_texture(struct gl_context *ctx, GLuint id)
       _mesa_HashLookup(ctx->Shared->TexObjects, id);
 }
 
+/**
+ * Look up texture \p id. On a miss, record GL_INVALID_OPERATION, return NULL.
+ * \p func is the bare GL entry-point name: "(texture)" is appended to it here.
+ */
+struct gl_texture_object *
+_mesa_lookup_texture_err(struct gl_context *ctx, GLuint id, const char* func)
+{
+   struct gl_texture_object *texObj;
+
+   texObj = _mesa_lookup_texture(ctx, id); /* Returns NULL if not found. */
+
+   if (!texObj)
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(texture)", func);
+
+   return texObj;
+}
 
 void
 _mesa_begin_texture_lookups(struct gl_context *ctx)
@@ -82,6 +146,87 @@ _mesa_lookup_texture_locked(struct gl_context *ctx, GLuint id)
       _mesa_HashLookupLocked(ctx->Shared->TexObjects, id);
 }
 
+/**
+ * Return the current unit's texture for \p target (the context's proxy texture
+ * for GL_PROXY_* targets), or NULL if the target is unsupported or unknown.
+ * Note: all <target> error checking should have been done by this point.
+ */
+struct gl_texture_object *
+_mesa_get_current_tex_object(struct gl_context *ctx, GLenum target)
+{
+   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+   const GLboolean arrayTex = ctx->Extensions.EXT_texture_array;
+
+   switch (target) {
+      case GL_TEXTURE_1D:
+         return texUnit->CurrentTex[TEXTURE_1D_INDEX];
+      case GL_PROXY_TEXTURE_1D:
+         return ctx->Texture.ProxyTex[TEXTURE_1D_INDEX];
+      case GL_TEXTURE_2D:
+         return texUnit->CurrentTex[TEXTURE_2D_INDEX];
+      case GL_PROXY_TEXTURE_2D:
+         return ctx->Texture.ProxyTex[TEXTURE_2D_INDEX];
+      case GL_TEXTURE_3D:
+         return texUnit->CurrentTex[TEXTURE_3D_INDEX];
+      case GL_PROXY_TEXTURE_3D:
+         return ctx->Texture.ProxyTex[TEXTURE_3D_INDEX];
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+      case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+      case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+      case GL_TEXTURE_CUBE_MAP_ARB:
+         return ctx->Extensions.ARB_texture_cube_map
+                ? texUnit->CurrentTex[TEXTURE_CUBE_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_CUBE_MAP_ARB:
+         return ctx->Extensions.ARB_texture_cube_map
+                ? ctx->Texture.ProxyTex[TEXTURE_CUBE_INDEX] : NULL;
+      case GL_TEXTURE_CUBE_MAP_ARRAY:
+         return ctx->Extensions.ARB_texture_cube_map_array
+                ? texUnit->CurrentTex[TEXTURE_CUBE_ARRAY_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
+         return ctx->Extensions.ARB_texture_cube_map_array
+                ? ctx->Texture.ProxyTex[TEXTURE_CUBE_ARRAY_INDEX] : NULL;
+      case GL_TEXTURE_RECTANGLE_NV:
+         return ctx->Extensions.NV_texture_rectangle
+                ? texUnit->CurrentTex[TEXTURE_RECT_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_RECTANGLE_NV:
+         return ctx->Extensions.NV_texture_rectangle
+                ? ctx->Texture.ProxyTex[TEXTURE_RECT_INDEX] : NULL;
+      case GL_TEXTURE_1D_ARRAY_EXT:
+         return arrayTex ? texUnit->CurrentTex[TEXTURE_1D_ARRAY_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_1D_ARRAY_EXT:
+         return arrayTex ? ctx->Texture.ProxyTex[TEXTURE_1D_ARRAY_INDEX] : NULL;
+      case GL_TEXTURE_2D_ARRAY_EXT:
+         return arrayTex ? texUnit->CurrentTex[TEXTURE_2D_ARRAY_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_2D_ARRAY_EXT:
+         return arrayTex ? ctx->Texture.ProxyTex[TEXTURE_2D_ARRAY_INDEX] : NULL;
+      case GL_TEXTURE_BUFFER:
+         return ctx->API == API_OPENGL_CORE &&
+                ctx->Extensions.ARB_texture_buffer_object ?
+                texUnit->CurrentTex[TEXTURE_BUFFER_INDEX] : NULL;
+      case GL_TEXTURE_EXTERNAL_OES:
+         return _mesa_is_gles(ctx) && ctx->Extensions.OES_EGL_image_external
+            ? texUnit->CurrentTex[TEXTURE_EXTERNAL_INDEX] : NULL;
+      case GL_TEXTURE_2D_MULTISAMPLE:
+         return ctx->Extensions.ARB_texture_multisample
+            ? texUnit->CurrentTex[TEXTURE_2D_MULTISAMPLE_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
+         return ctx->Extensions.ARB_texture_multisample
+            ? ctx->Texture.ProxyTex[TEXTURE_2D_MULTISAMPLE_INDEX] : NULL;
+      case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+         return ctx->Extensions.ARB_texture_multisample
+            ? texUnit->CurrentTex[TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX] : NULL;
+      case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
+         return ctx->Extensions.ARB_texture_multisample
+            ? ctx->Texture.ProxyTex[TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX] : NULL;
+      default:
+         _mesa_problem(NULL, "bad target in _mesa_get_current_tex_object()");
+         return NULL;
+   }
+}
+
 
 /**
  * Allocate and initialize a new texture object.  But don't put it into the
@@ -311,6 +456,8 @@ _mesa_copy_texture_object( struct gl_texture_object *dest,
    dest->_MipmapComplete = src->_MipmapComplete;
    COPY_4V(dest->Swizzle, src->Swizzle);
    dest->_Swizzle = src->_Swizzle;
+   dest->_IsHalfFloat = src->_IsHalfFloat;
+   dest->_IsFloat = src->_IsFloat;
 
    dest->RequiredTextureImageUnits = src->RequiredTextureImageUnits;
 }
@@ -405,6 +552,9 @@ _mesa_reference_texobj_(struct gl_texture_object **ptr,
       mtx_unlock(&oldTex->Mutex);
 
       if (deleteFlag) {
+         /* Passing in the context drastically changes the driver code for
+          * framebuffer deletion.
+          */
          GET_CURRENT_CONTEXT(ctx);
          if (ctx)
             ctx->Driver.DeleteTexture(ctx, oldTex);
@@ -541,6 +691,14 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
       t->_IsIntegerFormat = datatype == GL_INT || datatype == GL_UNSIGNED_INT;
    }
 
+   /* Check if the texture type is Float or HalfFloatOES and ensure Min and Mag
+    * filters are supported in this case.
+    */
+   if (_mesa_is_gles(ctx) && !valid_filter_for_float(ctx, t)) {
+      incomplete(t, BASE, "Filter is not supported with Float types.");
+      return;
+   }
+
    /* Compute _MaxLevel (the maximum mipmap level we'll sample from given the
     * mipmap image sizes and GL_TEXTURE_MAX_LEVEL state).
     */
@@ -710,25 +868,21 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
 }
 
 
-/**
- * Check if the given cube map texture is "cube complete" as defined in
- * the OpenGL specification.
- */
 GLboolean
-_mesa_cube_complete(const struct gl_texture_object *texObj)
+_mesa_cube_level_complete(const struct gl_texture_object *texObj,
+                          const GLint level)
 {
-   const GLint baseLevel = texObj->BaseLevel;
    const struct gl_texture_image *img0, *img;
    GLuint face;
 
    if (texObj->Target != GL_TEXTURE_CUBE_MAP)
       return GL_FALSE;
 
-   if ((baseLevel < 0) || (baseLevel >= MAX_TEXTURE_LEVELS))
+   if ((level < 0) || (level >= MAX_TEXTURE_LEVELS))
       return GL_FALSE;
 
    /* check first face */
-   img0 = texObj->Image[0][baseLevel];
+   img0 = texObj->Image[0][level];
    if (!img0 ||
        img0->Width < 1 ||
        img0->Width != img0->Height)
@@ -736,7 +890,7 @@ _mesa_cube_complete(const struct gl_texture_object *texObj)
 
    /* check remaining faces vs. first face */
    for (face = 1; face < 6; face++) {
-      img = texObj->Image[face][baseLevel];
+      img = texObj->Image[face][level];
       if (!img ||
           img->Width != img0->Width ||
           img->Height != img0->Height ||
@@ -747,6 +901,15 @@ _mesa_cube_complete(const struct gl_texture_object *texObj)
    return GL_TRUE;
 }
 
+/**
+ * Check if the given cube map texture is "cube complete" as defined in the
+ * OpenGL specification, i.e. _mesa_cube_level_complete() holds at BaseLevel.
+ */
+GLboolean
+_mesa_cube_complete(const struct gl_texture_object *texObj)
+{
+   return _mesa_cube_level_complete(texObj, texObj->BaseLevel);
+}
 
 /**
  * Mark a texture object dirty.  It forces the object to be incomplete
@@ -954,6 +1117,20 @@ _mesa_total_texture_memory(struct gl_context *ctx)
 }
 
 
+/**
+ * Return the base format of \p texObj, read from the _BaseFormat field of
+ * its base-level image (see _mesa_base_tex_image()).
+ * \return base format (such as GL_RGBA), or GL_NONE if that image is NULL
+ */
+GLenum
+_mesa_texture_base_format(const struct gl_texture_object *texObj)
+{
+   const struct gl_texture_image *texImage = _mesa_base_tex_image(texObj);
+
+   return texImage ? texImage->_BaseFormat : GL_NONE;
+}
+
+
 static struct gl_texture_object *
 invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
                                  GLint level, const char *name)
@@ -1007,38 +1184,46 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture,
    return t;
 }
 
-/*@}*/
+/**
+ * Wrapper for the driver function. Need this because _mesa_new_texture_object
+ * permits a target of 0 and does not initialize targetIndex.
+ */
+struct gl_texture_object *
+_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
+{
+      struct gl_texture_object *texObj = NULL;
+      GLint targetIndex;
 
+      if (target == 0)
+         return texObj;
 
-/***********************************************************************/
-/** \name API functions */
-/*@{*/
+      texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
+      targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
+      assert(targetIndex < NUM_TEXTURE_TARGETS);
+      texObj->TargetIndex = targetIndex;
 
+      return texObj;
+}
 
 /**
- * Generate texture names.
- *
- * \param n number of texture names to be generated.
- * \param textures an array in which will hold the generated texture names.
- *
- * \sa glGenTextures().
- *
- * Calls _mesa_HashFindFreeKeyBlock() to find a block of free texture
- * IDs which are stored in \p textures.  Corresponding empty texture
- * objects are also generated.
+ * Helper function for glCreateTextures and glGenTextures. Need this because
+ * glCreateTextures should throw errors if target = 0. This is not exposed to
+ * the rest of Mesa to encourage Mesa internals to use nameless textures,
+ * which do not require expensive hash lookups.
  */
-void GLAPIENTRY
-_mesa_GenTextures( GLsizei n, GLuint *textures )
+static void
+create_textures(struct gl_context *ctx, GLenum target,
+                GLsizei n, GLuint *textures, bool dsa)
 {
-   GET_CURRENT_CONTEXT(ctx);
    GLuint first;
    GLint i;
+   const char *func = dsa ? "Create" : "Gen";
 
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-      _mesa_debug(ctx, "glGenTextures %d\n", n);
+      _mesa_debug(ctx, "gl%sTextures %d\n", func, n);
 
    if (n < 0) {
-      _mesa_error( ctx, GL_INVALID_VALUE, "glGenTextures" );
+      _mesa_error( ctx, GL_INVALID_VALUE, "gl%sTextures(n < 0)", func );
       return;
    }
 
@@ -1055,15 +1240,28 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
    /* Allocate new, empty texture objects */
    for (i = 0; i < n; i++) {
       struct gl_texture_object *texObj;
+      GLint targetIndex;
       GLuint name = first + i;
-      GLenum target = 0;
       texObj = ctx->Driver.NewTextureObject(ctx, name, target);
       if (!texObj) {
          mtx_unlock(&ctx->Shared->Mutex);
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGenTextures");
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "gl%sTextures", func);
          return;
       }
 
+      /* Initialize the target index if target is non-zero. */
+      if (target != 0) {
+         targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
+         if (targetIndex < 0) { /* Bad Target */
+            mtx_unlock(&ctx->Shared->Mutex);
+            _mesa_error(ctx, GL_INVALID_ENUM, "gl%sTextures(target = %s)",
+                        func, _mesa_lookup_enum_by_nr(texObj->Target));
+            return;
+         }
+         assert(targetIndex < NUM_TEXTURE_TARGETS);
+         texObj->TargetIndex = targetIndex;
+      }
+
       /* insert into hash table */
       _mesa_HashInsert(ctx->Shared->TexObjects, texObj->Name, texObj);
 
@@ -1073,6 +1271,65 @@ _mesa_GenTextures( GLsizei n, GLuint *textures )
    mtx_unlock(&ctx->Shared->Mutex);
 }
 
+/*@}*/
+
+
+/***********************************************************************/
+/** \name API functions */
+/*@{*/
+
+
+/**
+ * Generate texture names.
+ *
+ * \param n number of texture names to be generated.
+ * \param textures an array which will hold the generated texture names.
+ *
+ * \sa glGenTextures(), glCreateTextures().
+ *
+ * Forwards to create_textures() with a target of 0 and dsa = false, so
+ * the new objects get no target index and errors are reported under the
+ * "glGenTextures" name.
+ */
+void GLAPIENTRY
+_mesa_GenTextures(GLsizei n, GLuint *textures)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   create_textures(ctx, 0, n, textures, false);
+}
+
+/**
+ * Create texture objects.
+ *
+ * \param target the texture target for each name to be generated.
+ * \param n number of texture names to be generated.
+ * \param textures an array which will hold the generated texture names.
+ *
+ * \sa glCreateTextures(), glGenTextures().
+ *
+ * Records GL_INVALID_ENUM and returns if \p target has no texture target
+ * index; otherwise forwards to create_textures() with dsa = true, which
+ * also initializes each new object's target index.
+ */
+void GLAPIENTRY
+_mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures)
+{
+   GLint targetIndex;
+   GET_CURRENT_CONTEXT(ctx);
+
+   /*
+    * The 4.5 core profile spec (30.10.2014) doesn't specify what
+    * glCreateTextures should do with invalid targets, which was probably an
+    * oversight.  This conforms to the spec for glBindTexture.
+    */
+   targetIndex = _mesa_tex_target_to_index(ctx, target);
+   if (targetIndex < 0) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glCreateTextures(target)");
+      return;
+   }
+
+   create_textures(ctx, target, n, textures, true);
+}
 
 /**
  * Check if the given texture object is bound to the current draw or
@@ -1207,8 +1464,18 @@ _mesa_DeleteTextures( GLsizei n, const GLuint *textures)
    if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
       _mesa_debug(ctx, "glDeleteTextures %d\n", n);
 
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteTextures(n < 0)");
+      return;
+   }
+
    FLUSH_VERTICES(ctx, 0); /* too complex */
 
+   if (n < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteTextures(n)");
+      return;
+   }
+
    if (!textures)
       return;
 
@@ -1257,6 +1524,47 @@ _mesa_DeleteTextures( GLsizei n, const GLuint *textures)
    }
 }
 
+/**
+ * Unbind and unreference a texObj (NULL is a no-op); hash table untouched.
+ */
+void
+_mesa_delete_nameless_texture(struct gl_context *ctx,
+                              struct gl_texture_object *texObj)
+{
+   if (!texObj)
+      return;
+
+   FLUSH_VERTICES(ctx, 0);
+
+   _mesa_lock_texture(ctx, texObj);
+   {
+      /* Check if texture is bound to any framebuffer objects.
+       * If so, unbind.
+       * See section 4.4.2.3 of GL_EXT_framebuffer_object.
+       */
+      unbind_texobj_from_fbo(ctx, texObj);
+
+      /* Check if this texture is currently bound to any texture units.
+       * If so, unbind it.
+       */
+      unbind_texobj_from_texunits(ctx, texObj);
+
+      /* Check if this texture is currently bound to any shader
+       * image unit.  If so, unbind it.
+       * See section 3.9.X of GL_ARB_shader_image_load_store.
+       */
+      unbind_texobj_from_image_units(ctx, texObj);
+   }
+   _mesa_unlock_texture(ctx, texObj);
+
+   ctx->NewState |= _NEW_TEXTURE;
+
+   /* Unreference the texobj.  If refcount hits zero, the texture
+    * will be deleted.
+    */
+   _mesa_reference_texobj(&texObj, NULL);
+}
+
 
 /**
  * Convert a GL texture target enum such as GL_TEXTURE_2D or GL_TEXTURE_3D
@@ -1428,6 +1736,107 @@ _mesa_BindTexture( GLenum target, GLuint texName )
       ctx->Driver.BindTexture(ctx, ctx->Texture.CurrentUnit, target, newTexObj);
 }
 
+/**
+ * Bind texObj to a numbered texture unit, in the slot for its TargetIndex.
+ * texObj is dereferenced unconditionally, so callers must pass non-NULL.
+ * The refcount on the previously bound texture object will be
+ * decremented.  It'll be deleted if the count hits zero.
+ */
+void
+_mesa_bind_texture_unit(struct gl_context *ctx,
+                        GLuint unit,
+                        struct gl_texture_object *texObj)
+{
+   struct gl_texture_unit *texUnit;
+
+   /* Get the texture unit (this is an array look-up) */
+   texUnit = _mesa_get_tex_unit_err(ctx, unit, "glBindTextureUnit");
+   if (!texUnit)
+      return;
+
+   /* Early out if the shared state has a single reference (RefCount == 1)
+    * and texObj is already current in this unit for its TargetIndex.
+    */
+   {
+      bool early_out;
+      mtx_lock(&ctx->Shared->Mutex);
+      early_out = ((ctx->Shared->RefCount == 1)
+                   && (texObj == texUnit->CurrentTex[texObj->TargetIndex]));
+      mtx_unlock(&ctx->Shared->Mutex);
+      if (early_out) {
+         return;
+      }
+   }
+
+   /* flush before changing binding */
+   FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+
+   _mesa_reference_texobj(&texUnit->CurrentTex[texObj->TargetIndex],
+                          texObj);
+   ASSERT(texUnit->CurrentTex[texObj->TargetIndex]);
+   ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
+                                         unit + 1);
+   texUnit->_BoundTextures |= (1 << texObj->TargetIndex);
+
+
+   /* Pass BindTexture call to device driver */
+   if (ctx->Driver.BindTexture) {
+      ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
+   }
+}
+
+/**
+ * Bind a named texture to the specified texture unit.
+ *
+ * \param unit texture unit.
+ * \param texture texture name.
+ *
+ * \sa glBindTextureUnit(), glBindTexture().
+ *
+ * If the named texture is 0, this will reset each target for the specified
+ * texture unit to its default texture.
+ * Any other name that is not an existing texture object (unknown, or
+ * generated but never given a target) throws GL_INVALID_OPERATION.
+ */
+void GLAPIENTRY
+_mesa_BindTextureUnit(GLuint unit, GLuint texture)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_texture_object *texObj;
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx, "glBindTextureUnit %s %d\n",
+                  _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit), (GLint) texture);
+
+   /* Section 8.1 (Texture Objects) of the OpenGL 4.5 core profile spec
+    * (20141030) says:
+    *    "When texture is zero, each of the targets enumerated at the
+    *    beginning of this section is reset to its default texture for the
+    *    corresponding texture image unit."
+    */
+   if (texture == 0) {
+      unbind_textures_from_unit(ctx, unit);
+      return;
+   }
+
+   /* Get the non-default texture object */
+   texObj = _mesa_lookup_texture(ctx, texture);
+
+   /* Error checking: unknown names, then names that never got a target */
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+         "glBindTextureUnit(non-gen name)");
+      return;
+   }
+   if (texObj->Target == 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBindTextureUnit(target)");
+      return;
+   }
+   assert(valid_texture_object(texObj));
+
+   _mesa_bind_texture_unit(ctx, unit, texObj);
+}
+
 
 void GLAPIENTRY
 _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
diff --git a/mesalib/src/mesa/main/texobj.h b/mesalib/src/mesa/main/texobj.h
index efcd7661e..ec5ccb276 100644
--- a/mesalib/src/mesa/main/texobj.h
+++ b/mesalib/src/mesa/main/texobj.h
@@ -51,6 +51,9 @@ extern "C" {
 extern struct gl_texture_object *
 _mesa_lookup_texture(struct gl_context *ctx, GLuint id);
 
+extern struct gl_texture_object *
+_mesa_lookup_texture_err(struct gl_context *ctx, GLuint id, const char* func);
+
 extern void
 _mesa_begin_texture_lookups(struct gl_context *ctx);
 
@@ -61,6 +64,9 @@ extern struct gl_texture_object *
 _mesa_lookup_texture_locked(struct gl_context *ctx, GLuint id);
 
 extern struct gl_texture_object *
+_mesa_get_current_tex_object(struct gl_context *ctx, GLenum target);
+
+extern struct gl_texture_object *
 _mesa_new_texture_object( struct gl_context *ctx, GLuint name, GLenum target );
 
 extern void
@@ -95,6 +101,24 @@ _mesa_reference_texobj(struct gl_texture_object **ptr,
       _mesa_reference_texobj_(ptr, tex);
 }
 
+/**
+ * Lock/unlock textures for updating.  See also _mesa_lock_context_textures().
+ */
+static inline void
+_mesa_lock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
+{
+   mtx_lock(&ctx->Shared->TexMutex);
+   ctx->Shared->TextureStateStamp++;
+   (void) texObj;
+}
+
+static inline void
+_mesa_unlock_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
+{
+   (void) texObj;
+   mtx_unlock(&ctx->Shared->TexMutex);
+}
+
 
 /**
  * Return number of faces for a texture target.  This will be 6 for
@@ -154,6 +178,10 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
                                 struct gl_texture_object *obj );
 
 extern GLboolean
+_mesa_cube_level_complete(const struct gl_texture_object *texObj,
+                          const GLint level);
+
+extern GLboolean
 _mesa_cube_complete(const struct gl_texture_object *texObj);
 
 extern void
@@ -165,12 +193,27 @@ _mesa_get_fallback_texture(struct gl_context *ctx, gl_texture_index tex);
 extern GLuint
 _mesa_total_texture_memory(struct gl_context *ctx);
 
+extern GLenum
+_mesa_texture_base_format(const struct gl_texture_object *texObj);
+
 extern void
 _mesa_unlock_context_textures( struct gl_context *ctx );
 
 extern void
 _mesa_lock_context_textures( struct gl_context *ctx );
 
+extern struct gl_texture_object *
+_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target);
+
+extern void
+_mesa_delete_nameless_texture(struct gl_context *ctx,
+                              struct gl_texture_object *texObj);
+
+extern void
+_mesa_bind_texture_unit(struct gl_context *ctx,
+                        GLuint unit,
+                        struct gl_texture_object *texObj);
+
 /*@}*/
 
 /**
@@ -179,8 +222,10 @@ _mesa_lock_context_textures( struct gl_context *ctx );
 /*@{*/
 
 extern void GLAPIENTRY
-_mesa_GenTextures( GLsizei n, GLuint *textures );
+_mesa_GenTextures(GLsizei n, GLuint *textures);
 
+extern void GLAPIENTRY
+_mesa_CreateTextures(GLenum target, GLsizei n, GLuint *textures);
 
 extern void GLAPIENTRY
 _mesa_DeleteTextures( GLsizei n, const GLuint *textures );
@@ -189,6 +234,8 @@ _mesa_DeleteTextures( GLsizei n, const GLuint *textures );
 extern void GLAPIENTRY
 _mesa_BindTexture( GLenum target, GLuint texture );
 
+extern void GLAPIENTRY
+_mesa_BindTextureUnit(GLuint unit, GLuint texture);
 
 extern void GLAPIENTRY
 _mesa_BindTextures( GLuint first, GLsizei count, const GLuint *textures );
diff --git a/mesalib/src/mesa/main/texparam.c b/mesalib/src/mesa/main/texparam.c
index e40fb249e..c4a5841c5 100644
--- a/mesalib/src/mesa/main/texparam.c
+++ b/mesalib/src/mesa/main/texparam.c
@@ -123,7 +123,7 @@ validate_texture_wrap_mode(struct gl_context * ctx, GLenum target, GLenum wrap)
  * Only the glGetTexLevelParameter() functions accept proxy targets.
  */
 static struct gl_texture_object *
-get_texobj(struct gl_context *ctx, GLenum target, GLboolean get)
+get_texobj_by_target(struct gl_context *ctx, GLenum target, GLboolean get)
 {
    struct gl_texture_unit *texUnit;
    int targetIndex;
@@ -147,6 +147,46 @@ get_texobj(struct gl_context *ctx, GLenum target, GLboolean get)
    return texUnit->CurrentTex[targetIndex];
 }
 
+/**
+ * Look up the texture object for the given texture name.
+ * Returns NULL if the name is unknown (no error recorded; the caller must
+ * do that) or if the object's target is not one of the targets accepted
+ * below, in which case GL_INVALID_ENUM is recorded here.
+ */
+static struct gl_texture_object *
+get_texobj_by_name(struct gl_context *ctx, GLuint texture, GLboolean get)
+{
+   struct gl_texture_object *texObj;
+
+   texObj = _mesa_lookup_texture(ctx, texture);
+   if (!texObj) {
+      /*
+       * User passed a non-generated name.
+       * Throw the error in the caller.
+       */
+      return NULL;
+   }
+
+   switch (texObj->Target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_RECTANGLE:
+      return texObj;
+   default:
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "gl%sTextureParameter(target)", get ? "Get" : "");
+      return NULL;
+   }
+
+}
+
 
 /**
  * Convert GL_RED/GREEN/BLUE/ALPHA/ZERO/ONE to SWIZZLE_X/Y/Z/W/ZERO/ONE.
@@ -189,7 +229,7 @@ set_swizzle_component(GLuint *swizzle, GLuint comp, GLuint swz)
 
 /**
  * This is called just prior to changing any texture object state which
- * will not effect texture completeness.
+ * will not affect texture completeness.
  */
 static inline void
 flush(struct gl_context *ctx)
@@ -200,7 +240,7 @@ flush(struct gl_context *ctx)
 
 /**
  * This is called just prior to changing any texture object state which
- * can effect texture completeness (texture base level, max level).
+ * could affect texture completeness (texture base level, max level).
  * Any pending rendering will be flushed out, we'll set the _NEW_TEXTURE
  * state flag and then mark the texture object as 'incomplete' so that any
  * per-texture derived state gets recomputed.
@@ -234,12 +274,14 @@ target_allows_setting_sampler_parameters(GLenum target)
 static GLboolean
 set_tex_parameteri(struct gl_context *ctx,
                    struct gl_texture_object *texObj,
-                   GLenum pname, const GLint *params)
+                   GLenum pname, const GLint *params, bool dsa)
 {
+   const char *suffix = dsa ? "ture" : "";
+
    switch (pname) {
    case GL_TEXTURE_MIN_FILTER:
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.MinFilter == params[0])
          return GL_FALSE;
@@ -267,7 +309,7 @@ set_tex_parameteri(struct gl_context *ctx,
 
    case GL_TEXTURE_MAG_FILTER:
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.MagFilter == params[0])
          return GL_FALSE;
@@ -284,7 +326,7 @@ set_tex_parameteri(struct gl_context *ctx,
 
    case GL_TEXTURE_WRAP_S:
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.WrapS == params[0])
          return GL_FALSE;
@@ -297,7 +339,7 @@ set_tex_parameteri(struct gl_context *ctx,
 
    case GL_TEXTURE_WRAP_T:
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.WrapT == params[0])
          return GL_FALSE;
@@ -310,7 +352,7 @@ set_tex_parameteri(struct gl_context *ctx,
 
    case GL_TEXTURE_WRAP_R:
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.WrapR == params[0])
          return GL_FALSE;
@@ -332,10 +374,15 @@ set_tex_parameteri(struct gl_context *ctx,
            texObj->Target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) && params[0] != 0)
          goto invalid_operation;
 
-      if (params[0] < 0 ||
-          (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] != 0)) {
+      if (params[0] < 0) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glTexParameter(param=%d)", params[0]);
+                     "glTex%sParameter(param=%d)", suffix, params[0]);
+         return GL_FALSE;
+      }
+      if (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] != 0) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glTex%sParameter(target=%s, param=%d)", suffix,
+                     _mesa_lookup_enum_by_nr(texObj->Target), params[0]);
          return GL_FALSE;
       }
       incomplete(ctx, texObj);
@@ -355,7 +402,8 @@ set_tex_parameteri(struct gl_context *ctx,
       if (params[0] < 0 ||
           (texObj->Target == GL_TEXTURE_RECTANGLE_ARB && params[0] > 0)) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glTexParameter(param=%d)", params[0]);
+                     "glTex%sParameter(param=%d)", suffix,
+                     params[0]);
          return GL_FALSE;
       }
       incomplete(ctx, texObj);
@@ -392,7 +440,7 @@ set_tex_parameteri(struct gl_context *ctx,
           || _mesa_is_gles3(ctx)) {
 
          if (!target_allows_setting_sampler_parameters(texObj->Target))
-            goto invalid_operation;
+            goto invalid_enum;
 
          if (texObj->Sampler.CompareMode == params[0])
             return GL_FALSE;
@@ -411,7 +459,7 @@ set_tex_parameteri(struct gl_context *ctx,
           || _mesa_is_gles3(ctx)) {
 
          if (!target_allows_setting_sampler_parameters(texObj->Target))
-            goto invalid_operation;
+            goto invalid_enum;
 
          if (texObj->Sampler.CompareFunc == params[0])
             return GL_FALSE;
@@ -486,7 +534,7 @@ set_tex_parameteri(struct gl_context *ctx,
          const GLint swz = comp_to_swizzle(params[0]);
          if (swz < 0) {
             _mesa_error(ctx, GL_INVALID_ENUM,
-                        "glTexParameter(swizzle 0x%x)", params[0]);
+                        "glTex%sParameter(swizzle 0x%x)", suffix, params[0]);
             return GL_FALSE;
          }
          ASSERT(comp < 4);
@@ -511,7 +559,8 @@ set_tex_parameteri(struct gl_context *ctx,
             }
             else {
                _mesa_error(ctx, GL_INVALID_ENUM,
-                           "glTexParameter(swizzle 0x%x)", params[comp]);
+                           "glTex%sParameter(swizzle 0x%x)",
+                           suffix, params[comp]);
                return GL_FALSE;
             }
          }
@@ -525,7 +574,7 @@ set_tex_parameteri(struct gl_context *ctx,
          GLenum decode = params[0];
 
          if (!target_allows_setting_sampler_parameters(texObj->Target))
-            goto invalid_operation;
+            goto invalid_enum;
 
 	 if (decode == GL_DECODE_EXT || decode == GL_SKIP_DECODE_EXT) {
 	    if (texObj->Sampler.sRGBDecode != decode) {
@@ -543,7 +592,7 @@ set_tex_parameteri(struct gl_context *ctx,
          GLenum param = params[0];
 
          if (!target_allows_setting_sampler_parameters(texObj->Target))
-            goto invalid_operation;
+            goto invalid_enum;
 
          if (param != GL_TRUE && param != GL_FALSE) {
             goto invalid_param;
@@ -561,18 +610,23 @@ set_tex_parameteri(struct gl_context *ctx,
    }
 
 invalid_pname:
-   _mesa_error(ctx, GL_INVALID_ENUM, "glTexParameter(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+   _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
+               suffix, _mesa_lookup_enum_by_nr(pname));
    return GL_FALSE;
 
 invalid_param:
-   _mesa_error(ctx, GL_INVALID_ENUM, "glTexParameter(param=%s)",
-               _mesa_lookup_enum_by_nr(params[0]));
+   _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(param=%s)",
+               suffix, _mesa_lookup_enum_by_nr(params[0]));
    return GL_FALSE;
 
 invalid_operation:
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glTexParameter(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+   _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sParameter(pname=%s)",
+               suffix, _mesa_lookup_enum_by_nr(pname));
+   return GL_FALSE;
+
+invalid_enum:
+   _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
+               suffix, _mesa_lookup_enum_by_nr(pname));
    return GL_FALSE;
 }
 
@@ -584,15 +638,17 @@ invalid_operation:
 static GLboolean
 set_tex_parameterf(struct gl_context *ctx,
                    struct gl_texture_object *texObj,
-                   GLenum pname, const GLfloat *params)
+                   GLenum pname, const GLfloat *params, bool dsa)
 {
+   const char *suffix = dsa ? "ture" : "";
+
    switch (pname) {
    case GL_TEXTURE_MIN_LOD:
       if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx))
          goto invalid_pname;
 
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.MinLod == params[0])
          return GL_FALSE;
@@ -605,7 +661,7 @@ set_tex_parameterf(struct gl_context *ctx,
          goto invalid_pname;
 
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.MaxLod == params[0])
          return GL_FALSE;
@@ -624,12 +680,13 @@ set_tex_parameterf(struct gl_context *ctx,
    case GL_TEXTURE_MAX_ANISOTROPY_EXT:
       if (ctx->Extensions.EXT_texture_filter_anisotropic) {
          if (!target_allows_setting_sampler_parameters(texObj->Target))
-            goto invalid_operation;
+            goto invalid_enum;
 
          if (texObj->Sampler.MaxAnisotropy == params[0])
             return GL_FALSE;
          if (params[0] < 1.0) {
-            _mesa_error(ctx, GL_INVALID_VALUE, "glTexParameter(param)" );
+            _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sParameter(param)",
+                        suffix);
             return GL_FALSE;
          }
          flush(ctx);
@@ -651,7 +708,7 @@ set_tex_parameterf(struct gl_context *ctx,
          goto invalid_pname;
 
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       if (texObj->Sampler.LodBias != params[0]) {
 	 flush(ctx);
@@ -665,7 +722,7 @@ set_tex_parameterf(struct gl_context *ctx,
          goto invalid_pname;
 
       if (!target_allows_setting_sampler_parameters(texObj->Target))
-         goto invalid_operation;
+         goto invalid_enum;
 
       flush(ctx);
       /* ARB_texture_float disables clamping */
@@ -688,27 +745,23 @@ set_tex_parameterf(struct gl_context *ctx,
    return GL_FALSE;
 
 invalid_pname:
-   _mesa_error(ctx, GL_INVALID_ENUM, "glTexParameter(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+   _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
+               suffix, _mesa_lookup_enum_by_nr(pname));
    return GL_FALSE;
 
-invalid_operation:
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glTexParameter(pname=%s)",
-               _mesa_lookup_enum_by_nr(pname));
+invalid_enum:
+   _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameter(pname=%s)",
+               suffix, _mesa_lookup_enum_by_nr(pname));
    return GL_FALSE;
 }
 
 
-void GLAPIENTRY
-_mesa_TexParameterf(GLenum target, GLenum pname, GLfloat param)
+void
+_mesa_texture_parameterf(struct gl_context *ctx,
+                         struct gl_texture_object *texObj,
+                         GLenum pname, GLfloat param, bool dsa)
 {
    GLboolean need_update;
-   struct gl_texture_object *texObj;
-   GET_CURRENT_CONTEXT(ctx);
-
-   texObj = get_texobj(ctx, target, GL_FALSE);
-   if (!texObj)
-      return;
 
    switch (pname) {
    case GL_TEXTURE_MIN_FILTER:
@@ -736,16 +789,21 @@ _mesa_TexParameterf(GLenum target, GLenum pname, GLfloat param)
                 ((param < INT_MIN) ? INT_MIN : (GLint) (param - 0.5));
 
          p[1] = p[2] = p[3] = 0;
-         need_update = set_tex_parameteri(ctx, texObj, pname, p);
+         need_update = set_tex_parameteri(ctx, texObj, pname, p, dsa);
       }
       break;
+   case GL_TEXTURE_BORDER_COLOR:
+   case GL_TEXTURE_SWIZZLE_RGBA:
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sParameterf(non-scalar pname)",
+                  dsa ? "ture" : "");
+      return;
    default:
       {
          /* this will generate an error if pname is illegal */
          GLfloat p[4];
          p[0] = param;
          p[1] = p[2] = p[3] = 0.0F;
-         need_update = set_tex_parameterf(ctx, texObj, pname, p);
+         need_update = set_tex_parameterf(ctx, texObj, pname, p, dsa);
       }
    }
 
@@ -755,17 +813,12 @@ _mesa_TexParameterf(GLenum target, GLenum pname, GLfloat param)
 }
 
 
-void GLAPIENTRY
-_mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
+void
+_mesa_texture_parameterfv(struct gl_context *ctx,
+                          struct gl_texture_object *texObj,
+                          GLenum pname, const GLfloat *params, bool dsa)
 {
    GLboolean need_update;
-   struct gl_texture_object *texObj;
-   GET_CURRENT_CONTEXT(ctx);
-
-   texObj = get_texobj(ctx, target, GL_FALSE);
-   if (!texObj)
-      return;
-
    switch (pname) {
    case GL_TEXTURE_MIN_FILTER:
    case GL_TEXTURE_MAG_FILTER:
@@ -786,7 +839,7 @@ _mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
          GLint p[4];
          p[0] = (GLint) params[0];
          p[1] = p[2] = p[3] = 0;
-         need_update = set_tex_parameteri(ctx, texObj, pname, p);
+         need_update = set_tex_parameteri(ctx, texObj, pname, p, dsa);
       }
       break;
    case GL_TEXTURE_CROP_RECT_OES:
@@ -797,7 +850,7 @@ _mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
          iparams[1] = (GLint) params[1];
          iparams[2] = (GLint) params[2];
          iparams[3] = (GLint) params[3];
-         need_update = set_tex_parameteri(ctx, texObj, pname, iparams);
+         need_update = set_tex_parameteri(ctx, texObj, pname, iparams, dsa);
       }
       break;
    case GL_TEXTURE_SWIZZLE_R_EXT:
@@ -813,12 +866,12 @@ _mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
             p[2] = (GLint) params[2];
             p[3] = (GLint) params[3];
          }
-         need_update = set_tex_parameteri(ctx, texObj, pname, p);
+         need_update = set_tex_parameteri(ctx, texObj, pname, p, dsa);
       }
       break;
    default:
       /* this will generate an error if pname is illegal */
-      need_update = set_tex_parameterf(ctx, texObj, pname, params);
+      need_update = set_tex_parameterf(ctx, texObj, pname, params, dsa);
    }
 
    if (ctx->Driver.TexParameter && need_update) {
@@ -827,17 +880,12 @@ _mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
 }
 
 
-void GLAPIENTRY
-_mesa_TexParameteri(GLenum target, GLenum pname, GLint param)
+void
+_mesa_texture_parameteri(struct gl_context *ctx,
+                         struct gl_texture_object *texObj,
+                         GLenum pname, GLint param, bool dsa)
 {
    GLboolean need_update;
-   struct gl_texture_object *texObj;
-   GET_CURRENT_CONTEXT(ctx);
-
-   texObj = get_texobj(ctx, target, GL_FALSE);
-   if (!texObj)
-      return;
-
    switch (pname) {
    case GL_TEXTURE_MIN_LOD:
    case GL_TEXTURE_MAX_LOD:
@@ -850,16 +898,24 @@ _mesa_TexParameteri(GLenum target, GLenum pname, GLint param)
          fparam[0] = (GLfloat) param;
          fparam[1] = fparam[2] = fparam[3] = 0.0F;
          /* convert int param to float */
-         need_update = set_tex_parameterf(ctx, texObj, pname, fparam);
+         need_update = set_tex_parameterf(ctx, texObj, pname, fparam, dsa);
       }
       break;
+   case GL_TEXTURE_BORDER_COLOR:
+   case GL_TEXTURE_SWIZZLE_RGBA:
+      {
+         _mesa_error(ctx, GL_INVALID_ENUM,
+                     "glTex%sParameteri(non-scalar pname)",
+                     dsa ? "ture" : "");
+         return;
+      }
    default:
       /* this will generate an error if pname is illegal */
       {
          GLint iparam[4];
          iparam[0] = param;
          iparam[1] = iparam[2] = iparam[3] = 0;
-         need_update = set_tex_parameteri(ctx, texObj, pname, iparam);
+         need_update = set_tex_parameteri(ctx, texObj, pname, iparam, dsa);
       }
    }
 
@@ -870,16 +926,12 @@ _mesa_TexParameteri(GLenum target, GLenum pname, GLint param)
 }
 
 
-void GLAPIENTRY
-_mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)
+void
+_mesa_texture_parameteriv(struct gl_context *ctx,
+                          struct gl_texture_object *texObj,
+                          GLenum pname, const GLint *params, bool dsa)
 {
    GLboolean need_update;
-   struct gl_texture_object *texObj;
-   GET_CURRENT_CONTEXT(ctx);
-
-   texObj = get_texobj(ctx, target, GL_FALSE);
-   if (!texObj)
-      return;
 
    switch (pname) {
    case GL_TEXTURE_BORDER_COLOR:
@@ -890,7 +942,7 @@ _mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)
          fparams[1] = INT_TO_FLOAT(params[1]);
          fparams[2] = INT_TO_FLOAT(params[2]);
          fparams[3] = INT_TO_FLOAT(params[3]);
-         need_update = set_tex_parameterf(ctx, texObj, pname, fparams);
+         need_update = set_tex_parameterf(ctx, texObj, pname, fparams, dsa);
       }
       break;
    case GL_TEXTURE_MIN_LOD:
@@ -904,12 +956,12 @@ _mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)
          GLfloat fparams[4];
          fparams[0] = (GLfloat) params[0];
          fparams[1] = fparams[2] = fparams[3] = 0.0F;
-         need_update = set_tex_parameterf(ctx, texObj, pname, fparams);
+         need_update = set_tex_parameterf(ctx, texObj, pname, fparams, dsa);
       }
       break;
    default:
       /* this will generate an error if pname is illegal */
-      need_update = set_tex_parameteri(ctx, texObj, pname, params);
+      need_update = set_tex_parameteri(ctx, texObj, pname, params, dsa);
    }
 
    if (ctx->Driver.TexParameter && need_update) {
@@ -925,6 +977,94 @@ _mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)
    }
 }
 
+void
+_mesa_texture_parameterIiv(struct gl_context *ctx,
+                           struct gl_texture_object *texObj,
+                           GLenum pname, const GLint *params, bool dsa)
+{
+   switch (pname) {
+   case GL_TEXTURE_BORDER_COLOR:
+      FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+      /* set the integer-valued border color */
+      COPY_4V(texObj->Sampler.BorderColor.i, params);
+      break;
+   default:
+      _mesa_texture_parameteriv(ctx, texObj, pname, params, dsa);
+      break;
+   }
+   /* XXX no driver hook for TexParameterIiv() yet */
+}
+
+void
+_mesa_texture_parameterIuiv(struct gl_context *ctx,
+                            struct gl_texture_object *texObj,
+                            GLenum pname, const GLuint *params, bool dsa)
+{
+   switch (pname) {
+   case GL_TEXTURE_BORDER_COLOR:
+      FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+      /* set the unsigned integer-valued border color */
+      COPY_4V(texObj->Sampler.BorderColor.ui, params);
+      break;
+   default:
+      _mesa_texture_parameteriv(ctx, texObj, pname, (const GLint *) params,
+                                dsa);
+      break;
+   }
+   /* XXX no driver hook for TexParameterIuiv() yet */
+}
+
+void GLAPIENTRY
+_mesa_TexParameterf(GLenum target, GLenum pname, GLfloat param)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
+   if (!texObj)
+      return;
+
+   _mesa_texture_parameterf(ctx, texObj, pname, param, false);
+}
+
+void GLAPIENTRY
+_mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
+   if (!texObj)
+      return;
+
+   _mesa_texture_parameterfv(ctx, texObj, pname, params, false);
+}
+
+void GLAPIENTRY
+_mesa_TexParameteri(GLenum target, GLenum pname, GLint param)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
+   if (!texObj)
+      return;
+
+   _mesa_texture_parameteri(ctx, texObj, pname, param, false);
+}
+
+void GLAPIENTRY
+_mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
+   if (!texObj)
+      return;
+
+   _mesa_texture_parameteriv(ctx, texObj, pname, params, false);
+}
 
 /**
  * Set tex parameter to integer value(s).  Primarily intended to set
@@ -937,24 +1077,13 @@ _mesa_TexParameterIiv(GLenum target, GLenum pname, const GLint *params)
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   texObj = get_texobj(ctx, target, GL_FALSE);
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
    if (!texObj)
       return;
 
-   switch (pname) {
-   case GL_TEXTURE_BORDER_COLOR:
-      FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-      /* set the integer-valued border color */
-      COPY_4V(texObj->Sampler.BorderColor.i, params);
-      break;
-   default:
-      _mesa_TexParameteriv(target, pname, params);
-      break;
-   }
-   /* XXX no driver hook for TexParameterIiv() yet */
+   _mesa_texture_parameterIiv(ctx, texObj, pname, params, false);
 }
 
-
 /**
  * Set tex parameter to unsigned integer value(s).  Primarily intended to set
  * uint-valued texture border color (for integer-valued textures).
@@ -966,26 +1095,117 @@ _mesa_TexParameterIuiv(GLenum target, GLenum pname, const GLuint *params)
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   texObj = get_texobj(ctx, target, GL_FALSE);
+   texObj = get_texobj_by_target(ctx, target, GL_FALSE);
    if (!texObj)
       return;
 
-   switch (pname) {
-   case GL_TEXTURE_BORDER_COLOR:
-      FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-      /* set the unsigned integer-valued border color */
-      COPY_4V(texObj->Sampler.BorderColor.ui, params);
-      break;
-   default:
-      _mesa_TexParameteriv(target, pname, (const GLint *) params);
-      break;
+   _mesa_texture_parameterIuiv(ctx, texObj, pname, params, false);
+}
+
+
+void GLAPIENTRY
+_mesa_TextureParameterfv(GLuint texture, GLenum pname, const GLfloat *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureParameterfv(texture)");
+      return;
    }
-   /* XXX no driver hook for TexParameterIuiv() yet */
+
+   _mesa_texture_parameterfv(ctx, texObj, pname, params, true);
+}
+
+void GLAPIENTRY
+_mesa_TextureParameterf(GLuint texture, GLenum pname, GLfloat param)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureParameterf(texture)");
+      return;
+   }
+
+   _mesa_texture_parameterf(ctx, texObj, pname, param, true);
 }
 
+void GLAPIENTRY
+_mesa_TextureParameteri(GLuint texture, GLenum pname, GLint param)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureParameteri(texture)");
+      return;
+   }
+
+   _mesa_texture_parameteri(ctx, texObj, pname, param, true);
+}
+
+void GLAPIENTRY
+_mesa_TextureParameteriv(GLuint texture, GLenum pname,
+                         const GLint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTextureParameteriv(texture)");
+      return;
+   }
+
+   _mesa_texture_parameteriv(ctx, texObj, pname, params, true);
+}
+
+
+void GLAPIENTRY
+_mesa_TextureParameterIiv(GLuint texture, GLenum pname, const GLint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureParameterIiv(texture)");
+      return;
+   }
+
+   _mesa_texture_parameterIiv(ctx, texObj, pname, params, true);
+}
+
+void GLAPIENTRY
+_mesa_TextureParameterIuiv(GLuint texture, GLenum pname, const GLuint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_FALSE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureParameterIuiv(texture)");
+      return;
+   }
+
+   _mesa_texture_parameterIuiv(ctx, texObj, pname, params, true);
+}
 
 static GLboolean
-legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target)
+legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target,
+                                     bool dsa)
 {
    switch (target) {
    case GL_TEXTURE_1D:
@@ -1038,6 +1258,16 @@ legal_get_tex_level_parameter_target(struct gl_context *ctx, GLenum target)
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE:
    case GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY:
       return ctx->Extensions.ARB_texture_multisample;
+
+   /*  This is a valid target for dsa, but the OpenGL 4.5 core spec
+    *  (30.10.2014) Section 8.11 Texture Queries says:
+    *       "For GetTextureLevelParameter* only, texture may also be a cube
+    *       map texture object.  In this case the query is always performed
+    *       for face zero (the TEXTURE_CUBE_MAP_POSITIVE_X face), since there
+    *       is no way to specify another face."
+    */
+   case GL_TEXTURE_CUBE_MAP:
+      return dsa;
    default:
       return GL_FALSE;
    }
@@ -1048,13 +1278,15 @@ static void
 get_tex_level_parameter_image(struct gl_context *ctx,
                               const struct gl_texture_object *texObj,
                               GLenum target, GLint level,
-                              GLenum pname, GLint *params)
+                              GLenum pname, GLint *params,
+                              bool dsa)
 {
    const struct gl_texture_image *img = NULL;
    struct gl_texture_image dummy_image;
    mesa_format texFormat;
+   const char *suffix = dsa ? "ture" : "";
 
-   img = _mesa_select_tex_image(ctx, texObj, target, level);
+   img = _mesa_select_tex_image(texObj, target, level);
    if (!img || img->TexFormat == MESA_FORMAT_NONE) {
       /* In case of undefined texture image return the default values.
        *
@@ -1160,11 +1392,12 @@ get_tex_level_parameter_image(struct gl_context *ctx,
              !_mesa_is_proxy_texture(target)) {
             *params = _mesa_format_image_size(texFormat, img->Width,
                                               img->Height, img->Depth);
-	 }
-	 else {
-	    _mesa_error(ctx, GL_INVALID_OPERATION,
-			"glGetTexLevelParameter[if]v(pname)");
-	 }
+    }
+    else {
+       _mesa_error(ctx, GL_INVALID_OPERATION,
+                   "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
+                   _mesa_lookup_enum_by_nr(pname));
+    }
          break;
       case GL_TEXTURE_COMPRESSED:
          *params = (GLint) _mesa_is_format_compressed(texFormat);
@@ -1211,7 +1444,7 @@ get_tex_level_parameter_image(struct gl_context *ctx,
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM,
-               "glGetTexLevelParameter[if]v(pname=%s)",
+               "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
                _mesa_lookup_enum_by_nr(pname));
 }
 
@@ -1219,12 +1452,13 @@ invalid_pname:
 static void
 get_tex_level_parameter_buffer(struct gl_context *ctx,
                                const struct gl_texture_object *texObj,
-                               GLenum pname, GLint *params)
+                               GLenum pname, GLint *params, bool dsa)
 {
    const struct gl_buffer_object *bo = texObj->BufferObject;
    mesa_format texFormat = texObj->_BufferObjectFormat;
    GLenum internalFormat = texObj->BufferObjectFormat;
    GLenum baseFormat = _mesa_get_format_base_format(texFormat);
+   const char *suffix = dsa ? "ture" : "";
 
    if (!bo) {
       /* undefined texture buffer object */
@@ -1294,7 +1528,8 @@ get_tex_level_parameter_buffer(struct gl_context *ctx,
       case GL_TEXTURE_COMPRESSED_IMAGE_SIZE:
          /* Always illegal for GL_TEXTURE_BUFFER */
          _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glGetTexLevelParameter[if]v(pname)");
+                     "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
+                     _mesa_lookup_enum_by_nr(pname));
          break;
 
       /* GL_ARB_texture_float */
@@ -1322,38 +1557,37 @@ get_tex_level_parameter_buffer(struct gl_context *ctx,
 
 invalid_pname:
    _mesa_error(ctx, GL_INVALID_ENUM,
-               "glGetTexLevelParameter[if]v(pname=%s)",
+               "glGetTex%sLevelParameter[if]v(pname=%s)", suffix,
                _mesa_lookup_enum_by_nr(pname));
 }
 
 
-void GLAPIENTRY
-_mesa_GetTexLevelParameterfv( GLenum target, GLint level,
-                              GLenum pname, GLfloat *params )
-{
-   GLint iparam;
-   _mesa_GetTexLevelParameteriv( target, level, pname, &iparam );
-   *params = (GLfloat) iparam;
-}
-
-
-void GLAPIENTRY
-_mesa_GetTexLevelParameteriv( GLenum target, GLint level,
-                              GLenum pname, GLint *params )
+/**
+ * This isn't exposed to the rest of the driver because it is a part of the
+ * OpenGL API that is rarely used.
+ */
+static void
+get_tex_level_parameteriv(struct gl_context *ctx,
+                          struct gl_texture_object *texObj,
+                          GLenum target, GLint level,
+                          GLenum pname, GLint *params,
+                          bool dsa)
 {
-   struct gl_texture_object *texObj;
    GLint maxLevels;
-   GET_CURRENT_CONTEXT(ctx);
+   const char *suffix = dsa ? "ture" : "";
 
+   /* Check for errors */
    if (ctx->Texture.CurrentUnit >= ctx->Const.MaxCombinedTextureImageUnits) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glGetTexLevelParameteriv(current unit)");
+                  "glGetTex%sLevelParameter[if]v("
+                  "current unit >= max combined texture units)", suffix);
       return;
    }
 
-   if (!legal_get_tex_level_parameter_target(ctx, target)) {
+   if (!legal_get_tex_level_parameter_target(ctx, target, dsa)) {
       _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glGetTexLevelParameter[if]v(target=0x%x)", target);
+                  "glGetTex%sLevelParameter[if]v(target=%s)", suffix,
+                  _mesa_lookup_enum_by_nr(target));
       return;
    }
 
@@ -1361,29 +1595,98 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
    assert(maxLevels != 0);
 
    if (level < 0 || level >= maxLevels) {
-      _mesa_error( ctx, GL_INVALID_VALUE, "glGetTexLevelParameter[if]v" );
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetTex%sLevelParameter[if]v(level out of range)", suffix);
       return;
    }
 
+   /* Get the level parameter */
+   if (target == GL_TEXTURE_BUFFER) {
+      get_tex_level_parameter_buffer(ctx, texObj, pname, params, dsa);
+   }
+   else {
+      get_tex_level_parameter_image(ctx, texObj, target,
+                                    level, pname, params, dsa);
+   }
+}
+
+void GLAPIENTRY
+_mesa_GetTexLevelParameterfv( GLenum target, GLint level,
+                              GLenum pname, GLfloat *params )
+{
+   struct gl_texture_object *texObj;
+   GLint iparam = 0; /* the integer query stores nothing when it errors */
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   get_tex_level_parameteriv(ctx, texObj, target, level,
+                             pname, &iparam, false);
+   /* float variant: reuse the integer query, then convert its result */
+   *params = (GLfloat) iparam;
+}
+
+void GLAPIENTRY
+_mesa_GetTexLevelParameteriv( GLenum target, GLint level,
+                              GLenum pname, GLint *params )
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
    texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
 
-   if (target == GL_TEXTURE_BUFFER)
-      get_tex_level_parameter_buffer(ctx, texObj, pname, params);
-   else
-      get_tex_level_parameter_image(ctx, texObj, target, level, pname, params);
+   get_tex_level_parameteriv(ctx, texObj, target, level,
+                             pname, params, false);
 }
 
+void GLAPIENTRY
+_mesa_GetTextureLevelParameterfv(GLuint texture, GLint level,
+                                 GLenum pname, GLfloat *params)
+{
+   struct gl_texture_object *texObj;
+   GLint iparam = 0; /* the integer query stores nothing when it errors */
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glGetTextureLevelParameterfv");
+   if (!texObj)
+      return;
+
+   get_tex_level_parameteriv(ctx, texObj, texObj->Target, level,
+                             pname, &iparam, true);
+   /* float variant: reuse the integer query, then convert its result */
+   *params = (GLfloat) iparam;
+}
 
 void GLAPIENTRY
-_mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
+_mesa_GetTextureLevelParameteriv(GLuint texture, GLint level,
+                                 GLenum pname, GLint *params)
 {
-   struct gl_texture_object *obj;
+   struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   obj = get_texobj(ctx, target, GL_TRUE);
-   if (!obj)
+   texObj = _mesa_lookup_texture_err(ctx, texture,
+                                     "glGetTextureLevelParameteriv");
+   if (!texObj)
       return;
 
+   get_tex_level_parameteriv(ctx, texObj, texObj->Target, level,
+                             pname, params, true);
+}
+
+/**
+ * This isn't exposed to the rest of the driver because it is a part of the
+ * OpenGL API that is rarely used.
+ */
+static void
+get_tex_parameterfv(struct gl_context *ctx,
+                    struct gl_texture_object *obj,
+                    GLenum pname, GLfloat *params, bool dsa)
+{
    _mesa_lock_context_textures(ctx);
    switch (pname) {
       case GL_TEXTURE_MAG_FILTER:
@@ -1596,20 +1899,16 @@ _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params )
 
 invalid_pname:
    _mesa_unlock_context_textures(ctx);
-   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameterfv(pname=0x%x)", pname);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTex%sParameterfv(pname=0x%x)",
+               dsa ? "ture" : "", pname);
 }
 
 
-void GLAPIENTRY
-_mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
+static void
+get_tex_parameteriv(struct gl_context *ctx,
+                    struct gl_texture_object *obj,
+                    GLenum pname, GLint *params, bool dsa)
 {
-   struct gl_texture_object *obj;
-   GET_CURRENT_CONTEXT(ctx);
-
-   obj = get_texobj(ctx, target, GL_TRUE);
-   if (!obj)
-      return;
-
    _mesa_lock_texture(ctx, obj);
    switch (pname) {
       case GL_TEXTURE_MAG_FILTER:
@@ -1658,14 +1957,18 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
       case GL_TEXTURE_MIN_LOD:
          if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx))
             goto invalid_pname;
-
-         *params = (GLint) obj->Sampler.MinLod;
+         /* Per the GL spec's 'Data Conversions' section, a floating-point
+          * value returned by an integer Get function is rounded to nearest.
+          */
+         *params = IROUND(obj->Sampler.MinLod);
          break;
       case GL_TEXTURE_MAX_LOD:
          if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx))
             goto invalid_pname;
-
-         *params = (GLint) obj->Sampler.MaxLod;
+         /* Per the GL spec's 'Data Conversions' section, a floating-point
+          * value returned by an integer Get function is rounded to nearest.
+          */
+         *params = IROUND(obj->Sampler.MaxLod);
          break;
       case GL_TEXTURE_BASE_LEVEL:
          if (!_mesa_is_desktop_gl(ctx) && !_mesa_is_gles3(ctx))
@@ -1679,7 +1982,10 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
       case GL_TEXTURE_MAX_ANISOTROPY_EXT:
          if (!ctx->Extensions.EXT_texture_filter_anisotropic)
             goto invalid_pname;
-         *params = (GLint) obj->Sampler.MaxAnisotropy;
+         /* Per the GL spec's 'Data Conversions' section, a floating-point
+          * value returned by an integer Get function is rounded to nearest.
+          */
+         *params = IROUND(obj->Sampler.MaxAnisotropy);
          break;
       case GL_GENERATE_MIPMAP_SGIS:
          if (ctx->API != API_OPENGL_COMPAT && ctx->API != API_OPENGLES)
@@ -1818,9 +2124,73 @@ _mesa_GetTexParameteriv( GLenum target, GLenum pname, GLint *params )
 
 invalid_pname:
    _mesa_unlock_texture(ctx, obj);
-   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTexParameteriv(pname=0x%x)", pname);
+   _mesa_error(ctx, GL_INVALID_ENUM, "glGetTex%sParameteriv(pname=0x%x)",
+               dsa ? "ture" : "", pname);
+}
+
+static void
+get_tex_parameterIiv(struct gl_context *ctx,
+                     struct gl_texture_object *obj,
+                     GLenum pname, GLint *params, bool dsa)
+{
+   switch (pname) {
+   case GL_TEXTURE_BORDER_COLOR:
+      COPY_4V(params, obj->Sampler.BorderColor.i);
+      break;
+   default:
+      get_tex_parameteriv(ctx, obj, pname, params, dsa);
+   }
+}
+
+static void
+get_tex_parameterIuiv(struct gl_context *ctx,
+                      struct gl_texture_object *obj,
+                      GLenum pname, GLuint *params, bool dsa)
+{
+   switch (pname) {
+   case GL_TEXTURE_BORDER_COLOR:
+      COPY_4V(params, obj->Sampler.BorderColor.ui); /* unsigned view */
+      break;
+   default:
+      {
+         GLint ip[4] = { 0, 0, 0, 0 }; /* defined even on invalid pname */
+         get_tex_parameteriv(ctx, obj, pname, ip, dsa);
+         params[0] = ip[0];
+         if (pname == GL_TEXTURE_SWIZZLE_RGBA_EXT ||
+             pname == GL_TEXTURE_CROP_RECT_OES) {
+            params[1] = ip[1]; /* only these two pnames are 4-valued */
+            params[2] = ip[2];
+            params[3] = ip[3];
+         }
+      }
+   }
+}
+
+void GLAPIENTRY
+_mesa_GetTexParameterfv(GLenum target, GLenum pname, GLfloat *params)
+{
+   struct gl_texture_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = get_texobj_by_target(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
+
+   get_tex_parameterfv(ctx, obj, pname, params, false);
 }
 
+void GLAPIENTRY
+_mesa_GetTexParameteriv(GLenum target, GLenum pname, GLint *params)
+{
+   struct gl_texture_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = get_texobj_by_target(ctx, target, GL_TRUE);
+   if (!obj)
+      return;
+
+   get_tex_parameteriv(ctx, obj, pname, params, false);
+}
 
 /** New in GL 3.0 */
 void GLAPIENTRY
@@ -1829,17 +2199,11 @@ _mesa_GetTexParameterIiv(GLenum target, GLenum pname, GLint *params)
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   texObj = get_texobj(ctx, target, GL_TRUE);
+   texObj = get_texobj_by_target(ctx, target, GL_TRUE);
    if (!texObj)
       return;
 
-   switch (pname) {
-   case GL_TEXTURE_BORDER_COLOR:
-      COPY_4V(params, texObj->Sampler.BorderColor.i);
-      break;
-   default:
-      _mesa_GetTexParameteriv(target, pname, params);
-   }
+   get_tex_parameterIiv(ctx, texObj, pname, params, false);
 }
 
 
@@ -1850,25 +2214,79 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)
    struct gl_texture_object *texObj;
    GET_CURRENT_CONTEXT(ctx);
 
-   texObj = get_texobj(ctx, target, GL_TRUE);
+   texObj = get_texobj_by_target(ctx, target, GL_TRUE);
    if (!texObj)
       return;
 
-   switch (pname) {
-   case GL_TEXTURE_BORDER_COLOR:
-      COPY_4V(params, texObj->Sampler.BorderColor.i);
-      break;
-   default:
-      {
-         GLint ip[4];
-         _mesa_GetTexParameteriv(target, pname, ip);
-         params[0] = ip[0];
-         if (pname == GL_TEXTURE_SWIZZLE_RGBA_EXT ||
-             pname == GL_TEXTURE_CROP_RECT_OES) {
-            params[1] = ip[1];
-            params[2] = ip[2];
-            params[3] = ip[3];
-         }
-      }
+   get_tex_parameterIuiv(ctx, texObj, pname, params, false);
+}
+
+
+void GLAPIENTRY
+_mesa_GetTextureParameterfv(GLuint texture, GLenum pname, GLfloat *params)
+{
+   struct gl_texture_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = get_texobj_by_name(ctx, texture, GL_TRUE);
+   if (!obj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTextureParameterfv(texture)");
+      return;
+   }
+
+   get_tex_parameterfv(ctx, obj, pname, params, true);
+}
+
+void GLAPIENTRY
+_mesa_GetTextureParameteriv(GLuint texture, GLenum pname, GLint *params)
+{
+   struct gl_texture_object *obj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   obj = get_texobj_by_name(ctx, texture, GL_TRUE);
+   if (!obj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTextureParameteriv(texture)");
+      return;
+   }
+
+   get_tex_parameteriv(ctx, obj, pname, params, true);
+}
+
+void GLAPIENTRY
+_mesa_GetTextureParameterIiv(GLuint texture, GLenum pname, GLint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_TRUE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTextureParameterIiv(texture)");
+      return;
    }
+
+   get_tex_parameterIiv(ctx, texObj, pname, params, true);
+}
+
+
+void GLAPIENTRY
+_mesa_GetTextureParameterIuiv(GLuint texture, GLenum pname, GLuint *params)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = get_texobj_by_name(ctx, texture, GL_TRUE);
+   if (!texObj) {
+      /* User passed a non-generated name. */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTextureParameterIuiv(texture)");
+      return;
+   }
+
+   get_tex_parameterIuiv(ctx, texObj, pname, params, true);
 }
diff --git a/mesalib/src/mesa/main/texparam.h b/mesalib/src/mesa/main/texparam.h
index 557a7bcb4..96defbec2 100644
--- a/mesalib/src/mesa/main/texparam.h
+++ b/mesalib/src/mesa/main/texparam.h
@@ -29,6 +29,49 @@
 
 #include "main/glheader.h"
 
+/**
+ * \name Internal functions
+ */
+/*@{*/
+
+extern void
+_mesa_texture_parameterf(struct gl_context *ctx,
+                         struct gl_texture_object *texObj,
+                         GLenum pname, GLfloat param, bool dsa);
+
+extern void
+_mesa_texture_parameterfv(struct gl_context *ctx,
+                          struct gl_texture_object *texObj,
+                          GLenum pname, const GLfloat *params, bool dsa);
+
+
+extern void
+_mesa_texture_parameteri(struct gl_context *ctx,
+                         struct gl_texture_object *texObj,
+                         GLenum pname, GLint param, bool dsa);
+
+extern void
+_mesa_texture_parameteriv(struct gl_context *ctx,
+                          struct gl_texture_object *texObj,
+                          GLenum pname, const GLint *params, bool dsa);
+
+extern void
+_mesa_texture_parameterIiv(struct gl_context *ctx,
+                           struct gl_texture_object *texObj,
+                           GLenum pname, const GLint *params, bool dsa);
+
+extern void
+_mesa_texture_parameterIuiv(struct gl_context *ctx,
+                            struct gl_texture_object *texObj,
+                            GLenum pname, const GLuint *params, bool dsa);
+
+/*@}*/
+
+/**
+ * \name API functions
+ */
+/*@{*/
+
 
 extern void GLAPIENTRY
 _mesa_GetTexLevelParameterfv( GLenum target, GLint level,
@@ -39,6 +82,15 @@ _mesa_GetTexLevelParameteriv( GLenum target, GLint level,
                               GLenum pname, GLint *params );
 
 extern void GLAPIENTRY
+_mesa_GetTextureLevelParameterfv(GLuint texture, GLint level,
+                                 GLenum pname, GLfloat *params);
+
+extern void GLAPIENTRY
+_mesa_GetTextureLevelParameteriv(GLuint texture, GLint level,
+                                 GLenum pname, GLint *params);
+
+
+extern void GLAPIENTRY
 _mesa_GetTexParameterfv( GLenum target, GLenum pname, GLfloat *params );
 
 extern void GLAPIENTRY
@@ -52,24 +104,52 @@ _mesa_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params);
 
 
 extern void GLAPIENTRY
+_mesa_GetTextureParameterfv(GLuint texture, GLenum pname, GLfloat *params);
+
+extern void GLAPIENTRY
+_mesa_GetTextureParameteriv(GLuint texture, GLenum pname, GLint *params);
+
+extern void GLAPIENTRY
+_mesa_GetTextureParameterIiv(GLuint texture, GLenum pname, GLint *params);
+
+extern void GLAPIENTRY
+_mesa_GetTextureParameterIuiv(GLuint texture, GLenum pname, GLuint *params);
+
+
+extern void GLAPIENTRY
 _mesa_TexParameterfv( GLenum target, GLenum pname, const GLfloat *params );
 
 extern void GLAPIENTRY
 _mesa_TexParameterf( GLenum target, GLenum pname, GLfloat param );
 
-
 extern void GLAPIENTRY
 _mesa_TexParameteri( GLenum target, GLenum pname, GLint param );
 
 extern void GLAPIENTRY
 _mesa_TexParameteriv( GLenum target, GLenum pname, const GLint *params );
 
-
 extern void GLAPIENTRY
 _mesa_TexParameterIiv(GLenum target, GLenum pname, const GLint *params);
 
 extern void GLAPIENTRY
 _mesa_TexParameterIuiv(GLenum target, GLenum pname, const GLuint *params);
 
+extern void GLAPIENTRY
+_mesa_TextureParameterfv(GLuint texture, GLenum pname, const GLfloat *params);
+
+extern void GLAPIENTRY
+_mesa_TextureParameterf(GLuint texture, GLenum pname, GLfloat param);
+
+extern void GLAPIENTRY
+_mesa_TextureParameteri(GLuint texture, GLenum pname, GLint param);
+
+extern void GLAPIENTRY
+_mesa_TextureParameteriv(GLuint texture, GLenum pname, const GLint *params);
+
+extern void GLAPIENTRY
+_mesa_TextureParameterIiv(GLuint texture, GLenum pname, const GLint *params);
+
+extern void GLAPIENTRY
+_mesa_TextureParameterIuiv(GLuint texture, GLenum pname, const GLuint *params);
 
 #endif /* TEXPARAM_H */
diff --git a/mesalib/src/mesa/main/texstate.c b/mesalib/src/mesa/main/texstate.c
index e0f085218..99c7c8178 100644
--- a/mesalib/src/mesa/main/texstate.c
+++ b/mesalib/src/mesa/main/texstate.c
@@ -22,7 +22,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-/** 
+/**
  * \file texstate.c
  *
  * Texture state handling.
@@ -40,7 +40,7 @@
 #include "teximage.h"
 #include "texstate.h"
 #include "mtypes.h"
-#include "bitset.h"
+#include "util/bitset.h"
 
 
 /**
@@ -153,7 +153,7 @@ _mesa_print_texunit_state( struct gl_context *ctx, GLuint unit )
 /**
  * Convert "classic" texture environment to ARB_texture_env_combine style
  * environments.
- * 
+ *
  * \param state  texture_env_combine state vector to be filled-in.
  * \param mode   Classic texture environment mode (i.e., \c GL_REPLACE,
  *               \c GL_BLEND, \c GL_DECAL, etc.).
@@ -186,7 +186,7 @@ calculate_derived_texenv( struct gl_tex_env_combine_state *state,
    case GL_YCBCR_MESA:
       state->SourceA[0] = GL_PREVIOUS;
       break;
-      
+
    default:
       _mesa_problem(NULL,
                     "Invalid texBaseFormat 0x%x in calculate_derived_texenv",
@@ -203,7 +203,7 @@ calculate_derived_texenv( struct gl_tex_env_combine_state *state,
       mode_rgb = (texBaseFormat == GL_ALPHA) ? GL_REPLACE : mode;
       mode_a   = mode;
       break;
-   
+
    case GL_DECAL:
       mode_rgb = GL_INTERPOLATE;
       mode_a   = GL_REPLACE;
@@ -272,7 +272,7 @@ calculate_derived_texenv( struct gl_tex_env_combine_state *state,
                     mode);
       return;
    }
-   
+
    state->ModeRGB = (state->SourceRGB[0] != GL_PREVIOUS)
        ? mode_rgb : GL_REPLACE;
    state->ModeA   = (state->SourceA[0]   != GL_PREVIOUS)
@@ -290,9 +290,7 @@ _mesa_ActiveTexture(GLenum texture)
    GLuint k;
    GET_CURRENT_CONTEXT(ctx);
 
-   /* See OpenGL spec for glActiveTexture: */
-   k = MAX2(ctx->Const.MaxCombinedTextureImageUnits,
-            ctx->Const.MaxTextureCoordUnits);
+   k = _mesa_max_tex_unit(ctx);
 
    ASSERT(k <= Elements(ctx->Texture.Unit));
 
@@ -769,11 +767,11 @@ _mesa_update_texture( struct gl_context *ctx, GLuint new_state )
 
 /**
  * Allocate the proxy textures for the given context.
- * 
+ *
  * \param ctx the context to allocate proxies for.
- * 
+ *
  * \return GL_TRUE on success, or GL_FALSE on failure
- * 
+ *
  * If run out of memory part way through the allocations, clean up and return
  * GL_FALSE.
  */
@@ -944,7 +942,7 @@ _mesa_free_texture_data(struct gl_context *ctx)
 
 /**
  * Update the default texture objects in the given context to reference those
- * specified in the shared state and release those referencing the old 
+ * specified in the shared state and release those referencing the old
  * shared state.
  */
 void
diff --git a/mesalib/src/mesa/main/texstate.h b/mesalib/src/mesa/main/texstate.h
index 5cd1684f2..abc07eafb 100644
--- a/mesalib/src/mesa/main/texstate.h
+++ b/mesalib/src/mesa/main/texstate.h
@@ -33,9 +33,18 @@
 
 
 #include "compiler.h"
+#include "enums.h"
+#include "macros.h"
 #include "mtypes.h"
 
 
+static inline struct gl_texture_unit *
+_mesa_get_tex_unit(struct gl_context *ctx, GLuint unit)
+{
+   ASSERT(unit < Elements(ctx->Texture.Unit));
+   return &(ctx->Texture.Unit[unit]);
+}
+
 /**
  * Return pointer to current texture unit.
  * This the texture unit set by glActiveTexture(), not glClientActiveTexture().
@@ -43,8 +52,33 @@
 static inline struct gl_texture_unit *
 _mesa_get_current_tex_unit(struct gl_context *ctx)
 {
-   ASSERT(ctx->Texture.CurrentUnit < Elements(ctx->Texture.Unit));
-   return &(ctx->Texture.Unit[ctx->Texture.CurrentUnit]);
+   return _mesa_get_tex_unit(ctx, ctx->Texture.CurrentUnit);
+}
+
+static inline GLuint
+_mesa_max_tex_unit(struct gl_context *ctx)
+{
+   /* See OpenGL spec for glActiveTexture: */
+   return MAX2(ctx->Const.MaxCombinedTextureImageUnits,
+               ctx->Const.MaxTextureCoordUnits);
+}
+
+static inline struct gl_texture_unit *
+_mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func)
+{
+   if (unit < _mesa_max_tex_unit(ctx))
+      return _mesa_get_tex_unit(ctx, unit);
+
+   /* Note: This error is a precedent set by glBindTextures. From the GL 4.5
+    * specification (30.10.2014) Section 8.1 ("Texture Objects"):
+    *
+    *    "An INVALID_OPERATION error is generated if first + count is greater
+    *     than the number of texture image units supported by the
+    *     implementation."
+    */
+   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
+               _mesa_lookup_enum_by_nr(GL_TEXTURE0+unit));
+   return NULL;
 }
 
 
diff --git a/mesalib/src/mesa/main/texstorage.c b/mesalib/src/mesa/main/texstorage.c
index 897d5891a..3ace5e8bb 100644
--- a/mesalib/src/mesa/main/texstorage.c
+++ b/mesalib/src/mesa/main/texstorage.c
@@ -42,7 +42,7 @@
 #include "textureview.h"
 #include "mtypes.h"
 #include "glformats.h"
-
+#include "hash.h"
 
 
 /**
@@ -242,10 +242,10 @@ _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
  * checks at glTexImage* time.
  */
 GLboolean
-_mesa_alloc_texture_storage(struct gl_context *ctx,
-                            struct gl_texture_object *texObj,
-                            GLsizei levels, GLsizei width,
-                            GLsizei height, GLsizei depth)
+_mesa_AllocTextureStorage_sw(struct gl_context *ctx,
+                             struct gl_texture_object *texObj,
+                             GLsizei levels, GLsizei width,
+                             GLsizei height, GLsizei depth)
 {
    const int numFaces = _mesa_num_tex_faces(texObj->Target);
    int face;
@@ -274,34 +274,26 @@ _mesa_alloc_texture_storage(struct gl_context *ctx,
  * \return GL_TRUE if any error, GL_FALSE otherwise.
  */
 static GLboolean
-tex_storage_error_check(struct gl_context *ctx, GLuint dims, GLenum target,
+tex_storage_error_check(struct gl_context *ctx,
+                        struct gl_texture_object *texObj,
+                        GLuint dims, GLenum target,
                         GLsizei levels, GLenum internalformat,
-                        GLsizei width, GLsizei height, GLsizei depth)
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        bool dsa)
 {
-   struct gl_texture_object *texObj;
+   const char* suffix = dsa ? "ture" : "";
 
-   if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glTexStorage%uD(internalformat = %s)", dims,
-                  _mesa_lookup_enum_by_nr(internalformat));
-      return GL_TRUE;
-   }
+   /* Legal format checking has been moved to texstorage and texturestorage in
+    * order to allow meta functions to use legacy formats. */
 
    /* size check */
    if (width < 1 || height < 1 || depth < 1) {
       _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glTexStorage%uD(width, height or depth < 1)", dims);
+                  "glTex%sStorage%uD(width, height or depth < 1)",
+                  suffix, dims);
       return GL_TRUE;
    }  
 
-   /* target check */
-   if (!legal_texobj_target(ctx, dims, target)) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glTexStorage%uD(illegal target=%s)",
-                  dims, _mesa_lookup_enum_by_nr(target));
-      return GL_TRUE;
-   }
-
    /* From section 3.8.6, page 146 of OpenGL ES 3.0 spec:
     *
     *    "The ETC2/EAC texture compression algorithm supports only
@@ -315,50 +307,54 @@ tex_storage_error_check(struct gl_context *ctx, GLuint dims, GLenum target,
        && !_mesa_target_can_be_compressed(ctx, target, internalformat)) {
       _mesa_error(ctx, _mesa_is_desktop_gl(ctx)?
                   GL_INVALID_ENUM : GL_INVALID_OPERATION,
-                  "glTexStorage3D(internalformat = %s)",
+                  "glTex%sStorage%dD(internalformat = %s)", suffix, dims,
                   _mesa_lookup_enum_by_nr(internalformat));
    }
 
    /* levels check */
    if (levels < 1) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "glTexStorage%uD(levels < 1)",
-                  dims);
+      _mesa_error(ctx, GL_INVALID_VALUE, "glTex%sStorage%uD(levels < 1)",
+                  suffix, dims);
       return GL_TRUE;
    }  
 
    /* check levels against maximum (note different error than above) */
    if (levels > (GLint) _mesa_max_texture_levels(ctx, target)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glTexStorage%uD(levels too large)", dims);
+                  "glTex%sStorage%uD(levels too large)",
+                  suffix, dims);
       return GL_TRUE;
    }
 
    /* check levels against width/height/depth */
    if (levels > _mesa_get_tex_max_num_levels(target, width, height, depth)) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glTexStorage%uD(too many levels for max texture dimension)",
-                  dims);
+                  "glTex%sStorage%uD(too many levels"
+                  " for max texture dimension)",
+                  suffix, dims);
       return GL_TRUE;
    }
 
    /* non-default texture object check */
-   texObj = _mesa_get_current_tex_object(ctx, target);
    if (!_mesa_is_proxy_texture(target) && (!texObj || (texObj->Name == 0))) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glTexStorage%uD(texture object 0)", dims);
+                  "glTex%sStorage%uD(texture object 0)",
+                  suffix, dims);
       return GL_TRUE;
    }
 
    /* Check if texObj->Immutable is set */
    if (!_mesa_is_proxy_texture(target) && texObj->Immutable) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "glTexStorage%uD(immutable)",
-                  dims);
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glTex%sStorage%uD(immutable)",
+                  suffix, dims);
       return GL_TRUE;
    }
 
    /* additional checks for depth textures */
    if (!_mesa_legal_texture_base_format_for_target(ctx, target, internalformat,
-                                                   dims, "glTexStorage"))
+                                                   dims, dsa ?
+                                                   "glTextureStorage" :
+                                                   "glTexStorage"))
       return GL_TRUE;
 
    return GL_FALSE;
@@ -366,32 +362,27 @@ tex_storage_error_check(struct gl_context *ctx, GLuint dims, GLenum target,
 
 
 /**
- * Helper used by _mesa_TexStorage1/2/3D().
+ * Helper that does the storage allocation for _mesa_TexStorage1/2/3D()
+ * and _mesa_TextureStorage1/2/3D().
  */
-static void
-texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
-           GLsizei width, GLsizei height, GLsizei depth)
+void
+_mesa_texture_storage(struct gl_context *ctx, GLuint dims,
+                      struct gl_texture_object *texObj,
+                      GLenum target, GLsizei levels,
+                      GLenum internalformat, GLsizei width,
+                      GLsizei height, GLsizei depth, bool dsa)
 {
-   struct gl_texture_object *texObj;
    GLboolean sizeOK, dimensionsOK;
    mesa_format texFormat;
+   const char* suffix = dsa ? "ture" : "";
 
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
-      _mesa_debug(ctx, "glTexStorage%uD %s %d %s %d %d %d\n",
-                  dims,
-                  _mesa_lookup_enum_by_nr(target), levels,
-                  _mesa_lookup_enum_by_nr(internalformat),
-                  width, height, depth);
+   assert(texObj);
 
-   if (tex_storage_error_check(ctx, dims, target, levels,
-                               internalformat, width, height, depth)) {
+   if (tex_storage_error_check(ctx, texObj, dims, target, levels,
+                               internalformat, width, height, depth, dsa)) {
       return; /* error was recorded */
    }
 
-   texObj = _mesa_get_current_tex_object(ctx, target);
-   assert(texObj);
 
    texFormat = _mesa_choose_texture_format(ctx, texObj, target, 0,
                                            internalformat, GL_NONE, GL_NONE);
@@ -404,7 +395,7 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
    sizeOK = ctx->Driver.TestProxyTexImage(ctx, target, 0, texFormat,
                                           width, height, depth, 0);
 
-   if (_mesa_is_proxy_texture(texObj->Target)) {
+   if (_mesa_is_proxy_texture(target)) {
       if (dimensionsOK && sizeOK) {
          initialize_texture_fields(ctx, texObj, levels, width, height, depth,
                                    internalformat, texFormat);
@@ -417,13 +408,15 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
    else {
       if (!dimensionsOK) {
          _mesa_error(ctx, GL_INVALID_VALUE,
-                     "glTexStorage%uD(invalid width, height or depth)", dims);
+                     "glTex%sStorage%uD(invalid width, height or depth)",
+                     suffix, dims);
          return;
       }
 
       if (!sizeOK) {
          _mesa_error(ctx, GL_OUT_OF_MEMORY,
-                     "glTexStorage%uD(texture too large)", dims);
+                     "glTex%sStorage%uD(texture too large)",
+                     suffix, dims);
       }
 
       assert(levels > 0);
@@ -445,7 +438,8 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
           * state but this puts things in a consistent state.
           */
          clear_texture_fields(ctx, texObj);
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexStorage%uD", dims);
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex%sStorage%uD",
+                     suffix, dims);
          return;
       }
 
@@ -454,6 +448,94 @@ texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
    }
 }
 
+/**
+ * Helper used by _mesa_TexStorage1/2/3D().
+ */
+static void
+texstorage(GLuint dims, GLenum target, GLsizei levels, GLenum internalformat,
+           GLsizei width, GLsizei height, GLsizei depth)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* target check */
+   /* This is done here so that _mesa_texture_storage can receive unsized
+    * formats. */
+   if (!legal_texobj_target(ctx, dims, target)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glTexStorage%uD(illegal target=%s)",
+                  dims, _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx, "glTexStorage%uD %s %d %s %d %d %d\n",
+                  dims,
+                  _mesa_lookup_enum_by_nr(target), levels,
+                  _mesa_lookup_enum_by_nr(internalformat),
+                  width, height, depth);
+   /* Check the format to make sure it is sized. */
+   if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glTexStorage%uD(internalformat = %s)", dims,
+                  _mesa_lookup_enum_by_nr(internalformat));
+      return;
+   }
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+      return;
+
+   _mesa_texture_storage(ctx, dims, texObj, target, levels,
+                         internalformat, width, height, depth, false);
+}
+
+/**
+ * Helper used by _mesa_TextureStorage1/2/3D().
+ */
+static void
+texturestorage(GLuint dims, GLuint texture, GLsizei levels,
+               GLenum internalformat, GLsizei width, GLsizei height,
+               GLsizei depth)
+{
+   struct gl_texture_object *texObj;
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
+      _mesa_debug(ctx, "glTextureStorage%uD %d %d %s %d %d %d\n",
+                  dims, texture, levels,
+                  _mesa_lookup_enum_by_nr(internalformat),
+                  width, height, depth);
+
+   /* Check the format to make sure it is sized. */
+   if (!_mesa_is_legal_tex_storage_format(ctx, internalformat)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glTextureStorage%uD(internalformat = %s)", dims,
+                  _mesa_lookup_enum_by_nr(internalformat));
+      return;
+   }
+
+   /* Get the texture object by Name. */
+   texObj = _mesa_lookup_texture(ctx, texture);
+   if (!texObj) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glTextureStorage%uD(texture = %d)", dims, texture);
+      return;
+   }
+
+   /* target check */
+   /* This is done here so that _mesa_texture_storage can receive unsized
+    * formats. */
+   if (!legal_texobj_target(ctx, dims, texObj->Target)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glTextureStorage%uD(illegal target=%s)",
+                  dims, _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
+   _mesa_texture_storage(ctx, dims, texObj, texObj->Target,
+                         levels, internalformat, width, height, depth, true);
+}
 
 void GLAPIENTRY
 _mesa_TexStorage1D(GLenum target, GLsizei levels, GLenum internalformat,
@@ -478,6 +560,28 @@ _mesa_TexStorage3D(GLenum target, GLsizei levels, GLenum internalformat,
    texstorage(3, target, levels, internalformat, width, height, depth);
 }
 
+void GLAPIENTRY
+_mesa_TextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat,
+                       GLsizei width)
+{
+   texturestorage(1, texture, levels, internalformat, width, 1, 1);
+}
+
+
+void GLAPIENTRY
+_mesa_TextureStorage2D(GLuint texture, GLsizei levels,
+                       GLenum internalformat,
+                       GLsizei width, GLsizei height)
+{
+   texturestorage(2, texture, levels, internalformat, width, height, 1);
+}
+
+void GLAPIENTRY
+_mesa_TextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat,
+                       GLsizei width, GLsizei height, GLsizei depth)
+{
+   texturestorage(3, texture, levels, internalformat, width, height, depth);
+}
 
 
 /*
diff --git a/mesalib/src/mesa/main/texstorage.h b/mesalib/src/mesa/main/texstorage.h
index ec4f71374..6f5495f38 100644
--- a/mesalib/src/mesa/main/texstorage.h
+++ b/mesalib/src/mesa/main/texstorage.h
@@ -26,6 +26,24 @@
 #ifndef TEXSTORAGE_H
 #define TEXSTORAGE_H
 
+/**
+ * \name Internal functions
+ */
+/*@{*/
+
+extern void
+_mesa_texture_storage(struct gl_context *ctx, GLuint dims,
+                      struct gl_texture_object *texObj,
+                      GLenum target, GLsizei levels,
+                      GLenum internalformat, GLsizei width,
+                      GLsizei height, GLsizei depth, bool dsa);
+
+/*@}*/
+
+/**
+ * \name API functions
+ */
+/*@{*/
 
 extern void GLAPIENTRY
 _mesa_TexStorage1D(GLenum target, GLsizei levels, GLenum internalformat,
@@ -41,6 +59,19 @@ extern void GLAPIENTRY
 _mesa_TexStorage3D(GLenum target, GLsizei levels, GLenum internalformat,
                    GLsizei width, GLsizei height, GLsizei depth);
 
+extern void GLAPIENTRY
+_mesa_TextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat,
+                       GLsizei width);
+
+
+extern void GLAPIENTRY
+_mesa_TextureStorage2D(GLuint texture, GLsizei levels, GLenum internalformat,
+                       GLsizei width, GLsizei height);
+
+
+extern void GLAPIENTRY
+_mesa_TextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat,
+                       GLsizei width, GLsizei height, GLsizei depth);
 
 
 extern void GLAPIENTRY
@@ -62,9 +93,9 @@ extern GLboolean
 _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat);
 
 extern GLboolean
-_mesa_alloc_texture_storage(struct gl_context *ctx,
-                            struct gl_texture_object *texObj,
-                            GLsizei levels, GLsizei width,
-                            GLsizei height, GLsizei depth);
+_mesa_AllocTextureStorage_sw(struct gl_context *ctx,
+                             struct gl_texture_object *texObj,
+                             GLsizei levels, GLsizei width,
+                             GLsizei height, GLsizei depth);
 
 #endif /* TEXSTORAGE_H */
diff --git a/mesalib/src/mesa/main/texstore.c b/mesalib/src/mesa/main/texstore.c
index 50aa1fd5e..7039cdf81 100644
--- a/mesalib/src/mesa/main/texstore.c
+++ b/mesalib/src/mesa/main/texstore.c
@@ -73,6 +73,7 @@
 #include "texstore.h"
 #include "enums.h"
 #include "glformats.h"
+#include "pixeltransfer.h"
 #include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 #include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
 
@@ -87,577 +88,6 @@ enum {
  * Texture image storage function.
  */
 typedef GLboolean (*StoreTexImageFunc)(TEXSTORE_PARAMS);
-
-
-enum {
-   IDX_LUMINANCE = 0,
-   IDX_ALPHA,
-   IDX_INTENSITY,
-   IDX_LUMINANCE_ALPHA,
-   IDX_RGB,
-   IDX_RGBA,
-   IDX_RED,
-   IDX_GREEN,
-   IDX_BLUE,
-   IDX_BGR,
-   IDX_BGRA,
-   IDX_ABGR,
-   IDX_RG,
-   MAX_IDX
-};
-
-#define MAP1(x)       MAP4(x, ZERO, ZERO, ZERO)
-#define MAP2(x,y)     MAP4(x, y, ZERO, ZERO)
-#define MAP3(x,y,z)   MAP4(x, y, z, ZERO)
-#define MAP4(x,y,z,w) { x, y, z, w, ZERO, ONE }
-
-
-static const struct {
-   GLubyte format_idx;
-   GLubyte to_rgba[6];
-   GLubyte from_rgba[6];
-} mappings[MAX_IDX] = 
-{
-   {
-      IDX_LUMINANCE,
-      MAP4(0,0,0,ONE),
-      MAP1(0)
-   },
-
-   {
-      IDX_ALPHA,
-      MAP4(ZERO, ZERO, ZERO, 0),
-      MAP1(3)
-   },
-
-   {
-      IDX_INTENSITY,
-      MAP4(0, 0, 0, 0),
-      MAP1(0),
-   },
-
-   {
-      IDX_LUMINANCE_ALPHA,
-      MAP4(0,0,0,1),
-      MAP2(0,3)
-   },
-
-   {
-      IDX_RGB,
-      MAP4(0,1,2,ONE),
-      MAP3(0,1,2)
-   },
-
-   {
-      IDX_RGBA,
-      MAP4(0,1,2,3),
-      MAP4(0,1,2,3),
-   },
-
-   {
-      IDX_RED,
-      MAP4(0, ZERO, ZERO, ONE),
-      MAP1(0),
-   },
-
-   {
-      IDX_GREEN,
-      MAP4(ZERO, 0, ZERO, ONE),
-      MAP1(1),
-   },
-
-   {
-      IDX_BLUE,
-      MAP4(ZERO, ZERO, 0, ONE),
-      MAP1(2),
-   },
-
-   {
-      IDX_BGR,
-      MAP4(2,1,0,ONE),
-      MAP3(2,1,0)
-   },
-
-   {
-      IDX_BGRA,
-      MAP4(2,1,0,3),
-      MAP4(2,1,0,3)
-   },
-
-   {
-      IDX_ABGR,
-      MAP4(3,2,1,0),
-      MAP4(3,2,1,0)
-   },
-
-   {
-      IDX_RG,
-      MAP4(0, 1, ZERO, ONE),
-      MAP2(0, 1)
-   },
-};
-
-
-
-/**
- * Convert a GL image format enum to an IDX_* value (see above).
- */
-static int
-get_map_idx(GLenum value)
-{
-   switch (value) {
-   case GL_LUMINANCE:
-   case GL_LUMINANCE_INTEGER_EXT:
-      return IDX_LUMINANCE;
-   case GL_ALPHA:
-   case GL_ALPHA_INTEGER:
-      return IDX_ALPHA;
-   case GL_INTENSITY:
-      return IDX_INTENSITY;
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
-      return IDX_LUMINANCE_ALPHA;
-   case GL_RGB:
-   case GL_RGB_INTEGER:
-      return IDX_RGB;
-   case GL_RGBA:
-   case GL_RGBA_INTEGER:
-      return IDX_RGBA;
-   case GL_RED:
-   case GL_RED_INTEGER:
-      return IDX_RED;
-   case GL_GREEN:
-      return IDX_GREEN;
-   case GL_BLUE:
-      return IDX_BLUE;
-   case GL_BGR:
-   case GL_BGR_INTEGER:
-      return IDX_BGR;
-   case GL_BGRA:
-   case GL_BGRA_INTEGER:
-      return IDX_BGRA;
-   case GL_ABGR_EXT:
-      return IDX_ABGR;
-   case GL_RG:
-   case GL_RG_INTEGER:
-      return IDX_RG;
-   default:
-      _mesa_problem(NULL, "Unexpected inFormat %s",
-                    _mesa_lookup_enum_by_nr(value));
-      return 0;
-   }
-}   
-
-
-/**
- * When promoting texture formats (see below) we need to compute the
- * mapping of dest components back to source components.
- * This function does that.
- * \param inFormat  the incoming format of the texture
- * \param outFormat  the final texture format
- * \return map[6]  a full 6-component map
- */
-static void
-compute_component_mapping(GLenum inFormat, GLenum outFormat, 
-			  GLubyte *map)
-{
-   const int inFmt = get_map_idx(inFormat);
-   const int outFmt = get_map_idx(outFormat);
-   const GLubyte *in2rgba = mappings[inFmt].to_rgba;
-   const GLubyte *rgba2out = mappings[outFmt].from_rgba;
-   int i;
-   
-   for (i = 0; i < 4; i++)
-      map[i] = in2rgba[rgba2out[i]];
-
-   map[ZERO] = ZERO;
-   map[ONE] = ONE;   
-
-#if 0
-   printf("from %x/%s to %x/%s map %d %d %d %d %d %d\n",
-	  inFormat, _mesa_lookup_enum_by_nr(inFormat),
-	  outFormat, _mesa_lookup_enum_by_nr(outFormat),
-	  map[0], 
-	  map[1], 
-	  map[2], 
-	  map[3], 
-	  map[4], 
-	  map[5]); 
-#endif
-}
-
-
-/**
- * Make a temporary (color) texture image with GLfloat components.
- * Apply all needed pixel unpacking and pixel transfer operations.
- * Note that there are both logicalBaseFormat and textureBaseFormat parameters.
- * Suppose the user specifies GL_LUMINANCE as the internal texture format
- * but the graphics hardware doesn't support luminance textures.  So, we might
- * use an RGB hardware format instead.
- * If logicalBaseFormat != textureBaseFormat we have some extra work to do.
- *
- * \param ctx  the rendering context
- * \param dims  image dimensions: 1, 2 or 3
- * \param logicalBaseFormat  basic texture derived from the user's
- *    internal texture format value
- * \param textureBaseFormat  the actual basic format of the texture
- * \param srcWidth  source image width
- * \param srcHeight  source image height
- * \param srcDepth  source image depth
- * \param srcFormat  source image format
- * \param srcType  source image type
- * \param srcAddr  source image address
- * \param srcPacking  source image pixel packing
- * \return resulting image with format = textureBaseFormat and type = GLfloat.
- */
-GLfloat *
-_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
-			    GLenum logicalBaseFormat,
-			    GLenum textureBaseFormat,
-			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
-			    GLenum srcFormat, GLenum srcType,
-			    const GLvoid *srcAddr,
-			    const struct gl_pixelstore_attrib *srcPacking,
-			    GLbitfield transferOps)
-{
-   GLfloat *tempImage;
-   const GLint components = _mesa_components_in_format(logicalBaseFormat);
-   const GLint srcStride =
-      _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-   GLfloat *dst;
-   GLint img, row;
-
-   ASSERT(dims >= 1 && dims <= 3);
-
-   ASSERT(logicalBaseFormat == GL_RGBA ||
-          logicalBaseFormat == GL_RGB ||
-          logicalBaseFormat == GL_RG ||
-          logicalBaseFormat == GL_RED ||
-          logicalBaseFormat == GL_LUMINANCE_ALPHA ||
-          logicalBaseFormat == GL_LUMINANCE ||
-          logicalBaseFormat == GL_ALPHA ||
-          logicalBaseFormat == GL_INTENSITY ||
-          logicalBaseFormat == GL_DEPTH_COMPONENT);
-
-   ASSERT(textureBaseFormat == GL_RGBA ||
-          textureBaseFormat == GL_RGB ||
-          textureBaseFormat == GL_RG ||
-          textureBaseFormat == GL_RED ||
-          textureBaseFormat == GL_LUMINANCE_ALPHA ||
-          textureBaseFormat == GL_LUMINANCE ||
-          textureBaseFormat == GL_ALPHA ||
-          textureBaseFormat == GL_INTENSITY ||
-          textureBaseFormat == GL_DEPTH_COMPONENT);
-
-   tempImage = malloc(srcWidth * srcHeight * srcDepth
-				  * components * sizeof(GLfloat));
-   if (!tempImage)
-      return NULL;
-
-   dst = tempImage;
-   for (img = 0; img < srcDepth; img++) {
-      const GLubyte *src
-	 = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr,
-						 srcWidth, srcHeight,
-						 srcFormat, srcType,
-						 img, 0, 0);
-      for (row = 0; row < srcHeight; row++) {
-	 _mesa_unpack_color_span_float(ctx, srcWidth, logicalBaseFormat,
-				       dst, srcFormat, srcType, src,
-				       srcPacking, transferOps);
-	 dst += srcWidth * components;
-	 src += srcStride;
-      }
-   }
-
-   if (logicalBaseFormat != textureBaseFormat) {
-      /* more work */
-      GLint texComponents = _mesa_components_in_format(textureBaseFormat);
-      GLint logComponents = _mesa_components_in_format(logicalBaseFormat);
-      GLfloat *newImage;
-      GLint i, n;
-      GLubyte map[6];
-
-      /* we only promote up to RGB, RGBA and LUMINANCE_ALPHA formats for now */
-      ASSERT(textureBaseFormat == GL_RGB || textureBaseFormat == GL_RGBA ||
-             textureBaseFormat == GL_LUMINANCE_ALPHA);
-
-      /* The actual texture format should have at least as many components
-       * as the logical texture format.
-       */
-      ASSERT(texComponents >= logComponents);
-
-      newImage = malloc(srcWidth * srcHeight * srcDepth
-                                          * texComponents * sizeof(GLfloat));
-      if (!newImage) {
-         free(tempImage);
-         return NULL;
-      }
-
-      compute_component_mapping(logicalBaseFormat, textureBaseFormat, map);
-
-      n = srcWidth * srcHeight * srcDepth;
-      for (i = 0; i < n; i++) {
-         GLint k;
-         for (k = 0; k < texComponents; k++) {
-            GLint j = map[k];
-            if (j == ZERO)
-               newImage[i * texComponents + k] = 0.0F;
-            else if (j == ONE)
-               newImage[i * texComponents + k] = 1.0F;
-            else
-               newImage[i * texComponents + k] = tempImage[i * logComponents + j];
-         }
-      }
-
-      free(tempImage);
-      tempImage = newImage;
-   }
-
-   return tempImage;
-}
-
-
-/**
- * Make temporary image with uint pixel values.  Used for unsigned
- * integer-valued textures.
- */
-static GLuint *
-make_temp_uint_image(struct gl_context *ctx, GLuint dims,
-                     GLenum logicalBaseFormat,
-                     GLenum textureBaseFormat,
-                     GLint srcWidth, GLint srcHeight, GLint srcDepth,
-                     GLenum srcFormat, GLenum srcType,
-                     const GLvoid *srcAddr,
-                     const struct gl_pixelstore_attrib *srcPacking)
-{
-   GLuint *tempImage;
-   const GLint components = _mesa_components_in_format(logicalBaseFormat);
-   const GLint srcStride =
-      _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-   GLuint *dst;
-   GLint img, row;
-
-   ASSERT(dims >= 1 && dims <= 3);
-
-   ASSERT(logicalBaseFormat == GL_RGBA ||
-          logicalBaseFormat == GL_RGB ||
-          logicalBaseFormat == GL_RG ||
-          logicalBaseFormat == GL_RED ||
-          logicalBaseFormat == GL_LUMINANCE_ALPHA ||
-          logicalBaseFormat == GL_LUMINANCE ||
-          logicalBaseFormat == GL_INTENSITY ||
-          logicalBaseFormat == GL_ALPHA);
-
-   ASSERT(textureBaseFormat == GL_RGBA ||
-          textureBaseFormat == GL_RGB ||
-          textureBaseFormat == GL_RG ||
-          textureBaseFormat == GL_RED ||
-          textureBaseFormat == GL_LUMINANCE_ALPHA ||
-          textureBaseFormat == GL_LUMINANCE ||
-          textureBaseFormat == GL_INTENSITY ||
-          textureBaseFormat == GL_ALPHA);
-
-   tempImage = malloc(srcWidth * srcHeight * srcDepth
-                                 * components * sizeof(GLuint));
-   if (!tempImage)
-      return NULL;
-
-   dst = tempImage;
-   for (img = 0; img < srcDepth; img++) {
-      const GLubyte *src
-	 = (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr,
-						 srcWidth, srcHeight,
-						 srcFormat, srcType,
-						 img, 0, 0);
-      for (row = 0; row < srcHeight; row++) {
-	 _mesa_unpack_color_span_uint(ctx, srcWidth, logicalBaseFormat,
-                                      dst, srcFormat, srcType, src,
-                                      srcPacking);
-	 dst += srcWidth * components;
-	 src += srcStride;
-      }
-   }
-
-   if (logicalBaseFormat != textureBaseFormat) {
-      /* more work */
-      GLint texComponents = _mesa_components_in_format(textureBaseFormat);
-      GLint logComponents = _mesa_components_in_format(logicalBaseFormat);
-      GLuint *newImage;
-      GLint i, n;
-      GLubyte map[6];
-
-      /* we only promote up to RGB, RGBA and LUMINANCE_ALPHA formats for now */
-      ASSERT(textureBaseFormat == GL_RGB || textureBaseFormat == GL_RGBA ||
-             textureBaseFormat == GL_LUMINANCE_ALPHA);
-
-      /* The actual texture format should have at least as many components
-       * as the logical texture format.
-       */
-      ASSERT(texComponents >= logComponents);
-
-      newImage = malloc(srcWidth * srcHeight * srcDepth
-                                   * texComponents * sizeof(GLuint));
-      if (!newImage) {
-         free(tempImage);
-         return NULL;
-      }
-
-      compute_component_mapping(logicalBaseFormat, textureBaseFormat, map);
-
-      n = srcWidth * srcHeight * srcDepth;
-      for (i = 0; i < n; i++) {
-         GLint k;
-         for (k = 0; k < texComponents; k++) {
-            GLint j = map[k];
-            if (j == ZERO)
-               newImage[i * texComponents + k] = 0;
-            else if (j == ONE)
-               newImage[i * texComponents + k] = 1;
-            else
-               newImage[i * texComponents + k] = tempImage[i * logComponents + j];
-         }
-      }
-
-      free(tempImage);
-      tempImage = newImage;
-   }
-
-   return tempImage;
-}
-
-
-
-/**
- * Make a temporary (color) texture image with GLubyte components.
- * Apply all needed pixel unpacking and pixel transfer operations.
- * Note that there are both logicalBaseFormat and textureBaseFormat parameters.
- * Suppose the user specifies GL_LUMINANCE as the internal texture format
- * but the graphics hardware doesn't support luminance textures.  So, we might
- * use an RGB hardware format instead.
- * If logicalBaseFormat != textureBaseFormat we have some extra work to do.
- *
- * \param ctx  the rendering context
- * \param dims  image dimensions: 1, 2 or 3
- * \param logicalBaseFormat  basic texture derived from the user's
- *    internal texture format value
- * \param textureBaseFormat  the actual basic format of the texture
- * \param srcWidth  source image width
- * \param srcHeight  source image height
- * \param srcDepth  source image depth
- * \param srcFormat  source image format
- * \param srcType  source image type
- * \param srcAddr  source image address
- * \param srcPacking  source image pixel packing
- * \return resulting image with format = textureBaseFormat and type = GLubyte.
- */
-GLubyte *
-_mesa_make_temp_ubyte_image(struct gl_context *ctx, GLuint dims,
-                            GLenum logicalBaseFormat,
-                            GLenum textureBaseFormat,
-                            GLint srcWidth, GLint srcHeight, GLint srcDepth,
-                            GLenum srcFormat, GLenum srcType,
-                            const GLvoid *srcAddr,
-                            const struct gl_pixelstore_attrib *srcPacking)
-{
-   GLuint transferOps = ctx->_ImageTransferState;
-   const GLint components = _mesa_components_in_format(logicalBaseFormat);
-   GLint img, row;
-   GLubyte *tempImage, *dst;
-
-   ASSERT(dims >= 1 && dims <= 3);
-
-   ASSERT(logicalBaseFormat == GL_RGBA ||
-          logicalBaseFormat == GL_RGB ||
-          logicalBaseFormat == GL_RG ||
-          logicalBaseFormat == GL_RED ||
-          logicalBaseFormat == GL_LUMINANCE_ALPHA ||
-          logicalBaseFormat == GL_LUMINANCE ||
-          logicalBaseFormat == GL_ALPHA ||
-          logicalBaseFormat == GL_INTENSITY);
-
-   ASSERT(textureBaseFormat == GL_RGBA ||
-          textureBaseFormat == GL_RGB ||
-          textureBaseFormat == GL_RG ||
-          textureBaseFormat == GL_RED ||
-          textureBaseFormat == GL_LUMINANCE_ALPHA ||
-          textureBaseFormat == GL_LUMINANCE ||
-          textureBaseFormat == GL_ALPHA ||
-          textureBaseFormat == GL_INTENSITY);
-
-   /* unpack and transfer the source image */
-   tempImage = malloc(srcWidth * srcHeight * srcDepth
-                                       * components * sizeof(GLubyte));
-   if (!tempImage) {
-      return NULL;
-   }
-
-   dst = tempImage;
-   for (img = 0; img < srcDepth; img++) {
-      const GLint srcStride =
-         _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-      const GLubyte *src =
-         (const GLubyte *) _mesa_image_address(dims, srcPacking, srcAddr,
-                                               srcWidth, srcHeight,
-                                               srcFormat, srcType,
-                                               img, 0, 0);
-      for (row = 0; row < srcHeight; row++) {
-         _mesa_unpack_color_span_ubyte(ctx, srcWidth, logicalBaseFormat, dst,
-                                       srcFormat, srcType, src, srcPacking,
-                                       transferOps);
-         dst += srcWidth * components;
-         src += srcStride;
-      }
-   }
-
-   if (logicalBaseFormat != textureBaseFormat) {
-      /* one more conversion step */
-      GLint texComponents = _mesa_components_in_format(textureBaseFormat);
-      GLint logComponents = _mesa_components_in_format(logicalBaseFormat);
-      GLubyte *newImage;
-      GLint i, n;
-      GLubyte map[6];
-
-      /* we only promote up to RGB, RGBA and LUMINANCE_ALPHA formats for now */
-      ASSERT(textureBaseFormat == GL_RGB || textureBaseFormat == GL_RGBA ||
-             textureBaseFormat == GL_LUMINANCE_ALPHA);
-
-      /* The actual texture format should have at least as many components
-       * as the logical texture format.
-       */
-      ASSERT(texComponents >= logComponents);
-
-      newImage = malloc(srcWidth * srcHeight * srcDepth
-                                         * texComponents * sizeof(GLubyte));
-      if (!newImage) {
-         free(tempImage);
-         return NULL;
-      }
-
-      compute_component_mapping(logicalBaseFormat, textureBaseFormat, map);
-
-      n = srcWidth * srcHeight * srcDepth;
-      for (i = 0; i < n; i++) {
-         GLint k;
-         for (k = 0; k < texComponents; k++) {
-            GLint j = map[k];
-            if (j == ZERO)
-               newImage[i * texComponents + k] = 0;
-            else if (j == ONE)
-               newImage[i * texComponents + k] = 255;
-            else
-               newImage[i * texComponents + k] = tempImage[i * logComponents + j];
-         }
-      }
-
-      free(tempImage);
-      tempImage = newImage;
-   }
-
-   return tempImage;
-}
-
-
 static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE };
 static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE };
 static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
@@ -716,46 +146,6 @@ memcpy_texture(struct gl_context *ctx,
 
 
 /**
- * General-case function for storing a color texture images with
- * components that can be represented with ubytes.  Example destination
- * texture formats are MESA_FORMAT_ARGB888, ARGB4444, RGB565.
- */
-static GLboolean
-store_ubyte_texture(TEXSTORE_PARAMS)
-{
-   const GLint srcRowStride = srcWidth * 4 * sizeof(GLubyte);
-   GLubyte *tempImage, *src;
-   GLint img;
-
-   tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
-                                           baseInternalFormat,
-                                           GL_RGBA,
-                                           srcWidth, srcHeight, srcDepth,
-                                           srcFormat, srcType, srcAddr,
-                                           srcPacking);
-   if (!tempImage)
-      return GL_FALSE;
-
-   /* This way we will use the RGB versions of the packing functions and it
-    * will work for both RGB and sRGB textures*/
-   dstFormat = _mesa_get_srgb_format_linear(dstFormat);
-
-   src = tempImage;
-   for (img = 0; img < srcDepth; img++) {
-      _mesa_pack_ubyte_rgba_rect(dstFormat, srcWidth, srcHeight,
-                                 src, srcRowStride,
-                                 dstSlices[img], dstRowStride);
-      src += srcHeight * srcRowStride;
-   }
-   free(tempImage);
-
-   return GL_TRUE;
-}
-
-
-
-
-/**
  * Store a 32-bit integer or float depth component texture image.
  */
 static GLboolean
@@ -888,56 +278,6 @@ _mesa_texstore_z16(TEXSTORE_PARAMS)
 
 
 /**
- * Store an rgb565 or rgb565_rev texture image.
- */
-static GLboolean
-_mesa_texstore_rgb565(TEXSTORE_PARAMS)
-{
-   ASSERT(dstFormat == MESA_FORMAT_B5G6R5_UNORM ||
-          dstFormat == MESA_FORMAT_R5G6B5_UNORM);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == 2);
-
-   if (!ctx->_ImageTransferState &&
-       !srcPacking->SwapBytes &&
-       baseInternalFormat == GL_RGB &&
-       srcFormat == GL_RGB &&
-       srcType == GL_UNSIGNED_BYTE &&
-       dims == 2) {
-      /* do optimized tex store */
-      const GLint srcRowStride =
-         _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-      const GLubyte *src = (const GLubyte *)
-         _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight,
-                             srcFormat, srcType, 0, 0, 0);
-      GLubyte *dst = dstSlices[0];
-      GLint row, col;
-      for (row = 0; row < srcHeight; row++) {
-         const GLubyte *srcUB = (const GLubyte *) src;
-         GLushort *dstUS = (GLushort *) dst;
-         /* check for byteswapped format */
-         if (dstFormat == MESA_FORMAT_B5G6R5_UNORM) {
-            for (col = 0; col < srcWidth; col++) {
-               dstUS[col] = PACK_COLOR_565( srcUB[0], srcUB[1], srcUB[2] );
-               srcUB += 3;
-            }
-         }
-         else {
-            for (col = 0; col < srcWidth; col++) {
-               dstUS[col] = PACK_COLOR_565_REV( srcUB[0], srcUB[1], srcUB[2] );
-               srcUB += 3;
-            }
-         }
-         dst += dstRowStride;
-         src += srcRowStride;
-      }
-      return GL_TRUE;
-   } else {
-      return GL_FALSE;
-   }
-}
-
-
-/**
  * Texstore for _mesa_texformat_ycbcr or _mesa_texformat_ycbcr_REV.
  */
 static GLboolean
@@ -1245,119 +585,6 @@ _mesa_texstore_z32f_x24s8(TEXSTORE_PARAMS)
 }
 
 static GLboolean
-_mesa_texstore_argb2101010_uint(TEXSTORE_PARAMS)
-{
-   const GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-
-   ASSERT(dstFormat == MESA_FORMAT_B10G10R10A2_UINT);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == 4);
-
-   {
-      /* general path */
-      const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-                                                     baseInternalFormat,
-                                                     baseFormat,
-                                                     srcWidth, srcHeight,
-                                                     srcDepth, srcFormat,
-                                                     srcType, srcAddr,
-                                                     srcPacking);
-      const GLuint *src = tempImage;
-      GLint img, row, col;
-      GLboolean is_unsigned = _mesa_is_type_unsigned(srcType);
-      if (!tempImage)
-         return GL_FALSE;
-      for (img = 0; img < srcDepth; img++) {
-         GLubyte *dstRow = dstSlices[img];
-
-         for (row = 0; row < srcHeight; row++) {
-            GLuint *dstUI = (GLuint *) dstRow;
-            if (is_unsigned) {
-               for (col = 0; col < srcWidth; col++) {
-                  GLushort a,r,g,b;
-                  r = MIN2(src[RCOMP], 0x3ff);
-                  g = MIN2(src[GCOMP], 0x3ff);
-                  b = MIN2(src[BCOMP], 0x3ff);
-                  a = MIN2(src[ACOMP], 0x003);
-                  dstUI[col] = (a << 30) | (r << 20) | (g << 10) | (b);
-                  src += 4;
-               }
-            } else {
-               for (col = 0; col < srcWidth; col++) {
-                  GLushort a,r,g,b;
-                  r = CLAMP((GLint) src[RCOMP], 0, 0x3ff);
-                  g = CLAMP((GLint) src[GCOMP], 0, 0x3ff);
-                  b = CLAMP((GLint) src[BCOMP], 0, 0x3ff);
-                  a = CLAMP((GLint) src[ACOMP], 0, 0x003);
-                  dstUI[col] = (a << 30) | (r << 20) | (g << 10) | (b);
-                  src += 4;
-               }
-            }
-            dstRow += dstRowStride;
-         }
-      }
-      free((void *) tempImage);
-   }
-   return GL_TRUE;
-}
-
-static GLboolean
-_mesa_texstore_abgr2101010_uint(TEXSTORE_PARAMS)
-{
-   const GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-
-   ASSERT(dstFormat == MESA_FORMAT_R10G10B10A2_UINT);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == 4);
-
-   {
-      /* general path */
-      const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-                                                     baseInternalFormat,
-                                                     baseFormat,
-                                                     srcWidth, srcHeight,
-                                                     srcDepth, srcFormat,
-                                                     srcType, srcAddr,
-                                                     srcPacking);
-      const GLuint *src = tempImage;
-      GLint img, row, col;
-      GLboolean is_unsigned = _mesa_is_type_unsigned(srcType);
-      if (!tempImage)
-         return GL_FALSE;
-      for (img = 0; img < srcDepth; img++) {
-         GLubyte *dstRow = dstSlices[img];
-
-         for (row = 0; row < srcHeight; row++) {
-            GLuint *dstUI = (GLuint *) dstRow;
-            if (is_unsigned) {
-               for (col = 0; col < srcWidth; col++) {
-                  GLushort a,r,g,b;
-                  r = MIN2(src[RCOMP], 0x3ff);
-                  g = MIN2(src[GCOMP], 0x3ff);
-                  b = MIN2(src[BCOMP], 0x3ff);
-                  a = MIN2(src[ACOMP], 0x003);
-                  dstUI[col] = (a << 30) | (b << 20) | (g << 10) | (r);
-                  src += 4;
-               }
-            } else {
-               for (col = 0; col < srcWidth; col++) {
-                  GLushort a,r,g,b;
-                  r = CLAMP((GLint) src[RCOMP], 0, 0x3ff);
-                  g = CLAMP((GLint) src[GCOMP], 0, 0x3ff);
-                  b = CLAMP((GLint) src[BCOMP], 0, 0x3ff);
-                  a = CLAMP((GLint) src[ACOMP], 0, 0x003);
-                  dstUI[col] = (a << 30) | (b << 20) | (g << 10) | (r);
-                  src += 4;
-               }
-            }
-            dstRow += dstRowStride;
-         }
-      }
-      free((void *) tempImage);
-   }
-   return GL_TRUE;
-}
-
-
-static GLboolean
 texstore_depth_stencil(TEXSTORE_PARAMS)
 {
    static StoreTexImageFunc table[MESA_FORMAT_COUNT];
@@ -1446,329 +673,149 @@ texstore_compressed(TEXSTORE_PARAMS)
                            srcFormat, srcType, srcAddr, srcPacking);
 }
 
-static void
-invert_swizzle(uint8_t dst[4], const uint8_t src[4])
-{
-   int i, j;
-
-   dst[0] = MESA_FORMAT_SWIZZLE_NONE;
-   dst[1] = MESA_FORMAT_SWIZZLE_NONE;
-   dst[2] = MESA_FORMAT_SWIZZLE_NONE;
-   dst[3] = MESA_FORMAT_SWIZZLE_NONE;
-
-   for (i = 0; i < 4; ++i)
-      for (j = 0; j < 4; ++j)
-         if (src[j] == i && dst[i] == MESA_FORMAT_SWIZZLE_NONE)
-            dst[i] = j;
-}
-
-/** Store a texture by per-channel conversions and swizzling.
- *
- * This function attempts to perform a texstore operation by doing simple
- * per-channel conversions and swizzling.  This covers a huge chunk of the
- * texture storage operations that anyone cares about.  If this function is
- * incapable of performing the operation, it bails and returns GL_FALSE.
- */
 static GLboolean
-texstore_swizzle(TEXSTORE_PARAMS)
+texstore_rgba(TEXSTORE_PARAMS)
 {
-   const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
-                                                     srcFormat, srcType);
-   const GLint srcImageStride = _mesa_image_image_stride(srcPacking,
-                                      srcWidth, srcHeight, srcFormat, srcType);
-   const GLubyte *srcImage = (const GLubyte *) _mesa_image_address(dims,
-        srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0);
-   const int src_components = _mesa_components_in_format(srcFormat);
-
-   GLubyte swizzle[4], rgba2base[6], base2src[6], rgba2dst[4], dst2rgba[4];
-   const GLubyte *swap;
-   GLenum dst_type;
-   int dst_components;
-   bool is_array, normalized, need_swap;
-   GLint i, img, row;
-   const GLubyte *src_row;
-   GLubyte *dst_row;
-
-   is_array = _mesa_format_to_array(dstFormat, &dst_type, &dst_components,
-                                    rgba2dst, &normalized);
-
-   if (!is_array)
-      return GL_FALSE;
-
-   if (srcFormat == GL_COLOR_INDEX)
-      return GL_FALSE;
-
-   if (_mesa_texstore_needs_transfer_ops(ctx, baseInternalFormat, dstFormat))
-      return GL_FALSE;
-
-   switch (srcType) {
-   case GL_FLOAT:
-   case GL_UNSIGNED_BYTE:
-   case GL_BYTE:
-   case GL_UNSIGNED_SHORT:
-   case GL_SHORT:
-   case GL_UNSIGNED_INT:
-   case GL_INT:
-      /* If wa have to swap bytes in a multi-byte datatype, that means
-       * we're not doing an array conversion anymore */
-      if (srcPacking->SwapBytes)
-         return GL_FALSE;
-      need_swap = false;
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8:
-      need_swap = srcPacking->SwapBytes;
-      if (_mesa_little_endian())
-         need_swap = !need_swap;
-      srcType = GL_UNSIGNED_BYTE;
-      break;
-   case GL_UNSIGNED_INT_8_8_8_8_REV:
-      need_swap = srcPacking->SwapBytes;
-      if (!_mesa_little_endian())
-         need_swap = !need_swap;
-      srcType = GL_UNSIGNED_BYTE;
-      break;
-   default:
-      return GL_FALSE;
+   void *tempImage = NULL, *tempRGBA = NULL;
+   int srcRowStride, img;
+   GLubyte *src, *dst;
+   uint32_t srcMesaFormat;
+   uint8_t rebaseSwizzle[4];
+   bool needRebase;
+   bool transferOpsDone = false;
+
+   /* We have to handle MESA_FORMAT_YCBCR manually because it is a special case
+    * and _mesa_format_convert does not support it. In this case we only
+    * allow conversions between YCBCR formats and it is mostly a memcpy.
+    */
+   if (dstFormat == MESA_FORMAT_YCBCR || dstFormat == MESA_FORMAT_YCBCR_REV) {
+      return _mesa_texstore_ycbcr(ctx, dims, baseInternalFormat,
+                                  dstFormat, dstRowStride, dstSlices,
+                                  srcWidth, srcHeight, srcDepth,
+                                  srcFormat, srcType, srcAddr,
+                                  srcPacking);
    }
-   swap = need_swap ? map_3210 : map_identity;
-
-   compute_component_mapping(srcFormat, baseInternalFormat, base2src);
-   compute_component_mapping(baseInternalFormat, GL_RGBA, rgba2base);
-   invert_swizzle(dst2rgba, rgba2dst);
 
-   for (i = 0; i < 4; i++) {
-      if (dst2rgba[i] == MESA_FORMAT_SWIZZLE_NONE)
-         swizzle[i] = MESA_FORMAT_SWIZZLE_NONE;
-      else
-         swizzle[i] = swap[base2src[rgba2base[dst2rgba[i]]]];
-   }
+   /* We have to deal with GL_COLOR_INDEX manually because
+    * _mesa_format_convert does not handle this format. So what we do here is
+    * convert it to RGBA ubyte first and then convert from that to dst as usual.
+    */
+   if (srcFormat == GL_COLOR_INDEX) {
+      /* Notice that this will already handle byte swapping if necessary */
+      tempImage =
+         _mesa_unpack_color_index_to_rgba_ubyte(ctx, dims,
+                                                srcAddr, srcFormat, srcType,
+                                                srcWidth, srcHeight, srcDepth,
+                                                srcPacking,
+                                                ctx->_ImageTransferState);
+      if (!tempImage)
+         return GL_FALSE;
 
-   /* Is it normalized? */
-   normalized |= !_mesa_is_enum_format_integer(srcFormat);
+      /* _mesa_unpack_color_index_to_rgba_ubyte has handled transferops
+       * if needed.
+       */
+      transferOpsDone = true;
 
-   for (img = 0; img < srcDepth; img++) {
-      if (dstRowStride == srcWidth * dst_components &&
-          srcRowStride == srcWidth * src_components) {
-         _mesa_swizzle_and_convert(dstSlices[img], dst_type, dst_components,
-                                   srcImage, srcType, src_components,
-                                   swizzle, normalized, srcWidth * srcHeight);
-      } else {
-         src_row = srcImage;
-         dst_row = dstSlices[img];
-         for (row = 0; row < srcHeight; row++) {
-            _mesa_swizzle_and_convert(dst_row, dst_type, dst_components,
-                                      src_row, srcType, src_components,
-                                      swizzle, normalized, srcWidth);
-            dst_row += dstRowStride;
-            src_row += srcRowStride;
-         }
+      /* Now we only have to adjust our src info for a conversion from
+       * the RGBA ubyte and then we continue as usual.
+       */
+      srcAddr = tempImage;
+      srcFormat = GL_RGBA;
+      srcType = GL_UNSIGNED_BYTE;
+   } else if (srcPacking->SwapBytes) {
+      /* We have to handle byte-swapping scenarios before calling
+       * _mesa_format_convert
+       */
+      GLint swapSize = _mesa_sizeof_packed_type(srcType);
+      if (swapSize == 2 || swapSize == 4) {
+         int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
+         int swapsPerPixel = bytesPerPixel / swapSize;
+         int elementCount = srcWidth * srcHeight * srcDepth;
+         assert(bytesPerPixel % swapSize == 0);
+         tempImage = malloc(elementCount * bytesPerPixel);
+         if (!tempImage)
+            return GL_FALSE;
+         if (swapSize == 2)
+            _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
+                             elementCount * swapsPerPixel);
+         else
+            _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
+                             elementCount * swapsPerPixel);
+         srcAddr = tempImage;
       }
-      srcImage += srcImageStride;
    }
 
-   return GL_TRUE;
-}
-
-
-/** Stores a texture by converting float and then to the texture format
- *
- * This function performs a texstore operation by converting to float,
- * applying pixel transfer ops, and then converting to the texture's
- * internal format using pixel store functions.  This function will work
- * for any rgb or srgb textore format.
- */
-static GLboolean
-texstore_via_float(TEXSTORE_PARAMS)
-{
-   GLuint i, img, row;
-   const GLint src_stride =
+   srcRowStride =
       _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-   float *tmp_row;
-   bool need_convert;
-   uint8_t *src_row, *dst_row, map[4], rgba2base[6], base2rgba[6];
-
-   tmp_row = malloc(srcWidth * 4 * sizeof(*tmp_row));
-   if (!tmp_row)
-      return GL_FALSE;
 
-   /* The GL spec (4.0, compatibility profile) only specifies srgb
-    * conversion as something that is done in the sampler during the
-    * filtering process before the colors are handed to the shader.
-    * Furthermore, the flowchart (Figure 3.7 in the 4.0 compatibility spec)
-    * does not list RGB <-> sRGB conversions anywhere.  Therefore, we just
-    * treat sRGB formats the same as RGB formats for the purposes of
-    * texture upload and transfer ops.
-    */
+   srcMesaFormat = _mesa_format_from_format_and_type(srcFormat, srcType);
    dstFormat = _mesa_get_srgb_format_linear(dstFormat);
 
-   need_convert = false;
-   if (baseInternalFormat != _mesa_get_format_base_format(dstFormat)) {
-      compute_component_mapping(GL_RGBA, baseInternalFormat, base2rgba);
-      compute_component_mapping(baseInternalFormat, GL_RGBA, rgba2base);
-      for (i = 0; i < 4; ++i) {
-         map[i] = base2rgba[rgba2base[i]];
-         if (map[i] != i)
-            need_convert = true;
+   /* If we have transferOps then we need to convert to RGBA float first,
+    * then apply transferOps, then do the conversion to dst.
+    */
+   if (!transferOpsDone &&
+       _mesa_texstore_needs_transfer_ops(ctx, baseInternalFormat, dstFormat)) {
+      /* Allocate RGBA float image */
+      int elementCount = srcWidth * srcHeight * srcDepth;
+      tempRGBA = malloc(4 * elementCount * sizeof(float));
+      if (!tempRGBA) {
+         free(tempImage);
+         free(tempRGBA);
+         return GL_FALSE;
       }
-   }
 
-   for (img = 0; img < srcDepth; img++) {
-      dst_row = dstSlices[img];
-      src_row = _mesa_image_address(dims, srcPacking, srcAddr,
-                                    srcWidth, srcHeight,
-                                    srcFormat, srcType,
-                                    img, 0, 0);
-      for (row = 0; row < srcHeight; row++) {
-	 _mesa_unpack_color_span_float(ctx, srcWidth, GL_RGBA, tmp_row,
-                                       srcFormat, srcType, src_row,
-				       srcPacking, ctx->_ImageTransferState);
-         if (need_convert)
-            _mesa_swizzle_and_convert(tmp_row, GL_FLOAT, 4,
-                                      tmp_row, GL_FLOAT, 4,
-                                      map, false, srcWidth);
-         _mesa_pack_float_rgba_row(dstFormat, srcWidth,
-                                   (const GLfloat (*)[4])tmp_row,
-                                   dst_row);
-         dst_row += dstRowStride;
-         src_row += src_stride;
+      /* Convert from src to RGBA float */
+      src = (GLubyte *) srcAddr;
+      dst = (GLubyte *) tempRGBA;
+      for (img = 0; img < srcDepth; img++) {
+         _mesa_format_convert(dst, RGBA32_FLOAT, 4 * srcWidth * sizeof(float),
+                              src, srcMesaFormat, srcRowStride,
+                              srcWidth, srcHeight, NULL);
+         src += srcHeight * srcRowStride;
+         dst += srcHeight * 4 * srcWidth * sizeof(float);
       }
-   }
 
-   free(tmp_row);
-
-   return GL_TRUE;
-}
+      /* Apply transferOps */
+      _mesa_apply_rgba_transfer_ops(ctx, ctx->_ImageTransferState, elementCount,
+                                    (float(*)[4]) tempRGBA);
 
-/** Stores an integer rgba texture
- *
- * This function performs an integer texture storage operation by unpacking
- * the texture to 32-bit integers, and repacking it into the internal
- * format of the texture.  This will work for any integer rgb texture
- * storage operation.
- */
-static GLboolean
-texstore_rgba_integer(TEXSTORE_PARAMS)
-{
-   GLuint i, img, row, *tmp_row;
-   GLenum dst_type, tmp_type;
-   const GLint src_stride =
-      _mesa_image_row_stride(srcPacking, srcWidth, srcFormat, srcType);
-   int num_dst_components;
-   bool is_array, normalized;
-   uint8_t *src_row, *dst_row;
-   uint8_t swizzle[4], rgba2base[6], base2rgba[6], rgba2dst[4], dst2rgba[4];
-
-   tmp_row = malloc(srcWidth * 4 * sizeof(*tmp_row));
-   if (!tmp_row)
-      return GL_FALSE;
-
-   is_array = _mesa_format_to_array(dstFormat, &dst_type, &num_dst_components,
-                                    rgba2dst, &normalized);
-
-   assert(is_array && !normalized);
-
-   if (!is_array) {
-      free(tmp_row);
-      return GL_FALSE;
+      /* Now we have to adjust our src info for a conversion from
+       * the RGBA float image and then we continue as usual.
+       */
+      srcAddr = tempRGBA;
+      srcFormat = GL_RGBA;
+      srcType = GL_FLOAT;
+      srcRowStride = srcWidth * 4 * sizeof(float);
+      srcMesaFormat = RGBA32_FLOAT;
    }
 
-   invert_swizzle(dst2rgba, rgba2dst);
-   compute_component_mapping(GL_RGBA, baseInternalFormat, base2rgba);
-   compute_component_mapping(baseInternalFormat, GL_RGBA, rgba2base);
-
-   for (i = 0; i < 4; ++i) {
-      if (dst2rgba[i] == MESA_FORMAT_SWIZZLE_NONE)
-         swizzle[i] = MESA_FORMAT_SWIZZLE_NONE;
-      else
-         swizzle[i] = base2rgba[rgba2base[dst2rgba[i]]];
-   }
+   src = (GLubyte *)
+      _mesa_image_address(dims, srcPacking, srcAddr, srcWidth, srcHeight,
+                          srcFormat, srcType, 0, 0, 0);
 
-   if (_mesa_is_type_unsigned(srcType)) {
-      tmp_type = GL_UNSIGNED_INT;
+   if (_mesa_get_format_base_format(dstFormat) != baseInternalFormat) {
+      needRebase =
+         _mesa_compute_rgba2base2rgba_component_mapping(baseInternalFormat,
+                                                        rebaseSwizzle);
    } else {
-      tmp_type = GL_INT;
+      needRebase = false;
    }
 
    for (img = 0; img < srcDepth; img++) {
-      dst_row = dstSlices[img];
-      src_row = _mesa_image_address(dims, srcPacking, srcAddr,
-                                    srcWidth, srcHeight,
-                                    srcFormat, srcType,
-                                    img, 0, 0);
-      for (row = 0; row < srcHeight; row++) {
-	 _mesa_unpack_color_span_uint(ctx, srcWidth, GL_RGBA, tmp_row,
-                                      srcFormat, srcType, src_row, srcPacking);
-         _mesa_swizzle_and_convert(dst_row, dst_type, num_dst_components,
-                                   tmp_row, tmp_type, 4,
-                                   swizzle, false, srcWidth);
-         dst_row += dstRowStride;
-         src_row += src_stride;
-      }
+      _mesa_format_convert(dstSlices[img], dstFormat, dstRowStride,
+                           src, srcMesaFormat, srcRowStride,
+                           srcWidth, srcHeight,
+                           needRebase ? rebaseSwizzle : NULL);
+      src += srcHeight * srcRowStride;
    }
 
-   free(tmp_row);
+   free(tempImage);
+   free(tempRGBA);
 
    return GL_TRUE;
 }
 
-static GLboolean
-texstore_rgba(TEXSTORE_PARAMS)
-{
-   static StoreTexImageFunc table[MESA_FORMAT_COUNT];
-   static GLboolean initialized = GL_FALSE;
-
-   if (!initialized) {
-      memset(table, 0, sizeof table);
-
-      table[MESA_FORMAT_B5G6R5_UNORM] = _mesa_texstore_rgb565;
-      table[MESA_FORMAT_R5G6B5_UNORM] = _mesa_texstore_rgb565;
-      table[MESA_FORMAT_YCBCR] = _mesa_texstore_ycbcr;
-      table[MESA_FORMAT_YCBCR_REV] = _mesa_texstore_ycbcr;
-
-      table[MESA_FORMAT_B10G10R10A2_UINT] = _mesa_texstore_argb2101010_uint;
-      table[MESA_FORMAT_R10G10B10A2_UINT] = _mesa_texstore_abgr2101010_uint;
-
-      initialized = GL_TRUE;
-   }
-
-   if (table[dstFormat] && table[dstFormat](ctx, dims, baseInternalFormat,
-                                            dstFormat, dstRowStride, dstSlices,
-                                            srcWidth, srcHeight, srcDepth,
-                                            srcFormat, srcType, srcAddr,
-                                            srcPacking)) {
-      return GL_TRUE;
-   }
-
-   if (texstore_swizzle(ctx, dims, baseInternalFormat,
-                        dstFormat,
-                        dstRowStride, dstSlices,
-                        srcWidth, srcHeight, srcDepth,
-                        srcFormat, srcType, srcAddr, srcPacking)) {
-      return GL_TRUE;
-   }
-
-   if (_mesa_is_format_integer(dstFormat)) {
-      return texstore_rgba_integer(ctx, dims, baseInternalFormat,
-                                   dstFormat, dstRowStride, dstSlices,
-                                   srcWidth, srcHeight, srcDepth,
-                                   srcFormat, srcType, srcAddr,
-                                   srcPacking);
-   } else if (_mesa_get_format_max_bits(dstFormat) <= 8 &&
-              !_mesa_is_format_signed(dstFormat)) {
-      return store_ubyte_texture(ctx, dims, baseInternalFormat,
-                                 dstFormat,
-                                 dstRowStride, dstSlices,
-                                 srcWidth, srcHeight, srcDepth,
-                                 srcFormat, srcType, srcAddr, srcPacking);
-   } else {
-      return texstore_via_float(ctx, dims, baseInternalFormat,
-                                dstFormat, dstRowStride, dstSlices,
-                                srcWidth, srcHeight, srcDepth,
-                                srcFormat, srcType, srcAddr,
-                                srcPacking);
-   }
-}
-
 GLboolean
 _mesa_texstore_needs_transfer_ops(struct gl_context *ctx,
                                   GLenum baseInternalFormat,
diff --git a/mesalib/src/mesa/main/texstore.h b/mesalib/src/mesa/main/texstore.h
index 4c41d1fcd..2c974f74a 100644
--- a/mesalib/src/mesa/main/texstore.h
+++ b/mesalib/src/mesa/main/texstore.h
@@ -81,25 +81,6 @@ _mesa_texstore_can_use_memcpy(struct gl_context *ctx,
                               const struct gl_pixelstore_attrib *srcPacking);
 
 
-extern GLubyte *
-_mesa_make_temp_ubyte_image(struct gl_context *ctx, GLuint dims,
-                           GLenum logicalBaseFormat,
-                           GLenum textureBaseFormat,
-                           GLint srcWidth, GLint srcHeight, GLint srcDepth,
-                           GLenum srcFormat, GLenum srcType,
-                           const GLvoid *srcAddr,
-                           const struct gl_pixelstore_attrib *srcPacking);
-
-GLfloat *
-_mesa_make_temp_float_image(struct gl_context *ctx, GLuint dims,
-			    GLenum logicalBaseFormat,
-			    GLenum textureBaseFormat,
-			    GLint srcWidth, GLint srcHeight, GLint srcDepth,
-			    GLenum srcFormat, GLenum srcType,
-			    const GLvoid *srcAddr,
-			    const struct gl_pixelstore_attrib *srcPacking,
-			    GLbitfield transferOps);
-
 extern void
 _mesa_store_teximage(struct gl_context *ctx,
                      GLuint dims,
diff --git a/mesalib/src/mesa/main/textureview.c b/mesalib/src/mesa/main/textureview.c
index 6e86a9a44..cd87a27d2 100644
--- a/mesalib/src/mesa/main/textureview.c
+++ b/mesalib/src/mesa/main/textureview.c
@@ -355,7 +355,7 @@ _mesa_set_texture_view_state(struct gl_context *ctx,
    struct gl_texture_image *texImage;
 
    /* Get a reference to what will become this View's base level */
-   texImage = _mesa_select_tex_image(ctx, texObj, target, 0);
+   texImage = _mesa_select_tex_image(texObj, target, 0);
 
    /* When an immutable texture is created via glTexStorage or glTexImageMultisample,
     * TEXTURE_IMMUTABLE_FORMAT becomes TRUE.
@@ -527,8 +527,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint origtexture,
       faceTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + minlayer;
 
    /* Get a reference to what will become this View's base level */
-   origTexImage = _mesa_select_tex_image(ctx, origTexObj,
-                                         faceTarget, minlevel);
+   origTexImage = _mesa_select_tex_image(origTexObj, faceTarget, minlevel);
    width = origTexImage->Width;
    height = origTexImage->Height;
    depth = origTexImage->Depth;
diff --git a/mesalib/src/mesa/main/uniform_query.cpp b/mesalib/src/mesa/main/uniform_query.cpp
index 32870d0c4..40327fba4 100644
--- a/mesalib/src/mesa/main/uniform_query.cpp
+++ b/mesalib/src/mesa/main/uniform_query.cpp
@@ -45,9 +45,14 @@ _mesa_GetActiveUniform(GLuint program, GLuint index,
                        GLenum *type, GLcharARB *nameOut)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_shader_program *shProg =
-      _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
+   struct gl_shader_program *shProg;
+
+   if (maxLength < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glGetActiveUniform(maxLength < 0)");
+      return;
+   }
 
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
    if (!shProg)
       return;
 
@@ -85,16 +90,16 @@ _mesa_GetActiveUniformsiv(GLuint program,
    struct gl_shader_program *shProg;
    GLsizei i;
 
-   shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
-   if (!shProg)
-      return;
-
    if (uniformCount < 0) {
       _mesa_error(ctx, GL_INVALID_VALUE,
 		  "glGetActiveUniformsiv(uniformCount < 0)");
       return;
    }
 
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glGetActiveUniform");
+   if (!shProg)
+      return;
+
    for (i = 0; i < uniformCount; i++) {
       GLuint index = uniformIndices[i];
 
@@ -464,6 +469,9 @@ log_uniform(const void *values, enum glsl_base_type basicType,
       case GLSL_TYPE_FLOAT:
 	 printf("%g ", v[i].f);
 	 break;
+      case GLSL_TYPE_DOUBLE:
+         printf("%g ", *(double* )&v[i * 2].f);
+         break;
       default:
 	 assert(!"Should not get here.");
 	 break;
@@ -524,11 +532,12 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni,
     */
    const unsigned components = MAX2(1, uni->type->vector_elements);
    const unsigned vectors = MAX2(1, uni->type->matrix_columns);
+   const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;
 
    /* Store the data in the driver's requested type in the driver's storage
     * areas.
     */
-   unsigned src_vector_byte_stride = components * 4;
+   unsigned src_vector_byte_stride = components * 4 * dmul;
 
    for (i = 0; i < uni->num_driver_storage; i++) {
       struct gl_uniform_driver_storage *const store = &uni->driver_storage[i];
@@ -536,7 +545,7 @@ _mesa_propagate_uniforms_to_driver_storage(struct gl_uniform_storage *uni,
       const unsigned extra_stride =
 	 store->element_stride - (vectors * store->vector_stride);
       const uint8_t *src =
-	 (uint8_t *) (&uni->storage[array_index * (components * vectors)].i);
+	 (uint8_t *) (&uni->storage[array_index * (dmul * components * vectors)].i);
 
 #if 0
       printf("%s: %p[%d] components=%u vectors=%u count=%u vector_stride=%u "
@@ -603,6 +612,7 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
               unsigned src_components)
 {
    unsigned offset;
+   int size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;
 
    struct gl_uniform_storage *const uni =
       validate_uniform_parameters(ctx, shProg, location, count,
@@ -618,7 +628,7 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
    bool match;
    switch (uni->type->base_type) {
    case GLSL_TYPE_BOOL:
-      match = true;
+      match = (basicType != GLSL_TYPE_DOUBLE);
       break;
    case GLSL_TYPE_SAMPLER:
    case GLSL_TYPE_IMAGE:
@@ -705,8 +715,8 @@ _mesa_uniform(struct gl_context *ctx, struct gl_shader_program *shProg,
    /* Store the data in the "actual type" backing storage for the uniform.
     */
    if (!uni->type->is_boolean()) {
-      memcpy(&uni->storage[components * offset], values,
-	     sizeof(uni->storage[0]) * components * count);
+      memcpy(&uni->storage[size_mul * components * offset], values,
+	     sizeof(uni->storage[0]) * components * count * size_mul);
    } else {
       const union gl_constant_value *src =
 	 (const union gl_constant_value *) values;
@@ -803,13 +813,14 @@ extern "C" void
 _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
-                     GLboolean transpose, const GLfloat *values)
+                     GLboolean transpose,
+                     const GLvoid *values, GLenum type)
 {
    unsigned offset;
    unsigned vectors;
    unsigned components;
    unsigned elements;
-
+   int size_mul;
    struct gl_uniform_storage *const uni =
       validate_uniform_parameters(ctx, shProg, location, count,
                                   &offset, "glUniformMatrix");
@@ -822,6 +833,9 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
       return;
    }
 
+   assert(type == GL_FLOAT || type == GL_DOUBLE);
+   size_mul = type == GL_DOUBLE ? 2 : 1;
+
    assert(!uni->type->is_sampler());
    vectors = uni->type->matrix_columns;
    components = uni->type->vector_elements;
@@ -847,7 +861,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
    }
 
    if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
-      log_uniform(values, GLSL_TYPE_FLOAT, components, vectors, count,
+      log_uniform(values, uni->type->base_type, components, vectors, count,
 		  bool(transpose), shProg, location, uni);
    }
 
@@ -874,11 +888,11 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 
    if (!transpose) {
-      memcpy(&uni->storage[elements * offset], values,
-	     sizeof(uni->storage[0]) * elements * count);
-   } else {
+      memcpy(&uni->storage[size_mul * elements * offset], values,
+	     sizeof(uni->storage[0]) * elements * count * size_mul);
+   } else if (type == GL_FLOAT) {
       /* Copy and transpose the matrix.
        */
-      const float *src = values;
+      const float *src = (const float *)values;
       float *dst = &uni->storage[elements * offset].f;
 
       for (int i = 0; i < count; i++) {
@@ -891,6 +905,21 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 	 dst += elements;
 	 src += elements;
       }
+   } else {
+      assert(type == GL_DOUBLE);
+      const double *src = (const double *)values;
+      double *dst = (double *)&uni->storage[size_mul * elements * offset].f;
+
+      for (int i = 0; i < count; i++) {
+	 for (unsigned r = 0; r < rows; r++) {
+	    for (unsigned c = 0; c < cols; c++) {
+	       dst[(c * components) + r] = src[c + (r * vectors)];
+	    }
+	 }
+
+	 dst += elements;
+	 src += elements;
+      }
    }
 
    uni->initialized = true;
diff --git a/mesalib/src/mesa/main/uniforms.c b/mesalib/src/mesa/main/uniforms.c
index d2d70e7f7..e471b878c 100644
--- a/mesalib/src/mesa/main/uniforms.c
+++ b/mesalib/src/mesa/main/uniforms.c
@@ -553,7 +553,7 @@ _mesa_UniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value);
+			2, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -562,7 +562,7 @@ _mesa_UniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value);
+			3, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -571,7 +571,7 @@ _mesa_UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value);
+			4, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@ _mesa_ProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -694,7 +694,7 @@ _mesa_ProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -705,7 +705,7 @@ _mesa_ProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 
@@ -718,7 +718,7 @@ _mesa_UniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value);
+			2, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -727,7 +727,7 @@ _mesa_UniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value);
+			3, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -736,7 +736,7 @@ _mesa_UniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value);
+			2, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -745,7 +745,7 @@ _mesa_UniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value);
+			4, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -754,7 +754,7 @@ _mesa_UniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value);
+			3, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -763,7 +763,7 @@ _mesa_UniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
    GET_CURRENT_CONTEXT(ctx);
    _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value);
+			4, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@ _mesa_ProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -787,7 +787,7 @@ _mesa_ProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -798,7 +798,7 @@ _mesa_ProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -809,7 +809,7 @@ _mesa_ProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -820,7 +820,7 @@ _mesa_ProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
 }
 
 void GLAPIENTRY
@@ -831,7 +831,7 @@ _mesa_ProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count,
    struct gl_shader_program *shProg =
       _mesa_lookup_shader_program_err(ctx, program,
             "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
 }
 
 
@@ -1338,3 +1338,347 @@ _mesa_GetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex,
       return;
    }
 }
+
+void GLAPIENTRY
+_mesa_Uniform1d(GLint location, GLdouble v0)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, 1, &v0, GLSL_TYPE_DOUBLE, 1);
+}
+
+void GLAPIENTRY
+_mesa_Uniform2d(GLint location, GLdouble v0, GLdouble v1)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[2];
+   v[0] = v0;
+   v[1] = v1;
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, 1, v, GLSL_TYPE_DOUBLE, 2);
+}
+
+void GLAPIENTRY
+_mesa_Uniform3d(GLint location, GLdouble v0, GLdouble v1, GLdouble v2)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[3];
+   v[0] = v0;
+   v[1] = v1;
+   v[2] = v2;
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, 1, v, GLSL_TYPE_DOUBLE, 3);
+}
+
+void GLAPIENTRY
+_mesa_Uniform4d(GLint location, GLdouble v0, GLdouble v1, GLdouble v2,
+                GLdouble v3)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[4];
+   v[0] = v0;
+   v[1] = v1;
+   v[2] = v2;
+   v[3] = v3;
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, 1, v, GLSL_TYPE_DOUBLE, 4);
+}
+
+void GLAPIENTRY
+_mesa_Uniform1dv(GLint location, GLsizei count, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, count, value, GLSL_TYPE_DOUBLE, 1);
+}
+
+void GLAPIENTRY
+_mesa_Uniform2dv(GLint location, GLsizei count, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, count, value, GLSL_TYPE_DOUBLE, 2);
+}
+
+void GLAPIENTRY
+_mesa_Uniform3dv(GLint location, GLsizei count, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, count, value, GLSL_TYPE_DOUBLE, 3);
+}
+
+void GLAPIENTRY
+_mesa_Uniform4dv(GLint location, GLsizei count, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform(ctx, ctx->_Shader->ActiveProgram, location, count, value, GLSL_TYPE_DOUBLE, 4);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose,
+                       const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			2, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose,
+                       const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			3, 3, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose,
+                       const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			4, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			2, 3, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			3, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			2, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			4, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			3, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
+			4, 3, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform1d(GLuint program, GLint location, GLdouble v0)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniform1d");
+   _mesa_uniform(ctx, shProg, location, 1, &v0, GLSL_TYPE_DOUBLE, 1);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform2d(GLuint program, GLint location, GLdouble v0, GLdouble v1)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[2];
+   struct gl_shader_program *shProg;
+   v[0] = v0;
+   v[1] = v1;
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glProgramUniform2d");
+   _mesa_uniform(ctx, shProg, location, 1, v, GLSL_TYPE_DOUBLE, 2);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform3d(GLuint program, GLint location, GLdouble v0, GLdouble v1,
+                       GLdouble v2)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[3];
+   struct gl_shader_program *shProg;
+   v[0] = v0;
+   v[1] = v1;
+   v[2] = v2;
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glProgramUniform3d");
+   _mesa_uniform(ctx, shProg, location, 1, v, GLSL_TYPE_DOUBLE, 3);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform4d(GLuint program, GLint location, GLdouble v0, GLdouble v1,
+                       GLdouble v2, GLdouble v3)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLdouble v[4];
+   struct gl_shader_program *shProg;
+   v[0] = v0;
+   v[1] = v1;
+   v[2] = v2;
+   v[3] = v3;
+   shProg = _mesa_lookup_shader_program_err(ctx, program, "glProgramUniform4d");
+   _mesa_uniform(ctx, shProg, location, 1, v, GLSL_TYPE_DOUBLE, 4);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform1dv(GLuint program, GLint location, GLsizei count,
+                        const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniform1dv");
+   _mesa_uniform(ctx, shProg, location, count, value, GLSL_TYPE_DOUBLE, 1);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform2dv(GLuint program, GLint location, GLsizei count,
+                        const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniform2dv");
+   _mesa_uniform(ctx, shProg, location, count, value, GLSL_TYPE_DOUBLE, 2);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform3dv(GLuint program, GLint location, GLsizei count,
+                        const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniform3dv");
+   _mesa_uniform(ctx, shProg, location, count, value, GLSL_TYPE_DOUBLE, 3);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniform4dv(GLuint program, GLint location, GLsizei count,
+                        const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniform4dv");
+   _mesa_uniform(ctx, shProg, location, count, value, GLSL_TYPE_DOUBLE, 4);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count,
+                              GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix2dv");
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count,
+                              GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix3dv");
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count,
+                              GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix4dv");
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix2x3dv");
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix3x2dv");
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix2x4dv");
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix4x2dv");
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix3x4dv");
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+}
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble * value)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader_program *shProg =
+      _mesa_lookup_shader_program_err(ctx, program,
+            "glProgramUniformMatrix4x3dv");
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+}
diff --git a/mesalib/src/mesa/main/uniforms.h b/mesalib/src/mesa/main/uniforms.h
index 0a9ee7de9..0e6113fe9 100644
--- a/mesalib/src/mesa/main/uniforms.h
+++ b/mesalib/src/mesa/main/uniforms.h
@@ -254,6 +254,95 @@ _mesa_GetActiveUniformsiv(GLuint program,
 void GLAPIENTRY
 _mesa_GetUniformiv(GLuint, GLint, GLint *);
 
+void GLAPIENTRY
+_mesa_Uniform1d(GLint, GLdouble);
+void GLAPIENTRY
+_mesa_Uniform2d(GLint, GLdouble, GLdouble);
+void GLAPIENTRY
+_mesa_Uniform3d(GLint, GLdouble, GLdouble, GLdouble);
+void GLAPIENTRY
+_mesa_Uniform4d(GLint, GLdouble, GLdouble, GLdouble, GLdouble);
+
+void GLAPIENTRY
+_mesa_Uniform1dv(GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_Uniform2dv(GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_Uniform3dv(GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_Uniform4dv(GLint, GLsizei, const GLdouble *);
+
+void GLAPIENTRY
+_mesa_UniformMatrix2dv(GLint, GLsizei, GLboolean, const GLdouble *);
+void GLAPIENTRY
+_mesa_UniformMatrix3dv(GLint, GLsizei, GLboolean, const GLdouble *);
+void GLAPIENTRY
+_mesa_UniformMatrix4dv(GLint, GLsizei, GLboolean, const GLdouble *);
+void GLAPIENTRY
+_mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+void GLAPIENTRY
+_mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+void GLAPIENTRY
+_mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+void GLAPIENTRY
+_mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+void GLAPIENTRY
+_mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+void GLAPIENTRY
+_mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
+                         const GLdouble *value);
+
+void GLAPIENTRY
+_mesa_ProgramUniform1d(GLuint program, GLint, GLdouble);
+void GLAPIENTRY
+_mesa_ProgramUniform2d(GLuint program, GLint, GLdouble, GLdouble);
+void GLAPIENTRY
+_mesa_ProgramUniform3d(GLuint program, GLint, GLdouble, GLdouble, GLdouble);
+void GLAPIENTRY
+_mesa_ProgramUniform4d(GLuint program, GLint, GLdouble, GLdouble, GLdouble, GLdouble);
+
+void GLAPIENTRY
+_mesa_ProgramUniform1dv(GLuint program, GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniform2dv(GLuint program, GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniform3dv(GLuint program, GLint, GLsizei, const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniform4dv(GLuint program, GLint, GLsizei, const GLdouble *);
+
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2dv(GLuint program, GLint, GLsizei, GLboolean,
+                              const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3dv(GLuint program, GLint, GLsizei, GLboolean,
+                              const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4dv(GLuint program, GLint, GLsizei, GLboolean,
+                              const GLdouble *);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+void GLAPIENTRY
+_mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
+                                GLboolean transpose, const GLdouble *value);
+
 long
 _mesa_parse_program_resource_name(const GLchar *name,
                                   const GLchar **out_base_name_end);
@@ -273,7 +362,8 @@ void
 _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                      GLint location, GLsizei count,
-                     GLboolean transpose, const GLfloat *values);
+                     GLboolean transpose,
+                     const GLvoid *values, GLenum type);
 
 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
diff --git a/mesalib/src/mesa/main/varray.c b/mesalib/src/mesa/main/varray.c
index 89aaad1aa..978ec7b53 100644
--- a/mesalib/src/mesa/main/varray.c
+++ b/mesalib/src/mesa/main/varray.c
@@ -255,7 +255,7 @@ update_array_format(struct gl_context *ctx,
 {
    struct gl_vertex_attrib_array *array;
    GLbitfield typeBit;
-   GLuint elementSize;
+   GLint elementSize;
    GLenum format = GL_RGBA;
 
    if (ctx->Array.LegalTypesMask == 0 || ctx->Array.LegalTypesMaskAPI != ctx->API) {
diff --git a/mesalib/src/mesa/main/vdpau.c b/mesalib/src/mesa/main/vdpau.c
index e1c3e00ba..0efa56e4f 100644
--- a/mesalib/src/mesa/main/vdpau.c
+++ b/mesalib/src/mesa/main/vdpau.c
@@ -33,9 +33,9 @@
 
 #include <stdbool.h>
 #include "util/hash_table.h"
+#include "util/set.h"
 #include "context.h"
 #include "glformats.h"
-#include "set.h"
 #include "texobj.h"
 #include "teximage.h"
 #include "vdpau.h"
@@ -73,7 +73,8 @@ _mesa_VDPAUInitNV(const GLvoid *vdpDevice, const GLvoid *getProcAddress)
 
    ctx->vdpDevice = vdpDevice;
    ctx->vdpGetProcAddress = getProcAddress;
-   ctx->vdpSurfaces = _mesa_set_create(NULL, _mesa_key_pointer_equal);
+   ctx->vdpSurfaces = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                       _mesa_key_pointer_equal);
 }
 
 static void
@@ -179,7 +180,7 @@ register_surface(struct gl_context *ctx, GLboolean isOutput,
       _mesa_reference_texobj(&surf->textures[i], tex);
    }
 
-   _mesa_set_add(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf);
+   _mesa_set_add(ctx->vdpSurfaces, surf);
 
    return (GLintptr)surf;
 }
@@ -227,7 +228,7 @@ _mesa_VDPAUIsSurfaceNV(GLintptr surface)
       return false;
    }
 
-   if (!_mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf)) {
+   if (!_mesa_set_search(ctx->vdpSurfaces, surf)) {
       return false;
    }
 
@@ -251,7 +252,7 @@ _mesa_VDPAUUnregisterSurfaceNV(GLintptr surface)
    if (surface == 0)
       return;
 
-   entry = _mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf);
+   entry = _mesa_set_search(ctx->vdpSurfaces, surf);
    if (!entry) {
       _mesa_error(ctx, GL_INVALID_VALUE, "VDPAUUnregisterSurfaceNV");
       return;
@@ -280,7 +281,7 @@ _mesa_VDPAUGetSurfaceivNV(GLintptr surface, GLenum pname, GLsizei bufSize,
       return;
    }
 
-   if (!_mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf)) {
+   if (!_mesa_set_search(ctx->vdpSurfaces, surf)) {
       _mesa_error(ctx, GL_INVALID_VALUE, "VDPAUGetSurfaceivNV");
       return;
    }
@@ -312,7 +313,7 @@ _mesa_VDPAUSurfaceAccessNV(GLintptr surface, GLenum access)
       return;
    }
 
-   if (!_mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf)) {
+   if (!_mesa_set_search(ctx->vdpSurfaces, surf)) {
       _mesa_error(ctx, GL_INVALID_VALUE, "VDPAUSurfaceAccessNV");
       return;
    }
@@ -346,7 +347,7 @@ _mesa_VDPAUMapSurfacesNV(GLsizei numSurfaces, const GLintptr *surfaces)
    for (i = 0; i < numSurfaces; ++i) {
       struct vdp_surface *surf = (struct vdp_surface *)surfaces[i];
 
-      if (!_mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf)) {
+      if (!_mesa_set_search(ctx->vdpSurfaces, surf)) {
          _mesa_error(ctx, GL_INVALID_VALUE, "VDPAUSurfaceAccessNV");
          return;
       }
@@ -400,7 +401,7 @@ _mesa_VDPAUUnmapSurfacesNV(GLsizei numSurfaces, const GLintptr *surfaces)
    for (i = 0; i < numSurfaces; ++i) {
       struct vdp_surface *surf = (struct vdp_surface *)surfaces[i];
 
-      if (!_mesa_set_search(ctx->vdpSurfaces, _mesa_hash_pointer(surf), surf)) {
+      if (!_mesa_set_search(ctx->vdpSurfaces, surf)) {
          _mesa_error(ctx, GL_INVALID_VALUE, "VDPAUSurfaceAccessNV");
          return;
       }
@@ -422,7 +423,7 @@ _mesa_VDPAUUnmapSurfacesNV(GLsizei numSurfaces, const GLintptr *surfaces)
 
          _mesa_lock_texture(ctx, tex);
 
-         image = _mesa_select_tex_image(ctx, tex, surf->target, 0);
+         image = _mesa_select_tex_image(tex, surf->target, 0);
 
          ctx->Driver.VDPAUUnmapSurface(ctx, surf->target, surf->access,
                                        surf->output, tex, image,
diff --git a/mesalib/src/mesa/math/m_translate.c b/mesalib/src/mesa/math/m_translate.c
index 0b8c858d7..3a8ca74f6 100644
--- a/mesalib/src/mesa/math/m_translate.c
+++ b/mesalib/src/mesa/math/m_translate.c
@@ -30,7 +30,6 @@
 
 #include "main/glheader.h"
 #include "main/macros.h"
-#include "main/mtypes.h"		/* GLchan hack */
 
 #include "m_translate.h"
 
@@ -676,26 +675,6 @@ void _math_trans_4ub(GLubyte (*to)[4],
 }
 
 /**
- * Translate vector of values to GLchan [4].
- */
-void _math_trans_4chan( GLchan (*to)[4],
-			const void *ptr,
-			GLuint stride,
-			GLenum type,
-			GLuint size,
-			GLuint start,
-			GLuint n )
-{
-#if CHAN_TYPE == GL_UNSIGNED_BYTE
-   _math_trans_4ub( to, ptr, stride, type, size, start, n );
-#elif CHAN_TYPE == GL_UNSIGNED_SHORT
-   _math_trans_4us( to, ptr, stride, type, size, start, n );
-#elif CHAN_TYPE == GL_FLOAT
-   _math_trans_4fn( to, ptr, stride, type, size, start, n );
-#endif
-}
-
-/**
  * Translate vector of values to GLushort [4].
  */
 void _math_trans_4us(GLushort (*to)[4],
diff --git a/mesalib/src/mesa/math/m_translate.h b/mesalib/src/mesa/math/m_translate.h
index 250921a3f..bdfa4c770 100644
--- a/mesalib/src/mesa/math/m_translate.h
+++ b/mesalib/src/mesa/math/m_translate.h
@@ -28,8 +28,6 @@
 
 #include "main/compiler.h"
 #include "main/glheader.h"
-#include "main/mtypes.h"		/* hack for GLchan */
-#include "swrast/s_chan.h"
 
 /**
  * Array translation.
@@ -76,14 +74,6 @@ extern void _math_trans_4ub(GLubyte (*to)[4],
 			    GLuint start,
 			    GLuint n );
 
-extern void _math_trans_4chan( GLchan (*to)[4],
-			       const void *ptr,
-			       GLuint stride,
-			       GLenum type,
-			       GLuint size,
-			       GLuint start,
-			       GLuint n );
-
 extern void _math_trans_4us(GLushort (*to)[4],
 			    const void *ptr,
 			    GLuint stride,
diff --git a/mesalib/src/mesa/program/Android.mk b/mesalib/src/mesa/program/Android.mk
index 19c4be0fe..a237b65bc 100644
--- a/mesalib/src/mesa/program/Android.mk
+++ b/mesalib/src/mesa/program/Android.mk
@@ -39,8 +39,6 @@ endef
 # Import the following variables:
 #     PROGRAM_FILES
 include $(MESA_TOP)/src/mesa/Makefile.sources
-SRCDIR :=
-BUILDDIR :=
 
 include $(CLEAR_VARS)
 
diff --git a/mesalib/src/mesa/program/arbprogparse.c b/mesalib/src/mesa/program/arbprogparse.c
index 7dec399a5..53a6f37cb 100644
--- a/mesalib/src/mesa/program/arbprogparse.c
+++ b/mesalib/src/mesa/program/arbprogparse.c
@@ -85,9 +85,6 @@ _mesa_parse_arb_fragment_program(struct gl_context* ctx, GLenum target,
       return;
    }
 
-   if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0)
-      _mesa_optimize_program(ctx, &prog);
-
    free(program->Base.String);
 
    /* Copy the relevant contents of the arb_program struct into the
diff --git a/mesalib/src/mesa/program/hash_table.h b/mesalib/src/mesa/program/hash_table.h
index e95fc4982..eed2e55dc 100644
--- a/mesalib/src/mesa/program/hash_table.h
+++ b/mesalib/src/mesa/program/hash_table.h
@@ -198,6 +198,11 @@ string_to_uint_map_dtor(struct string_to_uint_map *);
 #ifdef __cplusplus
 }
 
+struct string_map_iterate_wrapper_closure { /* filled in by iterate() */
+   void (*callback)(const char *key, unsigned value, void *closure);
+   void *closure; /* forwarded to callback as its last argument */
+};
+
 /**
  * Map from a string (name) to an unsigned integer value
  *
@@ -229,6 +234,24 @@ public:
    }
 
    /**
+    * Call \c func once for every entry in the map.
+    *
+    * Each call receives the entry's key, its value and \c closure.  The
+    * callback/closure pair travels through the table walk in a wrapper
+    * that lives on the stack, so nothing is leaked and the walk cannot
+    * be skipped because an allocation failed.
+    */
+   void iterate(void (*func)(const char *, unsigned, void *), void *closure)
+   {
+      struct string_map_iterate_wrapper_closure wrapper;
+
+      wrapper.callback = func;
+      wrapper.closure = closure;
+
+      hash_table_call_foreach(this->ht, subtract_one_wrapper, &wrapper);
+   }
+
+   /**
     * Get the value associated with a particular key
     *
     * \return
@@ -281,6 +304,17 @@ private:
       free((char *)key);
    }
 
+   /* hash_table_call_foreach() hook: forwards one entry to the user's
+    * callback, passing the stored datum minus one as the value. */
+   static void subtract_one_wrapper(const void *key, void *data, void *closure)
+   {
+      const struct string_map_iterate_wrapper_closure *const hook =
+         (const struct string_map_iterate_wrapper_closure *) closure;
+      const unsigned stored = (intptr_t) data;
+
+      hook->callback((const char *) key, stored - 1, hook->closure);
+   }
+
    struct hash_table *ht;
 };
 
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index ce3af3120..b2776da45 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -606,6 +606,20 @@ type_size(const struct glsl_type *type)
 	  */
 	 return 1;
       }
+      break;
+   case GLSL_TYPE_DOUBLE:
+      if (type->is_matrix()) {
+         if (type->vector_elements > 2)
+            return type->matrix_columns * 2;
+         else
+            return type->matrix_columns;
+      } else {
+         if (type->vector_elements > 2)
+            return 2;
+         else
+            return 1;
+      }
+      break;
    case GLSL_TYPE_ARRAY:
       assert(type->length > 0);
       return type_size(type->fields.array) * type->length;
@@ -1152,7 +1166,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
       break;
    case ir_binop_mod:
-      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
       assert(ir->type->is_integer());
       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
       break;
@@ -1348,6 +1362,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_pack_unorm_2x16:
    case ir_unop_pack_unorm_4x8:
    case ir_unop_pack_half_2x16:
+   case ir_unop_pack_double_2x32:
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_snorm_4x8:
    case ir_unop_unpack_unorm_2x16:
@@ -1355,11 +1370,21 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_unpack_half_2x16:
    case ir_unop_unpack_half_2x16_split_x:
    case ir_unop_unpack_half_2x16_split_y:
+   case ir_unop_unpack_double_2x32:
    case ir_binop_pack_half_2x16_split:
    case ir_unop_bitfield_reverse:
    case ir_unop_bit_count:
    case ir_unop_find_msb:
    case ir_unop_find_lsb:
+   case ir_unop_d2f:
+   case ir_unop_f2d:
+   case ir_unop_d2i:
+   case ir_unop_i2d:
+   case ir_unop_d2u:
+   case ir_unop_u2d:
+   case ir_unop_d2b:
+   case ir_unop_frexp_sig:
+   case ir_unop_frexp_exp:
       assert(!"not supported");
       break;
    case ir_binop_min:
@@ -1449,6 +1474,7 @@ ir_to_mesa_visitor::visit(ir_swizzle *ir)
    ir->val->accept(this);
    src = this->result;
    assert(src.file != PROGRAM_UNDEFINED);
+   assert(ir->type->vector_elements > 0);
 
    for (i = 0; i < 4; i++) {
       if (i < ir->type->vector_elements) {
@@ -2384,6 +2410,8 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
 
    if (type->is_vector() || type->is_scalar()) {
       size = type->vector_elements;
+      if (type->is_double())
+         size *= 2;
    } else {
       size = type_size(type) * 4;
    }
@@ -2488,6 +2516,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
 	 enum gl_uniform_driver_format format = uniform_native;
 
 	 unsigned columns = 0;
+	 int dmul = 4 * sizeof(float);
 	 switch (storage->type->base_type) {
 	 case GLSL_TYPE_UINT:
 	    assert(ctx->Const.NativeIntegers);
@@ -2499,6 +2528,11 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
 	       (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
 	    columns = 1;
 	    break;
+
+	 case GLSL_TYPE_DOUBLE:
+	    if (storage->type->vector_elements > 2)
+               dmul *= 2;
+	    /* fallthrough */
 	 case GLSL_TYPE_FLOAT:
 	    format = uniform_native;
 	    columns = storage->type->matrix_columns;
@@ -2523,8 +2557,8 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
 	 }
 
 	 _mesa_uniform_attach_driver_storage(storage,
-					     4 * sizeof(float) * columns,
-					     4 * sizeof(float),
+					     dmul * columns,
+					     dmul,
 					     format,
 					     &params->ParameterValues[i]);
 
@@ -2942,7 +2976,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
 	 /* Lowering */
 	 do_mat_op_to_vec(ir);
-	 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
+	 lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
 				 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
 				 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
 
diff --git a/mesalib/src/mesa/program/prog_execute.c b/mesalib/src/mesa/program/prog_execute.c
index 33c1751e8..b2fbc808a 100644
--- a/mesalib/src/mesa/program/prog_execute.c
+++ b/mesalib/src/mesa/program/prog_execute.c
@@ -123,7 +123,7 @@ get_src_register_pointer(const struct prog_src_register *source,
       return (GLfloat *) prog->Parameters->ParameterValues[reg];
 
    case PROGRAM_SYSTEM_VALUE:
-      assert(reg < Elements(machine->SystemValues));
+      assert(reg < (GLint) Elements(machine->SystemValues));
       return machine->SystemValues[reg];
 
    default:
@@ -1260,7 +1260,6 @@ _mesa_execute_program(struct gl_context * ctx,
                else if (swz == SWIZZLE_ONE)
                   result[i] = 1.0;
                else {
-                  ASSERT(swz >= 0);
                   ASSERT(swz <= 3);
                   result[i] = src[swz];
                }
diff --git a/mesalib/src/mesa/program/prog_hash_table.c b/mesalib/src/mesa/program/prog_hash_table.c
index 2445d8434..5592b6fb8 100644
--- a/mesalib/src/mesa/program/prog_hash_table.c
+++ b/mesalib/src/mesa/program/prog_hash_table.c
@@ -29,7 +29,7 @@
  */
 
 #include "main/imports.h"
-#include "main/simple_list.h"
+#include "util/simple_list.h"
 #include "hash_table.h"
 
 struct node {
diff --git a/mesalib/src/mesa/program/prog_optimize.c b/mesalib/src/mesa/program/prog_optimize.c
index 60530ebf0..65d427cb4 100644
--- a/mesalib/src/mesa/program/prog_optimize.c
+++ b/mesalib/src/mesa/program/prog_optimize.c
@@ -408,7 +408,7 @@ find_next_use(const struct gl_program *prog,
             for (j = 0; j < numSrc; j++) {
                if (inst->SrcReg[j].RelAddr ||
                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
-                   inst->SrcReg[j].Index == index &&
+                   inst->SrcReg[j].Index == (GLint)index &&
                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
                   return READ;
             }
@@ -944,7 +944,7 @@ update_interval(GLint intBegin[], GLint intEnd[],
 		struct loop_info *loopStack, GLuint loopStackDepth,
 		GLuint index, GLuint ic)
 {
-   int i;
+   unsigned i;
    GLuint begin = ic;
    GLuint end = ic;
 
diff --git a/mesalib/src/mesa/program/prog_print.c b/mesalib/src/mesa/program/prog_print.c
index 4a5c1c1fb..3f499749a 100644
--- a/mesalib/src/mesa/program/prog_print.c
+++ b/mesalib/src/mesa/program/prog_print.c
@@ -82,7 +82,7 @@ _mesa_register_file_name(gl_register_file f)
  * Return ARB_v/f_prog-style input attrib string.
  */
 static const char *
-arb_input_attrib_string(GLint index, GLenum progType)
+arb_input_attrib_string(GLuint index, GLenum progType)
 {
    /*
     * These strings should match the VERT_ATTRIB_x and VARYING_SLOT_x tokens.
@@ -242,7 +242,7 @@ _mesa_print_fp_inputs(GLbitfield inputs)
  * Return ARB_v/f_prog-style output attrib string.
  */
 static const char *
-arb_output_attrib_string(GLint index, GLenum progType)
+arb_output_attrib_string(GLuint index, GLenum progType)
 {
    /*
     * These strings should match the VARYING_SLOT_x and FRAG_RESULT_x tokens.
diff --git a/mesalib/src/mesa/program/prog_statevars.c b/mesalib/src/mesa/program/prog_statevars.c
index be5ddb106..7f5daf8c6 100644
--- a/mesalib/src/mesa/program/prog_statevars.c
+++ b/mesalib/src/mesa/program/prog_statevars.c
@@ -295,9 +295,7 @@ _mesa_fetch_state(struct gl_context *ctx, const gl_state_index state[],
          const gl_state_index modifier = state[4];
          const GLfloat *m;
          GLuint row, i;
-         ASSERT(firstRow >= 0);
          ASSERT(firstRow < 4);
-         ASSERT(lastRow >= 0);
          ASSERT(lastRow < 4);
          if (mat == STATE_MODELVIEW_MATRIX) {
             matrix = ctx->ModelviewMatrixStack.Top;
diff --git a/mesalib/src/mesa/program/programopt.c b/mesalib/src/mesa/program/programopt.c
index b654b1db6..fdaa4a465 100644
--- a/mesalib/src/mesa/program/programopt.c
+++ b/mesalib/src/mesa/program/programopt.c
@@ -589,94 +589,3 @@ _mesa_remove_output_reads(struct gl_program *prog, gl_register_file type)
       }
    }
 }
-
-
-/**
- * Make the given fragment program into a "no-op" shader.
- * Actually, just copy the incoming fragment color (or texcoord)
- * to the output color.
- * This is for debug/test purposes.
- */
-void
-_mesa_nop_fragment_program(struct gl_context *ctx, struct gl_fragment_program *prog)
-{
-   struct prog_instruction *inst;
-   GLuint inputAttr;
-
-   inst = _mesa_alloc_instructions(2);
-   if (!inst) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "_mesa_nop_fragment_program");
-      return;
-   }
-
-   _mesa_init_instructions(inst, 2);
-
-   inst[0].Opcode = OPCODE_MOV;
-   inst[0].DstReg.File = PROGRAM_OUTPUT;
-   inst[0].DstReg.Index = FRAG_RESULT_COLOR;
-   inst[0].SrcReg[0].File = PROGRAM_INPUT;
-   if (prog->Base.InputsRead & VARYING_BIT_COL0)
-      inputAttr = VARYING_SLOT_COL0;
-   else
-      inputAttr = VARYING_SLOT_TEX0;
-   inst[0].SrcReg[0].Index = inputAttr;
-
-   inst[1].Opcode = OPCODE_END;
-
-   _mesa_free_instructions(prog->Base.Instructions,
-                           prog->Base.NumInstructions);
-
-   prog->Base.Instructions = inst;
-   prog->Base.NumInstructions = 2;
-   prog->Base.InputsRead = BITFIELD64_BIT(inputAttr);
-   prog->Base.OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR);
-}
-
-
-/**
- * \sa _mesa_nop_fragment_program
- * Replace the given vertex program with a "no-op" program that just
- * transforms vertex position and emits color.
- */
-void
-_mesa_nop_vertex_program(struct gl_context *ctx, struct gl_vertex_program *prog)
-{
-   struct prog_instruction *inst;
-   GLuint inputAttr;
-
-   /*
-    * Start with a simple vertex program that emits color.
-    */
-   inst = _mesa_alloc_instructions(2);
-   if (!inst) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "_mesa_nop_vertex_program");
-      return;
-   }
-
-   _mesa_init_instructions(inst, 2);
-
-   inst[0].Opcode = OPCODE_MOV;
-   inst[0].DstReg.File = PROGRAM_OUTPUT;
-   inst[0].DstReg.Index = VARYING_SLOT_COL0;
-   inst[0].SrcReg[0].File = PROGRAM_INPUT;
-   if (prog->Base.InputsRead & VERT_BIT_COLOR0)
-      inputAttr = VERT_ATTRIB_COLOR0;
-   else
-      inputAttr = VERT_ATTRIB_TEX0;
-   inst[0].SrcReg[0].Index = inputAttr;
-
-   inst[1].Opcode = OPCODE_END;
-
-   _mesa_free_instructions(prog->Base.Instructions,
-                           prog->Base.NumInstructions);
-
-   prog->Base.Instructions = inst;
-   prog->Base.NumInstructions = 2;
-   prog->Base.InputsRead = BITFIELD64_BIT(inputAttr);
-   prog->Base.OutputsWritten = BITFIELD64_BIT(VARYING_SLOT_COL0);
-
-   /*
-    * Now insert code to do standard modelview/projection transformation.
-    */
-   _mesa_insert_mvp_code(ctx, prog);
-}
diff --git a/mesalib/src/mesa/program/programopt.h b/mesalib/src/mesa/program/programopt.h
index 93c00f740..757421edf 100644
--- a/mesalib/src/mesa/program/programopt.h
+++ b/mesalib/src/mesa/program/programopt.h
@@ -51,12 +51,6 @@ _mesa_count_texture_instructions(struct gl_program *prog);
 extern void
 _mesa_remove_output_reads(struct gl_program *prog, gl_register_file type);
 
-extern void
-_mesa_nop_fragment_program(struct gl_context *ctx, struct gl_fragment_program *prog);
-
-extern void
-_mesa_nop_vertex_program(struct gl_context *ctx, struct gl_vertex_program *prog);
-
 
 #ifdef __cplusplus
 }
diff --git a/mesalib/src/mesa/state_tracker/st_atom_blend.c b/mesalib/src/mesa/state_tracker/st_atom_blend.c
index 064e0c14f..6bb4077f3 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_blend.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_blend.c
@@ -175,7 +175,7 @@ static GLboolean
 blend_per_rt(const struct gl_context *ctx)
 {
    if (ctx->Color.BlendEnabled &&
-      (ctx->Color.BlendEnabled != ((1 << ctx->Const.MaxDrawBuffers) - 1))) {
+      (ctx->Color.BlendEnabled != ((1U << ctx->Const.MaxDrawBuffers) - 1))) {
       /* This can only happen if GL_EXT_draw_buffers2 is enabled */
       return GL_TRUE;
    }
diff --git a/mesalib/src/mesa/state_tracker/st_atom_rasterizer.c b/mesalib/src/mesa/state_tracker/st_atom_rasterizer.c
index 606f19a18..cceed42c8 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_rasterizer.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_rasterizer.c
@@ -155,6 +155,7 @@ static void update_raster_state( struct st_context *st )
       raster->offset_tri = ctx->Polygon.OffsetFill;
       raster->offset_units = ctx->Polygon.OffsetUnits;
       raster->offset_scale = ctx->Polygon.OffsetFactor;
+      raster->offset_clamp = ctx->Polygon.OffsetClamp;
    }
 
    raster->poly_smooth = ctx->Polygon.SmoothFlag;
diff --git a/mesalib/src/mesa/state_tracker/st_atom_sampler.c b/mesalib/src/mesa/state_tracker/st_atom_sampler.c
index 17b536bf5..b68eb16d7 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_sampler.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_sampler.c
@@ -36,6 +36,7 @@
 #include "main/mtypes.h"
 #include "main/glformats.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
 #include "main/texobj.h"
 
 #include "st_context.h"
@@ -133,7 +134,6 @@ convert_sampler(struct st_context *st,
    const struct gl_texture_object *texobj;
    struct gl_context *ctx = st->ctx;
    struct gl_sampler_object *msamp;
-   const struct gl_texture_image *teximg;
    GLenum texBaseFormat;
 
    texobj = ctx->Texture.Unit[texUnit]._Current;
@@ -141,8 +141,7 @@ convert_sampler(struct st_context *st,
       texobj = _mesa_get_fallback_texture(ctx, TEXTURE_2D_INDEX);
    }
 
-   teximg = texobj->Image[0][texobj->BaseLevel];
-   texBaseFormat = teximg ? teximg->_BaseFormat : GL_RGBA;
+   texBaseFormat = _mesa_texture_base_format(texobj);
 
    msamp = _mesa_get_samplerobj(ctx, texUnit);
 
diff --git a/mesalib/src/mesa/state_tracker/st_atom_scissor.c b/mesalib/src/mesa/state_tracker/st_atom_scissor.c
index b72030944..4ebe799e3 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_scissor.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_scissor.c
@@ -47,7 +47,7 @@ update_scissor( struct st_context *st )
    const struct gl_context *ctx = st->ctx;
    const struct gl_framebuffer *fb = ctx->DrawBuffer;
    GLint miny, maxy;
-   int i;
+   unsigned i;
    bool changed = false;
    for (i = 0 ; i < ctx->Const.MaxViewports; i++) {
       scissor[i].minx = 0;
diff --git a/mesalib/src/mesa/state_tracker/st_atom_shader.c b/mesalib/src/mesa/state_tracker/st_atom_shader.c
index 6515a98a3..73768ed12 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_shader.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_shader.c
@@ -149,7 +149,12 @@ update_vp( struct st_context *st )
    key.passthrough_edgeflags = st->vertdata_edgeflags;
 
    key.clamp_color = st->clamp_vert_color_in_shader &&
-                     st->ctx->Light._ClampVertexColor;
+                     st->ctx->Light._ClampVertexColor &&
+                     /* OutputsWritten is a bitmask indexed by slot, so test
+                      * it against VARYING_BIT_x masks, not VARYING_SLOT_x. */
+                     (stvp->Base.Base.OutputsWritten &
+                      (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
+                       VARYING_BIT_BFC0 | VARYING_BIT_BFC1));
 
    st->vp_variant = st_get_vp_variant(st, stvp, &key);
 
diff --git a/mesalib/src/mesa/state_tracker/st_atom_texture.c b/mesalib/src/mesa/state_tracker/st_atom_texture.c
index 19072ae2f..eff28fc6f 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_texture.c
@@ -35,6 +35,7 @@
 #include "main/macros.h"
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
 #include "main/texobj.h"
 #include "program/prog_instruction.h"
 
@@ -175,12 +176,11 @@ compute_texture_format_swizzle(GLenum baseFormat, GLenum depthMode,
 static unsigned
 get_texture_format_swizzle(const struct st_texture_object *stObj)
 {
-   const struct gl_texture_image *texImage =
-      stObj->base.Image[0][stObj->base.BaseLevel];
+   GLenum baseFormat = _mesa_texture_base_format(&stObj->base);
    unsigned tex_swizzle;
 
-   if (texImage) {
-      tex_swizzle = compute_texture_format_swizzle(texImage->_BaseFormat,
+   if (baseFormat != GL_NONE) {
+      tex_swizzle = compute_texture_format_swizzle(baseFormat,
                                                    stObj->base.DepthMode,
                                                    stObj->pt->format);
    }
diff --git a/mesalib/src/mesa/state_tracker/st_atom_viewport.c b/mesalib/src/mesa/state_tracker/st_atom_viewport.c
index efa056e10..2f62590c4 100644
--- a/mesalib/src/mesa/state_tracker/st_atom_viewport.c
+++ b/mesalib/src/mesa/state_tracker/st_atom_viewport.c
@@ -44,7 +44,7 @@ update_viewport( struct st_context *st )
 {
    struct gl_context *ctx = st->ctx;
    GLfloat yScale, yBias;
-   int i;
+   unsigned i;
    /* _NEW_BUFFERS
     */
    if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
diff --git a/mesalib/src/mesa/state_tracker/st_cb_blit.c b/mesalib/src/mesa/state_tracker/st_cb_blit.c
index 9c33f4eb9..bbaedd108 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_blit.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_blit.c
@@ -73,6 +73,8 @@ st_adjust_blit_for_msaa_resolve(struct pipe_blit_info *blit)
 
 static void
 st_BlitFramebuffer(struct gl_context *ctx,
+                   struct gl_framebuffer *readFB,
+                   struct gl_framebuffer *drawFB,
                    GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                    GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                    GLbitfield mask, GLenum filter)
@@ -83,8 +85,6 @@ st_BlitFramebuffer(struct gl_context *ctx,
    const uint pFilter = ((filter == GL_NEAREST)
                          ? PIPE_TEX_FILTER_NEAREST
                          : PIPE_TEX_FILTER_LINEAR);
-   struct gl_framebuffer *readFB = ctx->ReadBuffer;
-   struct gl_framebuffer *drawFB = ctx->DrawBuffer;
    struct {
       GLint srcX0, srcY0, srcX1, srcY1;
       GLint dstX0, dstY0, dstX1, dstY1;
@@ -108,7 +108,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
     *
     * XXX: This should depend on mask !
     */
-   if (!_mesa_clip_blit(ctx,
+   if (!_mesa_clip_blit(ctx, readFB, drawFB,
                         &clip.srcX0, &clip.srcY0, &clip.srcX1, &clip.srcY1,
                         &clip.dstX0, &clip.dstY0, &clip.dstX1, &clip.dstY1)) {
       return; /* nothing to draw/blit */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
index 55f36442a..f24805cf5 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -186,7 +186,8 @@ st_bufferobj_data(struct gl_context *ctx,
    struct st_buffer_object *st_obj = st_buffer_object(obj);
    unsigned bind, pipe_usage, pipe_flags = 0;
 
-   if (size && data && st_obj->buffer &&
+   if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD &&
+       size && data && st_obj->buffer &&
        st_obj->Base.Size == size &&
        st_obj->Base.Usage == usage &&
        st_obj->Base.StorageFlags == storageFlags) {
@@ -256,8 +257,15 @@ st_bufferobj_data(struct gl_context *ctx,
          break;
       case GL_STREAM_DRAW:
       case GL_STREAM_COPY:
-         pipe_usage = PIPE_USAGE_STREAM;
-         break;
+         /* XXX: Remove this test and fall-through when we have PBO unpacking
+          * acceleration. Right now, PBO unpacking is done by the CPU, so we
+          * have to make sure CPU reads are fast.
+          */
+         if (target != GL_PIXEL_UNPACK_BUFFER_ARB) {
+            pipe_usage = PIPE_USAGE_STREAM;
+            break;
+         }
+         /* fall through */
       case GL_STATIC_READ:
       case GL_DYNAMIC_READ:
       case GL_STREAM_READ:
@@ -280,6 +288,7 @@ st_bufferobj_data(struct gl_context *ctx,
    }
 
    if (size != 0) {
+      struct pipe_screen *screen = pipe->screen;
       struct pipe_resource buffer;
 
       memset(&buffer, 0, sizeof buffer);
@@ -293,16 +302,22 @@ st_bufferobj_data(struct gl_context *ctx,
       buffer.depth0 = 1;
       buffer.array_size = 1;
 
-      st_obj->buffer = pipe->screen->resource_create(pipe->screen, &buffer);
+      if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+         st_obj->buffer =
+            screen->resource_from_user_memory(screen, &buffer, (void*)data);
+      }
+      else {
+         st_obj->buffer = screen->resource_create(screen, &buffer);
+
+         if (st_obj->buffer && data)
+            pipe_buffer_write(pipe, st_obj->buffer, 0, size, data);
+      }
 
       if (!st_obj->buffer) {
          /* out of memory */
          st_obj->Base.Size = 0;
          return GL_FALSE;
       }
-
-      if (data)
-         pipe_buffer_write(pipe, st_obj->buffer, 0, size, data);
    }
 
    /* BufferData may change an array or uniform buffer, need to update it */
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
index 939fc2065..14fc13952 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -1100,7 +1100,7 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
    const GLfloat *color;
    struct pipe_context *pipe = st->pipe;
    GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
-   struct pipe_sampler_view *sv[2];
+   struct pipe_sampler_view *sv[2] = { NULL };
    int num_sampler_view = 1;
    struct st_fp_variant *fpv;
    struct gl_pixelstore_attrib clippedUnpack;
@@ -1154,8 +1154,9 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
 
       color = NULL;
       if (st->pixel_xfer.pixelmap_enabled) {
-	  sv[1] = st->pixel_xfer.pixelmap_sampler_view;
-	  num_sampler_view++;
+         pipe_sampler_view_reference(&sv[1],
+                                     st->pixel_xfer.pixelmap_sampler_view);
+         num_sampler_view++;
       }
    }
 
@@ -1176,7 +1177,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y,
             if (write_stencil) {
                enum pipe_format stencil_format =
                      util_format_stencil_only(pt->format);
-
+               /* we should not be doing pixel map/transfer (see above) */
+               assert(num_sampler_view == 1);
                sv[1] = st_create_texture_sampler_view_format(st->pipe, pt,
                                                              stencil_format);
                num_sampler_view++;
@@ -1467,7 +1469,7 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
    struct st_renderbuffer *rbRead;
    void *driver_vp, *driver_fp;
    struct pipe_resource *pt;
-   struct pipe_sampler_view *sv[2];
+   struct pipe_sampler_view *sv[2] = { NULL };
    int num_sampler_view = 1;
    GLfloat *color;
    enum pipe_format srcFormat;
@@ -1516,7 +1518,8 @@ st_CopyPixels(struct gl_context *ctx, GLint srcx, GLint srcy,
       driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
 
       if (st->pixel_xfer.pixelmap_enabled) {
-         sv[1] = st->pixel_xfer.pixelmap_sampler_view;
+         pipe_sampler_view_reference(&sv[1],
+                                     st->pixel_xfer.pixelmap_sampler_view);
          num_sampler_view++;
       }
    }
diff --git a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
index d057ff62a..1420b96e5 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_drawtex.c
@@ -15,6 +15,7 @@
 #include "main/imports.h"
 #include "main/image.h"
 #include "main/macros.h"
+#include "main/teximage.h"
 #include "program/program.h"
 #include "program/prog_print.h"
 
@@ -196,7 +197,7 @@ st_DrawTex(struct gl_context *ctx, GLfloat x, GLfloat y, GLfloat z,
          if (ctx->Texture.Unit[i]._Current &&
              ctx->Texture.Unit[i]._Current->Target == GL_TEXTURE_2D) {
             struct gl_texture_object *obj = ctx->Texture.Unit[i]._Current;
-            struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
+            const struct gl_texture_image *img = _mesa_base_tex_image(obj);
             const GLfloat wt = (GLfloat) img->Width;
             const GLfloat ht = (GLfloat) img->Height;
             const GLfloat s0 = obj->CropRect[0] / wt;
diff --git a/mesalib/src/mesa/state_tracker/st_cb_fbo.c b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
index 7b6a444e6..296ea1e0d 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_fbo.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_fbo.c
@@ -408,9 +408,9 @@ st_update_renderbuffer_surface(struct st_context *st,
 {
    struct pipe_context *pipe = st->pipe;
    struct pipe_resource *resource = strb->texture;
-   int rtt_width = strb->Base.Width;
-   int rtt_height = strb->Base.Height;
-   int rtt_depth = strb->Base.Depth;
+   unsigned rtt_width = strb->Base.Width;
+   unsigned rtt_height = strb->Base.Height;
+   unsigned rtt_depth = strb->Base.Depth;
    /*
     * For winsys fbo, it is possible that the renderbuffer is sRGB-capable but
     * the format of strb->texture is linear (because we have no control over
diff --git a/mesalib/src/mesa/state_tracker/st_cb_queryobj.c b/mesalib/src/mesa/state_tracker/st_cb_queryobj.c
index 489f537d8..71222e80b 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_queryobj.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_queryobj.c
@@ -110,6 +110,19 @@ st_BeginQuery(struct gl_context *ctx, struct gl_query_object *q)
       else
          type = PIPE_QUERY_TIMESTAMP;
       break;
+   case GL_VERTICES_SUBMITTED_ARB:
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+      type = PIPE_QUERY_PIPELINE_STATISTICS;
+      break;
    default:
       assert(0 && "unexpected query target in st_BeginQuery()");
       return;
@@ -178,6 +191,8 @@ get_query_result(struct pipe_context *pipe,
                  struct st_query_object *stq,
                  boolean wait)
 {
+   union pipe_query_result data;
+
    if (!stq->pq) {
       /* Only needed in case we failed to allocate the gallium query earlier.
        * Return TRUE so we don't spin on this forever.
@@ -185,11 +200,46 @@ get_query_result(struct pipe_context *pipe,
       return TRUE;
    }
 
-   if (!pipe->get_query_result(pipe,
-                               stq->pq,
-                               wait,
-                               (void *)&stq->base.Result)) {
+   if (!pipe->get_query_result(pipe, stq->pq, wait, &data))
       return FALSE;
+
+   switch (stq->base.Target) {
+   case GL_VERTICES_SUBMITTED_ARB:
+      stq->base.Result = data.pipeline_statistics.ia_vertices;
+      break;
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+      stq->base.Result = data.pipeline_statistics.ia_primitives;
+      break;
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+      stq->base.Result = data.pipeline_statistics.vs_invocations;
+      break;
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+      stq->base.Result = data.pipeline_statistics.hs_invocations;
+      break;
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+      stq->base.Result = data.pipeline_statistics.ds_invocations;
+      break;
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+      stq->base.Result = data.pipeline_statistics.gs_invocations;
+      break;
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+      stq->base.Result = data.pipeline_statistics.gs_primitives;
+      break;
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+      stq->base.Result = data.pipeline_statistics.ps_invocations;
+      break;
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+      stq->base.Result = data.pipeline_statistics.cs_invocations;
+      break;
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+      stq->base.Result = data.pipeline_statistics.c_invocations;
+      break;
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+      stq->base.Result = data.pipeline_statistics.c_primitives;
+      break;
+   default:
+      stq->base.Result = data.u64;
+      break;
    }
 
    if (stq->base.Target == GL_TIME_ELAPSED &&
diff --git a/mesalib/src/mesa/state_tracker/st_cb_texture.c b/mesalib/src/mesa/state_tracker/st_cb_texture.c
index a8dbb7888..0525e879f 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_texture.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_texture.c
@@ -29,6 +29,8 @@
 #include "main/enums.h"
 #include "main/fbobject.h"
 #include "main/formats.h"
+#include "main/format_utils.h"
+#include "main/glformats.h"
 #include "main/image.h"
 #include "main/imports.h"
 #include "main/macros.h"
@@ -209,7 +211,7 @@ st_MapTextureImage(struct gl_context *ctx,
    map = st_texture_image_map(st, stImage, pipeMode, x, y, slice, w, h, 1,
                               &transfer);
    if (map) {
-      if (_mesa_is_format_etc2(texImage->TexFormat) ||
+      if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
           (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
          /* ETC isn't supported by gallium and it's represented
           * by uncompressed formats. Only write transfers with precompressed
@@ -252,7 +254,7 @@ st_UnmapTextureImage(struct gl_context *ctx,
    struct st_context *st = st_context(ctx);
    struct st_texture_image *stImage  = st_texture_image(texImage);
 
-   if (_mesa_is_format_etc2(texImage->TexFormat) ||
+   if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
        (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
       /* Decompress the ETC texture to the mapped one. */
       unsigned z = slice + stImage->base.Face;
@@ -899,7 +901,7 @@ st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
  * We can do arbitrary X/Y/Z/W/0/1 swizzling here as long as there is
  * a format which matches the swizzling.
  *
- * If such a format isn't available, it falls back to _mesa_get_teximage.
+ * If such a format isn't available, it falls back to _mesa_GetTexImage_sw.
  *
  * NOTE: Drivers usually do a blit to convert between tiled and linear
  *       texture layouts during texture uploads/downloads, so the blit
@@ -944,14 +946,14 @@ st_GetTexImage(struct gl_context * ctx,
       goto fallback;
    }
 
-   /* XXX Fallback to _mesa_get_teximage for depth-stencil formats
+   /* XXX Fallback to _mesa_GetTexImage_sw for depth-stencil formats
     * due to an incomplete stencil blit implementation in some drivers. */
    if (format == GL_DEPTH_STENCIL) {
       goto fallback;
    }
 
    /* If the base internal format and the texture format don't match, we have
-    * to fall back to _mesa_get_teximage. */
+    * to fall back to _mesa_GetTexImage_sw. */
    if (texImage->_BaseFormat !=
        _mesa_get_format_base_format(texImage->TexFormat)) {
       goto fallback;
@@ -1005,7 +1007,7 @@ st_GetTexImage(struct gl_context * ctx,
    if (dst_format == PIPE_FORMAT_NONE) {
       GLenum dst_glformat;
 
-      /* Fall back to _mesa_get_teximage except for compressed formats,
+      /* Fall back to _mesa_GetTexImage_sw except for compressed formats,
        * where decompression with a blit is always preferred. */
       if (!util_format_is_compressed(src->format)) {
          goto fallback;
@@ -1138,6 +1140,8 @@ st_GetTexImage(struct gl_context * ctx,
       /* format translation via floats */
       GLuint row, slice;
       GLfloat *rgba;
+      uint32_t dstMesaFormat;
+      int dstStride, srcStride;
 
       assert(util_format_is_compressed(src->format));
 
@@ -1149,6 +1153,9 @@ st_GetTexImage(struct gl_context * ctx,
       if (ST_DEBUG & DEBUG_FALLBACK)
          debug_printf("%s: fallback format translation\n", __FUNCTION__);
 
+      dstMesaFormat = _mesa_format_from_format_and_type(format, type);
+      dstStride = _mesa_image_row_stride(&ctx->Pack, width, format, type);
+      srcStride = 4 * width * sizeof(GLfloat);
       for (slice = 0; slice < depth; slice++) {
          if (gl_target == GL_TEXTURE_1D_ARRAY) {
             /* 1D array textures.
@@ -1162,8 +1169,9 @@ st_GetTexImage(struct gl_context * ctx,
             pipe_get_tile_rgba_format(tex_xfer, map, 0, 0, width, 1,
                                       dst_format, rgba);
 
-            _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
-                                       type, dest, &ctx->Pack, 0);
+            _mesa_format_convert(dest, dstMesaFormat, dstStride,
+                                 rgba, RGBA32_FLOAT, srcStride,
+                                 width, 1, NULL);
          }
          else {
             for (row = 0; row < height; row++) {
@@ -1175,8 +1183,9 @@ st_GetTexImage(struct gl_context * ctx,
                pipe_get_tile_rgba_format(tex_xfer, map, 0, row, width, 1,
                                          dst_format, rgba);
 
-               _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
-                                          type, dest, &ctx->Pack, 0);
+               _mesa_format_convert(dest, dstMesaFormat, dstStride,
+                                    rgba, RGBA32_FLOAT, srcStride,
+                                    width, 1, NULL);
             }
          }
          map += tex_xfer->layer_stride;
@@ -1195,7 +1204,7 @@ end:
 
 fallback:
    if (!done) {
-      _mesa_get_teximage(ctx, format, type, pixels, texImage);
+      _mesa_GetTexImage_sw(ctx, format, type, pixels, texImage);
    }
 }
 
@@ -1546,7 +1555,7 @@ st_finalize_texture(struct gl_context *ctx,
    struct st_texture_object *stObj = st_texture_object(tObj);
    const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
    GLuint face;
-   struct st_texture_image *firstImage;
+   const struct st_texture_image *firstImage;
    enum pipe_format firstImageFormat;
    GLuint ptWidth, ptHeight, ptDepth, ptLayers, ptNumSamples;
 
@@ -1587,7 +1596,7 @@ st_finalize_texture(struct gl_context *ctx,
 
    }
 
-   firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]);
+   firstImage = st_texture_image_const(_mesa_base_tex_image(&stObj->base));
    assert(firstImage);
 
    /* If both firstImage and stObj point to a texture which can contain
@@ -1886,7 +1895,7 @@ st_init_texture_functions(struct dd_function_table *functions)
 
    /* compressed texture functions */
    functions->CompressedTexImage = st_CompressedTexImage;
-   functions->GetCompressedTexImage = _mesa_get_compressed_teximage;
+   functions->GetCompressedTexImage = _mesa_GetCompressedTexImage_sw;
 
    functions->NewTextureObject = st_NewTextureObject;
    functions->NewTextureImage = st_NewTextureImage;
diff --git a/mesalib/src/mesa/state_tracker/st_cb_xformfb.c b/mesalib/src/mesa/state_tracker/st_cb_xformfb.c
index 8f75eda8a..a2bd86aff 100644
--- a/mesalib/src/mesa/state_tracker/st_cb_xformfb.c
+++ b/mesalib/src/mesa/state_tracker/st_cb_xformfb.c
@@ -122,7 +122,7 @@ st_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
    for (i = 0; i < max_num_targets; i++) {
       struct st_buffer_object *bo = st_buffer_object(sobj->base.Buffers[i]);
 
-      if (bo) {
+      if (bo && bo->buffer) {
          /* Check whether we need to recreate the target. */
          if (!sobj->targets[i] ||
              sobj->targets[i] == sobj->draw_count ||
diff --git a/mesalib/src/mesa/state_tracker/st_context.c b/mesalib/src/mesa/state_tracker/st_context.c
index 9fd6caece..5834ebad3 100644
--- a/mesalib/src/mesa/state_tracker/st_context.c
+++ b/mesalib/src/mesa/state_tracker/st_context.c
@@ -230,6 +230,9 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
    st->has_etc1 = screen->is_format_supported(screen, PIPE_FORMAT_ETC1_RGB8,
                                               PIPE_TEXTURE_2D, 0,
                                               PIPE_BIND_SAMPLER_VIEW);
+   st->has_etc2 = screen->is_format_supported(screen, PIPE_FORMAT_ETC2_RGB8,
+                                              PIPE_TEXTURE_2D, 0,
+                                              PIPE_BIND_SAMPLER_VIEW);
    st->prefer_blit_based_texture_transfer = screen->get_param(screen,
                               PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER);
 
diff --git a/mesalib/src/mesa/state_tracker/st_context.h b/mesalib/src/mesa/state_tracker/st_context.h
index 20d567864..b091a8856 100644
--- a/mesalib/src/mesa/state_tracker/st_context.h
+++ b/mesalib/src/mesa/state_tracker/st_context.h
@@ -93,6 +93,7 @@ struct st_context
    boolean has_time_elapsed;
    boolean has_shader_model3;
    boolean has_etc1;
+   boolean has_etc2;
    boolean prefer_blit_based_texture_transfer;
 
    boolean needs_texcoord_semantic;
diff --git a/mesalib/src/mesa/state_tracker/st_draw.c b/mesalib/src/mesa/state_tracker/st_draw.c
index 64d6ef525..488f6ead2 100644
--- a/mesalib/src/mesa/state_tracker/st_draw.c
+++ b/mesalib/src/mesa/state_tracker/st_draw.c
@@ -40,6 +40,7 @@
 #include "main/image.h"
 #include "main/bufferobj.h"
 #include "main/macros.h"
+#include "main/varray.h"
 
 #include "vbo/vbo.h"
 
@@ -225,7 +226,7 @@ st_draw_vbo(struct gl_context *ctx,
       }
 
       info.indexed = TRUE;
-      if (min_index != ~0 && max_index != ~0) {
+      if (min_index != ~0U && max_index != ~0U) {
          info.min_index = min_index;
          info.max_index = max_index;
       }
@@ -234,7 +235,7 @@ st_draw_vbo(struct gl_context *ctx,
        * so we only set these fields for indexed drawing:
        */
       info.primitive_restart = ctx->Array._PrimitiveRestart;
-      info.restart_index = ctx->Array.RestartIndex;
+      info.restart_index = _mesa_primitive_restart_index(ctx, ib->type);
    }
    else {
       /* Transform feedback drawing is always non-indexed. */
diff --git a/mesalib/src/mesa/state_tracker/st_extensions.c b/mesalib/src/mesa/state_tracker/st_extensions.c
index e472b84a8..ce29d076c 100644
--- a/mesalib/src/mesa/state_tracker/st_extensions.c
+++ b/mesalib/src/mesa/state_tracker/st_extensions.c
@@ -411,7 +411,8 @@ void st_init_extensions(struct pipe_screen *screen,
                         struct st_config_options *options,
                         boolean has_lib_dxtc)
 {
-   int i, glsl_feature_level;
+   unsigned i;
+   int glsl_feature_level;
    GLboolean *extension_table = (GLboolean *) extensions;
 
    static const struct st_extension_cap_mapping cap_mapping[] = {
@@ -425,6 +426,7 @@ void st_init_extensions(struct pipe_screen *screen,
       { o(ARB_instanced_arrays),             PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR  },
       { o(ARB_occlusion_query),              PIPE_CAP_OCCLUSION_QUERY                  },
       { o(ARB_occlusion_query2),             PIPE_CAP_OCCLUSION_QUERY                  },
+      { o(ARB_pipeline_statistics_query),    PIPE_CAP_QUERY_PIPELINE_STATISTICS        },
       { o(ARB_point_sprite),                 PIPE_CAP_POINT_SPRITE                     },
       { o(ARB_seamless_cube_map),            PIPE_CAP_SEAMLESS_CUBE_MAP                },
       { o(ARB_shader_stencil_export),        PIPE_CAP_SHADER_STENCIL_EXPORT            },
@@ -445,6 +447,7 @@ void st_init_extensions(struct pipe_screen *screen,
       { o(EXT_texture_swizzle),              PIPE_CAP_TEXTURE_SWIZZLE                  },
       { o(EXT_transform_feedback),           PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS        },
 
+      { o(AMD_pinned_memory),                PIPE_CAP_RESOURCE_FROM_USER_MEMORY        },
       { o(AMD_seamless_cubemap_per_texture), PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE    },
       { o(ATI_separate_stencil),             PIPE_CAP_TWO_SIDED_STENCIL                },
       { o(ATI_texture_mirror_once),          PIPE_CAP_TEXTURE_MIRROR_CLAMP             },
@@ -463,6 +466,7 @@ void st_init_extensions(struct pipe_screen *screen,
       { o(ARB_conditional_render_inverted),  PIPE_CAP_CONDITIONAL_RENDER_INVERTED      },
       { o(ARB_texture_view),                 PIPE_CAP_SAMPLER_VIEW_TARGET              },
       { o(ARB_clip_control),                 PIPE_CAP_CLIP_HALFZ                       },
+      { o(EXT_polygon_offset_clamp),         PIPE_CAP_POLYGON_OFFSET_CLAMP             },
    };
 
    /* Required: render target and sampler support */
@@ -705,7 +709,7 @@ void st_init_extensions(struct pipe_screen *screen,
       extensions->EXT_texture_integer = GL_FALSE;
    }
 
-   consts->UniformBooleanTrue = consts->NativeIntegers ? ~0 : fui(1.0f);
+   consts->UniformBooleanTrue = consts->NativeIntegers ? ~0U : fui(1.0f);
 
    /* Below are the cases which cannot be moved into tables easily. */
 
@@ -896,4 +900,10 @@ void st_init_extensions(struct pipe_screen *screen,
                                PIPE_VIDEO_CAP_SUPPORTS_INTERLACED)) {
       extensions->NV_vdpau_interop = GL_TRUE;
    }
+
+   if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
+                                PIPE_SHADER_CAP_DOUBLES) &&
+       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+                                PIPE_SHADER_CAP_DOUBLES))
+      extensions->ARB_gpu_shader_fp64 = GL_TRUE;
 }
diff --git a/mesalib/src/mesa/state_tracker/st_format.c b/mesalib/src/mesa/state_tracker/st_format.c
index 6c53567fc..7868bb501 100644
--- a/mesalib/src/mesa/state_tracker/st_format.c
+++ b/mesalib/src/mesa/state_tracker/st_format.c
@@ -443,21 +443,25 @@ st_mesa_format_to_pipe_format(struct st_context *st, mesa_format mesaFormat)
     * The destination formats mustn't be changed, because they are also
     * destination formats of the unpack/decompression function. */
    case MESA_FORMAT_ETC2_RGB8:
-   case MESA_FORMAT_ETC2_RGBA8_EAC:
-   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
-      return PIPE_FORMAT_R8G8B8A8_UNORM;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_RGB8 : PIPE_FORMAT_R8G8B8A8_UNORM;
    case MESA_FORMAT_ETC2_SRGB8:
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGB8 : PIPE_FORMAT_B8G8R8A8_SRGB;
+   case MESA_FORMAT_ETC2_RGBA8_EAC:
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_RGBA8 : PIPE_FORMAT_R8G8B8A8_UNORM;
    case MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC:
-   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
-      return PIPE_FORMAT_B8G8R8A8_SRGB;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGBA8 : PIPE_FORMAT_B8G8R8A8_SRGB;
    case MESA_FORMAT_ETC2_R11_EAC:
-      return PIPE_FORMAT_R16_UNORM;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_R11_UNORM : PIPE_FORMAT_R16_UNORM;
    case MESA_FORMAT_ETC2_RG11_EAC:
-      return PIPE_FORMAT_R16G16_UNORM;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_RG11_UNORM : PIPE_FORMAT_R16G16_UNORM;
    case MESA_FORMAT_ETC2_SIGNED_R11_EAC:
-      return PIPE_FORMAT_R16_SNORM;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_R11_SNORM : PIPE_FORMAT_R16_SNORM;
    case MESA_FORMAT_ETC2_SIGNED_RG11_EAC:
-      return PIPE_FORMAT_R16G16_SNORM;
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_RG11_SNORM : PIPE_FORMAT_R16G16_SNORM;
+   case MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_RGB8A1 : PIPE_FORMAT_R8G8B8A8_UNORM;
+   case MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
+      return st->has_etc2 ? PIPE_FORMAT_ETC2_SRGB8A1 : PIPE_FORMAT_B8G8R8A8_SRGB;
 
    default:
       return PIPE_FORMAT_NONE;
@@ -856,6 +860,27 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
    case PIPE_FORMAT_XRGB8888_SRGB:
       return MESA_FORMAT_X8R8G8B8_SRGB;
 
+   case PIPE_FORMAT_ETC2_RGB8:
+      return MESA_FORMAT_ETC2_RGB8;
+   case PIPE_FORMAT_ETC2_SRGB8:
+      return MESA_FORMAT_ETC2_SRGB8;
+   case PIPE_FORMAT_ETC2_RGB8A1:
+      return MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1;
+   case PIPE_FORMAT_ETC2_SRGB8A1:
+      return MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1;
+   case PIPE_FORMAT_ETC2_RGBA8:
+      return MESA_FORMAT_ETC2_RGBA8_EAC;
+   case PIPE_FORMAT_ETC2_SRGBA8:
+      return MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC;
+   case PIPE_FORMAT_ETC2_R11_UNORM:
+      return MESA_FORMAT_ETC2_R11_EAC;
+   case PIPE_FORMAT_ETC2_R11_SNORM:
+      return MESA_FORMAT_ETC2_SIGNED_R11_EAC;
+   case PIPE_FORMAT_ETC2_RG11_UNORM:
+      return MESA_FORMAT_ETC2_RG11_EAC;
+   case PIPE_FORMAT_ETC2_RG11_SNORM:
+      return MESA_FORMAT_ETC2_SIGNED_RG11_EAC;
+
    default:
       return MESA_FORMAT_NONE;
    }
@@ -896,6 +921,9 @@ test_format_conversion(struct st_context *st)
       if (i == PIPE_FORMAT_ETC1_RGB8 && !st->has_etc1)
          continue;
 
+      if (_mesa_is_format_etc2(mf) && !st->has_etc2)
+         continue;
+
       if (mf != MESA_FORMAT_NONE) {
          enum pipe_format pf = st_mesa_format_to_pipe_format(st, mf);
          assert(pf == i);
@@ -1797,7 +1825,8 @@ st_choose_format(struct st_context *st, GLenum internalFormat,
                  unsigned bindings, boolean allow_dxt)
 {
    struct pipe_screen *screen = st->pipe->screen;
-   int i, j;
+   unsigned i;
+   int j;
    enum pipe_format pf;
 
 #ifdef DEBUG
diff --git a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 89654344b..0b3477161 100644
--- a/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/mesalib/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -229,7 +229,7 @@ public:
    DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)
 
    unsigned op;
-   st_dst_reg dst;
+   st_dst_reg dst[2];
    st_src_reg src[4];
    /** Pointer to the ir source this tree came from for debugging */
    ir_instruction *ir;
@@ -262,16 +262,17 @@ public:
 
 class immediate_storage : public exec_node {
 public:
-   immediate_storage(gl_constant_value *values, int size, int type)
+   immediate_storage(gl_constant_value *values, int size32, int type)
    {
-      memcpy(this->values, values, size * sizeof(gl_constant_value));
-      this->size = size;
+      memcpy(this->values, values, size32 * sizeof(gl_constant_value));
+      this->size32 = size32;
       this->type = type;
    }
-   
+
+   /* doubles are stored across 2 gl_constant_values */
    gl_constant_value values[4];
-   int size; /**< Number of components (1-4) */
-   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+   int size32; /**< Number of 32-bit components (1-4) */
+   int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
 };
 
 class function_entry : public exec_node {
@@ -327,14 +328,14 @@ public:
    int num_address_regs;
    int samplers_used;
    bool indirect_addr_consts;
-   
+
    int glsl_version;
    bool native_integers;
    bool have_sqrt;
 
    variable_storage *find_variable_storage(ir_variable *var);
 
-   int add_constant(gl_register_file file, gl_constant_value values[4],
+   int add_constant(gl_register_file file, gl_constant_value values[8],
                     int size, int datatype, GLuint *swizzle_out);
 
    function_entry *get_function_signature(ir_function_signature *sig);
@@ -342,6 +343,7 @@ public:
    st_src_reg get_temp(const glsl_type *type);
    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
 
+   st_src_reg st_src_reg_for_double(double val);
    st_src_reg st_src_reg_for_float(float val);
    st_src_reg st_src_reg_for_int(int val);
    st_src_reg st_src_reg_for_type(int type, int val);
@@ -394,20 +396,29 @@ public:
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
 
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-        		        st_dst_reg dst, st_src_reg src0);
+                                  st_dst_reg dst, st_src_reg src0);
+
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+                                  st_dst_reg dst, st_dst_reg dst1,
+                                  st_src_reg src0);
 
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-        		        st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
-        		        st_dst_reg dst,
-        		        st_src_reg src0, st_src_reg src1, st_src_reg src2);
+                                  st_dst_reg dst,
+                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);
 
    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                   st_dst_reg dst,
                                   st_src_reg src0, st_src_reg src1,
                                   st_src_reg src2, st_src_reg src3);
 
+   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
+                                  st_dst_reg dst, st_dst_reg dst1,
+                                  st_src_reg src0, st_src_reg src1,
+                                  st_src_reg src2, st_src_reg src3);
+
    unsigned get_opcode(ir_instruction *ir, unsigned op,
                     st_dst_reg dst,
                     st_src_reg src0, st_src_reg src1);
@@ -422,15 +433,15 @@ public:
                                      unsigned elements);
 
    void emit_scalar(ir_instruction *ir, unsigned op,
-        	    st_dst_reg dst, st_src_reg src0);
+                    st_dst_reg dst, st_src_reg src0);
 
    void emit_scalar(ir_instruction *ir, unsigned op,
-        	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
+                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
 
    void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
 
    void emit_scs(ir_instruction *ir, unsigned op,
-        	 st_dst_reg dst, const st_src_reg &src);
+                 st_dst_reg dst, const st_src_reg &src);
 
    bool try_emit_mad(ir_expression *ir,
               int mul_operand);
@@ -451,11 +462,14 @@ public:
 
    void copy_propagate(void);
    int eliminate_dead_code(void);
+
+   void merge_two_dsts(void);
    void merge_registers(void);
    void renumber_registers(void);
 
    void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
-                       st_dst_reg *l, st_src_reg *r);
+                       st_dst_reg *l, st_src_reg *r,
+                       st_src_reg *cond, bool cond_swap);
 
    void *mem_ctx;
 };
@@ -485,7 +499,7 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...)
 static int
 swizzle_for_size(int size)
 {
-   int size_swizzles[4] = {
+   static const int size_swizzles[4] = {
       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
@@ -519,13 +533,13 @@ num_inst_src_regs(unsigned opcode)
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-                           st_dst_reg dst,
+                           st_dst_reg dst, st_dst_reg dst1,
                            st_src_reg src0, st_src_reg src1,
                            st_src_reg src2, st_src_reg src3)
 {
    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
-   int num_reladdr = 0, i;
-   
+   int num_reladdr = 0, i, j;
+
    op = get_opcode(ir, op, dst, src0, src1);
 
    /* If we have to do relative addressing, we want to load the ARL
@@ -533,6 +547,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
     * sources into temps.
     */
    num_reladdr += dst.reladdr != NULL;
+   num_reladdr += dst1.reladdr != NULL;
    num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
    num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
    num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
@@ -547,10 +562,15 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
       emit_arl(ir, address_reg, *dst.reladdr);
       num_reladdr--;
    }
+   if (dst1.reladdr) {
+      emit_arl(ir, address_reg, *dst1.reladdr);
+      num_reladdr--;
+   }
    assert(num_reladdr == 0);
 
    inst->op = op;
-   inst->dst = dst;
+   inst->dst[0] = dst;
+   inst->dst[1] = dst1;
    inst->src[0] = src0;
    inst->src[1] = src1;
    inst->src[2] = src2;
@@ -559,7 +579,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
    inst->dead_mask = 0;
 
    inst->function = NULL;
-   
+
    /* Update indirect addressing status used by TGSI */
    if (dst.reladdr) {
       switch(dst.file) {
@@ -576,7 +596,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
       }
    }
    else {
-      for (i=0; i<4; i++) {
+      for (i = 0; i < 4; i++) {
          if(inst->src[i].reladdr) {
             switch(inst->src[i].file) {
             case PROGRAM_STATE_VAR:
@@ -596,46 +616,162 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
 
    this->instructions.push_tail(inst);
 
+   /*
+    * This section contains the double processing.
+    * GLSL just represents doubles as single channel values,
+    * however most HW and TGSI represent doubles as pairs of register channels.
+    *
+    * so we have to fixup destination writemask/index and src swizzle/indexes.
+    * dest writemasks need to translate from single channel write mask
+    * to a dual-channel writemask, but also need to modify the index,
+    * if we are touching the Z,W fields in the pre-translated writemask.
+    *
+    * src channels have similar index modifications along with swizzle
+    * changes so we pick the XY, ZW pairs from the correct index.
+    *
+    * GLSL [0].x -> TGSI [0].xy
+    * GLSL [0].y -> TGSI [0].zw
+    * GLSL [0].z -> TGSI [1].xy
+    * GLSL [0].w -> TGSI [1].zw
+    */
+   if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == GLSL_TYPE_DOUBLE ||
+       inst->src[0].type == GLSL_TYPE_DOUBLE) {
+      glsl_to_tgsi_instruction *dinst = NULL;
+      int initial_src_swz[4], initial_src_idx[4];
+      int initial_dst_idx[2], initial_dst_writemask[2];
+      /* select the writemask for dst0 or dst1 */
+      unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
+
+      /* copy out the writemask, index and swizzles for all src/dsts. */
+      for (j = 0; j < 2; j++) {
+         initial_dst_writemask[j] = inst->dst[j].writemask;
+         initial_dst_idx[j] = inst->dst[j].index;
+      }
+
+      for (j = 0; j < 4; j++) {
+         initial_src_swz[j] = inst->src[j].swizzle;
+         initial_src_idx[j] = inst->src[j].index;
+      }
+
+      /*
+       * scan all the components in the dst writemask
+       * generate an instruction for each of them if required.
+       */
+      while (writemask) {
+
+         int i = u_bit_scan(&writemask);
+
+         /* first time use previous instruction */
+         if (dinst == NULL) {
+            dinst = inst;
+         } else {
+            /* create a new instruction for subsequent attempts */
+            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
+            *dinst = *inst;
+            dinst->next = NULL;
+            dinst->prev = NULL;
+            this->instructions.push_tail(dinst);
+         }
+
+         /* modify the destination if we are splitting */
+         for (j = 0; j < 2; j++) {
+            if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
+               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
+               dinst->dst[j].index = initial_dst_idx[j];
+               if (i > 1)
+                     dinst->dst[j].index++;
+            } else {
+               /* if we aren't writing to a double, just get the bit of the initial writemask
+                  for this channel */
+               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
+            }
+         }
+
+         /* modify the src registers */
+         for (j = 0; j < 4; j++) {
+            int swz = GET_SWZ(initial_src_swz[j], i);
+
+            if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
+               dinst->src[j].index = initial_src_idx[j];
+               if (swz > 1)
+                  dinst->src[j].index++;
+
+               if (swz & 1)
+                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+               else
+                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+
+            } else {
+               /* some opcodes are special case in what they use as sources
+                  - F2D is a float src0, DLDEXP is integer src1 */
+               if (op == TGSI_OPCODE_F2D ||
+                   op == TGSI_OPCODE_DLDEXP ||
+                   (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) {
+                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
+               }
+            }
+         }
+      }
+      inst = dinst;
+   }
+
+
    return inst;
 }
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+                           st_dst_reg dst,
+                           st_src_reg src0, st_src_reg src1,
+                           st_src_reg src2, st_src_reg src3)
+{
+   return emit(ir, op, dst, undef_dst, src0, src1, src2, src3);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                            st_dst_reg dst, st_src_reg src0,
                            st_src_reg src1, st_src_reg src2)
 {
-   return emit(ir, op, dst, src0, src1, src2, undef_src);
+   return emit(ir, op, dst, undef_dst, src0, src1, src2, undef_src);
 }
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-        		 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
+                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
 {
-   return emit(ir, op, dst, src0, src1, undef_src, undef_src);
+   return emit(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src);
 }
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
-        		 st_dst_reg dst, st_src_reg src0)
+                           st_dst_reg dst, st_src_reg src0)
 {
    assert(dst.writemask != 0);
-   return emit(ir, op, dst, src0, undef_src, undef_src, undef_src);
+   return emit(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src);
+}
+
+glsl_to_tgsi_instruction *
+glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
+                           st_dst_reg dst, st_dst_reg dst1, st_src_reg src0)
+{
+   return emit(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src);
 }
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
 {
-   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src, undef_src);
+   return emit(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src);
 }
 
 /**
- * Determines whether to use an integer, unsigned integer, or float opcode 
+ * Determines whether to use an integer, unsigned integer, float, or double opcode
  * based on the operands and input opcode, then emits the result.
  */
 unsigned
 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
-        		 st_dst_reg dst,
-        		 st_src_reg src0, st_src_reg src1)
+                                 st_dst_reg dst,
+                                 st_src_reg src0, st_src_reg src1)
 {
    int type = GLSL_TYPE_FLOAT;
 
@@ -647,12 +783,26 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
    assert(src1.type != GLSL_TYPE_ARRAY);
    assert(src1.type != GLSL_TYPE_STRUCT);
 
-   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
+   if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
+      type = GLSL_TYPE_DOUBLE;
+   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
       type = GLSL_TYPE_FLOAT;
    else if (native_integers)
       type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
 
-#define case4(c, f, i, u) \
+#define case5(c, f, i, u, d)                    \
+   case TGSI_OPCODE_##c: \
+      if (type == GLSL_TYPE_DOUBLE)           \
+         op = TGSI_OPCODE_##d; \
+      else if (type == GLSL_TYPE_INT)       \
+         op = TGSI_OPCODE_##i; \
+      else if (type == GLSL_TYPE_UINT) \
+         op = TGSI_OPCODE_##u; \
+      else \
+         op = TGSI_OPCODE_##f; \
+      break;
+
+#define case4(c, f, i, u)                    \
    case TGSI_OPCODE_##c: \
       if (type == GLSL_TYPE_INT) \
          op = TGSI_OPCODE_##i; \
@@ -663,12 +813,16 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
       break;
 
 #define case3(f, i, u)  case4(f, f, i, u)
+#define case4d(f, i, u, d)  case5(f, f, i, u, d)
+#define case3fid(f, i, d) case5(f, f, i, i, d)
 #define case2fi(f, i)   case4(f, f, i, i)
 #define case2iu(i, u)   case4(i, LAST, i, u)
 
-#define casecomp(c, f, i, u) \
+#define casecomp(c, f, i, u, d)                   \
    case TGSI_OPCODE_##c: \
-      if (type == GLSL_TYPE_INT) \
+      if (type == GLSL_TYPE_DOUBLE) \
+         op = TGSI_OPCODE_##d; \
+      else if (type == GLSL_TYPE_INT)       \
          op = TGSI_OPCODE_##i; \
       else if (type == GLSL_TYPE_UINT) \
          op = TGSI_OPCODE_##u; \
@@ -679,38 +833,50 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
       break;
 
    switch(op) {
-      case2fi(ADD, UADD);
-      case2fi(MUL, UMUL);
-      case2fi(MAD, UMAD);
+      case3fid(ADD, UADD, DADD);
+      case3fid(MUL, UMUL, DMUL);
+      case3fid(MAD, UMAD, DMAD);
       case3(DIV, IDIV, UDIV);
-      case3(MAX, IMAX, UMAX);
-      case3(MIN, IMIN, UMIN);
+      case4d(MAX, IMAX, UMAX, DMAX);
+      case4d(MIN, IMIN, UMIN, DMIN);
       case2iu(MOD, UMOD);
 
-      casecomp(SEQ, FSEQ, USEQ, USEQ);
-      casecomp(SNE, FSNE, USNE, USNE);
-      casecomp(SGE, FSGE, ISGE, USGE);
-      casecomp(SLT, FSLT, ISLT, USLT);
+      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
+      casecomp(SNE, FSNE, USNE, USNE, DSNE);
+      casecomp(SGE, FSGE, ISGE, USGE, DSGE);
+      casecomp(SLT, FSLT, ISLT, USLT, DSLT);
 
       case2iu(ISHR, USHR);
 
-      case2fi(SSG, ISSG);
-      case3(ABS, IABS, IABS);
+      case3fid(SSG, ISSG, DSSG);
+      case3fid(ABS, IABS, DABS);
 
       case2iu(IBFE, UBFE);
       case2iu(IMSB, UMSB);
       case2iu(IMUL_HI, UMUL_HI);
+
+      case3fid(SQRT, SQRT, DSQRT);
+
+      case3fid(RCP, RCP, DRCP);
+      case3fid(RSQ, RSQ, DRSQ);
+
+      case3fid(FRC, FRC, DFRAC);
+      case3fid(TRUNC, TRUNC, DTRUNC);
+      case3fid(CEIL, CEIL, DCEIL);
+      case3fid(FLR, FLR, DFLR);
+      case3fid(ROUND, ROUND, DROUND);
+
       default: break;
    }
-   
+
    assert(op != TGSI_OPCODE_LAST);
    return op;
 }
 
 glsl_to_tgsi_instruction *
 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
-        		    st_dst_reg dst, st_src_reg src0, st_src_reg src1,
-        		    unsigned elements)
+                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
+                              unsigned elements)
 {
    static const unsigned dot_opcodes[] = {
       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
@@ -729,8 +895,8 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
  */
 void
 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
-        		        st_dst_reg dst,
-        			st_src_reg orig_src0, st_src_reg orig_src1)
+                                  st_dst_reg dst,
+                                  st_src_reg orig_src0, st_src_reg orig_src1)
 {
    int i, j;
    int done_mask = ~dst.writemask;
@@ -741,7 +907,6 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
     */
    for (i = 0; i < 4; i++) {
       GLuint this_mask = (1 << i);
-      glsl_to_tgsi_instruction *inst;
       st_src_reg src0 = orig_src0;
       st_src_reg src1 = orig_src1;
 
@@ -762,19 +927,19 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
          }
       }
       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
-        			   src0_swiz, src0_swiz);
+                                   src0_swiz, src0_swiz);
       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
-        			  src1_swiz, src1_swiz);
+                                   src1_swiz, src1_swiz);
 
-      inst = emit(ir, op, dst, src0, src1);
-      inst->dst.writemask = this_mask;
+      dst.writemask = this_mask;
+      emit(ir, op, dst, src0, src1);
       done_mask |= this_mask;
    }
 }
 
 void
 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
-        		        st_dst_reg dst, st_src_reg src0)
+                                  st_dst_reg dst, st_src_reg src0)
 {
    st_src_reg undef = undef_src;
 
@@ -785,7 +950,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
 
 void
 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
-        		        st_dst_reg dst, st_src_reg src0)
+                               st_dst_reg dst, st_src_reg src0)
 {
    int op = TGSI_OPCODE_ARL;
 
@@ -803,20 +968,20 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
  * Emit an TGSI_OPCODE_SCS instruction
  *
  * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
- * Instead of splatting its result across all four components of the 
- * destination, it writes one value to the \c x component and another value to 
+ * Instead of splatting its result across all four components of the
+ * destination, it writes one value to the \c x component and another value to
  * the \c y component.
  *
  * \param ir        IR instruction being processed
- * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending 
+ * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
  *                  on which value is desired.
  * \param dst       Destination register
  * \param src       Source register
  */
 void
 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
-        		     st_dst_reg dst,
-        		     const st_src_reg &src)
+                               st_dst_reg dst,
+                               const st_src_reg &src)
 {
    /* Vertex programs cannot use the SCS opcode.
     */
@@ -855,7 +1020,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
       unsigned src0_swiz = GET_SWZ(src.swizzle, i);
 
       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
-        			   src0_swiz, src0_swiz);
+                                   src0_swiz, src0_swiz);
       for (unsigned j = i + 1; j < 4; j++) {
          /* If there is another enabled component in the destination that is
           * derived from the same inputs, generate its value on this pass as
@@ -874,20 +1039,20 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
          /* Emit the SCS instruction.
           */
          inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
-         inst->dst.writemask = scs_mask;
+         inst->dst[0].writemask = scs_mask;
 
          /* Move the result of the SCS instruction to the desired location in
           * the destination.
           */
          tmp.swizzle = MAKE_SWIZZLE4(component, component,
-        			     component, component);
+                                     component, component);
          inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
-         inst->dst.writemask = this_mask;
+         inst->dst[0].writemask = this_mask;
       } else {
          /* Emit the SCS instruction to write directly to the destination.
           */
          glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
-         inst->dst.writemask = scs_mask;
+         inst->dst[0].writemask = scs_mask;
       }
 
       done_mask |= this_mask;
@@ -896,35 +1061,54 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
 
 int
 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
-        		     gl_constant_value values[4], int size, int datatype,
-        		     GLuint *swizzle_out)
+                                   gl_constant_value values[8], int size, int datatype,
+                                   GLuint *swizzle_out)
 {
    if (file == PROGRAM_CONSTANT) {
       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
                                               size, datatype, swizzle_out);
-   } else {
-      int index = 0;
-      immediate_storage *entry;
-      assert(file == PROGRAM_IMMEDIATE);
+   }
 
-      /* Search immediate storage to see if we already have an identical
-       * immediate that we can use instead of adding a duplicate entry.
-       */
-      foreach_in_list(immediate_storage, entry, &this->immediates) {
-         if (entry->size == size &&
-             entry->type == datatype &&
-             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
-             return index;
-         }
-         index++;
+   assert(file == PROGRAM_IMMEDIATE);
+
+   int index = 0;
+   immediate_storage *entry;
+   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+   int i;
+
+   /* Search immediate storage to see if we already have an identical
+    * immediate that we can use instead of adding a duplicate entry.
+    */
+   foreach_in_list(immediate_storage, entry, &this->immediates) {
+      immediate_storage *tmp = entry;
+
+      for (i = 0; i * 4 < size32; i++) {
+         int slot_size = MIN2(size32 - (i * 4), 4);
+         if (tmp->type != datatype || tmp->size32 != slot_size)
+            break;
+         if (memcmp(tmp->values, &values[i * 4],
+                    slot_size * sizeof(gl_constant_value)))
+            break;
+
+         /* Everything matches, keep going until the full size is matched */
+         tmp = (immediate_storage *)tmp->next;
       }
-      
+
+      /* The full value matched */
+      if (i * 4 >= size32)
+         return index;
+
+      index++;
+   }
+
+   for (i = 0; i * 4 < size32; i++) {
+      int slot_size = MIN2(size32 - (i * 4), 4);
       /* Add this immediate to the list. */
-      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
       this->immediates.push_tail(entry);
       this->num_immediates++;
-      return index;
    }
+   return index;
 }
 
 st_src_reg
@@ -940,11 +1124,24 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
 }
 
 st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
+   union gl_constant_value uval[2];
+
+   uval[0].u = *(uint32_t *)&val;
+   uval[1].u = *(((uint32_t *)&val) + 1);
+   src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
+
+   return src;
+}
+
+st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
 {
    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
    union gl_constant_value uval;
-   
+
    assert(native_integers);
 
    uval.i = val;
@@ -957,7 +1154,7 @@ st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
 {
    if (native_integers)
-      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
+      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
                                        st_src_reg_for_int(val);
    else
       return st_src_reg_for_float(val);
@@ -984,6 +1181,23 @@ type_size(const struct glsl_type *type)
           */
          return 1;
       }
+      break;
+   case GLSL_TYPE_DOUBLE:
+      if (type->is_matrix()) {
+         if (type->vector_elements <= 2)
+            return type->matrix_columns;
+         else
+            return type->matrix_columns * 2;
+      } else {
+         /* For doubles if we have a double or dvec2 they fit in one
+          * vec4, else they need 2 vec4s.
+          */
+         if (type->vector_elements <= 2)
+            return 1;
+         else
+            return 2;
+      }
+      break;
    case GLSL_TYPE_ARRAY:
       assert(type->length > 0);
       return type_size(type->fields.array) * type->length;
@@ -1049,7 +1263,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
 variable_storage *
 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
 {
-   
+
    foreach_in_list(variable_storage, entry, &this->variables) {
       if (entry->var == var)
          return entry;
@@ -1110,7 +1324,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
 
       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
          int index = _mesa_add_state_reference(this->prog->Parameters,
-        				       (gl_state_index *)slots[i].tokens);
+                                               (gl_state_index *)slots[i].tokens);
 
          if (storage->file == PROGRAM_STATE_VAR) {
             if (storage->index == -1) {
@@ -1119,11 +1333,11 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
                assert(index == storage->index + (int)i);
             }
          } else {
-         	/* We use GLSL_TYPE_FLOAT here regardless of the actual type of
-         	 * the data being moved since MOV does not care about the type of
-         	 * data it is moving, and we don't want to declare registers with
-         	 * array or struct types.
-         	 */
+            /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
+             * the data being moved since MOV does not care about the type of
+             * data it is moving, and we don't want to declare registers with
+             * array or struct types.
+             */
             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
             src.swizzle = slots[i].swizzle;
             emit(ir, TGSI_OPCODE_MOV, dst, src);
@@ -1135,9 +1349,9 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
       if (storage->file == PROGRAM_TEMPORARY &&
           dst.index != storage->index + (int) ir->get_num_state_slots()) {
          fail_link(this->shader_program,
-        	   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
-        	   ir->name, dst.index - storage->index,
-        	   type_size(ir->type));
+                  "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
+                  ir->name, dst.index - storage->index,
+                  type_size(ir->type));
       }
    }
 }
@@ -1261,7 +1475,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan
 
 void
 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
-        			    st_src_reg *reg, int *num_reladdr)
+                                      st_src_reg *reg, int *num_reladdr)
 {
    if (!reg->reladdr && !reg->reladdr2)
       return;
@@ -1300,9 +1514,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
     */
    if (!native_integers && ir->operation == ir_binop_logic_and) {
       if (try_emit_mad_for_and_not(ir, 1))
-	 return;
+         return;
       if (try_emit_mad_for_and_not(ir, 0))
-	 return;
+         return;
    }
 
    if (ir->operation == ir_quadop_vector)
@@ -1328,7 +1542,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    int vector_elements = ir->operands[0]->type->vector_elements;
    if (ir->operands[1]) {
       vector_elements = MAX2(vector_elements,
-        		     ir->operands[1]->type->vector_elements);
+                             ir->operands[1]->type->vector_elements);
    }
 
    this->result.file = PROGRAM_UNDEFINED;
@@ -1362,6 +1576,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_neg:
       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
          emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else if (result_dst.type == GLSL_TYPE_DOUBLE)
+         emit(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
       else {
          op[0].negate = ~op[0].negate;
          result_src = op[0];
@@ -1441,6 +1657,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       break;
    }
 
+   case ir_unop_frexp_sig:
+      emit(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
+      break;
+
+   case ir_unop_frexp_exp:
+      emit(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
+      break;
+
    case ir_unop_noise: {
       /* At some point, a motivated person could add a better
        * implementation of noise.  Currently not even the nvidia
@@ -1463,7 +1687,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
       break;
    case ir_binop_div:
-      if (result_dst.type == GLSL_TYPE_FLOAT)
+      if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
       else
          emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
@@ -1498,15 +1722,15 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(native_integers ?
-               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
-               glsl_type::vec4_type);
-         
+                                    glsl_type::uvec4_type :
+                                    glsl_type::vec4_type);
+
          if (native_integers) {
             st_dst_reg temp_dst = st_dst_reg(temp);
             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
-            
+
             emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
-            
+
             /* Emit 1-3 AND operations to combine the SEQ results. */
             switch (ir->operands[0]->type->vector_elements) {
             case 2:
@@ -1527,13 +1751,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                temp2.swizzle = SWIZZLE_WWWW;
                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
             }
-            
+
             temp1.swizzle = SWIZZLE_XXXX;
             temp2.swizzle = SWIZZLE_YYYY;
             emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
          } else {
             emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-            
+
             /* After the dot-product, the value will be an integer on the
              * range [0,4].  Zero becomes 1.0, and positive values become zero.
              */
@@ -1556,14 +1780,14 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       if (ir->operands[0]->type->is_vector() ||
           ir->operands[1]->type->is_vector()) {
          st_src_reg temp = get_temp(native_integers ?
-               glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
-               glsl_type::vec4_type);
+                                    glsl_type::uvec4_type :
+                                    glsl_type::vec4_type);
          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
 
          if (native_integers) {
             st_dst_reg temp_dst = st_dst_reg(temp);
             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
-            
+
             /* Emit 1-3 OR operations to combine the SNE results. */
             switch (ir->operands[0]->type->vector_elements) {
             case 2:
@@ -1584,7 +1808,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
                temp2.swizzle = SWIZZLE_WWWW;
                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
             }
-            
+
             temp1.swizzle = SWIZZLE_XXXX;
             temp2.swizzle = SWIZZLE_YYYY;
             emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
@@ -1706,7 +1930,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
 
    case ir_binop_logic_or: {
       if (native_integers) {
-         /* If integers are used as booleans, we can use an actual "or" 
+         /* If integers are used as booleans, we can use an actual "or"
           * instruction.
           */
          assert(native_integers);
@@ -1756,8 +1980,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_sqrt:
       if (have_sqrt) {
          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
-      }
-      else {
+      } else {
          /* sqrt(x) = x * rsq(x). */
          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
          emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
@@ -1795,7 +2018,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           */
          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
       } else {
-         /* Booleans and integers are both stored as floats when native 
+         /* Booleans and integers are both stored as floats when native
           * integers are disabled.
           */
          result_src = op[0];
@@ -1829,6 +2052,9 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_f2b:
       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
       break;
+   case ir_unop_d2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
+      break;
    case ir_unop_i2b:
       if (native_integers)
          emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
@@ -1908,12 +2134,12 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       st_src_reg index_reg = get_temp(glsl_type::uint_type);
       st_src_reg cbuf;
 
-      cbuf.type = glsl_type::vec4_type->base_type;
+      cbuf.type = ir->type->base_type;
       cbuf.file = PROGRAM_CONSTANT;
       cbuf.index = 0;
       cbuf.reladdr = NULL;
       cbuf.negate = 0;
-      
+
       assert(ir->type->is_vector() || ir->type->is_scalar());
 
       if (const_offset_ir) {
@@ -1944,10 +2170,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
       }
 
       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
-      cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
-                                    const_offset % 16 / 4,
-                                    const_offset % 16 / 4,
-                                    const_offset % 16 / 4);
+      if (cbuf.type == GLSL_TYPE_DOUBLE)
+         cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
+                                       const_offset % 16 / 8,
+                                       const_offset % 16 / 8,
+                                       const_offset % 16 / 8);
+      else
+         cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
+                                       const_offset % 16 / 4,
+                                       const_offset % 16 / 4,
+                                       const_offset % 16 / 4);
 
       if (ir->type->base_type == GLSL_TYPE_BOOL) {
          emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
@@ -2004,11 +2236,44 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_binop_interpolate_at_sample:
       emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
       break;
+
+   case ir_unop_d2f:
+      emit(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
+      break;
+   case ir_unop_f2d:
+      emit(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
+      break;
+   case ir_unop_d2i:
+      emit(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
+      break;
+   case ir_unop_i2d:
+      emit(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
+      break;
+   case ir_unop_d2u:
+      emit(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
+      break;
+   case ir_unop_u2d:
+      emit(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
+      break;
+   case ir_unop_unpack_double_2x32:
+   case ir_unop_pack_double_2x32:
+      emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
+      break;
+
+   case ir_binop_ldexp:
+      if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+         emit(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
+      } else {
+         assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
+      }
+      break;
+
    case ir_unop_pack_snorm_2x16:
    case ir_unop_pack_unorm_2x16:
    case ir_unop_pack_half_2x16:
    case ir_unop_pack_snorm_4x8:
    case ir_unop_pack_unorm_4x8:
+
    case ir_unop_unpack_snorm_2x16:
    case ir_unop_unpack_unorm_2x16:
    case ir_unop_unpack_half_2x16:
@@ -2016,13 +2281,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
    case ir_unop_unpack_half_2x16_split_y:
    case ir_unop_unpack_snorm_4x8:
    case ir_unop_unpack_unorm_4x8:
+
    case ir_binop_pack_half_2x16_split:
    case ir_binop_bfm:
    case ir_triop_bfi:
    case ir_quadop_vector:
    case ir_binop_vector_extract:
    case ir_triop_vector_insert:
-   case ir_binop_ldexp:
    case ir_binop_carry:
    case ir_binop_borrow:
       /* This operation is not supported, or should have already been handled.
@@ -2050,6 +2315,7 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
    ir->val->accept(this);
    src = this->result;
    assert(src.file != PROGRAM_UNDEFINED);
+   assert(ir->type->vector_elements > 0);
 
    for (i = 0; i < 4; i++) {
       if (i < ir->type->vector_elements) {
@@ -2090,7 +2356,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
       switch (var->data.mode) {
       case ir_var_uniform:
          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
-        				       var->data.location);
+                                               var->data.location);
          this->variables.push_tail(entry);
          break;
       case ir_var_shader_in:
@@ -2388,18 +2654,20 @@ glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
 
 void
 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
-                                     st_dst_reg *l, st_src_reg *r)
+                                     st_dst_reg *l, st_src_reg *r,
+                                     st_src_reg *cond, bool cond_swap)
 {
    if (type->base_type == GLSL_TYPE_STRUCT) {
       for (unsigned int i = 0; i < type->length; i++) {
-         emit_block_mov(ir, type->fields.structure[i].type, l, r);
+         emit_block_mov(ir, type->fields.structure[i].type, l, r,
+                        cond, cond_swap);
       }
       return;
    }
 
    if (type->is_array()) {
       for (unsigned int i = 0; i < type->length; i++) {
-         emit_block_mov(ir, type->fields.array, l, r);
+         emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
       }
       return;
    }
@@ -2411,7 +2679,7 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
                                          type->vector_elements, 1);
 
       for (int i = 0; i < type->matrix_columns; i++) {
-         emit_block_mov(ir, vec_type, l, r);
+         emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
       }
       return;
    }
@@ -2419,7 +2687,22 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
    assert(type->is_scalar() || type->is_vector());
 
    r->type = type->base_type;
-   emit(ir, TGSI_OPCODE_MOV, *l, *r);
+   if (cond) {
+      st_src_reg l_src = st_src_reg(*l);
+      l_src.swizzle = swizzle_for_size(type->vector_elements);
+
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_UCMP, *l, *cond,
+              cond_swap ? l_src : *r,
+              cond_swap ? *r : l_src);
+      } else {
+         emit(ir, TGSI_OPCODE_CMP, *l, *cond,
+              cond_swap ? l_src : *r,
+              cond_swap ? *r : l_src);
+      }
+   } else {
+      emit(ir, TGSI_OPCODE_MOV, *l, *r);
+   }
    l->index++;
    r->index++;
 }
@@ -2429,7 +2712,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
 {
    st_dst_reg l;
    st_src_reg r;
-   int i;
 
    ir->rhs->accept(this);
    r = this->result;
@@ -2486,35 +2768,13 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       const bool switch_order = this->process_move_condition(ir->condition);
       st_src_reg condition = this->result;
 
-      for (i = 0; i < type_size(ir->lhs->type); i++) {
-         st_src_reg l_src = st_src_reg(l);
-         st_src_reg condition_temp = condition;
-         st_src_reg op1, op2;
-         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
-
-         op1 = r;
-         op2 = l_src;
-         if (switch_order) {
-            op1 = l_src;
-            op2 = r;
-         }
-
-         if (native_integers) {
-            emit(ir, TGSI_OPCODE_UCMP, l, condition_temp, op1, op2);
-         }
-         else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, op1, op2);
-         }
-
-         l.index++;
-         r.index++;
-      }
+      emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
    } else if (ir->rhs->as_expression() &&
               this->instructions.get_tail() &&
               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
               type_size(ir->lhs->type) == 1 &&
-              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
-      /* To avoid emitting an extra MOV when assigning an expression to a 
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
+      /* To avoid emitting an extra MOV when assigning an expression to a
        * variable, emit the last instruction of the expression again, but
        * replace the destination register with the target of the assignment.
        * Dead code elimination will remove the original instruction.
@@ -2523,9 +2783,9 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
       new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
       new_inst->saturate = inst->saturate;
-      inst->dead_mask = inst->dst.writemask;
+      inst->dead_mask = inst->dst[0].writemask;
    } else {
-      emit_block_mov(ir, ir->rhs->type, &l, &r);
+      emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
    }
 }
 
@@ -2534,7 +2794,7 @@ void
 glsl_to_tgsi_visitor::visit(ir_constant *ir)
 {
    st_src_reg src;
-   GLfloat stack_vals[4] = { 0 };
+   GLdouble stack_vals[4] = { 0 };
    gl_constant_value *values = (gl_constant_value *) stack_vals;
    GLenum gl_type = GL_NONE;
    unsigned int i;
@@ -2622,6 +2882,13 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
          values[i].f = ir->value.f[i];
       }
       break;
+   case GLSL_TYPE_DOUBLE:
+      gl_type = GL_DOUBLE;
+      for (i = 0; i < ir->type->vector_elements; i++) {
+         values[i * 2].i = *(uint32_t *)&ir->value.d[i];
+         values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
+      }
+      break;
    case GLSL_TYPE_UINT:
       gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
       for (i = 0; i < ir->type->vector_elements; i++) {
@@ -2816,7 +3083,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
-      opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; 
+      opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
       if (ir->offset) {
          ir->offset->accept(this);
          offset[0] = this->result;
@@ -2936,8 +3203,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
             tmp_src = get_temp(glsl_type::vec4_type);
             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
 
-	    /* Projective division not allowed for array samplers. */
-	    assert(!sampler_type->sampler_array);
+            /* Projective division not allowed for array samplers. */
+            assert(!sampler_type->sampler_array);
 
             tmp_dst.writemask = WRITEMASK_Z;
             emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
@@ -2979,7 +3246,6 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
          } else {
             coord_dst.writemask = WRITEMASK_Z;
          }
-         
          emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
          coord_dst.writemask = WRITEMASK_XYZW;
       }
@@ -3229,7 +3495,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
          }
       }
    }
-   
    prog->SamplersUsed = v->samplers_used;
 
    if (v->shader_program != NULL)
@@ -3268,9 +3533,9 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
  * Here is why this conversion is safe:
  * CMP T0, T1 T2 T0 can be expanded to:
  * if (T1 < 0.0)
- * 	MOV T0, T2;
+ *   MOV T0, T2;
  * else
- * 	MOV T0, T0;
+ *   MOV T0, T0;
  *
  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
  * as the original program.  If (T1 < 0.0) evaluates to false, executing
@@ -3292,7 +3557,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
       unsigned prevWriteMask = 0;
 
       /* Give up if we encounter relative addressing or flow control. */
-      if (inst->dst.reladdr ||
+      if (inst->dst[0].reladdr ||
+          inst->dst[1].reladdr ||
           tgsi_get_opcode_info(inst->op)->is_branch ||
           inst->op == TGSI_OPCODE_BGNSUB ||
           inst->op == TGSI_OPCODE_CONT ||
@@ -3302,12 +3568,12 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
          break;
       }
 
-      if (inst->dst.file == PROGRAM_OUTPUT) {
-         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
-         prevWriteMask = outputWrites[inst->dst.index];
-         outputWrites[inst->dst.index] |= inst->dst.writemask;
-      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         if (inst->dst.index >= tempWritesSize) {
+      if (inst->dst[0].file == PROGRAM_OUTPUT) {
+         assert(inst->dst[0].index < MAX_PROGRAM_OUTPUTS);
+         prevWriteMask = outputWrites[inst->dst[0].index];
+         outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
+      } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
+         if (inst->dst[0].index >= tempWritesSize) {
             const int inc = 4096;
 
             tempWrites = (unsigned*)
@@ -3320,18 +3586,18 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
             tempWritesSize += inc;
          }
 
-         prevWriteMask = tempWrites[inst->dst.index];
-         tempWrites[inst->dst.index] |= inst->dst.writemask;
+         prevWriteMask = tempWrites[inst->dst[0].index];
+         tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
       } else
          continue;
 
       /* For a CMP to be considered a conditional write, the destination
        * register and source register two must be the same. */
       if (inst->op == TGSI_OPCODE_CMP
-          && !(inst->dst.writemask & prevWriteMask)
-          && inst->src[2].file == inst->dst.file
-          && inst->src[2].index == inst->dst.index
-          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+          && !(inst->dst[0].writemask & prevWriteMask)
+          && inst->src[2].file == inst->dst[0].file
+          && inst->src[2].index == inst->dst[0].index
+          && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
 
          inst->op = TGSI_OPCODE_MOV;
          inst->src[0] = inst->src[1];
@@ -3347,23 +3613,25 @@ glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
 {
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
       unsigned j;
-      
-      for (j=0; j < num_inst_src_regs(inst->op); j++) {
-         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+
+      for (j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
              inst->src[j].index == index) {
             inst->src[j].index = new_index;
          }
       }
 
-      for (j=0; j < inst->tex_offset_num_offset; j++) {
-         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && 
+      for (j = 0; j < inst->tex_offset_num_offset; j++) {
+         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
              inst->tex_offsets[j].index == index) {
             inst->tex_offsets[j].index = new_index;
          }
       }
-      
-      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
-         inst->dst.index = new_index;
+
+      for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
+         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
+            inst->dst[j].index = new_index;
+         }
       }
    }
 }
@@ -3374,21 +3642,20 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
    int depth = 0; /* loop depth */
    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
    unsigned i = 0, j;
-   
+
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      for (j=0; j < num_inst_src_regs(inst->op); j++) {
-         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+      for (j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
              inst->src[j].index == index) {
             return (depth == 0) ? i : loop_start;
          }
       }
-      for (j=0; j < inst->tex_offset_num_offset; j++) {
-         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY && 
+      for (j = 0; j < inst->tex_offset_num_offset; j++) {
+         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
              inst->tex_offsets[j].index == index) {
             return (depth == 0) ? i : loop_start;
          }
       }
-      
       if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
@@ -3397,10 +3664,8 @@ glsl_to_tgsi_visitor::get_first_temp_read(int index)
             loop_start = -1;
       }
       assert(depth >= 0);
-      
       i++;
    }
-   
    return -1;
 }
 
@@ -3410,12 +3675,14 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index)
    int depth = 0; /* loop depth */
    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
    int i = 0;
-   
+   unsigned j;
+
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
-         return (depth == 0) ? i : loop_start;
+      for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
+         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
+            return (depth == 0) ? i : loop_start;
+         }
       }
-      
       if (inst->op == TGSI_OPCODE_BGNLOOP) {
          if(depth++ == 0)
             loop_start = i;
@@ -3424,10 +3691,8 @@ glsl_to_tgsi_visitor::get_first_temp_write(int index)
             loop_start = -1;
       }
       assert(depth >= 0);
-      
       i++;
    }
-   
    return -1;
 }
 
@@ -3437,30 +3702,27 @@ glsl_to_tgsi_visitor::get_last_temp_read(int index)
    int depth = 0; /* loop depth */
    int last = -1; /* index of last instruction that reads the temporary */
    unsigned i = 0, j;
-   
+
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      for (j=0; j < num_inst_src_regs(inst->op); j++) {
-         if (inst->src[j].file == PROGRAM_TEMPORARY && 
+      for (j = 0; j < num_inst_src_regs(inst->op); j++) {
+         if (inst->src[j].file == PROGRAM_TEMPORARY &&
              inst->src[j].index == index) {
             last = (depth == 0) ? i : -2;
          }
       }
-      for (j=0; j < inst->tex_offset_num_offset; j++) {
+      for (j = 0; j < inst->tex_offset_num_offset; j++) {
           if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
               inst->tex_offsets[j].index == index)
               last = (depth == 0) ? i : -2;
       }
-      
       if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
       else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
-      
       i++;
    }
-   
    assert(last >= -1);
    return last;
 }
@@ -3471,21 +3733,22 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index)
    int depth = 0; /* loop depth */
    int last = -1; /* index of last instruction that writes to the temporary */
    int i = 0;
-   
+   unsigned j;
+
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
-         last = (depth == 0) ? i : -2;
-      
+      for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
+         if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
+            last = (depth == 0) ? i : -2;
+      }
+
       if (inst->op == TGSI_OPCODE_BGNLOOP)
          depth++;
       else if (inst->op == TGSI_OPCODE_ENDLOOP)
          if (--depth == 0 && last == -2)
             last = i;
       assert(depth >= 0);
-      
       i++;
    }
-   
    assert(last >= -1);
    return last;
 }
@@ -3514,14 +3777,14 @@ void
 glsl_to_tgsi_visitor::copy_propagate(void)
 {
    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
-        					    glsl_to_tgsi_instruction *,
-        					    this->next_temp * 4);
+                                                  glsl_to_tgsi_instruction *,
+                                                  this->next_temp * 4);
    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
    int level = 0;
 
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      assert(inst->dst.file != PROGRAM_TEMPORARY
-             || inst->dst.index < this->next_temp);
+      assert(inst->dst[0].file != PROGRAM_TEMPORARY
+             || inst->dst[0].index < this->next_temp);
 
       /* First, do any copy propagation possible into the src regs. */
       for (int r = 0; r < 3; r++) {
@@ -3555,8 +3818,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
                if (first->src[0].file != copy_chan->src[0].file ||
                    first->src[0].index != copy_chan->src[0].index ||
                    first->src[0].index2D != copy_chan->src[0].index2D) {
-        	  good = false;
-        	  break;
+                  good = false;
+                  break;
                }
             }
          }
@@ -3574,8 +3837,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
             for (int i = 0; i < 4; i++) {
                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
                glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
-               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
-        		   (3 * i));
+               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
             }
             inst->src[r].swizzle = swizzle;
          }
@@ -3601,10 +3863,10 @@ glsl_to_tgsi_visitor::copy_propagate(void)
          for (int r = 0; r < this->next_temp; r++) {
             for (int c = 0; c < 4; c++) {
                if (!acp[4 * r + c])
-        	  continue;
+                  continue;
 
                if (acp_level[4 * r + c] >= level)
-        	  acp[4 * r + c] = NULL;
+                  acp[4 * r + c] = NULL;
             }
          }
          if (inst->op == TGSI_OPCODE_ENDIF)
@@ -3615,50 +3877,50 @@ glsl_to_tgsi_visitor::copy_propagate(void)
          /* Continuing the block, clear any written channels from
           * the ACP.
           */
-         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
-            /* Any temporary might be written, so no copy propagation
-             * across this instruction.
-             */
-            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
-         } else if (inst->dst.file == PROGRAM_OUTPUT &&
-        	    inst->dst.reladdr) {
-            /* Any output might be written, so no copy propagation
-             * from outputs across this instruction.
-             */
-            for (int r = 0; r < this->next_temp; r++) {
-               for (int c = 0; c < 4; c++) {
-        	  if (!acp[4 * r + c])
-        	     continue;
-
-        	  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
-        	     acp[4 * r + c] = NULL;
+         for (int d = 0; d < 2; d++) {
+            if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
+               /* Any temporary might be written, so no copy propagation
+                * across this instruction.
+                */
+               memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
+            } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
+                       inst->dst[d].reladdr) {
+               /* Any output might be written, so no copy propagation
+                * from outputs across this instruction.
+                */
+               for (int r = 0; r < this->next_temp; r++) {
+                  for (int c = 0; c < 4; c++) {
+                     if (!acp[4 * r + c])
+                        continue;
+
+                     if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
+                        acp[4 * r + c] = NULL;
+                  }
                }
-            }
-         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
-        	    inst->dst.file == PROGRAM_OUTPUT) {
-            /* Clear where it's used as dst. */
-            if (inst->dst.file == PROGRAM_TEMPORARY) {
-               for (int c = 0; c < 4; c++) {
-        	  if (inst->dst.writemask & (1 << c)) {
-        	     acp[4 * inst->dst.index + c] = NULL;
-        	  }
+            } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
+                       inst->dst[d].file == PROGRAM_OUTPUT) {
+               /* Clear where it's used as dst. */
+               if (inst->dst[d].file == PROGRAM_TEMPORARY) {
+                  for (int c = 0; c < 4; c++) {
+                     if (inst->dst[d].writemask & (1 << c))
+                        acp[4 * inst->dst[d].index + c] = NULL;
+                  }
                }
-            }
-
-            /* Clear where it's used as src. */
-            for (int r = 0; r < this->next_temp; r++) {
-               for (int c = 0; c < 4; c++) {
-        	  if (!acp[4 * r + c])
-        	     continue;
 
-        	  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
-
-        	  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
-        	      acp[4 * r + c]->src[0].index == inst->dst.index &&
-        	      inst->dst.writemask & (1 << src_chan))
-        	  {
-        	     acp[4 * r + c] = NULL;
-        	  }
+               /* Clear where it's used as src. */
+               for (int r = 0; r < this->next_temp; r++) {
+                  for (int c = 0; c < 4; c++) {
+                     if (!acp[4 * r + c])
+                        continue;
+
+                     int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
+
+                     if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
+                         acp[4 * r + c]->src[0].index == inst->dst[d].index &&
+                         inst->dst[d].writemask & (1 << src_chan)) {
+                        acp[4 * r + c] = NULL;
+                     }
+                  }
                }
             }
          }
@@ -3667,18 +3929,18 @@ glsl_to_tgsi_visitor::copy_propagate(void)
 
       /* If this is a copy, add it to the ACP. */
       if (inst->op == TGSI_OPCODE_MOV &&
-          inst->dst.file == PROGRAM_TEMPORARY &&
-          !(inst->dst.file == inst->src[0].file &&
-             inst->dst.index == inst->src[0].index) &&
-          !inst->dst.reladdr &&
+          inst->dst[0].file == PROGRAM_TEMPORARY &&
+          !(inst->dst[0].file == inst->src[0].file &&
+             inst->dst[0].index == inst->src[0].index) &&
+          !inst->dst[0].reladdr &&
           !inst->saturate &&
           !inst->src[0].reladdr &&
           !inst->src[0].reladdr2 &&
           !inst->src[0].negate) {
          for (int i = 0; i < 4; i++) {
-            if (inst->dst.writemask & (1 << i)) {
-               acp[4 * inst->dst.index + i] = inst;
-               acp_level[4 * inst->dst.index + i] = level;
+            if (inst->dst[0].writemask & (1 << i)) {
+               acp[4 * inst->dst[0].index + i] = inst;
+               acp_level[4 * inst->dst[0].index + i] = level;
             }
          }
       }
@@ -3693,7 +3955,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
  * code elimination.
  *
  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
- * will occur.  As an example, a TXP production after copy propagation but 
+ * will occur.  As an example, a TXP production after copy propagation but
  * before this pass:
  *
  * 0: MOV TEMP[1], INPUT[4].xyyy;
@@ -3715,9 +3977,9 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
    int removed = 0;
 
    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      assert(inst->dst.file != PROGRAM_TEMPORARY
-             || inst->dst.index < this->next_temp);
-      
+      assert(inst->dst[0].file != PROGRAM_TEMPORARY
+             || inst->dst[0].index < this->next_temp);
+
       switch (inst->op) {
       case TGSI_OPCODE_BGNLOOP:
       case TGSI_OPCODE_ENDLOOP:
@@ -3742,30 +4004,27 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
          for (int r = 0; r < this->next_temp; r++) {
             for (int c = 0; c < 4; c++) {
                if (!writes[4 * r + c])
-        	         continue;
+                  continue;
 
                if (write_level[4 * r + c] == level)
-        	         write_level[4 * r + c] = level-1;
+                  write_level[4 * r + c] = level-1;
             }
          }
-
          if(inst->op == TGSI_OPCODE_ENDIF)
             --level;
-         
          break;
 
       case TGSI_OPCODE_IF:
       case TGSI_OPCODE_UIF:
          ++level;
          /* fallthrough to default case to mark the condition as read */
-      
       default:
          /* Continuing the block, clear any channels from the write array that
           * are read by this instruction.
           */
          for (unsigned i = 0; i < Elements(inst->src); i++) {
             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
-               /* Any temporary might be read, so no dead code elimination 
+               /* Any temporary might be read, so no dead code elimination
                 * across this instruction.
                 */
                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
@@ -3775,17 +4034,16 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
-               
+
                for (int c = 0; c < 4; c++) {
-              	   if (src_chans & (1 << c)) {
-              	      writes[4 * inst->src[i].index + c] = NULL;
-              	   }
+                  if (src_chans & (1 << c))
+                     writes[4 * inst->src[i].index + c] = NULL;
                }
             }
          }
          for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
             if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
-               /* Any temporary might be read, so no dead code elimination 
+               /* Any temporary might be read, so no dead code elimination
                 * across this instruction.
                 */
                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
@@ -3795,11 +4053,10 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
-               
+
                for (int c = 0; c < 4; c++) {
-              	   if (src_chans & (1 << c)) {
-              	      writes[4 * inst->tex_offsets[i].index + c] = NULL;
-              	   }
+                  if (src_chans & (1 << c))
+                     writes[4 * inst->tex_offsets[i].index + c] = NULL;
                }
             }
          }
@@ -3810,19 +4067,21 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
        * If there is already an instruction in the write array for one or more
        * of the channels, flag that channel write as dead.
        */
-      if (inst->dst.file == PROGRAM_TEMPORARY &&
-          !inst->dst.reladdr &&
-          !inst->saturate) {
-         for (int c = 0; c < 4; c++) {
-            if (inst->dst.writemask & (1 << c)) {
-               if (writes[4 * inst->dst.index + c]) {
-                  if (write_level[4 * inst->dst.index + c] < level)
-                     continue;
-                  else
-                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+      for (unsigned i = 0; i < Elements(inst->dst); i++) {
+         if (inst->dst[i].file == PROGRAM_TEMPORARY &&
+             !inst->dst[i].reladdr &&
+             !inst->saturate) {
+            for (int c = 0; c < 4; c++) {
+               if (inst->dst[i].writemask & (1 << c)) {
+                  if (writes[4 * inst->dst[i].index + c]) {
+                     if (write_level[4 * inst->dst[i].index + c] < level)
+                        continue;
+                     else
+                        writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
+                  }
+                  writes[4 * inst->dst[i].index + c] = inst;
+                  write_level[4 * inst->dst[i].index + c] = level;
                }
-               writes[4 * inst->dst.index + c] = inst;
-               write_level[4 * inst->dst.index + c] = level;
             }
          }
       }
@@ -3841,26 +4100,75 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
     * the writemask of other instructions with dead channels.
     */
    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
-      if (!inst->dead_mask || !inst->dst.writemask)
+      if (!inst->dead_mask || !inst->dst[0].writemask)
          continue;
-      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
+      else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
          inst->remove();
          delete inst;
          removed++;
-      } else
-         inst->dst.writemask &= ~(inst->dead_mask);
+      } else {
+         if (inst->dst[0].type == GLSL_TYPE_DOUBLE) {
+            if (inst->dead_mask == WRITEMASK_XY ||
+                inst->dead_mask == WRITEMASK_ZW)
+               inst->dst[0].writemask &= ~(inst->dead_mask);
+         } else
+            inst->dst[0].writemask &= ~(inst->dead_mask);
+      }
    }
 
    ralloc_free(write_level);
    ralloc_free(writes);
-   
+
    return removed;
 }
 
-/* Merges temporary registers together where possible to reduce the number of 
+/* Merge two-dst instructions (e.g. DFRACEXP) that share src[0] into one. */
+void
+glsl_to_tgsi_visitor::merge_two_dsts(void)
+{
+   foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
+      glsl_to_tgsi_instruction *inst2;
+      bool merged;
+      if (num_inst_dst_regs(inst->op) != 2)
+         continue;
+
+      if (inst->dst[0].file != PROGRAM_UNDEFINED &&
+          inst->dst[1].file != PROGRAM_UNDEFINED)
+         continue;
+
+      inst2 = (glsl_to_tgsi_instruction *) inst->next;
+      do {
+
+         if (inst->src[0].file == inst2->src[0].file &&
+             inst->src[0].index == inst2->src[0].index &&
+             inst->src[0].type == inst2->src[0].type &&
+             inst->src[0].swizzle == inst2->src[0].swizzle)
+            break;
+         inst2 = (glsl_to_tgsi_instruction *) inst2->next;
+      } while (inst2);
+
+      if (!inst2)
+         continue;
+      merged = false;
+      if (inst->dst[0].file == PROGRAM_UNDEFINED) {
+         merged = true;
+         inst->dst[0] = inst2->dst[0];
+      } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
+         inst->dst[1] = inst2->dst[1];
+         merged = true;
+      }
+
+      if (merged) {
+         inst2->remove();
+         delete inst2;
+      }
+   }
+}
+
+/* Merges temporary registers together where possible to reduce the number of
  * registers needed to run a program.
- * 
- * Produces optimal code only after copy propagation and dead code elimination 
+ *
+ * Produces optimal code only after copy propagation and dead code elimination
  * have been run. */
 void
 glsl_to_tgsi_visitor::merge_registers(void)
@@ -3868,36 +4176,35 @@ glsl_to_tgsi_visitor::merge_registers(void)
    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
    int i, j;
-   
+
    /* Read the indices of the last read and first write to each temp register
-    * into an array so that we don't have to traverse the instruction list as 
+    * into an array so that we don't have to traverse the instruction list as
     * much. */
-   for (i=0; i < this->next_temp; i++) {
+   for (i = 0; i < this->next_temp; i++) {
       last_reads[i] = get_last_temp_read(i);
       first_writes[i] = get_first_temp_write(i);
    }
-   
-   /* Start looking for registers with non-overlapping usages that can be 
+
+   /* Start looking for registers with non-overlapping usages that can be
     * merged together. */
-   for (i=0; i < this->next_temp; i++) {
+   for (i = 0; i < this->next_temp; i++) {
       /* Don't touch unused registers. */
       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
-      
-      for (j=0; j < this->next_temp; j++) {
+
+      for (j = 0; j < this->next_temp; j++) {
          /* Don't touch unused registers. */
          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
-         
-         /* We can merge the two registers if the first write to j is after or 
-          * in the same instruction as the last read from i.  Note that the 
-          * register at index i will always be used earlier or at the same time 
+
+         /* We can merge the two registers if the first write to j is after or
+          * in the same instruction as the last read from i.  Note that the
+          * register at index i will always be used earlier or at the same time
           * as the register at index j. */
-         if (first_writes[i] <= first_writes[j] && 
-             last_reads[i] <= first_writes[j])
-         {
+         if (first_writes[i] <= first_writes[j] &&
+             last_reads[i] <= first_writes[j]) {
             rename_temp_register(j, i); /* Replace all references to j with i.*/
-            
-            /* Update the first_writes and last_reads arrays with the new 
-             * values for the merged register index, and mark the newly unused 
+
+            /* Update the first_writes and last_reads arrays with the new
+             * values for the merged register index, and mark the newly unused
              * register index as such. */
             last_reads[i] = last_reads[j];
             first_writes[j] = -1;
@@ -3905,26 +4212,26 @@ glsl_to_tgsi_visitor::merge_registers(void)
          }
       }
    }
-   
+
    ralloc_free(last_reads);
    ralloc_free(first_writes);
 }
 
-/* Reassign indices to temporary registers by reusing unused indices created 
+/* Reassign indices to temporary registers by reusing unused indices created
  * by optimization passes. */
 void
 glsl_to_tgsi_visitor::renumber_registers(void)
 {
    int i = 0;
    int new_index = 0;
-   
-   for (i=0; i < this->next_temp; i++) {
+
+   for (i = 0; i < this->next_temp; i++) {
       if (get_first_temp_read(i) < 0) continue;
       if (i != new_index)
          rename_temp_register(i, new_index);
       new_index++;
    }
-   
+
    this->next_temp = new_index;
 }
 
@@ -4032,14 +4339,13 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
       glsl_to_tgsi_instruction *newinst;
       st_src_reg src_regs[3];
 
-      if (inst->dst.file == PROGRAM_OUTPUT)
-         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+      if (inst->dst[0].file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
 
-      for (int i=0; i<3; i++) {
+      for (int i = 0; i < 3; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT &&
-             src_regs[i].index == VARYING_SLOT_COL0)
-         {
+             src_regs[i].index == VARYING_SLOT_COL0) {
             src_regs[i].file = PROGRAM_TEMPORARY;
             src_regs[i].index = src0.index;
          }
@@ -4047,7 +4353,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
       newinst->tex_target = inst->tex_target;
       newinst->sampler_array_size = inst->sampler_array_size;
    }
@@ -4118,16 +4424,16 @@ get_bitmap_visitor(struct st_fragment_program *fp,
       glsl_to_tgsi_instruction *newinst;
       st_src_reg src_regs[3];
 
-      if (inst->dst.file == PROGRAM_OUTPUT)
-         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+      if (inst->dst[0].file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
 
-      for (int i=0; i<3; i++) {
+      for (int i = 0; i < 3; i++) {
          src_regs[i] = inst->src[i];
          if (src_regs[i].file == PROGRAM_INPUT)
             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
       }
 
-      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
       newinst->tex_target = inst->tex_target;
       newinst->sampler_array_size = inst->sampler_array_size;
    }
@@ -4221,7 +4527,7 @@ static unsigned *get_label(struct st_translate *t, unsigned branch_target)
 
    if (t->labels_count + 1 >= t->labels_size) {
       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
-      t->labels = (struct label *)realloc(t->labels, 
+      t->labels = (struct label *)realloc(t->labels,
                                           t->labels_size * sizeof(struct label));
       if (t->labels == NULL) {
          static unsigned dummy;
@@ -4269,6 +4575,8 @@ emit_immediate(struct st_translate *t,
    {
    case GL_FLOAT:
       return ureg_DECL_immediate(ureg, &values[0].f, size);
+   case GL_DOUBLE:
+      return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
    case GL_INT:
       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
    case GL_UNSIGNED_INT:
@@ -4405,20 +4713,23 @@ translate_dst(struct st_translate *t,
               const st_dst_reg *dst_reg,
               bool saturate, bool clamp_color)
 {
-   struct ureg_dst dst = dst_register(t, 
+   struct ureg_dst dst = dst_register(t,
                                       dst_reg->file,
                                       dst_reg->index);
 
+   if (dst.File == TGSI_FILE_NULL)
+      return dst;
+
    dst = ureg_writemask(dst, dst_reg->writemask);
-   
+
    if (saturate)
       dst = ureg_saturate(dst);
    else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
       /* Clamp colors for ARB_color_buffer_float. */
       switch (t->procType) {
       case TGSI_PROCESSOR_VERTEX:
-         /* XXX if the geometry shader is present, this must be done there
-          * instead of here. */
+         /* This can only occur with a compatibility profile, which doesn't
+          * support geometry shaders. */
          if (dst_reg->index == VARYING_SLOT_COL0 ||
              dst_reg->index == VARYING_SLOT_COL1 ||
              dst_reg->index == VARYING_SLOT_BFC0 ||
@@ -4536,7 +4847,7 @@ compile_tgsi_instruction(struct st_translate *t,
 {
    struct ureg_program *ureg = t->ureg;
    GLuint i;
-   struct ureg_dst dst[1];
+   struct ureg_dst dst[2];
    struct ureg_src src[4];
    struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
 
@@ -4547,9 +4858,9 @@ compile_tgsi_instruction(struct st_translate *t,
    num_dst = num_inst_dst_regs(inst->op);
    num_src = num_inst_src_regs(inst->op);
 
-   if (num_dst) 
-      dst[0] = translate_dst(t, 
-                             &inst->dst,
+   for (i = 0; i < num_dst; i++)
+      dst[i] = translate_dst(t,
+                             &inst->dst[i],
                              inst->saturate,
                              clamp_dst_color_output);
 
@@ -4569,7 +4880,7 @@ compile_tgsi_instruction(struct st_translate *t,
       ureg_label_insn(ureg,
                       inst->op,
                       src, num_src,
-                      get_label(t, 
+                      get_label(t,
                                 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
       return;
 
@@ -4636,9 +4947,9 @@ emit_wpos_adjustment( struct st_translate *t,
     * where T = INPUT[WPOS] by y is inverted.
     */
    static const gl_state_index wposTransformState[STATE_LENGTH]
-      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 
+      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
           (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
-   
+
    /* XXX: note we are modifying the incoming shader here!  Need to
     * do this before emitting the constant decls below, or this
     * will be missed:
@@ -4772,7 +5083,7 @@ emit_wpos(struct st_context *st,
       else
          assert(0);
    }
-   
+
    if (fp->PixelCenterInteger) {
       /* Fragment shader wants pixel center integer */
       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
@@ -5024,7 +5335,7 @@ st_translate_program(
                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
-	 }
+         }
       }
       if (passthrough_edgeflags)
          emit_edgeflags(t);
@@ -5076,7 +5387,7 @@ st_translate_program(
     */
    memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
 
-   /* Emit constants and uniforms.  TGSI uses a single index space for these, 
+   /* Emit constants and uniforms.  TGSI uses a single index space for these,
     * so we put all the translated regs in t->constants.
     */
    if (proginfo->Parameters) {
@@ -5128,7 +5439,7 @@ st_translate_program(
          ureg_DECL_constant2D(t->ureg, first, last, i + 1);
       }
    }
-   
+
    /* Emit immediate values.
     */
    t->immediates = (struct ureg_src *)
@@ -5140,7 +5451,7 @@ st_translate_program(
    i = 0;
    foreach_in_list(immediate_storage, imm, &program->immediates) {
       assert(i < program->num_immediates);
-      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
    }
    assert(i == program->num_immediates);
 
@@ -5219,7 +5530,7 @@ shader_stage_to_ptarget(gl_shader_stage stage)
 
 
 /**
- * Convert a shader's GLSL IR into a Mesa gl_program, although without 
+ * Convert a shader's GLSL IR into a Mesa gl_program, although without
  * generating Mesa IR.
  */
 static struct gl_program *
@@ -5256,7 +5567,7 @@ get_mesa_program(struct gl_context *ctx,
 
    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
-					       prog->Parameters);
+                                               prog->Parameters);
 
    /* Remove reads from output registers. */
    lower_output_reads(shader->ir);
@@ -5292,14 +5603,14 @@ get_mesa_program(struct gl_context *ctx,
    } while (progress);
 
 #if 0
-   /* Print out some information (for debugging purposes) used by the 
+   /* Print out some information (for debugging purposes) used by the
     * optimization passes. */
-   for (i=0; i < v->next_temp; i++) {
+   for (i = 0; i < v->next_temp; i++) {
       int fr = v->get_first_temp_read(i);
       int fw = v->get_first_temp_write(i);
       int lr = v->get_last_temp_read(i);
       int lw = v->get_last_temp_write(i);
-      
+
       printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
       assert(fw <= fr);
    }
@@ -5310,9 +5621,10 @@ get_mesa_program(struct gl_context *ctx,
    v->copy_propagate();
    while (v->eliminate_dead_code());
 
+   v->merge_two_dsts();
    v->merge_registers();
    v->renumber_registers();
-   
+
    /* Write the END instruction. */
    v->emit(NULL, TGSI_OPCODE_END);
 
@@ -5334,7 +5646,7 @@ get_mesa_program(struct gl_context *ctx,
    count_resources(v, prog);
 
    _mesa_reference_program(ctx, &shader->Program, prog);
-   
+
    /* This has to be done last.  Any operation the can cause
     * prog->ParameterValues to get reallocated (e.g., anything that adds a
     * program constant) has to happen before creating this linkage.
@@ -5347,7 +5659,7 @@ get_mesa_program(struct gl_context *ctx,
    struct st_vertex_program *stvp;
    struct st_fragment_program *stfp;
    struct st_geometry_program *stgp;
-   
+
    switch (shader->Type) {
    case GL_VERTEX_SHADER:
       stvp = (struct st_vertex_program *)prog;
@@ -5374,7 +5686,7 @@ extern "C" {
 /**
  * Link a shader.
  * Called via ctx->Driver.LinkShader()
- * This actually involves converting GLSL IR into an intermediate TGSI-like IR 
+ * This actually involves converting GLSL IR into an intermediate TGSI-like IR
  * with code lowering and other optimizations.
  */
 GLboolean
@@ -5389,8 +5701,14 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
       bool progress;
       exec_list *ir = prog->_LinkedShaders[i]->ir;
+      gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
       const struct gl_shader_compiler_options *options =
-            &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type)];
+            &ctx->Const.ShaderCompilerOptions[stage];
+      unsigned ptarget = shader_stage_to_ptarget(stage);
+      bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
+                                                   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
+      bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
+                                                   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
 
       /* If there are forms of indirect addressing that the driver
        * cannot handle, perform the lowering pass.
@@ -5423,13 +5741,15 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
          lower_offset_arrays(ir);
       do_mat_op_to_vec(ir);
       lower_instructions(ir,
-                         MOD_TO_FRACT |
+                         MOD_TO_FLOOR |
                          DIV_TO_MUL_RCP |
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
                          LDEXP_TO_ARITH |
+                         (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                          CARRY_TO_ARITH |
                          BORROW_TO_ARITH |
+                         (have_dround ? 0 : DOPS_TO_DFRAC) |
                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                          (options->EmitNoSat ? SAT_TO_CLAMP : 0));
@@ -5450,7 +5770,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 
          progress = do_common_optimization(ir, true, true, options,
                                            ctx->Const.NativeIntegers)
-	   || progress;
+           || progress;
 
          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
 
@@ -5468,13 +5788,13 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
 
       if (linked_prog) {
-	 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
-				 linked_prog);
+         _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                 linked_prog);
          if (!ctx->Driver.ProgramStringNotify(ctx,
                                               _mesa_shader_stage_to_program(i),
                                               linked_prog)) {
-	    _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
-				    NULL);
+            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                    NULL);
             _mesa_reference_program(ctx, &linked_prog, NULL);
             return GL_FALSE;
          }
diff --git a/mesalib/src/mesa/state_tracker/st_manager.c b/mesalib/src/mesa/state_tracker/st_manager.c
index 606d67891..5411d84b0 100644
--- a/mesalib/src/mesa/state_tracker/st_manager.c
+++ b/mesalib/src/mesa/state_tracker/st_manager.c
@@ -685,7 +685,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
    if (attribs->major > 1 || attribs->minor > 0) {
       /* Is the actual version less than the requested version?
        */
-      if (st->ctx->Version < attribs->major * 10 + attribs->minor) {
+      if (st->ctx->Version < attribs->major * 10U + attribs->minor) {
 	 *error = ST_CONTEXT_ERROR_BAD_VERSION;
          st_destroy_context(st);
          return NULL;
diff --git a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 2c9d9a523..3dd8a14b6 100644
--- a/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/mesalib/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -299,8 +299,8 @@ translate_dst( struct st_translate *t,
       /* Clamp colors for ARB_color_buffer_float. */
       switch (t->procType) {
       case TGSI_PROCESSOR_VERTEX:
-         /* XXX if the geometry shader is present, this must be done there
-          * instead of here. */
+         /* This can only occur with a compatibility profile, which doesn't
+          * support geometry shaders. */
          if (DstReg->Index == VARYING_SLOT_COL0 ||
              DstReg->Index == VARYING_SLOT_COL1 ||
              DstReg->Index == VARYING_SLOT_BFC0 ||
diff --git a/mesalib/src/mesa/state_tracker/st_program.c b/mesalib/src/mesa/state_tracker/st_program.c
index 737c2694e..10a5f2900 100644
--- a/mesalib/src/mesa/state_tracker/st_program.c
+++ b/mesalib/src/mesa/state_tracker/st_program.c
@@ -1095,7 +1095,7 @@ st_translate_geometry_program(struct st_context *st,
 
    /* find max output slot referenced to compute gs_num_outputs */
    for (attr = 0; attr < VARYING_SLOT_MAX; attr++) {
-      if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot)
+      if (outputMapping[attr] != ~0U && outputMapping[attr] > maxSlot)
          maxSlot = outputMapping[attr];
    }
    gs_num_outputs = maxSlot + 1;
diff --git a/mesalib/src/mesa/state_tracker/st_texture.h b/mesalib/src/mesa/state_tracker/st_texture.h
index d66afcb56..2f540295f 100644
--- a/mesalib/src/mesa/state_tracker/st_texture.h
+++ b/mesalib/src/mesa/state_tracker/st_texture.h
@@ -123,6 +123,12 @@ st_texture_image(struct gl_texture_image *img)
    return (struct st_texture_image *) img;
 }
 
+static INLINE const struct st_texture_image * /* const counterpart of st_texture_image() */
+st_texture_image_const(const struct gl_texture_image *img)
+{
+   return (const struct st_texture_image *) img; /* pointer cast only; const qualifier is kept */
+}
+
 static INLINE struct st_texture_object *
 st_texture_object(struct gl_texture_object *obj)
 {
diff --git a/mesalib/src/mesa/swrast/s_aaline.c b/mesalib/src/mesa/swrast/s_aaline.c
index b4e05ff80..6aea9d545 100644
--- a/mesalib/src/mesa/swrast/s_aaline.c
+++ b/mesalib/src/mesa/swrast/s_aaline.c
@@ -27,6 +27,7 @@
 #include "main/imports.h"
 #include "main/macros.h"
 #include "main/mtypes.h"
+#include "main/teximage.h"
 #include "swrast/s_aaline.h"
 #include "swrast/s_context.h"
 #include "swrast/s_span.h"
diff --git a/mesalib/src/mesa/swrast/s_aalinetemp.h b/mesalib/src/mesa/swrast/s_aalinetemp.h
index 670b663bd..f1d078fd8 100644
--- a/mesalib/src/mesa/swrast/s_aalinetemp.h
+++ b/mesalib/src/mesa/swrast/s_aalinetemp.h
@@ -179,7 +179,8 @@ NAME(line)(struct gl_context *ctx, const SWvertex *v0, const SWvertex *v1)
          if (attr >= VARYING_SLOT_TEX0 && attr < VARYING_SLOT_VAR0) {
             const GLuint u = attr - VARYING_SLOT_TEX0;
             const struct gl_texture_object *obj = ctx->Texture.Unit[u]._Current;
-            const struct gl_texture_image *texImage = obj->Image[0][obj->BaseLevel];
+            const struct gl_texture_image *texImage =
+               _mesa_base_tex_image(obj);
             line.texWidth[attr]  = (GLfloat) texImage->Width;
             line.texHeight[attr] = (GLfloat) texImage->Height;
          }
diff --git a/mesalib/src/mesa/swrast/s_blit.c b/mesalib/src/mesa/swrast/s_blit.c
index e3b45f146..16e5b8c1c 100644
--- a/mesalib/src/mesa/swrast/s_blit.c
+++ b/mesalib/src/mesa/swrast/s_blit.c
@@ -107,14 +107,14 @@ RESAMPLE(resample_row_16, GLuint, 4)
  */
 static void
 blit_nearest(struct gl_context *ctx,
+             struct gl_framebuffer *readFb,
+             struct gl_framebuffer *drawFb,
              GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
              GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
              GLbitfield buffer)
 {
    struct gl_renderbuffer *readRb, *drawRb = NULL;
    struct gl_renderbuffer_attachment *readAtt = NULL, *drawAtt = NULL;
-   struct gl_framebuffer *readFb = ctx->ReadBuffer;
-   struct gl_framebuffer *drawFb = ctx->DrawBuffer;
    GLuint numDrawBuffers = 0;
    GLuint i;
 
@@ -508,11 +508,11 @@ resample_linear_row_float(GLint srcWidth, GLint dstWidth,
  */
 static void
 blit_linear(struct gl_context *ctx,
+            struct gl_framebuffer *readFb,
+            struct gl_framebuffer *drawFb,
             GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
             GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1)
 {
-   struct gl_framebuffer *drawFb = ctx->DrawBuffer;
-   struct gl_framebuffer *readFb = ctx->ReadBuffer;
    struct gl_renderbuffer *readRb = readFb->_ColorReadBuffer;
    struct gl_renderbuffer_attachment *readAtt =
       &readFb->Attachment[readFb->_ColorReadBufferIndex];
@@ -733,6 +733,8 @@ fail_no_memory:
  */
 void
 _swrast_BlitFramebuffer(struct gl_context *ctx,
+                        struct gl_framebuffer *readFb,
+                        struct gl_framebuffer *drawFb,
                         GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                         GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                         GLbitfield mask, GLenum filter)
@@ -756,7 +758,7 @@ _swrast_BlitFramebuffer(struct gl_context *ctx,
    if (!_mesa_check_conditional_render(ctx))
       return; /* Do not blit */
 
-   if (!_mesa_clip_blit(ctx, &srcX0, &srcY0, &srcX1, &srcY1,
+   if (!_mesa_clip_blit(ctx, readFb, drawFb, &srcX0, &srcY0, &srcX1, &srcY1,
                         &dstX0, &dstY0, &dstX1, &dstY1)) {
       return;
    }
@@ -775,33 +777,34 @@ _swrast_BlitFramebuffer(struct gl_context *ctx,
        dstY0 < dstY1) {
       for (i = 0; i < 3; i++) {
          if (mask & buffers[i]) {
-	    if (swrast_fast_copy_pixels(ctx,
-					srcX0, srcY0,
-					srcX1 - srcX0, srcY1 - srcY0,
-					dstX0, dstY0,
-					buffer_enums[i])) {
-	       mask &= ~buffers[i];
-	    }
-	 }
+            if (swrast_fast_copy_pixels(ctx,
+                                        readFb, drawFb,
+                                        srcX0, srcY0,
+                                        srcX1 - srcX0, srcY1 - srcY0,
+                                        dstX0, dstY0,
+                                        buffer_enums[i])) {
+               mask &= ~buffers[i];
+            }
+         }
       }
 
       if (!mask)
-	 return;
+         return;
    }
 
    if (filter == GL_NEAREST) {
       for (i = 0; i < 3; i++) {
-	 if (mask & buffers[i]) {
-	    blit_nearest(ctx,  srcX0, srcY0, srcX1, srcY1,
-			 dstX0, dstY0, dstX1, dstY1, buffers[i]);
-	 }
+          if (mask & buffers[i]) {
+             blit_nearest(ctx, readFb, drawFb, srcX0, srcY0, srcX1, srcY1,
+                          dstX0, dstY0, dstX1, dstY1, buffers[i]);
+          }
       }
    }
    else {
       ASSERT(filter == GL_LINEAR);
       if (mask & GL_COLOR_BUFFER_BIT) {  /* depth/stencil not allowed */
-	 blit_linear(ctx,  srcX0, srcY0, srcX1, srcY1,
-		     dstX0, dstY0, dstX1, dstY1);
+         blit_linear(ctx, readFb, drawFb, srcX0, srcY0, srcX1, srcY1,
+                     dstX0, dstY0, dstX1, dstY1);
       }
    }
 
diff --git a/mesalib/src/mesa/swrast/s_copypix.c b/mesalib/src/mesa/swrast/s_copypix.c
index d00173476..e21c69dbc 100644
--- a/mesalib/src/mesa/swrast/s_copypix.c
+++ b/mesalib/src/mesa/swrast/s_copypix.c
@@ -442,11 +442,11 @@ end:
  */
 GLboolean
 swrast_fast_copy_pixels(struct gl_context *ctx,
-			GLint srcX, GLint srcY, GLsizei width, GLsizei height,
-			GLint dstX, GLint dstY, GLenum type)
+                        struct gl_framebuffer *srcFb,
+                        struct gl_framebuffer *dstFb,
+                        GLint srcX, GLint srcY, GLsizei width, GLsizei height,
+                        GLint dstX, GLint dstY, GLenum type)
 {
-   struct gl_framebuffer *srcFb = ctx->ReadBuffer;
-   struct gl_framebuffer *dstFb = ctx->DrawBuffer;
    struct gl_renderbuffer *srcRb, *dstRb;
    GLint row;
    GLuint pixelBytes, widthInBytes;
@@ -620,9 +620,9 @@ map_readbuffer(struct gl_context *ctx, GLenum type)
  * By time we get here, all parameters will have been error-checked.
  */
 void
-_swrast_CopyPixels( struct gl_context *ctx,
-		    GLint srcx, GLint srcy, GLsizei width, GLsizei height,
-		    GLint destx, GLint desty, GLenum type )
+_swrast_CopyPixels(struct gl_context *ctx,
+                   GLint srcx, GLint srcy, GLsizei width, GLsizei height,
+                   GLint destx, GLint desty, GLenum type)
 {
    SWcontext *swrast = SWRAST_CONTEXT(ctx);
    struct gl_renderbuffer *rb;
@@ -634,11 +634,12 @@ _swrast_CopyPixels( struct gl_context *ctx,
       _swrast_validate_derived( ctx );
 
    if (!(SWRAST_CONTEXT(ctx)->_RasterMask != 0x0 ||
-	 ctx->Pixel.ZoomX != 1.0F ||
-	 ctx->Pixel.ZoomY != 1.0F ||
-	 ctx->_ImageTransferState) &&
-       swrast_fast_copy_pixels(ctx, srcx, srcy, width, height, destx, desty,
-			       type)) {
+       ctx->Pixel.ZoomX != 1.0F ||
+       ctx->Pixel.ZoomY != 1.0F ||
+       ctx->_ImageTransferState) &&
+      swrast_fast_copy_pixels(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                              srcx, srcy, width, height, destx, desty,
+                              type)) {
       /* all done */
       return;
    }
diff --git a/mesalib/src/mesa/swrast/s_drawpix.c b/mesalib/src/mesa/swrast/s_drawpix.c
index f7926e426..c99251904 100644
--- a/mesalib/src/mesa/swrast/s_drawpix.c
+++ b/mesalib/src/mesa/swrast/s_drawpix.c
@@ -29,6 +29,8 @@
 #include "main/condrender.h"
 #include "main/context.h"
 #include "main/format_pack.h"
+#include "main/format_utils.h"
+#include "main/glformats.h"
 #include "main/image.h"
 #include "main/imports.h"
 #include "main/macros.h"
@@ -414,7 +416,6 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
 {
    const GLint imgX = x, imgY = y;
    const GLboolean zoom = ctx->Pixel.ZoomX!=1.0 || ctx->Pixel.ZoomY!=1.0;
-   GLfloat *convImage = NULL;
    GLbitfield transferOps = ctx->_ImageTransferState;
    SWspan span;
 
@@ -452,6 +453,28 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
       GLint skipPixels = 0;
       /* use span array for temp color storage */
       GLfloat *rgba = (GLfloat *) span.array->attribs[VARYING_SLOT_COL0];
+      void *tempImage = NULL;
+
+      if (unpack->SwapBytes) {
+         /* We have to handle byte-swapping scenarios before calling
+          * _mesa_format_convert
+          */
+         GLint swapSize = _mesa_sizeof_packed_type(type);
+         if (swapSize == 2 || swapSize == 4) {
+            int components = _mesa_components_in_format(format);
+            int elementCount = width * height * components;
+            tempImage = malloc(elementCount * swapSize);
+            if (!tempImage) {
+               _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
+               return;
+            }
+            if (swapSize == 2)
+               _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
+            else
+               _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+            pixels = tempImage;
+         }
+      }
 
       /* if the span is wider than SWRAST_MAX_WIDTH we have to do it in chunks */
       while (skipPixels < width) {
@@ -462,11 +485,15 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
                                                       type, 0, skipPixels);
          GLint row;
 
+         /* get image row as float/RGBA */
+         uint32_t srcMesaFormat = _mesa_format_from_format_and_type(format, type);
          for (row = 0; row < height; row++) {
-            /* get image row as float/RGBA */
-            _mesa_unpack_color_span_float(ctx, spanWidth, GL_RGBA, rgba,
-                                     format, type, source, unpack,
-                                     transferOps);
+            int dstRowStride = 4 * width * sizeof(float);
+            _mesa_format_convert(rgba, RGBA32_FLOAT, dstRowStride,
+                                 (void*)source, srcMesaFormat, srcStride,
+                                 spanWidth, 1, NULL);
+            if (transferOps)
+               _mesa_apply_rgba_transfer_ops(ctx, transferOps, spanWidth, (GLfloat (*)[4])rgba);
 	    /* Set these for each row since the _swrast_write_* functions
 	     * may change them while clipping/rendering.
 	     */
@@ -491,9 +518,9 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
 
       /* XXX this is ugly/temporary, to undo above change */
       span.array->ChanType = CHAN_TYPE;
-   }
 
-   free(convImage);
+      free(tempImage);
+   }
 
    swrast_render_finish(ctx);
 }
diff --git a/mesalib/src/mesa/swrast/s_fragprog.c b/mesalib/src/mesa/swrast/s_fragprog.c
index 21699f3ea..1d7c33619 100644
--- a/mesalib/src/mesa/swrast/s_fragprog.c
+++ b/mesalib/src/mesa/swrast/s_fragprog.c
@@ -25,6 +25,7 @@
 #include "main/glheader.h"
 #include "main/colormac.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
 #include "program/prog_instruction.h"
 
 #include "s_context.h"
@@ -116,8 +117,7 @@ fetch_texel_deriv( struct gl_context *ctx, const GLfloat texcoord[4],
    const struct gl_texture_object *texObj = texUnit->_Current;
 
    if (texObj) {
-      const struct gl_texture_image *texImg =
-         texObj->Image[0][texObj->BaseLevel];
+      const struct gl_texture_image *texImg = _mesa_base_tex_image(texObj);
       const struct swrast_texture_image *swImg =
          swrast_texture_image_const(texImg);
       const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, unit);
diff --git a/mesalib/src/mesa/swrast/s_span.c b/mesalib/src/mesa/swrast/s_span.c
index 10aa33c6f..321959df9 100644
--- a/mesalib/src/mesa/swrast/s_span.c
+++ b/mesalib/src/mesa/swrast/s_span.c
@@ -39,6 +39,7 @@
 #include "main/imports.h"
 #include "main/image.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
 
 #include "s_atifragshader.h"
 #include "s_alpha.h"
@@ -495,7 +496,7 @@ interpolate_texcoords(struct gl_context *ctx, SWspan *span)
          GLfloat q = span->attrStart[attr][3] + span->leftClip * dqdx;
 
          if (obj) {
-            const struct gl_texture_image *img = obj->Image[0][obj->BaseLevel];
+            const struct gl_texture_image *img = _mesa_base_tex_image(obj);
             const struct swrast_texture_image *swImg =
                swrast_texture_image_const(img);
             const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, u);
@@ -932,19 +933,19 @@ clamp_colors(SWspan *span)
  * \param output  which fragment program color output is being processed
  */
 static inline void
-convert_color_type(SWspan *span, GLenum newType, GLuint output)
+convert_color_type(SWspan *span, GLenum srcType, GLenum newType, GLuint output)
 {
    GLvoid *src, *dst;
 
-   if (output > 0 || span->array->ChanType == GL_FLOAT) {
+   if (output > 0 || srcType == GL_FLOAT) {
       src = span->array->attribs[VARYING_SLOT_COL0 + output];
       span->array->ChanType = GL_FLOAT;
    }
-   else if (span->array->ChanType == GL_UNSIGNED_BYTE) {
+   else if (srcType == GL_UNSIGNED_BYTE) {
       src = span->array->rgba8;
    }
    else {
-      ASSERT(span->array->ChanType == GL_UNSIGNED_SHORT);
+      ASSERT(srcType == GL_UNSIGNED_SHORT);
       src = span->array->rgba16;
    }
 
@@ -978,7 +979,7 @@ shade_texture_span(struct gl_context *ctx, SWspan *span)
        ctx->ATIFragmentShader._Enabled) {
       /* programmable shading */
       if (span->primitive == GL_BITMAP && span->array->ChanType != GL_FLOAT) {
-         convert_color_type(span, GL_FLOAT, 0);
+         convert_color_type(span, span->array->ChanType, GL_FLOAT, 0);
       }
       else {
          span->array->rgba = (void *) span->array->attribs[VARYING_SLOT_COL0];
@@ -1313,6 +1314,8 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span)
       const GLboolean multiFragOutputs = 
          _swrast_use_fragment_program(ctx)
          && fp->Base.OutputsWritten >= (1 << FRAG_RESULT_DATA0);
+      /* Save srcColorType because convert_color_type() can change it */
+      const GLenum srcColorType = span->array->ChanType;
       GLuint buf;
 
       for (buf = 0; buf < numBuffers; buf++) {
@@ -1324,17 +1327,18 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span)
             /* re-use one of the attribute array buffers for rgbaSave */
             GLchan (*rgbaSave)[4] = (GLchan (*)[4]) span->array->attribs[0];
             struct swrast_renderbuffer *srb = swrast_renderbuffer(rb);
-            GLenum colorType = srb->ColorType;
+            const GLenum dstColorType = srb->ColorType;
 
-            assert(colorType == GL_UNSIGNED_BYTE ||
-                   colorType == GL_FLOAT);
+            assert(dstColorType == GL_UNSIGNED_BYTE ||
+                   dstColorType == GL_FLOAT);
 
             /* set span->array->rgba to colors for renderbuffer's datatype */
-            if (span->array->ChanType != colorType) {
-               convert_color_type(span, colorType, 0);
+            if (srcColorType != dstColorType) {
+               convert_color_type(span, srcColorType, dstColorType,
+                                  multiFragOutputs ? buf : 0);
             }
             else {
-               if (span->array->ChanType == GL_UNSIGNED_BYTE) {
+               if (srcColorType == GL_UNSIGNED_BYTE) {
                   span->array->rgba = span->array->rgba8;
                }
                else {
diff --git a/mesalib/src/mesa/swrast/s_texfetch.c b/mesalib/src/mesa/swrast/s_texfetch.c
index 0f6da919d..9629024b9 100644
--- a/mesalib/src/mesa/swrast/s_texfetch.c
+++ b/mesalib/src/mesa/swrast/s_texfetch.c
@@ -166,6 +166,8 @@ texfetch_funcs[] =
    FETCH_FUNCS(B10G10R10A2_UNORM),
    FETCH_NULL(B10G10R10X2_UNORM),
    FETCH_FUNCS(R10G10B10A2_UNORM),
+   FETCH_NULL(R10G10B10X2_UNORM),
+
    FETCH_FUNCS(S8_UINT_Z24_UNORM),
    {
       MESA_FORMAT_X8_UINT_Z24_UNORM,
@@ -180,6 +182,13 @@ texfetch_funcs[] =
       fetch_texel_2d_Z24_UNORM_S8_UINT,
       fetch_texel_3d_Z24_UNORM_S8_UINT
    },
+   FETCH_NULL(R3G3B2_UNORM),
+   FETCH_NULL(A4B4G4R4_UNORM),
+   FETCH_NULL(R4G4B4A4_UNORM),
+   FETCH_NULL(R5G5B5A1_UNORM),
+   FETCH_NULL(A2B10G10R10_UNORM),
+   FETCH_NULL(A2R10G10B10_UNORM),
+
    FETCH_FUNCS(YCBCR),
    FETCH_FUNCS(YCBCR_REV),
 
@@ -276,6 +285,8 @@ texfetch_funcs[] =
    /* Packed signed/unsigned non-normalized integer formats */
    FETCH_NULL(B10G10R10A2_UINT),
    FETCH_NULL(R10G10B10A2_UINT),
+   FETCH_NULL(A2B10G10R10_UINT),
+   FETCH_NULL(A2R10G10B10_UINT),
 
    /* Array signed/unsigned non-normalized integer formats */
    FETCH_NULL(A_UINT8),
diff --git a/mesalib/src/mesa/swrast/s_texfetch_tmp.h b/mesalib/src/mesa/swrast/s_texfetch_tmp.h
index 7ff30f6b4..45bd83950 100644
--- a/mesalib/src/mesa/swrast/s_texfetch_tmp.h
+++ b/mesalib/src/mesa/swrast/s_texfetch_tmp.h
@@ -39,6 +39,7 @@
  * \author Brian Paul
  */
 
+#include <format_unpack.h>
 
 #if DIM == 1
 
@@ -68,1244 +69,128 @@
 #error	illegal number of texture dimensions
 #endif
 
-
-static void
-FETCH(Z_UNORM32)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[0] = src[0] * (1.0F / 0xffffffff);
-}
-
-
-static void
-FETCH(Z_UNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[0] = src[0] * (1.0F / 65535.0F);
-}
-
-
-static void
-FETCH(RGBA_FLOAT32)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 4);
-   texel[RCOMP] = src[0];
-   texel[GCOMP] = src[1];
-   texel[BCOMP] = src[2];
-   texel[ACOMP] = src[3];
-}
-
-
-static void
-FETCH(RGBA_FLOAT16)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 4);
-   texel[RCOMP] = _mesa_half_to_float(src[0]);
-   texel[GCOMP] = _mesa_half_to_float(src[1]);
-   texel[BCOMP] = _mesa_half_to_float(src[2]);
-   texel[ACOMP] = _mesa_half_to_float(src[3]);
-}
-
-
-static void
-FETCH(RGB_FLOAT32)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 3);
-   texel[RCOMP] = src[0];
-   texel[GCOMP] = src[1];
-   texel[BCOMP] = src[2];
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(RGB_FLOAT16)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 3);
-   texel[RCOMP] = _mesa_half_to_float(src[0]);
-   texel[GCOMP] = _mesa_half_to_float(src[1]);
-   texel[BCOMP] = _mesa_half_to_float(src[2]);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A_FLOAT32)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = src[0];
-}
-
-
-static void
-FETCH(A_FLOAT16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = _mesa_half_to_float(src[0]);
-}
-
-
-static void
-FETCH(L_FLOAT32)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = src[0];
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(L_FLOAT16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = _mesa_half_to_float(src[0]);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(LA_FLOAT32)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = src[0];
-   texel[ACOMP] = src[1];
-}
-
-
-static void
-FETCH(LA_FLOAT16)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 2);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = _mesa_half_to_float(src[0]);
-   texel[ACOMP] = _mesa_half_to_float(src[1]);
-}
-
-
-static void
-FETCH(I_FLOAT32)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = src[0];
-}
-
-
-static void
-FETCH(I_FLOAT16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = _mesa_half_to_float(src[0]);
-}
-
-
-static void
-FETCH(R_FLOAT32)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 1);
-   texel[RCOMP] = src[0];
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(R_FLOAT16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 1);
-   texel[RCOMP] = _mesa_half_to_float(src[0]);
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(RG_FLOAT32)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2);
-   texel[RCOMP] = src[0];
-   texel[GCOMP] = src[1];
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(RG_FLOAT16)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *src = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 2);
-   texel[RCOMP] = _mesa_half_to_float(src[0]);
-   texel[GCOMP] = _mesa_half_to_float(src[1]);
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A8B8G8R8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-}
-
-
-static void
-FETCH(R8G8B8A8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-}
-
-
-static void
-FETCH(B8G8R8A8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-}
-
-
-static void
-FETCH(A8R8G8B8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-}
-
-
-static void
-FETCH(X8B8G8R8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(R8G8B8X8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(B8G8R8X8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(X8R8G8B8_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( (s >>  8) & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( (s >> 16) & 0xff );
-   texel[BCOMP] = UBYTE_TO_FLOAT( (s >> 24)        );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(BGR_UNORM8)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 3);
-   texel[RCOMP] = UBYTE_TO_FLOAT( src[2] );
-   texel[GCOMP] = UBYTE_TO_FLOAT( src[1] );
-   texel[BCOMP] = UBYTE_TO_FLOAT( src[0] );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(RGB_UNORM8)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 3);
-   texel[RCOMP] = UBYTE_TO_FLOAT( src[0] );
-   texel[GCOMP] = UBYTE_TO_FLOAT( src[1] );
-   texel[BCOMP] = UBYTE_TO_FLOAT( src[2] );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(B5G6R5_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = *src;
-   texel[RCOMP] = ((s >> 11) & 0x1f) * (1.0F / 31.0F);
-   texel[GCOMP] = ((s >> 5 ) & 0x3f) * (1.0F / 63.0F);
-   texel[BCOMP] = ((s      ) & 0x1f) * (1.0F / 31.0F);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(R5G6B5_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = (*src >> 8) | (*src << 8); /* byte swap */
-   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 8) & 0xf8) | ((s >> 13) & 0x7) );
-   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >> 3) & 0xfc) | ((s >>  9) & 0x3) );
-   texel[BCOMP] = UBYTE_TO_FLOAT( ((s << 3) & 0xf8) | ((s >>  2) & 0x7) );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(B4G4R4A4_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = *src;
-   texel[RCOMP] = ((s >>  8) & 0xf) * (1.0F / 15.0F);
-   texel[GCOMP] = ((s >>  4) & 0xf) * (1.0F / 15.0F);
-   texel[BCOMP] = ((s      ) & 0xf) * (1.0F / 15.0F);
-   texel[ACOMP] = ((s >> 12) & 0xf) * (1.0F / 15.0F);
-}
-
-
-static void
-FETCH(A4R4G4B4_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] = ((s >>  4) & 0xf) * (1.0F / 15.0F);
-   texel[GCOMP] = ((s >>  8) & 0xf) * (1.0F / 15.0F);
-   texel[BCOMP] = ((s >> 12) & 0xf) * (1.0F / 15.0F);
-   texel[ACOMP] = ((s      ) & 0xf) * (1.0F / 15.0F);
-}
-
-
-static void
-FETCH(A1B5G5R5_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = *src;
-   texel[RCOMP] = ((s >> 11) & 0x1f) * (1.0F / 31.0F);
-   texel[GCOMP] = ((s >>  6) & 0x1f) * (1.0F / 31.0F);
-   texel[BCOMP] = ((s >>  1) & 0x1f) * (1.0F / 31.0F);
-   texel[ACOMP] = ((s      ) & 0x01) * 1.0F;
-}
-
-
-static void
-FETCH(B5G5R5A1_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = *src;
-   texel[RCOMP] = ((s >> 10) & 0x1f) * (1.0F / 31.0F);
-   texel[GCOMP] = ((s >>  5) & 0x1f) * (1.0F / 31.0F);
-   texel[BCOMP] = ((s >>  0) & 0x1f) * (1.0F / 31.0F);
-   texel[ACOMP] = ((s >> 15) & 0x01) * 1.0F;
-}
-
-
-static void
-FETCH(A1R5G5B5_UNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   const GLushort s = (*src << 8) | (*src >> 8); /* byteswap */
-   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >>  7) & 0xf8) | ((s >> 12) & 0x7) );
-   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  2) & 0xf8) | ((s >>  7) & 0x7) );
-   texel[BCOMP] = UBYTE_TO_FLOAT( ((s <<  3) & 0xf8) | ((s >>  2) & 0x7) );
-   texel[ACOMP] = UBYTE_TO_FLOAT( ((s >> 15) & 0x01) * 255 );
-}
-
-
-static void
-FETCH(B10G10R10A2_UNORM)(const struct swrast_texture_image *texImage,
-                         GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   const GLuint s = *src;
-   texel[RCOMP] = ((s >> 20) & 0x3ff) * (1.0F / 1023.0F);
-   texel[GCOMP] = ((s >> 10) & 0x3ff) * (1.0F / 1023.0F);
-   texel[BCOMP] = ((s >>  0) & 0x3ff) * (1.0F / 1023.0F);
-   texel[ACOMP] = ((s >> 30) & 0x03) * (1.0F / 3.0F);
-}
-
-
-static void
-FETCH(R10G10B10A2_UNORM)(const struct swrast_texture_image *texImage,
-                         GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   const GLuint s = *src;
-   texel[RCOMP] = ((s >>  0) & 0x3ff) * (1.0F / 1023.0F);
-   texel[GCOMP] = ((s >> 10) & 0x3ff) * (1.0F / 1023.0F);
-   texel[BCOMP] = ((s >> 20) & 0x3ff) * (1.0F / 1023.0F);
-   texel[ACOMP] = ((s >> 30) & 0x03) * (1.0F / 3.0F);
-}
-
-
-static void
-FETCH(R8G8_UNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( s & 0xff );
-   texel[GCOMP] = UBYTE_TO_FLOAT( s >> 8 );
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(G8R8_UNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT( s >> 8 );
-   texel[GCOMP] = UBYTE_TO_FLOAT( s & 0xff );
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(L4A4_UNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte s = *TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = (s & 0xf) * (1.0F / 15.0F);
-   texel[ACOMP] = ((s >> 4) & 0xf) * (1.0F / 15.0F);
-}
-
-
-static void
-FETCH(L8A8_UNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = UBYTE_TO_FLOAT( s & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( s >> 8 );
-}
-
-
-static void
-FETCH(R_UNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte s = *TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] = UBYTE_TO_FLOAT(s);
-   texel[GCOMP] = 0.0;
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(R_UNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] = USHORT_TO_FLOAT(s);
-   texel[GCOMP] = 0.0;
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(A8L8_UNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = UBYTE_TO_FLOAT( s >> 8 );
-   texel[ACOMP] = UBYTE_TO_FLOAT( s & 0xff );
-}
-
-
-static void
-FETCH(R16G16_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = USHORT_TO_FLOAT( s & 0xffff );
-   texel[GCOMP] = USHORT_TO_FLOAT( s >> 16 );
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(G16R16_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = USHORT_TO_FLOAT( s >> 16 );
-   texel[GCOMP] = USHORT_TO_FLOAT( s & 0xffff );
-   texel[BCOMP] = 0.0;
-   texel[ACOMP] = 1.0;
-}
-
-
-static void
-FETCH(L16A16_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = USHORT_TO_FLOAT( s & 0xffff );
-   texel[ACOMP] = USHORT_TO_FLOAT( s >> 16 );
-}
-
-
-static void
-FETCH(A16L16_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = USHORT_TO_FLOAT( s >> 16 );
-   texel[ACOMP] = USHORT_TO_FLOAT( s & 0xffff );
-}
-
-
-static void
-FETCH(B2G3R3_UNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   const GLubyte s = *src;
-   texel[RCOMP] = ((s >> 5) & 0x7) * (1.0F / 7.0F);
-   texel[GCOMP] = ((s >> 2) & 0x7) * (1.0F / 7.0F);
-   texel[BCOMP] = ((s     ) & 0x3) * (1.0F / 3.0F);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A_UNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = UBYTE_TO_FLOAT( src[0] );
-}
-
-
-static void
-FETCH(A_UNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = USHORT_TO_FLOAT( src[0] );
-}
-
-
-static void
-FETCH(L_UNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = UBYTE_TO_FLOAT( src[0] );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(L_UNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = USHORT_TO_FLOAT( src[0] );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(I_UNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = UBYTE_TO_FLOAT( src[0] );
-}
-
-
-static void
-FETCH(I_UNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = USHORT_TO_FLOAT( src[0] );
-}
-
-
-static void
-FETCH(BGR_SRGB8)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 3);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(src[2]);
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(src[1]);
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(src[0]);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A8B8G8R8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 24) );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s      ) & 0xff ); /* linear! */
-}
-
-
-static void
-FETCH(B8G8R8A8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s      ) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s >> 24) ); /* linear! */
-}
-
-
-static void
-FETCH(A8R8G8B8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 24) );
-   texel[ACOMP] = UBYTE_TO_FLOAT( s & 0xff ); /* linear! */
-}
-
-
-static void
-FETCH(R8G8B8A8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s      ) & 0xff );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[ACOMP] = UBYTE_TO_FLOAT( (s >> 24) ); /* linear! */
-}
-
-
-static void
-FETCH(R8G8B8X8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s      ) & 0xff );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(X8B8G8R8_SRGB)(const struct swrast_texture_image *texImage,
-                     GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 24) );
-   texel[GCOMP] = util_format_srgb_8unorm_to_linear_float( (s >> 16) & 0xff );
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float( (s >>  8) & 0xff );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(L_SRGB8)(const struct swrast_texture_image *texImage,
-               GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(src[0]);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(L8A8_SRGB)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(s & 0xff);
-   texel[ACOMP] = UBYTE_TO_FLOAT(s >> 8); /* linear */
-}
-
-
-static void
-FETCH(A8L8_SRGB)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLushort, texImage, i, j, k, 2);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(s >> 8);
-   texel[ACOMP] = UBYTE_TO_FLOAT(s & 0xff); /* linear */
-}
-
-
-static void
-FETCH(RGBA_SINT8)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLbyte *src = TEXEL_ADDR(GLbyte, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(RGBA_SINT16)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort *src = TEXEL_ADDR(GLshort, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(RGBA_SINT32)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLint *src = TEXEL_ADDR(GLint, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(RGBA_UINT8)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLubyte *src = TEXEL_ADDR(GLubyte, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(RGBA_UINT16)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src = TEXEL_ADDR(GLushort, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(RGBA_UINT32)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 4);
-   texel[RCOMP] = (GLfloat) src[0];
-   texel[GCOMP] = (GLfloat) src[1];
-   texel[BCOMP] = (GLfloat) src[2];
-   texel[ACOMP] = (GLfloat) src[3];
-}
-
-
-static void
-FETCH(R_SNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLbyte s = *TEXEL_ADDR(GLbyte, texImage, i, j, k, 1);
-   texel[RCOMP] = BYTE_TO_FLOAT_TEX( s );
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A_SNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLbyte s = *TEXEL_ADDR(GLbyte, texImage, i, j, k, 1);
-   texel[RCOMP] = 0.0F;
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( s );
-}
-
-
-static void
-FETCH(L_SNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLbyte s = *TEXEL_ADDR(GLbyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( s );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(I_SNORM8)(const struct swrast_texture_image *texImage,
-                GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLbyte s = *TEXEL_ADDR(GLbyte, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( s );
-}
-
-
-static void
-FETCH(R8G8_SNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s & 0xff) );
-   texel[GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 8) );
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(L8A8_SNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s & 0xff) );
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 8) );
-}
-
-
-static void
-FETCH(A8L8_SNORM)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 8) );
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s & 0xff) );
-}
-
-
-static void
-FETCH(X8B8G8R8_SNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 24) );
-   texel[GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 16) );
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >>  8) );
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(A8B8G8R8_SNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 24) );
-   texel[GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 16) );
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >>  8) );
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s      ) );
-}
-
-
-static void
-FETCH(R8G8B8A8_SNORM)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint s = *TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   texel[RCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s      ) );
-   texel[GCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >>  8) );
-   texel[BCOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 16) );
-   texel[ACOMP] = BYTE_TO_FLOAT_TEX( (GLbyte) (s >> 24) );
-}
-
-
-static void
-FETCH(R_SNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] = SHORT_TO_FLOAT_TEX( s );
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(A_SNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] = 0.0F;
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = SHORT_TO_FLOAT_TEX( s );
-}
-
-
-static void
-FETCH(L_SNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = SHORT_TO_FLOAT_TEX( s );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(I_SNORM16)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort s = *TEXEL_ADDR(GLshort, texImage, i, j, k, 1);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] =
-   texel[ACOMP] = SHORT_TO_FLOAT_TEX( s );
-}
-
-
-static void
-FETCH(R16G16_SNORM)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort *s = TEXEL_ADDR(GLshort, texImage, i, j, k, 2);
-   texel[RCOMP] = SHORT_TO_FLOAT_TEX( s[0] );
-   texel[GCOMP] = SHORT_TO_FLOAT_TEX( s[1] );
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(LA_SNORM16)(const struct swrast_texture_image *texImage,
-                  GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort *s = TEXEL_ADDR(GLshort, texImage, i, j, k, 2);
-   texel[RCOMP] =
-   texel[GCOMP] =
-   texel[BCOMP] = SHORT_TO_FLOAT_TEX( s[0] );
-   texel[ACOMP] = SHORT_TO_FLOAT_TEX( s[1] );
-}
-
-
-static void
-
-FETCH(RGB_SNORM16)(const struct swrast_texture_image *texImage,
-                   GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort *s = TEXEL_ADDR(GLshort, texImage, i, j, k, 3);
-   texel[RCOMP] = SHORT_TO_FLOAT_TEX( s[0] );
-   texel[GCOMP] = SHORT_TO_FLOAT_TEX( s[1] );
-   texel[BCOMP] = SHORT_TO_FLOAT_TEX( s[2] );
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(RGBA_SNORM16)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLshort *s = TEXEL_ADDR(GLshort, texImage, i, j, k, 4);
-   texel[RCOMP] = SHORT_TO_FLOAT_TEX( s[0] );
-   texel[GCOMP] = SHORT_TO_FLOAT_TEX( s[1] );
-   texel[BCOMP] = SHORT_TO_FLOAT_TEX( s[2] );
-   texel[ACOMP] = SHORT_TO_FLOAT_TEX( s[3] );
-}
-
-
-static void
-FETCH(RGBA_UNORM16)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *s = TEXEL_ADDR(GLushort, texImage, i, j, k, 4);
-   texel[RCOMP] = USHORT_TO_FLOAT( s[0] );
-   texel[GCOMP] = USHORT_TO_FLOAT( s[1] );
-   texel[BCOMP] = USHORT_TO_FLOAT( s[2] );
-   texel[ACOMP] = USHORT_TO_FLOAT( s[3] );
-}
-
-
-static void
-FETCH(RGBX_UNORM16)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *s = TEXEL_ADDR(GLushort, texImage, i, j, k, 4);
-   texel[RCOMP] = USHORT_TO_FLOAT(s[0]);
-   texel[GCOMP] = USHORT_TO_FLOAT(s[1]);
-   texel[BCOMP] = USHORT_TO_FLOAT(s[2]);
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(RGBX_FLOAT16)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLhalfARB *s = TEXEL_ADDR(GLhalfARB, texImage, i, j, k, 4);
-   texel[RCOMP] = _mesa_half_to_float(s[0]);
-   texel[GCOMP] = _mesa_half_to_float(s[1]);
-   texel[BCOMP] = _mesa_half_to_float(s[2]);
-   texel[ACOMP] = 1.0f;
-}
-
-
-static void
-FETCH(RGBX_FLOAT32)(const struct swrast_texture_image *texImage,
-                    GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *s = TEXEL_ADDR(GLfloat, texImage, i, j, k, 4);
-   texel[RCOMP] = s[0];
-   texel[GCOMP] = s[1];
-   texel[BCOMP] = s[2];
-   texel[ACOMP] = 1.0f;
-}
-
-
-/**
- * Fetch texel from 1D, 2D or 3D ycbcr texture, returning RGBA.
- */
-static void
-FETCH(YCBCR)(const struct swrast_texture_image *texImage,
-             GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src0 = TEXEL_ADDR(GLushort, texImage, (i & ~1), j, k, 1); /* even */
-   const GLushort *src1 = src0 + 1;                               /* odd */
-   const GLubyte y0 = (*src0 >> 8) & 0xff;  /* luminance */
-   const GLubyte cb = *src0 & 0xff;         /* chroma U */
-   const GLubyte y1 = (*src1 >> 8) & 0xff;  /* luminance */
-   const GLubyte cr = *src1 & 0xff;         /* chroma V */
-   const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
-   GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
-   GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
-   GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
-   r *= (1.0F / 255.0F);
-   g *= (1.0F / 255.0F);
-   b *= (1.0F / 255.0F);
-   texel[RCOMP] = CLAMP(r, 0.0F, 1.0F);
-   texel[GCOMP] = CLAMP(g, 0.0F, 1.0F);
-   texel[BCOMP] = CLAMP(b, 0.0F, 1.0F);
-   texel[ACOMP] = 1.0F;
-}
-
-
-/**
- * Fetch texel from 1D, 2D or 3D ycbcr texture, returning RGBA.
- */
-static void
-FETCH(YCBCR_REV)(const struct swrast_texture_image *texImage,
-                 GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLushort *src0 = TEXEL_ADDR(GLushort, texImage, (i & ~1), j, k, 1); /* even */
-   const GLushort *src1 = src0 + 1;                               /* odd */
-   const GLubyte y0 = *src0 & 0xff;         /* luminance */
-   const GLubyte cr = (*src0 >> 8) & 0xff;  /* chroma V */
-   const GLubyte y1 = *src1 & 0xff;         /* luminance */
-   const GLubyte cb = (*src1 >> 8) & 0xff;  /* chroma U */
-   const GLubyte y = (i & 1) ? y1 : y0;     /* choose even/odd luminance */
-   GLfloat r = 1.164F * (y - 16) + 1.596F * (cr - 128);
-   GLfloat g = 1.164F * (y - 16) - 0.813F * (cr - 128) - 0.391F * (cb - 128);
-   GLfloat b = 1.164F * (y - 16) + 2.018F * (cb - 128);
-   r *= (1.0F / 255.0F);
-   g *= (1.0F / 255.0F);
-   b *= (1.0F / 255.0F);
-   texel[RCOMP] = CLAMP(r, 0.0F, 1.0F);
-   texel[GCOMP] = CLAMP(g, 0.0F, 1.0F);
-   texel[BCOMP] = CLAMP(b, 0.0F, 1.0F);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(S8_UINT_Z24_UNORM)(const struct swrast_texture_image *texImage,
-                         GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   /* only return Z, not stencil data */
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   texel[0] = (GLfloat) (((*src) >> 8) * scale);
-   ASSERT(texImage->Base.TexFormat == MESA_FORMAT_S8_UINT_Z24_UNORM ||
-	  texImage->Base.TexFormat == MESA_FORMAT_X8_UINT_Z24_UNORM);
-   ASSERT(texel[0] >= 0.0F);
-   ASSERT(texel[0] <= 1.0F);
-}
-
-
-static void
-FETCH(Z24_UNORM_S8_UINT)(const struct swrast_texture_image *texImage,
-                         GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   /* only return Z, not stencil data */
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   const GLdouble scale = 1.0 / (GLdouble) 0xffffff;
-   texel[0] = (GLfloat) (((*src) & 0x00ffffff) * scale);
-   ASSERT(texImage->Base.TexFormat == MESA_FORMAT_Z24_UNORM_S8_UINT ||
-	  texImage->Base.TexFormat == MESA_FORMAT_Z24_UNORM_X8_UINT);
-   ASSERT(texel[0] >= 0.0F);
-   ASSERT(texel[0] <= 1.0F);
-}
-
-
-static void
-FETCH(R9G9B9E5_FLOAT)(const struct swrast_texture_image *texImage,
-                      GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   rgb9e5_to_float3(*src, texel);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(R11G11B10_FLOAT)(const struct swrast_texture_image *texImage,
-                       GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLuint *src = TEXEL_ADDR(GLuint, texImage, i, j, k, 1);
-   r11g11b10f_to_float3(*src, texel);
-   texel[ACOMP] = 1.0F;
-}
-
-
-static void
-FETCH(Z32_FLOAT_S8X24_UINT)(const struct swrast_texture_image *texImage,
-                            GLint i, GLint j, GLint k, GLfloat *texel)
-{
-   const GLfloat *src = TEXEL_ADDR(GLfloat, texImage, i, j, k, 2);
-   texel[RCOMP] = src[0];
-   texel[GCOMP] = 0.0F;
-   texel[BCOMP] = 0.0F;
-   texel[ACOMP] = 1.0F;
-}
-
-
+#define FETCH_Z(x, type, size)                       \
+   static void \
+   FETCH(x) (const struct swrast_texture_image *texImage, \
+             GLint i, GLint j, GLint k, GLfloat *texel) \
+   { \
+            const type *src = TEXEL_ADDR(type, texImage, i, j, k, size); \
+            _mesa_unpack_float_z_row(MESA_FORMAT_##x, 1, src, texel); \
+   }
+
+#define FETCH_RGBA(x, type, size)                    \
+   static void \
+   FETCH(x) (const struct swrast_texture_image *texImage, \
+             GLint i, GLint j, GLint k, GLfloat *texel) \
+   { \
+            const type *src = TEXEL_ADDR(type, texImage, i, j, k, size); \
+            _mesa_unpack_rgba_row(MESA_FORMAT_##x, 1, src, (GLvoid *)texel); \
+   }
+
+FETCH_Z(Z_UNORM32, GLuint, 1)
+FETCH_Z(Z_UNORM16, GLushort, 1)
+FETCH_Z(S8_UINT_Z24_UNORM, GLuint, 1) /* only return Z, not stencil data */
+FETCH_Z(Z24_UNORM_S8_UINT, GLuint, 1) /* only return Z, not stencil data */
+FETCH_Z(Z32_FLOAT_S8X24_UINT, GLfloat, 2)
+
+FETCH_RGBA(RGBA_FLOAT32, GLfloat, 4)
+FETCH_RGBA(RGBA_FLOAT16, GLhalfARB, 4)
+FETCH_RGBA(RGB_FLOAT32, GLfloat, 3)
+FETCH_RGBA(RGB_FLOAT16, GLhalfARB, 3)
+FETCH_RGBA(A_FLOAT32, GLfloat, 1)
+FETCH_RGBA(A_FLOAT16, GLhalfARB, 1)
+FETCH_RGBA(L_FLOAT32, GLfloat, 1)
+FETCH_RGBA(L_FLOAT16, GLhalfARB, 1)
+FETCH_RGBA(LA_FLOAT32, GLfloat, 2)
+FETCH_RGBA(LA_FLOAT16, GLhalfARB, 2)
+FETCH_RGBA(I_FLOAT32, GLfloat, 1)
+FETCH_RGBA(I_FLOAT16, GLhalfARB, 1)
+FETCH_RGBA(R_FLOAT32, GLfloat, 1)
+FETCH_RGBA(R_FLOAT16, GLhalfARB, 1)
+FETCH_RGBA(RG_FLOAT32, GLfloat, 2)
+FETCH_RGBA(RG_FLOAT16, GLhalfARB, 2)
+FETCH_RGBA(A8B8G8R8_UNORM, GLuint, 1)
+FETCH_RGBA(R8G8B8A8_UNORM, GLuint, 1)
+FETCH_RGBA(B8G8R8A8_UNORM, GLuint, 1)
+FETCH_RGBA(A8R8G8B8_UNORM, GLuint, 1)
+FETCH_RGBA(X8B8G8R8_UNORM, GLuint, 1)
+FETCH_RGBA(R8G8B8X8_UNORM, GLuint, 1)
+FETCH_RGBA(B8G8R8X8_UNORM, GLuint, 1)
+FETCH_RGBA(X8R8G8B8_UNORM, GLuint, 1)
+FETCH_RGBA(BGR_UNORM8, GLubyte, 3)
+FETCH_RGBA(RGB_UNORM8, GLubyte, 3)
+FETCH_RGBA(B5G6R5_UNORM, GLushort, 1)
+FETCH_RGBA(R5G6B5_UNORM, GLushort, 1)
+FETCH_RGBA(B4G4R4A4_UNORM, GLushort, 1)
+FETCH_RGBA(A4R4G4B4_UNORM, GLushort, 1)
+FETCH_RGBA(A1B5G5R5_UNORM, GLushort, 1)
+FETCH_RGBA(B5G5R5A1_UNORM, GLushort, 1)
+FETCH_RGBA(A1R5G5B5_UNORM, GLushort, 1)
+FETCH_RGBA(B10G10R10A2_UNORM, GLuint, 1)
+FETCH_RGBA(R10G10B10A2_UNORM, GLuint, 1)
+FETCH_RGBA(R8G8_UNORM, GLushort, 1)
+FETCH_RGBA(G8R8_UNORM, GLushort, 1)
+FETCH_RGBA(L4A4_UNORM, GLubyte, 1)
+FETCH_RGBA(L8A8_UNORM, GLushort, 1)
+FETCH_RGBA(R_UNORM8, GLubyte, 1)
+FETCH_RGBA(R_UNORM16, GLushort, 1)
+FETCH_RGBA(A8L8_UNORM, GLushort, 1)
+FETCH_RGBA(R16G16_UNORM, GLuint, 1)
+FETCH_RGBA(G16R16_UNORM, GLuint, 1)
+FETCH_RGBA(L16A16_UNORM, GLuint, 1)
+FETCH_RGBA(A16L16_UNORM, GLuint, 1)
+FETCH_RGBA(B2G3R3_UNORM, GLubyte, 1)
+FETCH_RGBA(A_UNORM8, GLubyte, 1)
+FETCH_RGBA(A_UNORM16, GLushort, 1)
+FETCH_RGBA(L_UNORM8, GLubyte, 1)
+FETCH_RGBA(L_UNORM16, GLushort, 1)
+FETCH_RGBA(I_UNORM8, GLubyte, 1)
+FETCH_RGBA(I_UNORM16, GLushort, 1)
+FETCH_RGBA(BGR_SRGB8, GLubyte, 3)
+FETCH_RGBA(A8B8G8R8_SRGB, GLuint, 1)
+FETCH_RGBA(B8G8R8A8_SRGB, GLuint, 1)
+FETCH_RGBA(A8R8G8B8_SRGB, GLuint, 1)
+FETCH_RGBA(R8G8B8A8_SRGB, GLuint, 1)
+FETCH_RGBA(R8G8B8X8_SRGB, GLuint, 1)
+FETCH_RGBA(X8B8G8R8_SRGB, GLuint, 1)
+FETCH_RGBA(L_SRGB8, GLubyte, 1)
+FETCH_RGBA(L8A8_SRGB, GLushort, 1)
+FETCH_RGBA(A8L8_SRGB, GLushort, 1) /* one packed 16-bit texel per element, same stride as L8A8_SRGB */
+FETCH_RGBA(RGBA_SINT8, GLbyte, 4)
+FETCH_RGBA(RGBA_SINT16, GLshort, 4)
+FETCH_RGBA(RGBA_SINT32, GLint, 4)
+FETCH_RGBA(RGBA_UINT8, GLubyte, 4)
+FETCH_RGBA(RGBA_UINT16, GLushort, 4)
+FETCH_RGBA(RGBA_UINT32, GLuint, 4)
+FETCH_RGBA(R_SNORM8, GLbyte, 1)
+FETCH_RGBA(A_SNORM8, GLbyte, 1)
+FETCH_RGBA(L_SNORM8, GLbyte, 1)
+FETCH_RGBA(I_SNORM8, GLbyte, 1)
+FETCH_RGBA(R8G8_SNORM, GLshort, 1)
+FETCH_RGBA(L8A8_SNORM, GLshort, 1)
+FETCH_RGBA(A8L8_SNORM, GLshort, 1)
+FETCH_RGBA(X8B8G8R8_SNORM, GLint, 1)
+FETCH_RGBA(A8B8G8R8_SNORM, GLint, 1)
+FETCH_RGBA(R8G8B8A8_SNORM, GLint, 1)
+FETCH_RGBA(R_SNORM16, GLshort, 1)
+FETCH_RGBA(A_SNORM16, GLshort, 1)
+FETCH_RGBA(L_SNORM16, GLshort, 1)
+FETCH_RGBA(I_SNORM16, GLshort, 1)
+FETCH_RGBA(R16G16_SNORM, GLshort, 2)
+FETCH_RGBA(LA_SNORM16, GLshort, 2)
+FETCH_RGBA(RGB_SNORM16, GLshort, 3)
+FETCH_RGBA(RGBA_SNORM16, GLshort, 4)
+FETCH_RGBA(RGBA_UNORM16, GLushort, 4)
+FETCH_RGBA(RGBX_UNORM16, GLushort, 4)
+FETCH_RGBA(RGBX_FLOAT16, GLhalfARB, 4)
+FETCH_RGBA(RGBX_FLOAT32, GLfloat, 4)
+FETCH_RGBA(YCBCR, GLushort, 1) /* Fetch texel from 1D, 2D or 3D ycbcr texture, returning RGBA. NOTE(review): the hand-written fetcher this replaces addressed the even texel of the pair, (i & ~1), and chose luminance by (i & 1); confirm that unpacking a single texel starting at i still decodes odd i correctly. */
+FETCH_RGBA(YCBCR_REV, GLushort, 1) /* Fetch texel from 1D, 2D or 3D ycbcr texture, returning RGBA. NOTE(review): the hand-written fetcher this replaces addressed the even texel of the pair, (i & ~1), and chose luminance by (i & 1); confirm that unpacking a single texel starting at i still decodes odd i correctly. */
+FETCH_RGBA(R9G9B9E5_FLOAT, GLuint, 1)
+FETCH_RGBA(R11G11B10_FLOAT, GLuint, 1)
 
 #undef TEXEL_ADDR
 #undef DIM
 #undef FETCH
+#undef FETCH_Z
+#undef FETCH_RGBA
diff --git a/mesalib/src/mesa/swrast/s_texfilter.c b/mesalib/src/mesa/swrast/s_texfilter.c
index faeccae7c..fa79fdc5b 100644
--- a/mesalib/src/mesa/swrast/s_texfilter.c
+++ b/mesalib/src/mesa/swrast/s_texfilter.c
@@ -27,8 +27,9 @@
 #include "main/context.h"
 #include "main/colormac.h"
 #include "main/imports.h"
-#include "main/texobj.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
 
 #include "s_context.h"
 #include "s_texfilter.h"
@@ -832,7 +833,7 @@ apply_depth_mode(GLenum depthMode, GLfloat z, GLfloat texel[4])
 static GLboolean
 is_depth_texture(const struct gl_texture_object *tObj)
 {
-   GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   GLenum format = _mesa_texture_base_format(tObj);
    return format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL_EXT;
 }
 
@@ -1004,7 +1005,7 @@ sample_nearest_1d( struct gl_context *ctx,
                    GLfloat rgba[][4] )
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_1d_nearest(ctx, samp, image, texcoords[i], rgba[i]);
@@ -1021,7 +1022,7 @@ sample_linear_1d( struct gl_context *ctx,
                   GLfloat rgba[][4] )
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_1d_linear(ctx, samp, image, texcoords[i], rgba[i]);
@@ -1051,12 +1052,12 @@ sample_lambda_1d( struct gl_context *ctx,
       switch (samp->MinFilter) {
       case GL_NEAREST:
          for (i = minStart; i < minEnd; i++)
-            sample_1d_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = minStart; i < minEnd; i++)
-            sample_1d_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_linear(ctx, samp, _mesa_base_tex_image(tObj),
                              texcoords[i], rgba[i]);
          break;
       case GL_NEAREST_MIPMAP_NEAREST:
@@ -1086,12 +1087,12 @@ sample_lambda_1d( struct gl_context *ctx,
       switch (samp->MagFilter) {
       case GL_NEAREST:
          for (i = magStart; i < magEnd; i++)
-            sample_1d_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = magStart; i < magEnd; i++)
-            sample_1d_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_linear(ctx, samp, _mesa_base_tex_image(tObj),
                              texcoords[i], rgba[i]);
          break;
       default:
@@ -1364,7 +1365,7 @@ sample_nearest_2d(struct gl_context *ctx,
                   const GLfloat lambda[], GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_2d_nearest(ctx, samp, image, texcoords[i], rgba[i]);
@@ -1381,7 +1382,7 @@ sample_linear_2d(struct gl_context *ctx,
                  const GLfloat lambda[], GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg = swrast_texture_image_const(image);
    (void) lambda;
    if (samp->WrapS == GL_REPEAT &&
@@ -1415,7 +1416,7 @@ opt_sample_rgb_2d(struct gl_context *ctx,
                   GLuint n, const GLfloat texcoords[][4],
                   const GLfloat lambda[], GLfloat rgba[][4])
 {
-   const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *img = _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg = swrast_texture_image_const(img);
    const GLfloat width = (GLfloat) img->Width;
    const GLfloat height = (GLfloat) img->Height;
@@ -1460,7 +1461,7 @@ opt_sample_rgba_2d(struct gl_context *ctx,
                    GLuint n, const GLfloat texcoords[][4],
                    const GLfloat lambda[], GLfloat rgba[][4])
 {
-   const struct gl_texture_image *img = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *img = _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg = swrast_texture_image_const(img);
    const GLfloat width = (GLfloat) img->Width;
    const GLfloat height = (GLfloat) img->Height;
@@ -1498,7 +1499,7 @@ sample_lambda_2d(struct gl_context *ctx,
                  GLuint n, const GLfloat texcoords[][4],
                  const GLfloat lambda[], GLfloat rgba[][4])
 {
-   const struct gl_texture_image *tImg = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *tImg = _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg = swrast_texture_image_const(tImg);
    GLuint minStart, minEnd;  /* texels with minification */
    GLuint magStart, magEnd;  /* texels with magnification */
@@ -1653,7 +1654,7 @@ sample_2d_ewa(struct gl_context *ctx,
    GLfloat scaling = 1.0f / (1 << level);
    const struct gl_texture_image *img =	tObj->Image[0][level];
    const struct gl_texture_image *mostDetailedImage =
-      tObj->Image[0][tObj->BaseLevel];
+      _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg =
       swrast_texture_image_const(mostDetailedImage);
    GLfloat tex_u = -0.5f + texcoord[0] * swImg->WidthScale * scaling;
@@ -1865,7 +1866,7 @@ sample_lambda_2d_aniso(struct gl_context *ctx,
                        GLuint n, const GLfloat texcoords[][4],
                        const GLfloat lambda_iso[], GLfloat rgba[][4])
 {
-   const struct gl_texture_image *tImg = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *tImg = _mesa_base_tex_image(tObj);
    const struct swrast_texture_image *swImg = swrast_texture_image_const(tImg);
    const GLfloat maxEccentricity =
       samp->MaxAnisotropy * samp->MaxAnisotropy;
@@ -2232,7 +2233,7 @@ sample_nearest_3d(struct gl_context *ctx,
                   GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_3d_nearest(ctx, samp, image, texcoords[i], rgba[i]);
@@ -2249,7 +2250,7 @@ sample_linear_3d(struct gl_context *ctx,
 		 const GLfloat lambda[], GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_3d_linear(ctx, samp, image, texcoords[i], rgba[i]);
@@ -2279,12 +2280,12 @@ sample_lambda_3d(struct gl_context *ctx,
       switch (samp->MinFilter) {
       case GL_NEAREST:
          for (i = minStart; i < minEnd; i++)
-            sample_3d_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_3d_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = minStart; i < minEnd; i++)
-            sample_3d_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_3d_linear(ctx, samp, _mesa_base_tex_image(tObj),
                              texcoords[i], rgba[i]);
          break;
       case GL_NEAREST_MIPMAP_NEAREST:
@@ -2314,12 +2315,12 @@ sample_lambda_3d(struct gl_context *ctx,
       switch (samp->MagFilter) {
       case GL_NEAREST:
          for (i = magStart; i < magEnd; i++)
-            sample_3d_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_3d_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = magStart; i < magEnd; i++)
-            sample_3d_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_3d_linear(ctx, samp, _mesa_base_tex_image(tObj),
                              texcoords[i], rgba[i]);
          break;
       default:
@@ -3020,7 +3021,7 @@ sample_nearest_2d_array(struct gl_context *ctx,
                         GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_2d_array_nearest(ctx, samp, image, texcoords[i], rgba[i]);
@@ -3038,7 +3039,7 @@ sample_linear_2d_array(struct gl_context *ctx,
                        const GLfloat lambda[], GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_2d_array_linear(ctx, samp, image, texcoords[i], rgba[i]);
@@ -3068,12 +3069,12 @@ sample_lambda_2d_array(struct gl_context *ctx,
       switch (samp->MinFilter) {
       case GL_NEAREST:
          for (i = minStart; i < minEnd; i++)
-            sample_2d_array_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_2d_array_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                                     texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = minStart; i < minEnd; i++)
-            sample_2d_array_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_2d_array_linear(ctx, samp, _mesa_base_tex_image(tObj),
                                    texcoords[i], rgba[i]);
          break;
       case GL_NEAREST_MIPMAP_NEAREST:
@@ -3111,12 +3112,12 @@ sample_lambda_2d_array(struct gl_context *ctx,
       switch (samp->MagFilter) {
       case GL_NEAREST:
          for (i = magStart; i < magEnd; i++)
-            sample_2d_array_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_2d_array_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = magStart; i < magEnd; i++)
-            sample_2d_array_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_2d_array_linear(ctx, samp, _mesa_base_tex_image(tObj),
                                    texcoords[i], rgba[i]);
          break;
       default:
@@ -3311,7 +3312,7 @@ sample_nearest_1d_array(struct gl_context *ctx,
                         GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_1d_array_nearest(ctx, samp, image, texcoords[i], rgba[i]);
@@ -3328,7 +3329,7 @@ sample_linear_1d_array(struct gl_context *ctx,
                        const GLfloat lambda[], GLfloat rgba[][4])
 {
    GLuint i;
-   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *image = _mesa_base_tex_image(tObj);
    (void) lambda;
    for (i = 0; i < n; i++) {
       sample_1d_array_linear(ctx, samp, image, texcoords[i], rgba[i]);
@@ -3358,12 +3359,12 @@ sample_lambda_1d_array(struct gl_context *ctx,
       switch (samp->MinFilter) {
       case GL_NEAREST:
          for (i = minStart; i < minEnd; i++)
-            sample_1d_array_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_array_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                                     texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = minStart; i < minEnd; i++)
-            sample_1d_array_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_array_linear(ctx, samp, _mesa_base_tex_image(tObj),
                                    texcoords[i], rgba[i]);
          break;
       case GL_NEAREST_MIPMAP_NEAREST:
@@ -3397,12 +3398,12 @@ sample_lambda_1d_array(struct gl_context *ctx,
       switch (samp->MagFilter) {
       case GL_NEAREST:
          for (i = magStart; i < magEnd; i++)
-            sample_1d_array_nearest(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_array_nearest(ctx, samp, _mesa_base_tex_image(tObj),
                               texcoords[i], rgba[i]);
          break;
       case GL_LINEAR:
          for (i = magStart; i < magEnd; i++)
-            sample_1d_array_linear(ctx, samp, tObj->Image[0][tObj->BaseLevel],
+            sample_1d_array_linear(ctx, samp, _mesa_base_tex_image(tObj),
                                    texcoords[i], rgba[i]);
          break;
       default:
@@ -3749,7 +3750,7 @@ _swrast_choose_texture_sample_func( struct gl_context *ctx,
          }
          else {
             /* check for a few optimized cases */
-            const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+            const struct gl_texture_image *img = _mesa_base_tex_image(t);
             const struct swrast_texture_image *swImg =
                swrast_texture_image_const(img);
             texture_sample_func func;
diff --git a/mesalib/src/mesa/swrast/s_triangle.c b/mesalib/src/mesa/swrast/s_triangle.c
index 164906643..1d8e31c2e 100644
--- a/mesalib/src/mesa/swrast/s_triangle.c
+++ b/mesalib/src/mesa/swrast/s_triangle.c
@@ -37,6 +37,7 @@
 #include "main/mtypes.h"
 #include "main/state.h"
 #include "main/samplerobj.h"
+#include "main/teximage.h"
 #include "program/prog_instruction.h"
 
 #include "s_aatriangle.h"
@@ -127,7 +128,7 @@ _swrast_culltriangle( struct gl_context *ctx,
    const struct gl_texture_object *obj = 				\
       ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX];		\
    const struct gl_texture_image *texImg =				\
-      obj->Image[0][obj->BaseLevel];					\
+      _mesa_base_tex_image(obj);					\
    const struct swrast_texture_image *swImg =				\
       swrast_texture_image_const(texImg);				\
    const GLfloat twidth = (GLfloat) texImg->Width;			\
@@ -185,7 +186,7 @@ _swrast_culltriangle( struct gl_context *ctx,
    const struct gl_texture_object *obj = 				\
       ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX];		\
    const struct gl_texture_image *texImg = 				\
-       obj->Image[0][obj->BaseLevel]; 					\
+      _mesa_base_tex_image(obj);					\
    const struct swrast_texture_image *swImg =				\
       swrast_texture_image_const(texImg);				\
    const GLfloat twidth = (GLfloat) texImg->Width;			\
@@ -542,7 +543,7 @@ affine_span(struct gl_context *ctx, SWspan *span,
    const struct gl_texture_object *obj = 				\
       ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX];		\
    const struct gl_texture_image *texImg = 				\
-      obj->Image[0][obj->BaseLevel]; 					\
+      _mesa_base_tex_image(obj);					\
    const struct swrast_texture_image *swImg =				\
       swrast_texture_image_const(texImg);				\
    const GLfloat twidth = (GLfloat) texImg->Width;			\
@@ -811,7 +812,7 @@ fast_persp_span(struct gl_context *ctx, SWspan *span,
    const struct gl_texture_object *obj = 				\
       ctx->Texture.Unit[0].CurrentTex[TEXTURE_2D_INDEX];		\
    const struct gl_texture_image *texImg = 				\
-      obj->Image[0][obj->BaseLevel];			 		\
+      _mesa_base_tex_image(obj);					\
    const struct swrast_texture_image *swImg =				\
       swrast_texture_image_const(texImg);				\
    info.texture = (const GLchan *) swImg->ImageSlices[0];		\
@@ -1059,7 +1060,7 @@ _swrast_choose_triangle( struct gl_context *ctx )
          else
             samp = NULL;
 
-         texImg = texObj2D ? texObj2D->Image[0][texObj2D->BaseLevel] : NULL;
+         texImg = texObj2D ? _mesa_base_tex_image(texObj2D) : NULL;
          swImg = swrast_texture_image_const(texImg);
 
          format = texImg ? texImg->TexFormat : MESA_FORMAT_NONE;
diff --git a/mesalib/src/mesa/swrast/swrast.h b/mesalib/src/mesa/swrast/swrast.h
index ac3dbe304..a89dc6cda 100644
--- a/mesalib/src/mesa/swrast/swrast.h
+++ b/mesalib/src/mesa/swrast/swrast.h
@@ -132,16 +132,18 @@ _swrast_Bitmap( struct gl_context *ctx,
 		const GLubyte *bitmap );
 
 extern void
-_swrast_CopyPixels( struct gl_context *ctx,
-		    GLint srcx, GLint srcy,
-		    GLint destx, GLint desty,
-		    GLsizei width, GLsizei height,
-		    GLenum type );
+_swrast_CopyPixels(struct gl_context *ctx,
+                   GLint srcx, GLint srcy,
+                   GLint destx, GLint desty,
+                   GLsizei width, GLsizei height,
+                   GLenum type);
 
 extern GLboolean
 swrast_fast_copy_pixels(struct gl_context *ctx,
-			GLint srcX, GLint srcY, GLsizei width, GLsizei height,
-			GLint dstX, GLint dstY, GLenum type);
+                        struct gl_framebuffer *srcFb,
+                        struct gl_framebuffer *dstFb,
+                        GLint srcX, GLint srcY, GLsizei width, GLsizei height,
+                        GLint dstX, GLint dstY, GLenum type);
 
 extern void
 _swrast_DrawPixels( struct gl_context *ctx,
@@ -153,6 +155,8 @@ _swrast_DrawPixels( struct gl_context *ctx,
 
 extern void
 _swrast_BlitFramebuffer(struct gl_context *ctx,
+                        struct gl_framebuffer *readFb,
+                        struct gl_framebuffer *drawFb,
                         GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                         GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                         GLbitfield mask, GLenum filter);
diff --git a/mesalib/src/mesa/tnl/t_rasterpos.c b/mesalib/src/mesa/tnl/t_rasterpos.c
index e538c348f..2f52bb306 100644
--- a/mesalib/src/mesa/tnl/t_rasterpos.c
+++ b/mesalib/src/mesa/tnl/t_rasterpos.c
@@ -28,7 +28,7 @@
 #include "main/feedback.h"
 #include "main/light.h"
 #include "main/macros.h"
-#include "main/simple_list.h"
+#include "util/simple_list.h"
 #include "main/mtypes.h"
 
 #include "math/m_matrix.h"
diff --git a/mesalib/src/mesa/tnl/t_vb_light.c b/mesalib/src/mesa/tnl/t_vb_light.c
index ee80f1b82..f6884a464 100644
--- a/mesalib/src/mesa/tnl/t_vb_light.c
+++ b/mesalib/src/mesa/tnl/t_vb_light.c
@@ -29,7 +29,7 @@
 #include "main/light.h"
 #include "main/macros.h"
 #include "main/imports.h"
-#include "main/simple_list.h"
+#include "util/simple_list.h"
 #include "main/mtypes.h"
 
 #include "math/m_translate.h"
diff --git a/mesalib/src/mesa/tnl/t_vertex_generic.c b/mesalib/src/mesa/tnl/t_vertex_generic.c
index 8926c178d..079d473fc 100644
--- a/mesalib/src/mesa/tnl/t_vertex_generic.c
+++ b/mesalib/src/mesa/tnl/t_vertex_generic.c
@@ -29,7 +29,7 @@
 #include "main/glheader.h"
 #include "main/context.h"
 #include "main/colormac.h"
-#include "main/simple_list.h"
+#include "util/simple_list.h"
 #include "swrast/s_chan.h"
 #include "t_context.h"
 #include "t_vertex.h"
diff --git a/mesalib/src/mesa/tnl/t_vertex_sse.c b/mesalib/src/mesa/tnl/t_vertex_sse.c
index 4b52f7dbb..93128fbe6 100644
--- a/mesalib/src/mesa/tnl/t_vertex_sse.c
+++ b/mesalib/src/mesa/tnl/t_vertex_sse.c
@@ -28,7 +28,7 @@
 #include "main/glheader.h"
 #include "main/context.h"
 #include "main/colormac.h"
-#include "main/simple_list.h"
+#include "util/simple_list.h"
 #include "main/enums.h"
 #include "swrast/s_chan.h"
 #include "t_context.h"
diff --git a/mesalib/src/mesa/vbo/vbo_attrib_tmp.h b/mesalib/src/mesa/vbo/vbo_attrib_tmp.h
index ec66934fc..0c44540fc 100644
--- a/mesalib/src/mesa/vbo/vbo_attrib_tmp.h
+++ b/mesalib/src/mesa/vbo/vbo_attrib_tmp.h
@@ -210,6 +210,7 @@ static inline float conv_i2_to_norm_float(const struct gl_context *ctx, int i2)
       }								\
    } else if ((type) == GL_UNSIGNED_INT_10F_11F_11F_REV) {	\
       float res[4];						\
+      res[3] = 1;                                               \
       r11g11b10f_to_float3((arg), res);				\
       ATTR##val##FV((attr), res);				\
    } else							\
diff --git a/mesalib/src/mesa/vbo/vbo_exec_array.c b/mesalib/src/mesa/vbo/vbo_exec_array.c
index 6eac841b1..c16fe77ee 100644
--- a/mesalib/src/mesa/vbo/vbo_exec_array.c
+++ b/mesalib/src/mesa/vbo/vbo_exec_array.c
@@ -300,7 +300,8 @@ check_draw_elements_data(struct gl_context *ctx, GLsizei count, GLenum elemType,
 {
    struct gl_vertex_array_object *vao = ctx->Array.VAO;
    const void *elemMap;
-   GLint i, k;
+   GLint i;
+   GLuint k;
 
    if (_mesa_is_bufferobj(ctx->Array.VAO->IndexBufferObj)) {
       elemMap = ctx->Driver.MapBufferRange(ctx, 0,
@@ -596,7 +597,8 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
    prim[0].is_indirect = 0;
 
    /* Implement the primitive restart index */
-   if (ctx->Array.PrimitiveRestart && ctx->Array.RestartIndex < count) {
+   if (ctx->Array.PrimitiveRestart && !ctx->Array.PrimitiveRestartFixedIndex &&
+       ctx->Array.RestartIndex < count) {
       GLuint primCount = 0;
 
       if (ctx->Array.RestartIndex == start) {
diff --git a/mesalib/src/mesa/vbo/vbo_save_api.c b/mesalib/src/mesa/vbo/vbo_save_api.c
index 5055c221b..beef342be 100644
--- a/mesalib/src/mesa/vbo/vbo_save_api.c
+++ b/mesalib/src/mesa/vbo/vbo_save_api.c
@@ -375,11 +375,14 @@ _save_compile_vertex_list(struct gl_context *ctx)
     * being compiled.
     */
    node = (struct vbo_save_vertex_list *)
-      _mesa_dlist_alloc(ctx, save->opcode_vertex_list, sizeof(*node));
+      _mesa_dlist_alloc_aligned(ctx, save->opcode_vertex_list, sizeof(*node));
 
    if (!node)
       return;
 
+   /* Make sure the pointer is aligned to the size of a pointer */
+   assert((GLintptr) node % sizeof(void *) == 0);
+
    /* Duplicate our template, increment refcounts to the storage structs:
     */
    memcpy(node->attrsz, save->attrsz, sizeof(node->attrsz));
diff --git a/mesalib/src/util/Makefile.am b/mesalib/src/util/Makefile.am
index c7e183e8d..9af233059 100644
--- a/mesalib/src/util/Makefile.am
+++ b/mesalib/src/util/Makefile.am
@@ -31,12 +31,21 @@ libmesautil_la_CPPFLAGS = \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa \
+	-I$(top_srcdir)/src/gallium/include \
+	-I$(top_srcdir)/src/gallium/auxiliary \
+	$(SHA1_CFLAGS) \
 	$(VISIBILITY_CFLAGS)
 
 libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)
 
+if ENABLE_SHADER_CACHE
+libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
+endif
+
+libmesautil_la_LIBADD = $(SHA1_LIBS)
+
 check_PROGRAMS = u_atomic_test
 TESTS = $(check_PROGRAMS)
 
diff --git a/mesalib/src/util/Makefile.sources b/mesalib/src/util/Makefile.sources
index 5f87fc32a..560ea836a 100644
--- a/mesalib/src/util/Makefile.sources
+++ b/mesalib/src/util/Makefile.sources
@@ -1,4 +1,9 @@
+MESA_UTIL_SHADER_CACHE_FILES := \
+	mesa-sha1.c \
+	mesa-sha1.h
+
 MESA_UTIL_FILES :=	\
+	bitset.h \
 	format_srgb.h \
 	hash_table.c	\
 	hash_table.h \
@@ -9,6 +14,9 @@ MESA_UTIL_FILES :=	\
 	register_allocate.h \
 	rgtc.c \
 	rgtc.h \
+	set.c \
+	set.h \
+	simple_list.h \
 	strtod.cpp \
 	strtod.h \
 	texcompress_rgtc_tmp.h \
diff --git a/mesalib/src/util/SConscript b/mesalib/src/util/SConscript
index 34b9a2dea..84bd7a1e1 100644
--- a/mesalib/src/util/SConscript
+++ b/mesalib/src/util/SConscript
@@ -11,6 +11,8 @@ env.Prepend(CPPPATH = [
     '#src',
     '#src/mapi',
     '#src/mesa',
+    '#src/gallium/include',
+    '#src/gallium/auxiliary',
     '#src/util',
 ])
 
@@ -29,6 +31,11 @@ mesautil_sources = (
     source_lists['MESA_UTIL_GENERATED_FILES']
 )
 
+# XXX We don't yet have scons support for detecting any of the various
+# HAVE_SHA1_* definitions, so for now simply disable the shader cache.
+if False:
+    mesautil_sources += source_lists['MESA_UTIL_SHADER_CACHE_FILES']
+
 mesautil = env.ConvenienceLibrary(
     target = 'mesautil',
     source = mesautil_sources,
diff --git a/mesalib/src/mesa/main/bitset.h b/mesalib/src/util/bitset.h
index 601fd0ebf..17c5d5d25 100644
--- a/mesalib/src/mesa/main/bitset.h
+++ b/mesalib/src/util/bitset.h
@@ -31,18 +31,18 @@
 #ifndef BITSET_H
 #define BITSET_H
 
-#include "imports.h"
+#include "util/u_math.h"
 
 /****************************************************************************
  * generic bitset implementation
  */
 
-#define BITSET_WORD GLuint
+#define BITSET_WORD unsigned int
 #define BITSET_WORDBITS (sizeof (BITSET_WORD) * 8)
 
 /* bitset declarations
  */
-#define BITSET_WORDS(bits) (ALIGN(bits, BITSET_WORDBITS) / BITSET_WORDBITS)
+#define BITSET_WORDS(bits) (((bits) + BITSET_WORDBITS - 1) / BITSET_WORDBITS)
 #define BITSET_DECLARE(name, bits) BITSET_WORD name[BITSET_WORDS(bits)]
 
 /* bitset operations
diff --git a/mesalib/src/util/hash_table.c b/mesalib/src/util/hash_table.c
index 0ad038377..3247593c1 100644
--- a/mesalib/src/util/hash_table.c
+++ b/mesalib/src/util/hash_table.c
@@ -232,7 +232,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data);
 
 static void
-_mesa_hash_table_rehash(struct hash_table *ht, int new_size_index)
+_mesa_hash_table_rehash(struct hash_table *ht, unsigned new_size_index)
 {
    struct hash_table old_ht;
    struct hash_entry *table, *entry;
@@ -267,6 +267,7 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
                   const void *key, void *data)
 {
    uint32_t start_hash_address, hash_address;
+   struct hash_entry *available_entry = NULL;
 
    if (ht->entries >= ht->max_entries) {
       _mesa_hash_table_rehash(ht, ht->size_index + 1);
@@ -281,13 +282,11 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
       uint32_t double_hash;
 
       if (!entry_is_present(ht, entry)) {
-         if (entry_is_deleted(ht, entry))
-            ht->deleted_entries--;
-         entry->hash = hash;
-         entry->key = key;
-         entry->data = data;
-         ht->entries++;
-         return entry;
+         /* Stash the first available entry we find */
+         if (available_entry == NULL)
+            available_entry = entry;
+         if (entry_is_free(entry))
+            break;
       }
 
       /* Implement replacement when another insert happens
@@ -314,6 +313,16 @@ hash_table_insert(struct hash_table *ht, uint32_t hash,
       hash_address = (hash_address + double_hash) % ht->size;
    } while (hash_address != start_hash_address);
 
+   if (available_entry) {
+      if (entry_is_deleted(ht, available_entry))
+         ht->deleted_entries--;
+      available_entry->hash = hash;
+      available_entry->key = key;
+      available_entry->data = data;
+      ht->entries++;
+      return available_entry;
+   }
+
    /* We could hit here if a required resize failed. An unchecked-malloc
     * application could ignore this result.
     */
@@ -334,8 +343,8 @@ _mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data)
 }
 
 struct hash_entry *
-_mesa_hash_table_insert_with_hash(struct hash_table *ht, uint32_t hash,
-                                  const void *key, void *data)
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data)
 {
    assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key));
    return hash_table_insert(ht, hash, key, data);
@@ -431,27 +440,18 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
 uint32_t
 _mesa_hash_data(const void *data, size_t size)
 {
-   uint32_t hash = 2166136261ul;
-   const uint8_t *bytes = data;
-
-   while (size-- != 0) {
-      hash ^= *bytes;
-      hash = hash * 0x01000193;
-      bytes++;
-   }
-
-   return hash;
+   return _mesa_fnv32_1a_accumulate_block(_mesa_fnv32_1a_offset_bias,
+                                          data, size);
 }
 
 /** FNV-1a string hash implementation */
 uint32_t
 _mesa_hash_string(const char *key)
 {
-   uint32_t hash = 2166136261ul;
+   uint32_t hash = _mesa_fnv32_1a_offset_bias;
 
    while (*key != 0) {
-      hash ^= *key;
-      hash = hash * 0x01000193;
+      hash = _mesa_fnv32_1a_accumulate(hash, *key);
       key++;
    }
 
diff --git a/mesalib/src/util/hash_table.h b/mesalib/src/util/hash_table.h
index 5561e1584..eb9dbc333 100644
--- a/mesalib/src/util/hash_table.h
+++ b/mesalib/src/util/hash_table.h
@@ -70,8 +70,8 @@ void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
 struct hash_entry *
 _mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
 struct hash_entry *
-_mesa_hash_table_insert_with_hash(struct hash_table *ht, uint32_t hash,
-                                  const void *key, void *data);
+_mesa_hash_table_insert_pre_hashed(struct hash_table *ht, uint32_t hash,
+                                   const void *key, void *data);
 struct hash_entry *
 _mesa_hash_table_search(struct hash_table *ht, const void *key);
 struct hash_entry *
@@ -101,6 +101,25 @@ static inline uint32_t _mesa_hash_pointer(const void *pointer)
    return _mesa_hash_data(&pointer, sizeof(pointer));
 }
 
+static const uint32_t _mesa_fnv32_1a_offset_bias = 2166136261u;
+
+/* Fold size bytes at data into a running 32-bit FNV-1a hash and return it. */
+static inline uint32_t
+_mesa_fnv32_1a_accumulate_block(uint32_t hash, const void *data, size_t size)
+{
+   const uint8_t *input = (const uint8_t *)data;
+   size_t offset;
+
+   /* FNV-1a step: xor the next byte in, then multiply by 0x01000193 */
+   for (offset = 0; offset < size; offset++)
+      hash = (hash ^ input[offset]) * 0x01000193;
+
+   return hash;
+}
+
+#define _mesa_fnv32_1a_accumulate(hash, expr) \
+   _mesa_fnv32_1a_accumulate_block(hash, &(expr), sizeof(expr))
+
 /**
  * This foreach function is safe against deletion (which just replaces
  * an entry's data with the deleted marker), but not against insertion
diff --git a/mesalib/src/util/macros.h b/mesalib/src/util/macros.h
index 5fc672953..eec8b9352 100644
--- a/mesalib/src/util/macros.h
+++ b/mesalib/src/util/macros.h
@@ -82,7 +82,7 @@ do {                        \
 #endif
 
 #ifndef unreachable
-#define unreachable(str)
+#define unreachable(str) assert(!str)
 #endif
 
 /**
diff --git a/mesalib/src/util/mesa-sha1.c b/mesalib/src/util/mesa-sha1.c
new file mode 100644
index 000000000..fa2819377
--- /dev/null
+++ b/mesalib/src/util/mesa-sha1.c
@@ -0,0 +1,316 @@
+/* Copyright © 2007 Carl Worth
+ * Copyright © 2009 Jeremy Huddleston, Julien Cristau, and Matthieu Herrb
+ * Copyright © 2009-2010 Mikhail Gusarov
+ * Copyright © 2012 Yaakov Selkowitz and Keith Packard
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "mesa-sha1.h"
+
+#if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
+	|| defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
+
+#include <sha1.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   SHA1_CTX *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+
+   SHA1Init(ctx);
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
+
+   SHA1Update(sha1_ctx, data, size);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
+
+   SHA1Final(result, sha1_ctx);
+   free(sha1_ctx);
+   return 1;
+}
+
+#elif defined(HAVE_SHA1_IN_COMMONCRYPTO)        /* Use CommonCrypto for SHA1 */
+
+#include <CommonCrypto/CommonDigest.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   CC_SHA1_CTX *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+
+   CC_SHA1_Init(ctx);
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
+
+   CC_SHA1_Update(sha1_ctx, data, size);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
+
+   CC_SHA1_Final(result, sha1_ctx);
+   free(sha1_ctx);
+   return 1;
+}
+
+#elif defined(HAVE_SHA1_IN_CRYPTOAPI)        /* Use CryptoAPI for SHA1 */
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <wincrypt.h>
+
+static HCRYPTPROV hProv; /* NOTE(review): one handle shared by every context: each _mesa_sha1_init overwrites it and each _mesa_sha1_final releases it */
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   HCRYPTHASH *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+
+   CryptAcquireContext(&hProv, NULL, MS_DEF_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
+   CryptCreateHash(hProv, CALG_SHA1, 0, 0, ctx);
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
+
+   CryptHashData(*hHash, data, size, 0);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
+   DWORD len = 20;
+
+   CryptGetHashParam(*hHash, HP_HASHVAL, result, &len, 0);
+   CryptDestroyHash(*hHash);
+   CryptReleaseContext(hProv, 0);
+   free(ctx);
+   return 1;
+}
+
+#elif defined(HAVE_SHA1_IN_LIBNETTLE)   /* Use libnettle for SHA1 */
+
+#include <nettle/sha.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   struct sha1_ctx *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+   sha1_init(ctx);
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   sha1_update((struct sha1_ctx *) ctx, size, data);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   sha1_digest((struct sha1_ctx *) ctx, 20, result);
+   free(ctx);
+   return 1;
+}
+
+#elif defined(HAVE_SHA1_IN_LIBGCRYPT)   /* Use libgcrypt for SHA1 */
+
+#include <gcrypt.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   static int init;
+   gcry_md_hd_t h;
+   gcry_error_t err;
+
+   if (!init) {
+      if (!gcry_check_version(NULL))
+         return NULL;
+      gcry_control(GCRYCTL_DISABLE_SECMEM, 0);
+      gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
+      init = 1;
+   }
+
+   err = gcry_md_open(&h, GCRY_MD_SHA1, 0);
+   if (err)
+      return NULL;
+   return (struct mesa_sha1 *) h;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
+
+   gcry_md_write(h, data, size);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
+
+   memcpy(result, gcry_md_read(h, GCRY_MD_SHA1), 20);
+   gcry_md_close(h);
+   return 1;
+}
+
+#elif defined(HAVE_SHA1_IN_LIBSHA1)     /* Use libsha1 */
+
+#include <libsha1.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   sha1_ctx *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+   sha1_begin(ctx);
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   sha1_hash(data, size, (sha1_ctx *) ctx);
+   return 1;
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   sha1_end(result, (sha1_ctx *) ctx);
+   free(ctx);
+   return 1;
+}
+
+#else                           /* Use OpenSSL's libcrypto */
+
+#include <stddef.h>             /* buggy openssl/sha.h wants size_t */
+#include <openssl/sha.h>
+
+struct mesa_sha1 *
+_mesa_sha1_init(void)
+{
+   int ret;
+   SHA_CTX *ctx = malloc(sizeof(*ctx));
+
+   if (!ctx)
+      return NULL;
+   ret = SHA1_Init(ctx);
+   if (!ret) {
+      free(ctx);
+      return NULL;
+   }
+   return (struct mesa_sha1 *) ctx;
+}
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
+{
+   int ret;
+   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
+
+   ret = SHA1_Update(sha_ctx, data, size);
+   if (!ret)
+      free(sha_ctx); /* on failure the context is released here; callers must not use or finalize it afterwards */
+   return ret; /* SHA1_Update's result passed through; zero is the failure case handled above */
+}
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
+{
+   int ret;
+   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
+
+   ret = SHA1_Final(result, (SHA_CTX *) sha_ctx);
+   free(sha_ctx);
+   return ret;
+}
+
+#endif
+
+/* One-shot SHA-1 of size bytes at data; result is left untouched on failure. */
+void
+_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
+{
+   struct mesa_sha1 *ctx = _mesa_sha1_init();
+   /* init may return NULL; the OpenSSL update frees ctx itself when it fails */
+   if (ctx && _mesa_sha1_update(ctx, data, size))
+      _mesa_sha1_final(ctx, result);
+}
+
+/* Write the 20-byte digest sha1 as 40 lowercase hex chars plus NUL into buf. */
+char *
+_mesa_sha1_format(char *buf, const unsigned char *sha1)
+{
+   static const char nibble_to_hex[] = "0123456789abcdef";
+   char *out = buf;
+   int byte;
+   for (byte = 0; byte < 20; byte++) {
+      *out++ = nibble_to_hex[sha1[byte] >> 4];
+      *out++ = nibble_to_hex[sha1[byte] & 0x0f];
+   }
+   *out = '\0';
+   return buf;
+}
diff --git a/mesalib/src/util/mesa-sha1.h b/mesalib/src/util/mesa-sha1.h
new file mode 100644
index 000000000..1599405cd
--- /dev/null
+++ b/mesalib/src/util/mesa-sha1.h
@@ -0,0 +1,53 @@
+/* Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef MESA_SHA1_H /* project-prefixed so the guard of a system SHA-1 header cannot clash with it */
+#define MESA_SHA1_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+struct mesa_sha1;
+
+struct mesa_sha1 *
+_mesa_sha1_init(void);
+
+int
+_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size);
+
+int
+_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]);
+
+char *
+_mesa_sha1_format(char *buf, const unsigned char *sha1);
+
+void
+_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
+
+#endif
diff --git a/mesalib/src/util/register_allocate.c b/mesalib/src/util/register_allocate.c
index af7a20c09..684ee5d6c 100644
--- a/mesalib/src/util/register_allocate.c
+++ b/mesalib/src/util/register_allocate.c
@@ -76,7 +76,7 @@
 #include "main/imports.h"
 #include "main/macros.h"
 #include "main/mtypes.h"
-#include "main/bitset.h"
+#include "util/bitset.h"
 #include "register_allocate.h"
 
 #define NO_REG ~0U
diff --git a/mesalib/src/mesa/main/set.c b/mesalib/src/util/set.c
index 52c1dabd8..f01f8699a 100644
--- a/mesalib/src/mesa/main/set.c
+++ b/mesalib/src/util/set.c
@@ -33,10 +33,11 @@
  */
 
 #include <stdlib.h>
+#include <assert.h>
 
 #include "macros.h"
+#include "ralloc.h"
 #include "set.h"
-#include "util/ralloc.h"
 
 /*
  * From Knuth -- a good choice for hash/rehash values is p, p-2 where
@@ -103,6 +104,7 @@ entry_is_present(struct set_entry *entry)
 
 struct set *
 _mesa_set_create(void *mem_ctx,
+                 uint32_t (*key_hash_function)(const void *key),
                  bool (*key_equals_function)(const void *a,
                                              const void *b))
 {
@@ -116,6 +118,7 @@ _mesa_set_create(void *mem_ctx,
    ht->size = hash_sizes[ht->size_index].size;
    ht->rehash = hash_sizes[ht->size_index].rehash;
    ht->max_entries = hash_sizes[ht->size_index].max_entries;
+   ht->key_hash_function = key_hash_function;
    ht->key_equals_function = key_equals_function;
    ht->table = rzalloc_array(ht, struct set_entry, ht->size);
    ht->entries = 0;
@@ -157,8 +160,8 @@ _mesa_set_destroy(struct set *ht, void (*delete_function)(struct set_entry *entr
  *
  * Returns NULL if no entry is found.
  */
-struct set_entry *
-_mesa_set_search(const struct set *ht, uint32_t hash, const void *key)
+static struct set_entry *
+set_search(const struct set *ht, uint32_t hash, const void *key)
 {
    uint32_t hash_address;
 
@@ -184,8 +187,27 @@ _mesa_set_search(const struct set *ht, uint32_t hash, const void *key)
    return NULL;
 }
 
+struct set_entry *
+_mesa_set_search(const struct set *set, const void *key)
+{
+   assert(set->key_hash_function);
+   return set_search(set, set->key_hash_function(key), key);
+}
+
+struct set_entry *
+_mesa_set_search_pre_hashed(const struct set *set, uint32_t hash,
+                            const void *key)
+{
+   assert(set->key_hash_function == NULL ||
+          hash == set->key_hash_function(key));
+   return set_search(set, hash, key);
+}
+
+static struct set_entry *
+set_add(struct set *ht, uint32_t hash, const void *key);
+
 static void
-set_rehash(struct set *ht, int new_size_index)
+set_rehash(struct set *ht, unsigned new_size_index)
 {
    struct set old_ht;
    struct set_entry *table, *entry;
@@ -212,7 +234,7 @@ set_rehash(struct set *ht, int new_size_index)
         entry != old_ht.table + old_ht.size;
         entry++) {
       if (entry_is_present(entry)) {
-         _mesa_set_add(ht, entry->hash, entry->key);
+         set_add(ht, entry->hash, entry->key);
       }
    }
 
@@ -225,10 +247,11 @@ set_rehash(struct set *ht, int new_size_index)
  * Note that insertion may rearrange the table on a resize or rehash,
  * so previously found hash_entries are no longer valid after this function.
  */
-struct set_entry *
-_mesa_set_add(struct set *ht, uint32_t hash, const void *key)
+static struct set_entry *
+set_add(struct set *ht, uint32_t hash, const void *key)
 {
    uint32_t hash_address;
+   struct set_entry *available_entry = NULL;
 
    if (ht->entries >= ht->max_entries) {
       set_rehash(ht, ht->size_index + 1);
@@ -242,12 +265,11 @@ _mesa_set_add(struct set *ht, uint32_t hash, const void *key)
       uint32_t double_hash;
 
       if (!entry_is_present(entry)) {
-         if (entry_is_deleted(entry))
-            ht->deleted_entries--;
-         entry->hash = hash;
-         entry->key = key;
-         ht->entries++;
-         return entry;
+         /* Stash the first available entry we find */
+         if (available_entry == NULL)
+            available_entry = entry;
+         if (entry_is_free(entry))
+            break;
       }
 
       /* Implement replacement when another insert happens
@@ -271,12 +293,36 @@ _mesa_set_add(struct set *ht, uint32_t hash, const void *key)
       hash_address = (hash_address + double_hash) % ht->size;
    } while (hash_address != hash % ht->size);
 
+   if (available_entry) {
+      if (entry_is_deleted(available_entry))
+         ht->deleted_entries--;
+      available_entry->hash = hash;
+      available_entry->key = key;
+      ht->entries++;
+      return available_entry;
+   }
+
    /* We could hit here if a required resize failed. An unchecked-malloc
     * application could ignore this result.
     */
    return NULL;
 }
 
+struct set_entry *
+_mesa_set_add(struct set *set, const void *key)
+{
+   assert(set->key_hash_function);
+   return set_add(set, set->key_hash_function(key), key);
+}
+
+struct set_entry *
+_mesa_set_add_pre_hashed(struct set *set, uint32_t hash, const void *key)
+{
+   assert(set->key_hash_function == NULL ||
+          hash == set->key_hash_function(key));
+   return set_add(set, hash, key);
+}
+
 /**
  * This function deletes the given hash table entry.
  *
@@ -343,4 +389,3 @@ _mesa_set_random_entry(struct set *ht,
 
    return NULL;
 }
-
diff --git a/mesalib/src/mesa/main/set.h b/mesalib/src/util/set.h
index 206d0c4d2..9acd2c28c 100644
--- a/mesalib/src/mesa/main/set.h
+++ b/mesalib/src/util/set.h
@@ -43,6 +43,7 @@ struct set_entry {
 struct set {
    void *mem_ctx;
    struct set_entry *table;
+   uint32_t (*key_hash_function)(const void *key);
    bool (*key_equals_function)(const void *a, const void *b);
    uint32_t size;
    uint32_t rehash;
@@ -54,6 +55,7 @@ struct set {
 
 struct set *
 _mesa_set_create(void *mem_ctx,
+                 uint32_t (*key_hash_function)(const void *key),
                  bool (*key_equals_function)(const void *a,
                                              const void *b));
 void
@@ -61,11 +63,15 @@ _mesa_set_destroy(struct set *set,
                   void (*delete_function)(struct set_entry *entry));
 
 struct set_entry *
-_mesa_set_add(struct set *set, uint32_t hash, const void *key);
+_mesa_set_add(struct set *set, const void *key);
+struct set_entry *
+_mesa_set_add_pre_hashed(struct set *set, uint32_t hash, const void *key);
 
 struct set_entry *
-_mesa_set_search(const struct set *set, uint32_t hash,
-                 const void *key);
+_mesa_set_search(const struct set *set, const void *key);
+struct set_entry *
+_mesa_set_search_pre_hashed(const struct set *set, uint32_t hash,
+                            const void *key);
 
 void
 _mesa_set_remove(struct set *set, struct set_entry *entry);
diff --git a/mesalib/src/mesa/main/simple_list.h b/mesalib/src/util/simple_list.h
index 903432dce..5f261612a 100644
--- a/mesalib/src/mesa/main/simple_list.h
+++ b/mesalib/src/util/simple_list.h
@@ -55,6 +55,7 @@ struct simple_node {
 do {						\
    (elem)->next->prev = (elem)->prev;		\
    (elem)->prev->next = (elem)->next;		\
+   make_empty_list(elem);			\
 } while (0)
 
 /**
diff --git a/mesalib/src/util/u_atomic.h b/mesalib/src/util/u_atomic.h
index 401003638..d15398e1e 100644
--- a/mesalib/src/util/u_atomic.h
+++ b/mesalib/src/util/u_atomic.h
@@ -39,6 +39,7 @@
 #define p_atomic_dec_zero(v) (__sync_sub_and_fetch((v), 1) == 0)
 #define p_atomic_inc(v) (void) __sync_add_and_fetch((v), 1)
 #define p_atomic_dec(v) (void) __sync_sub_and_fetch((v), 1)
+#define p_atomic_add(v, i) (void) __sync_add_and_fetch((v), (i))
 #define p_atomic_inc_return(v) __sync_add_and_fetch((v), 1)
 #define p_atomic_dec_return(v) __sync_sub_and_fetch((v), 1)
 #define p_atomic_cmpxchg(v, old, _new) \
@@ -60,6 +61,7 @@
 #define p_atomic_dec_zero(_v) (p_atomic_dec_return(_v) == 0)
 #define p_atomic_inc(_v) ((void) p_atomic_inc_return(_v))
 #define p_atomic_dec(_v) ((void) p_atomic_dec_return(_v))
+#define p_atomic_add(_v, _i) (*(_v) = *(_v) + (_i))
 #define p_atomic_inc_return(_v) (++(*(_v)))
 #define p_atomic_dec_return(_v) (--(*(_v)))
 #define p_atomic_cmpxchg(_v, _old, _new) (*(_v) == (_old) ? (*(_v) = (_new), (_old)) : *(_v))
@@ -71,8 +73,8 @@
 
 #define PIPE_ATOMIC "MSVC Intrinsics"
 
-/* We use the Windows header's Interlocked* functions instead of the
- * _Interlocked* intrinsics wherever we can, as support for the latter varies
+/* We use the Windows header's Interlocked*64 functions instead of the
+ * _Interlocked*64 intrinsics wherever we can, as support for the latter varies
  * with target CPU, whereas Windows headers take care of all portability
  * issues: using intrinsics where available, falling back to library
  * implementations where not.
@@ -84,7 +86,64 @@
 #include <intrin.h>
 #include <assert.h>
 
-#pragma intrinsic(_InterlockedCompareExchange8)
+#if _MSC_VER < 1600
+
+/* Emulate _InterlockedCompareExchange8 with a 16-bit compare-exchange on the
+ * aligned short containing *destination8; returns the byte's previous value. */
+static __inline char
+_InterlockedCompareExchange8(char volatile *destination8, char exchange8, char comparand8)
+{
+   INT_PTR destinationAddr = (INT_PTR)destination8;
+   short volatile *destination16 = (short volatile *)(destinationAddr & ~1);
+   const short shift8 = (destinationAddr & 1) * 8;
+   const short mask8 = 0xff << shift8;
+   short initial16 = *destination16;
+   char initial8 = initial16 >> shift8;
+   while (initial8 == comparand8) {
+      /* Keep the neighboring byte untouched; widen through unsigned char so a
+       * negative exchange8 cannot sign-extend into it. */
+      short exchange16 = (initial16 & ~mask8) | ((unsigned char)exchange8 << shift8);
+      short comparand16 = initial16;
+      /* Assign (do not redeclare) so a failed attempt retries with fresh data. */
+      initial16 = _InterlockedCompareExchange16(destination16, exchange16, comparand16);
+      if (initial16 == comparand16)
+         return comparand8;
+      /* the word changed under us: re-test the byte from the value just read */
+      initial8 = initial16 >> shift8;
+   }
+   return initial8;
+}
+
+/* Implement _InterlockedExchangeAdd16 in terms of _InterlockedCompareExchange16; returns the value *addend held before the add */
+static __inline short
+_InterlockedExchangeAdd16(short volatile *addend, short value)
+{
+   short initial = *addend;
+   short comparand;
+   do {
+      short exchange = initial + value;
+      comparand = initial;
+      /* if *addend==comparand then *addend=exchange, return original *addend */
+      initial = _InterlockedCompareExchange16(addend, exchange, comparand);
+   } while(initial != comparand); /* mismatch: *addend changed meanwhile, retry with the value just read */
+   return comparand;
+}
+
+/* Implement _InterlockedExchangeAdd8 in terms of _InterlockedCompareExchange8; returns the value *addend held before the add */
+static __inline char
+_InterlockedExchangeAdd8(char volatile *addend, char value)
+{
+   char initial = *addend;
+   char comparand;
+   do {
+      char exchange = initial + value;
+      comparand = initial;
+      initial = _InterlockedCompareExchange8(addend, exchange, comparand); /* yields the byte seen at *addend */
+   } while(initial != comparand); /* mismatch: *addend changed meanwhile, retry with the value just read */
+   return comparand;
+}
+
+#endif /* _MSC_VER < 1600 */
 
 /* MSVC supports decltype keyword, but it's only supported on C++ and doesn't
  * quite work here; and if a C++-only solution is worthwhile, then it would be
@@ -102,25 +161,32 @@
    ((void) p_atomic_inc_return(_v))
 
 #define p_atomic_inc_return(_v) (\
-   sizeof *(_v) == sizeof(short)   ? InterlockedIncrement16((short *)  (_v)) : \
-   sizeof *(_v) == sizeof(long)    ? InterlockedIncrement  ((long *)   (_v)) : \
-   sizeof *(_v) == sizeof(__int64) ? InterlockedIncrement64((__int64 *)(_v)) : \
+   sizeof *(_v) == sizeof(short)   ? _InterlockedIncrement16((short *)  (_v)) : \
+   sizeof *(_v) == sizeof(long)    ? _InterlockedIncrement  ((long *)   (_v)) : \
+   sizeof *(_v) == sizeof(__int64) ? InterlockedIncrement64 ((__int64 *)(_v)) : \
                                      (assert(!"should not get here"), 0))
 
 #define p_atomic_dec(_v) \
    ((void) p_atomic_dec_return(_v))
 
 #define p_atomic_dec_return(_v) (\
-   sizeof *(_v) == sizeof(short)   ? InterlockedDecrement16((short *)  (_v)) : \
-   sizeof *(_v) == sizeof(long)    ? InterlockedDecrement  ((long *)   (_v)) : \
-   sizeof *(_v) == sizeof(__int64) ? InterlockedDecrement64((__int64 *)(_v)) : \
+   sizeof *(_v) == sizeof(short)   ? _InterlockedDecrement16((short *)  (_v)) : \
+   sizeof *(_v) == sizeof(long)    ? _InterlockedDecrement  ((long *)   (_v)) : \
+   sizeof *(_v) == sizeof(__int64) ? InterlockedDecrement64 ((__int64 *)(_v)) : \
+                                     (assert(!"should not get here"), 0))
+
+#define p_atomic_add(_v, _i) (\
+   sizeof *(_v) == sizeof(char)    ? _InterlockedExchangeAdd8 ((char *)   (_v), (_i)) : \
+   sizeof *(_v) == sizeof(short)   ? _InterlockedExchangeAdd16((short *)  (_v), (_i)) : \
+   sizeof *(_v) == sizeof(long)    ? _InterlockedExchangeAdd  ((long *)   (_v), (_i)) : \
+   sizeof *(_v) == sizeof(__int64) ? InterlockedExchangeAdd64((__int64 *)(_v), (_i)) : \
                                      (assert(!"should not get here"), 0))
 
 #define p_atomic_cmpxchg(_v, _old, _new) (\
-   sizeof *(_v) == sizeof(char)    ? _InterlockedCompareExchange8((char *)   (_v), (char)   (_new), (char)   (_old)) : \
-   sizeof *(_v) == sizeof(short)   ? InterlockedCompareExchange16((short *)  (_v), (short)  (_new), (short)  (_old)) : \
-   sizeof *(_v) == sizeof(long)    ? InterlockedCompareExchange  ((long *)   (_v), (long)   (_new), (long)   (_old)) : \
-   sizeof *(_v) == sizeof(__int64) ? InterlockedCompareExchange64((__int64 *)(_v), (__int64)(_new), (__int64)(_old)) : \
+   sizeof *(_v) == sizeof(char)    ? _InterlockedCompareExchange8 ((char *)   (_v), (char)   (_new), (char)   (_old)) : \
+   sizeof *(_v) == sizeof(short)   ? _InterlockedCompareExchange16((short *)  (_v), (short)  (_new), (short)  (_old)) : \
+   sizeof *(_v) == sizeof(long)    ? _InterlockedCompareExchange  ((long *)   (_v), (long)   (_new), (long)   (_old)) : \
+   sizeof *(_v) == sizeof(__int64) ? InterlockedCompareExchange64 ((__int64 *)(_v), (__int64)(_new), (__int64)(_old)) : \
                                      (assert(!"should not get here"), 0))
 
 #endif
@@ -149,7 +215,7 @@
    sizeof(*v) == sizeof(uint64_t) ? atomic_inc_64((uint64_t *)(v)) : \
                                     (assert(!"should not get here"), 0))
 
-#define p_atomic_inc_return(v) ((typeof(*v)) \
+#define p_atomic_inc_return(v) ((__typeof(*v)) \
    sizeof(*v) == sizeof(uint8_t)  ? atomic_inc_8_nv ((uint8_t  *)(v)) : \
    sizeof(*v) == sizeof(uint16_t) ? atomic_inc_16_nv((uint16_t *)(v)) : \
    sizeof(*v) == sizeof(uint32_t) ? atomic_inc_32_nv((uint32_t *)(v)) : \
@@ -163,14 +229,21 @@
    sizeof(*v) == sizeof(uint64_t) ? atomic_dec_64((uint64_t *)(v)) : \
                                     (assert(!"should not get here"), 0))
 
-#define p_atomic_dec_return(v) ((typeof(*v)) \
+#define p_atomic_dec_return(v) ((__typeof(*v)) \
    sizeof(*v) == sizeof(uint8_t)  ? atomic_dec_8_nv ((uint8_t  *)(v)) : \
    sizeof(*v) == sizeof(uint16_t) ? atomic_dec_16_nv((uint16_t *)(v)) : \
    sizeof(*v) == sizeof(uint32_t) ? atomic_dec_32_nv((uint32_t *)(v)) : \
    sizeof(*v) == sizeof(uint64_t) ? atomic_dec_64_nv((uint64_t *)(v)) : \
                                     (assert(!"should not get here"), 0))
 
-#define p_atomic_cmpxchg(v, old, _new) ((typeof(*v)) \
+#define p_atomic_add(v, i) ((void) ( /* cast covers the whole conditional, not just sizeof */ \
+   sizeof(*v) == sizeof(uint8_t)  ? atomic_add_8 ((uint8_t  *)(v), (i)) : \
+   sizeof(*v) == sizeof(uint16_t) ? atomic_add_16((uint16_t *)(v), (i)) : \
+   sizeof(*v) == sizeof(uint32_t) ? atomic_add_32((uint32_t *)(v), (i)) : \
+   sizeof(*v) == sizeof(uint64_t) ? atomic_add_64((uint64_t *)(v), (i)) : \
+                                    (assert(!"should not get here"), 0)))
+
+#define p_atomic_cmpxchg(v, old, _new) ((__typeof(*v)) \
    sizeof(*v) == sizeof(uint8_t)  ? atomic_cas_8 ((uint8_t  *)(v), (uint8_t )(old), (uint8_t )(_new)) : \
    sizeof(*v) == sizeof(uint16_t) ? atomic_cas_16((uint16_t *)(v), (uint16_t)(old), (uint16_t)(_new)) : \
    sizeof(*v) == sizeof(uint32_t) ? atomic_cas_32((uint32_t *)(v), (uint32_t)(old), (uint32_t)(_new)) : \
diff --git a/mesalib/src/util/u_atomic_test.c b/mesalib/src/util/u_atomic_test.c
index 4845e753e..939cfe445 100644
--- a/mesalib/src/util/u_atomic_test.c
+++ b/mesalib/src/util/u_atomic_test.c
@@ -37,8 +37,9 @@
 #include "u_atomic.h"
 
 
-#define test_atomic_cmpxchg(type, ones) \
-   static void test_atomic_cmpxchg_##type (void) { \
+/* Test only assignment-like operations, which are supported on all types */
+#define test_atomic_assign(type, ones) \
+   static void test_atomic_assign_##type (void) { \
       type v, r; \
       \
       p_atomic_set(&v, ones); \
@@ -59,14 +60,33 @@
    }
 
 
+/* Test arithmetic operations that are supported on 8 bits integer types */
+#define test_atomic_8bits(type, ones) \
+   test_atomic_assign(type, ones) \
+   \
+   static void test_atomic_8bits_##type (void) { \
+      type v, r; \
+      \
+      test_atomic_assign_##type(); \
+      \
+      v = 23; \
+      p_atomic_add(&v, 42); \
+      r = p_atomic_read(&v); \
+      assert(r == 65 && "p_atomic_add"); \
+      \
+      (void) r; \
+   }
+
+
+/* Test all operations */
 #define test_atomic(type, ones) \
-   test_atomic_cmpxchg(type, ones) \
+   test_atomic_8bits(type, ones) \
    \
    static void test_atomic_##type (void) { \
       type v, r; \
       bool b; \
       \
-      test_atomic_cmpxchg_##type(); \
+      test_atomic_8bits_##type(); \
       \
       v = 2; \
       b = p_atomic_dec_zero(&v); \
@@ -112,9 +132,9 @@ test_atomic(uint32_t, UINT32_C(0xffffffff))
 test_atomic(int64_t, INT64_C(-1))
 test_atomic(uint64_t, UINT64_C(0xffffffffffffffff))
 
-test_atomic_cmpxchg(int8_t, INT8_C(-1))
-test_atomic_cmpxchg(uint8_t, UINT8_C(0xff))
-test_atomic_cmpxchg(bool, true)
+test_atomic_8bits(int8_t, INT8_C(-1))
+test_atomic_8bits(uint8_t, UINT8_C(0xff))
+test_atomic_assign(bool, true)
 
 int
 main()
@@ -129,9 +149,9 @@ main()
    test_atomic_int64_t();
    test_atomic_uint64_t();
 
-   test_atomic_cmpxchg_int8_t();
-   test_atomic_cmpxchg_uint8_t();
-   test_atomic_cmpxchg_bool();
+   test_atomic_8bits_int8_t();
+   test_atomic_8bits_uint8_t();
+   test_atomic_assign_bool();
 
    return 0;
 }
author	marha <marha@users.sourceforge.net>	2015-02-22 14:31:16 +0100
committer	marha <marha@users.sourceforge.net>	2015-02-22 14:31:16 +0100
commit	f1c2db43dcf35d2cf4715390bd2391c28e42a8c2 (patch)
tree	46b537271afe0f6534231b1bd4cc4f91ae1fb446 /mesalib/src
parent	5e5a48ff8cd08f123601cd0625ca62a86675aac9 (diff)
download	vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.tar.gz vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.tar.bz2 vcxsrv-f1c2db43dcf35d2cf4715390bd2391c28e42a8c2.zip