8 files changed, 1140 insertions, 186 deletions
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index 39790ec8e..3dcb53702 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -303,9 +303,6 @@ public:
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0, src_reg src1);
 
-   void emit_scs(ir_instruction *ir, enum prog_opcode op,
-		 dst_reg dst, const src_reg &src);
-
    bool try_emit_mad(ir_expression *ir,
 			  int mul_operand);
    bool try_emit_mad_for_and_not(ir_expression *ir,
@@ -479,101 +476,6 @@ ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
    emit_scalar(ir, op, dst, src0, undef);
 }
 
-/**
- * Emit an OPCODE_SCS instruction
- *
- * The \c SCS opcode functions a bit differently than the other Mesa (or
- * ARB_fragment_program) opcodes.  Instead of splatting its result across all
- * four components of the destination, it writes one value to the \c x
- * component and another value to the \c y component.
- *
- * \param ir        IR instruction being processed
- * \param op        Either \c OPCODE_SIN or \c OPCODE_COS depending on which
- *                  value is desired.
- * \param dst       Destination register
- * \param src       Source register
- */
-void
-ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
-			     dst_reg dst,
-			     const src_reg &src)
-{
-   /* Vertex programs cannot use the SCS opcode.
-    */
-   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
-      emit_scalar(ir, op, dst, src);
-      return;
-   }
-
-   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
-   const unsigned scs_mask = (1U << component);
-   int done_mask = ~dst.writemask;
-   src_reg tmp;
-
-   assert(op == OPCODE_SIN || op == OPCODE_COS);
-
-   /* If there are compnents in the destination that differ from the component
-    * that will be written by the SCS instrution, we'll need a temporary.
-    */
-   if (scs_mask != unsigned(dst.writemask)) {
-      tmp = get_temp(glsl_type::vec4_type);
-   }
-
-   for (unsigned i = 0; i < 4; i++) {
-      unsigned this_mask = (1U << i);
-      src_reg src0 = src;
-
-      if ((done_mask & this_mask) != 0)
-	 continue;
-
-      /* The source swizzle specified which component of the source generates
-       * sine / cosine for the current component in the destination.  The SCS
-       * instruction requires that this value be swizzle to the X component.
-       * Replace the current swizzle with a swizzle that puts the source in
-       * the X component.
-       */
-      unsigned src0_swiz = GET_SWZ(src.swizzle, i);
-
-      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
-				   src0_swiz, src0_swiz);
-      for (unsigned j = i + 1; j < 4; j++) {
-	 /* If there is another enabled component in the destination that is
-	  * derived from the same inputs, generate its value on this pass as
-	  * well.
-	  */
-	 if (!(done_mask & (1 << j)) &&
-	     GET_SWZ(src0.swizzle, j) == src0_swiz) {
-	    this_mask |= (1 << j);
-	 }
-      }
-
-      if (this_mask != scs_mask) {
-	 ir_to_mesa_instruction *inst;
-	 dst_reg tmp_dst = dst_reg(tmp);
-
-	 /* Emit the SCS instruction.
-	  */
-	 inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
-	 inst->dst.writemask = scs_mask;
-
-	 /* Move the result of the SCS instruction to the desired location in
-	  * the destination.
-	  */
-	 tmp.swizzle = MAKE_SWIZZLE4(component, component,
-				     component, component);
-	 inst = emit(ir, OPCODE_SCS, dst, tmp);
-	 inst->dst.writemask = this_mask;
-      } else {
-	 /* Emit the SCS instruction to write directly to the destination.
-	  */
-	 ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
-	 inst->dst.writemask = scs_mask;
-      }
-
-      done_mask |= this_mask;
-   }
-}
-
 src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
@@ -1122,12 +1024,6 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
    case ir_unop_cos:
       emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
       break;
-   case ir_unop_sin_reduced:
-      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
-      break;
-   case ir_unop_cos_reduced:
-      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
-      break;
 
    case ir_unop_dFdx:
       emit(ir, OPCODE_DDX, result_dst, op[0]);
diff --git a/mesalib/src/mesa/program/prog_execute.c b/mesalib/src/mesa/program/prog_execute.c
index dc4919ae8..16e8e340d 100644
--- a/mesalib/src/mesa/program/prog_execute.c
+++ b/mesalib/src/mesa/program/prog_execute.c
@@ -37,7 +37,6 @@
 
 #include "c99_math.h"
 #include "main/glheader.h"
-#include "main/colormac.h"
 #include "main/macros.h"
 #include "prog_execute.h"
 #include "prog_instruction.h"
diff --git a/mesalib/src/mesa/program/prog_instruction.h b/mesalib/src/mesa/program/prog_instruction.h
index ab3acbc02..96da198f8 100644
--- a/mesalib/src/mesa/program/prog_instruction.h
+++ b/mesalib/src/mesa/program/prog_instruction.h
@@ -59,6 +59,8 @@
 #define SWIZZLE_NOOP           MAKE_SWIZZLE4(0,1,2,3)
 #define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
 #define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
+/** Determine if swz contains SWIZZLE_ZERO/ONE/NIL for any components. */
+#define HAS_EXTENDED_SWIZZLE(swz) ((swz) & 0x924)
 
 #define SWIZZLE_XYZW MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)
 #define SWIZZLE_XXXX MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)
diff --git a/mesalib/src/mesa/program/prog_parameter.c b/mesalib/src/mesa/program/prog_parameter.c
index cdfe25145..53e9813e6 100644
--- a/mesalib/src/mesa/program/prog_parameter.c
+++ b/mesalib/src/mesa/program/prog_parameter.c
@@ -190,40 +190,6 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
 
 
 /**
- * Add a new named constant to the parameter list.
- * This will be used when the program contains something like this:
- *    PARAM myVals = { 0, 1, 2, 3 };
- *
- * \param paramList  the parameter list
- * \param name  the name for the constant
- * \param values  four float values
- * \return index/position of the new parameter in the parameter list
- */
-GLint
-_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const gl_constant_value values[4],
-                         GLuint size)
-{
-   /* first check if this is a duplicate constant */
-   GLint pos;
-   for (pos = 0; pos < (GLint)paramList->NumParameters; pos++) {
-      const gl_constant_value *pvals = paramList->ParameterValues[pos];
-      if (pvals[0].u == values[0].u &&
-          pvals[1].u == values[1].u &&
-          pvals[2].u == values[2].u &&
-          pvals[3].u == values[3].u &&
-          strcmp(paramList->Parameters[pos].Name, name) == 0) {
-         /* Same name and value is already in the param list - reuse it */
-         return pos;
-      }
-   }
-   /* not found, add new parameter */
-   return _mesa_add_parameter(paramList, PROGRAM_CONSTANT, name,
-                              size, GL_NONE, values, NULL);
-}
-
-
-/**
  * Add a new unnamed constant to the parameter list.  This will be used
  * when a fragment/vertex program contains something like this:
  *    MOV r, { 0, 1, 2, 3 };
@@ -303,28 +269,6 @@ _mesa_add_unnamed_constant(struct gl_program_parameter_list *paramList,
                                            swizzleOut);
 }
 
-#if 0 /* not used yet */
-/**
- * Returns the number of 4-component registers needed to store a piece
- * of GL state.  For matrices this may be as many as 4 registers,
- * everything else needs
- * just 1 register.
- */
-static GLuint
-sizeof_state_reference(const GLint *stateTokens)
-{
-   if (stateTokens[0] == STATE_MATRIX) {
-      GLuint rows = stateTokens[4] - stateTokens[3] + 1;
-      assert(rows >= 1);
-      assert(rows <= 4);
-      return rows;
-   }
-   else {
-      return 1;
-   }
-}
-#endif
-
 
 /**
  * Add a new state reference to the parameter list.
@@ -365,22 +309,6 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
 
 
 /**
- * Lookup a parameter value by name in the given parameter list.
- * \return pointer to the float[4] values.
- */
-gl_constant_value *
-_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
-                             GLsizei nameLen, const char *name)
-{
-   GLint i = _mesa_lookup_parameter_index(paramList, nameLen, name);
-   if (i < 0)
-      return NULL;
-   else
-      return paramList->ParameterValues[i];
-}
-
-
-/**
  * Given a program parameter name, find its position in the list of parameters.
  * \param paramList  the parameter list to search
  * \param nameLen  length of name (in chars).
diff --git a/mesalib/src/mesa/program/prog_parameter.h b/mesalib/src/mesa/program/prog_parameter.h
index 6b3b3c262..74a5fd918 100644
--- a/mesalib/src/mesa/program/prog_parameter.h
+++ b/mesalib/src/mesa/program/prog_parameter.h
@@ -120,11 +120,6 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList,
                     const gl_state_index state[STATE_LENGTH]);
 
 extern GLint
-_mesa_add_named_constant(struct gl_program_parameter_list *paramList,
-                         const char *name, const gl_constant_value values[4],
-                         GLuint size);
-
-extern GLint
 _mesa_add_typed_unnamed_constant(struct gl_program_parameter_list *paramList,
                            const gl_constant_value values[4], GLuint size,
                            GLenum datatype, GLuint *swizzleOut);
@@ -138,10 +133,6 @@ extern GLint
 _mesa_add_state_reference(struct gl_program_parameter_list *paramList,
                           const gl_state_index stateTokens[STATE_LENGTH]);
 
-extern gl_constant_value *
-_mesa_lookup_parameter_value(const struct gl_program_parameter_list *paramList,
-                             GLsizei nameLen, const char *name);
-
 extern GLint
 _mesa_lookup_parameter_index(const struct gl_program_parameter_list *paramList,
                              GLsizei nameLen, const char *name);
diff --git a/mesalib/src/mesa/program/prog_to_nir.c b/mesalib/src/mesa/program/prog_to_nir.c
new file mode 100644
index 000000000..c738f5073
--- /dev/null
+++ b/mesalib/src/mesa/program/prog_to_nir.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ * Copyright © 2014-2015 Broadcom
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "glsl/list.h"
+#include "main/imports.h"
+#include "util/ralloc.h"
+
+#include "prog_to_nir.h"
+#include "prog_instruction.h"
+#include "prog_parameter.h"
+#include "prog_print.h"
+
+/**
+ * \file prog_to_nir.c
+ *
+ * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
+ * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
+ * vertex processing.  Full GLSL support should use glsl_to_nir instead.
+ */
+
+struct ptn_compile { /* state shared by the ptn_* translation helpers */
+   const struct gl_program *prog; /* source program; Parameters are read from it */
+   nir_builder build; /* builder through which all NIR is emitted */
+   bool error; /* when set, ptn_emit_instruction() emits nothing */
+
+   nir_variable *input_vars[VARYING_SLOT_MAX]; /* by PROGRAM_INPUT index */
+   nir_variable *output_vars[VARYING_SLOT_MAX]; /* not used in this part of the file */
+   nir_register **output_regs; /* by PROGRAM_OUTPUT index */
+   nir_register **temp_regs; /* by PROGRAM_TEMPORARY index */
+
+   nir_register *addr_reg; /* the PROGRAM_ADDRESS register (index 0 only) */
+};
+
+#define SWIZ(X, Y, Z, W) \
+   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
+#define ptn_swizzle(b, src, x, y, z, w) nir_swizzle(b, src, SWIZ(x, y, z, w), 4, true)
+#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
+
+static nir_ssa_def *
+ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
+{ /* Read the value behind dest back as a 4-component SSA def. */
+   nir_builder *b = &c->build;
+
+   nir_alu_src src;
+   memset(&src, 0, sizeof(src));
+
+   if (dest->dest.is_ssa)
+      src.src = nir_src_for_ssa(&dest->dest.ssa);
+   else {
+      assert(!dest->dest.reg.indirect); /* indirect register dests unsupported */
+      src.src = nir_src_for_reg(dest->dest.reg.reg);
+      src.src.reg.base_offset = dest->dest.reg.base_offset;
+   }
+
+   for (int i = 0; i < 4; i++)
+      src.swizzle[i] = i; /* identity swizzle: channel i reads channel i */
+
+   return nir_fmov_alu(b, src, 4);
+}
+
+static nir_alu_dest
+ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
+{ /* Build the NIR ALU destination corresponding to a Mesa IR dst register. */
+   nir_alu_dest dest;
+
+   memset(&dest, 0, sizeof(dest));
+
+   switch (prog_dst->File) { /* NOTE(review): no default; other files leave reg unset */
+   case PROGRAM_TEMPORARY:
+      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
+      break;
+   case PROGRAM_OUTPUT:
+      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
+      break;
+   case PROGRAM_ADDRESS:
+      assert(prog_dst->Index == 0); /* only one address register is modelled */
+      dest.dest.reg.reg = c->addr_reg;
+      break;
+   case PROGRAM_UNDEFINED: /* no register attached; dest stays zeroed */
+      break;
+   }
+
+   dest.write_mask = prog_dst->WriteMask;
+   dest.saturate = false;
+
+   assert(!prog_dst->RelAddr); /* relative-addressed destinations unsupported */
+
+   return dest;
+}
+
+/**
+ * Multiply the contents of the ADDR register by 4 to convert from the number
+ * of vec4s to the number of floating point components.
+ */
+static nir_ssa_def *
+ptn_addr_reg_value(struct ptn_compile *c)
+{
+   nir_builder *b = &c->build;
+   nir_alu_src src;
+   memset(&src, 0, sizeof(src));
+   src.src = nir_src_for_reg(c->addr_reg);
+
+   return nir_imul(b, nir_fmov_alu(b, src, 1), nir_imm_int(b, 4));
+}
+
+static nir_ssa_def *
+ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
+{ /* Load prog_src as an SSA def, applying swizzle, then Abs, then Negate. */
+   nir_builder *b = &c->build;
+   nir_alu_src src;
+
+   memset(&src, 0, sizeof(src));
+
+   switch (prog_src->File) {
+   case PROGRAM_UNDEFINED:
+      return nir_imm_float(b, 0.0); /* early out: swizzle/Abs/Negate are skipped */
+   case PROGRAM_TEMPORARY:
+      assert(!prog_src->RelAddr && prog_src->Index >= 0);
+      src.src.reg.reg = c->temp_regs[prog_src->Index];
+      break;
+   case PROGRAM_INPUT: {
+      /* ARB_vertex_program doesn't allow relative addressing on vertex
+       * attributes; ARB_fragment_program has no relative addressing at all.
+       */
+      assert(!prog_src->RelAddr);
+
+      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
+
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
+      load->num_components = 4;
+      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);
+
+      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+
+      src.src = nir_src_for_ssa(&load->dest.ssa);
+      break;
+   }
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT: {
+      /* We actually want to look at the type in the Parameters list for this,
+       * because it lets us upload constant builtin uniforms as actual
+       * constants.
+       */
+      struct gl_program_parameter_list *plist = c->prog->Parameters;
+      gl_register_file file = prog_src->RelAddr ? prog_src->File :
+         plist->Parameters[prog_src->Index].Type;
+
+      switch (file) {
+      case PROGRAM_CONSTANT:
+         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
+            float *v = (float *) plist->ParameterValues[prog_src->Index];
+            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
+            break;
+         }
+         /* FALLTHROUGH */
+      case PROGRAM_STATE_VAR: {
+         nir_intrinsic_op load_op =
+            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
+                                nir_intrinsic_load_uniform;
+         nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, load_op);
+         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+         load->num_components = 4;
+
+         /* Multiply prog_src->Index by 4 to scale from # of vec4s to components. */
+         load->const_index[0] = 4 * prog_src->Index;
+         load->const_index[1] = 1; /* NOTE(review): presumably an element count - confirm */
+
+         if (prog_src->RelAddr) {
+            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
+            if (prog_src->Index < 0) {
+               /* This is a negative offset which should be added to the address
+                * register's value.
+                */
+               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, load->const_index[0]));
+               load->const_index[0] = 0; /* offset is now folded into reladdr */
+            }
+            load->src[0] = nir_src_for_ssa(reladdr);
+         }
+
+         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+
+         src.src = nir_src_for_ssa(&load->dest.ssa);
+         break;
+      }
+      default:
+         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
+                 _mesa_register_file_name(file), file);
+         abort();
+      }
+      break;
+   }
+   default:
+      fprintf(stderr, "unknown src register file: %s (%d)\n",
+              _mesa_register_file_name(prog_src->File), prog_src->File);
+      abort();
+   }
+
+   nir_ssa_def *def;
+   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle)) { /* plain swizzle: one vec4 fmov */
+      for (int i = 0; i < 4; i++)
+         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
+
+      def = nir_fmov_alu(b, src, 4);
+   } else { /* ZERO/ONE selectors present: build each channel separately */
+      nir_ssa_def *chans[4];
+      for (int i = 0; i < 4; i++) {
+         int swizzle = GET_SWZ(prog_src->Swizzle, i);
+         if (swizzle == SWIZZLE_ZERO) {
+            chans[i] = nir_imm_float(b, 0.0);
+         } else if (swizzle == SWIZZLE_ONE) {
+            chans[i] = nir_imm_float(b, 1.0);
+         } else {
+            assert(swizzle != SWIZZLE_NIL);
+            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+            mov->dest.write_mask = 0x1; /* single-channel result */
+            mov->src[0] = src;
+            mov->src[0].swizzle[0] = swizzle; /* pick the requested source channel */
+            nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+
+            chans[i] = &mov->dest.dest.ssa;
+         }
+      }
+      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
+   }
+
+   if (prog_src->Abs)
+      def = nir_fabs(b, def);
+
+   if (prog_src->Negate) /* NOTE(review): negates all channels; confirm Negate is never a partial mask */
+      def = nir_fneg(b, def);
+
+   return def;
+}
+
+static void
+ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
+{
+   unsigned num_srcs = nir_op_infos[op].num_inputs;
+   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++)
+      instr->src[i].src = nir_src_for_ssa(src[i]);
+
+   instr->dest = dest;
+   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+}
+
+static void
+ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
+                     nir_ssa_def *def, unsigned write_mask)
+{ /* Emit an fmov of def into the channels of dest that write_mask also enables. */
+   if (!(dest.write_mask & write_mask))
+      return; /* no channel enabled in both masks: emit nothing */
+
+   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
+   if (!mov)
+      return; /* instruction could not be created; silently skip the move */
+
+   mov->dest = dest;
+   mov->dest.write_mask &= write_mask; /* keep only channels enabled in both */
+   mov->src[0].src = nir_src_for_ssa(def);
+   for (unsigned i = def->num_components; i < 4; i++) /* def narrower than vec4: */
+      mov->src[0].swizzle[i] = def->num_components - 1; /* repeat its last channel */
+   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+}
+
+static void
+ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
+{
+   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
+}
+
+static void
+ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
+}
+
+/* EXP - Approximate Exponential Base 2
+ *  dst.x = 2^{\lfloor src.x\rfloor}
+ *  dst.y = src.x - \lfloor src.x\rfloor
+ *  dst.z = 2^{src.x}
+ *  dst.w = 1.0
+ */
+static void
+ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *srcx = ptn_channel(b, src[0], X);
+
+   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
+   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
+   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
+}
+
+/* LOG - Approximate Logarithm Base 2
+ *  dst.x = \lfloor\log_2{|src.x|}\rfloor
+ *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}
+ *  dst.z = \log_2{|src.x|}
+ *  dst.w = 1.0
+ */
+static void
+ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
+   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
+   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
+
+   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
+   ptn_move_dest_masked(b, dest,
+                        nir_fmul(b, abs_srcx,
+                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
+                        WRITEMASK_Y);
+   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
+}
+
+/* DST - Distance Vector
+ *   dst.x = 1.0
+ *   dst.y = src0.y \times src1.y
+ *   dst.z = src0.z
+ *   dst.w = src1.w
+ */
+static void
+ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
+   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
+   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
+   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
+}
+
+/* LIT - Light Coefficients
+ *  dst.x = 1.0
+ *  dst.y = max(src.x, 0.0)
+ *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
+ *  dst.w = 1.0
+ */
+static void
+ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
+
+   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
+                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);
+
+   if (dest.write_mask & WRITEMASK_Z) {
+      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
+      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
+                                                 nir_imm_float(b, 128.0)),
+                                     nir_imm_float(b, -128.0));
+      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
+                                  wclamp);
+
+      nir_ssa_def *z;
+      if (b->shader->options->native_integers) {
+         z = nir_bcsel(b,
+                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
+                       nir_imm_float(b, 0.0),
+                       pow);
+      } else {
+         z = nir_fcsel(b,
+                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
+                       nir_imm_float(b, 0.0),
+                       pow);
+      }
+
+      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
+   }
+}
+
+/* SCS - Sine Cosine
+ *   dst.x = \cos{src.x}
+ *   dst.y = \sin{src.x}
+ *   dst.z = 0.0
+ *   dst.w = 1.0
+ */
+static void
+ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
+                        WRITEMASK_X);
+   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
+                        WRITEMASK_Y);
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
+}
+
+/**
+ * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
+ */
+static void
+ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   if (b->shader->options->native_integers) {
+      ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
+   } else {
+      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
+   }
+}
+
+/**
+ * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
+ */
+static void
+ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   if (b->shader->options->native_integers) {
+      ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
+   } else {
+      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
+   }
+}
+
+static void
+ptn_sle(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *commuted[] = { src[1], src[0] };
+   ptn_sge(b, dest, commuted);
+}
+
+static void
+ptn_sgt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *commuted[] = { src[1], src[0] };
+   ptn_slt(b, dest, commuted);
+}
+
+/**
+ * Emit SEQ.  For platforms with integers, prefer b2f(feq(...)).
+ */
+static void
+ptn_seq(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   if (b->shader->options->native_integers) {
+      ptn_move_dest(b, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
+   } else {
+      ptn_move_dest(b, dest, nir_seq(b, src[0], src[1]));
+   }
+}
+
+/**
+ * Emit SNE.  For platforms with integers, prefer b2f(fne(...)).
+ */
+static void
+ptn_sne(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   if (b->shader->options->native_integers) {
+      ptn_move_dest(b, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
+   } else {
+      ptn_move_dest(b, dest, nir_sne(b, src[0], src[1]));
+   }
+}
+
+static void
+ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest_masked(b, dest,
+                        nir_fsub(b,
+                                 nir_fmul(b,
+                                          ptn_swizzle(b, src[0], Y, Z, X, X),
+                                          ptn_swizzle(b, src[1], Z, X, Y, X)),
+                                 nir_fmul(b,
+                                          ptn_swizzle(b, src[1], Y, Z, X, X),
+                                          ptn_swizzle(b, src[0], Z, X, Y, X))),
+                        WRITEMASK_XYZ);
+   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
+}
+
+static void
+ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
+}
+
+static void
+ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
+}
+
+static void
+ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
+}
+
+static void
+ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *dp3 = nir_fdot3(b, src[0], src[1]);
+   ptn_move_dest(b, dest, nir_fadd(b, dp3, ptn_channel(b, src[1], W)));
+}
+
+static void
+ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   if (b->shader->options->native_integers) {
+      ptn_move_dest(b, dest, nir_bcsel(b,
+                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
+                                       src[1], src[2]));
+   } else {
+      ptn_move_dest(b, dest, nir_fcsel(b,
+                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
+                                       src[1], src[2]));
+   }
+}
+
+static void
+ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
+}
+
+static void
+ptn_kil(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
+{
+   nir_ssa_def *cmp = b->shader->options->native_integers ?
+      nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0))) :
+      nir_fany4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)));
+
+   nir_intrinsic_instr *discard =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
+   discard->src[0] = nir_src_for_ssa(cmp);
+   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+}
+
+static void
+ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
+        struct prog_instruction *prog_inst)
+{ /* Translate a Mesa IR texture opcode into a nir_tex_instr, then write dest. */
+   nir_tex_instr *instr;
+   nir_texop op;
+   unsigned num_srcs;
+
+   switch (prog_inst->Opcode) {
+   case OPCODE_TEX:
+      op = nir_texop_tex;
+      num_srcs = 1; /* coordinate only */
+      break;
+   case OPCODE_TXB:
+      op = nir_texop_txb;
+      num_srcs = 2; /* coordinate + bias */
+      break;
+   case OPCODE_TXD:
+      op = nir_texop_txd;
+      num_srcs = 3; /* NOTE(review): ddx/ddy srcs are never added below; assert will trip */
+      break;
+   case OPCODE_TXL:
+      op = nir_texop_txl;
+      num_srcs = 2; /* coordinate + lod */
+      break;
+   case OPCODE_TXP:
+      op = nir_texop_tex;
+      num_srcs = 2; /* coordinate + projector */
+      break;
+   case OPCODE_TXP_NV:
+      assert(!"not handled");
+      op = nir_texop_tex;
+      num_srcs = 2;
+      break;
+   default:
+      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
+      abort();
+   }
+
+   if (prog_inst->TexShadow)
+      num_srcs++; /* one more source for the shadow comparison value */
+
+   instr = nir_tex_instr_create(b->shader, num_srcs);
+   instr->op = op;
+   instr->dest_type = nir_type_float;
+   instr->is_shadow = prog_inst->TexShadow;
+   instr->sampler_index = prog_inst->TexSrcUnit;
+
+   switch (prog_inst->TexSrcTarget) {
+   case TEXTURE_1D_INDEX:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
+      break;
+   case TEXTURE_2D_INDEX:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
+      break;
+   case TEXTURE_3D_INDEX:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
+      break;
+   case TEXTURE_CUBE_INDEX:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
+      break;
+   case TEXTURE_RECT_INDEX:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
+      break;
+   default:
+      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
+      abort();
+   }
+
+   switch (instr->sampler_dim) { /* derive the coordinate count from the target */
+   case GLSL_SAMPLER_DIM_1D:
+   case GLSL_SAMPLER_DIM_BUF:
+      instr->coord_components = 1;
+      break;
+   case GLSL_SAMPLER_DIM_2D:
+   case GLSL_SAMPLER_DIM_RECT:
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+   case GLSL_SAMPLER_DIM_MS:
+      instr->coord_components = 2;
+      break;
+   case GLSL_SAMPLER_DIM_3D:
+   case GLSL_SAMPLER_DIM_CUBE:
+      instr->coord_components = 3;
+      break;
+   }
+
+   unsigned src_number = 0; /* next free slot in instr->src[] */
+
+   instr->src[src_number].src =
+      nir_src_for_ssa(ptn_swizzle(b, src[0], X, Y, Z, W));
+   instr->src[src_number].src_type = nir_tex_src_coord;
+   src_number++;
+
+   if (prog_inst->Opcode == OPCODE_TXP) { /* projector comes from src[0].w */
+      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
+      instr->src[src_number].src_type = nir_tex_src_projector;
+      src_number++;
+   }
+
+   if (prog_inst->Opcode == OPCODE_TXB) { /* bias comes from src[0].w */
+      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
+      instr->src[src_number].src_type = nir_tex_src_bias;
+      src_number++;
+   }
+
+   if (prog_inst->Opcode == OPCODE_TXL) { /* lod comes from src[0].w */
+      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
+      instr->src[src_number].src_type = nir_tex_src_lod;
+      src_number++;
+   }
+
+   if (instr->is_shadow) { /* comparison value: .z for 1D/2D-sized coords, else .w */
+      if (instr->coord_components < 3)
+         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
+      else
+         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
+
+      instr->src[src_number].src_type = nir_tex_src_comparitor;
+      src_number++;
+   }
+
+   assert(src_number == num_srcs); /* every allocated source slot must be filled */
+
+   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
+   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+
+   /* Resolve the writemask on the texture op. */
+   ptn_move_dest(b, dest, &instr->dest.ssa);
+}
+
+/**
+ * Direct Mesa-opcode to NIR-opcode translation table, indexed by the Mesa
+ * program opcode.
+ *
+ * ptn_emit_instruction() consults this table only for opcodes that its
+ * switch statement does not handle explicitly.  An entry of 0 is treated
+ * there as "no direct translation"; OPCODE_MOV is exempted from that test by
+ * the caller, presumably because nir_op_fmov may itself have the value 0.
+ */
+static const nir_op op_trans[MAX_OPCODE] = {
+   /* Opcodes that name a NIR ALU operation. */
+   [OPCODE_ABS]    = nir_op_fabs,
+   [OPCODE_ADD]    = nir_op_fadd,
+   [OPCODE_COS]    = nir_op_fcos,
+   [OPCODE_DDX]    = nir_op_fddx,
+   [OPCODE_DDY]    = nir_op_fddy,
+   [OPCODE_EX2]    = nir_op_fexp2,
+   [OPCODE_EXP]    = nir_op_fexp,
+   [OPCODE_FLR]    = nir_op_ffloor,
+   [OPCODE_FRC]    = nir_op_ffract,
+   [OPCODE_LG2]    = nir_op_flog2,
+   [OPCODE_MAD]    = nir_op_ffma,
+   [OPCODE_MAX]    = nir_op_fmax,
+   [OPCODE_MIN]    = nir_op_fmin,
+   [OPCODE_MOV]    = nir_op_fmov,
+   [OPCODE_MUL]    = nir_op_fmul,
+   [OPCODE_POW]    = nir_op_fpow,
+   [OPCODE_RCP]    = nir_op_frcp,
+   [OPCODE_RSQ]    = nir_op_frsq,
+   [OPCODE_SIN]    = nir_op_fsin,
+   [OPCODE_SSG]    = nir_op_fsign,
+   [OPCODE_SUB]    = nir_op_fsub,
+   [OPCODE_TRUNC]  = nir_op_ftrunc,
+
+   /* Opcodes with no table-driven translation. */
+   [OPCODE_NOP]    = 0,
+   [OPCODE_ARL]    = 0,
+   [OPCODE_CMP]    = 0,
+   [OPCODE_DP2]    = 0,
+   [OPCODE_DP3]    = 0,
+   [OPCODE_DP4]    = 0,
+   [OPCODE_DPH]    = 0,
+   [OPCODE_DST]    = 0,
+   [OPCODE_END]    = 0,
+   [OPCODE_LIT]    = 0,
+   [OPCODE_LOG]    = 0,
+   [OPCODE_LRP]    = 0,
+   [OPCODE_SCS]    = 0,
+   [OPCODE_SEQ]    = 0,
+   [OPCODE_SGE]    = 0,
+   [OPCODE_SGT]    = 0,
+   [OPCODE_SLE]    = 0,
+   [OPCODE_SLT]    = 0,
+   [OPCODE_SNE]    = 0,
+   [OPCODE_SWZ]    = 0,
+   [OPCODE_TEX]    = 0,
+   [OPCODE_TXB]    = 0,
+   [OPCODE_TXD]    = 0,
+   [OPCODE_TXL]    = 0,
+   [OPCODE_TXP]    = 0,
+   [OPCODE_TXP_NV] = 0,
+   [OPCODE_XPD]    = 0,
+};
+
+/**
+ * Translates a single Mesa program instruction into NIR via the compile
+ * context's builder.
+ *
+ * OPCODE_END produces nothing.  For every other opcode all three source
+ * operands and the destination are resolved first; if c->error is set once
+ * that is done, the instruction is dropped.  Opcodes without a dedicated
+ * case fall back to the op_trans table, and an opcode with no translation
+ * at all prints a message and aborts.  A non-zero SaturateMode is applied as
+ * a final nir_fsat pass over the destination.
+ */
+static void
+ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
+{
+   const unsigned op = prog_inst->Opcode;
+   if (op == OPCODE_END)
+      return;
+
+   nir_builder *b = &c->build;
+
+   nir_ssa_def *src[3];
+   unsigned operand = 0;
+   while (operand < 3) {
+      src[operand] = ptn_get_src(c, &prog_inst->SrcReg[operand]);
+      operand++;
+   }
+
+   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
+   if (c->error)
+      return;
+
+   switch (op) {
+   /* Scalar operations: only the X channel of each source is consumed. */
+   case OPCODE_RSQ: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_frsq(b, chan));
+      break;
+   }
+   case OPCODE_RCP: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_frcp(b, chan));
+      break;
+   }
+   case OPCODE_EX2: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_fexp2(b, chan));
+      break;
+   }
+   case OPCODE_LG2: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_flog2(b, chan));
+      break;
+   }
+   case OPCODE_COS: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_fcos(b, chan));
+      break;
+   }
+   case OPCODE_SIN: {
+      nir_ssa_def *chan = ptn_channel(b, src[0], X);
+      ptn_move_dest(b, dest, nir_fsin(b, chan));
+      break;
+   }
+   case OPCODE_POW:
+      /* Both channel extractions stay inside the call so their relative
+       * evaluation order is left exactly as the compiler chooses.
+       */
+      ptn_move_dest(b, dest,
+                    nir_fpow(b, ptn_channel(b, src[0], X), ptn_channel(b, src[1], X)));
+      break;
+
+   /* Opcodes that are lowered by a dedicated helper. */
+   case OPCODE_ARL: ptn_arl(b, dest, src); break;
+   case OPCODE_EXP: ptn_exp(b, dest, src); break;
+   case OPCODE_LOG: ptn_log(b, dest, src); break;
+   case OPCODE_LRP: ptn_lrp(b, dest, src); break;
+   case OPCODE_DST: ptn_dst(b, dest, src); break;
+   case OPCODE_LIT: ptn_lit(b, dest, src); break;
+   case OPCODE_XPD: ptn_xpd(b, dest, src); break;
+   case OPCODE_DP2: ptn_dp2(b, dest, src); break;
+   case OPCODE_DP3: ptn_dp3(b, dest, src); break;
+   case OPCODE_DP4: ptn_dp4(b, dest, src); break;
+   case OPCODE_DPH: ptn_dph(b, dest, src); break;
+   case OPCODE_KIL: ptn_kil(b, dest, src); break;
+   case OPCODE_CMP: ptn_cmp(b, dest, src); break;
+   case OPCODE_SCS: ptn_scs(b, dest, src); break;
+   case OPCODE_SLT: ptn_slt(b, dest, src); break;
+   case OPCODE_SGT: ptn_sgt(b, dest, src); break;
+   case OPCODE_SLE: ptn_sle(b, dest, src); break;
+   case OPCODE_SGE: ptn_sge(b, dest, src); break;
+   case OPCODE_SEQ: ptn_seq(b, dest, src); break;
+   case OPCODE_SNE: ptn_sne(b, dest, src); break;
+
+   /* All texture opcodes share one lowering routine. */
+   case OPCODE_TEX:
+   case OPCODE_TXB:
+   case OPCODE_TXD:
+   case OPCODE_TXL:
+   case OPCODE_TXP:
+   case OPCODE_TXP_NV:
+      ptn_tex(b, dest, src, prog_inst);
+      break;
+
+   case OPCODE_SWZ:
+      /* ptn_get_src() has already applied the extended swizzle, so a plain
+       * move is all that is left.
+       */
+      ptn_alu(b, nir_op_fmov, dest, src);
+      break;
+
+   case OPCODE_NOP:
+      break;
+
+   default:
+      /* Table-driven fallback.  A zero entry means "untranslatable", except
+       * for MOV, whose entry is always accepted.
+       */
+      if (op_trans[op] == 0 && op != OPCODE_MOV) {
+         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
+         abort();
+      }
+      ptn_alu(b, op_trans[op], dest, src);
+      break;
+   }
+
+   if (!prog_inst->SaturateMode)
+      return;
+
+   /* Only the [0, 1] saturate mode is supported, and the destination must be
+    * a register (not SSA) so that it can be fed back through nir_fsat.
+    */
+   assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
+   assert(!dest.dest.is_ssa);
+   ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
+}
+
+/**
+ * Emits one store_var intrinsic per shader output variable, inserted after
+ * the builder's control-flow list, so that each PROGRAM_OUTPUT register is
+ * copied into its real output variable at the end of the shader.
+ *
+ * The stores are not generated incrementally as the PROGRAM_OUTPUT values
+ * are written: there is no output load intrinsic, so writemasked (partial)
+ * updates could not be handled that way.
+ */
+static void
+ptn_add_output_stores(struct ptn_compile *c)
+{
+   nir_builder *build = &c->build;
+   struct nir_shader *shader = build->shader;
+
+   foreach_list_typed(nir_variable, out, node, &shader->outputs) {
+      const int slot = out->data.location;
+
+      nir_intrinsic_instr *st =
+         nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+      st->num_components = 4;
+      st->variables[0] = nir_deref_var_create(st, c->output_vars[slot]);
+      st->src[0].reg.reg = c->output_regs[slot];
+
+      nir_instr_insert_after_cf_list(build->cf_node_list, &st->instr);
+   }
+}
+
+/**
+ * Creates the NIR variables and registers that back the Mesa program's
+ * register files and records them in the compile context:
+ *
+ *  - one shader_in variable per bit set in InputsRead (c->input_vars),
+ *  - one shader_out variable plus a 4-component local register per bit set
+ *    in OutputsWritten (c->output_vars / c->output_regs),
+ *  - one 4-component local register per program temporary (c->temp_regs),
+ *  - a 1-component address register (c->addr_reg).
+ *
+ * It also sets shader->num_uniforms to four times the number of program
+ * parameters.  If a register or register-array allocation fails, c->error
+ * is set and the function returns early.
+ */
+static void
+setup_registers_and_variables(struct ptn_compile *c)
+{
+   nir_builder *b = &c->build;
+   struct nir_shader *shader = b->shader;
+
+   /* Create input variables. */
+   const int num_inputs = _mesa_flsll(c->prog->InputsRead);
+   for (int i = 0; i < num_inputs; i++) {
+      if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
+         continue;
+      nir_variable *var = rzalloc(shader, nir_variable);
+      var->type = glsl_vec4_type();
+      var->data.read_only = true;
+      var->data.mode = nir_var_shader_in;
+      var->name = ralloc_asprintf(var, "in_%d", i);
+      var->data.location = i;
+      var->data.index = 0;
+
+      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+         struct gl_fragment_program *fp =
+            (struct gl_fragment_program *) c->prog;
+
+         var->data.interpolation = fp->InterpQualifier[i];
+
+         if (i == VARYING_SLOT_POS) {
+            var->data.origin_upper_left = fp->OriginUpperLeft;
+            var->data.pixel_center_integer = fp->PixelCenterInteger;
+         } else if (i == VARYING_SLOT_FOGC) {
+            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
+             * input variable a float, and create a local containing the
+             * full vec4 value.
+             */
+            var->type = glsl_float_type();
+
+            nir_intrinsic_instr *load_x =
+               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
+            load_x->num_components = 1;
+            load_x->variables[0] = nir_deref_var_create(load_x, var);
+            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
+            nir_instr_insert_after_cf_list(b->cf_node_list, &load_x->instr);
+
+            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
+                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
+
+            nir_variable *fullvar = rzalloc(shader, nir_variable);
+            fullvar->type = glsl_vec4_type();
+            fullvar->data.mode = nir_var_local;
+            /* Give the name the same ralloc ownership as every other
+             * variable name created here instead of pointing at a string
+             * literal.
+             */
+            fullvar->name = ralloc_strdup(fullvar, "fogcoord_tmp");
+            exec_list_push_tail(&b->impl->locals, &fullvar->node);
+
+            nir_intrinsic_instr *store =
+               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
+            store->num_components = 4;
+            store->variables[0] = nir_deref_var_create(store, fullvar);
+            store->src[0] = nir_src_for_ssa(f001);
+            nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+
+            /* Insert the real input into the list so the driver has real
+             * inputs, but set c->input_vars[i] to the temporary so we use
+             * the splatted value.
+             */
+            exec_list_push_tail(&shader->inputs, &var->node);
+            c->input_vars[i] = fullvar;
+            continue;
+         }
+      }
+
+      exec_list_push_tail(&shader->inputs, &var->node);
+      c->input_vars[i] = var;
+   }
+
+   /* Create output registers and variables.
+    *
+    * OutputsWritten is tested with BITFIELD64_BIT() below, so its highest
+    * set bit has to be located with the 64-bit _mesa_flsll(), exactly as is
+    * done for InputsRead above; the 32-bit _mesa_fls() would silently drop
+    * every output slot at index 32 or higher.
+    */
+   const int max_outputs = _mesa_flsll(c->prog->OutputsWritten);
+   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
+   if (!c->output_regs) {
+      c->error = true;
+      return;
+   }
+
+   for (int i = 0; i < max_outputs; i++) {
+      if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
+         continue;
+
+      /* Since we can't load from outputs in the IR, we make temporaries
+       * for the outputs and emit stores to the real outputs at the end of
+       * the shader.
+       */
+      nir_register *reg = nir_local_reg_create(b->impl);
+      if (!reg) {
+         c->error = true;
+         return;
+      }
+      reg->num_components = 4;
+
+      nir_variable *var = rzalloc(shader, nir_variable);
+      var->type = glsl_vec4_type();
+      var->data.mode = nir_var_shader_out;
+      var->name = ralloc_asprintf(var, "out_%d", i);
+
+      var->data.location = i;
+      var->data.index = 0;
+
+      c->output_regs[i] = reg;
+
+      exec_list_push_tail(&shader->outputs, &var->node);
+      c->output_vars[i] = var;
+   }
+
+   /* Create temporary registers. */
+   c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
+   if (!c->temp_regs) {
+      c->error = true;
+      return;
+   }
+
+   nir_register *reg;
+   for (int i = 0; i < c->prog->NumTemporaries; i++) {
+      reg = nir_local_reg_create(b->impl);
+      if (!reg) {
+         c->error = true;
+         return;
+      }
+      reg->num_components = 4;
+      c->temp_regs[i] = reg;
+   }
+
+   /* Create the address register (for ARB_vertex_program). */
+   reg = nir_local_reg_create(b->impl);
+   if (!reg) {
+      c->error = true;
+      return;
+   }
+   reg->num_components = 1;
+   c->addr_reg = reg;
+
+   /* Set the number of uniforms: four per program parameter. */
+   shader->num_uniforms = 4 * c->prog->Parameters->NumParameters;
+}
+
+/**
+ * Translates a Mesa gl_program into a newly created NIR shader.
+ *
+ * The shader is created with a NULL ralloc context and contains a single
+ * "main" function whose body receives the translated instructions followed
+ * by the stores to the output variables.
+ *
+ * \param prog     program to translate
+ * \param options  compiler options passed on to nir_shader_create()
+ * \return the new shader, or NULL if the compile context or the shader could
+ *         not be allocated or the translation flagged an error
+ */
+struct nir_shader *
+prog_to_nir(const struct gl_program *prog, const nir_shader_compiler_options *options)
+{
+   struct ptn_compile *c = rzalloc(NULL, struct ptn_compile);
+   if (!c)
+      return NULL;
+
+   struct nir_shader *s = nir_shader_create(NULL, options);
+   if (s) {
+      c->prog = prog;
+
+      /* Build the function -> overload -> implementation chain for main(). */
+      nir_function_impl *impl =
+         nir_function_impl_create(
+            nir_function_overload_create(nir_function_create(s, "main")));
+
+      c->build.shader = s;
+      c->build.impl = impl;
+      c->build.cf_node_list = &impl->body;
+
+      setup_registers_and_variables(c);
+      if (!c->error) {
+         /* Stop translating at the first instruction that flags an error. */
+         for (unsigned int n = 0; n < prog->NumInstructions && !c->error; n++)
+            ptn_emit_instruction(c, &prog->Instructions[n]);
+
+         ptn_add_output_stores(c);
+      }
+   }
+
+   /* A flagged error invalidates whatever has been built so far. */
+   if (c->error) {
+      ralloc_free(s);
+      s = NULL;
+   }
+   ralloc_free(c);
+   return s;
+}
diff --git a/mesalib/src/mesa/program/prog_to_nir.h b/mesalib/src/mesa/program/prog_to_nir.h
new file mode 100644
index 000000000..34e4cd104
--- /dev/null
+++ b/mesalib/src/mesa/program/prog_to_nir.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef PROG_TO_NIR_H
+#define PROG_TO_NIR_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * Translates a Mesa gl_program into a new NIR shader.
+ *
+ * \param prog     program to translate
+ * \param options  NIR compiler options used when creating the shader
+ * \return the new shader, or NULL on failure.  The shader is created with a
+ *         NULL ralloc context, so presumably the caller is responsible for
+ *         releasing it with ralloc_free() - confirm against the callers.
+ */
+struct nir_shader *prog_to_nir(const struct gl_program *prog,
+                               const nir_shader_compiler_options *options);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* PROG_TO_NIR_H */
diff --git a/mesalib/src/mesa/program/program.c b/mesalib/src/mesa/program/program.c
index 3c214d5e3..4f28e2a3b 100644
--- a/mesalib/src/mesa/program/program.c
+++ b/mesalib/src/mesa/program/program.c
@@ -37,6 +37,7 @@
 #include "prog_cache.h"
 #include "prog_parameter.h"
 #include "prog_instruction.h"
+#include "util/ralloc.h"
 
 
 /**
@@ -380,6 +381,10 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
       _mesa_free_parameter_list(prog->Parameters);
    }
 
+   if (prog->nir) {
+      ralloc_free(prog->nir);
+   }
+
    free(prog);
 }