Merge remote-tracking branch 'origin/released'

Conflicts: apps/xwininfo/configure.ac apps/xwininfo/xwininfo.c libX11/configure.ac libX11/specs/libX11/AppC.xml libX11/specs/libX11/AppD.xml libX11/specs/libX11/CH03.xml libX11/specs/libX11/CH04.xml libX11/specs/libX11/CH05.xml libX11/specs/libX11/CH06.xml libX11/specs/libX11/CH07.xml libX11/specs/libX11/CH08.xml libX11/specs/libX11/CH09.xml libX11/specs/libX11/CH11.xml libX11/specs/libX11/CH12.xml libX11/specs/libX11/CH13.xml libX11/specs/libX11/CH14.xml libX11/specs/libX11/CH15.xml libX11/specs/libX11/CH16.xml libxcb/configure.ac libxcb/src/c_client.py libxcb/src/xcb_auth.c libxcb/src/xcb_util.c mesalib/common.py mesalib/configs/linux-dri mesalib/docs/GL3.txt mesalib/docs/download.html mesalib/docs/install.html mesalib/include/GL/internal/dri_interface.h mesalib/scons/custom.py mesalib/scons/gallium.py mesalib/src/gallium/auxiliary/util/u_math.h mesalib/src/gallium/auxiliary/util/u_vbuf_mgr.c mesalib/src/glsl/ast_function.cpp mesalib/src/glsl/ast_to_hir.cpp mesalib/src/glsl/glcpp/glcpp-parse.y mesalib/src/glsl/glsl_parser_extras.cpp mesalib/src/glsl/glsl_parser_extras.h mesalib/src/glsl/ir.cpp mesalib/src/glsl/ir.h mesalib/src/glsl/ir_clone.cpp mesalib/src/glsl/ir_print_visitor.cpp mesalib/src/glsl/ir_validate.cpp mesalib/src/glsl/linker.cpp mesalib/src/glsl/main.cpp mesalib/src/glsl/opt_tree_grafting.cpp mesalib/src/mapi/glapi/gen/Makefile mesalib/src/mapi/glapi/gen/gl_XML.py mesalib/src/mapi/glapi/gen/gl_table.py mesalib/src/mapi/glapi/glapi_mapi_tmp.h mesalib/src/mapi/glapi/glapi_sparc.S mesalib/src/mapi/glapi/glapi_x86-64.S mesalib/src/mapi/glapi/glapi_x86.S mesalib/src/mapi/glapi/glapitemp.h mesalib/src/mapi/glapi/glprocs.h mesalib/src/mapi/mapi/u_thread.c mesalib/src/mapi/mapi/u_thread.h mesalib/src/mesa/SConscript mesalib/src/mesa/drivers/common/driverfuncs.c mesalib/src/mesa/drivers/windows/gldirect/dglcontext.c mesalib/src/mesa/main/api_arrayelt.c mesalib/src/mesa/main/bufferobj.c mesalib/src/mesa/main/compiler.h mesalib/src/mesa/main/dd.h mesalib/src/mesa/main/dlist.c mesalib/src/mesa/main/enums.c mesalib/src/mesa/main/es_generator.py mesalib/src/mesa/main/fbobject.c mesalib/src/mesa/main/imports.h mesalib/src/mesa/main/mtypes.h mesalib/src/mesa/main/pbo.c mesalib/src/mesa/main/remap_helper.h mesalib/src/mesa/main/shared.c mesalib/src/mesa/main/texgetimage.c mesalib/src/mesa/main/teximage.c mesalib/src/mesa/main/uniforms.c mesalib/src/mesa/program/ir_to_mesa.cpp mesalib/src/mesa/program/prog_optimize.c mesalib/src/mesa/program/register_allocate.c mesalib/src/mesa/program/register_allocate.h mesalib/src/mesa/sources.mak mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c mesalib/src/mesa/state_tracker/st_cb_texture.c mesalib/src/mesa/state_tracker/st_texture.h mesalib/src/mesa/swrast/s_context.c mesalib/src/mesa/tnl/t_draw.c mesalib/src/mesa/vbo/vbo_exec_array.c mesalib/src/mesa/vbo/vbo_save_api.c pixman/pixman/pixman-arm-common.h pixman/pixman/pixman-fast-path.c pixman/pixman/pixman-inlines.h pixman/test/Makefile.am pixman/test/utils.c pixman/test/utils.h xorg-server/configure.ac xorg-server/fb/fbblt.c xorg-server/hw/xquartz/darwin.c xorg-server/xkeyboard-config/symbols/de
author: marha <marha@users.sourceforge.net> 2011-08-29 14:48:31 +0200
committer: marha <marha@users.sourceforge.net> 2011-08-29 14:48:31 +0200
commit: 924a6ffc82a7af0ca311af711ea2cf973dc6ff15 (patch)
tree: f5ee6d0aaf321029a4abb12c3c051c0434dde308 /mesalib/src/mesa/program/ir_to_mesa.cpp
parent: 8e57a7fcbf736b8e437baf0bbccb93095d97617f (diff)
parent: 01df5d59e56a1b060568f8cad2e89f7eea22fc70 (diff)
download: vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.gz
vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.bz2
vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.zip
1 files changed, 153 insertions, 26 deletions
diff --git a/mesalib/src/mesa/program/ir_to_mesa.cpp b/mesalib/src/mesa/program/ir_to_mesa.cpp
index ae0df472d..1540eb77e 100644
--- a/mesalib/src/mesa/program/ir_to_mesa.cpp
+++ b/mesalib/src/mesa/program/ir_to_mesa.cpp
@@ -297,11 +297,11 @@ public:
    /**
     * Emit the correct dot-product instruction for the type of arguments
     */
-   void emit_dp(ir_instruction *ir,
-	        dst_reg dst,
-	        src_reg src0,
-	        src_reg src1,
-	        unsigned elements);
+   ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
+				    dst_reg dst,
+				    src_reg src0,
+				    src_reg src1,
+				    unsigned elements);
 
    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
 		    dst_reg dst, src_reg src0);
@@ -312,9 +312,11 @@ public:
    void emit_scs(ir_instruction *ir, enum prog_opcode op,
 		 dst_reg dst, const src_reg &src);
 
-   GLboolean try_emit_mad(ir_expression *ir,
+   bool try_emit_mad(ir_expression *ir,
 			  int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+				 int mul_operand);
+   bool try_emit_sat(ir_expression *ir);
 
    void emit_swz(ir_expression *ir);
 
@@ -408,7 +410,7 @@ ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
 }
 
-void
+ir_to_mesa_instruction *
 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
 			    dst_reg dst, src_reg src0, src_reg src1,
 			    unsigned elements)
@@ -417,7 +419,7 @@ ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
       OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
    };
 
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
 }
 
 /**
@@ -579,7 +581,7 @@ ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
    }
 }
 
-struct src_reg
+src_reg
 ir_to_mesa_visitor::src_reg_for_float(float val)
 {
    src_reg src(PROGRAM_CONSTANT, -1, NULL);
@@ -723,7 +725,7 @@ ir_to_mesa_visitor::visit(ir_variable *ir)
 	 }
       }
 
-      struct variable_storage *storage;
+      variable_storage *storage;
       dst_reg dst;
       if (i == ir->num_state_slots) {
 	 /* We'll set the index later. */
@@ -869,7 +871,7 @@ ir_to_mesa_visitor::visit(ir_function *ir)
    }
 }
 
-GLboolean
+bool
 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
 {
    int nonmul_operand = 1 - mul_operand;
@@ -892,7 +894,47 @@ ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    return true;
 }
 
-GLboolean
+/**
+ * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
+bool
 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
 {
    /* Saturates were only introduced to vertex programs in
@@ -1088,6 +1130,16 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
       if (try_emit_mad(ir, 0))
 	 return;
    }
+
+   /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+	 return;
+      if (try_emit_mad_for_and_not(ir, 0))
+	 return;
+   }
+
    if (try_emit_sat(ir))
       return;
 
@@ -1135,7 +1187,13 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 
    switch (ir->operation) {
    case ir_unop_logic_not:
-      emit(ir, OPCODE_SEQ, result_dst, op[0], src_reg_for_float(0.0));
+      /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+       * older GPUs implement SEQ using multiple instructions (i915 uses two
+       * SGE instructions and a MUL instruction).  Since our logic values are
+       * 0.0 and 1.0, 1-x also implements !x.
+       */
+      op[0].negate = ~op[0].negate;
+      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
       break;
    case ir_unop_neg:
       op[0].negate = ~op[0].negate;
@@ -1231,8 +1289,19 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero becomes 1.0, and positive values become zero.
+	  */
 	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
+	  * achieved using SGE.
+	  */
+	 src_reg sge_src = result_src;
+	 sge_src.negate = ~sge_src.negate;
+	 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
       } else {
 	 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
       }
@@ -1243,29 +1312,83 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
 	  ir->operands[1]->type->is_vector()) {
 	 src_reg temp = get_temp(glsl_type::vec4_type);
 	 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
-	 emit_dp(ir, result_dst, temp, temp, vector_elements);
-	 emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+	 /* After the dot-product, the value will be an integer on the
+	  * range [0,4].  Zero stays zero, and positive values become 1.0.
+	  */
+	 ir_to_mesa_instruction *const dp =
+	    emit_dp(ir, result_dst, temp, temp, vector_elements);
+	 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	    /* The clamping to [0,1] can be done for free in the fragment
+	     * shader with a saturate.
+	     */
+	    dp->saturate = true;
+	 } else {
+	    /* Negating the result of the dot-product gives values on the range
+	     * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	     * achieved using SLT.
+	     */
+	    src_reg slt_src = result_src;
+	    slt_src.negate = ~slt_src.negate;
+	    emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+	 }
       } else {
 	 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       }
       break;
 
-   case ir_unop_any:
+   case ir_unop_any: {
       assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-	      ir->operands[0]->type->vector_elements);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *const dp =
+	 emit_dp(ir, result_dst, op[0], op[0],
+		 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 dp->saturate = true;
+      } else {
+	 /* Negating the result of the dot-product gives values on the range
+	  * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_xor:
       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
       break;
 
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      /* After the addition, the value will be an integer on the
+       * range [0,2].  Zero stays zero, and positive values become 1.0.
+       */
+      ir_to_mesa_instruction *add =
+	 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+	 /* The clamping to [0,1] can be done for free in the fragment
+	  * shader with a saturate.
+	  */
+	 add->saturate = true;
+      } else {
+	 /* Negating the result of the addition gives values on the range
+	  * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+	  * is achieved using SLT.
+	  */
+	 src_reg slt_src = result_src;
+	 slt_src.negate = ~slt_src.negate;
+	 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+      }
       break;
+   }
 
    case ir_binop_logic_and:
       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
@@ -1981,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
    ir_to_mesa_instruction *inst = NULL;
    prog_opcode opcode = OPCODE_NOP;
 
-   ir->coordinate->accept(this);
+   if (ir->op == ir_txs)
+      this->result = src_reg_for_float(0.0);
+   else
+      ir->coordinate->accept(this);
 
    /* Put our coords in a temp.  We'll need to modify them for shadow,
     * projection, or LOD, so the only case we'd use it as is is if
@@ -2005,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_tex:
+   case ir_txs:
       opcode = OPCODE_TEX;
       break;
    case ir_txb:
author	marha <marha@users.sourceforge.net>	2011-08-29 14:48:31 +0200
committer	marha <marha@users.sourceforge.net>	2011-08-29 14:48:31 +0200
commit	924a6ffc82a7af0ca311af711ea2cf973dc6ff15 (patch)
tree	f5ee6d0aaf321029a4abb12c3c051c0434dde308 /mesalib/src/mesa/program/ir_to_mesa.cpp
parent	8e57a7fcbf736b8e437baf0bbccb93095d97617f (diff)
parent	01df5d59e56a1b060568f8cad2e89f7eea22fc70 (diff)
download	vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.gz vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.bz2 vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.zip