aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/mesa/program/prog_opt_constant_fold.c
diff options
context:
space:
mode:
author: marha <marha@users.sourceforge.net> 2011-08-29 14:48:31 +0200
committer: marha <marha@users.sourceforge.net> 2011-08-29 14:48:31 +0200
commit: 924a6ffc82a7af0ca311af711ea2cf973dc6ff15 (patch)
tree: f5ee6d0aaf321029a4abb12c3c051c0434dde308 /mesalib/src/mesa/program/prog_opt_constant_fold.c
parent: 8e57a7fcbf736b8e437baf0bbccb93095d97617f (diff)
parent: 01df5d59e56a1b060568f8cad2e89f7eea22fc70 (diff)
downloadvcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.gz
vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.bz2
vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.zip
Merge remote-tracking branch 'origin/released'
Conflicts: apps/xwininfo/configure.ac apps/xwininfo/xwininfo.c libX11/configure.ac libX11/specs/libX11/AppC.xml libX11/specs/libX11/AppD.xml libX11/specs/libX11/CH03.xml libX11/specs/libX11/CH04.xml libX11/specs/libX11/CH05.xml libX11/specs/libX11/CH06.xml libX11/specs/libX11/CH07.xml libX11/specs/libX11/CH08.xml libX11/specs/libX11/CH09.xml libX11/specs/libX11/CH11.xml libX11/specs/libX11/CH12.xml libX11/specs/libX11/CH13.xml libX11/specs/libX11/CH14.xml libX11/specs/libX11/CH15.xml libX11/specs/libX11/CH16.xml libxcb/configure.ac libxcb/src/c_client.py libxcb/src/xcb_auth.c libxcb/src/xcb_util.c mesalib/common.py mesalib/configs/linux-dri mesalib/docs/GL3.txt mesalib/docs/download.html mesalib/docs/install.html mesalib/include/GL/internal/dri_interface.h mesalib/scons/custom.py mesalib/scons/gallium.py mesalib/src/gallium/auxiliary/util/u_math.h mesalib/src/gallium/auxiliary/util/u_vbuf_mgr.c mesalib/src/glsl/ast_function.cpp mesalib/src/glsl/ast_to_hir.cpp mesalib/src/glsl/glcpp/glcpp-parse.y mesalib/src/glsl/glsl_parser_extras.cpp mesalib/src/glsl/glsl_parser_extras.h mesalib/src/glsl/ir.cpp mesalib/src/glsl/ir.h mesalib/src/glsl/ir_clone.cpp mesalib/src/glsl/ir_print_visitor.cpp mesalib/src/glsl/ir_validate.cpp mesalib/src/glsl/linker.cpp mesalib/src/glsl/main.cpp mesalib/src/glsl/opt_tree_grafting.cpp mesalib/src/mapi/glapi/gen/Makefile mesalib/src/mapi/glapi/gen/gl_XML.py mesalib/src/mapi/glapi/gen/gl_table.py mesalib/src/mapi/glapi/glapi_mapi_tmp.h mesalib/src/mapi/glapi/glapi_sparc.S mesalib/src/mapi/glapi/glapi_x86-64.S mesalib/src/mapi/glapi/glapi_x86.S mesalib/src/mapi/glapi/glapitemp.h mesalib/src/mapi/glapi/glprocs.h mesalib/src/mapi/mapi/u_thread.c mesalib/src/mapi/mapi/u_thread.h mesalib/src/mesa/SConscript mesalib/src/mesa/drivers/common/driverfuncs.c mesalib/src/mesa/drivers/windows/gldirect/dglcontext.c mesalib/src/mesa/main/api_arrayelt.c mesalib/src/mesa/main/bufferobj.c mesalib/src/mesa/main/compiler.h mesalib/src/mesa/main/dd.h 
mesalib/src/mesa/main/dlist.c mesalib/src/mesa/main/enums.c mesalib/src/mesa/main/es_generator.py mesalib/src/mesa/main/fbobject.c mesalib/src/mesa/main/imports.h mesalib/src/mesa/main/mtypes.h mesalib/src/mesa/main/pbo.c mesalib/src/mesa/main/remap_helper.h mesalib/src/mesa/main/shared.c mesalib/src/mesa/main/texgetimage.c mesalib/src/mesa/main/teximage.c mesalib/src/mesa/main/uniforms.c mesalib/src/mesa/program/ir_to_mesa.cpp mesalib/src/mesa/program/prog_optimize.c mesalib/src/mesa/program/register_allocate.c mesalib/src/mesa/program/register_allocate.h mesalib/src/mesa/sources.mak mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c mesalib/src/mesa/state_tracker/st_cb_texture.c mesalib/src/mesa/state_tracker/st_texture.h mesalib/src/mesa/swrast/s_context.c mesalib/src/mesa/tnl/t_draw.c mesalib/src/mesa/vbo/vbo_exec_array.c mesalib/src/mesa/vbo/vbo_save_api.c pixman/pixman/pixman-arm-common.h pixman/pixman/pixman-fast-path.c pixman/pixman/pixman-inlines.h pixman/test/Makefile.am pixman/test/utils.c pixman/test/utils.h xorg-server/configure.ac xorg-server/fb/fbblt.c xorg-server/hw/xquartz/darwin.c xorg-server/xkeyboard-config/symbols/de
Diffstat (limited to 'mesalib/src/mesa/program/prog_opt_constant_fold.c')
-rw-r--r--mesalib/src/mesa/program/prog_opt_constant_fold.c451
1 files changed, 451 insertions, 0 deletions
diff --git a/mesalib/src/mesa/program/prog_opt_constant_fold.c b/mesalib/src/mesa/program/prog_opt_constant_fold.c
new file mode 100644
index 000000000..e2418b554
--- /dev/null
+++ b/mesalib/src/mesa/program/prog_opt_constant_fold.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "program.h"
+#include "prog_instruction.h"
+#include "prog_optimize.h"
+#include "prog_parameter.h"
+#include <stdbool.h>
+
+/**
+ * Check whether the first \c num_srcs source operands of \c inst can be
+ * evaluated at compile time.
+ *
+ * An operand qualifies only when it lives in the PROGRAM_CONSTANT file and is
+ * not relatively addressed.  get_value() looks a constant up directly by its
+ * Index, which is only meaningful for an absolutely addressed operand, so a
+ * register with RelAddr set must not be folded.  This mirrors the RelAddr
+ * check in src_regs_are_same().
+ *
+ * \return true when every inspected operand is a foldable constant.
+ */
+static bool
+src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
+{
+   unsigned i;
+
+   for (i = 0; i < num_srcs; i++) {
+      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
+         return false;
+
+      /* The value read at run time depends on the address register. */
+      if (inst->SrcReg[i].RelAddr)
+         return false;
+   }
+
+   return true;
+}
+
+/**
+ * Build a PROGRAM_CONSTANT source operand that refers to a single float.
+ *
+ * The value is registered in \c prog->Parameters through
+ * _mesa_add_unnamed_constant(); the index it returns and the swizzle it
+ * reports are stored in the resulting register.  Every other field of the
+ * register is zero.
+ */
+static struct prog_src_register
+src_reg_for_float(struct gl_program *prog, float val)
+{
+   struct prog_src_register reg;
+   unsigned swizzle;
+
+   memset(&reg, 0, sizeof(reg));
+
+   reg.Index = _mesa_add_unnamed_constant(prog->Parameters,
+                                          (gl_constant_value *) &val, 1,
+                                          &swizzle);
+   reg.File = PROGRAM_CONSTANT;
+   reg.Swizzle = swizzle;
+
+   return reg;
+}
+
+/**
+ * Build a PROGRAM_CONSTANT source operand that refers to four floats.
+ *
+ * \c val must point at four consecutive floats.  They are registered in
+ * \c prog->Parameters through _mesa_add_unnamed_constant(); the index it
+ * returns and the swizzle it reports are stored in the resulting register.
+ * Every other field of the register is zero.
+ */
+static struct prog_src_register
+src_reg_for_vec4(struct gl_program *prog, const float *val)
+{
+   struct prog_src_register reg;
+   unsigned swizzle;
+
+   memset(&reg, 0, sizeof(reg));
+
+   reg.Index = _mesa_add_unnamed_constant(prog->Parameters,
+                                          (gl_constant_value *) val, 4,
+                                          &swizzle);
+   reg.File = PROGRAM_CONSTANT;
+   reg.Swizzle = swizzle;
+
+   return reg;
+}
+
+/**
+ * Decide whether two source operands read exactly the same value.
+ *
+ * Both operands must name the same file and index and use the same swizzle,
+ * absolute-value flag and negate mask.  Operands that use relative addressing
+ * are never reported as the same.
+ */
+static bool
+src_regs_are_same(const struct prog_src_register *a,
+                  const struct prog_src_register *b)
+{
+   if (a->RelAddr != 0 || b->RelAddr != 0)
+      return false;
+
+   if (a->File != b->File || a->Index != b->Index)
+      return false;
+
+   if (a->Swizzle != b->Swizzle)
+      return false;
+
+   return a->Abs == b->Abs && a->Negate == b->Negate;
+}
+
+/**
+ * Fetch the four-channel value a constant source operand evaluates to.
+ *
+ * The parameter selected by \c r->Index is read through the operand's
+ * swizzle.  The absolute-value flag is applied next, and finally the
+ * per-channel negate mask (bit N negates channel N).
+ *
+ * \param prog  program whose parameter list holds the constant
+ * \param r     source operand to evaluate
+ * \param data  receives the four resulting floats
+ */
+static void
+get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
+{
+   const gl_constant_value *const param =
+      prog->Parameters->ParameterValues[r->Index];
+   unsigned chan;
+
+   for (chan = 0; chan < 4; chan++)
+      data[chan] = param[GET_SWZ(r->Swizzle, chan)].f;
+
+   if (r->Abs) {
+      for (chan = 0; chan < 4; chan++)
+         data[chan] = fabsf(data[chan]);
+   }
+
+   for (chan = 0; chan < 4; chan++) {
+      if (r->Negate & (1u << chan))
+         data[chan] = -data[chan];
+   }
+}
+
+/**
+ * Rewrite \c inst in place as a MOV that reads \c src.
+ *
+ * Operands 1 .. num_srcs-1, which the MOV does not read, are reset to
+ * PROGRAM_UNDEFINED with a no-op swizzle.
+ */
+static void
+fold_replace_with_mov(struct prog_instruction *inst,
+                      struct prog_src_register src, unsigned num_srcs)
+{
+   unsigned i;
+
+   inst->Opcode = OPCODE_MOV;
+   inst->SrcReg[0] = src;
+
+   for (i = 1; i < num_srcs; i++) {
+      inst->SrcReg[i].File = PROGRAM_UNDEFINED;
+      inst->SrcReg[i].Swizzle = SWIZZLE_NOOP;
+   }
+}
+
+/**
+ * Evaluate one channel of a component-wise two-operand opcode.
+ *
+ * Supports ADD, MUL and the set-on-compare opcodes SEQ, SGE, SGT, SLE, SLT
+ * and SNE, which produce 1.0 when the comparison holds and 0.0 otherwise.
+ * Any other opcode yields 0.0; callers only pass the opcodes listed here.
+ */
+static float
+fold_binary_component(unsigned opcode, float a, float b)
+{
+   switch (opcode) {
+   case OPCODE_ADD:
+      return a + b;
+   case OPCODE_MUL:
+      return a * b;
+   case OPCODE_SEQ:
+      return (a == b) ? 1.0f : 0.0f;
+   case OPCODE_SGE:
+      return (a >= b) ? 1.0f : 0.0f;
+   case OPCODE_SGT:
+      return (a > b) ? 1.0f : 0.0f;
+   case OPCODE_SLE:
+      return (a <= b) ? 1.0f : 0.0f;
+   case OPCODE_SLT:
+      return (a < b) ? 1.0f : 0.0f;
+   case OPCODE_SNE:
+      return (a != b) ? 1.0f : 0.0f;
+   default:
+      return 0.0f;
+   }
+}
+
+/**
+ * Fold a component-wise two-operand instruction whose operands are both
+ * constants into a MOV of the precomputed vec4 result.
+ *
+ * \return true when the instruction was rewritten.
+ */
+static bool
+fold_constant_binary(struct gl_program *prog, struct prog_instruction *inst)
+{
+   float a[4];
+   float b[4];
+   float result[4];
+   unsigned chan;
+
+   if (!src_regs_are_constant(inst, 2))
+      return false;
+
+   get_value(prog, &inst->SrcReg[0], a);
+   get_value(prog, &inst->SrcReg[1], b);
+
+   for (chan = 0; chan < 4; chan++)
+      result[chan] = fold_binary_component(inst->Opcode, a[chan], b[chan]);
+
+   fold_replace_with_mov(inst, src_reg_for_vec4(prog, result), 2);
+   return true;
+}
+
+/**
+ * Fold a comparison of an operand with itself into a MOV of \c value.
+ *
+ * NOTE: this assumes "x compared with x" has a fixed outcome, which does not
+ * hold when x is NaN.
+ *
+ * \return true when the instruction was rewritten.
+ */
+static bool
+fold_same_operands(struct gl_program *prog, struct prog_instruction *inst,
+                   float value)
+{
+   if (!src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1]))
+      return false;
+
+   fold_replace_with_mov(inst, src_reg_for_float(prog, value), 2);
+   return true;
+}
+
+/**
+ * Try to replace instructions that produce a constant result with simple moves
+ *
+ * The hope is that a following copy propagation pass will eliminate the
+ * unnecessary move instructions.
+ *
+ * ADD, MUL, CMP, DP2/DP3/DP4 and the SEQ/SGE/SGT/SLE/SLT/SNE comparisons are
+ * folded when all of their operands are constants.  The comparisons are also
+ * folded when both operands are the same register.
+ *
+ * \return true if at least one instruction was rewritten.
+ */
+GLboolean
+_mesa_constant_fold(struct gl_program *prog)
+{
+   bool progress = false;
+   unsigned i;
+
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *const inst = &prog->Instructions[i];
+
+      switch (inst->Opcode) {
+      case OPCODE_ADD:
+      case OPCODE_MUL:
+         if (fold_constant_binary(prog, inst))
+            progress = true;
+         break;
+
+      case OPCODE_CMP: {
+         /* FINISHME: We could also optimize CMP instructions where the first
+          * FINISHME: source is a constant that is either all < 0.0 or all
+          * FINISHME: >= 0.0.
+          */
+         float a[4];
+         float b[4];
+         float c[4];
+         float result[4];
+         unsigned chan;
+
+         if (!src_regs_are_constant(inst, 3))
+            break;
+
+         get_value(prog, &inst->SrcReg[0], a);
+         get_value(prog, &inst->SrcReg[1], b);
+         get_value(prog, &inst->SrcReg[2], c);
+
+         for (chan = 0; chan < 4; chan++)
+            result[chan] = a[chan] < 0.0f ? b[chan] : c[chan];
+
+         fold_replace_with_mov(inst, src_reg_for_vec4(prog, result), 3);
+         progress = true;
+         break;
+      }
+
+      case OPCODE_DP2:
+      case OPCODE_DP3:
+      case OPCODE_DP4: {
+         float a[4];
+         float b[4];
+         float dot;
+
+         if (!src_regs_are_constant(inst, 2))
+            break;
+
+         get_value(prog, &inst->SrcReg[0], a);
+         get_value(prog, &inst->SrcReg[1], b);
+
+         /* It seems like a loop could be used here, but we cleverly put
+          * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
+          * the opcode results in various failures of the loop control.
+          */
+         dot = (a[0] * b[0]) + (a[1] * b[1]);
+
+         if (inst->Opcode >= OPCODE_DP3)
+            dot += a[2] * b[2];
+
+         if (inst->Opcode == OPCODE_DP4)
+            dot += a[3] * b[3];
+
+         fold_replace_with_mov(inst, src_reg_for_float(prog, dot), 2);
+         progress = true;
+         break;
+      }
+
+      /* Comparisons that hold when both operands are the same register. */
+      case OPCODE_SEQ:
+      case OPCODE_SGE:
+      case OPCODE_SLE:
+         if (fold_constant_binary(prog, inst)
+             || fold_same_operands(prog, inst, 1.0f))
+            progress = true;
+         break;
+
+      /* Comparisons that fail when both operands are the same register. */
+      case OPCODE_SGT:
+      case OPCODE_SLT:
+      case OPCODE_SNE:
+         if (fold_constant_binary(prog, inst)
+             || fold_same_operands(prog, inst, 0.0f))
+            progress = true;
+         break;
+
+      default:
+         break;
+      }
+   }
+
+   return progress;
+}