diff options
author | marha <marha@users.sourceforge.net> | 2011-08-29 14:48:31 +0200 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2011-08-29 14:48:31 +0200 |
commit | 924a6ffc82a7af0ca311af711ea2cf973dc6ff15 (patch) | |
tree | f5ee6d0aaf321029a4abb12c3c051c0434dde308 /mesalib/src/mesa/program/prog_opt_constant_fold.c | |
parent | 8e57a7fcbf736b8e437baf0bbccb93095d97617f (diff) | |
parent | 01df5d59e56a1b060568f8cad2e89f7eea22fc70 (diff) | |
download | vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.gz vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.tar.bz2 vcxsrv-924a6ffc82a7af0ca311af711ea2cf973dc6ff15.zip |
Merge remote-tracking branch 'origin/released'
Conflicts:
apps/xwininfo/configure.ac
apps/xwininfo/xwininfo.c
libX11/configure.ac
libX11/specs/libX11/AppC.xml
libX11/specs/libX11/AppD.xml
libX11/specs/libX11/CH03.xml
libX11/specs/libX11/CH04.xml
libX11/specs/libX11/CH05.xml
libX11/specs/libX11/CH06.xml
libX11/specs/libX11/CH07.xml
libX11/specs/libX11/CH08.xml
libX11/specs/libX11/CH09.xml
libX11/specs/libX11/CH11.xml
libX11/specs/libX11/CH12.xml
libX11/specs/libX11/CH13.xml
libX11/specs/libX11/CH14.xml
libX11/specs/libX11/CH15.xml
libX11/specs/libX11/CH16.xml
libxcb/configure.ac
libxcb/src/c_client.py
libxcb/src/xcb_auth.c
libxcb/src/xcb_util.c
mesalib/common.py
mesalib/configs/linux-dri
mesalib/docs/GL3.txt
mesalib/docs/download.html
mesalib/docs/install.html
mesalib/include/GL/internal/dri_interface.h
mesalib/scons/custom.py
mesalib/scons/gallium.py
mesalib/src/gallium/auxiliary/util/u_math.h
mesalib/src/gallium/auxiliary/util/u_vbuf_mgr.c
mesalib/src/glsl/ast_function.cpp
mesalib/src/glsl/ast_to_hir.cpp
mesalib/src/glsl/glcpp/glcpp-parse.y
mesalib/src/glsl/glsl_parser_extras.cpp
mesalib/src/glsl/glsl_parser_extras.h
mesalib/src/glsl/ir.cpp
mesalib/src/glsl/ir.h
mesalib/src/glsl/ir_clone.cpp
mesalib/src/glsl/ir_print_visitor.cpp
mesalib/src/glsl/ir_validate.cpp
mesalib/src/glsl/linker.cpp
mesalib/src/glsl/main.cpp
mesalib/src/glsl/opt_tree_grafting.cpp
mesalib/src/mapi/glapi/gen/Makefile
mesalib/src/mapi/glapi/gen/gl_XML.py
mesalib/src/mapi/glapi/gen/gl_table.py
mesalib/src/mapi/glapi/glapi_mapi_tmp.h
mesalib/src/mapi/glapi/glapi_sparc.S
mesalib/src/mapi/glapi/glapi_x86-64.S
mesalib/src/mapi/glapi/glapi_x86.S
mesalib/src/mapi/glapi/glapitemp.h
mesalib/src/mapi/glapi/glprocs.h
mesalib/src/mapi/mapi/u_thread.c
mesalib/src/mapi/mapi/u_thread.h
mesalib/src/mesa/SConscript
mesalib/src/mesa/drivers/common/driverfuncs.c
mesalib/src/mesa/drivers/windows/gldirect/dglcontext.c
mesalib/src/mesa/main/api_arrayelt.c
mesalib/src/mesa/main/bufferobj.c
mesalib/src/mesa/main/compiler.h
mesalib/src/mesa/main/dd.h
mesalib/src/mesa/main/dlist.c
mesalib/src/mesa/main/enums.c
mesalib/src/mesa/main/es_generator.py
mesalib/src/mesa/main/fbobject.c
mesalib/src/mesa/main/imports.h
mesalib/src/mesa/main/mtypes.h
mesalib/src/mesa/main/pbo.c
mesalib/src/mesa/main/remap_helper.h
mesalib/src/mesa/main/shared.c
mesalib/src/mesa/main/texgetimage.c
mesalib/src/mesa/main/teximage.c
mesalib/src/mesa/main/uniforms.c
mesalib/src/mesa/program/ir_to_mesa.cpp
mesalib/src/mesa/program/prog_optimize.c
mesalib/src/mesa/program/register_allocate.c
mesalib/src/mesa/program/register_allocate.h
mesalib/src/mesa/sources.mak
mesalib/src/mesa/state_tracker/st_cb_bufferobjects.c
mesalib/src/mesa/state_tracker/st_cb_texture.c
mesalib/src/mesa/state_tracker/st_texture.h
mesalib/src/mesa/swrast/s_context.c
mesalib/src/mesa/tnl/t_draw.c
mesalib/src/mesa/vbo/vbo_exec_array.c
mesalib/src/mesa/vbo/vbo_save_api.c
pixman/pixman/pixman-arm-common.h
pixman/pixman/pixman-fast-path.c
pixman/pixman/pixman-inlines.h
pixman/test/Makefile.am
pixman/test/utils.c
pixman/test/utils.h
xorg-server/configure.ac
xorg-server/fb/fbblt.c
xorg-server/hw/xquartz/darwin.c
xorg-server/xkeyboard-config/symbols/de
Diffstat (limited to 'mesalib/src/mesa/program/prog_opt_constant_fold.c')
-rw-r--r-- | mesalib/src/mesa/program/prog_opt_constant_fold.c | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/mesalib/src/mesa/program/prog_opt_constant_fold.c b/mesalib/src/mesa/program/prog_opt_constant_fold.c new file mode 100644 index 000000000..e2418b554 --- /dev/null +++ b/mesalib/src/mesa/program/prog_opt_constant_fold.c @@ -0,0 +1,451 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "program.h" +#include "prog_instruction.h" +#include "prog_optimize.h" +#include "prog_parameter.h" +#include <stdbool.h> + +static bool +src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) +{ + unsigned i; + + for (i = 0; i < num_srcs; i++) { + if (inst->SrcReg[i].File != PROGRAM_CONSTANT) + return false; + } + + return true; +} + +static struct prog_src_register +src_reg_for_float(struct gl_program *prog, float val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) &val, 1, &swiz); + src.Swizzle = swiz; + return src; +} + +static struct prog_src_register +src_reg_for_vec4(struct gl_program *prog, const float *val) +{ + struct prog_src_register src; + unsigned swiz; + + memset(&src, 0, sizeof(src)); + + src.File = PROGRAM_CONSTANT; + src.Index = _mesa_add_unnamed_constant(prog->Parameters, + (gl_constant_value *) val, 4, &swiz); + src.Swizzle = swiz; + return src; +} + +static bool +src_regs_are_same(const struct prog_src_register *a, + const struct prog_src_register *b) +{ + return (a->File == b->File) + && (a->Index == b->Index) + && (a->Swizzle == b->Swizzle) + && (a->Abs == b->Abs) + && (a->Negate == b->Negate) + && (a->RelAddr == 0) + && (b->RelAddr == 0); +} + +static void +get_value(struct gl_program *prog, struct prog_src_register *r, float *data) +{ + const gl_constant_value *const value = + prog->Parameters->ParameterValues[r->Index]; + + data[0] = value[GET_SWZ(r->Swizzle, 0)].f; + data[1] = value[GET_SWZ(r->Swizzle, 1)].f; + data[2] = value[GET_SWZ(r->Swizzle, 2)].f; + data[3] = value[GET_SWZ(r->Swizzle, 3)].f; + + if (r->Abs) { + data[0] = fabsf(data[0]); + data[1] = fabsf(data[1]); + data[2] = fabsf(data[2]); + data[3] = fabsf(data[3]); + } + + if (r->Negate & 0x01) { + data[0] = -data[0]; + } + + if (r->Negate & 0x02) { + data[1] = -data[1]; + } + + if (r->Negate & 0x04) { + data[2] = -data[2]; + } + + if (r->Negate & 0x08) { + data[3] = -data[3]; + } +} + +/** + * Try to replace instructions that produce a constant result with simple moves + * + * The hope is that a following copy propagation pass will eliminate the + * unnecessary move instructions. + */ +GLboolean +_mesa_constant_fold(struct gl_program *prog) +{ + bool progress = false; + unsigned i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *const inst = &prog->Instructions[i]; + + switch (inst->Opcode) { + case OPCODE_ADD: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] + b[0]; + result[1] = a[1] + b[1]; + result[2] = a[2] + b[2]; + result[3] = a[3] + b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_CMP: + /* FINISHME: We could also optimize CMP instructions where the first + * FINISHME: source is a constant that is either all < 0.0 or all + * FINISHME: >= 0.0. + */ + if (src_regs_are_constant(inst, 3)) { + float a[4]; + float b[4]; + float c[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + get_value(prog, &inst->SrcReg[2], c); + + result[0] = a[0] < 0.0f ? b[0] : c[0]; + result[1] = a[1] < 0.0f ? b[1] : c[1]; + result[2] = a[2] < 0.0f ? b[2] : c[2]; + result[3] = a[3] < 0.0f ? b[3] : c[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + /* It seems like a loop could be used here, but we cleverly put + * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from + * the opcode results in various failures of the loop control. + */ + result = (a[0] * b[0]) + (a[1] * b[1]); + + if (inst->Opcode >= OPCODE_DP3) + result += a[2] * b[2]; + + if (inst->Opcode == OPCODE_DP4) + result += a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_MUL: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = a[0] * b[0]; + result[1] = a[1] * b[1]; + result[2] = a[2] * b[2]; + result[3] = a[3] * b[3]; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SEQ: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SGT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SLT: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + case OPCODE_SNE: + if (src_regs_are_constant(inst, 2)) { + float a[4]; + float b[4]; + float result[4]; + + get_value(prog, &inst->SrcReg[0], a); + get_value(prog, &inst->SrcReg[1], b); + + result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; + result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; + result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; + result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_vec4(prog, result); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); + + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + + progress = true; + } + break; + + default: + break; + } + } + + return progress; +} |