From f4092abdf94af6a99aff944d6264bc1284e8bdd4 Mon Sep 17 00:00:00 2001 From: Reinhard Tartler Date: Mon, 10 Oct 2011 17:43:39 +0200 Subject: Imported nx-X11-3.1.0-1.tar.gz Summary: Imported nx-X11-3.1.0-1.tar.gz Keywords: Imported nx-X11-3.1.0-1.tar.gz into Git repository --- .../Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c | 1090 ++++++++++++++++++++ 1 file changed, 1090 insertions(+) create mode 100644 nx-X11/extras/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c (limited to 'nx-X11/extras/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c') diff --git a/nx-X11/extras/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c b/nx-X11/extras/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c new file mode 100644 index 000000000..8750ff60c --- /dev/null +++ b/nx-X11/extras/Mesa/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -0,0 +1,1090 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * Authors: + * Ben Skeggs + */ + +/*TODO'S + * + * - Implement remaining arb_f_p opcodes + * - Depth write + * - Negate on individual components (implement in swizzle code?) + * - Reuse input/temp regs, if they're no longer needed. + * - Find out whether there's any benifit in ordering registers the way + * fglrx does (see r300_reg.h). + * - Verify results of opcodes for accuracy, I've only checked them + * in specific cases. + * - and more... + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "arbfragparse.h" + +#include "program.h" +#include "nvfragprog.h" +#include "r300_context.h" +#if USE_ARB_F_P == 1 +#include "r300_fragprog.h" +#include "r300_reg.h" + +#define PFS_INVAL 0xFFFFFFFF + +static void dump_program(struct r300_fragment_program *rp); +static void emit_arith(struct r300_fragment_program *rp, int op, + pfs_reg_t dest, int mask, + pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, + int flags); + +/*************************************** + * begin: useful data structions for fragment program generation + ***************************************/ + +/* description of r300 native hw instructions */ +const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + { "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD }, + { "DP3", 2, R300_FPI0_OUTC_DP3, PFS_INVAL }, + { "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 }, + { "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN }, + { "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX }, + { "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP }, + { "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC }, + { "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2 }, + { "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2 }, + { "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP }, + { "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ }, + { "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL } +}; + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table + **/ +static const struct r300_pfv_swizzle { + const char *name; + GLuint hash; /* swizzle value this matches */ + GLboolean native; + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* difference in base between arg0/1/2 */ + GLboolean dep_sca; +} v_swiz[] = { +/* native swizzles */ + { "xyz", MAKE_SWZ3(X, Y, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_XYZ, 4, GL_FALSE }, + { "xxx", MAKE_SWZ3(X, X, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_XXX, 4, GL_FALSE }, + { "yyy", MAKE_SWZ3(Y, Y, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_YYY, 4, GL_FALSE }, + { "zzz", MAKE_SWZ3(Z, Z, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZZZ, 4, GL_FALSE }, + { "www", MAKE_SWZ3(W, W, W), GL_TRUE, R300_FPI0_ARGC_SRC0A, 1, GL_TRUE }, + { "yzx", MAKE_SWZ3(Y, Z, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_YZX, 1, GL_FALSE }, + { "zxy", MAKE_SWZ3(Z, X, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZXY, 1, GL_FALSE }, +/* disable this for now, until I find a clean way of making sure xyz/w streams + * have a source in the same register slot.. */ +// { "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1, GL_TRUE }, +/* special cases */ + { NULL, MAKE_SWZ3(ONE, ONE, ONE), GL_FALSE, R300_FPI0_ARGC_ONE, 0, GL_FALSE}, + { NULL, MAKE_SWZ3(ZERO, ZERO, ZERO), GL_FALSE, R300_FPI0_ARGC_ZERO, 0, GL_FALSE}, + { NULL, PFS_INVAL, GL_FALSE, R300_FPI0_ARGC_HALF, 0, GL_FALSE}, + { NULL, PFS_INVAL, GL_FALSE, 0, 0, 0 }, +}; +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_WWW 4 +#define SWIZZLE_111 7 +#define SWIZZLE_000 8 +#define SWIZZLE_HHH 9 + +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +/* used during matching of non-native swizzles */ +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + { SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3}, + { SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2}, + { SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2}, + { SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2}, + { SWZ_X_MASK, 1, 1}, + { SWZ_Y_MASK, 2, 1}, + { SWZ_Z_MASK, 4, 1}, + { PFS_INVAL, PFS_INVAL, PFS_INVAL} +}; + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +static const struct { + const char *name; + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLboolean dep_vec; +} s_swiz[] = { + { "x", R300_FPI2_ARGA_SRC0C_X, 3, GL_TRUE }, + { "y", R300_FPI2_ARGA_SRC0C_Y, 3, GL_TRUE }, + { "z", R300_FPI2_ARGA_SRC0C_Z, 3, GL_TRUE }, + { "w", R300_FPI2_ARGA_SRC0A , 1, GL_FALSE }, + { "0", R300_FPI2_ARGA_ZERO , 0, GL_FALSE }, + { "1", R300_FPI2_ARGA_ONE , 0, GL_FALSE }, + { ".5", R300_FPI2_ARGA_HALF, 0, GL_FALSE } +}; +#define SWIZZLE_HALF 6 + +/* boiler-plate reg, for convenience */ +const pfs_reg_t pfs_default_reg = { + type: REG_TYPE_TEMP, + index: 0, + v_swz: 0 /* matches XYZ in table */, + s_swz: SWIZZLE_W, + negate: 0, + valid: GL_FALSE +}; + +/* constant zero source */ +const pfs_reg_t pfs_one = { + type: REG_TYPE_CONST, + index: 0, + v_swz: SWIZZLE_111, + s_swz: SWIZZLE_ONE, + valid: GL_TRUE +}; + +/* constant one source */ +const pfs_reg_t pfs_zero = { + type: REG_TYPE_CONST, + index: 0, + v_swz: SWIZZLE_000, + s_swz: SWIZZLE_ZERO, + valid: GL_TRUE +}; + +/*************************************** + * end: data structures + ***************************************/ + +#define ERROR(fmt, args...) do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \ + rp->error = GL_TRUE; \ +} while(0) + +static int get_hw_temp(struct r300_fragment_program *rp) +{ + int r = ffs(~rp->hwreg_in_use); + if (!r) { + ERROR("Out of hardware temps\n"); + return 0; + } + + rp->hwreg_in_use |= (1 << --r); + if (r > rp->max_temp_idx) + rp->max_temp_idx = r; + + return r; +} + +static void free_hw_temp(struct r300_fragment_program *rp, int idx) +{ + rp->hwreg_in_use &= ~(1<temp_in_use); + if (!r.index) { + ERROR("Out of program temps\n"); + return r; + } + rp->temp_in_use |= (1 << --r.index); + + rp->temps[r.index] = get_hw_temp(rp); + r.valid = GL_TRUE; + return r; +} + +static pfs_reg_t get_temp_reg_tex(struct r300_fragment_program *rp) +{ + pfs_reg_t r = pfs_default_reg; + int hwreg; + + hwreg = ffs(~(rp->hwreg_in_use | rp->used_in_node)); + if (!hwreg) { + /* Try and grab an already used temp, will + * cause an indirection, but better than failing */ + return get_temp_reg(rp); + } + if (hwreg > rp->max_temp_idx) + rp->max_temp_idx = hwreg; + + r.index = ffs(~rp->temp_in_use); + if (!r.index) { + ERROR("Out of program temps\n"); + return r; + } + rp->temp_in_use |= (1 << --r.index); + rp->temps[r.index] = --hwreg; + + r.valid = GL_TRUE; + return r; +} + +static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r) +{ + if (!rp || !(rp->temp_in_use & (1<temps[r.index]); + rp->temp_in_use &= ~(1<param_nr++; + r.index = rp->const_nr++; + if (pidx >= PFS_NUM_CONST_REGS || r.index >= PFS_NUM_CONST_REGS) { + ERROR("Out of const/param slots!\n"); + return r; + } + + rp->param[pidx].idx = r.index; + rp->param[pidx].values = values; + rp->params_uptodate = GL_FALSE; + + r.valid = GL_TRUE; + return r; +} + +static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp) +{ + pfs_reg_t r = pfs_default_reg; + r.type = REG_TYPE_CONST; + + r.index = rp->const_nr++; + if (r.index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return r; + } + + COPY_4V(rp->constant[r.index], cp); + + r.valid = GL_TRUE; + return r; +} + +static pfs_reg_t negate(pfs_reg_t r) +{ + r.negate = 1; + return r; +} + +static int swz_native(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r) +{ + /* Native swizzle, nothing to see here */ + *r = src; + return 3; +} + +static int swz_emit_partial(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r, int mask, int mc) +{ + if (!r->valid) + *r = get_temp_reg(rp); + + /* A partial match, src.v_swz/mask define what parts of the + * desired swizzle we match */ + if (mc + s_mask[mask].count == 3) + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, src, pfs_one, pfs_zero, 0); + else + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); + return s_mask[mask].count; +} + +static int swz_special_case(struct r300_fragment_program *rp, + pfs_reg_t src, pfs_reg_t *r, int mask, int mc) +{ + switch(GET_SWZ(v_swiz[src.v_swz].hash, 0)) { + case SWIZZLE_ONE: + case SWIZZLE_ZERO: + if (!r->valid) + *r = get_temp_reg(rp); + if (mc + s_mask[mask].count == 3) + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, src, pfs_one, pfs_zero, 0); + else + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); + break; + default: + ERROR("Unknown special-case swizzle! %d\n", src.v_swz); + return 0; + } + + return s_mask[mask].count; +} + +static pfs_reg_t swizzle(struct r300_fragment_program *rp, + pfs_reg_t src, + GLuint arbswz) +{ + pfs_reg_t r = pfs_default_reg; + + int c_mask = 0; + int v_matched = 0; + src.v_swz = SWIZZLE_XYZ; + src.s_swz = GET_SWZ(arbswz, 3); + + do { + do { +#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash) + if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) { + if (v_swiz[src.v_swz].native == GL_FALSE) + v_matched += swz_special_case(rp, src, &r, c_mask, v_matched); + else if (s_mask[c_mask].count == 3) + v_matched += swz_native(rp, src, &r); + else + v_matched += swz_emit_partial(rp, src, &r, c_mask, v_matched); + + if (v_matched == 3) + return r; + + /* Fill with something invalid.. all 0's was wrong before, matched + * SWIZZLE_X. So all 1's will be okay for now */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); + } + } while(v_swiz[++src.v_swz].hash != PFS_INVAL); + src.v_swz = SWIZZLE_XYZ; + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static pfs_reg_t t_src(struct r300_fragment_program *rp, + struct fp_src_register fpsrc) { + pfs_reg_t r = pfs_default_reg; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + r.index = fpsrc.Index; + r.valid = GL_TRUE; + break; + case PROGRAM_INPUT: + r.index = fpsrc.Index; + r.type = REG_TYPE_INPUT; + r.valid = GL_TRUE; + break; + case PROGRAM_LOCAL_PARAM: + r = emit_param4fv(rp, rp->mesa_program.Base.LocalParams[fpsrc.Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_param4fv(rp, rp->ctx->FragmentProgram.Parameters[fpsrc.Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + r = emit_param4fv(rp, rp->mesa_program.Parameters->ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... */ + if (r.v_swz < SWIZZLE_111 && r.s_swz < SWIZZLE_ZERO) + r = swizzle(rp, r, fpsrc.Swizzle); + + /* WRONG! Need to be able to do individual component negation, + * should probably handle this in the swizzling code unless + * all components are negated, then we can do this natively */ + if (fpsrc.NegateBase) + r.negate = GL_TRUE; + + return r; +} + +static pfs_reg_t t_dst(struct r300_fragment_program *rp, + struct fp_dst_register dest) { + pfs_reg_t r = pfs_default_reg; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + r.index = dest.Index; + r.valid = GL_TRUE; + return r; + case PROGRAM_OUTPUT: + r.type = REG_TYPE_OUTPUT; + switch (dest.Index) { + case 0: + r.valid = GL_TRUE; + return r; + case 1: + ERROR("I don't know how to write depth!\n"); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static void sync_streams(struct r300_fragment_program *rp) { + /* Bring vector/scalar streams into sync, inserting nops into + * whatever stream is lagging behind + * + * Using NOP == MAD out.none, 0, 0, 0 + */ + while (rp->v_pos != rp->s_pos) { + if (rp->s_pos > rp->v_pos) { + rp->alu.inst[rp->v_pos].inst0 = 0x00050A14; + rp->alu.inst[rp->v_pos].inst1 = 0x00020820; + rp->v_pos++; + } else { + rp->alu.inst[rp->s_pos].inst2 = 0x00040810; + rp->alu.inst[rp->s_pos].inst3 = 0x00020820; + rp->s_pos++; + } + } +} + +static void emit_tex(struct r300_fragment_program *rp, + struct fp_instruction *fpi, + int opcode) +{ + pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]); + pfs_reg_t dest = t_dst(rp, fpi->DstReg), rdest = pfs_default_reg; + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest, flags = 0; + + if (dest.type == REG_TYPE_OUTPUT) { + rdest = dest; + dest = get_temp_reg_tex(rp); + } + + switch (coord.type) { + case REG_TYPE_TEMP: + hwsrc = rp->temps[coord.index]; + break; + case REG_TYPE_INPUT: + hwsrc = rp->inputs[coord.index]; + break; + case REG_TYPE_CONST: + hwsrc = coord.index; + flags = R300_FPITX_SRC_CONST; + break; + default: + ERROR("Unknown coord.type = %d\n", coord.type); + return; + } + hwdest = rp->temps[dest.index]; + + /* Indirection if source has been written in this node, or if the dest has + * been read/written in this node + */ + if ((coord.type != REG_TYPE_CONST && (rp->dest_in_node & (1<used_in_node & (1<cur_node == 3) { /* We only support 4 natively */ + ERROR("too many levels of texture indirection\n"); + return; + } + + /* Finish off current node */ + sync_streams(rp); + rp->node[rp->cur_node].alu_end = rp->v_pos - 1; + + /* Start new node */ + rp->cur_node++; + rp->used_in_node = 0; + rp->dest_in_node = 0; + rp->node[rp->cur_node].tex_offset = rp->tex.length; + rp->node[rp->cur_node].alu_offset = rp->v_pos; + rp->node[rp->cur_node].tex_end = -1; + rp->node[rp->cur_node].alu_end = -1; + } + + if (rp->cur_node == 0) rp->first_node_has_tex = 1; + + rp->tex.inst[rp->tex.length++] = 0 + | (hwsrc << R300_FPITX_SRC_SHIFT) + | (hwdest << R300_FPITX_DST_SHIFT) + | (unit << R300_FPITX_IMAGE_SHIFT) + | (opcode << R300_FPITX_OPCODE_SHIFT) /* not entirely sure about this */ + | flags; + rp->dest_in_node |= (1 << hwdest); + if (coord.type != REG_TYPE_CONST) + rp->used_in_node |= (1 << hwsrc); + + rp->node[rp->cur_node].tex_end++; + + /* Copy from temp to output if needed */ + if (rdest.valid) { + emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest, pfs_one, pfs_zero, 0); + free_temp(rp, dest); + } +} + +#define ARG_NEG (1<<5) +#define ARG_ABS (1<<6) +#define SRC_CONST (1<<5) +#define SRC_STRIDE 6 + +static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src) +{ + int idx; + + switch (src.type) { + case REG_TYPE_TEMP: + idx = rp->temps[src.index]; + break; + case REG_TYPE_INPUT: + idx = rp->inputs[src.index]; + break; + case REG_TYPE_CONST: + return (src.index | SRC_CONST); + default: + ERROR("Invalid type for source reg\n"); + return (0 | SRC_CONST); + } + + rp->used_in_node |= (1 << idx); + return idx; +} + +/* Add sources to FPI1/FPI3 lists. If source is already on list, + * reuse the index instead of wasting a source. + */ +static inline int add_src(int src[3], int *cnt, int reg) { + int i; + + for (i=0;i<*cnt;i++) + if (src[i] == reg) return i; + + if (*cnt == 3) assert(0); /* I don't *think* this can happen */ + + src[*cnt] = reg; + return (*cnt)++; +} + +static void emit_arith(struct r300_fragment_program *rp, int op, + pfs_reg_t dest, int mask, + pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, + int flags) +{ + pfs_reg_t src[3] = { src0, src1, src2 }; + /* XYZ/W emit control */ + int v_idx = rp->v_pos, s_idx = rp->s_pos; + GLboolean emit_v = GL_FALSE, emit_s = GL_FALSE; + /* INST1/INST3 sources */ + int vsrc[3], ssrc[3]; + int nvs = 0, nss = 0; + /* INST0/INST2 sources */ + int vswz[3], sswz[3]; + /* temp stuff */ + int hwdest, hwsrc; + int argc; + int vop, sop; + int i; + int str; + + if (!dest.valid || !src0.valid || !src1.valid || !src2.valid) { + ERROR("invalid register. dest/src0/src1/src2 valid = %d/%d/%d/%d\n", + dest.valid, src0.valid, src1.valid, src2.valid); + return; + } + + /* check opcode */ + if (op > MAX_PFS_OP) { + ERROR("unknown opcode!\n"); + return; + } + argc = r300_fpop[op].argc; + vop = r300_fpop[op].v_op; + sop = r300_fpop[op].s_op; + + /* grab hwregs of dest */ + switch (dest.type) { + case REG_TYPE_TEMP: + hwdest = rp->temps[dest.index]; + rp->dest_in_node |= (1 << hwdest); + rp->used_in_node |= (1 << hwdest); + break; + case REG_TYPE_OUTPUT: + hwdest = 0; + break; + default: + ERROR("invalid dest reg type %d\n", dest.type); + return; + } + + for (i=0;i<3;i++) { + if (iv_pos; + emit_s = GL_TRUE; + str = add_src(ssrc, &nss, hwsrc); + } else + str = add_src(vsrc, &nvs, hwsrc); + vswz[i] = v_swiz[src[i].v_swz].base + (str * v_swiz[src[i].v_swz].stride); + } else + vswz[i] = R300_FPI0_ARGC_ZERO; + + if (mask & WRITEMASK_W || vop == R300_FPI0_OUTC_REPL_ALPHA) { + if (s_swiz[src[i].s_swz].dep_vec) { + sync_streams(rp); + v_idx = s_idx = rp->v_pos; + emit_v = GL_TRUE; + str = add_src(vsrc, &nvs, hwsrc); + } else + str = add_src(ssrc, &nss, hwsrc); + sswz[i] = s_swiz[src[i].s_swz].base + (str * s_swiz[src[i].s_swz].stride); + } else + sswz[i] = R300_FPI2_ARGA_ZERO; + + if (src[i].negate) { + vswz[i] |= ARG_NEG; + sswz[i] |= ARG_NEG; + } + + if (flags & PFS_FLAG_ABS) { + vswz[i] |= ARG_ABS; + sswz[i] |= ARG_ABS; + } + } else { + vswz[i] = R300_FPI0_ARGC_ZERO; + sswz[i] = R300_FPI2_ARGA_ZERO; + } + } + /* Unused sources, read constant reg 0 */ + for (i=nvs;i<3;i++) + vsrc[i] = 0 | SRC_CONST; + for (i=nss;i<3;i++) + ssrc[i] = 0 | SRC_CONST; + + if (flags & PFS_FLAG_SAT) { + vop |= R300_FPI0_OUTC_SAT; + sop |= R300_FPI2_OUTA_SAT; + } + + if (mask & WRITEMASK_XYZ || emit_v) { + if (r300_fpop[op].v_op == R300_FPI0_OUTC_REPL_ALPHA) { + sync_streams(rp); + s_idx = v_idx = rp->v_pos; + } + rp->alu.inst[v_idx].inst0 = vop | + vswz[0] << R300_FPI0_ARG0C_SHIFT | + vswz[1] << R300_FPI0_ARG1C_SHIFT | + vswz[2] << R300_FPI0_ARG2C_SHIFT; + rp->alu.inst[v_idx].inst1 = hwdest << R300_FPI1_DSTC_SHIFT | + vsrc[0] << R300_FPI1_SRC0C_SHIFT | + vsrc[1] << R300_FPI1_SRC1C_SHIFT | + vsrc[2] << R300_FPI1_SRC2C_SHIFT | + ((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23)); + rp->v_pos = v_idx + 1; + } + + if (mask & WRITEMASK_W || emit_s || vop == R300_FPI0_OUTC_REPL_ALPHA) { + rp->alu.inst[s_idx].inst2 = sop | + sswz[0] << R300_FPI2_ARG0A_SHIFT | + sswz[1] << R300_FPI2_ARG1A_SHIFT | + sswz[2] << R300_FPI2_ARG2A_SHIFT; + rp->alu.inst[s_idx].inst3 = hwdest << R300_FPI3_DSTA_SHIFT | + ssrc[0] << R300_FPI3_SRC0A_SHIFT | + ssrc[1] << R300_FPI3_SRC1A_SHIFT | + ssrc[2] << R300_FPI3_SRC2A_SHIFT | + (((mask & WRITEMASK_W)?1:0) << (dest.type == REG_TYPE_OUTPUT ? 24 : 23)); + rp->s_pos = s_idx + 1; + } + +/* sync_streams(rp); */ + return; +}; + +static GLboolean parse_program(struct r300_fragment_program *rp) +{ + struct fragment_program *mp = &rp->mesa_program; + const struct fp_instruction *inst = mp->Instructions; + struct fp_instruction *fpi; + pfs_reg_t src0, src1, src2, dest, temp; + int flags = 0; + + if (!inst || inst[0].Opcode == FP_OPCODE_END) { + ERROR("empty program?\n"); + return GL_FALSE; + } + + for (fpi=mp->Instructions; fpi->Opcode != FP_OPCODE_END; fpi++) { + if (fpi->Saturate) { + flags = PFS_FLAG_SAT; + } + + switch (fpi->Opcode) { + case FP_OPCODE_ABS: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_ADD: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + pfs_one, + t_src(rp, fpi->SrcReg[1]), + flags); + break; + case FP_OPCODE_CMP: + case FP_OPCODE_COS: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_DP3: + dest = t_dst(rp, fpi->DstReg); + if (fpi->DstReg.WriteMask & WRITEMASK_W) { + /* I assume these need to share the same alu slot */ + sync_streams(rp); + emit_arith(rp, PFS_OP_DP4, dest, WRITEMASK_W, + pfs_zero, pfs_zero, pfs_zero, + flags); + } + emit_arith(rp, PFS_OP_DP3, t_dst(rp, fpi->DstReg), + fpi->DstReg.WriteMask & WRITEMASK_XYZ, + t_src(rp, fpi->SrcReg[0]), + t_src(rp, fpi->SrcReg[1]), + pfs_zero, flags); + break; + case FP_OPCODE_DP4: + case FP_OPCODE_DPH: + case FP_OPCODE_DST: + case FP_OPCODE_EX2: + case FP_OPCODE_FLR: + case FP_OPCODE_FRC: + case FP_OPCODE_KIL: + case FP_OPCODE_LG2: + case FP_OPCODE_LIT: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_LRP: + /* TODO: use the special LRP form if possible */ + src0 = t_src(rp, fpi->SrcReg[0]); + src1 = t_src(rp, fpi->SrcReg[1]); + src2 = t_src(rp, fpi->SrcReg[2]); + // result = tmp0tmp1 + (1 - tmp0)tmp2 + // = tmp0tmp1 + tmp2 + (-tmp0)tmp2 + // MAD temp, -tmp0, tmp2, tmp2 + // MAD result, tmp0, tmp1, temp + temp = get_temp_reg(rp); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW, + negate(src0), src2, src2, 0); + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + src0, src1, temp, flags); + free_temp(rp, temp); + break; + case FP_OPCODE_MAD: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + t_src(rp, fpi->SrcReg[1]), + t_src(rp, fpi->SrcReg[2]), + flags); + break; + case FP_OPCODE_MAX: + case FP_OPCODE_MIN: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_MOV: + case FP_OPCODE_SWZ: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), pfs_one, pfs_zero, + flags); + break; + case FP_OPCODE_MUL: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + t_src(rp, fpi->SrcReg[1]), + pfs_zero, + flags); + break; + case FP_OPCODE_POW: + src0 = t_src(rp, fpi->SrcReg[0]); + src1 = t_src(rp, fpi->SrcReg[1]); + dest = t_dst(rp, fpi->DstReg); + temp = get_temp_reg(rp); + + emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, + src0, pfs_zero, pfs_zero, 0); + emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, + temp, src1, pfs_zero, 0); + emit_arith(rp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, + temp, pfs_zero, pfs_zero, 0); + free_temp(rp, temp); + break; + case FP_OPCODE_RCP: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_RSQ: + emit_arith(rp, PFS_OP_RSQ, t_dst(rp, fpi->DstReg), + fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), pfs_zero, pfs_zero, + flags | PFS_FLAG_ABS); + break; + case FP_OPCODE_SCS: + case FP_OPCODE_SGE: + case FP_OPCODE_SIN: + case FP_OPCODE_SLT: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + case FP_OPCODE_SUB: + emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, + t_src(rp, fpi->SrcReg[0]), + pfs_one, + negate(t_src(rp, fpi->SrcReg[1])), + flags); + break; + case FP_OPCODE_TEX: + emit_tex(rp, fpi, R300_FPITX_OP_TEX); + break; + case FP_OPCODE_TXB: + emit_tex(rp, fpi, R300_FPITX_OP_TXB); + break; + case FP_OPCODE_TXP: + emit_tex(rp, fpi, R300_FPITX_OP_TXP); + break; + case FP_OPCODE_XPD: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } + + if (rp->error) + return GL_FALSE; + } + + return GL_TRUE; +} + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(struct r300_fragment_program *rp) +{ + struct fragment_program *mp = &rp->mesa_program; + struct fp_instruction *fpi; + GLuint InputsRead = mp->InputsRead; + GLuint fp_reg = 0; + GLuint temps_used = 0; /* for rp->temps[] */ + int i; + + rp->translated = GL_FALSE; + rp->error = GL_FALSE; + + rp->v_pos = 0; + rp->s_pos = 0; + + rp->tex.length = 0; + rp->node[0].alu_offset = 0; + rp->node[0].alu_end = -1; + rp->node[0].tex_offset = 0; + rp->node[0].tex_end = -1; + rp->cur_node = 0; + rp->first_node_has_tex = 0; + rp->used_in_node = 0; + rp->dest_in_node = 0; + + rp->const_nr = 0; + rp->param_nr = 0; + rp->params_uptodate = GL_FALSE; + + rp->temp_in_use = 0; + rp->hwreg_in_use = 0; + rp->max_temp_idx = 0; + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * I'm using get_hw_temp() here now rather than doing this manually. + * This depends on get_hw_temp() allocating registers in order, starting + * at 0 (which it currently does). + */ + + /* Texcoords come first */ + for (i=0;ictx->Const.MaxTextureUnits;i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) + rp->inputs[FRAG_ATTRIB_TEX0+i] = get_hw_temp(rp); + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) + rp->inputs[FRAG_ATTRIB_COL0] = get_hw_temp(rp); + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) + rp->inputs[FRAG_ATTRIB_COL1] = get_hw_temp(rp); + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i=0;i<32;i++) + if (InputsRead & (1<inputs[i] = 0; + } + + /* Possibly the worst part of how I went about this... Find out what + * temps are used by the mesa program so we don't clobber something + * when we need a temp for other reasons. + * + * Possibly not too bad actually, as we could add to this later and + * find out when inputs are last used so we can reuse them as temps. + */ + if (!mp->Instructions) { + ERROR("No instructions found in program\n"); + return; + } + for (fpi=mp->Instructions;fpi->Opcode != FP_OPCODE_END; fpi++) { + for (i=0;i<3;i++) { + if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << fpi->SrcReg[i].Index))) { + temps_used |= (1 << fpi->SrcReg[i].Index); + rp->temps[fpi->SrcReg[i].Index] = get_hw_temp(rp); + } + } + } + /* needed? surely if a program writes a temp it'll read it again */ + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << fpi->DstReg.Index))) { + temps_used |= (1 << fpi->DstReg.Index); + rp->temps[fpi->DstReg.Index] = get_hw_temp(rp); + } + } + } + rp->temp_in_use = temps_used; +} + +static void update_params(struct r300_fragment_program *rp) { + struct fragment_program *mp = &rp->mesa_program; + int i; + + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (rp->param_nr) + _mesa_load_state_parameters(rp->ctx, mp->Parameters); + + for (i=0;iparam_nr;i++) + COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values); + + rp->params_uptodate = GL_TRUE; +} + +void translate_fragment_shader(struct r300_fragment_program *rp) +{ + int i; + + if (!rp->translated) { + init_program(rp); + + if (parse_program(rp) == GL_FALSE) { + dump_program(rp); + return; + } + + /* Finish off */ + sync_streams(rp); + rp->node[rp->cur_node].alu_end = rp->v_pos - 1; + rp->alu_offset = 0; + rp->alu_end = rp->v_pos - 1; + rp->tex_offset = 0; + rp->tex_end = rp->tex.length - 1; + + rp->translated = GL_TRUE; + if (0) dump_program(rp); + } + + + update_params(rp); +} + +/* just some random things... */ +static void dump_program(struct r300_fragment_program *rp) +{ + int i; + static int pc = 0; + + fprintf(stderr, "pc=%d*************************************\n", pc++); + + fprintf(stderr, "Mesa program:\n"); + fprintf(stderr, "-------------\n"); + _mesa_debug_fp_inst(rp->mesa_program.NumTexInstructions + + rp->mesa_program.NumAluInstructions, + rp->mesa_program.Instructions); + fflush(stdout); + + fprintf(stderr, "Hardware program\n"); + fprintf(stderr, "----------------\n"); + for (i=0;i<(rp->cur_node+1);i++) { + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, alu_end: %d, tex_end: %d\n", i, + rp->node[i].alu_offset, + rp->node[i].tex_offset, + rp->node[i].alu_end, + rp->node[i].tex_end); + } + +/* dump program in pretty_print_command_stream.tcl-readable format */ + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2); + fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2))); + for (i=0;i<=rp->alu_end;i++) + fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3); + fprintf(stderr, "00000000\n"); + +} +#endif // USE_ARB_F_P == 1 -- cgit v1.2.3