/* * * Copyright (C) 2004 David Airlie All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "glheader.h" #include "colormac.h" #include "context.h" #include "atifragshader.h" #include "macros.h" #include "program.h" #include "s_atifragshader.h" #include "s_nvfragprog.h" #include "s_span.h" #include "s_texture.h" /** * Fetch a texel. */ static void fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda, GLuint unit, GLfloat color[4]) { GLchan rgba[4]; SWcontext *swrast = SWRAST_CONTEXT(ctx); /* XXX use a float-valued TextureSample routine here!!! */ swrast->TextureSample[unit] (ctx, unit, ctx->Texture.Unit[unit]._Current, 1, (const GLfloat(*)[4]) texcoord, &lambda, &rgba); color[0] = CHAN_TO_FLOAT(rgba[0]); color[1] = CHAN_TO_FLOAT(rgba[1]); color[2] = CHAN_TO_FLOAT(rgba[2]); color[3] = CHAN_TO_FLOAT(rgba[3]); } static void apply_swizzle(struct atifs_machine *machine, GLuint reg, GLuint swizzle) { GLfloat s, t, r, q; s = machine->Registers[reg][0]; t = machine->Registers[reg][1]; r = machine->Registers[reg][2]; q = machine->Registers[reg][3]; switch (swizzle) { case GL_SWIZZLE_STR_ATI: machine->Registers[reg][0] = s; machine->Registers[reg][1] = t; machine->Registers[reg][2] = r; break; case GL_SWIZZLE_STQ_ATI: machine->Registers[reg][0] = s; machine->Registers[reg][1] = t; machine->Registers[reg][2] = q; break; case GL_SWIZZLE_STR_DR_ATI: machine->Registers[reg][0] = s / r; machine->Registers[reg][1] = t / r; machine->Registers[reg][2] = 1 / r; break; case GL_SWIZZLE_STQ_DQ_ATI: machine->Registers[reg][0] = s / q; machine->Registers[reg][1] = t / q; machine->Registers[reg][2] = 1 / q; break; } machine->Registers[reg][3] = 0.0; } static void apply_src_rep(GLint optype, GLuint rep, GLfloat * val) { GLint i; GLint start, end; if (!rep) return; start = optype ? 3 : 0; end = optype ? 4 : 3; for (i = start; i < end; i++) { switch (rep) { case GL_RED: val[i] = val[0]; break; case GL_GREEN: val[i] = val[1]; break; case GL_BLUE: val[i] = val[2]; break; case GL_ALPHA: val[i] = val[3]; break; } } } static void apply_src_mod(GLint optype, GLuint mod, GLfloat * val) { GLint i; GLint start, end; if (!mod) return; start = optype ? 3 : 0; end = optype ? 4 : 3; for (i = start; i < end; i++) { if (mod & GL_COMP_BIT_ATI) val[i] = 1 - val[i]; if (mod & GL_BIAS_BIT_ATI) val[i] = val[i] - 0.5; if (mod & GL_2X_BIT_ATI) val[i] = 2 * val[i]; if (mod & GL_NEGATE_BIT_ATI) val[i] = -val[i]; } } static void apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val) { GLint i; GLint has_sat = mod & GL_SATURATE_BIT_ATI; GLint start, end; mod &= ~GL_SATURATE_BIT_ATI; start = optype ? 3 : 0; end = optype ? 4 : 3; for (i = start; i < end; i++) { switch (mod) { case GL_2X_BIT_ATI: val[i] = 2 * val[i]; break; case GL_4X_BIT_ATI: val[i] = 4 * val[i]; break; case GL_8X_BIT_ATI: val[i] = 8 * val[i]; break; case GL_HALF_BIT_ATI: val[i] = val[i] * 0.5; break; case GL_QUARTER_BIT_ATI: val[i] = val[i] * 0.25; break; case GL_EIGHTH_BIT_ATI: val[i] = val[i] * 0.125; break; } if (has_sat) { if (val[i] < 0.0) val[i] = 0; else if (val[i] > 1.0) val[i] = 1.0; } else { if (val[i] < -8.0) val[i] = -8.0; else if (val[i] > 8.0) val[i] = 8.0; } } } static void write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src, GLfloat * dst) { GLint i; apply_dst_mod(optype, mod, src); if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) { if (mask) { if (mask & GL_RED_BIT_ATI) dst[0] = src[0]; if (mask & GL_GREEN_BIT_ATI) dst[1] = src[1]; if (mask & GL_BLUE_BIT_ATI) dst[2] = src[2]; } else { for (i = 0; i < 3; i++) dst[i] = src[i]; } } else dst[3] = src[3]; } static void finish_pass(struct atifs_machine *machine) { GLint i; for (i = 0; i < 6; i++) { COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]); } } /** * Execute the given fragment shader * NOTE: we do everything in single-precision floating point; we don't * currently observe the single/half/fixed-precision qualifiers. * \param ctx - rendering context * \param program - the fragment program to execute * \param machine - machine state (register file) * \param maxInst - max number of instructions to execute * \return GL_TRUE if program completed or GL_FALSE if program executed KIL. */ struct ati_fs_opcode_st ati_fs_opcodes[] = { {GL_ADD_ATI, 2}, {GL_SUB_ATI, 2}, {GL_MUL_ATI, 2}, {GL_MAD_ATI, 3}, {GL_LERP_ATI, 3}, {GL_MOV_ATI, 1}, {GL_CND_ATI, 3}, {GL_CND0_ATI, 3}, {GL_DOT2_ADD_ATI, 3}, {GL_DOT3_ATI, 2}, {GL_DOT4_ATI, 2} }; static void handle_pass_op(struct atifs_machine *machine, struct atifs_instruction *inst, const struct sw_span *span, GLuint column) { GLuint idx = inst->DstReg[0].Index - GL_REG_0_ATI; GLuint swizzle = inst->DstReg[0].Swizzle; GLuint pass_tex = inst->SrcReg[0][0].Index; /* if we get here after passing pass one then we are starting pass two - backup the registers */ if (machine->pass == 1) { finish_pass(machine); machine->pass = 2; } if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { pass_tex -= GL_TEXTURE0_ARB; COPY_4V(machine->Registers[idx], span->array->texcoords[pass_tex][column]); } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI && machine->pass == 2) { pass_tex -= GL_REG_0_ATI; COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]); } apply_swizzle(machine, idx, swizzle); } static void handle_sample_op(GLcontext * ctx, struct atifs_machine *machine, struct atifs_instruction *inst, const struct sw_span *span, GLuint column) { GLuint idx = inst->DstReg[0].Index - GL_REG_0_ATI; GLuint swizzle = inst->DstReg[0].Swizzle; GLuint sample_tex = inst->SrcReg[0][0].Index; /* if we get here after passing pass one then we are starting pass two - backup the registers */ if (machine->pass == 1) { finish_pass(machine); machine->pass = 2; } if (sample_tex >= GL_TEXTURE0_ARB && sample_tex <= GL_TEXTURE7_ARB) { sample_tex -= GL_TEXTURE0_ARB; fetch_texel(ctx, span->array->texcoords[sample_tex][column], 0.0F, sample_tex, machine->Registers[idx]); } else if (sample_tex >= GL_REG_0_ATI && sample_tex <= GL_REG_5_ATI) { /* this is wrong... */ sample_tex -= GL_REG_0_ATI; fetch_texel(ctx, machine->Registers[sample_tex], 0, sample_tex, machine->Registers[idx]); } apply_swizzle(machine, idx, swizzle); } #define SETUP_SRC_REG(optype, i, x) do { \ if (optype) \ src[optype][i][3] = x[3]; \ else \ COPY_3V(src[optype][i], x); \ } while (0) static GLboolean execute_shader(GLcontext * ctx, const struct ati_fragment_shader *shader, GLuint maxInst, struct atifs_machine *machine, const struct sw_span *span, GLuint column) { GLuint pc; struct atifs_instruction *inst; GLint optype; GLint i; GLint dstreg; GLfloat src[2][3][4]; GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 }; GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 }; GLfloat dst[2][4], *dstp; for (pc = 0; pc < shader->Base.NumInstructions; pc++) { inst = &shader->Instructions[pc]; if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_PASS_OP) handle_pass_op(machine, inst, span, column); else if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_SAMPLE_OP) handle_sample_op(ctx, machine, inst, span, column); else { if (machine->pass == 0) machine->pass = 1; /* setup the source registers for color and alpha ops */ for (optype = 0; optype < 2; optype++) { for (i = 0; i < inst->ArgCount[optype]; i++) { GLint index = inst->SrcReg[optype][i].Index; if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI) SETUP_SRC_REG(optype, i, machine->Registers[index - GL_REG_0_ATI]); else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) SETUP_SRC_REG(optype, i, shader->Constants[index - GL_CON_0_ATI]); else if (index == GL_ONE) SETUP_SRC_REG(optype, i, ones); else if (index == GL_ZERO) SETUP_SRC_REG(optype, i, zeros); else if (index == GL_PRIMARY_COLOR_EXT) SETUP_SRC_REG(optype, i, machine->Inputs[ATI_FS_INPUT_PRIMARY]); else if (index == GL_SECONDARY_INTERPOLATOR_ATI) SETUP_SRC_REG(optype, i, machine->Inputs[ATI_FS_INPUT_SECONDARY]); apply_src_rep(optype, inst->SrcReg[optype][i].argRep, src[optype][i]); apply_src_mod(optype, inst->SrcReg[optype][i].argMod, src[optype][i]); } } /* Execute the operations - color then alpha */ for (optype = 0; optype < 2; optype++) { if (inst->Opcode[optype]) { switch (inst->Opcode[optype]) { case GL_ADD_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i] + src[optype][1][i]; } else dst[optype][3] = src[optype][0][3] + src[optype][1][3]; break; case GL_SUB_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i] - src[optype][1][i]; } else dst[optype][3] = src[optype][0][3] - src[optype][1][3]; break; case GL_MUL_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i] * src[optype][1][i]; } else dst[optype][3] = src[optype][0][3] * src[optype][1][3]; break; case GL_MAD_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i] * src[optype][1][i] + src[optype][2][i]; } else dst[optype][3] = src[optype][0][3] * src[optype][1][3] + src[optype][2][3]; break; case GL_LERP_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i] * src[optype][1][i] + (1 - src [optype] [0][i]) * src[optype][2][i]; } else dst[optype][3] = src[optype][0][3] * src[optype][1][3] + (1 - src[optype] [0][3]) * src[optype][2][3]; break; case GL_MOV_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = src[optype][0][i]; } else dst[optype][3] = src[optype][0][3]; break; case GL_CND_ATI: if (!optype) { for (i = 0; i < 3; i++) { dst[optype][i] = (src[optype][2][i] > 0.5) ? src[optype][0][i] : src[optype][1][i]; } } else { dst[optype][3] = (src[optype][2][3] > 0.5) ? src[optype][0][3] : src[optype][1][3]; } break; case GL_CND0_ATI: if (!optype) for (i = 0; i < 3; i++) { dst[optype][i] = (src[optype][2][i] >= 0) ? src[optype][0][i] : src[optype][1][i]; } else { dst[optype][3] = (src[optype][2][3] >= 0) ? src[optype][0][3] : src[optype][1][3]; } break; case GL_DOT2_ADD_ATI: { GLfloat result; /* DOT 2 always uses the source from the color op */ result = src[0][0][0] * src[0][1][0] + src[0][0][1] * src[0][1][1] + src[0][2][2]; if (!optype) { for (i = 0; i < 3; i++) { dst[optype][i] = result; } } else dst[optype][3] = result; } break; case GL_DOT3_ATI: { GLfloat result; /* DOT 3 always uses the source from the color op */ result = src[0][0][0] * src[0][1][0] + src[0][0][1] * src[0][1][1] + src[0][0][2] * src[0][1][2]; if (!optype) { for (i = 0; i < 3; i++) { dst[optype][i] = result; } } else dst[optype][3] = result; } break; case GL_DOT4_ATI: { GLfloat result; /* DOT 4 always uses the source from the color op */ result = src[optype][0][0] * src[0][1][0] + src[0][0][1] * src[0][1][1] + src[0][0][2] * src[0][1][2] + src[0][0][3] * src[0][1][3]; if (!optype) { for (i = 0; i < 3; i++) { dst[optype][i] = result; } } else dst[optype][3] = result; } break; } } } /* write out the destination registers */ for (optype = 0; optype < 2; optype++) { if (inst->Opcode[optype]) { dstreg = inst->DstReg[optype].Index; dstp = machine->Registers[dstreg - GL_REG_0_ATI]; write_dst_addr(optype, inst->DstReg[optype].dstMod, inst->DstReg[optype].dstMask, dst[optype], dstp); } } } } return GL_TRUE; } static void init_machine(GLcontext * ctx, struct atifs_machine *machine, const struct ati_fragment_shader *shader, const struct sw_span *span, GLuint col) { GLint i, j; for (i = 0; i < 6; i++) { for (j = 0; j < 4; j++) ctx->ATIFragmentShader.Machine.Registers[i][j] = 0.0; } ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][0] = CHAN_TO_FLOAT(span->array->rgba[col][0]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][1] = CHAN_TO_FLOAT(span->array->rgba[col][1]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][2] = CHAN_TO_FLOAT(span->array->rgba[col][2]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][3] = CHAN_TO_FLOAT(span->array->rgba[col][3]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][0] = CHAN_TO_FLOAT(span->array->spec[col][0]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][1] = CHAN_TO_FLOAT(span->array->spec[col][1]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][2] = CHAN_TO_FLOAT(span->array->spec[col][2]); ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][3] = CHAN_TO_FLOAT(span->array->spec[col][3]); ctx->ATIFragmentShader.Machine.pass = 0; } /** * Execute the current fragment program, operating on the given span. */ void _swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span) { const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current; GLuint i; ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI; for (i = 0; i < span->end; i++) { if (span->array->mask[i]) { init_machine(ctx, &ctx->ATIFragmentShader.Machine, ctx->ATIFragmentShader.Current, span, i); if (execute_shader(ctx, shader, ~0, &ctx->ATIFragmentShader.Machine, span, i)) { span->array->mask[i] = GL_FALSE; } { const GLfloat *colOut = ctx->ATIFragmentShader.Machine.Registers[0]; /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */ UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]); UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]); UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]); UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]); } } } ctx->_CurrentProgram = 0; }