aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/glsl/nir/nir_opt_peephole_ffma.c')
-rw-r--r-- mesalib/src/glsl/nir/nir_opt_peephole_ffma.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
index b430eac8e..798506b75 100644
--- a/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/mesalib/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)
}
static nir_alu_instr *
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
+get_mul_for_src(nir_alu_src *src, int num_components,
+ uint8_t swizzle[4], bool *negate, bool *abs)
{
assert(src->src.is_ssa && !src->abs && !src->negate);
@@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
switch (alu->op) {
case nir_op_imov:
case nir_op_fmov:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
break;
case nir_op_fneg:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = !*negate;
break;
case nir_op_fabs:
- alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);
+ alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);
*negate = false;
*abs = true;
break;
@@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
if (!alu)
return NULL;
- for (unsigned i = 0; i < 4; i++) {
- if (!(alu->dest.write_mask & (1 << i)))
- break;
-
+ for (unsigned i = 0; i < num_components; i++)
swizzle[i] = swizzle[src->swizzle[i]];
- }
return alu;
}
@@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
negate = false;
abs = false;
- mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);
+ mul = get_mul_for_src(&add->src[add_mul_src],
+ add->dest.dest.ssa.num_components,
+ swizzle, &negate, &abs);
if (mul != NULL)
break;