/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_vector.cpp
 * IR lowering pass to remove some types of ir_quadop_vector
 *
 * \author Ian Romanick <ian.d.romanick@intel.com>
 */

#include "ir.h"
#include "ir_rvalue_visitor.h"

class lower_vector_visitor : public ir_rvalue_visitor {
public:
   lower_vector_visitor() : progress(false)
   {
      /* empty */
   }

   void handle_rvalue(ir_rvalue **rvalue);

   /**
    * Should SWZ-like expressions be lowered?
    */
   bool dont_lower_swz;

   bool progress;
};

/**
 * Determine if an IR expression tree looks like an extended swizzle
 *
 * Extended swizzles consist of access of a single vector source (with possible
 * per component negation) and the constants -1, 0, or 1.
 */
bool
is_extended_swizzle(ir_expression *ir)
{
   /* Track any variables that are accessed by this expression.
    */
   ir_variable *var = NULL;

   assert(ir->operation == ir_quadop_vector);

   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
      ir_rvalue *op = ir->operands[i];

      while (op != NULL) {
	 switch (op->ir_type) {
	 case ir_type_constant: {
	    const ir_constant *const c = op->as_constant();

	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
	       return false;

	    op = NULL;
	    break;
	 }

	 case ir_type_dereference_variable: {
	    ir_dereference_variable *const d = (ir_dereference_variable *) op;

	    if ((var != NULL) && (var != d->var))
	       return false;

	    var = d->var;
	    op = NULL;
	    break;
	 }

	 case ir_type_expression: {
	    ir_expression *const ex = (ir_expression *) op;

	    if (ex->operation != ir_unop_neg)
	       return false;

	    op = ex->operands[0];
	    break;
	 }

	 case ir_type_swizzle:
	    op = ((ir_swizzle *) op)->val;
	    break;

	 default:
	    return false;
	 }
      }
   }

   return true;
}

void
lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_expression *expr = (*rvalue)->as_expression();
   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
      return;

   if (this->dont_lower_swz && is_extended_swizzle(expr))
      return;

   /* FINISHME: Is this the right thing to use for the talloc context?
    */
   void *const mem_ctx = expr;

   assert(expr->type->vector_elements == expr->get_num_operands());

   /* Generate a temporary with the same type as the ir_quadop_operation.
    */
   ir_variable *const temp =
      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);

   this->base_ir->insert_before(temp);

   /* Counter of the number of components collected so far.
    */
   unsigned assigned;

   /* Write-mask in the destination that receives counted by 'assigned'.
    */
   unsigned write_mask;


   /* Generate upto four assignments to that variable.  Try to group component
    * assignments together:
    *
    * - All constant components can be assigned at once.
    * - All assigments of components from a single variable with the same
    *   unary operator can be assigned at once.
    */
   ir_constant_data d = { { 0 } };

   assigned = 0;
   write_mask = 0;
   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
      const ir_constant *const c = expr->operands[i]->as_constant();

      if (c == NULL)
	 continue;

      switch (expr->type->base_type) {
      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
      default:              assert(!"Should not get here."); break;
      }

      write_mask |= (1U << i);
      assigned++;
   }

   assert((write_mask == 0) == (assigned == 0));

   /* If there were constant values, generate an assignment.
    */
   if (assigned > 0) {
      ir_constant *const c =
	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
							  assigned, 0),
				  &d);
      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
      ir_assignment *const assign =
	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);

      this->base_ir->insert_before(assign);
   }

   /* FINISHME: This should try to coalesce assignments.
    */
   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
      if (expr->operands[i]->ir_type == ir_type_constant)
	 continue;

      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
      ir_assignment *const assign =
	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));

      this->base_ir->insert_before(assign);
      assigned++;
   }

   assert(assigned == expr->type->vector_elements);

   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
   this->progress = true;
}

bool
lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
{
   lower_vector_visitor v;

   v.dont_lower_swz = dont_lower_swz;
   visit_list_elements(&v, instructions);

   return v.progress;
}