diff options
| author | marha <marha@users.sourceforge.net> | 2011-04-30 14:43:27 +0000 | 
|---|---|---|
| committer | marha <marha@users.sourceforge.net> | 2011-04-30 14:43:27 +0000 | 
| commit | c4ae0bb951bfb6061e7f0c492ba03de330137515 (patch) | |
| tree | f1236eb9fdabf7ca199522b748789b4b5d15cd9d /mesalib/src/mesa/program | |
| parent | 2df2595ab27b00523dbbdd0d3661a97610f5a137 (diff) | |
| download | vcxsrv-c4ae0bb951bfb6061e7f0c492ba03de330137515.tar.gz vcxsrv-c4ae0bb951bfb6061e7f0c492ba03de330137515.tar.bz2 vcxsrv-c4ae0bb951bfb6061e7f0c492ba03de330137515.zip | |
xserver mesa xkeyboard-config git update 30 Apr 2011
Diffstat (limited to 'mesalib/src/mesa/program')
| -rw-r--r-- | mesalib/src/mesa/program/register_allocate.c | 972 | 
1 files changed, 517 insertions, 455 deletions
| diff --git a/mesalib/src/mesa/program/register_allocate.c b/mesalib/src/mesa/program/register_allocate.c index 95a9bde40..e78db24a4 100644 --- a/mesalib/src/mesa/program/register_allocate.c +++ b/mesalib/src/mesa/program/register_allocate.c @@ -1,455 +1,517 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - *    Eric Anholt <eric@anholt.net> - * - */ - -/** @file register_allocate.c - * - * Graph-coloring register allocator. - */ - -#include <ralloc.h> - -#include "main/imports.h" -#include "main/macros.h" -#include "main/mtypes.h" -#include "register_allocate.h" - -struct ra_reg { -   GLboolean *conflicts; -   unsigned int *conflict_list; -   unsigned int conflict_list_size; -   unsigned int num_conflicts; -}; - -struct ra_regs { -   struct ra_reg *regs; -   unsigned int count; - -   struct ra_class **classes; -   unsigned int class_count; -}; - -struct ra_class { -   GLboolean *regs; - -   /** -    * p_B in Runeson/Nyström paper. -    * -    * This is "how many regs are in the set." -    */ -   unsigned int p; - -   /** -    * q_B,C in Runeson/Nyström paper. -    */ -   unsigned int *q; -}; - -struct ra_node { -   GLboolean *adjacency; -   unsigned int *adjacency_list; -   unsigned int class; -   unsigned int adjacency_count; -   unsigned int reg; -   GLboolean in_stack; -   float spill_cost; -}; - -struct ra_graph { -   struct ra_regs *regs; -   /** -    * the variables that need register allocation. -    */ -   struct ra_node *nodes; -   unsigned int count; /**< count of nodes. */ - -   unsigned int *stack; -   unsigned int stack_count; -}; - -struct ra_regs * -ra_alloc_reg_set(unsigned int count) -{ -   unsigned int i; -   struct ra_regs *regs; - -   regs = rzalloc(NULL, struct ra_regs); -   regs->count = count; -   regs->regs = rzalloc_array(regs, struct ra_reg, count); - -   for (i = 0; i < count; i++) { -      regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count); -      regs->regs[i].conflicts[i] = GL_TRUE; - -      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4); -      regs->regs[i].conflict_list_size = 4; -      regs->regs[i].conflict_list[0] = i; -      regs->regs[i].num_conflicts = 1; -   } - -   return regs; -} - -static void -ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) -{ -   struct ra_reg *reg1 = ®s->regs[r1]; - -   if (reg1->conflict_list_size == reg1->num_conflicts) { -      reg1->conflict_list_size *= 2; -      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, -				     unsigned int, reg1->conflict_list_size); -   } -   reg1->conflict_list[reg1->num_conflicts++] = r2; -   reg1->conflicts[r2] = GL_TRUE; -} - -void -ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) -{ -   if (!regs->regs[r1].conflicts[r2]) { -      ra_add_conflict_list(regs, r1, r2); -      ra_add_conflict_list(regs, r2, r1); -   } -} - -unsigned int -ra_alloc_reg_class(struct ra_regs *regs) -{ -   struct ra_class *class; - -   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, -			    regs->class_count + 1); - -   class = rzalloc(regs, struct ra_class); -   regs->classes[regs->class_count] = class; - -   class->regs = rzalloc_array(class, GLboolean, regs->count); - -   return regs->class_count++; -} - -void -ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) -{ -   struct ra_class *class = regs->classes[c]; - -   class->regs[r] = GL_TRUE; -   class->p++; -} - -/** - * Must be called after all conflicts and register classes have been - * set up and before the register set is used for allocation. - */ -void -ra_set_finalize(struct ra_regs *regs) -{ -   unsigned int b, c; - -   for (b = 0; b < regs->class_count; b++) { -      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count); -   } - -   /* Compute, for each class B and C, how many regs of B an -    * allocation to C could conflict with. -    */ -   for (b = 0; b < regs->class_count; b++) { -      for (c = 0; c < regs->class_count; c++) { -	 unsigned int rc; -	 int max_conflicts = 0; - -	 for (rc = 0; rc < regs->count; rc++) { -	    int conflicts = 0; -	    int i; - -	    if (!regs->classes[c]->regs[rc]) -	       continue; - -	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) { -	       unsigned int rb = regs->regs[rc].conflict_list[i]; -	       if (regs->classes[b]->regs[rb]) -		  conflicts++; -	    } -	    max_conflicts = MAX2(max_conflicts, conflicts); -	 } -	 regs->classes[b]->q[c] = max_conflicts; -      } -   } -} - -static void -ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) -{ -   g->nodes[n1].adjacency[n2] = GL_TRUE; -   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; -   g->nodes[n1].adjacency_count++; -} - -struct ra_graph * -ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) -{ -   struct ra_graph *g; -   unsigned int i; - -   g = rzalloc(regs, struct ra_graph); -   g->regs = regs; -   g->nodes = rzalloc_array(g, struct ra_node, count); -   g->count = count; - -   g->stack = rzalloc_array(g, unsigned int, count); - -   for (i = 0; i < count; i++) { -      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count); -      g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count); -      g->nodes[i].adjacency_count = 0; -      ra_add_node_adjacency(g, i, i); -      g->nodes[i].reg = ~0; -   } - -   return g; -} - -void -ra_set_node_class(struct ra_graph *g, -		  unsigned int n, unsigned int class) -{ -   g->nodes[n].class = class; -} - -void -ra_add_node_interference(struct ra_graph *g, -			 unsigned int n1, unsigned int n2) -{ -   if (!g->nodes[n1].adjacency[n2]) { -      ra_add_node_adjacency(g, n1, n2); -      ra_add_node_adjacency(g, n2, n1); -   } -} - -static GLboolean pq_test(struct ra_graph *g, unsigned int n) -{ -   unsigned int j; -   unsigned int q = 0; -   int n_class = g->nodes[n].class; - -   for (j = 0; j < g->nodes[n].adjacency_count; j++) { -      unsigned int n2 = g->nodes[n].adjacency_list[j]; -      unsigned int n2_class = g->nodes[n2].class; - -      if (n != n2 && !g->nodes[n2].in_stack) { -	 q += g->regs->classes[n_class]->q[n2_class]; -      } -   } - -   return q < g->regs->classes[n_class]->p; -} - -/** - * Simplifies the interference graph by pushing all - * trivially-colorable nodes into a stack of nodes to be colored, - * removing them from the graph, and rinsing and repeating. - * - * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE - * means that either spilling will be required, or optimistic coloring - * should be applied. - */ -GLboolean -ra_simplify(struct ra_graph *g) -{ -   GLboolean progress = GL_TRUE; -   int i; - -   while (progress) { -      progress = GL_FALSE; - -      for (i = g->count - 1; i >= 0; i--) { -	 if (g->nodes[i].in_stack) -	    continue; - -	 if (pq_test(g, i)) { -	    g->stack[g->stack_count] = i; -	    g->stack_count++; -	    g->nodes[i].in_stack = GL_TRUE; -	    progress = GL_TRUE; -	 } -      } -   } - -   for (i = 0; i < g->count; i++) { -      if (!g->nodes[i].in_stack) -	 return GL_FALSE; -   } - -   return GL_TRUE; -} - -/** - * Pops nodes from the stack back into the graph, coloring them with - * registers as they go. - * - * If all nodes were trivially colorable, then this must succeed.  If - * not (optimistic coloring), then it may return GL_FALSE; - */ -GLboolean -ra_select(struct ra_graph *g) -{ -   int i; - -   while (g->stack_count != 0) { -      unsigned int r; -      int n = g->stack[g->stack_count - 1]; -      struct ra_class *c = g->regs->classes[g->nodes[n].class]; - -      /* Find the lowest-numbered reg which is not used by a member -       * of the graph adjacent to us. -       */ -      for (r = 0; r < g->regs->count; r++) { -	 if (!c->regs[r]) -	    continue; - -	 /* Check if any of our neighbors conflict with this register choice. */ -	 for (i = 0; i < g->nodes[n].adjacency_count; i++) { -	    unsigned int n2 = g->nodes[n].adjacency_list[i]; - -	    if (!g->nodes[n2].in_stack && -		g->regs->regs[r].conflicts[g->nodes[n2].reg]) { -	       break; -	    } -	 } -	 if (i == g->nodes[n].adjacency_count) -	    break; -      } -      if (r == g->regs->count) -	 return GL_FALSE; - -      g->nodes[n].reg = r; -      g->nodes[n].in_stack = GL_FALSE; -      g->stack_count--; -   } - -   return GL_TRUE; -} - -/** - * Optimistic register coloring: Just push the remaining nodes - * on the stack.  They'll be colored first in ra_select(), and - * if they succeed then the locally-colorable nodes are still - * locally-colorable and the rest of the register allocation - * will succeed. - */ -void -ra_optimistic_color(struct ra_graph *g) -{ -   unsigned int i; - -   for (i = 0; i < g->count; i++) { -      if (g->nodes[i].in_stack) -	 continue; - -      g->stack[g->stack_count] = i; -      g->stack_count++; -      g->nodes[i].in_stack = GL_TRUE; -   } -} - -GLboolean -ra_allocate_no_spills(struct ra_graph *g) -{ -   if (!ra_simplify(g)) { -      ra_optimistic_color(g); -   } -   return ra_select(g); -} - -unsigned int -ra_get_node_reg(struct ra_graph *g, unsigned int n) -{ -   return g->nodes[n].reg; -} - -static float -ra_get_spill_benefit(struct ra_graph *g, unsigned int n) -{ -   int j; -   float benefit = 0; -   int n_class = g->nodes[n].class; - -   /* Define the benefit of eliminating an interference between n, n2 -    * through spilling as q(C, B) / p(C).  This is similar to the -    * "count number of edges" approach of traditional graph coloring, -    * but takes classes into account. -    */ -   for (j = 0; j < g->nodes[n].adjacency_count; j++) { -      unsigned int n2 = g->nodes[n].adjacency_list[j]; -      if (n != n2) { -	 unsigned int n2_class = g->nodes[n2].class; -	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] / -		     g->regs->classes[n_class]->p); -      } -   } - -   return benefit; -} - -/** - * Returns a node number to be spilled according to the cost/benefit using - * the pq test, or -1 if there are no spillable nodes. - */ -int -ra_get_best_spill_node(struct ra_graph *g) -{ -   unsigned int best_node = -1; -   unsigned int best_benefit = 0.0; -   unsigned int n; - -   for (n = 0; n < g->count; n++) { -      float cost = g->nodes[n].spill_cost; -      float benefit; - -      if (cost <= 0.0) -	 continue; - -      benefit = ra_get_spill_benefit(g, n); - -      if (benefit / cost > best_benefit) { -	 best_benefit = benefit / cost; -	 best_node = n; -      } -   } - -   return best_node; -} - -/** - * Only nodes with a spill cost set (cost != 0.0) will be considered - * for register spilling. - */ -void -ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) -{ -   g->nodes[n].spill_cost = cost; -} +/*
 + * Copyright © 2010 Intel Corporation
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the next
 + * paragraph) shall be included in all copies or substantial portions of the
 + * Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 + * IN THE SOFTWARE.
 + *
 + * Authors:
 + *    Eric Anholt <eric@anholt.net>
 + *
 + */
 +
 +/** @file register_allocate.c
 + *
 + * Graph-coloring register allocator.
 + *
 + * The basic idea of graph coloring is to make a node in a graph for
 + * every thing that needs a register (color) number assigned, and make
 + * edges in the graph between nodes that interfere (can't be allocated
 + * to the same register at the same time).
 + *
 + * During the "simplify" process, any any node with fewer edges than
 + * there are registers means that that edge can get assigned a
 + * register regardless of what its neighbors choose, so that node is
 + * pushed on a stack and removed (with its edges) from the graph.
 + * That likely causes other nodes to become trivially colorable as well.
 + *
 + * Then during the "select" process, nodes are popped off of that
 + * stack, their edges restored, and assigned a color different from
 + * their neighbors.  Because they were pushed on the stack only when
 + * they were trivially colorable, any color chosen won't interfere
 + * with the registers to be popped later.
 + *
 + * The downside to most graph coloring is that real hardware often has
 + * limitations, like registers that need to be allocated to a node in
 + * pairs, or aligned on some boundary.  This implementation follows
 + * the paper "Retargetable Graph-Coloring Register Allocation for
 + * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström.
 + *
 + * In this system, there are register classes each containing various
 + * registers, and registers may interfere with other registers.  For
 + * example, one might have a class of base registers, and a class of
 + * aligned register pairs that would each interfere with their pair of
 + * the base registers.  Each node has a register class it needs to be
 + * assigned to.  Define p(B) to be the size of register class B, and
 + * q(B,C) to be the number of registers in B that the worst choice
 + * register in C could conflict with.  Then, this system replaces the
 + * basic graph coloring test of "fewer edges from this node than there
 + * are registers" with "For this node of class B, the sum of q(B,C)
 + * for each neighbor node of class C is less than pB".
 + *
 + * A nice feature of the pq test is that q(B,C) can be computed once
 + * up front and stored in a 2-dimensional array, so that the cost of
 + * coloring a node is constant with the number of registers.  We do
 + * this during ra_set_finalize().
 + */
 +
 +#include <ralloc.h>
 +
 +#include "main/imports.h"
 +#include "main/macros.h"
 +#include "main/mtypes.h"
 +#include "register_allocate.h"
 +
 +struct ra_reg {
 +   GLboolean *conflicts;
 +   unsigned int *conflict_list;
 +   unsigned int conflict_list_size;
 +   unsigned int num_conflicts;
 +};
 +
 +struct ra_regs {
 +   struct ra_reg *regs;
 +   unsigned int count;
 +
 +   struct ra_class **classes;
 +   unsigned int class_count;
 +};
 +
 +struct ra_class {
 +   GLboolean *regs;
 +
 +   /**
 +    * p(B) in Runeson/Nyström paper.
 +    *
 +    * This is "how many regs are in the set."
 +    */
 +   unsigned int p;
 +
 +   /**
 +    * q(B,C) (indexed by C, B is this register class) in
 +    * Runeson/Nyström paper.  This is "how many registers of B could
 +    * the worst choice register from C conflict with".
 +    */
 +   unsigned int *q;
 +};
 +
 +struct ra_node {
 +   /** @{
 +    *
 +    * List of which nodes this node interferes with.  This should be
 +    * symmetric with the other node.
 +    */
 +   GLboolean *adjacency;
 +   unsigned int *adjacency_list;
 +   unsigned int adjacency_count;
 +   /** @} */
 +
 +   unsigned int class;
 +
 +   /* Register, if assigned, or ~0. */
 +   unsigned int reg;
 +
 +   /**
 +    * Set when the node is in the trivially colorable stack.  When
 +    * set, the adjacency to this node is ignored, to implement the
 +    * "remove the edge from the graph" in simplification without
 +    * having to actually modify the adjacency_list.
 +    */
 +   GLboolean in_stack;
 +
 +   /* For an implementation that needs register spilling, this is the
 +    * approximate cost of spilling this node.
 +    */
 +   float spill_cost;
 +};
 +
 +struct ra_graph {
 +   struct ra_regs *regs;
 +   /**
 +    * the variables that need register allocation.
 +    */
 +   struct ra_node *nodes;
 +   unsigned int count; /**< count of nodes. */
 +
 +   unsigned int *stack;
 +   unsigned int stack_count;
 +};
 +
 +struct ra_regs *
 +ra_alloc_reg_set(unsigned int count)
 +{
 +   unsigned int i;
 +   struct ra_regs *regs;
 +
 +   regs = rzalloc(NULL, struct ra_regs);
 +   regs->count = count;
 +   regs->regs = rzalloc_array(regs, struct ra_reg, count);
 +
 +   for (i = 0; i < count; i++) {
 +      regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count);
 +      regs->regs[i].conflicts[i] = GL_TRUE;
 +
 +      regs->regs[i].conflict_list = ralloc_array(regs->regs, unsigned int, 4);
 +      regs->regs[i].conflict_list_size = 4;
 +      regs->regs[i].conflict_list[0] = i;
 +      regs->regs[i].num_conflicts = 1;
 +   }
 +
 +   return regs;
 +}
 +
 +static void
 +ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 +{
 +   struct ra_reg *reg1 = ®s->regs[r1];
 +
 +   if (reg1->conflict_list_size == reg1->num_conflicts) {
 +      reg1->conflict_list_size *= 2;
 +      reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list,
 +				     unsigned int, reg1->conflict_list_size);
 +   }
 +   reg1->conflict_list[reg1->num_conflicts++] = r2;
 +   reg1->conflicts[r2] = GL_TRUE;
 +}
 +
 +void
 +ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2)
 +{
 +   if (!regs->regs[r1].conflicts[r2]) {
 +      ra_add_conflict_list(regs, r1, r2);
 +      ra_add_conflict_list(regs, r2, r1);
 +   }
 +}
 +
 +unsigned int
 +ra_alloc_reg_class(struct ra_regs *regs)
 +{
 +   struct ra_class *class;
 +
 +   regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *,
 +			    regs->class_count + 1);
 +
 +   class = rzalloc(regs, struct ra_class);
 +   regs->classes[regs->class_count] = class;
 +
 +   class->regs = rzalloc_array(class, GLboolean, regs->count);
 +
 +   return regs->class_count++;
 +}
 +
 +void
 +ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r)
 +{
 +   struct ra_class *class = regs->classes[c];
 +
 +   class->regs[r] = GL_TRUE;
 +   class->p++;
 +}
 +
 +/**
 + * Must be called after all conflicts and register classes have been
 + * set up and before the register set is used for allocation.
 + */
 +void
 +ra_set_finalize(struct ra_regs *regs)
 +{
 +   unsigned int b, c;
 +
 +   for (b = 0; b < regs->class_count; b++) {
 +      regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count);
 +   }
 +
 +   /* Compute, for each class B and C, how many regs of B an
 +    * allocation to C could conflict with.
 +    */
 +   for (b = 0; b < regs->class_count; b++) {
 +      for (c = 0; c < regs->class_count; c++) {
 +	 unsigned int rc;
 +	 int max_conflicts = 0;
 +
 +	 for (rc = 0; rc < regs->count; rc++) {
 +	    int conflicts = 0;
 +	    int i;
 +
 +	    if (!regs->classes[c]->regs[rc])
 +	       continue;
 +
 +	    for (i = 0; i < regs->regs[rc].num_conflicts; i++) {
 +	       unsigned int rb = regs->regs[rc].conflict_list[i];
 +	       if (regs->classes[b]->regs[rb])
 +		  conflicts++;
 +	    }
 +	    max_conflicts = MAX2(max_conflicts, conflicts);
 +	 }
 +	 regs->classes[b]->q[c] = max_conflicts;
 +      }
 +   }
 +}
 +
 +static void
 +ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
 +{
 +   g->nodes[n1].adjacency[n2] = GL_TRUE;
 +   g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2;
 +   g->nodes[n1].adjacency_count++;
 +}
 +
 +struct ra_graph *
 +ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
 +{
 +   struct ra_graph *g;
 +   unsigned int i;
 +
 +   g = rzalloc(regs, struct ra_graph);
 +   g->regs = regs;
 +   g->nodes = rzalloc_array(g, struct ra_node, count);
 +   g->count = count;
 +
 +   g->stack = rzalloc_array(g, unsigned int, count);
 +
 +   for (i = 0; i < count; i++) {
 +      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
 +      g->nodes[i].adjacency_list = ralloc_array(g, unsigned int, count);
 +      g->nodes[i].adjacency_count = 0;
 +      ra_add_node_adjacency(g, i, i);
 +      g->nodes[i].reg = ~0;
 +   }
 +
 +   return g;
 +}
 +
 +void
 +ra_set_node_class(struct ra_graph *g,
 +		  unsigned int n, unsigned int class)
 +{
 +   g->nodes[n].class = class;
 +}
 +
 +void
 +ra_add_node_interference(struct ra_graph *g,
 +			 unsigned int n1, unsigned int n2)
 +{
 +   if (!g->nodes[n1].adjacency[n2]) {
 +      ra_add_node_adjacency(g, n1, n2);
 +      ra_add_node_adjacency(g, n2, n1);
 +   }
 +}
 +
 +static GLboolean pq_test(struct ra_graph *g, unsigned int n)
 +{
 +   unsigned int j;
 +   unsigned int q = 0;
 +   int n_class = g->nodes[n].class;
 +
 +   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
 +      unsigned int n2 = g->nodes[n].adjacency_list[j];
 +      unsigned int n2_class = g->nodes[n2].class;
 +
 +      if (n != n2 && !g->nodes[n2].in_stack) {
 +	 q += g->regs->classes[n_class]->q[n2_class];
 +      }
 +   }
 +
 +   return q < g->regs->classes[n_class]->p;
 +}
 +
 +/**
 + * Simplifies the interference graph by pushing all
 + * trivially-colorable nodes into a stack of nodes to be colored,
 + * removing them from the graph, and rinsing and repeating.
 + *
 + * Returns GL_TRUE if all nodes were removed from the graph.  GL_FALSE
 + * means that either spilling will be required, or optimistic coloring
 + * should be applied.
 + */
 +GLboolean
 +ra_simplify(struct ra_graph *g)
 +{
 +   GLboolean progress = GL_TRUE;
 +   int i;
 +
 +   while (progress) {
 +      progress = GL_FALSE;
 +
 +      for (i = g->count - 1; i >= 0; i--) {
 +	 if (g->nodes[i].in_stack)
 +	    continue;
 +
 +	 if (pq_test(g, i)) {
 +	    g->stack[g->stack_count] = i;
 +	    g->stack_count++;
 +	    g->nodes[i].in_stack = GL_TRUE;
 +	    progress = GL_TRUE;
 +	 }
 +      }
 +   }
 +
 +   for (i = 0; i < g->count; i++) {
 +      if (!g->nodes[i].in_stack)
 +	 return GL_FALSE;
 +   }
 +
 +   return GL_TRUE;
 +}
 +
 +/**
 + * Pops nodes from the stack back into the graph, coloring them with
 + * registers as they go.
 + *
 + * If all nodes were trivially colorable, then this must succeed.  If
 + * not (optimistic coloring), then it may return GL_FALSE;
 + */
 +GLboolean
 +ra_select(struct ra_graph *g)
 +{
 +   int i;
 +
 +   while (g->stack_count != 0) {
 +      unsigned int r;
 +      int n = g->stack[g->stack_count - 1];
 +      struct ra_class *c = g->regs->classes[g->nodes[n].class];
 +
 +      /* Find the lowest-numbered reg which is not used by a member
 +       * of the graph adjacent to us.
 +       */
 +      for (r = 0; r < g->regs->count; r++) {
 +	 if (!c->regs[r])
 +	    continue;
 +
 +	 /* Check if any of our neighbors conflict with this register choice. */
 +	 for (i = 0; i < g->nodes[n].adjacency_count; i++) {
 +	    unsigned int n2 = g->nodes[n].adjacency_list[i];
 +
 +	    if (!g->nodes[n2].in_stack &&
 +		g->regs->regs[r].conflicts[g->nodes[n2].reg]) {
 +	       break;
 +	    }
 +	 }
 +	 if (i == g->nodes[n].adjacency_count)
 +	    break;
 +      }
 +      if (r == g->regs->count)
 +	 return GL_FALSE;
 +
 +      g->nodes[n].reg = r;
 +      g->nodes[n].in_stack = GL_FALSE;
 +      g->stack_count--;
 +   }
 +
 +   return GL_TRUE;
 +}
 +
 +/**
 + * Optimistic register coloring: Just push the remaining nodes
 + * on the stack.  They'll be colored first in ra_select(), and
 + * if they succeed then the locally-colorable nodes are still
 + * locally-colorable and the rest of the register allocation
 + * will succeed.
 + */
 +void
 +ra_optimistic_color(struct ra_graph *g)
 +{
 +   unsigned int i;
 +
 +   for (i = 0; i < g->count; i++) {
 +      if (g->nodes[i].in_stack)
 +	 continue;
 +
 +      g->stack[g->stack_count] = i;
 +      g->stack_count++;
 +      g->nodes[i].in_stack = GL_TRUE;
 +   }
 +}
 +
 +GLboolean
 +ra_allocate_no_spills(struct ra_graph *g)
 +{
 +   if (!ra_simplify(g)) {
 +      ra_optimistic_color(g);
 +   }
 +   return ra_select(g);
 +}
 +
 +unsigned int
 +ra_get_node_reg(struct ra_graph *g, unsigned int n)
 +{
 +   return g->nodes[n].reg;
 +}
 +
 +static float
 +ra_get_spill_benefit(struct ra_graph *g, unsigned int n)
 +{
 +   int j;
 +   float benefit = 0;
 +   int n_class = g->nodes[n].class;
 +
 +   /* Define the benefit of eliminating an interference between n, n2
 +    * through spilling as q(C, B) / p(C).  This is similar to the
 +    * "count number of edges" approach of traditional graph coloring,
 +    * but takes classes into account.
 +    */
 +   for (j = 0; j < g->nodes[n].adjacency_count; j++) {
 +      unsigned int n2 = g->nodes[n].adjacency_list[j];
 +      if (n != n2) {
 +	 unsigned int n2_class = g->nodes[n2].class;
 +	 benefit += ((float)g->regs->classes[n_class]->q[n2_class] /
 +		     g->regs->classes[n_class]->p);
 +      }
 +   }
 +
 +   return benefit;
 +}
 +
 +/**
 + * Returns a node number to be spilled according to the cost/benefit using
 + * the pq test, or -1 if there are no spillable nodes.
 + */
 +int
 +ra_get_best_spill_node(struct ra_graph *g)
 +{
 +   unsigned int best_node = -1;
 +   unsigned int best_benefit = 0.0;
 +   unsigned int n;
 +
 +   for (n = 0; n < g->count; n++) {
 +      float cost = g->nodes[n].spill_cost;
 +      float benefit;
 +
 +      if (cost <= 0.0)
 +	 continue;
 +
 +      benefit = ra_get_spill_benefit(g, n);
 +
 +      if (benefit / cost > best_benefit) {
 +	 best_benefit = benefit / cost;
 +	 best_node = n;
 +      }
 +   }
 +
 +   return best_node;
 +}
 +
 +/**
 + * Only nodes with a spill cost set (cost != 0.0) will be considered
 + * for register spilling.
 + */
 +void
 +ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost)
 +{
 +   g->nodes[n].spill_cost = cost;
 +}
 | 
