diff options
Diffstat (limited to 'tools/bison++/lalr.cc')
-rw-r--r-- | tools/bison++/lalr.cc | 761 |
1 files changed, 761 insertions, 0 deletions
diff --git a/tools/bison++/lalr.cc b/tools/bison++/lalr.cc new file mode 100644 index 000000000..f79f16d98 --- /dev/null +++ b/tools/bison++/lalr.cc @@ -0,0 +1,761 @@ +/* Compute look-ahead criteria for bison, + Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc. + +This file is part of Bison, the GNU Compiler Compiler. + +Bison is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Bison is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Bison; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + + +/* Compute how to make the finite state machine deterministic; + find which rules need lookahead in each state, and which lookahead tokens they accept. + +lalr(), the entry point, builds these data structures: + +goto_map, from_state and to_state + record each shift transition which accepts a variable (a nonterminal). +ngotos is the number of such transitions. +from_state[t] is the state number which a transition leads from +and to_state[t] is the state number it leads to. +All the transitions that accept a particular variable are grouped together and +goto_map[i - ntokens] is the index in from_state and to_state of the first of them. + +consistent[s] is nonzero if no lookahead is needed to decide what to do in state s. + +LAruleno is a vector which records the rules that need lookahead in various states. +The elements of LAruleno that apply to state s are those from + lookaheads[s] through lookaheads[s+1]-1. +Each element of LAruleno is a rule number. + +If lr is the length of LAruleno, then a number from 0 to lr-1 +can specify both a rule and a state where the rule might be applied. + +LA is a lr by ntokens matrix of bits. +LA[l, i] is 1 if the rule LAruleno[l] is applicable in the appropriate state + when the next token is symbol i. +If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict. +*/ + +#include <stdio.h> +#include "system.h" +#include "machine.h" +#include "types.h" +#include "state.h" +#include "new.h" +#include "gram.h" + + +extern short **derives; +extern char *nullable; + + +int tokensetsize; +short *lookaheads; +short *LAruleno; +unsigned *LA; +short *accessing_symbol; +char *consistent; +core **state_table; +shifts **shift_table; +reductions **reduction_table; +short *goto_map; +short *from_state; +short *to_state; + +short **transpose(short**,int); +void set_state_table(); +void set_accessing_symbol(); +void set_shift_table(); +void set_reduction_table(); +void set_maxrhs(); +void initialize_LA(); +void set_goto_map(); +void initialize_F(); +void build_relations(); +void add_lookback_edge(int,int,int); +void compute_FOLLOWS(); +void compute_lookaheads(); +void digraph(short**); +void traverse(int); + +extern void toomany(char*); +extern void berror(char*); + +static int infinity; +static int maxrhs; +static int ngotos; +static unsigned *F; +static short **includes; +static shorts **lookback; +static short **R; +static short *INDEX; +static short *VERTICES; +static int top; + + +void +lalr() +{ + tokensetsize = WORDSIZE(ntokens); + + set_state_table(); + set_accessing_symbol(); + set_shift_table(); + set_reduction_table(); + set_maxrhs(); + initialize_LA(); + set_goto_map(); + initialize_F(); + build_relations(); + compute_FOLLOWS(); + compute_lookaheads(); +} + + +void +set_state_table() +{ + register core *sp; + + state_table = NEW2(nstates, core *); + + for (sp = first_state; sp; sp = sp->next) + state_table[sp->number] = sp; +} + + +void +set_accessing_symbol() +{ + register core *sp; + + accessing_symbol = NEW2(nstates, short); + + for (sp = first_state; sp; sp = sp->next) + accessing_symbol[sp->number] = sp->accessing_symbol; +} + + +void +set_shift_table() +{ + register shifts *sp; + + shift_table = NEW2(nstates, shifts *); + + for (sp = first_shift; sp; sp = sp->next) + shift_table[sp->number] = sp; +} + + +void +set_reduction_table() +{ + register reductions *rp; + + reduction_table = NEW2(nstates, reductions *); + + for (rp = first_reduction; rp; rp = rp->next) + reduction_table[rp->number] = rp; +} + + +void +set_maxrhs() +{ + register short *itemp; + register int length; + register int max; + + length = 0; + max = 0; + for (itemp = ritem; *itemp; itemp++) + { + if (*itemp > 0) + { + length++; + } + else + { + if (length > max) max = length; + length = 0; + } + } + + maxrhs = max; +} + + +void +initialize_LA() +{ + register int i; + register int j; + register int count; + register reductions *rp; + register shifts *sp; + register short *np; + + consistent = NEW2(nstates, char); + lookaheads = NEW2(nstates + 1, short); + + count = 0; + for (i = 0; i < nstates; i++) + { + register int k; + + lookaheads[i] = count; + + rp = reduction_table[i]; + sp = shift_table[i]; + if (rp && (rp->nreds > 1 + || (sp && ! ISVAR(accessing_symbol[sp->internalShifts[0]])))) + count += rp->nreds; + else + consistent[i] = 1; + + if (sp) + for (k = 0; k < sp->nshifts; k++) + { + if (accessing_symbol[sp->internalShifts[k]] == error_token_number) + { + consistent[i] = 0; + break; + } + } + } + + lookaheads[nstates] = count; + + if (count == 0) + { + LA = NEW2(1 * tokensetsize, unsigned); + LAruleno = NEW2(1, short); + lookback = NEW2(1, shorts *); + } + else + { + LA = NEW2(count * tokensetsize, unsigned); + LAruleno = NEW2(count, short); + lookback = NEW2(count, shorts *); + } + + np = LAruleno; + for (i = 0; i < nstates; i++) + { + if (!consistent[i]) + { + if (rp = reduction_table[i]) + for (j = 0; j < rp->nreds; j++) + *np++ = rp->rules[j]; + } + } +} + + +void +set_goto_map() +{ + register shifts *sp; + register int i; + register int symbol; + register int k; + register short *temp_map; + register int state2; + register int state1; + + goto_map = NEW2(nvars + 1, short) - ntokens; + temp_map = NEW2(nvars + 1, short) - ntokens; + + ngotos = 0; + for (sp = first_shift; sp; sp = sp->next) + { + for (i = sp->nshifts - 1; i >= 0; i--) + { + symbol = accessing_symbol[sp->internalShifts[i]]; + + if (ISTOKEN(symbol)) break; + + if (ngotos == MAXSHORT) + toomany("gotos"); + + ngotos++; + goto_map[symbol]++; + } + } + + k = 0; + for (i = ntokens; i < nsyms; i++) + { + temp_map[i] = k; + k += goto_map[i]; + } + + for (i = ntokens; i < nsyms; i++) + goto_map[i] = temp_map[i]; + + goto_map[nsyms] = ngotos; + temp_map[nsyms] = ngotos; + + from_state = NEW2(ngotos, short); + to_state = NEW2(ngotos, short); + + for (sp = first_shift; sp; sp = sp->next) + { + state1 = sp->number; + for (i = sp->nshifts - 1; i >= 0; i--) + { + state2 = sp->internalShifts[i]; + symbol = accessing_symbol[state2]; + + if (ISTOKEN(symbol)) break; + + k = temp_map[symbol]++; + from_state[k] = state1; + to_state[k] = state2; + } + } + + FREE(temp_map + ntokens); +} + + + +/* Map_goto maps a state/symbol pair into its numeric representation. */ + +int +map_goto(int state, int symbol) +{ + register int high; + register int low; + register int middle; + register int s; + + low = goto_map[symbol]; + high = goto_map[symbol + 1] - 1; + + while (low <= high) + { + middle = (low + high) / 2; + s = from_state[middle]; + if (s == state) + return (middle); + else if (s < state) + low = middle + 1; + else + high = middle - 1; + } + + berror("map_goto"); +/* NOTREACHED */ + return 0; +} + + +void +initialize_F() +{ + register int i; + register int j; + register int k; + register shifts *sp; + register short *edge; + register unsigned *rowp; + register short *rp; + register short **reads; + register int nedges; + register int stateno; + register int symbol; + register int nwords; + + nwords = ngotos * tokensetsize; + F = NEW2(nwords, unsigned); + + reads = NEW2(ngotos, short *); + edge = NEW2(ngotos + 1, short); + nedges = 0; + + rowp = F; + for (i = 0; i < ngotos; i++) + { + stateno = to_state[i]; + sp = shift_table[stateno]; + + if (sp) + { + k = sp->nshifts; + + for (j = 0; j < k; j++) + { + symbol = accessing_symbol[sp->internalShifts[j]]; + if (ISVAR(symbol)) + break; + SETBIT(rowp, symbol); + } + + for (; j < k; j++) + { + symbol = accessing_symbol[sp->internalShifts[j]]; + if (nullable[symbol]) + edge[nedges++] = map_goto(stateno, symbol); + } + + if (nedges) + { + reads[i] = rp = NEW2(nedges + 1, short); + + for (j = 0; j < nedges; j++) + rp[j] = edge[j]; + + rp[nedges] = -1; + nedges = 0; + } + } + + rowp += tokensetsize; + } + + digraph(reads); + + for (i = 0; i < ngotos; i++) + { + if (reads[i]) + FREE(reads[i]); + } + + FREE(reads); + FREE(edge); +} + + +void +build_relations() +{ + register int i; + register int j; + register int k; + register short *rulep; + register short *rp; + register shifts *sp; + register int length; + register int nedges; + register int done; + register int state1; + register int stateno; + register int symbol1; + register int symbol2; + register short *shortp; + register short *edge; + register short *states; + register short **new_includes; + + includes = NEW2(ngotos, short *); + edge = NEW2(ngotos + 1, short); + states = NEW2(maxrhs + 1, short); + + for (i = 0; i < ngotos; i++) + { + nedges = 0; + state1 = from_state[i]; + symbol1 = accessing_symbol[to_state[i]]; + + for (rulep = derives[symbol1]; *rulep > 0; rulep++) + { + length = 1; + states[0] = state1; + stateno = state1; + + for (rp = ritem + rrhs[*rulep]; *rp > 0; rp++) + { + symbol2 = *rp; + sp = shift_table[stateno]; + k = sp->nshifts; + + for (j = 0; j < k; j++) + { + stateno = sp->internalShifts[j]; + if (accessing_symbol[stateno] == symbol2) break; + } + + states[length++] = stateno; + } + + if (!consistent[stateno]) + add_lookback_edge(stateno, *rulep, i); + + length--; + done = 0; + while (!done) + { + done = 1; + rp--; + /* JF added rp>=ritem && I hope to god its right! */ + if (rp>=ritem && ISVAR(*rp)) + { + stateno = states[--length]; + edge[nedges++] = map_goto(stateno, *rp); + if (nullable[*rp]) done = 0; + } + } + } + + if (nedges) + { + includes[i] = shortp = NEW2(nedges + 1, short); + for (j = 0; j < nedges; j++) + shortp[j] = edge[j]; + shortp[nedges] = -1; + } + } + + new_includes = transpose(includes, ngotos); + + for (i = 0; i < ngotos; i++) + if (includes[i]) + FREE(includes[i]); + + FREE(includes); + + includes = new_includes; + + FREE(edge); + FREE(states); +} + + +void +add_lookback_edge(int stateno, int ruleno, int gotono) +{ + register int i; + register int k; + register int found; + register shorts *sp; + + i = lookaheads[stateno]; + k = lookaheads[stateno + 1]; + found = 0; + while (!found && i < k) + { + if (LAruleno[i] == ruleno) + found = 1; + else + i++; + } + + if (found == 0) + berror("add_lookback_edge"); + + sp = NEW(shorts); + sp->next = lookback[i]; + sp->value = gotono; + lookback[i] = sp; +} + + + +short ** +transpose(short** R_arg, int n) +{ + register short **new_R; + register short **temp_R; + register short *nedges; + register short *sp; + register int i; + register int k; + + nedges = NEW2(n, short); + + for (i = 0; i < n; i++) + { + sp = R_arg[i]; + if (sp) + { + while (*sp >= 0) + nedges[*sp++]++; + } + } + + new_R = NEW2(n, short *); + temp_R = NEW2(n, short *); + + for (i = 0; i < n; i++) + { + k = nedges[i]; + if (k > 0) + { + sp = NEW2(k + 1, short); + new_R[i] = sp; + temp_R[i] = sp; + sp[k] = -1; + } + } + + FREE(nedges); + + for (i = 0; i < n; i++) + { + sp = R_arg[i]; + if (sp) + { + while (*sp >= 0) + *temp_R[*sp++]++ = i; + } + } + + FREE(temp_R); + + return (new_R); +} + + +void +compute_FOLLOWS() +{ + register int i; + + digraph(includes); + + for (i = 0; i < ngotos; i++) + { + if (includes[i]) FREE(includes[i]); + } + + FREE(includes); +} + + +void +compute_lookaheads() +{ + register int i; + register int n; + register unsigned *fp1; + register unsigned *fp2; + register unsigned *fp3; + register shorts *sp; + register unsigned *rowp; +/* register short *rulep; JF unused */ +/* register int count; JF unused */ + register shorts *sptmp;/* JF */ + + rowp = LA; + n = lookaheads[nstates]; + for (i = 0; i < n; i++) + { + fp3 = rowp + tokensetsize; + for (sp = lookback[i]; sp; sp = sp->next) + { + fp1 = rowp; + fp2 = F + tokensetsize * sp->value; + while (fp1 < fp3) + *fp1++ |= *fp2++; + } + + rowp = fp3; + } + + for (i = 0; i < n; i++) + {/* JF removed ref to freed storage */ + for (sp = lookback[i]; sp; sp = sptmp) { + sptmp=sp->next; + FREE(sp); + } + } + + FREE(lookback); + FREE(F); +} + + +void +digraph(short** relation) +{ + register int i; + + infinity = ngotos + 2; + INDEX = NEW2(ngotos + 1, short); + VERTICES = NEW2(ngotos + 1, short); + top = 0; + + R = relation; + + for (i = 0; i < ngotos; i++) + INDEX[i] = 0; + + for (i = 0; i < ngotos; i++) + { + if (INDEX[i] == 0 && R[i]) + traverse(i); + } + + FREE(INDEX); + FREE(VERTICES); +} + + +void +traverse(int i) +{ + register unsigned *fp1; + register unsigned *fp2; + register unsigned *fp3; + register int j; + register short *rp; + + int height; + unsigned *base; + + VERTICES[++top] = i; + INDEX[i] = height = top; + + base = F + i * tokensetsize; + fp3 = base + tokensetsize; + + rp = R[i]; + if (rp) + { + while ((j = *rp++) >= 0) + { + if (INDEX[j] == 0) + traverse(j); + + if (INDEX[i] > INDEX[j]) + INDEX[i] = INDEX[j]; + + fp1 = base; + fp2 = F + j * tokensetsize; + + while (fp1 < fp3) + *fp1++ |= *fp2++; + } + } + + if (INDEX[i] == height) + { + for (;;) + { + j = VERTICES[top--]; + INDEX[j] = infinity; + + if (i == j) + break; + + fp1 = base; + fp2 = F + j * tokensetsize; + + while (fp1 < fp3) + *fp2++ = *fp1++; + } + } +} |