diff options
Diffstat (limited to 'mesalib/src/glsl/glcpp/glcpp-lex.l')
-rw-r--r-- | mesalib/src/glsl/glcpp/glcpp-lex.l | 449 |
1 files changed, 317 insertions, 132 deletions
diff --git a/mesalib/src/glsl/glcpp/glcpp-lex.l b/mesalib/src/glsl/glcpp/glcpp-lex.l index a1a8e76af..98d500ec0 100644 --- a/mesalib/src/glsl/glcpp/glcpp-lex.l +++ b/mesalib/src/glsl/glcpp/glcpp-lex.l @@ -52,14 +52,107 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); yylloc->last_column = yycolumn + 1; \ parser->has_new_line_number = 0; \ parser->has_new_source_number = 0; \ - } while(0); + } while(0); #define YY_USER_INIT \ do { \ yylineno = 1; \ - yycolumn = 1; \ + yycolumn = 0; \ yylloc->source = 0; \ } while(0) + +/* It's ugly to have macros that have return statements inside of + * them, but flex-based lexer generation is all built around the + * return statement. + * + * To mitigate the ugliness, we defer as much of the logic as possible + * to an actual function, not a macro (see + * glcpplex_update_state_per_token) and we make the word RETURN + * prominent in all of the macros which may return. + * + * The most-commonly-used macro is RETURN_TOKEN which will perform all + * necessary state updates based on the provided token,, then + * conditionally return the token. It will not return a token if the + * parser is currently skipping tokens, (such as within #if + * 0...#else). + * + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that + * makes the token returning unconditional. This is needed for things + * like #if and the tokens of its condition, (since these must be + * evaluated by the parser even when otherwise skipping). + * + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top + * of RETURN_TOKEN that performs a string copy of yytext before the + * return. + */ +#define RETURN_TOKEN_NEVER_SKIP(token) \ + do { \ + if (glcpp_lex_update_state_per_token (parser, token)) \ + return token; \ + } while (0) + +#define RETURN_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + RETURN_TOKEN_NEVER_SKIP(token); \ + } \ + } while(0) + +#define RETURN_STRING_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + yylval->str = ralloc_strdup (yyextra, yytext); \ + RETURN_TOKEN_NEVER_SKIP (token); \ + } \ + } while(0) + + +/* Update all state necessary for each token being returned. + * + * Here we'll be tracking newlines and spaces so that the lexer can + * alter its behavior as necessary, (for example, '#' has special + * significance if it is the first non-whitespace, non-comment token + * in a line, but does not otherwise). + * + * NOTE: If this function returns FALSE, then no token should be + * returned at all. This is used to suprress duplicate SPACE tokens. + */ +static int +glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) +{ + /* After the first non-space token in a line, we won't + * allow any '#' to introduce a directive. */ + if (token == NEWLINE) { + parser->first_non_space_token_this_line = 1; + } else if (token != SPACE) { + parser->first_non_space_token_this_line = 0; + } + + /* Track newlines just to know whether a newline needs + * to be inserted if end-of-file comes early. */ + if (token == NEWLINE) { + parser->last_token_was_newline = 1; + } else { + parser->last_token_was_newline = 0; + } + + /* Track spaces to avoid emitting multiple SPACE + * tokens in a row. */ + if (token == SPACE) { + if (! parser->last_token_was_space) { + parser->last_token_was_space = 1; + return 1; + } else { + parser->last_token_was_space = 1; + return 0; + } + } else { + parser->last_token_was_space = 0; + return 1; + } +} + + %} %option bison-bridge bison-locations reentrant noyywrap @@ -67,14 +160,19 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner); %option prefix="glcpp_" %option stack %option never-interactive +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ -%x DONE COMMENT UNREACHABLE SKIP DEFINE NEWLINE_CATCHUP +%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE SPACE [[:space:]] NONSPACE [^[:space:]] -NEWLINE [\n] HSPACE [ \t] -HASH ^{HSPACE}*#{HSPACE}* +HASH # +NEWLINE (\r\n|\n\r|\r|\n) IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] @@ -111,270 +209,357 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? parser->commented_newlines--; if (parser->commented_newlines == 0) BEGIN INITIAL; - return NEWLINE; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } - /* The handling of the SKIP vs INITIAL start states requires - * some special handling. Typically, a lexer would change - * start states with statements like "BEGIN SKIP" within the - * lexer rules. We can't get away with that here, since we - * need the parser to actually evaluate expressions for - * directives like "#if". + /* Set up the parser->skipping bit here before doing any lexing. + * + * This bit controls whether tokens are skipped, (as implemented by + * RETURN_TOKEN), such as between "#if 0" and "#endif". * - * So, here, in code that will be executed on every call to - * the lexer,and before any rules, we examine the skip_stack - * as set by the parser to know whether to change from INITIAL - * to SKIP or from SKIP back to INITIAL. + * The parser maintains a skip_stack indicating whether we should be + * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will + * push and pop items from the stack. * - * Three cases cause us to switch out of the SKIP state and - * back to the INITIAL state: + * Here are the rules for determining whether we are skipping: * - * 1. The top of the skip_stack is of type SKIP_NO_SKIP - * This means we're still evaluating some #if - * hierarchy, but we're on a branch of it where - * content should not be skipped (such as "#if 1" or - * "#else" or so). + * 1. If the skip stack is NULL, we are outside of all #if blocks + * and we are not skipping. * - * 2. The skip_stack is NULL meaning that we've reached - * the last #endif. + * 2. If the skip stack is non-NULL, the type of the top node in + * the stack determines whether to skip. A type of + * SKIP_NO_SKIP is used for blocks wheere we are emitting + * tokens, (such as between #if 1 and #endif, or after the + * #else of an #if 0, etc.). * - * 3. The lexing_directive bit is set. This indicates that we are - * lexing a pre-processor directive, (such as #if, #elif, or - * #else). For the #if and #elif directives we always need to - * parse the conditions, (even if otherwise within an #if - * 0). And for #else, we want to be able to generate an error - * if any garbage follows #else. + * 3. The lexing_directive bit overrides the skip stack. This bit + * is set when we are actively lexing the expression for a + * pre-processor condition, (such as #if, #elif, or #else). In + * this case, even if otherwise skipping, we need to emit the + * tokens for this condition so that the parser can evaluate + * the expression. (For, #else, there's no expression, but we + * emit tokens so the parser can generate a nice error message + * if there are any tokens here). */ - if (YY_START == INITIAL || YY_START == SKIP) { - if (parser->lexing_directive || - parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - BEGIN INITIAL; - } else { - BEGIN SKIP; - } + if (parser->skip_stack && + parser->skip_stack->type != SKIP_NO_SKIP && + ! parser->lexing_directive) + { + parser->skipping = 1; + } else { + parser->skipping = 0; } /* Single-line comments */ -"//"[^\n]* { +<INITIAL,DEFINE,HASH>"//"[^\r\n]* { } /* Multi-line comments */ -"/*" { yy_push_state(COMMENT, yyscanner); } -<COMMENT>[^*\n]* -<COMMENT>[^*\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; } -<COMMENT>"*"+[^*/\n]* -<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; } +<INITIAL,DEFINE,HASH>"/*" { yy_push_state(COMMENT, yyscanner); } +<COMMENT>[^*\r\n]* +<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +<COMMENT>"*"+[^*/\r\n]* +<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } <COMMENT>"*"+"/" { yy_pop_state(yyscanner); - if (yyextra->space_tokens) - return SPACE; + /* In the <HASH> start condition, we don't want any SPACE token. */ + if (yyextra->space_tokens && YY_START != HASH) + RETURN_TOKEN (SPACE); +} + +{HASH} { + + /* If the '#' is the first non-whitespace, non-comment token on this + * line, then it introduces a directive, switch to the <HASH> start + * condition. + * + * Otherwise, this is just punctuation, so return the HASH_TOKEN + * token. */ + if (parser->first_non_space_token_this_line) { + BEGIN HASH; + } + + RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); } -{HASH}version{HSPACE}+ { - yylval->str = ralloc_strdup (yyextra, yytext); +<HASH>version{HSPACE}+ { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_VERSION; + RETURN_STRING_TOKEN (VERSION_TOKEN); +} + + /* Swallow empty #pragma directives, (to avoid confusing the + * downstream compiler). */ +<HASH>pragma{HSPACE}*/{NEWLINE} { + BEGIN INITIAL; } /* glcpp doesn't handle #extension, #version, or #pragma directives. * Simply pass them through to the main compiler's lexer/parser. */ -{HASH}(extension|pragma)[^\n]* { - if (parser->commented_newlines) - BEGIN NEWLINE_CATCHUP; - yylval->str = ralloc_strdup (yyextra, yytext); - yylineno++; - yycolumn = 0; - return OTHER; +<HASH>(extension|pragma)[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (PRAGMA); } -{HASH}line{HSPACE}+ { - return HASH_LINE; +<HASH>line{HSPACE}+ { + BEGIN INITIAL; + RETURN_TOKEN (LINE); } -<SKIP,INITIAL>{ -{HASH}ifdef { +<HASH>{NEWLINE} { + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); +} + + /* For the pre-processor directives, we return these tokens + * even when we are otherwise skipping. */ +<HASH>ifdef { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IFDEF; + RETURN_TOKEN_NEVER_SKIP (IFDEF); } -{HASH}ifndef { +<HASH>ifndef { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IFNDEF; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); } -{HASH}if/[^_a-zA-Z0-9] { +<HASH>if/[^_a-zA-Z0-9] { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_IF; + RETURN_TOKEN_NEVER_SKIP (IF); } -{HASH}elif/[^_a-zA-Z0-9] { +<HASH>elif/[^_a-zA-Z0-9] { + BEGIN INITIAL; yyextra->lexing_directive = 1; yyextra->space_tokens = 0; - return HASH_ELIF; + RETURN_TOKEN_NEVER_SKIP (ELIF); } -{HASH}else { +<HASH>else { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_ELSE; + RETURN_TOKEN_NEVER_SKIP (ELSE); } -{HASH}endif { +<HASH>endif { + BEGIN INITIAL; yyextra->space_tokens = 0; - return HASH_ENDIF; -} + RETURN_TOKEN_NEVER_SKIP (ENDIF); } -<SKIP>[^\n] { - if (parser->commented_newlines) - BEGIN NEWLINE_CATCHUP; +<HASH>error[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (ERROR_TOKEN); } -{HASH}error.* { - char *p; - for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */ - p += 5; /* skip "error" */ - glcpp_error(yylloc, yyextra, "#error%s", p); + /* After we see a "#define" we enter the <DEFINE> start state + * for the lexer. Within <DEFINE> we are looking for the first + * identifier and specifically checking whether the identifier + * is followed by a '(' or not, (to lex either a + * FUNC_IDENTIFIER or an OBJ_IDENITIFIER token). + * + * While in the <DEFINE> state we also need to explicitly + * handle a few other things that may appear before the + * identifier: + * + * * Comments, (handled above with the main support for + * comments). + * + * * Whitespace (simply ignored) + * + * * Anything else, (not an identifier, not a comment, + * and not whitespace). This will generate an error. + */ +<HASH>define{HSPACE}* { + if (! parser->skipping) { + BEGIN DEFINE; + yyextra->space_tokens = 0; + RETURN_TOKEN (DEFINE_TOKEN); + } } -{HASH}define{HSPACE}+ { +<HASH>undef { + BEGIN INITIAL; yyextra->space_tokens = 0; - yy_push_state(DEFINE, yyscanner); - return HASH_DEFINE; + RETURN_TOKEN (UNDEF); } +<HASH>{HSPACE}+ { + /* Nothing to do here. Importantly, don't leave the <HASH> + * start condition, since it's legal to have space between the + * '#' and the directive.. */ +} + + /* This will catch any non-directive garbage after a HASH */ +<HASH>{NONSPACE} { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); +} + + /* An identifier immediately followed by '(' */ <DEFINE>{IDENTIFIER}/"(" { - yy_pop_state(yyscanner); - yylval->str = ralloc_strdup (yyextra, yytext); - return FUNC_IDENTIFIER; + BEGIN INITIAL; + RETURN_STRING_TOKEN (FUNC_IDENTIFIER); } + /* An identifier not immediately followed by '(' */ <DEFINE>{IDENTIFIER} { - yy_pop_state(yyscanner); - yylval->str = ralloc_strdup (yyextra, yytext); - return OBJ_IDENTIFIER; + BEGIN INITIAL; + RETURN_STRING_TOKEN (OBJ_IDENTIFIER); } -{HASH}undef { - yyextra->space_tokens = 0; - return HASH_UNDEF; + /* Whitespace */ +<DEFINE>{HSPACE}+ { + /* Just ignore it. Nothing to do here. */ } -{HASH} { - yyextra->space_tokens = 0; - return HASH; + /* '/' not followed by '*', so not a comment. This is an error. */ +<DEFINE>[/][^*]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + + /* A character that can't start an identifier, comment, or + * space. This is an error. */ +<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); } {DECIMAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {OCTAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } {HEXADECIMAL_INTEGER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return INTEGER_STRING; + RETURN_STRING_TOKEN (INTEGER_STRING); } "<<" { - return LEFT_SHIFT; + RETURN_TOKEN (LEFT_SHIFT); } ">>" { - return RIGHT_SHIFT; + RETURN_TOKEN (RIGHT_SHIFT); } "<=" { - return LESS_OR_EQUAL; + RETURN_TOKEN (LESS_OR_EQUAL); } ">=" { - return GREATER_OR_EQUAL; + RETURN_TOKEN (GREATER_OR_EQUAL); } "==" { - return EQUAL; + RETURN_TOKEN (EQUAL); } "!=" { - return NOT_EQUAL; + RETURN_TOKEN (NOT_EQUAL); } "&&" { - return AND; + RETURN_TOKEN (AND); } "||" { - return OR; + RETURN_TOKEN (OR); +} + +"++" { + RETURN_TOKEN (PLUS_PLUS); +} + +"--" { + RETURN_TOKEN (MINUS_MINUS); } "##" { - if (parser->is_gles) - glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); - return PASTE; + if (! parser->skipping) { + if (parser->is_gles) + glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); + RETURN_TOKEN (PASTE); + } } "defined" { - return DEFINED; + RETURN_TOKEN (DEFINED); } {IDENTIFIER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return IDENTIFIER; + RETURN_STRING_TOKEN (IDENTIFIER); } {PP_NUMBER} { - yylval->str = ralloc_strdup (yyextra, yytext); - return OTHER; + RETURN_STRING_TOKEN (OTHER); } {PUNCTUATION} { - return yytext[0]; + RETURN_TOKEN (yytext[0]); } {OTHER}+ { - yylval->str = ralloc_strdup (yyextra, yytext); - return OTHER; + RETURN_STRING_TOKEN (OTHER); } {HSPACE} { if (yyextra->space_tokens) { - return SPACE; + RETURN_TOKEN (SPACE); } } -<SKIP,INITIAL>\n { + /* We preserve all newlines, even between #if 0..#endif, so no + skipping.. */ +<*>{NEWLINE} { if (parser->commented_newlines) { BEGIN NEWLINE_CATCHUP; + } else { + BEGIN INITIAL; } + yyextra->space_tokens = 1; yyextra->lexing_directive = 0; yylineno++; yycolumn = 0; - return NEWLINE; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); } - /* Handle missing newline at EOF. */ -<INITIAL><<EOF>> { +<INITIAL,COMMENT,DEFINE,HASH><<EOF>> { + if (YY_START == COMMENT) + glcpp_error(yylloc, yyextra, "Unterminated comment"); BEGIN DONE; /* Don't keep matching this rule forever. */ yyextra->lexing_directive = 0; - return NEWLINE; + if (! parser->last_token_was_newline) + RETURN_TOKEN (NEWLINE); } + /* This is a catch-all to avoid the annoying default flex action which + * matches any character and prints it. If any input ever matches this + * rule, then we have made a mistake above and need to fix one or more + * of the preceding patterns to match that input. */ + +<*>. { + glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); + /* We don't actually use the UNREACHABLE start condition. We - only have this action here so that we can pretend to call some + only have this block here so that we can pretend to call some generated functions, (to avoid "defined but not used" warnings. */ -<UNREACHABLE>. { - unput('.'); - yy_top_state(yyextra); + if (YY_START == UNREACHABLE) { + unput('.'); + yy_top_state(yyextra); + } } %% |