aboutsummaryrefslogtreecommitdiff
path: root/mesalib/src/glsl/glcpp/glcpp-lex.l
diff options
context:
space:
mode:
Diffstat (limited to 'mesalib/src/glsl/glcpp/glcpp-lex.l')
-rw-r--r--mesalib/src/glsl/glcpp/glcpp-lex.l449
1 files changed, 317 insertions, 132 deletions
diff --git a/mesalib/src/glsl/glcpp/glcpp-lex.l b/mesalib/src/glsl/glcpp/glcpp-lex.l
index a1a8e76af..98d500ec0 100644
--- a/mesalib/src/glsl/glcpp/glcpp-lex.l
+++ b/mesalib/src/glsl/glcpp/glcpp-lex.l
@@ -52,14 +52,107 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner);
yylloc->last_column = yycolumn + 1; \
parser->has_new_line_number = 0; \
parser->has_new_source_number = 0; \
- } while(0);
+ } while(0);
#define YY_USER_INIT \
do { \
yylineno = 1; \
- yycolumn = 1; \
+ yycolumn = 0; \
yylloc->source = 0; \
} while(0)
+
+/* It's ugly to have macros that have return statements inside of
+ * them, but flex-based lexer generation is all built around the
+ * return statement.
+ *
+ * To mitigate the ugliness, we defer as much of the logic as possible
+ * to an actual function, not a macro (see
+ * glcpplex_update_state_per_token) and we make the word RETURN
+ * prominent in all of the macros which may return.
+ *
+ * The most-commonly-used macro is RETURN_TOKEN which will perform all
+ * necessary state updates based on the provided token,, then
+ * conditionally return the token. It will not return a token if the
+ * parser is currently skipping tokens, (such as within #if
+ * 0...#else).
+ *
+ * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that
+ * makes the token returning unconditional. This is needed for things
+ * like #if and the tokens of its condition, (since these must be
+ * evaluated by the parser even when otherwise skipping).
+ *
+ * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top
+ * of RETURN_TOKEN that performs a string copy of yytext before the
+ * return.
+ */
+#define RETURN_TOKEN_NEVER_SKIP(token) \
+ do { \
+ if (glcpp_lex_update_state_per_token (parser, token)) \
+ return token; \
+ } while (0)
+
+#define RETURN_TOKEN(token) \
+ do { \
+ if (! parser->skipping) { \
+ RETURN_TOKEN_NEVER_SKIP(token); \
+ } \
+ } while(0)
+
+#define RETURN_STRING_TOKEN(token) \
+ do { \
+ if (! parser->skipping) { \
+ yylval->str = ralloc_strdup (yyextra, yytext); \
+ RETURN_TOKEN_NEVER_SKIP (token); \
+ } \
+ } while(0)
+
+
+/* Update all state necessary for each token being returned.
+ *
+ * Here we'll be tracking newlines and spaces so that the lexer can
+ * alter its behavior as necessary, (for example, '#' has special
+ * significance if it is the first non-whitespace, non-comment token
+ * in a line, but does not otherwise).
+ *
+ * NOTE: If this function returns FALSE, then no token should be
+ * returned at all. This is used to suprress duplicate SPACE tokens.
+ */
+static int
+glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token)
+{
+ /* After the first non-space token in a line, we won't
+ * allow any '#' to introduce a directive. */
+ if (token == NEWLINE) {
+ parser->first_non_space_token_this_line = 1;
+ } else if (token != SPACE) {
+ parser->first_non_space_token_this_line = 0;
+ }
+
+ /* Track newlines just to know whether a newline needs
+ * to be inserted if end-of-file comes early. */
+ if (token == NEWLINE) {
+ parser->last_token_was_newline = 1;
+ } else {
+ parser->last_token_was_newline = 0;
+ }
+
+ /* Track spaces to avoid emitting multiple SPACE
+ * tokens in a row. */
+ if (token == SPACE) {
+ if (! parser->last_token_was_space) {
+ parser->last_token_was_space = 1;
+ return 1;
+ } else {
+ parser->last_token_was_space = 1;
+ return 0;
+ }
+ } else {
+ parser->last_token_was_space = 0;
+ return 1;
+ }
+}
+
+
%}
%option bison-bridge bison-locations reentrant noyywrap
@@ -67,14 +160,19 @@ void glcpp_set_column (int column_no , yyscan_t yyscanner);
%option prefix="glcpp_"
%option stack
%option never-interactive
+%option warn nodefault
+
+ /* Note: When adding any start conditions to this list, you must also
+ * update the "Internal compiler error" catch-all rule near the end of
+ * this file. */
-%x DONE COMMENT UNREACHABLE SKIP DEFINE NEWLINE_CATCHUP
+%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE
SPACE [[:space:]]
NONSPACE [^[:space:]]
-NEWLINE [\n]
HSPACE [ \t]
-HASH ^{HSPACE}*#{HSPACE}*
+HASH #
+NEWLINE (\r\n|\n\r|\r|\n)
IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]*
PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])*
PUNCTUATION [][(){}.&*~!/%<>^|;,=+-]
@@ -111,270 +209,357 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
parser->commented_newlines--;
if (parser->commented_newlines == 0)
BEGIN INITIAL;
- return NEWLINE;
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
- /* The handling of the SKIP vs INITIAL start states requires
- * some special handling. Typically, a lexer would change
- * start states with statements like "BEGIN SKIP" within the
- * lexer rules. We can't get away with that here, since we
- * need the parser to actually evaluate expressions for
- * directives like "#if".
+ /* Set up the parser->skipping bit here before doing any lexing.
+ *
+ * This bit controls whether tokens are skipped, (as implemented by
+ * RETURN_TOKEN), such as between "#if 0" and "#endif".
*
- * So, here, in code that will be executed on every call to
- * the lexer,and before any rules, we examine the skip_stack
- * as set by the parser to know whether to change from INITIAL
- * to SKIP or from SKIP back to INITIAL.
+ * The parser maintains a skip_stack indicating whether we should be
+ * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will
+ * push and pop items from the stack.
*
- * Three cases cause us to switch out of the SKIP state and
- * back to the INITIAL state:
+ * Here are the rules for determining whether we are skipping:
*
- * 1. The top of the skip_stack is of type SKIP_NO_SKIP
- * This means we're still evaluating some #if
- * hierarchy, but we're on a branch of it where
- * content should not be skipped (such as "#if 1" or
- * "#else" or so).
+ * 1. If the skip stack is NULL, we are outside of all #if blocks
+ * and we are not skipping.
*
- * 2. The skip_stack is NULL meaning that we've reached
- * the last #endif.
+ * 2. If the skip stack is non-NULL, the type of the top node in
+ * the stack determines whether to skip. A type of
+ * SKIP_NO_SKIP is used for blocks wheere we are emitting
+ * tokens, (such as between #if 1 and #endif, or after the
+ * #else of an #if 0, etc.).
*
- * 3. The lexing_directive bit is set. This indicates that we are
- * lexing a pre-processor directive, (such as #if, #elif, or
- * #else). For the #if and #elif directives we always need to
- * parse the conditions, (even if otherwise within an #if
- * 0). And for #else, we want to be able to generate an error
- * if any garbage follows #else.
+ * 3. The lexing_directive bit overrides the skip stack. This bit
+ * is set when we are actively lexing the expression for a
+ * pre-processor condition, (such as #if, #elif, or #else). In
+ * this case, even if otherwise skipping, we need to emit the
+ * tokens for this condition so that the parser can evaluate
+ * the expression. (For, #else, there's no expression, but we
+ * emit tokens so the parser can generate a nice error message
+ * if there are any tokens here).
*/
- if (YY_START == INITIAL || YY_START == SKIP) {
- if (parser->lexing_directive ||
- parser->skip_stack == NULL ||
- parser->skip_stack->type == SKIP_NO_SKIP)
- {
- BEGIN INITIAL;
- } else {
- BEGIN SKIP;
- }
+ if (parser->skip_stack &&
+ parser->skip_stack->type != SKIP_NO_SKIP &&
+ ! parser->lexing_directive)
+ {
+ parser->skipping = 1;
+ } else {
+ parser->skipping = 0;
}
/* Single-line comments */
-"//"[^\n]* {
+<INITIAL,DEFINE,HASH>"//"[^\r\n]* {
}
/* Multi-line comments */
-"/*" { yy_push_state(COMMENT, yyscanner); }
-<COMMENT>[^*\n]*
-<COMMENT>[^*\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
-<COMMENT>"*"+[^*/\n]*
-<COMMENT>"*"+[^*/\n]*\n { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<INITIAL,DEFINE,HASH>"/*" { yy_push_state(COMMENT, yyscanner); }
+<COMMENT>[^*\r\n]*
+<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
+<COMMENT>"*"+[^*/\r\n]*
+<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; }
<COMMENT>"*"+"/" {
yy_pop_state(yyscanner);
- if (yyextra->space_tokens)
- return SPACE;
+ /* In the <HASH> start condition, we don't want any SPACE token. */
+ if (yyextra->space_tokens && YY_START != HASH)
+ RETURN_TOKEN (SPACE);
+}
+
+{HASH} {
+
+ /* If the '#' is the first non-whitespace, non-comment token on this
+ * line, then it introduces a directive, switch to the <HASH> start
+ * condition.
+ *
+ * Otherwise, this is just punctuation, so return the HASH_TOKEN
+ * token. */
+ if (parser->first_non_space_token_this_line) {
+ BEGIN HASH;
+ }
+
+ RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN);
}
-{HASH}version{HSPACE}+ {
- yylval->str = ralloc_strdup (yyextra, yytext);
+<HASH>version{HSPACE}+ {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- return HASH_VERSION;
+ RETURN_STRING_TOKEN (VERSION_TOKEN);
+}
+
+ /* Swallow empty #pragma directives, (to avoid confusing the
+ * downstream compiler). */
+<HASH>pragma{HSPACE}*/{NEWLINE} {
+ BEGIN INITIAL;
}
/* glcpp doesn't handle #extension, #version, or #pragma directives.
* Simply pass them through to the main compiler's lexer/parser. */
-{HASH}(extension|pragma)[^\n]* {
- if (parser->commented_newlines)
- BEGIN NEWLINE_CATCHUP;
- yylval->str = ralloc_strdup (yyextra, yytext);
- yylineno++;
- yycolumn = 0;
- return OTHER;
+<HASH>(extension|pragma)[^\r\n]* {
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (PRAGMA);
}
-{HASH}line{HSPACE}+ {
- return HASH_LINE;
+<HASH>line{HSPACE}+ {
+ BEGIN INITIAL;
+ RETURN_TOKEN (LINE);
}
-<SKIP,INITIAL>{
-{HASH}ifdef {
+<HASH>{NEWLINE} {
+ BEGIN INITIAL;
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
+}
+
+ /* For the pre-processor directives, we return these tokens
+ * even when we are otherwise skipping. */
+<HASH>ifdef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- return HASH_IFDEF;
+ RETURN_TOKEN_NEVER_SKIP (IFDEF);
}
-{HASH}ifndef {
+<HASH>ifndef {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- return HASH_IFNDEF;
+ RETURN_TOKEN_NEVER_SKIP (IFNDEF);
}
-{HASH}if/[^_a-zA-Z0-9] {
+<HASH>if/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- return HASH_IF;
+ RETURN_TOKEN_NEVER_SKIP (IF);
}
-{HASH}elif/[^_a-zA-Z0-9] {
+<HASH>elif/[^_a-zA-Z0-9] {
+ BEGIN INITIAL;
yyextra->lexing_directive = 1;
yyextra->space_tokens = 0;
- return HASH_ELIF;
+ RETURN_TOKEN_NEVER_SKIP (ELIF);
}
-{HASH}else {
+<HASH>else {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- return HASH_ELSE;
+ RETURN_TOKEN_NEVER_SKIP (ELSE);
}
-{HASH}endif {
+<HASH>endif {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- return HASH_ENDIF;
-}
+ RETURN_TOKEN_NEVER_SKIP (ENDIF);
}
-<SKIP>[^\n] {
- if (parser->commented_newlines)
- BEGIN NEWLINE_CATCHUP;
+<HASH>error[^\r\n]* {
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (ERROR_TOKEN);
}
-{HASH}error.* {
- char *p;
- for (p = yytext; !isalpha(p[0]); p++); /* skip " # " */
- p += 5; /* skip "error" */
- glcpp_error(yylloc, yyextra, "#error%s", p);
+ /* After we see a "#define" we enter the <DEFINE> start state
+ * for the lexer. Within <DEFINE> we are looking for the first
+ * identifier and specifically checking whether the identifier
+ * is followed by a '(' or not, (to lex either a
+ * FUNC_IDENTIFIER or an OBJ_IDENITIFIER token).
+ *
+ * While in the <DEFINE> state we also need to explicitly
+ * handle a few other things that may appear before the
+ * identifier:
+ *
+ * * Comments, (handled above with the main support for
+ * comments).
+ *
+ * * Whitespace (simply ignored)
+ *
+ * * Anything else, (not an identifier, not a comment,
+ * and not whitespace). This will generate an error.
+ */
+<HASH>define{HSPACE}* {
+ if (! parser->skipping) {
+ BEGIN DEFINE;
+ yyextra->space_tokens = 0;
+ RETURN_TOKEN (DEFINE_TOKEN);
+ }
}
-{HASH}define{HSPACE}+ {
+<HASH>undef {
+ BEGIN INITIAL;
yyextra->space_tokens = 0;
- yy_push_state(DEFINE, yyscanner);
- return HASH_DEFINE;
+ RETURN_TOKEN (UNDEF);
}
+<HASH>{HSPACE}+ {
+ /* Nothing to do here. Importantly, don't leave the <HASH>
+ * start condition, since it's legal to have space between the
+ * '#' and the directive.. */
+}
+
+ /* This will catch any non-directive garbage after a HASH */
+<HASH>{NONSPACE} {
+ BEGIN INITIAL;
+ RETURN_TOKEN (GARBAGE);
+}
+
+ /* An identifier immediately followed by '(' */
<DEFINE>{IDENTIFIER}/"(" {
- yy_pop_state(yyscanner);
- yylval->str = ralloc_strdup (yyextra, yytext);
- return FUNC_IDENTIFIER;
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (FUNC_IDENTIFIER);
}
+ /* An identifier not immediately followed by '(' */
<DEFINE>{IDENTIFIER} {
- yy_pop_state(yyscanner);
- yylval->str = ralloc_strdup (yyextra, yytext);
- return OBJ_IDENTIFIER;
+ BEGIN INITIAL;
+ RETURN_STRING_TOKEN (OBJ_IDENTIFIER);
}
-{HASH}undef {
- yyextra->space_tokens = 0;
- return HASH_UNDEF;
+ /* Whitespace */
+<DEFINE>{HSPACE}+ {
+ /* Just ignore it. Nothing to do here. */
}
-{HASH} {
- yyextra->space_tokens = 0;
- return HASH;
+ /* '/' not followed by '*', so not a comment. This is an error. */
+<DEFINE>[/][^*]{NONSPACE}* {
+ BEGIN INITIAL;
+ glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
+ RETURN_STRING_TOKEN (INTEGER_STRING);
+}
+
+ /* A character that can't start an identifier, comment, or
+ * space. This is an error. */
+<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* {
+ BEGIN INITIAL;
+ glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext);
+ RETURN_STRING_TOKEN (INTEGER_STRING);
}
{DECIMAL_INTEGER} {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return INTEGER_STRING;
+ RETURN_STRING_TOKEN (INTEGER_STRING);
}
{OCTAL_INTEGER} {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return INTEGER_STRING;
+ RETURN_STRING_TOKEN (INTEGER_STRING);
}
{HEXADECIMAL_INTEGER} {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return INTEGER_STRING;
+ RETURN_STRING_TOKEN (INTEGER_STRING);
}
"<<" {
- return LEFT_SHIFT;
+ RETURN_TOKEN (LEFT_SHIFT);
}
">>" {
- return RIGHT_SHIFT;
+ RETURN_TOKEN (RIGHT_SHIFT);
}
"<=" {
- return LESS_OR_EQUAL;
+ RETURN_TOKEN (LESS_OR_EQUAL);
}
">=" {
- return GREATER_OR_EQUAL;
+ RETURN_TOKEN (GREATER_OR_EQUAL);
}
"==" {
- return EQUAL;
+ RETURN_TOKEN (EQUAL);
}
"!=" {
- return NOT_EQUAL;
+ RETURN_TOKEN (NOT_EQUAL);
}
"&&" {
- return AND;
+ RETURN_TOKEN (AND);
}
"||" {
- return OR;
+ RETURN_TOKEN (OR);
+}
+
+"++" {
+ RETURN_TOKEN (PLUS_PLUS);
+}
+
+"--" {
+ RETURN_TOKEN (MINUS_MINUS);
}
"##" {
- if (parser->is_gles)
- glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
- return PASTE;
+ if (! parser->skipping) {
+ if (parser->is_gles)
+ glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES");
+ RETURN_TOKEN (PASTE);
+ }
}
"defined" {
- return DEFINED;
+ RETURN_TOKEN (DEFINED);
}
{IDENTIFIER} {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return IDENTIFIER;
+ RETURN_STRING_TOKEN (IDENTIFIER);
}
{PP_NUMBER} {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return OTHER;
+ RETURN_STRING_TOKEN (OTHER);
}
{PUNCTUATION} {
- return yytext[0];
+ RETURN_TOKEN (yytext[0]);
}
{OTHER}+ {
- yylval->str = ralloc_strdup (yyextra, yytext);
- return OTHER;
+ RETURN_STRING_TOKEN (OTHER);
}
{HSPACE} {
if (yyextra->space_tokens) {
- return SPACE;
+ RETURN_TOKEN (SPACE);
}
}
-<SKIP,INITIAL>\n {
+ /* We preserve all newlines, even between #if 0..#endif, so no
+ skipping.. */
+<*>{NEWLINE} {
if (parser->commented_newlines) {
BEGIN NEWLINE_CATCHUP;
+ } else {
+ BEGIN INITIAL;
}
+ yyextra->space_tokens = 1;
yyextra->lexing_directive = 0;
yylineno++;
yycolumn = 0;
- return NEWLINE;
+ RETURN_TOKEN_NEVER_SKIP (NEWLINE);
}
- /* Handle missing newline at EOF. */
-<INITIAL><<EOF>> {
+<INITIAL,COMMENT,DEFINE,HASH><<EOF>> {
+ if (YY_START == COMMENT)
+ glcpp_error(yylloc, yyextra, "Unterminated comment");
BEGIN DONE; /* Don't keep matching this rule forever. */
yyextra->lexing_directive = 0;
- return NEWLINE;
+ if (! parser->last_token_was_newline)
+ RETURN_TOKEN (NEWLINE);
}
+ /* This is a catch-all to avoid the annoying default flex action which
+ * matches any character and prints it. If any input ever matches this
+ * rule, then we have made a mistake above and need to fix one or more
+ * of the preceding patterns to match that input. */
+
+<*>. {
+ glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext);
+
/* We don't actually use the UNREACHABLE start condition. We
- only have this action here so that we can pretend to call some
+ only have this block here so that we can pretend to call some
generated functions, (to avoid "defined but not used"
warnings. */
-<UNREACHABLE>. {
- unput('.');
- yy_top_state(yyextra);
+ if (YY_START == UNREACHABLE) {
+ unput('.');
+ yy_top_state(yyextra);
+ }
}
%%