From: Matthias Kruk
Date: Mon, 1 Jun 2020 01:13:41 +0000 (+0900)
Subject: lex: Add token types for keywords
X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=refs%2Fheads%2Flexer;p=ccc

lex: Add token types for keywords
---
Note: the line structure of this patch was restored from a copy whose
newlines and indentation had been collapsed. Hunk contents and line counts
are unchanged, but leading whitespace is reconstructed, so apply with
`git apply --ignore-whitespace` if context lines fail to match.

diff --git a/src/lex.c b/src/lex.c
index dc0366e..a0613c6 100644
--- a/src/lex.c
+++ b/src/lex.c
@@ -50,6 +50,46 @@ struct token **_tokens;
 int _num_tokens;
 int _cur_token;
 
+struct {
+	const char *keyword;
+	token_type_t type;
+} _keywords[] = {
+	{ "auto", TOKEN_AUTO },
+	{ "register", TOKEN_REGISTER },
+	{ "static", TOKEN_STATIC },
+	{ "extern", TOKEN_EXTERN },
+	{ "typedef", TOKEN_TYPEDEF },
+	{ "void", TOKEN_VOID },
+	{ "char", TOKEN_CHAR },
+	{ "short", TOKEN_SHORT },
+	{ "int", TOKEN_INT },
+	{ "long", TOKEN_LONG },
+	{ "float", TOKEN_FLOAT },
+	{ "double", TOKEN_DOUBLE },
+	{ "signed", TOKEN_SIGNED },
+	{ "unsigned", TOKEN_UNSIGNED },
+	{ "const", TOKEN_CONST },
+	{ "volatile", TOKEN_VOLATILE },
+	{ "restrict", TOKEN_RESTRICT },
+	{ "struct", TOKEN_STRUCT },
+	{ "union", TOKEN_UNION },
+	{ "enum", TOKEN_ENUM },
+	{ "goto", TOKEN_GOTO },
+	{ "case", TOKEN_CASE },
+	{ "break", TOKEN_BREAK },
+	{ "continue", TOKEN_CONTINUE },
+	{ "default", TOKEN_DEFAULT },
+	{ "if", TOKEN_IF },
+	{ "else", TOKEN_ELSE },
+	{ "switch", TOKEN_SWITCH },
+	{ "while", TOKEN_WHILE },
+	{ "do", TOKEN_DO },
+	{ "for", TOKEN_FOR },
+	{ "return", TOKEN_RETURN },
+	{ "sizeof", TOKEN_SIZEOF },
+	{ NULL, TOKEN_INVALID }
+};
+
 struct tokenlist {
 	struct token *token;
 	struct tokenlist *next;
@@ -112,6 +152,22 @@ static int _putnextchar(const char c)
 	return(ret_val);
 }
 
+static void _identify_token(struct token *t)
+{
+	int i;
+
+	assert(t->type == TOKEN_IDENTIFIER);
+
+	for(i = 0; _keywords[i].keyword; i++) {
+		if(token_cmp(t, _keywords[i].keyword) == 0) {
+			t->type = _keywords[i].type;
+			break;
+		}
+	}
+
+	return;
+}
+
 struct token *_nexttoken(void)
 {
 	struct token *tok;
@@ -421,7 +477,7 @@ struct token *_nexttoken(void)
 
 		case STATE_CHR:
 			if(!tok) {
-				tok = token_new_from_str(TOKEN_CHAR, _line, _col, "'");
+				tok = token_new_from_str(TOKEN_CHAR_LITERAL, _line, _col, "'");
 				assert(tok);
 			}
 
@@ -445,6 +501,7 @@ struct token *_nexttoken(void)
 				token_append_char(tok, c);
 			} else {
 				_putnextchar(c);
+				_identify_token(tok);
 				return(tok);
 			}
 
diff --git a/src/main.c b/src/main.c
index 88032c0..3b2f0d6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,7 +7,8 @@ int main(int argc, char *argv[])
 	struct token *tok;
 
 	while((tok = lex_gettoken())) {
-		printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
+		printf("Token at %4d:%3d: \"%s\" %d\n", tok->line, tok->column,
+		       str_value(tok->value), tok->type);
 		token_free(tok);
 	}
 
diff --git a/src/token.c b/src/token.c
index 2c2ebb2..03b1bdb 100644
--- a/src/token.c
+++ b/src/token.c
@@ -114,3 +114,11 @@ const char *token_getvalue(struct token *tok)
 {
 	return(str_value(tok->value));
 }
+
+int token_cmp(struct token *tok, const char *str)
+{
+	const char *value;
+
+	value = str_value(tok->value);
+	return(strcmp(value, str));
+}
diff --git a/src/token.h b/src/token.h
index c353dbb..c40efe3 100644
--- a/src/token.h
+++ b/src/token.h
@@ -17,7 +17,7 @@ typedef enum {
 	TOKEN_ASSIGN_SHR,
 	TOKEN_ASSIGN_SUB,
 	TOKEN_ASSIGN_XOR,
-	TOKEN_CHAR,
+	TOKEN_CHAR_LITERAL,
 	TOKEN_COLON,
 	TOKEN_COMMA,
 	TOKEN_COMMENT,
@@ -53,7 +53,42 @@ typedef enum {
 	TOKEN_SHL,
 	TOKEN_SHR,
 	TOKEN_STRING,
-	TOKEN_SUB
+	TOKEN_SUB,
+
+	/* keywords */
+	TOKEN_AUTO,
+	TOKEN_REGISTER,
+	TOKEN_STATIC,
+	TOKEN_EXTERN,
+	TOKEN_TYPEDEF,
+	TOKEN_VOID,
+	TOKEN_CHAR,
+	TOKEN_SHORT,
+	TOKEN_INT,
+	TOKEN_LONG,
+	TOKEN_FLOAT,
+	TOKEN_DOUBLE,
+	TOKEN_SIGNED,
+	TOKEN_UNSIGNED,
+	TOKEN_CONST,
+	TOKEN_VOLATILE,
+	TOKEN_RESTRICT,
+	TOKEN_STRUCT,
+	TOKEN_UNION,
+	TOKEN_ENUM,
+	TOKEN_GOTO,
+	TOKEN_CASE,
+	TOKEN_BREAK,
+	TOKEN_CONTINUE,
+	TOKEN_DEFAULT,
+	TOKEN_IF,
+	TOKEN_ELSE,
+	TOKEN_SWITCH,
+	TOKEN_WHILE,
+	TOKEN_DO,
+	TOKEN_FOR,
+	TOKEN_RETURN,
+	TOKEN_SIZEOF
 } token_type_t;
 
 struct token {
@@ -71,5 +106,6 @@ void token_free(struct token*);
 int token_setvalue(struct token*, const char*);
 int token_append_str(struct token*, const char*);
 int token_append_char(struct token*, const char);
+int token_cmp(struct token*, const char*);
 
 #endif /* TOKEN_H */