]> git.corax.cc Git - ccc/commitdiff
lex: Add token types for keywords lexer unstable
author Matthias Kruk <m@m10k.eu>
Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
committer Matthias Kruk <m@m10k.eu>
Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
src/lex.c
src/main.c
src/token.c
src/token.h

index dc0366edc161b4d6c0fda7a52db83fffb30f6602..a0613c67c07bc3088334f3aa447ad0408498c421 100644 (file)
--- a/src/lex.c
+++ b/src/lex.c
@@ -50,6 +50,46 @@ struct token **_tokens;
 int _num_tokens;
 int _cur_token;
 
+struct {
+       const char *keyword;
+       token_type_t type;
+} _keywords[] = {
+       { "auto", TOKEN_AUTO },
+       { "register", TOKEN_REGISTER },
+       { "static", TOKEN_STATIC },
+       { "extern", TOKEN_EXTERN },
+       { "typedef", TOKEN_TYPEDEF },
+       { "void", TOKEN_VOID },
+       { "char", TOKEN_CHAR },
+       { "short", TOKEN_SHORT },
+       { "int", TOKEN_INT },
+       { "long", TOKEN_LONG },
+       { "float", TOKEN_FLOAT },
+       { "double", TOKEN_DOUBLE },
+       { "signed", TOKEN_SIGNED },
+       { "unsigned", TOKEN_UNSIGNED },
+       { "const", TOKEN_CONST },
+       { "volatile", TOKEN_VOLATILE },
+       { "restrict", TOKEN_RESTRICT },
+       { "struct", TOKEN_STRUCT },
+       { "union", TOKEN_UNION },
+       { "enum", TOKEN_ENUM },
+       { "goto", TOKEN_GOTO },
+       { "case", TOKEN_CASE },
+       { "break", TOKEN_BREAK },
+       { "continue", TOKEN_CONTINUE },
+       { "default", TOKEN_DEFAULT },
+       { "if", TOKEN_IF },
+       { "else", TOKEN_ELSE },
+       { "switch", TOKEN_SWITCH },
+       { "while", TOKEN_WHILE },
+       { "do", TOKEN_DO },
+       { "for", TOKEN_FOR },
+       { "return", TOKEN_RETURN },
+       { "sizeof", TOKEN_SIZEOF },
+       { NULL, TOKEN_INVALID }
+};
+
 struct tokenlist {
        struct token *token;
        struct tokenlist *next;
@@ -112,6 +152,22 @@ static int _putnextchar(const char c)
        return(ret_val);
 }
 
+static void _identify_token(struct token *t)
+{
+       int i;
+
+       assert(t->type == TOKEN_IDENTIFIER);
+
+       for(i = 0; _keywords[i].keyword; i++) {
+               if(token_cmp(t, _keywords[i].keyword) == 0) {
+                       t->type = _keywords[i].type;
+                       break;
+               }
+       }
+
+       return;
+}
+
 struct token *_nexttoken(void)
 {
        struct token *tok;
@@ -421,7 +477,7 @@ struct token *_nexttoken(void)
 
                case STATE_CHR:
                        if(!tok) {
-                               tok = token_new_from_str(TOKEN_CHAR, _line, _col, "'");
+                               tok = token_new_from_str(TOKEN_CHAR_LITERAL, _line, _col, "'");
                                assert(tok);
                        }
 
@@ -445,6 +501,7 @@ struct token *_nexttoken(void)
                                token_append_char(tok, c);
                        } else {
                                _putnextchar(c);
+                               _identify_token(tok);
                                return(tok);
                        }
 
index 88032c043c75a1c44a9627e6959719854502bb91..3b2f0d69c2c6a34414c58a90cc2556ca8fdd0ac7 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -7,7 +7,8 @@ int main(int argc, char *argv[])
        struct token *tok;
 
        while((tok = lex_gettoken())) {
-               printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
+               printf("Token at %4d:%3d: \"%s\" %d\n", tok->line, tok->column,
+                      str_value(tok->value), tok->type);
                token_free(tok);
        }
 
index 2c2ebb2a8f28cc6fac1d7943bb59ad9c97471249..03b1bdbc7df095cbcbb03a42e03ee9b857be238c 100644 (file)
--- a/src/token.c
+++ b/src/token.c
@@ -114,3 +114,11 @@ const char *token_getvalue(struct token *tok)
 {
        return(str_value(tok->value));
 }
+
+int token_cmp(struct token *tok, const char *str)
+{
+       const char *value;
+
+       value = str_value(tok->value);
+       return(strcmp(value, str));
+}
index c353dbb88d8b426c6ab0c6fc19d21350096ea60b..c40efe302bb7eb2e62cd71c0ace692f0b32d9da9 100644 (file)
--- a/src/token.h
+++ b/src/token.h
@@ -17,7 +17,7 @@ typedef enum {
        TOKEN_ASSIGN_SHR,
        TOKEN_ASSIGN_SUB,
        TOKEN_ASSIGN_XOR,
-       TOKEN_CHAR,
+       TOKEN_CHAR_LITERAL,
        TOKEN_COLON,
        TOKEN_COMMA,
        TOKEN_COMMENT,
@@ -53,7 +53,42 @@ typedef enum {
        TOKEN_SHL,
        TOKEN_SHR,
        TOKEN_STRING,
-       TOKEN_SUB
+       TOKEN_SUB,
+
+       /* keywords */
+       TOKEN_AUTO,
+       TOKEN_REGISTER,
+       TOKEN_STATIC,
+       TOKEN_EXTERN,
+       TOKEN_TYPEDEF,
+       TOKEN_VOID,
+       TOKEN_CHAR,
+       TOKEN_SHORT,
+       TOKEN_INT,
+       TOKEN_LONG,
+       TOKEN_FLOAT,
+       TOKEN_DOUBLE,
+       TOKEN_SIGNED,
+       TOKEN_UNSIGNED,
+       TOKEN_CONST,
+       TOKEN_VOLATILE,
+       TOKEN_RESTRICT,
+       TOKEN_STRUCT,
+       TOKEN_UNION,
+       TOKEN_ENUM,
+       TOKEN_GOTO,
+       TOKEN_CASE,
+       TOKEN_BREAK,
+       TOKEN_CONTINUE,
+       TOKEN_DEFAULT,
+       TOKEN_IF,
+       TOKEN_ELSE,
+       TOKEN_SWITCH,
+       TOKEN_WHILE,
+       TOKEN_DO,
+       TOKEN_FOR,
+       TOKEN_RETURN,
+       TOKEN_SIZEOF
 } token_type_t;
 
 struct token {
@@ -71,5 +106,6 @@ void token_free(struct token*);
 int token_setvalue(struct token*, const char*);
 int token_append_str(struct token*, const char*);
 int token_append_char(struct token*, const char);
+int token_cmp(struct token*, const char*);
 
 #endif /* TOKEN_H */