lex: Add token types for keywords

author Matthias Kruk <m@m10k.eu>

Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)

committer Matthias Kruk <m@m10k.eu>

Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
author Matthias Kruk <m@m10k.eu>
Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
committer Matthias Kruk <m@m10k.eu>
Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
diff --git a/src/lex.c b/src/lex.c

index dc0366edc161b4d6c0fda7a52db83fffb30f6602..a0613c67c07bc3088334f3aa447ad0408498c421 100644 (file)
--- a/src/lex.c
+++ b/src/lex.c
@@ -50,6 +50,46 @@ struct token **_tokens;
  int _num_tokens;
  int _cur_token;
  
+static const struct {   /* lookup table: reserved word spelling -> token type */
+       const char *keyword;    /* exact spelling; NULL only in the final entry */
+       token_type_t type;      /* type paired with that spelling */
+} _keywords[] = {
+       { "auto", TOKEN_AUTO },
+       { "register", TOKEN_REGISTER },
+       { "static", TOKEN_STATIC },
+       { "extern", TOKEN_EXTERN },
+       { "typedef", TOKEN_TYPEDEF },
+       { "void", TOKEN_VOID },
+       { "char", TOKEN_CHAR },
+       { "short", TOKEN_SHORT },
+       { "int", TOKEN_INT },
+       { "long", TOKEN_LONG },
+       { "float", TOKEN_FLOAT },
+       { "double", TOKEN_DOUBLE },
+       { "signed", TOKEN_SIGNED },
+       { "unsigned", TOKEN_UNSIGNED },
+       { "const", TOKEN_CONST },
+       { "volatile", TOKEN_VOLATILE },
+       { "restrict", TOKEN_RESTRICT },
+       { "struct", TOKEN_STRUCT },
+       { "union", TOKEN_UNION },
+       { "enum", TOKEN_ENUM },
+       { "goto", TOKEN_GOTO },
+       { "case", TOKEN_CASE },
+       { "break", TOKEN_BREAK },
+       { "continue", TOKEN_CONTINUE },
+       { "default", TOKEN_DEFAULT },
+       { "if", TOKEN_IF },
+       { "else", TOKEN_ELSE },
+       { "switch", TOKEN_SWITCH },
+       { "while", TOKEN_WHILE },
+       { "do", TOKEN_DO },
+       { "for", TOKEN_FOR },
+       { "return", TOKEN_RETURN },
+       { "sizeof", TOKEN_SIZEOF },
+       { NULL, TOKEN_INVALID } /* end marker: lookups loop until keyword is NULL */
+};
+
  struct tokenlist {
         struct token *token;
         struct tokenlist *next;
@@ -112,6 +152,22 @@ static int _putnextchar(const char c)
         return(ret_val);
  }
  
+/* Retag an identifier token as a keyword token when its text spells a keyword. */
+static void _identify_token(struct token *t)
+{
+       int idx = 0;
+
+       assert(t->type == TOKEN_IDENTIFIER);
+       /* _keywords ends with a NULL keyword entry; scan until it is reached. */
+       while(_keywords[idx].keyword) {
+               if(!token_cmp(t, _keywords[idx].keyword)) {
+                       t->type = _keywords[idx].type;
+                       return;
+               }
+               idx++;
+       }
+}
+
  struct token *_nexttoken(void)
  {
         struct token *tok;
@@ -421,7 +477,7 @@ struct token *_nexttoken(void)
  
                 case STATE_CHR:
                         if(!tok) {
-                               tok = token_new_from_str(TOKEN_CHAR, _line, _col, "'");
+                               tok = token_new_from_str(TOKEN_CHAR_LITERAL, _line, _col, "'");
                                 assert(tok);
                         }
  
@@ -445,6 +501,7 @@ struct token *_nexttoken(void)
                                 token_append_char(tok, c);
                         } else {
                                 _putnextchar(c);
+                               _identify_token(tok);
                                 return(tok);
                         }
  
diff --git a/src/main.c b/src/main.c

index 88032c043c75a1c44a9627e6959719854502bb91..3b2f0d69c2c6a34414c58a90cc2556ca8fdd0ac7 100644 (file)
--- a/src/main.c
+++ b/src/main.c
@@ -7,7 +7,8 @@ int main(int argc, char *argv[])
         struct token *tok;
  
         while((tok = lex_gettoken())) {
-               printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
+               printf("Token at %4d:%3d: \"%s\" %d\n", tok->line, tok->column,
+                      str_value(tok->value), tok->type);
                 token_free(tok);
         }
  
diff --git a/src/token.c b/src/token.c

index 2c2ebb2a8f28cc6fac1d7943bb59ad9c97471249..03b1bdbc7df095cbcbb03a42e03ee9b857be238c 100644 (file)
--- a/src/token.c
+++ b/src/token.c
@@ -114,3 +114,11 @@ const char *token_getvalue(struct token *tok)
  {
         return(str_value(tok->value));
  }
+
+/*
+ * Compare the token's text with str; the result is strcmp()'s, so 0 means equal.
+ */
+int token_cmp(struct token *tok, const char *str)
+{
+       return(strcmp(str_value(tok->value), str));
+}
diff --git a/src/token.h b/src/token.h

index c353dbb88d8b426c6ab0c6fc19d21350096ea60b..c40efe302bb7eb2e62cd71c0ace692f0b32d9da9 100644 (file)
--- a/src/token.h
+++ b/src/token.h
@@ -17,7 +17,7 @@ typedef enum {
         TOKEN_ASSIGN_SHR,
         TOKEN_ASSIGN_SUB,
         TOKEN_ASSIGN_XOR,
-       TOKEN_CHAR,
+       TOKEN_CHAR_LITERAL,
         TOKEN_COLON,
         TOKEN_COMMA,
         TOKEN_COMMENT,
@@ -53,7 +53,42 @@ typedef enum {
         TOKEN_SHL,
         TOKEN_SHR,
         TOKEN_STRING,
-       TOKEN_SUB
+       TOKEN_SUB,
+
+       /* keywords */
+       TOKEN_AUTO,
+       TOKEN_REGISTER,
+       TOKEN_STATIC,
+       TOKEN_EXTERN,
+       TOKEN_TYPEDEF,
+       TOKEN_VOID,
+       TOKEN_CHAR,
+       TOKEN_SHORT,
+       TOKEN_INT,
+       TOKEN_LONG,
+       TOKEN_FLOAT,
+       TOKEN_DOUBLE,
+       TOKEN_SIGNED,
+       TOKEN_UNSIGNED,
+       TOKEN_CONST,
+       TOKEN_VOLATILE,
+       TOKEN_RESTRICT,
+       TOKEN_STRUCT,
+       TOKEN_UNION,
+       TOKEN_ENUM,
+       TOKEN_GOTO,
+       TOKEN_CASE,
+       TOKEN_BREAK,
+       TOKEN_CONTINUE,
+       TOKEN_DEFAULT,
+       TOKEN_IF,
+       TOKEN_ELSE,
+       TOKEN_SWITCH,
+       TOKEN_WHILE,
+       TOKEN_DO,
+       TOKEN_FOR,
+       TOKEN_RETURN,
+       TOKEN_SIZEOF
  } token_type_t;
  
  struct token {
@@ -71,5 +106,6 @@ void token_free(struct token*);
  int token_setvalue(struct token*, const char*);
  int token_append_str(struct token*, const char*);
  int token_append_char(struct token*, const char);
+int token_cmp(struct token*, const char*);
  
  #endif /* TOKEN_H */
author	Matthias Kruk <m@m10k.eu>
	Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
committer	Matthias Kruk <m@m10k.eu>
	Mon, 1 Jun 2020 01:13:41 +0000 (10:13 +0900)
src/lex.c		patch \| blob \| history
src/main.c		patch \| blob \| history
src/token.c		patch \| blob \| history
src/token.h		patch \| blob \| history