From: Matthias Kruk Date: Sun, 31 May 2020 04:16:17 +0000 (+0900) Subject: lex: Refactor the tokenizer into a lexer module X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=a7b85485f63a2c4256bf6f3ebcf0d5e1067a702e;p=ccc lex: Refactor the tokenizer into a lexer module --- diff --git a/src/Makefile b/src/Makefile index 9560990..1a0b10f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,4 @@ -OBJECTS = str.o token.o list.o lex.o +OBJECTS = str.o token.o list.o lex.o main.o OUTPUT = c3 PHONY = clean diff --git a/src/lex.c b/src/lex.c index c70f5e3..6175fd6 100644 --- a/src/lex.c +++ b/src/lex.c @@ -48,9 +48,9 @@ static int _line = 1; static int _col = 0; static int _pline = 1; static int _pcol = 1; -list_t *tokens = NULL; +static struct token *_next_token = NULL; -int getnext(void) +static int _getnextchar(void) { int ret_val; @@ -90,7 +90,7 @@ int getnext(void) return(ret_val); } -int putnext(const char c) +static int _putnextchar(const char c) { int ret_val; @@ -107,16 +107,20 @@ int putnext(const char c) return(ret_val); } -struct token *nexttoken(void) +struct token *lex_gettoken(void) { struct token *tok; int state; char c; + if(_next_token) { + return(_next_token); + } + state = STATE_NONE; tok = NULL; - while((c = getnext())) { + while((c = _getnextchar())) { switch(state) { case STATE_NONE: switch(c) { @@ -236,7 +240,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "/=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "/")); } @@ -277,7 +281,7 @@ struct token *nexttoken(void) break; default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "<")); } @@ -288,7 +292,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "<<=")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "<<")); case STATE_GT: @@ -301,7 +305,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, ">=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, ">")); } @@ 
-312,7 +316,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, ">>=")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, ">>")); case STATE_NOT: @@ -320,7 +324,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "!=")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "!")); case STATE_MOD: @@ -328,7 +332,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "%=")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "%")); case STATE_AND: @@ -340,7 +344,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "&=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "&")); } @@ -353,7 +357,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "|=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "|")); } @@ -362,7 +366,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "^=")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "^")); case STATE_ASSIGN: @@ -370,7 +374,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "==")); } - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "=")); case STATE_STRING: @@ -419,7 +423,7 @@ struct token *nexttoken(void) if(identifier_chr(c)) { str_appendc(tok->value, c); } else { - putnext(c); + _putnextchar(c); return(tok); } @@ -431,7 +435,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "*=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "*")); } @@ -444,7 +448,7 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "+=")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "+")); } @@ -460,12 +464,12 @@ struct token *nexttoken(void) return(token_new2(_line, _col, "->")); default: - putnext(c); + _putnextchar(c); return(token_new2(_line, _col, "-")); } case STATE_DOT: - putnext(c); + _putnextchar(c); if(c >= '0' && c <= '9') { state = 
STATE_NUM; @@ -480,12 +484,12 @@ struct token *nexttoken(void) str_appendc(tok->value, 'x'); state = STATE_NUM; } else if(c >= '0' && c <= '9') { - putnext(c); + _putnextchar(c); state = STATE_NUM; } else if(c == '.') { } else { - putnext(c); + _putnextchar(c); return(tok); } @@ -496,7 +500,7 @@ struct token *nexttoken(void) if(c >= '0' && c <= '9' || c == '.') { str_appendc(tok->value, c); } else { - putnext(c); + _putnextchar(c); return(tok); } @@ -507,15 +511,15 @@ struct token *nexttoken(void) return(tok); } -int main(int argc, char *argv[]) +int lex_puttoken(struct token *tok) { int ret_val; - struct token *tok; - - ret_val = 0; - while((tok = nexttoken())) { - printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value)); + if(_next_token) { + ret_val = -EALREADY; + } else { + _next_token = tok; + ret_val = 0; } return(ret_val); diff --git a/src/lex.h b/src/lex.h new file mode 100644 index 0000000..a4fc46e --- /dev/null +++ b/src/lex.h @@ -0,0 +1,9 @@ +#ifndef LEX_H +#define LEX_H + +#include "token.h" + +struct token *lex_gettoken(void); +int lex_puttoken(struct token*); + +#endif /* LEX_H */ diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..88032c0 --- /dev/null +++ b/src/main.c @@ -0,0 +1,15 @@ +#include <stdio.h> +#include "lex.h" +#include "str.h" + +int main(int argc, char *argv[]) +{ + struct token *tok; + + while((tok = lex_gettoken())) { + printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value)); + token_free(tok); + } + + return(0); +}