From: Matthias Kruk Date: Thu, 28 May 2020 00:27:28 +0000 (+0900) Subject: tokenize: Implement correct tokenization of comments and operators starting with... X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=1963f5e74460e8abf5141209a9c58fe537c04a70;p=ccc tokenize: Implement correct tokenization of comments and operators starting with < and > --- diff --git a/src/tokenize.c b/src/tokenize.c index 73fe060..2f6841d 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -6,11 +6,16 @@ #include "list.h" #include "token.h" -#define TABWIDTH 8 -#define STATE_NONE 0 -#define STATE_COMMENT 1 -#define STATE_OP 2 -#define STATE_DIV 3 +#define TABWIDTH 8 + +#define STATE_NONE 0 +#define STATE_COMMENT 1 +#define STATE_COMMENT_END 2 +#define STATE_DIV 3 +#define STATE_LT 4 +#define STATE_SHL 5 +#define STATE_GT 6 +#define STATE_SHR 7 #define STATE_DONE 8 @@ -79,167 +84,16 @@ int putnext(const char c) return(ret_val); } -#if 0 -int comment(void) -{ - struct token *tok; - int cstate; - char c; - -#define CSTATE_HEAD 0 -#define CSTATE_BODY 1 -#define CSTATE_FOOT 2 -#define CSTATE_DONE 3 - - cstate = CSTATE_HEAD; - - tok = token_new(TOKEN_COMMENT); - assert(tok); - - token_setpos(tok, _line, _col); - - str_appendc(tok->value, '/'); - - while(cstate != CSTATE_DONE) { - c = getnext(); - - str_appendc(tok->value, c); - - switch(cstate) { - case CSTATE_HEAD: - assert(c == '*'); - cstate = CSTATE_BODY; - break; - - case CSTATE_BODY: - if(c == '*') { - cstate = CSTATE_FOOT; - } - break; - - case CSTATE_FOOT: - if(c == '/') { - cstate = CSTATE_DONE; - } else { - cstate = CSTATE_BODY; - } - - break; - - default: - assert(0); - break; - } - } - - list_append(&tokens, tok); - - return(0); -} - -int div(void) -{ - char c; - - c = getnext(); - -int none(void) -{ - char c; - int ret_val; - char lookahead; - - c = getnext(); - - if(!c) { - return(STATE_DONE); - } - - switch(c) { - case ' ': - case '\t': - case '\n': - ret_val = STATE_NONE; - break; - - case '/': - lookahead = getnext(); - - if(lookahead == '*') { - ret_val = STATE_COMMENT; - } else { - ret_val = STATE_DIV; - } - - putnext(lookahead); - - break; - - default: - ret_val = STATE_NONE; - break; -/* - case '"': - case '\'': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - ret_val = STATE_LITERAL; - break; - */ -/* - case '+': - case '-': - case '*': - case '=': - ret_val = STATE_OP; - break; -*/ -/* - case '<': - case '>': - case '?': - case ',': - case ';': - case ':': - case '!': - case '%': - case '&': - case '(': - case ')': - case '~': - case '^': - case '|': - case '[': - case ']': - case '{': - case '}': -*/ - } - - return(ret_val); -} -#endif /* 0 */ - struct token *nexttoken(void) { struct token *tok; - int state; + char c; state = STATE_NONE; tok = NULL; - while(1) { - char c = getnext(); - + while((c = getnext())) { switch(state) { case STATE_NONE: switch(c) { @@ -247,6 +101,14 @@ struct token *nexttoken(void) state = STATE_DIV; break; + case '<': + state = STATE_LT; + break; + + case '>': + state = STATE_GT; + break; + case '\r': fprintf(stderr, "DOS user, eh?\n"); case ' ': @@ -256,7 +118,7 @@ struct token *nexttoken(void) default: /* unrecognized token */ - fprintf(stderr, "Unrecognized token at %d:%d\n", _line, _col); + fprintf(stderr, "Unrecognized token at %d:%d [%02x]\n", _line, _col, c); return(NULL); } @@ -270,7 +132,6 @@ struct token *nexttoken(void) case '=': return(token_new2(_line, _col, "/=")); - break; default: putnext(c); @@ -287,11 +148,71 @@ struct token *nexttoken(void) str_appendc(tok->value, c); - if(c == '"') { + if(c == '*') { + state = STATE_COMMENT_END; + } + + break; + + case STATE_COMMENT_END: + str_appendc(tok->value, c); + + if(c == '/') { return(tok); } + state = STATE_COMMENT; + break; + + case STATE_LT: + switch(c) { + case '<': + state = STATE_SHL; + break; + + case '=': + return(token_new2(_line, _col, "<=")); + break; + + default: + putnext(c); + return(token_new2(_line, _col, "<")); + } + break; + + case STATE_SHL: + if(c == '=') { + return(token_new2(_line, _col, "<<=")); + } + + putnext(c); + return(token_new2(_line, _col, "<<")); + + case STATE_GT: + switch(c) { + case '>': + state = STATE_SHR; + break; + + case '=': + return(token_new2(_line, _col, ">=")); + + default: + putnext(c); + return(token_new2(_line, _col, ">")); + } + + break; + + case STATE_SHR: + if(c == '=') { + return(token_new2(_line, _col, ">>=")); + } + + putnext(c); + return(token_new2(_line, _col, ">>")); + } }