From: Matthias Kruk Date: Fri, 29 May 2020 11:01:53 +0000 (+0900) Subject: tokenize: Implement tokenization of bitwise operators, string literals, character... X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=aadf1928ba9daf1a49e5d428dfbef936086fa61f;p=ccc tokenize: Implement tokenization of bitwise operators, string literals, character literals, and some others --- diff --git a/src/tokenize.c b/src/tokenize.c index 2f6841d..88371e2 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -16,6 +16,16 @@ #define STATE_SHL 5 #define STATE_GT 6 #define STATE_SHR 7 +#define STATE_NOT 8 +#define STATE_MOD 9 +#define STATE_AND 10 +#define STATE_OR 11 +#define STATE_XOR 12 +#define STATE_ASSIGN 13 +#define STATE_STRING 14 +#define STATE_STRING_ESC 15 +#define STATE_CHR 16 +#define STATE_CHR_ESC 17 #define STATE_DONE 8 @@ -109,6 +119,34 @@ struct token *nexttoken(void) state = STATE_GT; break; + case '!': + state = STATE_NOT; + break; + + case '%': + state = STATE_MOD; + break; + + case '&': + state = STATE_AND; + break; + + case '|': + state = STATE_OR; + break; + + case '^': + state = STATE_XOR; + break; + + case '=': + state = STATE_ASSIGN; + break; + + case '"': + state = STATE_STRING; + break; + case '\r': fprintf(stderr, "DOS user, eh?\n"); case ' ': @@ -213,14 +251,111 @@ struct token *nexttoken(void) putnext(c); return(token_new2(_line, _col, ">>")); + case STATE_NOT: + if(c == '=') { + return(token_new2(_line, _col, "!=")); + } + + putnext(c); + return(token_new2(_line, _col, "!")); + + case STATE_MOD: + if(c == '=') { + return(token_new2(_line, _col, "%=")); + } + + putnext(c); + return(token_new2(_line, _col, "%")); + + case STATE_AND: + switch(c) { + case '&': + return(token_new2(_line, _col, "&&")); + + case '=': + return(token_new2(_line, _col, "&=")); + + default: + putnext(c); + return(token_new2(_line, _col, "&")); + } + + case STATE_OR: + switch(c) { + case '|': + return(token_new2(_line, _col, "||")); + + case '=': + return(token_new2(_line, _col, "|=")); + + default: + putnext(c); + return(token_new2(_line, _col, "|")); + } + + case STATE_XOR: + if(c == '=') { + return(token_new2(_line, _col, "^=")); + } + + putnext(c); + return(token_new2(_line, _col, "^")); + + case STATE_ASSIGN: + if(c == '=') { + return(token_new2(_line, _col, "==")); + } + + putnext(c); + return(token_new2(_line, _col, "=")); + + case STATE_STRING: + if(!tok) { + tok = token_new2(_line, _col, "\""); + assert(tok); + } + + str_appendc(tok->value, c); + + if(c == '\\') { + state = STATE_STRING_ESC; + } else if(c == '"') { + return(tok); + } + + break; + + case STATE_STRING_ESC: + str_appendc(tok->value, c); + state = STATE_STRING; + break; + + case STATE_CHR: + if(!tok) { + tok = token_new2(_line, _col, "'"); + assert(tok); + } + + str_appendc(tok->value, c); + + if(c == '\\') { + state = STATE_CHR_ESC; + } else if(c == '\'') { + return(tok); + } + + break; + + case STATE_CHR_ESC: + str_appendc(tok->value, c); + state = STATE_CHR; + break; } } return(NULL); } - - int main(int argc, char *argv[]) { int ret_val;