git.corax.cc Git - ccc/commitdiff
tokenize: Implement tokenization of bitwise operators, string literals, character...
author: Matthias Kruk <m@m10k.eu>
Fri, 29 May 2020 11:01:53 +0000 (20:01 +0900)
committer: Matthias Kruk <m@m10k.eu>
Fri, 29 May 2020 11:01:53 +0000 (20:01 +0900)
src/tokenize.c

index 2f6841df71a6c7c4621239c3994a8c74686d1561..88371e2867a908fa066f7a97140834d64d0a209c 100644 (file)
 #define STATE_SHL         5
 #define STATE_GT          6
 #define STATE_SHR         7
+#define STATE_NOT         8
+#define STATE_MOD         9
+#define STATE_AND         10
+#define STATE_OR          11
+#define STATE_XOR         12
+#define STATE_ASSIGN      13
+#define STATE_STRING      14
+#define STATE_STRING_ESC  15
+#define STATE_CHR         16
+#define STATE_CHR_ESC     17
 
 #define STATE_DONE    8
 
@@ -109,6 +119,34 @@ struct token *nexttoken(void)
                                state = STATE_GT;
                                break;
 
+                       case '!':
+                               state = STATE_NOT;
+                               break;
+
+                       case '%':
+                               state = STATE_MOD;
+                               break;
+
+                       case '&':
+                               state = STATE_AND;
+                               break;
+
+                       case '|':
+                               state = STATE_OR;
+                               break;
+
+                       case '^':
+                               state = STATE_XOR;
+                               break;
+
+                       case '=':
+                               state = STATE_ASSIGN;
+                               break;
+
+                       case '"':
+                               state = STATE_STRING;
+                               break;
+                               
                        case '\r':
                                fprintf(stderr, "DOS user, eh?\n");
                        case ' ':
@@ -213,14 +251,111 @@ struct token *nexttoken(void)
                        putnext(c);
                        return(token_new2(_line, _col, ">>"));
 
+               case STATE_NOT:
+                       if(c == '=') {
+                               return(token_new2(_line, _col, "!="));
+                       }
+
+                       putnext(c);
+                       return(token_new2(_line, _col, "!"));
+
+               case STATE_MOD:
+                       if(c == '=') {
+                               return(token_new2(_line, _col, "%="));
+                       }
+
+                       putnext(c);
+                       return(token_new2(_line, _col, "%"));
+
+               case STATE_AND:
+                       switch(c) {
+                       case '&':
+                               return(token_new2(_line, _col, "&&"));
+
+                       case '=':
+                               return(token_new2(_line, _col, "&="));
+
+                       default:
+                               putnext(c);
+                               return(token_new2(_line, _col, "&"));
+                       }
+
+               case STATE_OR:
+                       switch(c) {
+                       case '|':
+                               return(token_new2(_line, _col, "||"));
+
+                       case '=':
+                               return(token_new2(_line, _col, "|="));
+
+                       default:
+                               putnext(c);
+                               return(token_new2(_line, _col, "|"));
+                       }
+
+               case STATE_XOR:
+                       if(c == '=') {
+                               return(token_new2(_line, _col, "^="));
+                       }
+
+                       putnext(c);
+                       return(token_new2(_line, _col, "^"));
+
+               case STATE_ASSIGN:
+                       if(c == '=') {
+                               return(token_new2(_line, _col, "=="));
+                       }
+
+                       putnext(c);
+                       return(token_new2(_line, _col, "="));
+
+               case STATE_STRING:
+                       if(!tok) {
+                               tok = token_new2(_line, _col, "\"");
+                               assert(tok);
+                       }
+
+                       str_appendc(tok->value, c);
+                       
+                       if(c == '\\') {
+                               state = STATE_STRING_ESC;
+                       } else if(c == '"') {
+                               return(tok);
+                       }
+
+                       break;
+
+               case STATE_STRING_ESC:
+                       str_appendc(tok->value, c);
+                       state = STATE_STRING;
+                       break;
+
+               case STATE_CHR:
+                       if(!tok) {
+                               tok = token_new2(_line, _col, "'");
+                               assert(tok);
+                       }
+
+                       str_appendc(tok->value, c);
+
+                       if(c == '\\') {
+                               state = STATE_CHR_ESC;
+                       } else if(c == '\'') {
+                               return(tok);
+                       }
+
+                       break;
+
+               case STATE_CHR_ESC:
+                       str_appendc(tok->value, c);
+                       state = STATE_CHR;
+                       break;
                }
        }
 
        return(NULL);
 }
 
-
-
 int main(int argc, char *argv[])
 {
        int ret_val;