#define STATE_STRING_ESC 15
#define STATE_CHR 16
#define STATE_CHR_ESC 17
+#define STATE_ID 18
+#define STATE_ADD 19
+#define STATE_SUB 20
+#define STATE_MUL 21
+#define STATE_DOT 22
+#define STATE_ZERO 23
+#define STATE_NUM 24
#define STATE_DONE 8
+/*
+ * Character classes used when scanning identifiers.
+ *   identifier_firstchr(c): '_' or a letter a-z / A-Z
+ *   identifier_chr(c):      the above, or a decimal digit 0-9
+ * The letter tests are plain range comparisons, so they assume the letters
+ * are contiguous in the execution character set (true for ASCII).
+ * Both macros evaluate their argument several times: pass an expression
+ * without side effects.
+ */
+#define identifier_firstchr(_c) ((_c) == '_' || \
+ ((_c) >= 'a' && (_c) <= 'z') || \
+ ((_c) >= 'A' && (_c) <= 'Z'))
+#define identifier_chr(_c) (identifier_firstchr(_c) || \
+ ((_c) >= '0' && (_c) <= '9'))
+
static char _next = 0;
static int _state = STATE_NONE;
static int _line = 1;
case '"':
state = STATE_STRING;
break;
-
+
+ case '\'':
+ state = STATE_CHR;
+ break;
+
+ case '*':
+ state = STATE_MUL;
+ break;
+
+ case '+':
+ state = STATE_ADD;
+ break;
+
+ case '-':
+ state = STATE_SUB;
+ break;
+
+ case '.':
+ state = STATE_DOT;
+ break;
+
+ case '~':
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case ':':
+ case ';':
+ case ',':
+ case '?':
+ return(token_new_from_char(_line, _col, c));
+
case '\r':
fprintf(stderr, "DOS user, eh?\n");
case ' ':
break;
default:
+ if(identifier_firstchr(c)) {
+ /* looks like an identifier */
+ tok = token_new_from_char(_line, _col, c);
+ assert(tok);
+ state = STATE_ID;
+ break;
+ } else if(c == '0') {
+ tok = token_new_from_char(_line, _col, c);
+ assert(tok);
+ state = STATE_ZERO;
+ break;
+ } else if(c > '0' && c <= '9') {
+ tok = token_new_from_char(_line, _col, c);
+ assert(tok);
+ state = STATE_NUM;
+ break;
+ }
+
/* unrecognized token */
- fprintf(stderr, "Unrecognized token at %d:%d [%02x]\n", _line, _col, c);
+ fprintf(stderr, "Unrecognized token at %d:%d ['%c']\n", _line, _col, c);
return(NULL);
}
}
str_appendc(tok->value, c);
-
+
if(c == '\\') {
state = STATE_STRING_ESC;
} else if(c == '"') {
str_appendc(tok->value, c);
state = STATE_CHR;
break;
+
+ case STATE_ID:
+ if(identifier_chr(c)) {
+ str_appendc(tok->value, c);
+ } else {
+ putnext(c);
+ return(tok);
+ }
+
+ break;
+
+ case STATE_MUL:
+ switch(c) {
+ case '=':
+ return(token_new2(_line, _col, "*="));
+
+ default:
+ putnext(c);
+ return(token_new2(_line, _col, "*"));
+ }
+
+ case STATE_ADD:
+ switch(c) {
+ case '+':
+ return(token_new2(_line, _col, "++"));
+
+ case '=':
+ return(token_new2(_line, _col, "+="));
+
+ default:
+ putnext(c);
+ return(token_new2(_line, _col, "+"));
+ }
+
+ case STATE_SUB:
+     /* A '-' has been consumed; the current character decides between
+      * "--", "-=" and a plain "-". */
+     switch(c) {
+     case '-':
+         return(token_new2(_line, _col, "--"));
+
+     case '=':
+         return(token_new2(_line, _col, "-="));
+
+     default:
+         /* Not part of the operator: push it back so it starts the next
+          * token, as STATE_ADD and STATE_MUL do. (This used to call
+          * putchar(), which wrote the character to stdout and lost it.) */
+         putnext(c);
+         return(token_new2(_line, _col, "-"));
+     }
+
+ case STATE_DOT:
+ /* A '.' has been consumed. The current character is always pushed back:
+  * a digit switches to STATE_NUM, anything else yields a "." token. */
+ /* NOTE(review): the '.' case that enters this state only sets the state,
+  * while STATE_NUM appends to tok->value and returns tok. No token is
+  * created on this path in the code visible here, and the '.' itself is
+  * not appended anywhere - confirm tok is valid at this point and that a
+  * literal such as ".5" keeps its leading dot. */
+ putnext(c);
+
+ if(c >= '0' && c <= '9') {
+ state = STATE_NUM;
+ } else {
+ return(token_new2(_line, _col, "."));
+ }
+
+ break;
+
+ case STATE_ZERO:
+     /* tok was created from a leading '0'; the character that follows
+      * selects what kind of literal this is. */
+     if(c == 'x' || c == 'X') {
+         /* hex prefix; recorded as lowercase 'x' for either spelling.
+          * NOTE(review): STATE_NUM only accepts 0-9 and '.', so hex
+          * digits a-f after the prefix currently end the token. */
+         str_appendc(tok->value, 'x');
+         state = STATE_NUM;
+     } else if(c >= '0' && c <= '9') {
+         /* further digits: push the character back and let STATE_NUM
+          * append it */
+         putnext(c);
+         state = STATE_NUM;
+     } else if(c == '.') {
+         /* "0." - keep the decimal point and let STATE_NUM collect the
+          * fraction. This branch used to be empty, which dropped the
+          * '.' so that "0.5" was tokenized as "05". */
+         str_appendc(tok->value, c);
+         state = STATE_NUM;
+     } else {
+         /* a lone "0": the character belongs to the next token */
+         putnext(c);
+         return(tok);
+     }
+
+     break;
+
+ case STATE_NUM:
+ /* FIXME: e, E may be in the middle, (u|U)(l|L|ll|LL) or vice versa may be at the end */
+
+ if(c >= '0' && c <= '9' || c == '.') {
+ str_appendc(tok->value, c);
+ } else {
+ putnext(c);
+ return(tok);
+ }
+
+ break;
}
}