-OBJECTS = str.o token.o list.o tokenize.o
-OUTPUT = tokenize
+OBJECTS = str.o token.o list.o lex.o
+OUTPUT = c3
PHONY = clean
all: $(OUTPUT)
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include "str.h"
+#include "list.h"
+#include "token.h"
+
+/* Column advance applied by getnext() for each tab character. */
+#define TABWIDTH 8
+
+/*
+ * Lexer states used by nexttoken(). STATE_NONE is the start state; every
+ * other state records which characters of a multi-character token have
+ * been consumed so far.
+ */
+#define STATE_NONE 0
+#define STATE_COMMENT 1
+#define STATE_COMMENT_END 2
+#define STATE_DIV 3
+#define STATE_LT 4
+#define STATE_SHL 5
+#define STATE_GT 6
+#define STATE_SHR 7
+#define STATE_NOT 8
+#define STATE_MOD 9
+#define STATE_AND 10
+#define STATE_OR 11
+#define STATE_XOR 12
+#define STATE_ASSIGN 13
+#define STATE_STRING 14
+#define STATE_STRING_ESC 15
+#define STATE_CHR 16
+#define STATE_CHR_ESC 17
+#define STATE_ID 18
+#define STATE_ADD 19
+#define STATE_SUB 20
+#define STATE_MUL 21
+#define STATE_DOT 22
+#define STATE_ZERO 23
+#define STATE_NUM 24
+
+/*
+ * Not used by the lexer yet. It used to be 8, which is also STATE_NOT; it
+ * now has its own value so the two can never be confused in a switch.
+ */
+#define STATE_DONE 25
+
+/*
+ * Character classes for identifiers. Both macros evaluate their argument
+ * more than once, so only pass expressions without side effects.
+ * The range checks assume contiguous letters, as in ASCII.
+ */
+#define identifier_firstchr(_c) ((_c) == '_' || \
+    ((_c) >= 'a' && (_c) <= 'z') || \
+    ((_c) >= 'A' && (_c) <= 'Z'))
+#define identifier_chr(_c) (identifier_firstchr(_c) || \
+    ((_c) >= '0' && (_c) <= '9'))
+
+static char _next = 0; /* one-character pushback slot filled by putnext(); 0 means "empty" */
+static int _state = STATE_NONE; /* not referenced by any function visible in this file */
+static int _line = 1; /* current input line; getnext() increments it on '\n' */
+static int _col = 0; /* current input column; getnext() advances it per character */
+static int _pline = 1; /* line recorded by getnext(); putnext() copies it back into _line */
+static int _pcol = 1; /* column recorded by getnext(); putnext() copies it back into _col */
+list_t *tokens = NULL; /* externally visible token list; unused in the visible code - presumably for later use */
+
+/*
+ * Return the next input character and update the position counters.
+ *
+ * A character stored in the pushback slot (_next) is handed out first;
+ * otherwise one character is read from stdin with getchar().
+ *
+ * Returns the character, or 0 at end of input. A NUL byte in the input is
+ * therefore indistinguishable from end of input.
+ *
+ * Before the position is advanced, the current line AND column are saved
+ * in _pline/_pcol for every character. putnext() restores both; the line
+ * used to be saved only for '\n', so a pushback on any later line reset
+ * _line to a stale value.
+ */
+int getnext(void)
+{
+    int ret_val;
+
+    if(_next) {
+        ret_val = _next;
+        _next = 0;
+    } else {
+        ret_val = getchar();
+
+        if(ret_val == EOF) {
+            ret_val = 0;
+        }
+    }
+
+    /* end of input: nothing was consumed, so the position stays put */
+    if(ret_val == 0) {
+        return(ret_val);
+    }
+
+    /* snapshot the position so that a single putnext() can undo this read */
+    _pline = _line;
+    _pcol = _col;
+
+    switch(ret_val) {
+    case '\n':
+        _line++;
+        _col = 0;
+        break;
+
+    case '\t':
+        /* fixed advance, not "up to the next tab stop" */
+        _col += TABWIDTH;
+        break;
+
+    default:
+        _col++;
+        break;
+    }
+
+    return(ret_val);
+}
+
+/*
+ * Push one character back so that the next getnext() returns it again.
+ *
+ * Only one character can be pending. If the slot is already occupied the
+ * call changes nothing and returns -EALREADY; otherwise it returns 0.
+ * The position counters are rewound to the values getnext() recorded in
+ * _pline/_pcol. Pushing back 0 leaves the slot looking empty, because 0
+ * is the "empty" marker.
+ */
+int putnext(const char c)
+{
+    if(_next) {
+        return(-EALREADY);
+    }
+
+    _next = c;
+
+    /* rewind to the position recorded by getnext() */
+    _line = _pline;
+    _col = _pcol;
+
+    return(0);
+}
+
+/*
+ * Read characters with getnext() until one complete token is recognised
+ * and return it.
+ *
+ * Returns a token created by token_new2()/token_new_from_char(), or NULL
+ * at end of input and after an unrecognised character (which is reported
+ * on stderr). Comments, string and character literals are returned as
+ * tokens including their delimiters. If input ends inside one of them,
+ * the partial token collected so far is returned.
+ *
+ * Line/column passed to the token constructors are the lexer position at
+ * the moment the token object is created.
+ *
+ * Fixes relative to the previous version:
+ *  - ".5" no longer dereferences a NULL token,
+ *  - "0.5" keeps its decimal point,
+ *  - a comment ending in several stars followed by a slash is terminated,
+ *  - hexadecimal digits after "0x" are consumed,
+ *  - an operator that is the very last thing in the input is returned
+ *    instead of being dropped.
+ */
+struct token *nexttoken(void)
+{
+    struct token *tok;
+    int state;
+    int hex;
+    char c;
+
+    state = STATE_NONE;
+    tok = NULL;
+    hex = 0;
+
+    while((c = getnext())) {
+        switch(state) {
+        case STATE_NONE:
+            switch(c) {
+            case '/':
+                state = STATE_DIV;
+                break;
+
+            case '<':
+                state = STATE_LT;
+                break;
+
+            case '>':
+                state = STATE_GT;
+                break;
+
+            case '!':
+                state = STATE_NOT;
+                break;
+
+            case '%':
+                state = STATE_MOD;
+                break;
+
+            case '&':
+                state = STATE_AND;
+                break;
+
+            case '|':
+                state = STATE_OR;
+                break;
+
+            case '^':
+                state = STATE_XOR;
+                break;
+
+            case '=':
+                state = STATE_ASSIGN;
+                break;
+
+            case '"':
+                state = STATE_STRING;
+                break;
+
+            case '\'':
+                state = STATE_CHR;
+                break;
+
+            case '*':
+                state = STATE_MUL;
+                break;
+
+            case '+':
+                state = STATE_ADD;
+                break;
+
+            case '-':
+                state = STATE_SUB;
+                break;
+
+            case '.':
+                state = STATE_DOT;
+                break;
+
+            /* single-character tokens need no lookahead */
+            case '~':
+            case '(':
+            case ')':
+            case '{':
+            case '}':
+            case '[':
+            case ']':
+            case ':':
+            case ';':
+            case ',':
+            case '?':
+                return(token_new_from_char(_line, _col, c));
+
+            case '\r':
+                fprintf(stderr, "DOS user, eh?\n");
+                /* fallthrough: a carriage return is otherwise whitespace */
+            case ' ':
+            case '\t':
+            case '\n':
+                break;
+
+            default:
+                if(identifier_firstchr(c)) {
+                    /* looks like an identifier */
+                    state = STATE_ID;
+                } else if(c == '0') {
+                    state = STATE_ZERO;
+                } else if(c > '0' && c <= '9') {
+                    state = STATE_NUM;
+                } else {
+                    /* unrecognized token */
+                    fprintf(stderr, "Unrecognized token at %d:%d ['%c']\n", _line, _col, c);
+                    return(NULL);
+                }
+
+                tok = token_new_from_char(_line, _col, c);
+                assert(tok);
+                break;
+            }
+
+            break;
+
+        case STATE_DIV:
+            switch(c) {
+            case '*':
+                state = STATE_COMMENT;
+                break;
+
+            case '=':
+                return(token_new2(_line, _col, "/="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "/"));
+            }
+
+            break;
+
+        case STATE_COMMENT:
+            /* the token is created lazily on the first character after the opener */
+            if(!tok) {
+                tok = token_new2(_line, _col, "/*");
+                assert(tok);
+            }
+
+            str_appendc(tok->value, c);
+
+            if(c == '*') {
+                state = STATE_COMMENT_END;
+            }
+
+            break;
+
+        case STATE_COMMENT_END:
+            str_appendc(tok->value, c);
+
+            if(c == '/') {
+                return(tok);
+            }
+
+            /*
+             * Another star may still be followed by the closing slash, so
+             * only leave this state for characters other than '*'.
+             */
+            if(c != '*') {
+                state = STATE_COMMENT;
+            }
+
+            break;
+
+        case STATE_LT:
+            switch(c) {
+            case '<':
+                state = STATE_SHL;
+                break;
+
+            case '=':
+                return(token_new2(_line, _col, "<="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "<"));
+            }
+
+            break;
+
+        case STATE_SHL:
+            if(c == '=') {
+                return(token_new2(_line, _col, "<<="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "<<"));
+
+        case STATE_GT:
+            switch(c) {
+            case '>':
+                state = STATE_SHR;
+                break;
+
+            case '=':
+                return(token_new2(_line, _col, ">="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, ">"));
+            }
+
+            break;
+
+        case STATE_SHR:
+            if(c == '=') {
+                return(token_new2(_line, _col, ">>="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, ">>"));
+
+        case STATE_NOT:
+            if(c == '=') {
+                return(token_new2(_line, _col, "!="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "!"));
+
+        case STATE_MOD:
+            if(c == '=') {
+                return(token_new2(_line, _col, "%="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "%"));
+
+        case STATE_AND:
+            switch(c) {
+            case '&':
+                return(token_new2(_line, _col, "&&"));
+
+            case '=':
+                return(token_new2(_line, _col, "&="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "&"));
+            }
+
+        case STATE_OR:
+            switch(c) {
+            case '|':
+                return(token_new2(_line, _col, "||"));
+
+            case '=':
+                return(token_new2(_line, _col, "|="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "|"));
+            }
+
+        case STATE_XOR:
+            if(c == '=') {
+                return(token_new2(_line, _col, "^="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "^"));
+
+        case STATE_ASSIGN:
+            if(c == '=') {
+                return(token_new2(_line, _col, "=="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "="));
+
+        case STATE_STRING:
+            if(!tok) {
+                tok = token_new2(_line, _col, "\"");
+                assert(tok);
+            }
+
+            str_appendc(tok->value, c);
+
+            if(c == '\\') {
+                state = STATE_STRING_ESC;
+            } else if(c == '"') {
+                return(tok);
+            }
+
+            break;
+
+        case STATE_STRING_ESC:
+            /* the escaped character is copied verbatim, even a quote */
+            str_appendc(tok->value, c);
+            state = STATE_STRING;
+            break;
+
+        case STATE_CHR:
+            if(!tok) {
+                tok = token_new2(_line, _col, "'");
+                assert(tok);
+            }
+
+            str_appendc(tok->value, c);
+
+            if(c == '\\') {
+                state = STATE_CHR_ESC;
+            } else if(c == '\'') {
+                return(tok);
+            }
+
+            break;
+
+        case STATE_CHR_ESC:
+            str_appendc(tok->value, c);
+            state = STATE_CHR;
+            break;
+
+        case STATE_ID:
+            if(identifier_chr(c)) {
+                str_appendc(tok->value, c);
+            } else {
+                putnext(c);
+                return(tok);
+            }
+
+            break;
+
+        case STATE_MUL:
+            if(c == '=') {
+                return(token_new2(_line, _col, "*="));
+            }
+
+            putnext(c);
+            return(token_new2(_line, _col, "*"));
+
+        case STATE_ADD:
+            switch(c) {
+            case '+':
+                return(token_new2(_line, _col, "++"));
+
+            case '=':
+                return(token_new2(_line, _col, "+="));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "+"));
+            }
+
+        case STATE_SUB:
+            switch(c) {
+            case '-':
+                return(token_new2(_line, _col, "--"));
+
+            case '=':
+                return(token_new2(_line, _col, "-="));
+
+            case '>':
+                return(token_new2(_line, _col, "->"));
+
+            default:
+                putnext(c);
+                return(token_new2(_line, _col, "-"));
+            }
+
+        case STATE_DOT:
+            putnext(c);
+
+            if(c >= '0' && c <= '9') {
+                /*
+                 * A number written without a leading zero. STATE_NUM
+                 * appends to tok, so the token has to exist before the
+                 * pushed-back digit is read again.
+                 */
+                tok = token_new2(_line, _col, ".");
+                assert(tok);
+                state = STATE_NUM;
+            } else {
+                return(token_new2(_line, _col, "."));
+            }
+
+            break;
+
+        case STATE_ZERO:
+            if(c == 'x' || c == 'X') {
+                /* the prefix is normalised to lower case */
+                str_appendc(tok->value, 'x');
+                hex = 1;
+                state = STATE_NUM;
+            } else if((c >= '0' && c <= '9') || c == '.') {
+                /* the decimal point used to be dropped here */
+                str_appendc(tok->value, c);
+                state = STATE_NUM;
+            } else {
+                putnext(c);
+                return(tok);
+            }
+
+            break;
+
+        case STATE_NUM:
+            /* FIXME: e, E may be in the middle, (u|U)(l|L|ll|LL) or vice versa may be at the end */
+            if((c >= '0' && c <= '9') || c == '.' ||
+               (hex && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')))) {
+                str_appendc(tok->value, c);
+            } else {
+                putnext(c);
+                return(tok);
+            }
+
+            break;
+        }
+    }
+
+    /* End of input: hand out whatever has been collected so far. */
+    if(tok) {
+        return(tok);
+    }
+
+    /*
+     * No token object yet, but an operator may be waiting for lookahead
+     * that will never arrive. Emit its shortest form instead of losing it.
+     */
+    switch(state) {
+    case STATE_DIV:
+        return(token_new2(_line, _col, "/"));
+
+    case STATE_LT:
+        return(token_new2(_line, _col, "<"));
+
+    case STATE_SHL:
+        return(token_new2(_line, _col, "<<"));
+
+    case STATE_GT:
+        return(token_new2(_line, _col, ">"));
+
+    case STATE_SHR:
+        return(token_new2(_line, _col, ">>"));
+
+    case STATE_NOT:
+        return(token_new2(_line, _col, "!"));
+
+    case STATE_MOD:
+        return(token_new2(_line, _col, "%"));
+
+    case STATE_AND:
+        return(token_new2(_line, _col, "&"));
+
+    case STATE_OR:
+        return(token_new2(_line, _col, "|"));
+
+    case STATE_XOR:
+        return(token_new2(_line, _col, "^"));
+
+    case STATE_ASSIGN:
+        return(token_new2(_line, _col, "="));
+
+    case STATE_MUL:
+        return(token_new2(_line, _col, "*"));
+
+    case STATE_ADD:
+        return(token_new2(_line, _col, "+"));
+
+    case STATE_SUB:
+        return(token_new2(_line, _col, "-"));
+
+    case STATE_DOT:
+        return(token_new2(_line, _col, "."));
+
+    default:
+        return(NULL);
+    }
+}
+
+/*
+ * Driver: pull tokens from nexttoken() until it returns NULL and print
+ * each one with its line and column. The command-line arguments are not
+ * used. Always returns 0.
+ */
+int main(int argc, char *argv[])
+{
+    struct token *tok;
+
+    for(tok = nexttoken(); tok; tok = nexttoken()) {
+        printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
+    }
+
+    return(0);
+}
+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <assert.h>
-#include "str.h"
-#include "list.h"
-#include "token.h"
-
-#define TABWIDTH 8
-
-#define STATE_NONE 0
-#define STATE_COMMENT 1
-#define STATE_COMMENT_END 2
-#define STATE_DIV 3
-#define STATE_LT 4
-#define STATE_SHL 5
-#define STATE_GT 6
-#define STATE_SHR 7
-#define STATE_NOT 8
-#define STATE_MOD 9
-#define STATE_AND 10
-#define STATE_OR 11
-#define STATE_XOR 12
-#define STATE_ASSIGN 13
-#define STATE_STRING 14
-#define STATE_STRING_ESC 15
-#define STATE_CHR 16
-#define STATE_CHR_ESC 17
-#define STATE_ID 18
-#define STATE_ADD 19
-#define STATE_SUB 20
-#define STATE_MUL 21
-#define STATE_DOT 22
-#define STATE_ZERO 23
-#define STATE_NUM 24
-
-#define STATE_DONE 8
-
-#define identifier_firstchr(_c) ((_c) == '_' || \
- (_c) >= 'a' && (_c) <= 'z' || \
- (_c) >= 'A' && (_c) <= 'Z')
-#define identifier_chr(_c) (identifier_firstchr(_c) || \
- (_c) >= '0' && (_c) <= '9')
-
-static char _next = 0;
-static int _state = STATE_NONE;
-static int _line = 1;
-static int _col = 0;
-static int _pline = 1;
-static int _pcol = 1;
-list_t *tokens = NULL;
-
-int getnext(void)
-{
- int ret_val;
-
- if(_next) {
- ret_val = _next;
- _next = 0;
- } else {
- ret_val = getchar();
-
- if(ret_val == EOF) {
- ret_val = 0;
- }
- }
-
- switch(ret_val) {
- case '\n':
- _pline = _line;
- _pcol = _col;
- _line++;
- _col = 0;
- break;
-
- case '\t':
- _pcol = _col;
- _col += TABWIDTH;
- break;
-
- default:
- _pcol = _col;
- _col++;
- break;
-
- case 0:
- break;
- }
-
- return(ret_val);
-}
-
-int putnext(const char c)
-{
- int ret_val;
-
- if(_next) {
- ret_val = -EALREADY;
- } else {
- _next = c;
- ret_val = 0;
-
- _col = _pcol;
- _line = _pline;
- }
-
- return(ret_val);
-}
-
-struct token *nexttoken(void)
-{
- struct token *tok;
- int state;
- char c;
-
- state = STATE_NONE;
- tok = NULL;
-
- while((c = getnext())) {
- switch(state) {
- case STATE_NONE:
- switch(c) {
- case '/':
- state = STATE_DIV;
- break;
-
- case '<':
- state = STATE_LT;
- break;
-
- case '>':
- state = STATE_GT;
- break;
-
- case '!':
- state = STATE_NOT;
- break;
-
- case '%':
- state = STATE_MOD;
- break;
-
- case '&':
- state = STATE_AND;
- break;
-
- case '|':
- state = STATE_OR;
- break;
-
- case '^':
- state = STATE_XOR;
- break;
-
- case '=':
- state = STATE_ASSIGN;
- break;
-
- case '"':
- state = STATE_STRING;
- break;
-
- case '\'':
- state = STATE_CHR;
- break;
-
- case '*':
- state = STATE_MUL;
- break;
-
- case '+':
- state = STATE_ADD;
- break;
-
- case '-':
- state = STATE_SUB;
- break;
-
- case '.':
- state = STATE_DOT;
- break;
-
- case '~':
- case '(':
- case ')':
- case '{':
- case '}':
- case '[':
- case ']':
- case ':':
- case ';':
- case ',':
- case '?':
- return(token_new_from_char(_line, _col, c));
-
- case '\r':
- fprintf(stderr, "DOS user, eh?\n");
- case ' ':
- case '\t':
- case '\n':
- break;
-
- default:
- if(identifier_firstchr(c)) {
- /* looks like an identifier */
- tok = token_new_from_char(_line, _col, c);
- assert(tok);
- state = STATE_ID;
- break;
- } else if(c == '0') {
- tok = token_new_from_char(_line, _col, c);
- assert(tok);
- state = STATE_ZERO;
- break;
- } else if(c > '0' && c <= '9') {
- tok = token_new_from_char(_line, _col, c);
- assert(tok);
- state = STATE_NUM;
- break;
- }
-
- /* unrecognized token */
- fprintf(stderr, "Unrecognized token at %d:%d ['%c']\n", _line, _col, c);
- return(NULL);
- }
-
- break;
-
- case STATE_DIV:
- switch(c) {
- case '*':
- state = STATE_COMMENT;
- break;
-
- case '=':
- return(token_new2(_line, _col, "/="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "/"));
- }
-
- break;
-
- case STATE_COMMENT:
- if(!tok) {
- tok = token_new2(_line, _col, "/*");
- assert(tok);
- }
-
- str_appendc(tok->value, c);
-
- if(c == '*') {
- state = STATE_COMMENT_END;
- }
-
- break;
-
- case STATE_COMMENT_END:
- str_appendc(tok->value, c);
-
- if(c == '/') {
- return(tok);
- }
-
- state = STATE_COMMENT;
- break;
-
- case STATE_LT:
- switch(c) {
- case '<':
- state = STATE_SHL;
- break;
-
- case '=':
- return(token_new2(_line, _col, "<="));
- break;
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "<"));
- }
-
- break;
-
- case STATE_SHL:
- if(c == '=') {
- return(token_new2(_line, _col, "<<="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, "<<"));
-
- case STATE_GT:
- switch(c) {
- case '>':
- state = STATE_SHR;
- break;
-
- case '=':
- return(token_new2(_line, _col, ">="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, ">"));
- }
-
- break;
-
- case STATE_SHR:
- if(c == '=') {
- return(token_new2(_line, _col, ">>="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, ">>"));
-
- case STATE_NOT:
- if(c == '=') {
- return(token_new2(_line, _col, "!="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, "!"));
-
- case STATE_MOD:
- if(c == '=') {
- return(token_new2(_line, _col, "%="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, "%"));
-
- case STATE_AND:
- switch(c) {
- case '&':
- return(token_new2(_line, _col, "&&"));
-
- case '=':
- return(token_new2(_line, _col, "&="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "&"));
- }
-
- case STATE_OR:
- switch(c) {
- case '|':
- return(token_new2(_line, _col, "||"));
-
- case '=':
- return(token_new2(_line, _col, "|="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "|"));
- }
-
- case STATE_XOR:
- if(c == '=') {
- return(token_new2(_line, _col, "^="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, "^"));
-
- case STATE_ASSIGN:
- if(c == '=') {
- return(token_new2(_line, _col, "=="));
- }
-
- putnext(c);
- return(token_new2(_line, _col, "="));
-
- case STATE_STRING:
- if(!tok) {
- tok = token_new2(_line, _col, "\"");
- assert(tok);
- }
-
- str_appendc(tok->value, c);
-
- if(c == '\\') {
- state = STATE_STRING_ESC;
- } else if(c == '"') {
- return(tok);
- }
-
- break;
-
- case STATE_STRING_ESC:
- str_appendc(tok->value, c);
- state = STATE_STRING;
- break;
-
- case STATE_CHR:
- if(!tok) {
- tok = token_new2(_line, _col, "'");
- assert(tok);
- }
-
- str_appendc(tok->value, c);
-
- if(c == '\\') {
- state = STATE_CHR_ESC;
- } else if(c == '\'') {
- return(tok);
- }
-
- break;
-
- case STATE_CHR_ESC:
- str_appendc(tok->value, c);
- state = STATE_CHR;
- break;
-
- case STATE_ID:
- if(identifier_chr(c)) {
- str_appendc(tok->value, c);
- } else {
- putnext(c);
- return(tok);
- }
-
- break;
-
- case STATE_MUL:
- switch(c) {
- case '=':
- return(token_new2(_line, _col, "*="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "*"));
- }
-
- case STATE_ADD:
- switch(c) {
- case '+':
- return(token_new2(_line, _col, "++"));
-
- case '=':
- return(token_new2(_line, _col, "+="));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "+"));
- }
-
- case STATE_SUB:
- switch(c) {
- case '-':
- return(token_new2(_line, _col, "--"));
-
- case '=':
- return(token_new2(_line, _col, "-="));
-
- case '>':
- return(token_new2(_line, _col, "->"));
-
- default:
- putnext(c);
- return(token_new2(_line, _col, "-"));
- }
-
- case STATE_DOT:
- putnext(c);
-
- if(c >= '0' && c <= '9') {
- state = STATE_NUM;
- } else {
- return(token_new2(_line, _col, "."));
- }
-
- break;
-
- case STATE_ZERO:
- if(c == 'x' || c == 'X') {
- str_appendc(tok->value, 'x');
- state = STATE_NUM;
- } else if(c >= '0' && c <= '9') {
- putnext(c);
- state = STATE_NUM;
- } else if(c == '.') {
-
- } else {
- putnext(c);
- return(tok);
- }
-
- break;
-
- case STATE_NUM:
- /* FIXME: e, E may be in the middle, (u|U)(l|L|ll|LL) or vice versa may be at the end */
- if(c >= '0' && c <= '9' || c == '.') {
- str_appendc(tok->value, c);
- } else {
- putnext(c);
- return(tok);
- }
-
- break;
- }
- }
-
- return(tok);
-}
-
-int main(int argc, char *argv[])
-{
- int ret_val;
- struct token *tok;
-
- ret_val = 0;
-
- while((tok = nexttoken())) {
- printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
- }
-
- return(ret_val);
-}