From: Matthias Kruk <m@m10k.eu>
Date: Sun, 31 May 2020 03:49:36 +0000 (+0900)
Subject: Rename tokenize.c to lex.c
X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=87d69e4b5c6378999aa682a4148f6f13719a0e82;p=ccc

Rename tokenize.c to lex.c and the output binary from tokenize to c3
---

diff --git a/src/Makefile b/src/Makefile
index 51280eb..9560990 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,5 +1,5 @@
-OBJECTS = str.o token.o list.o tokenize.o
-OUTPUT = tokenize
+OBJECTS = str.o token.o list.o lex.o
+OUTPUT = c3
 PHONY = clean
 
 all: $(OUTPUT)
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..c70f5e3
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,522 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include "str.h"
+#include "list.h"
+#include "token.h"
+
+#define TABWIDTH          8	/* amount getnext() adds to _col for a tab */
+
+#define STATE_NONE        0	/* nexttoken() start state: no token in progress */
+#define STATE_COMMENT     1
+#define STATE_COMMENT_END 2
+#define STATE_DIV         3
+#define STATE_LT          4
+#define STATE_SHL         5
+#define STATE_GT          6
+#define STATE_SHR         7
+#define STATE_NOT         8
+#define STATE_MOD         9
+#define STATE_AND         10
+#define STATE_OR          11
+#define STATE_XOR         12
+#define STATE_ASSIGN      13
+#define STATE_STRING      14
+#define STATE_STRING_ESC  15
+#define STATE_CHR         16
+#define STATE_CHR_ESC     17
+#define STATE_ID          18
+#define STATE_ADD         19
+#define STATE_SUB         20
+#define STATE_MUL         21
+#define STATE_DOT         22
+#define STATE_ZERO        23
+#define STATE_NUM         24
+
+#define STATE_DONE    8	/* NOTE(review): unused in this file; same value as STATE_NOT */
+
+#define identifier_firstchr(_c) ((_c) == '_' ||			\
+				 (_c) >= 'a' && (_c) <= 'z' ||	\
+				 (_c) >= 'A' && (_c) <= 'Z')	/* may start an identifier; evaluates _c more than once */
+#define identifier_chr(_c)      (identifier_firstchr(_c) ||	\
+				 (_c) >= '0' && (_c) <= '9')	/* may continue an identifier */
+
+static char _next = 0;	/* one-character pushback slot; 0 means empty */
+static int _state = STATE_NONE;	/* NOTE(review): not referenced anywhere in this file */
+static int _line = 1;	/* current input line, maintained by getnext() */
+static int _col = 0;	/* current input column, maintained by getnext() */
+static int _pline = 1;	/* line saved by getnext(), restored by putnext() */
+static int _pcol = 1;	/* column saved by getnext(), restored by putnext() */
+list_t *tokens = NULL;	/* NOTE(review): not referenced in this file; non-static, so possibly used elsewhere */
+
+/* Return the pushed-back character if there is one, otherwise the next byte
+ * of stdin (0 on EOF), advancing the _line/_col counters for what was read. */
+int getnext(void)
+{
+	int ret_val;
+
+	if(_next) {
+		ret_val = _next;
+		_next = 0;
+	} else {
+		ret_val = getchar();
+
+		if(ret_val == EOF) {
+			ret_val = 0;
+		}
+	}
+
+	if(ret_val) {
+		/* Save line AND column for every character: putnext() restores
+		 * both, so a stale _pline would rewind _line to an earlier line. */
+		_pline = _line;
+		_pcol = _col;
+
+		switch(ret_val) {
+		case '\n':
+			_line++;
+			_col = 0;
+			break;
+		case '\t':
+			_col += TABWIDTH;
+			break;
+		default:
+			_col++;
+			break;
+		}
+	}
+
+	return(ret_val);
+}
+
+/* Push c back so that the next getnext() call returns it again. Only one
+ * character can be pending: returns 0 on success, -EALREADY if one already is. */
+int putnext(const char c)
+{
+	if(_next) {
+		return(-EALREADY);
+	}
+
+	_next = c;
+
+	/* rewind the position counters to the values getnext() saved */
+	_line = _pline;
+	_col = _pcol;
+
+	return(0);
+}
+
+/* Lex one token from stdin. Returns NULL at end of input or on an unrecognized
+ * character; comments and string/char literals are returned with delimiters. */
+struct token *nexttoken(void)
+{
+	struct token *tok;
+	int state;
+	char c;
+
+	state = STATE_NONE;
+	tok = NULL;
+
+	while((c = getnext())) {
+		switch(state) {
+		case STATE_NONE:
+			switch(c) {
+			case '/':
+				state = STATE_DIV;
+				break;
+
+			case '<':
+				state = STATE_LT;
+				break;
+
+			case '>':
+				state = STATE_GT;
+				break;
+
+			case '!':
+				state = STATE_NOT;
+				break;
+
+			case '%':
+				state = STATE_MOD;
+				break;
+
+			case '&':
+				state = STATE_AND;
+				break;
+
+			case '|':
+				state = STATE_OR;
+				break;
+
+			case '^':
+				state = STATE_XOR;
+				break;
+
+			case '=':
+				state = STATE_ASSIGN;
+				break;
+
+			case '"':
+				state = STATE_STRING;
+				break;
+
+			case '\'':
+				state = STATE_CHR;
+				break;
+
+			case '*':
+				state = STATE_MUL;
+				break;
+
+			case '+':
+				state = STATE_ADD;
+				break;
+
+			case '-':
+				state = STATE_SUB;
+				break;
+
+			case '.':
+				state = STATE_DOT;
+				break;
+
+			case '~':
+			case '(':
+			case ')':
+			case '{':
+			case '}':
+			case '[':
+			case ']':
+			case ':':
+			case ';':
+			case ',':
+			case '?':
+				return(token_new_from_char(_line, _col, c));
+
+			case '\r':
+				fprintf(stderr, "DOS user, eh?\n"); /* falls through to the whitespace cases */
+			case ' ':
+			case '\t':
+			case '\n':
+				break;
+
+			default:
+				if(identifier_firstchr(c)) {
+					/* looks like an identifier */
+					tok = token_new_from_char(_line, _col, c);
+					assert(tok);
+					state = STATE_ID;
+					break;
+				} else if(c == '0') {
+					tok = token_new_from_char(_line, _col, c);
+					assert(tok);
+					state = STATE_ZERO;
+					break;
+				} else if(c > '0' && c <= '9') {
+					tok = token_new_from_char(_line, _col, c);
+					assert(tok);
+					state = STATE_NUM;
+					break;
+				}
+
+				/* unrecognized token */
+				fprintf(stderr, "Unrecognized token at %d:%d ['%c']\n", _line, _col, c);
+				return(NULL);
+			}
+
+			break;
+
+		case STATE_DIV:
+			switch(c) {
+			case '*':
+				state = STATE_COMMENT;
+				break;
+
+			case '=':
+				return(token_new2(_line, _col, "/="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "/"));
+			}
+
+			break;
+
+		case STATE_COMMENT:
+			if(!tok) {
+				tok = token_new2(_line, _col, "/*");
+				assert(tok);
+			}
+
+			str_appendc(tok->value, c);
+
+			if(c == '*') {
+				state = STATE_COMMENT_END;
+			}
+
+			break;
+
+		case STATE_COMMENT_END:
+			str_appendc(tok->value, c);
+
+			if(c == '/') {
+				return(tok);
+			}
+
+			state = STATE_COMMENT;
+			break;
+
+		case STATE_LT:
+			switch(c) {
+			case '<':
+				state = STATE_SHL;
+				break;
+
+			case '=':
+				return(token_new2(_line, _col, "<="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "<"));
+			}
+
+			break;
+
+		case STATE_SHL:
+			if(c == '=') {
+				return(token_new2(_line, _col, "<<="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, "<<"));
+
+		case STATE_GT:
+			switch(c) {
+			case '>':
+				state = STATE_SHR;
+				break;
+
+			case '=':
+				return(token_new2(_line, _col, ">="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, ">"));
+			}
+
+			break;
+
+		case STATE_SHR:
+			if(c == '=') {
+				return(token_new2(_line, _col, ">>="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, ">>"));
+
+		case STATE_NOT:
+			if(c == '=') {
+				return(token_new2(_line, _col, "!="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, "!"));
+
+		case STATE_MOD:
+			if(c == '=') {
+				return(token_new2(_line, _col, "%="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, "%"));
+
+		case STATE_AND:
+			switch(c) {
+			case '&':
+				return(token_new2(_line, _col, "&&"));
+
+			case '=':
+				return(token_new2(_line, _col, "&="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "&"));
+			}
+
+		case STATE_OR:
+			switch(c) {
+			case '|':
+				return(token_new2(_line, _col, "||"));
+
+			case '=':
+				return(token_new2(_line, _col, "|="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "|"));
+			}
+
+		case STATE_XOR:
+			if(c == '=') {
+				return(token_new2(_line, _col, "^="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, "^"));
+
+		case STATE_ASSIGN:
+			if(c == '=') {
+				return(token_new2(_line, _col, "=="));
+			}
+
+			putnext(c);
+			return(token_new2(_line, _col, "="));
+
+		case STATE_STRING:
+			if(!tok) {
+				tok = token_new2(_line, _col, "\"");
+				assert(tok);
+			}
+
+			str_appendc(tok->value, c);
+
+			if(c == '\\') {
+				state = STATE_STRING_ESC;
+			} else if(c == '"') {
+				return(tok);
+			}
+
+			break;
+
+		case STATE_STRING_ESC:
+			str_appendc(tok->value, c);
+			state = STATE_STRING;
+			break;
+
+		case STATE_CHR:
+			if(!tok) {
+				tok = token_new2(_line, _col, "'");
+				assert(tok);
+			}
+
+			str_appendc(tok->value, c);
+
+			if(c == '\\') {
+				state = STATE_CHR_ESC;
+			} else if(c == '\'') {
+				return(tok);
+			}
+
+			break;
+
+		case STATE_CHR_ESC:
+			str_appendc(tok->value, c);
+			state = STATE_CHR;
+			break;
+
+		case STATE_ID:
+			if(identifier_chr(c)) {
+				str_appendc(tok->value, c);
+			} else {
+				putnext(c);
+				return(tok);
+			}
+
+			break;
+
+		case STATE_MUL:
+			switch(c) {
+			case '=':
+				return(token_new2(_line, _col, "*="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "*"));
+			}
+
+		case STATE_ADD:
+			switch(c) {
+			case '+':
+				return(token_new2(_line, _col, "++"));
+
+			case '=':
+				return(token_new2(_line, _col, "+="));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "+"));
+			}
+
+		case STATE_SUB:
+			switch(c) {
+			case '-':
+				return(token_new2(_line, _col, "--"));
+
+			case '=':
+				return(token_new2(_line, _col, "-="));
+
+			case '>':
+				return(token_new2(_line, _col, "->"));
+
+			default:
+				putnext(c);
+				return(token_new2(_line, _col, "-"));
+			}
+
+		case STATE_DOT:
+			/* STATE_NUM appends to tok, so a ".5" literal needs it created here */
+			putnext(c);
+			tok = token_new2(_line, _col, ".");
+			if(c < '0' || c > '9') {
+				return(tok);
+			}
+			assert(tok);
+			state = STATE_NUM;
+			break;
+
+		case STATE_ZERO:
+			if(c == 'x' || c == 'X') {
+				str_appendc(tok->value, 'x');
+				state = STATE_NUM;
+			} else if(c >= '0' && c <= '9' || c == '.') {
+				/* hand the digit or '.' to STATE_NUM, which appends it */
+				putnext(c);
+				state = STATE_NUM;
+			} else {
+				putnext(c);
+				return(tok);
+			}
+
+			break;
+
+		case STATE_NUM:
+			/* FIXME: hex digits a-f/A-F after "0x" are not accepted; e, E may be in the middle, (u|U)(l|L|ll|LL) or vice versa may be at the end */
+			if(c >= '0' && c <= '9' || c == '.') {
+				str_appendc(tok->value, c);
+			} else {
+				putnext(c);
+				return(tok);
+			}
+
+			break;
+		}
+	}
+
+	return(tok);
+}
+
+/* Driver: print every token lexed from stdin until nexttoken() yields NULL. */
+int main(int argc, char *argv[])
+{
+	struct token *cur;
+
+	for(cur = nexttoken(); cur; cur = nexttoken()) {
+		printf("Token at %4d:%3d: \"%s\"\n",
+		       cur->line, cur->column, str_value(cur->value));
+	}
+
+	/* the exit status is 0 no matter why nexttoken() stopped */
+	return(0);
+}
diff --git a/src/tokenize.c b/src/tokenize.c
deleted file mode 100644
index c70f5e3..0000000
--- a/src/tokenize.c
+++ /dev/null
@@ -1,522 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <assert.h>
-#include "str.h"
-#include "list.h"
-#include "token.h"
-
-#define TABWIDTH          8
-
-#define STATE_NONE        0
-#define STATE_COMMENT     1
-#define STATE_COMMENT_END 2
-#define STATE_DIV         3
-#define STATE_LT          4
-#define STATE_SHL         5
-#define STATE_GT          6
-#define STATE_SHR         7
-#define STATE_NOT         8
-#define STATE_MOD         9
-#define STATE_AND         10
-#define STATE_OR          11
-#define STATE_XOR         12
-#define STATE_ASSIGN      13
-#define STATE_STRING      14
-#define STATE_STRING_ESC  15
-#define STATE_CHR         16
-#define STATE_CHR_ESC     17
-#define STATE_ID          18
-#define STATE_ADD         19
-#define STATE_SUB         20
-#define STATE_MUL         21
-#define STATE_DOT         22
-#define STATE_ZERO        23
-#define STATE_NUM         24
-
-#define STATE_DONE    8
-
-#define identifier_firstchr(_c) ((_c) == '_' ||			\
-				 (_c) >= 'a' && (_c) <= 'z' ||	\
-				 (_c) >= 'A' && (_c) <= 'Z')
-#define identifier_chr(_c)      (identifier_firstchr(_c) ||	\
-				 (_c) >= '0' && (_c) <= '9')
-
-static char _next = 0;
-static int _state = STATE_NONE;
-static int _line = 1;
-static int _col = 0;
-static int _pline = 1;
-static int _pcol = 1;
-list_t *tokens = NULL;
-
-int getnext(void)
-{
-	int ret_val;
-
-	if(_next) {
-		ret_val = _next;
-		_next = 0;
-	} else {
-		ret_val = getchar();
-
-		if(ret_val == EOF) {
-			ret_val = 0;
-		}
-	}
-
-	switch(ret_val) {
-	case '\n':
-		_pline = _line;
-		_pcol = _col;
-		_line++;
-		_col = 0;
-		break;
-
-	case '\t':
-		_pcol = _col;
-		_col += TABWIDTH;
-		break;
-
-	default:
-		_pcol = _col;
-		_col++;
-		break;
-
-	case 0:
-		break;
-	}
-
-	return(ret_val);
-}
-
-int putnext(const char c)
-{
-	int ret_val;
-
-	if(_next) {
-		ret_val = -EALREADY;
-	} else {
-		_next = c;
-		ret_val = 0;
-
-		_col = _pcol;
-		_line = _pline;
-	}
-
-	return(ret_val);
-}
-
-struct token *nexttoken(void)
-{
-	struct token *tok;
-	int state;
-	char c;
-
-	state = STATE_NONE;
-	tok = NULL;
-
-	while((c = getnext())) {
-		switch(state) {
-		case STATE_NONE:
-			switch(c) {
-			case '/':
-				state = STATE_DIV;
-				break;
-
-			case '<':
-				state = STATE_LT;
-				break;
-
-			case '>':
-				state = STATE_GT;
-				break;
-
-			case '!':
-				state = STATE_NOT;
-				break;
-
-			case '%':
-				state = STATE_MOD;
-				break;
-
-			case '&':
-				state = STATE_AND;
-				break;
-
-			case '|':
-				state = STATE_OR;
-				break;
-
-			case '^':
-				state = STATE_XOR;
-				break;
-
-			case '=':
-				state = STATE_ASSIGN;
-				break;
-
-			case '"':
-				state = STATE_STRING;
-				break;
-
-			case '\'':
-				state = STATE_CHR;
-				break;
-
-			case '*':
-				state = STATE_MUL;
-				break;
-
-			case '+':
-				state = STATE_ADD;
-				break;
-
-			case '-':
-				state = STATE_SUB;
-				break;
-
-			case '.':
-				state = STATE_DOT;
-				break;
-
-			case '~':
-			case '(':
-			case ')':
-			case '{':
-			case '}':
-			case '[':
-			case ']':
-			case ':':
-			case ';':
-			case ',':
-			case '?':
-				return(token_new_from_char(_line, _col, c));
-
-			case '\r':
-				fprintf(stderr, "DOS user, eh?\n");
-			case ' ':
-			case '\t':
-			case '\n':
-				break;
-
-			default:
-				if(identifier_firstchr(c)) {
-					/* looks like an identifier */
-					tok = token_new_from_char(_line, _col, c);
-					assert(tok);
-					state = STATE_ID;
-					break;
-				} else if(c == '0') {
-					tok = token_new_from_char(_line, _col, c);
-					assert(tok);
-					state = STATE_ZERO;
-					break;
-				} else if(c > '0' && c <= '9') {
-					tok = token_new_from_char(_line, _col, c);
-					assert(tok);
-					state = STATE_NUM;
-					break;
-				}
-
-				/* unrecognized token */
-				fprintf(stderr, "Unrecognized token at %d:%d ['%c']\n", _line, _col, c);
-				return(NULL);
-			}
-
-			break;
-
-		case STATE_DIV:
-			switch(c) {
-			case '*':
-				state = STATE_COMMENT;
-				break;
-
-			case '=':
-				return(token_new2(_line, _col, "/="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "/"));
-			}
-
-			break;
-
-		case STATE_COMMENT:
-			if(!tok) {
-				tok = token_new2(_line, _col, "/*");
-				assert(tok);
-			}
-
-			str_appendc(tok->value, c);
-
-			if(c == '*') {
-				state = STATE_COMMENT_END;
-			}
-
-			break;
-
-		case STATE_COMMENT_END:
-			str_appendc(tok->value, c);
-
-			if(c == '/') {
-				return(tok);
-			}
-
-			state = STATE_COMMENT;
-			break;
-
-		case STATE_LT:
-			switch(c) {
-			case '<':
-				state = STATE_SHL;
-				break;
-
-			case '=':
-				return(token_new2(_line, _col, "<="));
-				break;
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "<"));
-			}
-
-			break;
-
-		case STATE_SHL:
-			if(c == '=') {
-				return(token_new2(_line, _col, "<<="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, "<<"));
-
-		case STATE_GT:
-			switch(c) {
-			case '>':
-				state = STATE_SHR;
-				break;
-
-			case '=':
-				return(token_new2(_line, _col, ">="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, ">"));
-			}
-
-			break;
-
-		case STATE_SHR:
-			if(c == '=') {
-				return(token_new2(_line, _col, ">>="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, ">>"));
-
-		case STATE_NOT:
-			if(c == '=') {
-				return(token_new2(_line, _col, "!="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, "!"));
-
-		case STATE_MOD:
-			if(c == '=') {
-				return(token_new2(_line, _col, "%="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, "%"));
-
-		case STATE_AND:
-			switch(c) {
-			case '&':
-				return(token_new2(_line, _col, "&&"));
-
-			case '=':
-				return(token_new2(_line, _col, "&="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "&"));
-			}
-
-		case STATE_OR:
-			switch(c) {
-			case '|':
-				return(token_new2(_line, _col, "||"));
-
-			case '=':
-				return(token_new2(_line, _col, "|="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "|"));
-			}
-
-		case STATE_XOR:
-			if(c == '=') {
-				return(token_new2(_line, _col, "^="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, "^"));
-
-		case STATE_ASSIGN:
-			if(c == '=') {
-				return(token_new2(_line, _col, "=="));
-			}
-
-			putnext(c);
-			return(token_new2(_line, _col, "="));
-
-		case STATE_STRING:
-			if(!tok) {
-				tok = token_new2(_line, _col, "\"");
-				assert(tok);
-			}
-
-			str_appendc(tok->value, c);
-
-			if(c == '\\') {
-				state = STATE_STRING_ESC;
-			} else if(c == '"') {
-				return(tok);
-			}
-
-			break;
-
-		case STATE_STRING_ESC:
-			str_appendc(tok->value, c);
-			state = STATE_STRING;
-			break;
-
-		case STATE_CHR:
-			if(!tok) {
-				tok = token_new2(_line, _col, "'");
-				assert(tok);
-			}
-
-			str_appendc(tok->value, c);
-
-			if(c == '\\') {
-				state = STATE_CHR_ESC;
-			} else if(c == '\'') {
-				return(tok);
-			}
-
-			break;
-
-		case STATE_CHR_ESC:
-			str_appendc(tok->value, c);
-			state = STATE_CHR;
-			break;
-
-		case STATE_ID:
-			if(identifier_chr(c)) {
-				str_appendc(tok->value, c);
-			} else {
-				putnext(c);
-				return(tok);
-			}
-
-			break;
-
-		case STATE_MUL:
-			switch(c) {
-			case '=':
-				return(token_new2(_line, _col, "*="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "*"));
-			}
-
-		case STATE_ADD:
-			switch(c) {
-			case '+':
-				return(token_new2(_line, _col, "++"));
-
-			case '=':
-				return(token_new2(_line, _col, "+="));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "+"));
-			}
-
-		case STATE_SUB:
-			switch(c) {
-			case '-':
-				return(token_new2(_line, _col, "--"));
-
-			case '=':
-				return(token_new2(_line, _col, "-="));
-
-			case '>':
-				return(token_new2(_line, _col, "->"));
-
-			default:
-				putnext(c);
-				return(token_new2(_line, _col, "-"));
-			}
-
-		case STATE_DOT:
-			putnext(c);
-
-			if(c >= '0' && c <= '9') {
-				state = STATE_NUM;
-			} else {
-				return(token_new2(_line, _col, "."));
-			}
-
-			break;
-
-		case STATE_ZERO:
-			if(c == 'x' || c == 'X') {
-				str_appendc(tok->value, 'x');
-				state = STATE_NUM;
-			} else if(c >= '0' && c <= '9') {
-				putnext(c);
-				state = STATE_NUM;
-			} else if(c == '.') {
-
-			} else {
-				putnext(c);
-				return(tok);
-			}
-
-			break;
-
-		case STATE_NUM:
-			/* FIXME: e, E may be in the middle, (u|U)(l|L|ll|LL) or vice versa may be at the end */
-			if(c >= '0' && c <= '9' || c == '.') {
-				str_appendc(tok->value, c);
-			} else {
-				putnext(c);
-				return(tok);
-			}
-
-			break;
-		}
-	}
-
-	return(tok);
-}
-
-int main(int argc, char *argv[])
-{
-	int ret_val;
-	struct token *tok;
-
-	ret_val = 0;
-
-	while((tok = nexttoken())) {
-		printf("Token at %4d:%3d: \"%s\"\n", tok->line, tok->column, str_value(tok->value));
-	}
-
-	return(ret_val);
-}