--- /dev/null
+# Build the `tokenize` program from its C translation units.
+OBJECTS = str.o token.o list.o tokenize.o
+OUTPUT = tokenize
+# Every target that names a command rather than a file.
+PHONY = all clean
+
+# Mandatory dialect/warning flags live apart from CFLAGS so that a
+# command-line override such as `make CFLAGS=-g` cannot drop them.
+ALL_CFLAGS = -std=c99 -Wall -pedantic $(CFLAGS)
+
+all: $(OUTPUT)
+
+$(OUTPUT): $(OBJECTS)
+	$(CC) $(ALL_CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+
+# Explicit compile rule: the built-in %.o rule only uses $(CFLAGS), so the
+# dialect and warning flags would otherwise never reach the compile step.
+%.o: %.c
+	$(CC) $(CPPFLAGS) $(ALL_CFLAGS) -c -o $@ $<
+
+clean:
+	$(RM) $(OUTPUT) $(OBJECTS)
+
+.PHONY: $(PHONY)
--- /dev/null
+#include <stdlib.h>
+#include <errno.h>
+#include "list.h"
+
+/*
+ * Allocate a single, unlinked list node.
+ *
+ * data: pointer stored in the node as-is.
+ *
+ * Returns the new node with `next` set to NULL, or NULL when malloc() fails.
+ */
+list_t *list_new(void *data)
+{
+	list_t *node = malloc(sizeof(*node));
+
+	if(!node) {
+		return(NULL);
+	}
+
+	node->data = data;
+	node->next = NULL;
+
+	return(node);
+}
+
+/*
+ * Release one list node.
+ *
+ * Only the node passed in is freed: neither the nodes reachable through
+ * `next` nor the `data` pointer are touched.
+ */
+void list_free(list_t *list)
+{
+	free(list);
+}
+
+/*
+ * Append a new node holding `data` at the end of a list.
+ *
+ * list: address of the head pointer; *list may be NULL for an empty list,
+ *       in which case the new node becomes the head.
+ * data: pointer stored in the new node.
+ *
+ * Returns 0 on success, -ENOMEM when the node cannot be allocated.
+ */
+int list_append(list_t **list, void *data)
+{
+	list_t **tail;
+	list_t *node;
+
+	/* advance to the NULL link that terminates the list */
+	for(tail = list; *tail; tail = &((*tail)->next)) {
+		;
+	}
+
+	node = list_new(data);
+	*tail = node;
+
+	if(!node) {
+		return(-ENOMEM);
+	}
+
+	return(0);
+}
--- /dev/null
+#ifndef LIST_H
+#define LIST_H
+
+/* Node of a singly linked list; list_t is the alias used by the API below. */
+typedef struct list list_t;
+
+struct list {
+ /* following node, or NULL at the end of the list */
+ struct list *next;
+ /* caller-supplied pointer, stored as-is */
+ void *data;
+};
+
+/* Allocate one unlinked node holding the given pointer; NULL on failure. */
+list_t *list_new(void*);
+/* Release a single node (not the rest of the list, not its data). */
+void list_free(list_t*);
+
+/*
+ * Append a node holding the given pointer to the list whose head pointer
+ * is passed by address. Returns 0 on success, -ENOMEM on failure.
+ */
+int list_append(list_t**, void*);
+
+#endif /* LIST_H */
--- /dev/null
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "str.h"
+
+/* Growable character buffer behind the opaque str_t handle from str.h. */
+struct str {
+ /* heap buffer of size + 1 bytes (one spare byte for the terminator) */
+ char *data;
+ /* capacity in characters, not counting the spare terminator byte */
+ size_t size;
+ /* number of characters currently stored */
+ size_t len;
+};
+
+/* Initial capacity, and also the amount added on every expansion. */
+#define STR_INITLEN 1024
+
+/*
+ * Create an empty string with room for STR_INITLEN characters.
+ *
+ * The buffer is zero-filled, so the value is a valid empty C string.
+ *
+ * Returns the new string, or NULL when either allocation fails.
+ */
+str_t *str_new(void)
+{
+	str_t *s;
+
+	s = malloc(sizeof(*s));
+
+	if(!s) {
+		return(NULL);
+	}
+
+	s->size = STR_INITLEN;
+	s->len = 0;
+	/* one extra byte so a full buffer is still NUL-terminated */
+	s->data = calloc(s->size + 1, 1);
+
+	if(!s->data) {
+		free(s);
+		return(NULL);
+	}
+
+	return(s);
+}
+
+/*
+ * Release a string created by str_new(), including its buffer.
+ *
+ * str.h declares this function, but it had no definition; without it any
+ * caller fails to link and a str_t can never be released.
+ * Passing NULL is a no-op.
+ */
+void str_free(str_t *str)
+{
+	if(str) {
+		free(str->data);
+		free(str);
+	}
+}
+
+/*
+ * Grow the buffer of `str` by STR_INITLEN characters.
+ *
+ * The stored characters are copied by length rather than as a C string,
+ * so bytes that follow an embedded '\0' (str_appendc() accepts any byte)
+ * survive the expansion. The rest of the new buffer is zero-filled.
+ *
+ * Returns 0 on success, -ENOMEM when the larger buffer cannot be
+ * allocated; on failure `str` is left untouched.
+ */
+int _str_expand(str_t *str)
+{
+	size_t nsize;
+	char *ndata;
+
+	nsize = str->size + STR_INITLEN;
+	/* + 1 keeps the spare terminator byte */
+	ndata = calloc(nsize + 1, 1);
+
+	if(!ndata) {
+		return(-ENOMEM);
+	}
+
+	memcpy(ndata, str->data, str->len);
+
+	free(str->data);
+	str->data = ndata;
+	str->size = nsize;
+
+	return(0);
+}
+
+/*
+ * Append one character to `str`, growing the buffer first when it is full.
+ *
+ * Returns 0 on success, or the negative error from _str_expand() when the
+ * buffer could not be grown (nothing is appended in that case).
+ */
+int str_appendc(str_t *str, const char c)
+{
+	int status = 0;
+
+	if(str->len == str->size) {
+		status = _str_expand(str);
+	}
+
+	if(status == 0 && str->len < str->size) {
+		str->data[str->len++] = c;
+	}
+
+	return(status);
+}
+
+/*
+ * Append the C string `s` to `str`, one character at a time.
+ *
+ * Returns the number of characters appended; this is smaller than
+ * strlen(s) when str_appendc() failed part-way through.
+ */
+int str_appends(str_t *str, const char *s)
+{
+	int count = 0;
+
+	while(s[count] && str_appendc(str, s[count]) >= 0) {
+		count++;
+	}
+
+	return(count);
+}
+
+/*
+ * Give read-only access to the characters of `str`.
+ *
+ * The pointer refers to the internal buffer, which _str_expand() frees
+ * and replaces, so it should not be kept across calls that can grow
+ * the string.
+ */
+const char* str_value(str_t *str)
+{
+	const char *value = str->data;
+
+	return(value);
+}
+
+/*
+ * Replace the contents of `str` with a copy of the C string `val`.
+ *
+ * The buffer is grown until `val` fits. After the copy every byte past
+ * the new contents is cleared, so characters left over from a longer
+ * previous value cannot resurface when str_appendc() is used afterwards.
+ *
+ * Returns 0 on success, or the negative error from _str_expand() when
+ * the buffer cannot be grown; in that case the old contents are kept.
+ */
+int str_set(str_t *str, const char *val)
+{
+	size_t nlen;
+
+	nlen = strlen(val);
+
+	while(str->size < nlen) {
+		int ret_val = _str_expand(str);
+
+		if(ret_val < 0) {
+			return(ret_val);
+		}
+	}
+
+	memcpy(str->data, val, nlen);
+	/* the buffer holds size + 1 bytes: clear the tail and the terminator */
+	memset(str->data + nlen, 0, str->size + 1 - nlen);
+	str->len = nlen;
+
+	return(0);
+}
--- /dev/null
+#ifndef STR_H
+#define STR_H
+
+/* Opaque growable string; the layout is private to str.c. */
+typedef struct str str_t;
+
+/* Create an empty string; returns NULL when allocation fails. */
+str_t *str_new(void);
+/* Append one character; 0 on success, negative errno value on failure. */
+int str_appendc(str_t*, const char);
+/* Append a C string; returns the number of characters appended. */
+int str_appends(str_t*, const char*);
+/* NOTE(review): declared here, but no definition is visible in str.c. */
+void str_free(str_t*);
+/* Read-only pointer to the string's internal character buffer. */
+const char* str_value(str_t*);
+/* Replace the contents with a C string; 0 on success, negative on failure. */
+int str_set(str_t*, const char*);
+
+#endif /* STR_H */
--- /dev/null
+#include <stdio.h>
+#include "str.h"
+
+/*
+ * Small demonstration of the str API: build "Hello, world!" and print it.
+ *
+ * Returns 0 on success, 1 when the string cannot be allocated.
+ */
+int main(int argc, char *argv[])
+{
+	str_t *str;
+
+	(void)argc;
+	(void)argv;
+
+	/* str.h exposes str_new(); there is no str_alloc() */
+	str = str_new();
+
+	if(!str) {
+		fprintf(stderr, "str_new: out of memory\n");
+		return(1);
+	}
+
+	str_appends(str, "Hello, world");
+	str_appendc(str, '!');
+
+	printf("%s\n", str_value(str));
+	return(0);
+}
--- /dev/null
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include "token.h"
+#include "str.h"
+
+/*
+ * Allocate a token with every field zeroed (no position, no value).
+ *
+ * Returns the new token, or NULL when allocation fails.
+ */
+struct token *token_new(void)
+{
+	struct token *fresh = malloc(sizeof(*fresh));
+
+	if(!fresh) {
+		return(NULL);
+	}
+
+	memset(fresh, 0, sizeof(*fresh));
+
+	return(fresh);
+}
+
+/*
+ * Allocate a token and initialise its position and value in one step.
+ *
+ * line, col: position stored in the token.
+ * val:       C string copied into the token's value.
+ *
+ * Returns the new token, or NULL when the token or its value cannot be
+ * set up.
+ */
+struct token *token_new2(const int line, const int col, const char *val)
+{
+	struct token *tok;
+
+	tok = token_new();
+
+	if(!tok) {
+		return(NULL);
+	}
+
+	token_setpos(tok, line, col);
+
+	/*
+	 * The call must not live inside assert(): with NDEBUG defined the
+	 * whole expression is compiled out and the value would never be set.
+	 */
+	if(token_setvalue(tok, val) != 0) {
+		token_free(tok);
+		return(NULL);
+	}
+
+	return(tok);
+}
+
+/*
+ * Release a token.
+ *
+ * NOTE(review): only the token structure itself is freed; the str_t in
+ * tok->value is not released here.
+ */
+void token_free(struct token *tok)
+{
+	free(tok);
+}
+
+/* Record the line and column of a token. */
+void token_setpos(struct token *tok, int line, int col)
+{
+	tok->column = col;
+	tok->line = line;
+}
+
+/*
+ * Set the textual value of a token, creating the underlying string on
+ * first use.
+ *
+ * Returns the result of str_set() (0 on success), or -ENOMEM when the
+ * string object cannot be created.
+ */
+int token_setvalue(struct token *tok, const char *str)
+{
+	if(!tok->value) {
+		/* lazily create the backing string */
+		tok->value = str_new();
+
+		if(!tok->value) {
+			return(-ENOMEM);
+		}
+	}
+
+	return(str_set(tok->value, str));
+}
+
+/*
+ * Return the text of a token.
+ *
+ * A token fresh from token_new() has no value yet; NULL is returned for
+ * it instead of handing a NULL string to str_value().
+ */
+const char *token_getvalue(struct token *tok)
+{
+	if(!tok->value) {
+		return(NULL);
+	}
+
+	return(str_value(tok->value));
+}
--- /dev/null
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include "str.h"
+
+/* One lexical token together with the position recorded for it. */
+struct token {
+ /* line number stored by token_setpos() */
+ int line;
+ /* column number stored by token_setpos() */
+ int column;
+ /* token text; NULL until token_setvalue() creates it */
+ str_t *value;
+ /* NOTE(review): zeroed by token_new() and not written anywhere else in token.c */
+ size_t len;
+};
+
+/* Allocate a token with the given line, column and text; NULL on failure. */
+struct token *token_new2(const int, const int, const char*);
+/* Allocate a zero-initialised token; NULL on failure. */
+struct token *token_new(void);
+/* Release a token structure. */
+void token_free(struct token*);
+/* Store a line and column in the token. */
+void token_setpos(struct token*, const int, const int);
+/* Set the token's text; 0 on success, negative errno value on failure. */
+int token_setvalue(struct token*, const char*);
+/* Read the token's text; defined in token.c but previously undeclared here. */
+const char *token_getvalue(struct token*);
+
+#endif /* TOKEN_H */
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include "str.h"
+#include "list.h"
+#include "token.h"
+
+/* Number of columns getnext() adds to the column counter for a tab. */
+#define TABWIDTH 8
+/* Tokenizer states. */
+#define STATE_NONE 0
+#define STATE_COMMENT 1
+#define STATE_OP 2
+#define STATE_DIV 3
+
+#define STATE_DONE 8
+
+/* One-character pushback slot shared by getnext()/putnext(); 0 means empty. */
+static char _next = 0;
+/* Only referenced from code that is currently disabled with #if 0. */
+static int _state = STATE_NONE;
+/* Current input position, advanced by getnext(). */
+static int _line = 1;
+static int _col = 1;
+/* Saved position that putnext() restores. */
+static int _pline = 1;
+static int _pcol = 1;
+/* Only referenced from code that is currently disabled with #if 0. */
+list_t *tokens = NULL;
+
+/*
+ * Fetch the next input character and advance the position counters.
+ *
+ * A character stored by putnext() is returned first; otherwise one is
+ * read from stdin. End of input is reported as 0.
+ *
+ * Before the position is advanced, both the line and the column are
+ * saved so that putnext() can restore them. Saving the line only on
+ * '\n' (as before) made putnext() rewind _line to a stale value after
+ * any other character.
+ *
+ * Returns the character, or 0 at end of input.
+ */
+int getnext(void)
+{
+	int ret_val;
+
+	if(_next) {
+		ret_val = _next;
+		_next = 0;
+	} else {
+		ret_val = getchar();
+
+		if(ret_val == EOF) {
+			ret_val = 0;
+		}
+	}
+
+	if(ret_val) {
+		/* remember where this character sat, for putnext() */
+		_pline = _line;
+		_pcol = _col;
+	}
+
+	switch(ret_val) {
+	case 0:
+		/* end of input: the position does not move */
+		break;
+
+	case '\n':
+		_line++;
+		_col = 1;
+		break;
+
+	case '\t':
+		_col += TABWIDTH;
+		break;
+
+	default:
+		_col++;
+		break;
+	}
+
+	return(ret_val);
+}
+
+/*
+ * Push one character back so that the next getnext() returns it again.
+ *
+ * Only a single character can be pending. On success the position
+ * counters are rewound to the values saved by getnext().
+ *
+ * Returns 0 on success, -EALREADY when a character is already pending.
+ */
+int putnext(const char c)
+{
+	if(_next) {
+		return(-EALREADY);
+	}
+
+	_next = c;
+
+	_col = _pcol;
+	_line = _pline;
+
+	return(0);
+}
+
+#if 0
+/*
+ * Disabled (inside #if 0): earlier comment scanner.
+ *
+ * Reads characters with getnext() and appends them to a token until the
+ * closing star-slash pair has been seen, then appends the token to the
+ * global `tokens` list and returns 0.
+ *
+ * NOTE(review): token_new() in token.c takes no arguments and leaves
+ * tok->value NULL, so this code does not match the current token API.
+ */
+int comment(void)
+{
+ struct token *tok;
+ int cstate;
+ char c;
+
+/* Sub-states of the comment scanner. */
+#define CSTATE_HEAD 0
+#define CSTATE_BODY 1
+#define CSTATE_FOOT 2
+#define CSTATE_DONE 3
+
+ cstate = CSTATE_HEAD;
+
+ tok = token_new(TOKEN_COMMENT);
+ assert(tok);
+
+ token_setpos(tok, _line, _col);
+
+ str_appendc(tok->value, '/');
+
+ while(cstate != CSTATE_DONE) {
+ c = getnext();
+
+ str_appendc(tok->value, c);
+
+ switch(cstate) {
+ case CSTATE_HEAD:
+ /* the character right after the slash must be the opening star */
+ assert(c == '*');
+ cstate = CSTATE_BODY;
+ break;
+
+ case CSTATE_BODY:
+ /* a star may be the start of the closing pair */
+ if(c == '*') {
+ cstate = CSTATE_FOOT;
+ }
+ break;
+
+ case CSTATE_FOOT:
+ /* a slash right after a star ends the comment; anything else resumes the body */
+ if(c == '/') {
+ cstate = CSTATE_DONE;
+ } else {
+ cstate = CSTATE_BODY;
+ }
+
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ list_append(&tokens, tok);
+
+ return(0);
+}
+
+/*
+ * Disabled (inside #if 0): unfinished stub. The body is never closed, so
+ * this region would not compile if the #if 0 were removed.
+ */
+int div(void)
+{
+ char c;
+
+ c = getnext();
+
+/*
+ * Disabled (inside #if 0): handler for the "no token in progress" state.
+ *
+ * Reads one character and returns the state to switch to: STATE_DONE at
+ * end of input, STATE_COMMENT or STATE_DIV after a slash (decided by one
+ * character of lookahead, which is pushed back), STATE_NONE otherwise.
+ */
+int none(void)
+{
+ char c;
+ int ret_val;
+ char lookahead;
+
+ c = getnext();
+
+ if(!c) {
+ return(STATE_DONE);
+ }
+
+ switch(c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ ret_val = STATE_NONE;
+ break;
+
+ case '/':
+ lookahead = getnext();
+
+ if(lookahead == '*') {
+ ret_val = STATE_COMMENT;
+ } else {
+ ret_val = STATE_DIV;
+ }
+
+ putnext(lookahead);
+
+ break;
+
+ default:
+ ret_val = STATE_NONE;
+ break;
+/*
+ case '"':
+ case '\'':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ ret_val = STATE_LITERAL;
+ break;
+ */
+/*
+ case '+':
+ case '-':
+ case '*':
+ case '=':
+ ret_val = STATE_OP;
+ break;
+*/
+/*
+ case '<':
+ case '>':
+ case '?':
+ case ',':
+ case ';':
+ case ':':
+ case '!':
+ case '%':
+ case '&':
+ case '(':
+ case ')':
+ case '~':
+ case '^':
+ case '|':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+*/
+ }
+
+ return(ret_val);
+}
+#endif /* 0 */
+
+/*
+ * Read the next token from the input.
+ *
+ * Recognised tokens: "/", "/=" and block comments running from the
+ * opening slash-star to the closing star-slash. Blanks, tabs, newlines
+ * and carriage returns between tokens are skipped.
+ *
+ * Every token carries the position of its first character.
+ *
+ * Returns a newly allocated token, or NULL at end of input, on an
+ * unrecognised character, on an unterminated comment, or when memory
+ * runs out.
+ */
+struct token *nexttoken(void)
+{
+	struct token *tok;
+	int state;
+	int start_line;
+	int start_col;
+	char prev;
+
+	state = STATE_NONE;
+	tok = NULL;
+	start_line = _line;
+	start_col = _col;
+	prev = 0;
+
+	while(1) {
+		char c = getnext();
+
+		switch(state) {
+		case STATE_NONE:
+			switch(c) {
+			case 0:
+				/* end of input between tokens is not an error */
+				return(NULL);
+
+			case '/':
+				/*
+				 * getnext() has already stepped past the '/';
+				 * _pcol is the column it was read from.
+				 */
+				start_line = _line;
+				start_col = _pcol;
+				state = STATE_DIV;
+				break;
+
+			case '\r':
+				fprintf(stderr, "DOS user, eh?\n");
+				/* fall through: treated as whitespace */
+			case ' ':
+			case '\t':
+			case '\n':
+				break;
+
+			default:
+				/* unrecognized token */
+				fprintf(stderr, "Unrecognized token at %d:%d\n", _line, _col);
+				return(NULL);
+			}
+
+			break;
+
+		case STATE_DIV:
+			switch(c) {
+			case '*':
+				tok = token_new2(start_line, start_col, "/*");
+
+				if(!tok) {
+					return(NULL);
+				}
+
+				/* the '*' of the opener must not count towards the closer */
+				prev = 0;
+				state = STATE_COMMENT;
+				break;
+
+			case '=':
+				return(token_new2(start_line, start_col, "/="));
+
+			default:
+				/* not part of this token: hand it back, unless it is end of input */
+				if(c) {
+					putnext(c);
+				}
+
+				return(token_new2(start_line, start_col, "/"));
+			}
+
+			break;
+
+		case STATE_COMMENT:
+			if(!c) {
+				/* without this check the loop would spin forever at end of input */
+				fprintf(stderr, "Unterminated comment starting at %d:%d\n",
+					start_line, start_col);
+				token_free(tok);
+				return(NULL);
+			}
+
+			if(str_appendc(tok->value, c) < 0) {
+				token_free(tok);
+				return(NULL);
+			}
+
+			/* a comment ends at star-slash, not at a double quote */
+			if(prev == '*' && c == '/') {
+				return(tok);
+			}
+
+			prev = c;
+			break;
+		}
+	}
+
+	/* not reached */
+	return(NULL);
+}
+
+
+
+/*
+ * Tokenize standard input and print every token with its position.
+ *
+ * Each token is released once it has been printed, so memory use does
+ * not grow with the size of the input.
+ *
+ * Always returns 0.
+ */
+int main(int argc, char *argv[])
+{
+	int ret_val;
+	struct token *tok;
+
+	(void)argc;
+	(void)argv;
+
+	ret_val = 0;
+
+	while((tok = nexttoken())) {
+		printf("Token at %d:%d: %s\n", tok->line, tok->column, str_value(tok->value));
+		token_free(tok);
+	}
+
+	/*
+	 * Disabled: earlier state-machine driver. It refers to op(),
+	 * token_typestr() and `litem`, none of which are declared in this
+	 * file.
+	 */
+#if 0
+	while(_state != STATE_DONE) {
+		printf("_state = %d\n", _state);
+
+		switch(_state) {
+		case STATE_NONE:
+			_state = none();
+			break;
+
+		case STATE_COMMENT:
+			_state = comment();
+			break;
+
+		case STATE_OP:
+			_state = op();
+			break;
+
+		default:
+			_state = STATE_DONE;
+			break;
+		}
+	}
+
+	if(!tokens) {
+		printf("No tokens\n");
+	}
+
+	for(litem = tokens; litem; litem = litem->next) {
+		struct token *t;
+
+		t = (struct token*)litem->data;
+
+		printf("%s token at line %d:%d\n", token_typestr(t->type), t->line, t->column);
+	}
+#endif
+
+	return(ret_val);
+}