From: Matthias Kruk
Date: Sun, 5 Jul 2020 05:57:33 +0000 (+0900)
Subject: parser: Implement parsing of strings, identifiers, and primary expressions
X-Git-Url: https://git.corax.cc/?a=commitdiff_plain;h=20fc4c03e3b44cc41e94278110d7f2da13b2743f;p=ccc

parser: Implement parsing of strings, identifiers, and primary expressions
---
Review notes (not part of the commit message):
 - primary_expression_debug() and string_debug() tolerate a NULL argument.
 - parse_primary_expression() calls parse_expression() only after a '(' token
   has been read.
 - Hunk headers were recomputed for the added lines; the blob ids on the
   "index" lines of src/grammar.c and src/parser.c still refer to the
   original revision of this patch.

diff --git a/src/grammar.c b/src/grammar.c
index 612357b..b83e557 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -218,6 +218,45 @@ struct primary_expression *primary_expression_new(void)
 	return(pexp);
 }
 
+/*
+ * Print a one-line description of a primary expression to stdout.
+ * Only the expression type is printed; the payload is abbreviated as "...".
+ * A NULL pexpr is reported as PRIMARY_EXPR_INVALID instead of being dereferenced.
+ */
+void primary_expression_debug(struct primary_expression *pexpr)
+{
+	if(!pexpr) {
+		printf("PEXPR { PRIMARY_EXPR_INVALID, NULL }\n");
+		return;
+	}
+
+	switch(pexpr->type) {
+	default:
+	case PRIMARY_EXPR_INVALID:
+		printf("PEXPR { PRIMARY_EXPR_INVALID, NULL }\n");
+		break;
+
+	case PRIMARY_EXPR_IDENTIFIER:
+		/* TODO: print token_value(pexpr->data.identifier->token) in place of "..." */
+		printf("PEXPR { PRIMARY_EXPR_IDENTIFIER, ... }\n");
+		break;
+
+	case PRIMARY_EXPR_CONSTANT:
+		printf("PEXPR { PRIMARY_EXPR_CONSTANT, ... }\n");
+		break;
+
+	case PRIMARY_EXPR_STRING:
+		printf("PEXPR { PRIMARY_EXPR_STRING, ... }\n");
+		break;
+
+	case PRIMARY_EXPR_EXPR:
+		printf("PEXPR { PRIMARY_EXPR_EXPR, ... }\n");
+		break;
+	}
+
+	return;
+}
+
 struct integer_constant *integer_constant_new(void)
 {
 	struct integer_constant *iconst;
@@ -283,6 +322,40 @@ struct constant *constant_new(void)
 	return(c);
 }
 
+/*
+ * Allocate an identifier node with all bytes set to zero.
+ * Returns NULL if the allocation fails.
+ */
+struct identifier *identifier_new(void)
+{
+	struct identifier *id;
+
+	id = malloc(sizeof(*id));
+
+	if(id) {
+		memset(id, 0, sizeof(*id));
+	}
+
+	return(id);
+}
+
+/*
+ * Allocate a string node with all bytes set to zero.
+ * Returns NULL if the allocation fails.
+ */
+struct string *string_new(void)
+{
+	struct string *s;
+
+	s = malloc(sizeof(*s));
+
+	if(s) {
+		memset(s, 0, sizeof(*s));
+	}
+
+	return(s);
+}
+
 struct argument_expression_list* argument_expression_list_new(void)
 {
 	struct argument_expression_list *aelist;
@@ -319,8 +392,7 @@ void constant_debug(struct constant *cnst)
 		break;
 
 	case CONST_TYPE_CHARACTER:
-		snprintf(data, sizeof(data), "%s%s",
-			 cnst->data.cconst->prefix ? token_value(cnst->data.cconst->prefix) : "",
+		snprintf(data, sizeof(data), "%s",
 			 cnst->data.cconst->token ? token_value(cnst->data.cconst->token) : "");
 		break;
 
@@ -339,3 +411,15 @@ void constant_debug(struct constant *cnst)
 	printf("CONSTANT { %s, %s }\n", _const_type_str[cnst->type], data);
 	return;
 }
+
+/*
+ * Print the token value of a string node to stdout.
+ * A NULL node, or a node without a token, prints an empty value.
+ */
+void string_debug(struct string *s)
+{
+	printf("STRING { %s }\n",
+	       (s && s->token) ? token_value(s->token) : "");
+
+	return;
+}
diff --git a/src/grammar.h b/src/grammar.h
index b6208d3..fbaca9c 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -15,7 +15,6 @@ struct integer_constant {
 
 struct character_constant {
 	/* L'x' */
-	struct token *prefix;
 	struct token *token;
 };
 
@@ -29,17 +28,21 @@ struct enumeration_constant {
 	struct token *token;
 };
 
+struct string {
+	struct token *token;
+};
+
 struct argument_expression_list {
 	struct assigment_expression *aexpr;
 	struct argument_expression_list *next;
 };
 
 enum primary_expression_type {
-	PEXPR_INVALID = 0,
-	PEXPR_IDENTIFIER,
-	PEXPR_CONSTANT,
-	PEXPR_STRING,
-	PEXPR_EXPR
+	PRIMARY_EXPR_INVALID = 0,
+	PRIMARY_EXPR_IDENTIFIER,
+	PRIMARY_EXPR_CONSTANT,
+	PRIMARY_EXPR_STRING,
+	PRIMARY_EXPR_EXPR
 };
 
 struct primary_expression {
@@ -50,9 +53,9 @@ struct primary_expression {
 		struct constant *constant;
 		struct string *string;
 		struct {
-			struct token *oparen;
+			struct token *lparen;
 			struct expression *expr;
-			struct token *cparen;
+			struct token *rparen;
 		} expr;
 	} data;
 };
@@ -222,6 +225,8 @@ struct translation_unit {
 };
 
 struct primary_expression *primary_expression_new(void);
+void primary_expression_debug(struct primary_expression*);
+
 struct integer_constant *integer_constant_new(void);
 struct character_constant *character_constant_new(void);
 struct floating_constant *floating_constant_new(void);
@@ -246,7 +251,11 @@ struct integer_constant *integer_constant_new(void);
 struct character_constant *character_constant_new(void);
 struct floating_constant *floating_constant_new(void);
 struct enumeration_constant *enueration_constant_new(void);
+struct string *string_new(void);
+
+struct identifier* identifier_new(void);
 
 void constant_debug(struct constant*);
+void string_debug(struct string*);
 
 #endif /* GRAMMAR_H */
diff --git a/src/parser.c b/src/parser.c
index af8d589..8211cbc 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -447,14 +447,6 @@ struct character_constant *parse_character_constant(void)
 	}
 
 	pos = lex_getpos();
-	cc->prefix = lex_gettoken();
-
-	if(!cc->prefix || cc->prefix->type != TOKEN_IDENTIFIER ||
-	   token_cmp(cc->prefix, "L") != 0) {
-		lex_setpos(pos);
-		cc->prefix = NULL;
-	}
-
 	cc->token = lex_gettoken();
 
 	if(!cc->token || cc->token->type != TOKEN_CHAR_LITERAL) {
@@ -503,5 +495,140 @@ struct floating_constant *parse_floating_constant(void)
 
 struct enumeration_constant *parse_enumeration_constant(void)
 {
+	/* TODO: Implement parsing of enumeration constants */
+	return(NULL);
+}
+
+/*
+ * Parse a single identifier token.
+ * Returns a newly allocated identifier on success. Returns NULL if the
+ * allocation fails, or if the next token is missing or is not a
+ * TOKEN_IDENTIFIER; in the latter case the lexer position is restored.
+ */
+struct identifier *parse_identifier(void)
+{
+	struct identifier *id;
+	int pos;
+
+	id = identifier_new();
+
+	if(!id) {
+		return(NULL);
+	}
+
+	pos = lex_getpos();
+	id->token = lex_gettoken();
+
+	if(!id->token || id->token->type != TOKEN_IDENTIFIER) {
+		lex_setpos(pos);
+		free(id);
+		id = NULL;
+	}
+
+	return(id);
+}
+
+/*
+ * Parse a single string literal token.
+ * Returns a newly allocated string node on success. Returns NULL if the
+ * allocation fails, or if the next token is missing or is not a
+ * TOKEN_STRING; in the latter case the lexer position is restored.
+ */
+struct string *parse_string(void)
+{
+	struct string *s;
+	int pos;
+
+	s = string_new();
+
+	if(!s) {
+		return(NULL);
+	}
+
+	pos = lex_getpos();
+	s->token = lex_gettoken();
+
+	if(!s->token || s->token->type != TOKEN_STRING) {
+		lex_setpos(pos);
+		free(s);
+		s = NULL;
+	}
+
+	return(s);
+}
+
+struct expression *parse_expression(void)
+{
+	/* TODO: Implement parsing of expressions */
+	return(NULL);
 }
+
+/*
+ * Parse a primary expression: an identifier, a constant, a string literal,
+ * or an expression enclosed in parentheses. The alternatives are tried in
+ * that order.
+ * Returns a newly allocated primary_expression on success. Returns NULL if
+ * the allocation fails or if no alternative matches; in the latter case the
+ * lexer is rewound to where the parenthesized form was attempted.
+ */
+struct primary_expression *parse_primary_expression(void)
+{
+	struct primary_expression *pe;
+	int pos;
+
+	pe = primary_expression_new();
+
+	if(!pe) {
+		return(NULL);
+	}
+
+	if((pe->data.identifier = parse_identifier())) {
+		pe->type = PRIMARY_EXPR_IDENTIFIER;
+		return(pe);
+	}
+
+	if((pe->data.constant = parse_constant())) {
+		pe->type = PRIMARY_EXPR_CONSTANT;
+		return(pe);
+	}
+
+	if((pe->data.string = parse_string())) {
+		pe->type = PRIMARY_EXPR_STRING;
+		return(pe);
+	}
+
+	pos = lex_getpos();
+	pe->data.expr.lparen = lex_gettoken();
+
+	/*
+	 * Only descend into parse_expression() after an opening parenthesis has
+	 * actually been read; calling it unconditionally would re-enter the
+	 * expression parser on input that cannot begin a parenthesized expression.
+	 */
+	if(pe->data.expr.lparen && pe->data.expr.lparen->type == TOKEN_LPAREN) {
+		pe->data.expr.expr = parse_expression();
+
+		if(pe->data.expr.expr) {
+			pe->data.expr.rparen = lex_gettoken();
+
+			if(pe->data.expr.rparen &&
+			   pe->data.expr.rparen->type == TOKEN_RPAREN) {
+				pe->type = PRIMARY_EXPR_EXPR;
+				return(pe);
+			}
+
+			/*
+			 * NOTE(review): the parsed sub-expression is not released on
+			 * this path; no destructor for struct expression is visible
+			 * in this patch. Free it here once one exists.
+			 */
+		}
+	}
+
+	/* No alternative matched: rewind the lexer and discard the node. */
+	lex_setpos(pos);
+	free(pe);
+
+	return(NULL);
+}
 