build a parser for c--

This commit is contained in:
ulic-youthlic 2025-03-24 18:40:16 +08:00
parent 843eb0938a
commit 7a6ac3d2b8
Signed by: youthlic
GPG key ID: 63E86C3C14A0D721
10 changed files with 830 additions and 3 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
/.direnv
/.cache

24
Makefile Normal file
View file

@ -0,0 +1,24 @@
# Build the C-- parser: bison generates parser.tab.[ch], flex generates
# lex.yy.c, and the four objects are linked into ./program.
all: parser.tab.o lex.yy.o node.o main.o
	gcc ./parser.tab.o ./lex.yy.o ./node.o ./main.o -o program -lfl
main.o: main.c node.h
	gcc -c main.c
node.o: node.c node.h
	gcc -c ./node.c
parser.tab.c parser.tab.h: ./parser.y
	bison -d parser.y
parser.tab.o: ./parser.tab.c node.h
	gcc -c ./parser.tab.c
# The scanner includes parser.tab.h, so the header must be generated before
# lex.yy.c is compiled (matters for `make lex.yy.o` and parallel builds).
lex.yy.o: lex.yy.c parser.tab.h node.h
	gcc -c lex.yy.c
lex.yy.c: ./scanner.l
	flex scanner.l
.PHONY: all clean
# Remove every generated file, including node.o.
clean:
	rm -f parser.tab.c parser.tab.h parser.tab.o lex.yy.o lex.yy.c node.o main.o program

6
flake.lock generated
View file

@ -19,11 +19,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1742206328,
"narHash": "sha256-q+AQ///oMnyyFzzF4H9ShSRENt3Zsx37jTiRkLkXXE0=",
"lastModified": 1742707865,
"narHash": "sha256-RVQQZy38O3Zb8yoRJhuFgWo/iDIDj0hEdRTVfhOtzRk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "096478927c360bc18ea80c8274f013709cf7bdcd",
"rev": "dd613136ee91f67e5dba3f3f41ac99ae89c5406b",
"type": "github"
},
"original": {

View file

@ -23,6 +23,8 @@
flex
clang-tools
clang
bear
neovim
];
};
}

26
main.c Normal file
View file

@ -0,0 +1,26 @@
#include "node.h"
#include <stdio.h>
extern int yyparse();
extern pNode root;
extern void yyrestart(FILE *);
extern int errors;
/*
 * Entry point. When a path is given as argv[1] the scanner is redirected to
 * that file via yyrestart(); otherwise the scanner's default input is used.
 * The syntax tree is printed only when no error was counted.
 *
 * Returns 1 if the input file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv) {
  FILE *input = NULL;
  if (argc > 1) {
    input = fopen(argv[1], "r");
    if (!input) {
      perror(argv[1]);
      return 1;
    }
    yyrestart(input);
  }
  /*
   * yyparse() returns non-zero when it gives up; it is called again until it
   * succeeds so that the remaining input is still examined for errors.
   */
  int running_res;
  do {
    running_res = yyparse();
  } while (running_res);
  if (!errors) {
    DFS(root, 0);
  }
  /* Release the input file that was opened above (nothing to do for stdin). */
  if (input) {
    fclose(input);
  }
  return 0;
}

201
node.c Normal file
View file

@ -0,0 +1,201 @@
#include "node.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Allocate a syntax-tree node.
 *
 * typ      - kind tag stored in the node
 * line     - source line stored in the node (callers pass -1 for empty
 *            productions)
 * size     - number of entries in `children`
 * children - array of `size` child pointers; the pointers are copied into a
 *            freshly allocated array, so the caller's array need not outlive
 *            the call. May be NULL when size is 0.
 * value    - payload stored in the node
 *
 * Returns the new node, or NULL when the arguments are inconsistent
 * (negative size, or a non-zero size with no children array) or when memory
 * cannot be allocated. A node with size 0 has a NULL `children` pointer.
 */
pNode newNode(NodeType typ, intptr_t line, intptr_t size, Node **children,
              ValueUnion value) {
  if (size < 0 || (size > 0 && children == NULL)) {
    return NULL;
  }
  pNode node = calloc(1, sizeof *node);
  if (node == NULL) {
    return NULL;
  }
  if (size > 0) {
    /* Only allocate when there is something to store: calloc(0, ...) may
     * legitimately return NULL and must not be mistaken for a failure. */
    node->children = calloc((size_t)size, sizeof *node->children);
    if (node->children == NULL) {
      free(node);
      return NULL;
    }
    for (intptr_t i = 0; i < size; i++) {
      node->children[i] = children[i];
    }
  }
  node->type = typ;
  node->line = line;
  node->size = size;
  node->value = value;
  return node;
}
ValueUnion nullValue() {
ValueUnion value = {.null = NULL};
return value;
}
void displayNode(pNode);
/*
 * Print the tree rooted at `node` in pre-order, one node per line.
 * Each printed line is prefixed with `level` copies of the indent string.
 * Nodes whose line is -1 are not printed, but their children are still
 * visited (one level deeper). A NULL node is ignored.
 */
void DFS(pNode node, int level) {
  if (!node) {
    return;
  }
  if (node->line != -1) {
    int remaining = level;
    while (remaining-- > 0) {
      printf(" ");
    }
    displayNode(node);
    putchar('\n');
  }
  for (int child = 0; child < node->size; ++child) {
    DFS(node->children[child], level + 1);
  }
}
/*
 * Print the label of a single node to stdout (no indentation, no newline).
 *
 *   - Ident, Integer and Float print their payload.
 *   - Every other token kind prints a fixed label.
 *   - Non-terminals print "Name (line)".
 *   - An unknown kind prints "<numeric kind> (line)".
 *
 * The intptr_t fields are cast to long before being handed to "%ld" so the
 * argument type always matches the conversion specifier.
 */
void displayNode(pNode node) {
  /* Fixed labels for tokens; kinds with a payload have no entry here. */
  static const char *const token_labels[] = {
      [Semi] = "SEMI",
      [Comma] = "COMMA",
      [Assignop] = "ASSIGNOP",
      [Plus] = "PLUS",
      [Minus] = "MINUS",
      [Times] = "TIMES",
      [Div] = "DIV",
      [And] = "AND",
      [Or] = "OR",
      [Dot] = "DOT",
      [Not] = "NOT",
      [Lp] = "LP",
      [Rp] = "RP",
      [Lb] = "LB",
      [Rb] = "RB",
      [Lc] = "LC",
      [Rc] = "RC",
      [Struct] = "STRUCT",
      [Return] = "RETURN",
      [If] = "IF",
      [Else] = "ELSE",
      [While] = "WHILE",
      [Type_int] = "TYPE: int",
      [Type_float] = "TYPE: float",
      [Line_comment] = "LINE_COMMENT",
      [Block_comment] = "BLOCK_COMMENT",
  };
  /* Names of the non-terminals, printed together with their line. */
  static const char *const rule_labels[] = {
      [Program] = "Program",
      [ExtDefList] = "ExtDefList",
      [ExtDef] = "ExtDef",
      [ExtDecList] = "ExtDecList",
      [Specifier] = "Specifier",
      [StructSpecifier] = "StructSpecifier",
      [OptTag] = "OptTag",
      [Tag] = "Tag",
      [VarDec] = "VarDec",
      [FunDec] = "FunDec",
      [VarList] = "VarList",
      [ParamDec] = "ParamDec",
      [CompSt] = "CompSt",
      [StmtList] = "StmtList",
      [Stmt] = "Stmt",
      [DefList] = "DefList",
      [Def] = "Def",
      [DecList] = "DecList",
      [Dec] = "Dec",
      [Exp] = "Exp",
      [Args] = "Args",
  };
  const size_t token_count = sizeof token_labels / sizeof token_labels[0];
  const size_t rule_count = sizeof rule_labels / sizeof rule_labels[0];

  /* Tokens that carry a payload are formatted individually. */
  switch (node->type) {
  case Ident:
    printf("ID: %s", node->value.id);
    return;
  case Integer:
    printf("INT: %ld", (long)node->value.integer);
    return;
  case Float:
    printf("FLOAT: %lf", node->value.real);
    return;
  default:
    break;
  }

  /* A negative kind becomes a huge index and falls through to the fallback. */
  const size_t kind = (size_t)node->type;
  if (kind < token_count && token_labels[kind] != NULL) {
    printf("%s", token_labels[kind]);
    return;
  }
  if (kind < rule_count && rule_labels[kind] != NULL) {
    printf("%s (%ld)", rule_labels[kind], (long)node->line);
    return;
  }
  printf("%d (%ld)", (int)node->type, (long)node->line);
}

74
node.h Normal file
View file

@ -0,0 +1,74 @@
#pragma once
#include <stdint.h>
/* Kind tag stored in every syntax-tree node. */
typedef enum node_kind_t {
/* Punctuation and operator tokens. */
Semi,
Comma,
Assignop,
Plus,
Minus,
Times,
Div,
And,
Or,
Dot,
Not,
Lp,
Rp,
Lb,
Rb,
Lc,
Rc,
/* Keyword tokens. */
Struct,
Return,
If,
Else,
While,
/* Basic type keywords. */
Type_int,
Type_float,
/* Tokens whose node carries a payload in its value union. */
Integer,
Float,
Ident,
/* Comment tokens. */
Line_comment,
Block_comment,
/* Non-terminals of the grammar. */
Program,
ExtDefList,
ExtDef,
ExtDecList,
Specifier,
StructSpecifier,
OptTag,
Tag,
VarDec,
FunDec,
VarList,
ParamDec,
CompSt,
StmtList,
Stmt,
DefList,
Def,
DecList,
Dec,
Exp,
Args
} NodeType;
/* Payload of a node; which member is meaningful depends on the node kind. */
typedef union value {
double real; /* value of a Float token */
intptr_t integer; /* value of an Integer token */
char const *id; /* text of an Ident token */
void *null; /* placeholder for nodes without a payload (see nullValue) */
} ValueUnion;
/* One node of the syntax tree. */
typedef struct node {
NodeType type; /* kind tag */
intptr_t line; /* source line; -1 marks nodes that DFS does not print */
intptr_t size; /* number of entries in children */
struct node **children; /* child pointers, in left-to-right order */
union value value; /* payload, see union value */
} Node;
/* Pointer alias used throughout the scanner and parser. */
typedef Node *pNode;
/* Create a node from (kind, line, child count, child array, payload). */
pNode newNode(NodeType, intptr_t, intptr_t, Node **, ValueUnion);
/* Return the payload used by nodes that carry no value. */
ValueUnion nullValue();
/* Print the tree rooted at the node, starting at the given depth. */
void DFS(pNode, int);

BIN
node.o Normal file

Binary file not shown.

311
parser.y Normal file
View file

@ -0,0 +1,311 @@
%{
#include <stdio.h>
#include "node.h"
/* Token supplier; provided by the scanner. */
extern int yylex(void);
/* Current input line as tracked by the scanner. */
extern int yylineno;
/* Number of errors reported so far; yyerror() below increments it. */
int errors = 0;
struct node;
void yyerror(char const *);
/* Root of the syntax tree; assigned when the Program rule is reduced. */
pNode root;
%}
/* Every grammar symbol carries a syntax-tree node as its semantic value. */
%define api.value.type {struct node*}
%token INTEGER FLOAT
%token SEMI COMMA ASSIGNOP PLUS MINUS TIMES DIV AND OR DOT NOT LP RP LB RB LC RC STRUCT RETURN IF ELSE WHILE
%token TYPE_INT TYPE_FLOAT ID
%token LINE_COMMENT BLOCK_COMMENT
/*
 * Operator precedence and associativity, lowest precedence first. Without
 * these declarations every conflict between two operators is resolved by
 * shifting, which makes all binary operators right-associative with equal
 * precedence.
 *
 * LP, RP and ELSE are deliberately left without a precedence: the only
 * remaining conflict (the dangling else) must keep its default "shift"
 * resolution so that an ELSE binds to the nearest IF.
 *
 * Unary minus uses the precedence of MINUS because the rule "MINUS Exp" has
 * no %prec override.
 */
%right ASSIGNOP
%left OR
%left AND
%left PLUS MINUS
%left TIMES DIV
%right NOT
%left LB DOT
%%
// High-level Definitions
/* Program -> ExtDefList. The resulting node is also stored in `root`. */
Program:
    ExtDefList {
        $$ = newNode(Program, yylineno, 1, (pNode[]){ $1 }, nullValue());
        root = $$;
    }
    ;
/* ExtDefList -> ExtDef ExtDefList | (empty). The empty case gets line -1. */
ExtDefList:
    ExtDef ExtDefList {
        $$ = newNode(ExtDefList, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    | /* empty */ {
        $$ = newNode(ExtDefList, -1, 0, NULL, nullValue());
    }
    ;
/* ExtDef: a global variable declaration, a bare specifier, or a function. */
ExtDef:
    Specifier ExtDecList SEMI {
        $$ = newNode(ExtDef, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Specifier SEMI {
        $$ = newNode(ExtDef, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    | Specifier FunDec CompSt {
        $$ = newNode(ExtDef, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
/* ExtDecList -> VarDec | VarDec COMMA ExtDecList. */
ExtDecList:
    VarDec {
        $$ = newNode(ExtDecList, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | VarDec COMMA ExtDecList {
        $$ = newNode(ExtDecList, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
// Specifiers
/* Specifier -> TYPE_INT | TYPE_FLOAT | StructSpecifier. */
Specifier:
    TYPE_INT {
        $$ = newNode(Specifier, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | TYPE_FLOAT {
        $$ = newNode(Specifier, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | StructSpecifier {
        $$ = newNode(Specifier, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    ;
/* StructSpecifier: a struct definition with a body, or a reference by tag. */
StructSpecifier:
    STRUCT OptTag LC DefList RC {
        $$ = newNode(StructSpecifier, yylineno, 5,
                     (pNode[]){ $1, $2, $3, $4, $5 }, nullValue());
    }
    | STRUCT Tag {
        $$ = newNode(StructSpecifier, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    ;
/* OptTag -> ID | (empty). The empty case gets line -1. */
OptTag:
    ID {
        $$ = newNode(OptTag, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | /* empty */ {
        $$ = newNode(OptTag, -1, 0, NULL, nullValue());
    }
    ;
/* Tag -> ID. */
Tag:
    ID {
        $$ = newNode(Tag, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    ;
// Declarators
/* VarDec -> ID | VarDec LB INTEGER RB (array declarator). */
VarDec:
    ID {
        $$ = newNode(VarDec, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | VarDec LB INTEGER RB {
        $$ = newNode(VarDec, yylineno, 4, (pNode[]){ $1, $2, $3, $4 }, nullValue());
    }
    ;
/* FunDec -> ID LP VarList RP | ID LP RP. */
FunDec:
    ID LP VarList RP {
        $$ = newNode(FunDec, yylineno, 4, (pNode[]){ $1, $2, $3, $4 }, nullValue());
    }
    | ID LP RP {
        $$ = newNode(FunDec, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
/* VarList -> ParamDec COMMA VarList | ParamDec. */
VarList:
    ParamDec COMMA VarList {
        $$ = newNode(VarList, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | ParamDec {
        $$ = newNode(VarList, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    ;
/* ParamDec -> Specifier VarDec. Both symbols become children of the node. */
ParamDec:
    Specifier VarDec {
        pNode children[2] = { $1, $2 };
        $$ = newNode(ParamDec, yylineno, 2, children, nullValue());
    }
    ;
// Statements
/* CompSt -> LC DefList StmtList RC (a braced block). */
CompSt:
    LC DefList StmtList RC {
        $$ = newNode(CompSt, yylineno, 4, (pNode[]){ $1, $2, $3, $4 }, nullValue());
    }
    ;
/* StmtList -> Stmt StmtList | (empty). The empty case gets line -1. */
StmtList:
    Stmt StmtList {
        $$ = newNode(StmtList, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    | /* empty */ {
        $$ = newNode(StmtList, -1, 0, NULL, nullValue());
    }
    ;
/*
 * Stmt: expression statement, block, return, if, if/else, while, or an
 * error-recovery alternative that resynchronises at the next SEMI.
 */
Stmt:
    Exp SEMI {
        pNode children[2] = { $1, $2 };
        $$ = newNode(Stmt, yylineno, 2, children, nullValue());
    }
    | CompSt {
        pNode children[1] = { $1 };
        $$ = newNode(Stmt, yylineno, 1, children, nullValue());
    }
    | RETURN Exp SEMI {
        pNode children[3] = { $1, $2, $3 };
        $$ = newNode(Stmt, yylineno, 3, children, nullValue());
    }
    | IF LP Exp RP Stmt {
        pNode children[5] = { $1, $2, $3, $4, $5 };
        $$ = newNode(Stmt, yylineno, 5, children, nullValue());
    }
    | IF LP Exp RP Stmt ELSE Stmt {
        pNode children[7] = { $1, $2, $3, $4, $5, $6, $7 };
        $$ = newNode(Stmt, yylineno, 7, children, nullValue());
    }
    | WHILE LP Exp RP Stmt {
        pNode children[5] = { $1, $2, $3, $4, $5 };
        $$ = newNode(Stmt, yylineno, 5, children, nullValue());
    }
    | error SEMI {
        /* Be explicit instead of relying on the implicit "$$ = $1", which
         * would reuse whatever value is attached to the error token. */
        $$ = NULL;
    }
    ;
// Local Definitions
/* DefList -> Def DefList | (empty). The empty case gets line -1. */
DefList:
    Def DefList {
        $$ = newNode(DefList, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    | /* empty */ {
        $$ = newNode(DefList, -1, 0, NULL, nullValue());
    }
    ;
/* Def -> Specifier DecList SEMI (one local declaration). */
Def:
    Specifier DecList SEMI {
        $$ = newNode(Def, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
/* DecList -> Dec | Dec COMMA DecList. */
DecList:
    Dec {
        $$ = newNode(DecList, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | Dec COMMA DecList {
        $$ = newNode(DecList, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
/* Dec -> VarDec | VarDec ASSIGNOP Exp (declarator with optional initializer). */
Dec:
    VarDec {
        $$ = newNode(Dec, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | VarDec ASSIGNOP Exp {
        $$ = newNode(Dec, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    ;
// Expressions
/*
 * Exp: every alternative wraps its right-hand-side symbols, in order, as the
 * children of a new Exp node.
 */
Exp:
    /* Binary operators. */
    Exp ASSIGNOP Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp AND Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp OR Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp PLUS Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp MINUS Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp TIMES Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | Exp DIV Exp {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    /* Parenthesised and unary expressions. */
    | LP Exp RP {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    | MINUS Exp {
        $$ = newNode(Exp, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    | NOT Exp {
        $$ = newNode(Exp, yylineno, 2, (pNode[]){ $1, $2 }, nullValue());
    }
    /* Function calls, with and without arguments. */
    | ID LP Args RP {
        $$ = newNode(Exp, yylineno, 4, (pNode[]){ $1, $2, $3, $4 }, nullValue());
    }
    | ID LP RP {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    /* Array subscript and member access. */
    | Exp LB Exp RB {
        $$ = newNode(Exp, yylineno, 4, (pNode[]){ $1, $2, $3, $4 }, nullValue());
    }
    | Exp DOT ID {
        $$ = newNode(Exp, yylineno, 3, (pNode[]){ $1, $2, $3 }, nullValue());
    }
    /* Primary expressions. */
    | ID {
        $$ = newNode(Exp, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | INTEGER {
        $$ = newNode(Exp, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    | FLOAT {
        $$ = newNode(Exp, yylineno, 1, (pNode[]){ $1 }, nullValue());
    }
    ;
/* Args -> Exp COMMA Args | Exp. Both alternatives produce an Args node. */
Args:
    Exp COMMA Args {
        pNode children[3] = { $1, $2, $3 };
        $$ = newNode(Args, yylineno, 3, children, nullValue());
    }
    | Exp {
        pNode children[1] = { $1 };
        $$ = newNode(Args, yylineno, 1, children, nullValue());
    }
    ;
%%
/*
 * Error callback invoked by the generated parser with a message `s`.
 * Reports the message on stderr as a "type B" error tagged with the
 * scanner's current line, and bumps the global error counter.
 */
void yyerror(char const *s) {
    fprintf(stderr, "Error type B at Line %d: \"%s\".\n", yylineno, s);
    ++errors;
}

188
scanner.l Normal file
View file

@ -0,0 +1,188 @@
%{
#include "parser.tab.h"
#include "node.h"
#include <stdio.h>
#include <stdlib.h>
extern int errors;
%}
%option yylineno
/* Digit classes. Octal digits stop at 7. */
DEC_DIGIT [0-9]
HEX_DIGIT [0-9a-fA-F]
OCT_DIGIT [0-7]
/* Numeric literals are unsigned: a leading sign is scanned as a separate
   PLUS or MINUS token, so that input such as a-1 is ID MINUS INTEGER. */
DEC_INTEGER (0|[1-9]{DEC_DIGIT}*)
HEX_INTEGER 0[xX]{HEX_DIGIT}+
OCT_INTEGER 0{OCT_DIGIT}+
FLOAT {DEC_DIGIT}*\.{DEC_DIGIT}+([eE][-+]?{DEC_DIGIT}+)?
NEWLINE (\n|\r\n)
WHITE_SPACE [ \t]
SEMI ";"
COMMA ","
ASSIGNOP "="
PLUS "+"
MINUS "-"
TIMES "*"
DIV "/"
AND "&&"
OR "||"
DOT "."
NOT "!"
LP "("
RP ")"
LB "["
RB "]"
LC "{"
RC "}"
STRUCT "struct"
RETURN "return"
IF "if"
ELSE "else"
WHILE "while"
TYPE_INT "int"
TYPE_FLOAT "float"
ID [a-zA-Z_][a-zA-Z0-9_]*
/* A line comment runs to the end of the line; the dot never matches a
   newline, so no trailing-context anchor is needed and a comment on the
   last line of a file without a final newline is still recognised. */
LINE_COMMENT "//".*
/* A block comment ends at the FIRST closing delimiter. Flex has no
   non-greedy repetition, so the body is spelled out as: any character that
   is not a star, or a run of stars followed by something other than a star
   or a slash. */
BLOCK_COMMENT "/*"([^*]|\*+[^*/])*\*+"/"
%%
{SEMI} {
/* Each punctuation/operator rule below follows the same shape: wrap the
   token in a childless node tagged with the current line, hand it to the
   parser through yylval, and return the token code. */
yylval = newNode(Semi, yylineno, 0, NULL, nullValue());
return SEMI;
}
{COMMA} {
yylval = newNode(Comma, yylineno, 0, NULL, nullValue());
return COMMA;
}
{ASSIGNOP} {
yylval = newNode(Assignop, yylineno, 0, NULL, nullValue());
return ASSIGNOP;
}
{PLUS} {
yylval = newNode(Plus, yylineno, 0, NULL, nullValue());
return PLUS;
}
{MINUS} {
yylval = newNode(Minus, yylineno, 0, NULL, nullValue());
return MINUS;
}
{TIMES} {
yylval = newNode(Times, yylineno, 0, NULL, nullValue());
return TIMES;
}
{DIV} {
yylval = newNode(Div, yylineno, 0, NULL, nullValue());
return DIV;
}
{AND} {
yylval = newNode(And, yylineno, 0, NULL, nullValue());
return AND;
}
{OR} {
yylval = newNode(Or, yylineno, 0, NULL, nullValue());
return OR;
}
{DOT} {
yylval = newNode(Dot, yylineno, 0, NULL, nullValue());
return DOT;
}
{NOT} {
yylval = newNode(Not, yylineno, 0, NULL, nullValue());
return NOT;
}
{LP} {
yylval = newNode(Lp, yylineno, 0, NULL, nullValue());
return LP;
}
{RP} {
yylval = newNode(Rp, yylineno, 0, NULL, nullValue());
return RP;
}
{LB} {
yylval = newNode(Lb, yylineno, 0, NULL, nullValue());
return LB;
}
{RB} {
yylval = newNode(Rb, yylineno, 0, NULL, nullValue());
return RB;
}
{LC} {
yylval = newNode(Lc, yylineno, 0, NULL, nullValue());
return LC;
}
{RC} {
yylval = newNode(Rc, yylineno, 0, NULL, nullValue());
return RC;
}
{STRUCT} {
/* Keyword and type-name rules. They are listed before the {ID} rule, so
   when a keyword and {ID} match the same text the keyword rule is chosen. */
yylval = newNode(Struct, yylineno, 0, NULL, nullValue());
return STRUCT;
}
{RETURN} {
yylval = newNode(Return, yylineno, 0, NULL, nullValue());
return RETURN;
}
{IF} {
yylval = newNode(If, yylineno, 0, NULL, nullValue());
return IF;
}
{ELSE} {
yylval = newNode(Else, yylineno, 0, NULL, nullValue());
return ELSE;
}
{WHILE} {
yylval = newNode(While, yylineno, 0, NULL, nullValue());
return WHILE;
}
{TYPE_INT} {
yylval = newNode(Type_int, yylineno, 0, NULL, nullValue());
return TYPE_INT;
}
{TYPE_FLOAT} {
yylval = newNode(Type_float, yylineno, 0, NULL, nullValue());
return TYPE_FLOAT;
}
{DEC_INTEGER} {
    /* Decimal literal: convert in base 10 and store it in the node. */
    yylval = newNode(Integer, yylineno, 0, NULL,
                     (ValueUnion){ .integer = strtol(yytext, NULL, 10) });
    return INTEGER;
}
{HEX_INTEGER} {
    /* Hexadecimal literal: strtol accepts the 0x/0X prefix in base 16. */
    yylval = newNode(Integer, yylineno, 0, NULL,
                     (ValueUnion){ .integer = strtol(yytext, NULL, 16) });
    return INTEGER;
}
{OCT_INTEGER} {
    /* Octal literal: convert in base 8. */
    yylval = newNode(Integer, yylineno, 0, NULL,
                     (ValueUnion){ .integer = strtol(yytext, NULL, 8) });
    return INTEGER;
}
{FLOAT} {
    /* Floating-point literal; strtod with a NULL end pointer is what atof
       is defined to do. */
    yylval = newNode(Float, yylineno, 0, NULL,
                     (ValueUnion){ .real = strtod(yytext, NULL) });
    return FLOAT;
}
{ID} {
    /* The node keeps its own copy of the identifier text. */
    char *id = calloc((size_t)yyleng + 1, sizeof *id);
    if (id == NULL) {
        /* Copying into a NULL buffer would crash; stop with a clear message. */
        fprintf(stderr, "Out of memory while scanning line %d.\n", yylineno);
        exit(EXIT_FAILURE);
    }
    strcpy(id, yytext);
    ValueUnion value = { .id = id };
    yylval = newNode(Ident, yylineno, 0, NULL, value);
    return ID;
}
{LINE_COMMENT}|{BLOCK_COMMENT} {
    /* Comments are discarded here. No grammar rule accepts the
       LINE_COMMENT or BLOCK_COMMENT tokens, so returning them would turn
       every comment into a syntax error. */
}
{NEWLINE}|{WHITE_SPACE} { /* Newlines, spaces and tabs produce no token. */ }
. {
/* Any character not matched above is a lexical ("type A") error: count it,
   report it with its line, and return YYerror to the parser. */
errors += 1;
fprintf(stderr, "Error type A at Line %d: Mysterious character \"%s\".\n", yylineno, yytext);
return YYerror;
}
%%