diff --git a/.gitignore b/.gitignore index 7ad6275..4777691 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /.direnv +/.cache diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8c80ca6 --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +all: parser.tab.o lex.yy.o node.o main.o + gcc ./parser.tab.o ./lex.yy.o ./node.o ./main.o -o program -lfl + +main.o: main.c + gcc -c main.c + +node.o: node.c + gcc -c ./node.c + +parser.tab.c parser.tab.h: ./parser.y + bison -d parser.y + +parser.tab.o: ./parser.tab.c + gcc -c ./parser.tab.c + +lex.yy.o: lex.yy.c + gcc -c lex.yy.c + +lex.yy.c: ./scanner.l + flex scanner.l + +.PHONY: clean +clean: + -rm parser.tab.c parser.tab.h parser.tab.o lex.yy.o lex.yy.c program main.o diff --git a/flake.lock b/flake.lock index 0b7c20b..4680682 100644 --- a/flake.lock +++ b/flake.lock @@ -19,11 +19,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1742206328, - "narHash": "sha256-q+AQ///oMnyyFzzF4H9ShSRENt3Zsx37jTiRkLkXXE0=", + "lastModified": 1742707865, + "narHash": "sha256-RVQQZy38O3Zb8yoRJhuFgWo/iDIDj0hEdRTVfhOtzRk=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "096478927c360bc18ea80c8274f013709cf7bdcd", + "rev": "dd613136ee91f67e5dba3f3f41ac99ae89c5406b", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 1946716..53f0721 100644 --- a/flake.nix +++ b/flake.nix @@ -23,6 +23,8 @@ flex clang-tools clang + bear + neovim ]; }; } diff --git a/main.c b/main.c new file mode 100644 index 0000000..c6831b0 --- /dev/null +++ b/main.c @@ -0,0 +1,26 @@ +#include "node.h" +#include +extern int yyparse(); +extern pNode root; +extern void yyrestart(FILE *); +extern int errors; + +int main(int argc, char **argv) { + if (argc > 1) { + FILE *f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + return 1; + } + yyrestart(f); + } + int running_res = 0; + do { + running_res = 0; + running_res = yyparse(); + } while (running_res); + if (!errors) { + DFS(root, 0); + } + return 0; +} diff --git a/node.c b/node.c 
new file mode 100644 index 0000000..64f24b1 --- /dev/null +++ b/node.c @@ -0,0 +1,201 @@ +#include "node.h" +#include +#include + +pNode newNode(NodeType typ, intptr_t line, intptr_t size, Node **children, + ValueUnion value) { + pNode node = (pNode)calloc(sizeof(Node), 1); + Node **new_children = (Node **)calloc(sizeof(pNode), size); + for (int i = 0; i < size; i++) { + new_children[i] = children[i]; + } + node->type = typ; + node->line = line; + node->size = size; + node->children = new_children; + node->value = value; + return node; +} +ValueUnion nullValue() { + ValueUnion value = {.null = NULL}; + return value; +} +void displayNode(pNode); +void DFS(pNode node, int level) { + if (node == NULL) { + return; + }; + + if (node->line != -1) { + if (node->size == 0) { + for (int i = 0; i < level; i++) + printf(" "); + displayNode(node); + putchar('\n'); + } else { + for (int i = 0; i < level; i++) + printf(" "); + displayNode(node); + putchar('\n'); + } + } + + for (int i = 0; i < node->size; i++) + DFS(node->children[i], level + 1); +} +void displayNode(pNode node) { + switch (node->type) { + case Type_int: + printf("TYPE: int"); + break; + case Type_float: + printf("TYPE: float"); + break; + case Ident: + printf("ID: %s", node->value.id); + break; + case Integer: + printf("INT: %ld", node->value.integer); + break; + case Float: + printf("FLOAT: %lf", node->value.real); + break; + case Semi: + printf("SEMI"); + break; + case Comma: + printf("COMMA"); + break; + case Assignop: + printf("ASSIGNOP"); + break; + case Plus: + printf("PLUS"); + break; + case Minus: + printf("MINUS"); + break; + case Times: + printf("TIMES"); + break; + case Div: + printf("DIV"); + break; + case And: + printf("AND"); + break; + case Or: + printf("OR"); + break; + case Dot: + printf("DOT"); + break; + case Not: + printf("NOT"); + break; + case Lp: + printf("LP"); + break; + case Rp: + printf("RP"); + break; + case Lb: + printf("LB"); + break; + case Rb: + printf("RB"); + break; + case Lc: 
+ printf("LC"); + break; + case Rc: + printf("RC"); + break; + case Struct: + printf("STRUCT"); + break; + case Return: + printf("RETURN"); + break; + case If: + printf("IF"); + break; + case Else: + printf("ELSE"); + break; + case While: + printf("WHILE"); + break; + case Line_comment: + printf("LINE_COMMENT"); + break; + case Block_comment: + printf("BLOCK_COMMENT"); + break; + case Program: + printf("Program (%ld)", node->line); + break; + case ExtDefList: + printf("ExtDefList (%ld)", node->line); + break; + case ExtDef: + printf("ExtDef (%ld)", node->line); + break; + case ExtDecList: + printf("ExtDecList (%ld)", node->line); + break; + case Specifier: + printf("Specifier (%ld)", node->line); + break; + case StructSpecifier: + printf("StructSpecifier (%ld)", node->line); + break; + case OptTag: + printf("OptTag (%ld)", node->line); + break; + case Tag: + printf("Tag (%ld)", node->line); + break; + case VarDec: + printf("VarDec (%ld)", node->line); + break; + case FunDec: + printf("FunDec (%ld)", node->line); + break; + case VarList: + printf("VarList (%ld)", node->line); + break; + case ParamDec: + printf("ParamDec (%ld)", node->line); + break; + case CompSt: + printf("CompSt (%ld)", node->line); + break; + case StmtList: + printf("StmtList (%ld)", node->line); + break; + case Stmt: + printf("Stmt (%ld)", node->line); + break; + case DefList: + printf("DefList (%ld)", node->line); + break; + case Def: + printf("Def (%ld)", node->line); + break; + case DecList: + printf("DecList (%ld)", node->line); + break; + case Dec: + printf("Dec (%ld)", node->line); + break; + case Exp: + printf("Exp (%ld)", node->line); + break; + case Args: + printf("Args (%ld)", node->line); + break; + default: + printf("%d (%ld)", node->type, node->line); + } +} diff --git a/node.h b/node.h new file mode 100644 index 0000000..c255921 --- /dev/null +++ b/node.h @@ -0,0 +1,74 @@ +#pragma once +#include + +typedef enum node_kind_t { + Semi, + Comma, + Assignop, + Plus, + Minus, + Times, + 
%{
  #include <stdio.h>
  #include "node.h"
  extern int yylex(void);
  extern int yylineno;
  /* Number of diagnostics reported so far; also incremented by the scanner. */
  int errors = 0;
  struct node;
  void yyerror(char const *);
  /* Root of the syntax tree, set when the Program rule is reduced. */
  pNode root;
%}

/* Every grammar symbol carries a pointer to its syntax-tree node. */
%define api.value.type {struct node*}

%token INTEGER FLOAT
%token SEMI COMMA ASSIGNOP PLUS MINUS TIMES DIV AND OR DOT NOT LP RP LB RB LC RC STRUCT RETURN IF ELSE WHILE
%token TYPE_INT TYPE_FLOAT ID
%token LINE_COMMENT BLOCK_COMMENT

/*
 * Operator precedence and associativity, lowest precedence first. Without
 * these the Exp rules are ambiguous and bison resolves every conflict by
 * shifting, which groups all binary operators to the right regardless of
 * the operator involved.
 *
 * RP and ELSE are deliberately left without a precedence so the dangling
 * else keeps being resolved by the default shift (else binds to the nearest
 * if). Unary minus takes the precedence of binary MINUS unless its rule is
 * given an explicit %prec.
 */
%right ASSIGNOP
%left OR
%left AND
%left PLUS MINUS
%left TIMES DIV
%right NOT
%left DOT LB
/* VarList: one or more comma-separated parameter declarations. */
VarList:
    ParamDec COMMA VarList {
        pNode children[3] = { $1, $2, $3 };
        $$ = newNode(VarList, yylineno, 3, children, nullValue());
    }
  | ParamDec {
        pNode children[1] = { $1 };
        $$ = newNode(VarList, yylineno, 1, children, nullValue());
    }
  ;
/* ParamDec: a single parameter, i.e. a type specifier followed by its
 * declarator. Both symbols are kept as children so the declarator (the
 * parameter name and any array suffix) is not lost from the tree. */
ParamDec:
    Specifier VarDec {
        pNode children[2] = { $1, $2 };
        $$ = newNode(ParamDec, yylineno, 2, children, nullValue());
    }
  ;
/* Dec: a declarator, optionally followed by "= initializer". */
Dec:
    VarDec {
        pNode kids[] = { $1 };
        $$ = newNode(Dec, yylineno, sizeof kids / sizeof kids[0], kids, nullValue());
    }
  | VarDec ASSIGNOP Exp {
        pNode kids[] = { $1, $2, $3 };
        $$ = newNode(Dec, yylineno, sizeof kids / sizeof kids[0], kids, nullValue());
    }
  ;
/* Args: one or more comma-separated call arguments. Both alternatives
 * produce an Args node, so the last argument of a call is wrapped the same
 * way as the preceding ones. */
Args:
    Exp COMMA Args {
        pNode children[3] = { $1, $2, $3 };
        $$ = newNode(Args, yylineno, 3, children, nullValue());
    }
  | Exp {
        pNode children[1] = { $1 };
        $$ = newNode(Args, yylineno, 1, children, nullValue());
    }
  ;
/* flex has no lazy quantifiers, so the body is spelled out explicitly: any run of non-star characters, or stars not followed by a slash, ending at the first star-slash. */
BLOCK_COMMENT "/*"([^*]|\*+[^*/])*\*+"/"
nullValue()); + return ELSE; +} +{WHILE} { + yylval = newNode(While, yylineno, 0, NULL, nullValue()); + return WHILE; +} +{TYPE_INT} { + yylval = newNode(Type_int, yylineno, 0, NULL, nullValue()); + return TYPE_INT; +} +{TYPE_FLOAT} { + yylval = newNode(Type_float, yylineno, 0, NULL, nullValue()); + return TYPE_FLOAT; +} +{DEC_INTEGER} { + ValueUnion value = { .integer = strtol(yytext, NULL, 10) }; + yylval = newNode(Integer, yylineno, 0, NULL, value); + return INTEGER; +} +{HEX_INTEGER} { + ValueUnion value = { .integer = strtol(yytext, NULL, 16) }; + yylval = newNode(Integer, yylineno, 0, NULL, value); + return INTEGER; +} +{OCT_INTEGER} { + ValueUnion value = { .integer = strtol(yytext, NULL, 8) }; + yylval = newNode(Integer, yylineno, 0, NULL, value); + return INTEGER; +} +{FLOAT} { + ValueUnion value = { .real = atof(yytext) }; + yylval = newNode(Float, yylineno, 0, NULL, value); + return FLOAT; +} +{ID} { + char* id = (char*)calloc(sizeof(char), yyleng + 1); + strcpy(id, yytext); + ValueUnion value = { .id = id }; + yylval = newNode(Ident, yylineno, 0, NULL, value); + return ID; +} +{LINE_COMMENT} { + yylval = newNode(Line_comment, yylineno, 0, NULL, nullValue()); + return LINE_COMMENT; +} +{BLOCK_COMMENT} { + yylval = newNode(Block_comment, yylineno, 0, NULL, nullValue()); + return BLOCK_COMMENT; +} +{NEWLINE}|{WHITE_SPACE} {} +. { + errors += 1; + fprintf(stderr, "Error type A at Line %d: Mysterious character \"%s\".\n", yylineno, yytext); + return YYerror; +} +%% +