Makefile100644 1040 1001 2677 6744212127 10732 0ustar everyone
#
# Build the tiny C parser: run ANTLR over $(GRAMMAR).g, compile the generated
# lexer/parser together with print_tree.cpp and main.cpp, and link them
# against the ANTLR C++ runtime library.
#
CC = cl
LD = link
DEBUG = -Zi
# Compile try/except
EXCEPT = -EHsc
# -Tp has to remain the last word of CFLAGS (and therefore of FLAGS): every
# compile recipe below places the source file name directly after $(FLAGS).
CFLAGS = $(EXCEPT) -DWIN32 -Tp
OBJ = obj
EXE = .exe
INCLUDE = -I "e:\antlr\cpp" -I "d:\Program Files\Microsoft Visual Studio\Vc98\include"
LD_DEBUG = /debug
ANTLR_LIB = /defaultlib:"e:\antlr\cpp\antlr.lib"
LINK_FLAGS = $(LD_DEBUG) $(ANTLR_LIB)
FLAGS = $(INCLUDE) $(DEBUG) $(CFLAGS)

#
# To customize, fill in EXENAME, GRAMMAR, PARSER and LEXER
#
EXENAME = tc
GRAMMAR = tiny
PARSER = MyTinyC
LEXER = MyTinyC

PARSE_OBJ = $(LEXER)Lexer.$(OBJ) $(PARSER)Parser.$(OBJ) print_tree.$(OBJ) main.$(OBJ)

# "all" and "clean" are commands, not files; without this declaration a file
# with either name in the directory would silently disable the target.
.PHONY: all clean

all: $(EXENAME)$(EXE)

clean:
	rm -f $(PARSE_OBJ) $(EXENAME)$(EXE) \
	      $(LEXER)Lexer.cpp $(LEXER)Lexer.hpp \
	      $(PARSER)Parser.cpp $(PARSER)Parser.hpp \
	      MyTinyCParserTokenTypes.hpp MyTinyCParserTokenTypes.txt \
	      MyTinyCTokenTypes.hpp MyTinyCTokenTypes.txt

$(EXENAME)$(EXE): $(PARSE_OBJ)
	$(LD) $(LINK_FLAGS) /out:$@ $(PARSE_OBJ)

# One run of antlr.Tool writes all four generated sources.  Listing the four
# files as targets of a single rule would define four independent rules, so a
# parallel build could start the tool several times at once.  Instead the tool
# is attached to one file and the other three depend on that file.
# NOTE(review): if only one of the three secondary files is deleted by hand it
# is not regenerated automatically; run "make clean" first in that case.
$(LEXER)Lexer.cpp: $(GRAMMAR).g
	jview antlr.Tool $(GRAMMAR).g

$(LEXER)Lexer.hpp $(PARSER)Parser.cpp $(PARSER)Parser.hpp: $(LEXER)Lexer.cpp ;

$(LEXER)Lexer.$(OBJ): $(LEXER)Lexer.cpp $(LEXER)Lexer.hpp
	$(CC) -c $(FLAGS) $<

$(PARSER)Parser.$(OBJ): $(PARSER)Parser.cpp $(PARSER)Parser.hpp
	$(CC) -c $(FLAGS) $<

print_tree.$(OBJ): print_tree.cpp print_tree.hpp
	$(CC) -c $(FLAGS) $<

main.$(OBJ): main.cpp $(PARSER)Parser.hpp $(LEXER)Lexer.hpp
	$(CC) -c $(FLAGS) $<
main.cpp100644 1040 1001 3424 6740542335 10714 0ustar everyone
// NOTE(review): the names of the four system headers were lost from this copy
// of the file; the ones below are the headers the code visibly needs
// (printf, std::string, std::cout/std::cerr, ifstream) -- confirm against the
// original source.
#include <stdio.h>
#include <string>
#include <iostream>
#include <fstream>
#include "MyTinyCLexer.hpp"
#include "MyTinyCParser.hpp"
#include "print_tree.hpp"

using namespace std;

/*
 * indent
 * Print indent_level spaces to stdout.  Nothing is printed when
 * indent_level is zero or negative, and at most 127 spaces are printed.
 */
void indent(int indent_level)
{
    if (indent_level > 0) {
        const size_t BUFSIZE = 127;
        char buf[ BUFSIZE+1 ];
        // indent_level is known to be positive here, so the conversion is
        // safe and the loop compares like-signed values.
        size_t width = (size_t)indent_level;
        size_t i;

        if (width > BUFSIZE) {
            width = BUFSIZE;
        }
        for (i = 0; i < width; i++) {
            buf[i] = ' ';
        }
        buf[i] = '\0';
        printf("%s", buf );
    }
} // indent

/*
 * trav_tree
 * A simple tree traversal function that shows "kid" and "sib" (sibling) relations in the tree.
*/ void trav_tree( RefAST top, int ind ) { if (top != NULL) { std::string str; indent( ind ); str = top->getText(); std::cout << str << "\n"; if (top->getFirstChild() != NULL) { printf("kid: "); trav_tree( top->getFirstChild(), ind+2 ); } if (top->getNextSibling()) { printf("sib: "); trav_tree( top->getNextSibling(), ind ); } } } // trav_tree void usage( char *progname ) { printf("usage: %s \n", progname ); } int main(int argc, char *argv[] ) { if (argc == 2) { const char *filename; filename = argv[1]; ifstream str( filename ); printf("Ursine Tiny C parser, version 1.0\n"); if (str.is_open()) { try { MyTinyCLexer lexer( str ); MyTinyCParser parser(lexer); parser.funclist(); print_tree pr; pr.pr_tree( parser.getAST() ); // trav_tree( parser.getAST(), 0 ); } catch(std::exception& e) { std::cerr << "exception: " << e.what() << std::endl; } } else { printf("%s: could not open file %s\n", argv[0], filename ); } } else { usage( argv[0] ); } return 0; } print_tree.cpp100644 1040 1001 4031 6740543515 12137 0ustar everyone #include #include #include "MyTinyCParser.hpp" #include "print_tree.hpp" /* * pr_name * Print the character string associated with an ANTLR tree node. */ void print_tree::pr_name( RefAST node ) { std::string str; str = node->getText(); printf("%s ", str.c_str()); } // pr_name /* * pr_indent * Print indentation for a node. */ void print_tree::pr_indent(void) { const size_t BUFSIZE = 127; char buf[ BUFSIZE+1 ]; int i; for (i = 0; i < indent_level && i < BUFSIZE; i++) { buf[i] = ' '; } buf[i] = '\0'; printf("%s", buf ); } // pr_indent void print_tree::pr_open_angle(void) { if ( indent_level ) printf("\n"); pr_indent(); printf("<"); indent_level += INDENT; } // pr_open_angle /* * pr_close_angle * Print the ">" bracket to show the close of a tree production. 
*/ void print_tree::pr_close_angle(Boolean first) { assert( indent_level > 0 ); indent_level -= INDENT; if (!first) { printf("\n"); pr_indent(); } printf(">"); } // pr_close_angle /* * pr_leaves * Print the leaves of an AST node */ void print_tree::pr_leaves( RefAST top ) { RefAST t; For_each_kid(t, top) { if (is_nonleaf( t )) pr_top( t ); else pr_name( t ); } } // pr_leaves /* * pr_top * Recursively print a tree (or a sub-tree) from the top down. */ void print_tree::pr_top( RefAST top ) { RefAST tmp; Boolean first = TRUE; pr_open_angle(); pr_name( top ); if (is_nonleaf( top )) { For_each_kid( tmp, top ) { if (is_nonleaf( tmp )) first = FALSE; } pr_leaves( top ); } pr_close_angle( first ); } // pr_top /* * pr_tree * Main entry point for tree print. */ void print_tree::pr_tree( RefAST top ) { RefAST t; for (t = top; t != NULL; t = t->getNextSibling()) { indent_level = 0; pr_top( t ); printf("\n"); } } // pr_tree print_tree.hpp100644 1040 1001 1513 6740542412 12141 0ustar everyone #ifndef _PRINT_TREE_HPP_ #define _PRINT_TREE_HPP_ /* print_tree Print an ANTLR abstract syntax tree in operator prefix form. */ typedef enum { FALSE = 0, TRUE = 1 } Boolean; #define For_each_kid(t,top) for(t=( (top && is_nonleaf(top)) ? 
top->getFirstChild() : (RefAST)NULL ); t; t = t->getNextSibling() ) class print_tree { private: typedef enum { INDENT = 2 } bogus; int indent_level; private: void pr_name( RefAST node ); void pr_indent(); void pr_top( RefAST top ); void pr_open_angle(void); void pr_close_angle(Boolean first); void pr_leaves( RefAST top ); Boolean is_nonleaf( RefAST node ) { Boolean rslt; rslt = (Boolean)(node->getFirstChild() != NULL); return rslt; } public: void pr_tree( const RefAST top ); }; // print_tree #endif tiny.g100644 1040 1001 11545 6741546022 10440 0ustar everyone // // An experimental "tiny C" grammer by // Ian Kaplan // options { language="Cpp"; } class MyTinyCParser extends Parser; options { k = 2; exportVocab=MyTinyC; buildAST = true; } tokens { NULL_NODE; FUNC_LIST; FUNC; FUNC_HEAD; FORMAL_LIST; DECL; BLOCK; } // Function list funclist : ( function_def )* EOF! { #funclist = #([FUNC_LIST, "func_list"], #funclist ); } ; // Function definition function_def : func_header block { #function_def = #([FUNC, "func_decl"], #function_def ); } ; // function header (function type, name and argument list) func_header : func_name_decl LPAREN! ( formal_list )? RPAREN! { #func_header = #( [FUNC_HEAD, "func_head"], #func_header ); } ; // function type (which is optional) and function name func_name_decl : IDENT { #func_name_decl = #( #func_name_decl, [NULL_NODE, "null_node"] ); } | base_type IDENT^ ; // function formal argument list formal_list : base_type IDENT ( COMMA! base_type IDENT )* { #formal_list = #([FORMAL_LIST, "formal_list"], #formal_list); } ; // types base_type : ( "char"^ | "int"^ ) ; statement : block | assignment_statement | if_stmt | for_loop | return_statement | SEMICOLON! { #statement = #([ NULL_NODE, "null_stmt"]); } ; // a bracketed block block : LCURL! (decl)* (statement)* RCURL! { #block = #( [BLOCK, "block"], #block ); } ; // variable declaration list decl : base_type IDENT ( COMMA! IDENT )* SEMICOLON! 
{ #decl = #([DECL,"decl"], #decl); }
    ;

assignment_statement
    : assignment SEMICOLON!
    ;

// The syntactic predicate attaches an "else" to this "if" whenever one
// follows; otherwise the empty alternative is taken.
if_stmt
    : "if"^ LPAREN! expr RPAREN! statement
      ( ("else") => else_part
      | ()  // nothing
      )
    ;

else_part
    : "else"^ statement
    ;

for_loop
    : "for"^ loop_cntrl statement
    ;

// loop control
loop_cntrl
    : LPAREN! loop_init loop_cond loop_incr RPAREN!
    ;

// Each of the three loop control parts may be left out; a NULL_NODE
// placeholder is built for a missing part.
loop_init
    : SEMICOLON!
      { #loop_init = #([NULL_NODE, "null_init"]); }
    | assignment SEMICOLON!
    ;

loop_cond
    : SEMICOLON!
      { #loop_cond = #([NULL_NODE, "null_cond"]); }
    | expr SEMICOLON!
    ;

loop_incr
    : ()  // empty
      { #loop_incr = #([NULL_NODE, "null_incr"]); }
    | assignment
    ;

return_statement
    : "return"^ expr SEMICOLON!
    ;

primary_expr
    : IDENT
    | constant
    | (LPAREN! expr RPAREN! )
    ;

assignment
    : (IDENT ASSIGN^ )? expr
    ;

postfix_expr
    : primary_expr ( (LPAREN) => arg_list )?
    ;

boolneg_expr
    : ( "not"^ )* postfix_expr
    ;

sign_expr
    : boolneg_expr
    | MINUS^ boolneg_expr
    ;

// Binary operator levels.  Each rule is written in terms of the rule
// above it, down to "expr" at the bottom of the chain.
mul_expr
    : sign_expr (( TIMES^ | DIVIDE^ | MOD^ ) sign_expr)*
    ;

add_expr
    : mul_expr ( ( PLUS^ | MINUS^ ) mul_expr )*
    ;

shift_expr
    : add_expr (( SHIFT_LEFT^ | SHIFT_RIGHT^ ) add_expr )*
    ;

rel_expr
    : shift_expr (( LTHAN^ | GTHAN^ | GEQ^ | LEQ^ ) shift_expr)*
    ;

eq_expr
    : rel_expr (( EQ^ | NEQ^ ) rel_expr)*
    ;

lmul_expr
    : eq_expr ( "and"^ eq_expr )*
    ;

expr
    : lmul_expr ("or"^ lmul_expr)*
    ;

arg_list
    : LPAREN! expr ( COMMA! expr )* RPAREN!
;

constant
    : (ICON | CHCON)
    ;


class MyTinyCLexer extends Lexer;
options {
    k = 2;
    exportVocab=MyTinyC;
}

// White space is recognized and then skipped.
WS_
    : ( ' '
      | '\t'
      | '\n' { newline(); }  // increment the line counter
      | '\r'
      )
      { _ttype = Token::SKIP; }
    ;

IDENT
options { paraphrase = "identifier"; }
    : ('a'..'z' | 'A'..'Z' | '_' )
      ( ('a'..'z' | 'A'..'Z' | '_') | ('0'..'9' ))*
    ;

ICON
options { paraphrase = "integer constant"; }
    : '0'..'9' ('0'..'9')*
    ;

// A character constant is exactly one character between single quotes.
CHCON
options { paraphrase = "character constant"; }
    : "'" '\0'..'\255' "'"
    ;

COMMA
options { paraphrase = ","; }
    : ','
    ;

SEMICOLON
options { paraphrase = ";"; }
    : ';'
    ;

LPAREN
options { paraphrase = "("; }
    : '('
    ;

RPAREN
options { paraphrase = ")"; }
    : ')'
    ;

LCURL
options { paraphrase = "{"; }
    : '{'
    ;

RCURL
options { paraphrase = "}"; }
    : '}'
    ;

PLUS
options { paraphrase = "+"; }
    : '+'
    ;

MINUS
options { paraphrase = "-"; }
    : '-'
    ;

TIMES
options { paraphrase = "*"; }
    : '*'
    ;

DIVIDE
options { paraphrase = "/"; }
    : '/'
    ;

MOD
options { paraphrase = "%"; }
    : '%'
    ;

ASSIGN
options { paraphrase = "="; }
    : '='
    ;

EQ
options { paraphrase = "=="; }
    : "=="
    ;

NEQ
options { paraphrase = "!="; }
    : "!="
    ;

LTHAN
options { paraphrase = "<"; }
    : '<'
    ;

GTHAN
options { paraphrase = ">"; }
    : '>'
    ;

LEQ
options { paraphrase = "<="; }
    : "<="
    ;

GEQ
options { paraphrase = ">="; }
    : ">="
    ;

SHIFT_LEFT
options { paraphrase = "<<"; }
    : "<<"
    ;

SHIFT_RIGHT
options { paraphrase = ">>"; }
    : ">>"
    ;
bar.c100644 1040 1001 77 6735521125 10133 0ustar everyone
int bogosity()
{
  int a, b, c, d;

  a = b + c * d;
}
foo.c100644 1040 1001 643 6736310406 10170 0ustar everyone
int bogus( int a, int b)
{
  int x, y;
  int i, sum;
  char this_is_a_char_ident;

  if (x <= 4) {
    x = x - 1;
    y = y - -(x + 2);
  }
  else {
    x = x + 1;
  }

  sum = 0;
  for (i = 0; i < 10; i = i + 1)
    sum = sum + i;

  for (; ; )
    ;

  for (sum = 0; sum < 10; sum = sum + 1)
    ;

  return sum + x * y;
}

bogosity( char p, int q)
{
  q = p + 4;
  return q;
}