/* parser.d */

import stringtools;
import std.string;
import ast;
import lqtypes;

/* Reader-macro tokens that introduce a quoted expression. */
const string[] QUOTATIONS = ["'", "`", ",", ",@"];

/*
 * Result of one matching attempt.
 *
 * A successful state carries the matched AST element and the tokens that
 * remain after it; a failed state only has `ok == false` (its `matched`
 * and `tokens_left` fields are left at their default values).
 */
class ParserState {
    ASTElement matched;   // element matched
    string[] tokens_left; // tokens remaining after the match
    bool ok;              // true if the match succeeded

    /* Build a successful state. */
    this(ASTElement matched, string[] tokens_left) {
        this.matched = matched;
        this.tokens_left = tokens_left;
        this.ok = true;
    }

    /* Build a failed ("no match") state. */
    this() {
        this.ok = false;
    }
}

/* True if `token` is an opening or closing parenthesis. */
bool is_delimiter(string token) {
    return token == "(" || token == ")";
}

/* True if `token` is one of the QUOTATIONS tokens. */
bool is_quotation(string token) {
    return string_in_list(token, QUOTATIONS);
}

/*
 * Map a quotation token to the name of the form it abbreviates.
 *
 * Throws: Exception if `qname` is not one of the QUOTATIONS tokens.
 */
string quotation_full(string qname) {
    switch (qname) {
        case "'":
            return "quote";
        case "`":
            return "backquote";
        case ",":
            return "unquote";
        case ",@":
            return "unquote-splicing";
        default:
            // Fall through to the exception below. Without a default the
            // switch is either rejected by the compiler or raises a switch
            // error instead of the intended message.
            break;
    }
    throw new Exception(format("Not a quotation: %s", qname));
}

unittest {
    assert(is_quotation("'") == true);
    assert(is_quotation("?") == false);
    assert(quotation_full(",@") == "unquote-splicing");

    // Unknown tokens must produce the documented Exception.
    bool threw = false;
    try {
        quotation_full("?");
    } catch (Exception e) {
        threw = true;
    }
    assert(threw);
}

/*
 * Recursive-descent parser turning a flat token list into AST elements.
 *
 * Every match_* method takes the tokens still to be consumed and returns
 * either a successful ParserState or the shared NO_MATCH state. None of
 * them modifies the caller's slice.
 */
class Parser {
    this() {
        NO_MATCH = new ParserState();
    }

    /*
     * Return a list of expressions (as ASTElements) from the given tokens.
     *
     * Throws: Exception("syntax error") if the tokens cannot be parsed.
     */
    ASTElement[] parse(string[] tokens) {
        return match_program(tokens);
    }

private:
    ParserState NO_MATCH; // shared failure state, created once per parser

    /* Match a single token that is neither a delimiter nor a quotation. */
    ParserState match_atom(string[] tokens) {
        // Running out of tokens is a failed match, not a bounds violation.
        if (tokens.length == 0) {
            return NO_MATCH;
        }
        string x = tokens[0];
        if (is_delimiter(x) || is_quotation(x)) {
            return NO_MATCH;
        }
        return new ParserState(new ASTAtom(x), tokens[1 .. $]);
    }

    /* Match "(" followed by zero or more expressions and a closing ")". */
    ParserState match_list(string[] tokens) {
        ParserState open = match_token(tokens, "(");
        if (!open.ok) {
            return NO_MATCH;
        }
        tokens = open.tokens_left;

        ASTElement[] collected = [];
        while (tokens.length > 0) {
            ParserState item = match_expr(tokens);
            if (!item.ok) {
                // Expected at the closing ")"; checked right below.
                break;
            }
            collected ~= item.matched;
            tokens = item.tokens_left;
        }

        // Fails (instead of crashing) when the input ended before ")".
        ParserState close = match_token(tokens, ")");
        if (!close.ok) {
            return NO_MATCH;
        }
        return new ParserState(new ASTList(collected), close.tokens_left);
    }

    /* Match one expression: an atom, a list, or a quoted expression. */
    ParserState match_expr(string[] tokens) {
        ParserState a = match_atom(tokens);
        if (a.ok) {
            return a;
        }
        ParserState b = match_list(tokens);
        if (b.ok) {
            return b;
        }
        ParserState c = match_quotation(tokens);
        if (c.ok) {
            return c;
        }
        return NO_MATCH;
    }

    /*
     * Match a quotation token followed by an expression and expand it to
     * the two-element list (<full-name> <expr>), e.g. 'x => (quote x).
     */
    ParserState match_quotation(string[] tokens) {
        ParserState a = match_any(tokens, QUOTATIONS);
        if (!a.ok) {
            return NO_MATCH;
        }
        // match_any always wraps the matched token in an ASTAtom.
        ASTAtom q = cast(ASTAtom) a.matched;

        ParserState b = match_expr(a.tokens_left);
        if (!b.ok) {
            return NO_MATCH;
        }
        string qf = quotation_full(q.data);
        // The explicit cast keeps the literal's element type at the
        // ASTElement base class instead of the ASTAtom subclass.
        ASTElement[] qexpr = [cast(ASTElement)(new ASTAtom(qf)), b.matched];
        return new ParserState(new ASTList(qexpr), b.tokens_left);
    }

    /*
     * Match expressions until all tokens are consumed.
     *
     * Throws: Exception("syntax error") at the first position where no
     * expression can be matched.
     */
    ASTElement[] match_program(string[] tokens) {
        ASTElement[] collected = [];
        while (tokens.length > 0) {
            ParserState p = match_expr(tokens);
            if (!p.ok) {
                throw new Exception("syntax error"); // FIXME
            }
            collected ~= p.matched;
            tokens = p.tokens_left;
        }
        return collected;
    }

    /* auxiliary methods */

    /* Match exactly `token` at the head of `tokens`. */
    ParserState match_token(string[] tokens, string token) {
        // An exhausted input can never match a required token.
        if (tokens.length == 0) {
            return NO_MATCH;
        }
        string x = tokens[0];
        if (x == token) {
            return new ParserState(new ASTAtom(x), tokens[1 .. $]);
        }
        return NO_MATCH;
    }

    /* Match any one of `choices` at the head of `tokens`. */
    ParserState match_any(string[] tokens, string[] choices) {
        // An exhausted input can never match a required token.
        if (tokens.length == 0) {
            return NO_MATCH;
        }
        string x = tokens[0];
        if (string_in_list(x, choices)) {
            return new ParserState(new ASTAtom(x), tokens[1 .. $]);
        }
        return NO_MATCH;
    }
}

/*
 * Return a list of expressions (each of which is in the form of an LqType).
 *
 * Parses `tokens` with a fresh Parser and calls convert() on every
 * resulting AST element.
 */
LqType[] parse(string[] tokens) {
    Parser p = new Parser();
    ASTElement[] elems = p.parse(tokens);
    LqType[] exprs = [];
    foreach (ASTElement elem; elems) {
        exprs ~= elem.convert();
    }
    return exprs;
}

unittest {
    Parser p = new Parser();

    void test_parser(string[] tokens, string result) {
        ASTElement e = p.parse(tokens)[0];
        assert(e.toString() == result, e.toString());
    }

    // Truncated or unbalanced input must be reported as an Exception.
    void test_syntax_error(string[] tokens) {
        bool threw = false;
        try {
            p.parse(tokens);
        } catch (Exception e) {
            threw = true;
        }
        assert(threw);
    }

    test_parser(["x"], "x");
    test_parser(["42"], "42");
    test_parser(["(", ")"], "()");
    test_parser(["(", "a", ")"], "(a)");
    test_parser(["(", "a", "3", ")"], "(a 3)");
    test_parser(["'", "x"], "(quote x)");
    test_parser(["'", "(", "y", ")"], "(quote (y))");

    test_syntax_error(["("]);
    test_syntax_error(["(", "a"]);
    test_syntax_error(["'"]);
    test_syntax_error([")"]);
}