From de1be1dc660ea59ff8813ba9227a335be1d9cd08 Mon Sep 17 00:00:00 2001 From: joe-conigliaro Date: Wed, 25 Dec 2019 23:39:58 +1100 Subject: [PATCH] compiler2: start implementing pratt style parser --- vlib/compiler2/ast/ast.v | 92 ++++++++++++++++++++--------- vlib/compiler2/parser/parser.v | 74 +++++++++++------------ vlib/compiler2/parser/parser_test.v | 39 ++++++++++-- vlib/compiler2/token/token.v | 49 +++++++++++++++ 4 files changed, 180 insertions(+), 74 deletions(-) diff --git a/vlib/compiler2/ast/ast.v b/vlib/compiler2/ast/ast.v index 5960117e5b..9deab68fcb 100644 --- a/vlib/compiler2/ast/ast.v +++ b/vlib/compiler2/ast/ast.v @@ -10,12 +10,13 @@ import ( struct Foo {} -pub type Expr = Foo | IfExpr | BinaryExpr | IntegerExpr +// pub type Expr = Foo | IfExpr | BinaryExpr | IntegerExpr +pub type Expr = Foo | IfExpr | BinaryExpr | ScalarExpr | UnaryExpr -pub struct IntegerExpr { -pub: - val int -} +// pub struct IntegerExpr { +// pub: +// val int +// } /* pub enum Expr { @@ -45,6 +46,24 @@ pub: right Expr } +pub struct ScalarExpr { +pub: + token token.Token + // op BinaryOp + // op token.Token + typ token.Token + val string + left Expr +} + +pub struct UnaryExpr { +pub: + // token token.Token + //op BinaryOp + op token.Token + left Expr +} + struct IfExpr { token token.Token cond Expr @@ -57,29 +76,44 @@ struct ReturnStmt { results []Expr } -enum BinaryOp { - sum - difference - product - quotient - remainder - bitwise_and - bitwise_or - bitwise_xor - left_shift - right_shift - - equality - inequality - less_than - less_than_or_equal - more_than - more_than_or_equal - - in_check - - //These are suffixed with `bool` to prevent conflict with the keyword `or` - and_bool - or_bool +// string representation of expr +pub fn (x Expr) str() string { + match x { + BinaryExpr { + return '(${it.left.str()}$it.op.str()${it.right.str()})' + } + ScalarExpr { + return '${it.left.str()}$it.val' + } + UnaryExpr { + return '${it.left.str()}$it.op.str()' + } + else { return '' } + } 
} +// enum BinaryOp { +// sum +// difference +// product +// quotient +// remainder +// bitwise_and +// bitwise_or +// bitwise_xor +// left_shift +// right_shift + +// equality +// inequality +// less_than +// less_than_or_equal +// more_than +// more_than_or_equal + +// in_check + +// //These are suffixed with `bool` to prevent conflict with the keyword `or` +// and_bool +// or_bool +// } diff --git a/vlib/compiler2/parser/parser.v b/vlib/compiler2/parser/parser.v index d3b403fe45..fe69dc54c3 100644 --- a/vlib/compiler2/parser/parser.v +++ b/vlib/compiler2/parser/parser.v @@ -24,7 +24,8 @@ pub fn parse_expr(text string) ast.Expr { tok: res.tok lit: res.lit } - return p.expr() + // return p.expr() + return p.expr(token.lowest_prec) } fn (p mut Parser) next() { @@ -34,48 +35,43 @@ fn (p mut Parser) next() { p.lit = res.lit } -fn (p mut Parser) expr() ast.Expr { - //println('\n\nexpr()') - mut node := p.term() - for p.tok == .plus || p.tok == .minus { - op := p.tok - p.next() - node = ast.BinaryExpr { - left: node - op: op - right: p.term() +// Implementation of Pratt Precedence +pub fn (p mut Parser) expr(rbp int) ast.Expr { + // null denotation (prefix) + tok := p.tok + lit := p.lit + p.next() + mut left := ast.Expr{} + match tok { + .lpar { + left = p.expr(0) + if p.tok != .rpar { + panic("Parse Error: expected )") + } + p.next() + } + else { + // TODO: fix bug. 
note odd condition instead of else if (same below) + if tok.is_scalar() { + left = ast.ScalarExpr{val: lit, typ: tok} + } + if !tok.is_scalar() && tok.is_unary() { + left = ast.UnaryExpr{left: p.expr(token.highest_prec), op: tok} + } } } - return node -} -fn (p mut Parser) term() ast.Expr { - mut node := p.factor() - for p.tok == .mul || p.tok == .div || p.tok == .mod { - op := p.tok + // left binding power + for rbp < p.tok.precedence() { + tok2 := p.tok p.next() - node = ast.BinaryExpr { - left: node - op: op - right: p.factor() + // left denotation (infix) + if tok2.is_right_assoc() { + left = ast.BinaryExpr{left: left, op: tok2, right: p.expr(tok2.precedence() - 1)} + } + if !tok2.is_right_assoc() && tok2.is_left_assoc() { + left = ast.BinaryExpr{left: left, op: tok2, right: p.expr(tok2.precedence())} } } - return node - //return ast.BinaryExpr{} - //return ast.Expr.Binary(ast.BinaryExpr{}) + return left } - -fn (p mut Parser) factor() ast.Expr { - if p.tok == .number { - val := p.lit.int() - p.next() - return ast.IntegerExpr { val: val } - } else { - println('bad factor token') - println(p.tok) - exit(1) - } -} - - - diff --git a/vlib/compiler2/parser/parser_test.v b/vlib/compiler2/parser/parser_test.v index 2d5231692d..72d5d0ff57 100644 --- a/vlib/compiler2/parser/parser_test.v +++ b/vlib/compiler2/parser/parser_test.v @@ -5,19 +5,37 @@ import compiler2.ast fn test_parser() { //expr := ast.IntegerExpr {val:10} //expr := ast.BinaryExpr{} + + // print using walk expr := parse_expr('3 + 7') walk(expr) - println('') + println('\n') + + text_expr := [ + '4 + 4', + '1 + 2 * 5', + '(2 * 3) / 2', + '3 + (7 * 6)', + '2 ^ 8 * (7 * 6)', + '(2) + (17*2-30) * (5)+2 - (8/2)*4' + ] + for s in text_expr { + // print using str method + x := parse_expr(s) + println('source: $s') + println('parsed: $x') + println('===================') + } } + fn walk(node ast.Expr) { //println('walk()') match node { - ast.IntegerExpr { - print(it.val) - } ast.BinaryExpr { + print(' (') 
walk(it.left) + // print('$it.op.str()') match it.op { .plus { print(' + ') @@ -29,7 +47,16 @@ fn walk(node ast.Expr) { } walk(it.right) + print(') ') } - else {} + ast.ScalarExpr { + walk(it.left) + print(' $it.val ') + } + ast.UnaryExpr { + walk(it.left) + print(' $it.op ') + } + else { } } -} +} \ No newline at end of file diff --git a/vlib/compiler2/token/token.v b/vlib/compiler2/token/token.v index ab6446b58c..5946ad91f3 100644 --- a/vlib/compiler2/token/token.v +++ b/vlib/compiler2/token/token.v @@ -303,3 +303,52 @@ pub fn (t Token) str() string { return token_str[int(t)] } + +// Representation of highest and lowest precedence +const ( + lowest_prec = 0 + highest_prec = 7 +) + +// precedence returns a token's binding power (mul/div bind tighter than plus/minus), or lowest_prec if it has none +pub fn (tok Token) precedence() int { + match tok { + .plus, .minus { return 4 } + .mul, .div { return 5 } + .xor { return 6 } + .mod {return 7 } + else { return lowest_prec } + } +} + +// is_scalar returns true if the token is a scalar +pub fn (tok Token) is_scalar() bool { + match tok { + .number { return true } + else { return false } + } +} + +// is_unary returns true if the token can be in a unary expression +pub fn (tok Token) is_unary() bool { + match tok { + .plus, .minus { return true } + else { return false } + } +} + +// is_left_assoc returns true if the token is left associative +pub fn (tok Token) is_left_assoc() bool { + match tok { + .number, .plus, .minus, .mul, .div, .mod { return true } + else { return false } + } +} + +// is_right_assoc returns true if the token is right associative +pub fn (tok Token) is_right_assoc() bool { + match tok { + .xor { return true } + else { return false } + } +}