Mirror of https://github.com/vlang/v.git

new AST built with sum types

Commit ca284482cb by Alexander Medvednikov, 2019-12-22 04:34:37 +03:00
Parent: 803ded3dec
15 changed files with 1521 additions and 38 deletions


@@ -47,8 +47,12 @@ fn (p mut Parser) bool_expression() string {
 		p.error('expr() returns empty type')
 	}
 	if expected != typ && expected in p.table.sum_types { // TODO perf
-		p.cgen.set_placeholder(start_ph, '/*KUK*/($expected) { .obj = ($typ[]) { ')
-		p.gen('}, .typ = 1}')//${val}_type }')
+		p.cgen.set_placeholder(start_ph,
+		//'/*SUM TYPE CAST*/($expected) { .obj = &($typ[]) { '
+		'/*SUM TYPE CAST*/($expected) { .obj = memdup(& ')
+		tt := typ.all_after('_') // TODO
+		//p.gen('}, .typ = SumType_${tt} }')//${val}_type }')
+		p.gen(', sizeof($typ) ), .typ = SumType_${tt} }')//${val}_type }')
 	}
 	return typ
@@ -369,7 +373,7 @@ fn (p mut Parser) name_expr() string {
 	//println(q)
 	//println(q[idx])
 	arg_type := q[idx]
 	p.gen('($enum_type.name) { .obj = ($arg_type[]) { ')
 	p.bool_expression()
 	p.check(.rpar)
 	p.gen('}, .typ = ${val}_type }')

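What the new placeholder generates, seen from the V side — a minimal sketch with hypothetical types (the C fragment in the comment just instantiates the placeholder strings above):

type Expr = BoolExpr | BinExpr

struct BoolExpr {
	foo int
}

struct BinExpr {
	bar int
}

fn parse() Expr {
	// `BoolExpr` != `Expr` and `Expr` is in p.table.sum_types, so this
	// return is wrapped roughly as:
	//   (Expr) { .obj = memdup(& (BoolExpr){ ... }, sizeof(BoolExpr) ), .typ = SumType_BoolExpr }
	return BoolExpr{foo: 1}
}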

@@ -220,7 +220,7 @@ fn (p mut Parser) fn_decl() {
 	mut f := Fn{
 		mod: p.mod
 		is_public: is_pub || p.is_vh // functions defined in .vh are always public
 		is_unsafe: p.attr == 'unsafe_fn'
 		is_deprecated: p.attr == 'deprecated'
 		comptime_define: if p.attr.starts_with('if ') { p.attr[3..] } else { '' }
@@ -799,7 +799,7 @@ fn (p mut Parser) fn_call(f mut Fn, method_ph int, receiver_var, receiver_type s
 	if f.is_method {
 		receiver := f.args.first()
 		mut receiver_is_interface := false
-		if receiver.typ.ends_with('er') {
+		if receiver.typ.ends_with('er') || receiver.typ[0] == `I` {
 			// I absolutely love this syntax
 			// `s.speak()` =>
 			// `((void (*)())(Speaker_name_table[s._interface_idx][1]))(s._object);
@@ -893,7 +893,7 @@ fn (p mut Parser) fn_args(f mut Fn) {
 		typ: typ
 		is_arg: true
 		// is_mut: is_mut
 		line_nr: p.scanner.line_nr
 		token_idx: p.cur_tok_index()
 	}
@@ -1083,7 +1083,7 @@ fn (p mut Parser) fn_call_args(f mut Fn, generic_param_types []string) {
 	// fn run(r Animal) { ... }
 	// `run(dog)` adds `Dog` to the `Animal` interface.
 	// This is needed to generate an interface table.
-	if arg.typ.ends_with('er') {
+	if arg.typ.ends_with('er') || arg.typ[0] == `I` {
 		t := p.table.find_type(arg.typ)
 		if t.cat == .interface_ {
 			// perform((Speaker) { ._object = &dog,


@@ -16,6 +16,9 @@ fn (p mut Parser) match_statement(is_expr bool) string {
 	if typ.starts_with('array_') {
 		p.error('arrays cannot be compared')
 	}
+	is_sum_type := typ in p.table.sum_types
+	mut sum_child_type := ''
 	// is it safe to use p.cgen.insert_before ???
 	tmp_var := p.get_tmp()
 	p.cgen.insert_before('$typ $tmp_var = $expr;')
@@ -111,6 +114,7 @@ fn (p mut Parser) match_statement(is_expr bool) string {
 	}
 	ph := p.cgen.add_placeholder()
 	// Multiple checks separated by comma
+	p.open_scope()
 	mut got_comma := false
 	for {
 		if got_comma {
@@ -121,11 +125,26 @@ fn (p mut Parser) match_statement(is_expr bool) string {
 			got_string = true
 			p.gen('string_eq($tmp_var, ')
 		}
+		else if is_sum_type {
+			p.gen('${tmp_var}.typ == ')
+		}
 		else {
 			p.gen('$tmp_var == ')
 		}
 		p.expected_type = typ
-		p.check_types(p.bool_expression(), typ)
+		// `match node { ast.BoolExpr { it := node as BoolExpr ... } }`
+		if is_sum_type {
+			sum_child_type = p.get_type2().name
+			tt := sum_child_type.all_after('_')
+			p.gen('SumType_$tt')
+			//println('got child $sum_child_type')
+			p.register_var(Var{
+				name: 'it'
+				typ: sum_child_type
+			})
+		} else {
+			p.check_types(p.bool_expression(), typ)
+		}
 		p.expected_type = ''
 		if got_string {
 			p.gen(')')
@@ -169,12 +188,16 @@ fn (p mut Parser) match_statement(is_expr bool) string {
 		p.fspace()
 		p.check(.lcbr)
 		p.genln('{ ')
+		if is_sum_type {
+			p.genln(' $sum_child_type it = *($sum_child_type*)$tmp_var .obj ;')
+		}
 		p.statements()
 		all_cases_return = all_cases_return && p.returns
 		// p.gen(')')
 	}
 	i++
 	p.fgen_nl()
+	p.close_scope()
 }
 p.error('match must be exhaustive')
 // p.returns = false // only get here when no default, so return is not guaranteed
@@ -229,12 +252,12 @@ fn (p mut Parser) if_statement(is_expr bool, elif_depth int) string {
 		name: var_name
 		typ: typ
 		is_mut: false // TODO
 		is_used: true // TODO
 		// is_alloc: p.is_alloc || typ.starts_with('array_')
 		// line_nr: p.tokens[ var_token_idx ].line_nr
 		// token_idx: var_token_idx
 	})
 	p.statements()
 	p.close_scope()

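At the V level, the new match support looks like this — a sketch reusing the hypothetical Expr sum type from above; `it` is the variable registered via register_var and backed by the generated `$sum_child_type it = *($sum_child_type*)$tmp_var.obj;` line:

fn handle_expr(e Expr) {
	match e {
		BoolExpr {
			// roughly: if (tmp.typ == SumType_BoolExpr) { BoolExpr it = *(BoolExpr*)tmp.obj; ... }
			println(it.foo)
		}
		BinExpr {
			println(it.bar)
		}
		else {}
	}
}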

@@ -787,7 +787,7 @@ fn (p mut Parser) type_decl() {
 	}
 	p.check(.key_type)
 	p.fspace()
-	name := p.check_name()
+	mut name := p.check_name()
 	p.fspace()
 	// V used to have 'type Foo struct', many Go users might use this syntax
 	if p.tok == .key_struct {
@@ -801,6 +801,9 @@ fn (p mut Parser) type_decl() {
 	// Sum type
 	is_sum := p.tok == .pipe
 	if is_sum {
+		if !p.builtin_mod && p.mod != 'main' {
+			name = p.prepend_mod(name)
+		}
 		// Register the first child (name we already parsed)
 		/*
 		p.table.register_type(Type{
@@ -811,26 +814,21 @@ fn (p mut Parser) type_decl() {
 		})
 		*/
 		// Register the rest of them
+		mut idx := 0
 		for p.tok == .pipe {
+			idx++
 			p.next()
-			child := p.check_name()
+			child_type_name := p.check_name()
 			if p.pass == .main {
 				// Update the type's parent
-				println('child=$child parent=$name')
-				mut t := p.table.find_type(child)
+				//println('child=$child_type_name parent=$name')
+				mut t := p.find_type(child_type_name)
 				if t.name == '' {
-					p.error('unknown type `$child`')
+					p.error('unknown type `$child_type_name`')
 				}
 				t.parent = name
 				p.table.rewrite_type(t)
-				/*
-				p.table.register_type(Type{
-					parent: name
-					name: child
-					mod: p.mod
-					is_public: is_pub
-				})
-				*/
+				p.cgen.consts << '#define SumType_$child_type_name $idx // DEF2'
 			}
 		}
 		if p.pass == .decl {
@@ -838,7 +836,7 @@ fn (p mut Parser) type_decl() {
 			println(p.table.sum_types)
 		}
 		// Register the actual sum type
-		println('reging sum $name')
+		//println('registering sum $name')
 		p.table.register_type(Type{
 			name: name
 			mod: p.mod

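A sketch of what the define registration above emits for a hypothetical `type Expr = Foo | BoolExpr | BinExpr` (appended to p.cgen.consts):

#define SumType_BoolExpr 1 // DEF2
#define SumType_BinExpr 2 // DEF2

`idx` is incremented only for the children after the first one, so the first child (`Foo` here) gets no define from this loop.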

@@ -15,7 +15,7 @@ const (
 	error_context_after = 2 // ^^^ same, but after
 )
-struct Scanner {
+pub struct Scanner {
 mut:
 	file_path string
 	text string


@@ -38,7 +38,7 @@ fn (p mut Parser) struct_decl(generic_param_types []string) {
 	if !p.builtin_mod && !name[0].is_capital() {
 		p.error('mod=$p.mod struct names must be capitalized: use `struct ${name.capitalize()}`')
 	}
-	if is_interface && !name.ends_with('er') {
+	if is_interface && !name.ends_with('er') && name[0] != `I` {
 		p.error('interface names temporarily have to end with `er` (e.g. `Speaker`, `Reader`)')
 	}
 	mut generic_types := map[string]string

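Together with the fn.v and table.v changes above, interface names may now start with `I` as an alternative to the `er` suffix. A sketch of what now passes this check (hypothetical names):

interface ISpeaker {
	speak() string
}

fn perform(s ISpeaker) {
	println(s.speak())
}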

@@ -594,9 +594,9 @@ fn (t &Table) find_type(name_ string) Type {
 }
 fn (p mut Parser) check_types2(got_, expected_ string, throw bool) bool {
-	if p.fileis('type_test') {
-		println('got=$got_ exp=$expected_')
-	}
+	//if p.fileis('type_test') {
+	//	println('got=$got_ exp=$expected_')
+	//}
 	mut got := got_
 	mut expected := expected_
 	// p.log('check types got="$got" exp="$expected" ')
@@ -724,18 +724,17 @@ fn (p mut Parser) check_types2(got_, expected_ string, throw bool) bool {
 	got = got.replace('*', '').replace('ptr', '')
 	if got != expected {
 		// Interface check
-		if expected.ends_with('er') {
+		if expected.ends_with('er') || expected[0] == `I` {
 			if p.satisfies_interface(expected, got, throw) {
 				return true
 			}
 		}
 		// Sum type
-		println(expected)
 		if expected in p.table.sum_types {
-			println('checking sum')
+			//println('checking sum')
 			child := p.table.find_type(got)
 			if child.parent == expected {
-				println('yep $expected')
+				//println('yep $expected')
 				return true
 			}
 		}

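The `child.parent == expected` test is what lets a child value be passed where the sum type is expected. A sketch, again with the hypothetical Expr type:

fn eval(e Expr) {
	println('got an Expr')
}

fn main() {
	b := BoolExpr{foo: 1}
	eval(b) // BoolExpr's registered parent is `Expr`, so check_types2 accepts the call
}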

@@ -20,9 +20,7 @@ fn test_person_str() {
 struct Foo {}
-struct WTF {
-	wtf int
-}
+type Expr = Foo | BoolExpr | BinExpr | UnaryExpr
 struct BoolExpr {
 	foo int
@@ -37,7 +35,6 @@ struct UnaryExpr {
 }
-type Expr = Foo | BoolExpr | BinExpr | UnaryExpr
 fn handle_expr(e Expr) {
@@ -47,7 +44,7 @@ fn parse_bool() BoolExpr {
 	return BoolExpr{}
 }
-fn test_sum() {
+fn test_sum_types() {
 	b := parse_bool()
 	handle_expr(b)
 }

vlib/compiler2/ast/ast.v (new file)

@@ -0,0 +1,85 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module ast
import (
compiler2.token
)
struct Foo {}
pub type Expr = Foo | IfExpr | BinaryExpr | IntegerExpr
pub struct IntegerExpr {
pub:
val int
}
/*
pub enum Expr {
Binary(BinaryExpr)
If(IfExpr)
Integer(IntegerExpr)
}
*/
pub struct Stmt {
pos int
//end int
}
// A single identifier
struct Ident {
token token.Token
value string
}
pub struct BinaryExpr {
pub:
token token.Token
//op BinaryOp
op token.Token
left Expr
right Expr
}
struct IfExpr {
token token.Token
cond Expr
body []Stmt
else_ []Stmt
}
struct ReturnStmt {
token token.Token // or pos
results []Expr
}
enum BinaryOp {
sum
difference
product
quotient
remainder
bitwise_and
bitwise_or
bitwise_xor
left_shift
right_shift
equality
inequality
less_than
less_than_or_equal
more_than
more_than_or_equal
in_check
//These are suffixed with `bool` to prevent conflict with the keyword `or`
and_bool
or_bool
}

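An assumed usage sketch for these nodes, built from outside the module (only the pub fields are reachable) — the tree for `3 + 7`:

import compiler2.ast
import compiler2.token

fn build() ast.BinaryExpr {
	return ast.BinaryExpr{
		op: token.Token.plus
		left: ast.IntegerExpr{
			val: 3
		}
		right: ast.IntegerExpr{
			val: 7
		}
	}
}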
vlib/compiler2/fmt/fmt.v (new file)

@@ -0,0 +1,15 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module fmt
struct Fmt {
// vfmt fields TODO move to a separate struct
// fmt_out strings.Builder
fmt_lines []string
// fmt_line string
fmt_indent int
fmt_line_empty bool
// fmt_needs_nl bool
}


@@ -0,0 +1,81 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module parser
import (
compiler2.scanner
compiler2.ast
compiler2.token
)
struct Parser {
scanner &scanner.Scanner
mut:
tok token.Token
lit string
}
pub fn parse_expr(text string) ast.Expr {
mut s := scanner.new_scanner(text)
res := s.scan()
mut p := Parser{
scanner: s
tok: res.tok
lit: res.lit
}
return p.expr()
}
fn (p mut Parser) next() {
res := p.scanner.scan()
p.tok = res.tok
//println(p.tok.str())
p.lit = res.lit
}
fn (p mut Parser) expr() ast.Expr {
//println('\n\nexpr()')
mut node := p.term()
for p.tok == .plus || p.tok == .minus {
op := p.tok
p.next()
node = ast.BinaryExpr {
left: node
op: op
right: p.term()
}
}
return node
}
fn (p mut Parser) term() ast.Expr {
mut node := p.factor()
for p.tok == .mul || p.tok == .div || p.tok == .mod {
op := p.tok
p.next()
node = ast.BinaryExpr {
left: node
op: op
right: p.factor()
}
}
return node
//return ast.BinaryExpr{}
//return ast.Expr.Binary(ast.BinaryExpr{})
}
fn (p mut Parser) factor() ast.Expr {
if p.tok == .number {
val := p.lit.int()
p.next()
return ast.IntegerExpr { val: val }
} else {
println('bad factor token')
println(p.tok)
exit(1)
}
}

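expr() consumes only .plus and .minus and delegates to term() for the tighter-binding .mul/.div/.mod, so operator precedence falls out of the call structure. A sketch of the tree this yields:

e := parse_expr('2 + 3 * 4')
// e is BinaryExpr{ op: .plus, left: IntegerExpr{ val: 2 },
//                  right: BinaryExpr{ op: .mul, left: IntegerExpr{ val: 3 }, right: IntegerExpr{ val: 4 } } }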

@@ -0,0 +1,35 @@
module parser
import compiler2.ast
fn test_parser() {
//expr := ast.IntegerExpr {val:10}
//expr := ast.BinaryExpr{}
expr := parse_expr('3 + 7')
walk(expr)
println('')
}
fn walk(node ast.Expr) {
//println('walk()')
match node {
ast.IntegerExpr {
print(it.val)
}
ast.BinaryExpr {
walk(it.left)
match it.op {
.plus {
print(' + ')
}
.minus {
print(' - ')
}
else {}
}
walk(it.right)
}
else {}
}
}


@@ -0,0 +1,911 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner
import (
os
compiler2.token
// strings
)
const (
single_quote = `\'`
double_quote = `"`
error_context_before = 2 // how many lines of source context to print before the pointer line
error_context_after = 2 // ^^^ same, but after
)
pub struct Scanner {
mut:
file_path string
text string
pos int
line_nr int
last_nl_pos int // for calculating column
inside_string bool
inter_start bool // for hacky string interpolation TODO simplify
inter_end bool
debug bool
line_comment string
//prev_tok TokenKind
started bool
fn_name string // needed for @FN
print_line_on_error bool
print_colored_error bool
print_rel_paths_on_error bool
quote byte // which quote is used to denote current string: ' or "
line_ends []int // the positions of source lines ends (i.e. \n signs)
nr_lines int // total number of lines in the source file that were scanned
is_vh bool // Keep newlines
is_fmt bool // Used only for skipping ${} in strings, since we need literal
// string values when generating formatted code.
}
// new scanner from file.
fn new_scanner_file(file_path string) &Scanner {
if !os.exists(file_path) {
verror("$file_path doesn't exist")
}
mut raw_text := os.read_file(file_path)or{
verror('scanner: failed to open $file_path')
return 0
}
// BOM check
if raw_text.len >= 3 {
c_text := raw_text.str
if c_text[0] == 0xEF && c_text[1] == 0xBB && c_text[2] == 0xBF {
// skip three BOM bytes
offset_from_begin := 3
raw_text = tos(c_text[offset_from_begin], vstrlen(c_text) - offset_from_begin)
}
}
mut s := new_scanner(raw_text)
//s.init_fmt()
s.file_path = file_path
return s
}
// new scanner from string.
pub fn new_scanner(text string) &Scanner {
return &Scanner{
text: text
print_line_on_error: true
print_colored_error: true
print_rel_paths_on_error: true
}
}
// TODO remove once multiple return values are implemented
pub struct ScanRes {
pub:
tok token.Token
lit string
}
fn scan_res(tok token.Token, lit string) ScanRes {
return ScanRes{
tok,lit}
}
fn (s mut Scanner) ident_name() string {
start := s.pos
for {
s.pos++
if s.pos >= s.text.len {
break
}
c := s.text[s.pos]
if !is_name_char(c) && !c.is_digit() {
break
}
}
name := s.text[start..s.pos]
s.pos--
return name
}
fn (s mut Scanner) ident_hex_number() string {
start_pos := s.pos
s.pos += 2 // skip '0x'
for {
if s.pos >= s.text.len {
break
}
c := s.text[s.pos]
if !c.is_hex_digit() {
break
}
s.pos++
}
number := s.text[start_pos..s.pos]
s.pos--
return number
}
fn (s mut Scanner) ident_oct_number() string {
start_pos := s.pos
for {
if s.pos >= s.text.len {
break
}
c := s.text[s.pos]
if c.is_digit() {
if !c.is_oct_digit() {
s.error('malformed octal constant')
}
}
else {
break
}
s.pos++
}
number := s.text[start_pos..s.pos]
s.pos--
return number
}
fn (s mut Scanner) ident_dec_number() string {
start_pos := s.pos
// scan integer part
for s.pos < s.text.len && s.text[s.pos].is_digit() {
s.pos++
}
// e.g. 1..9
// we just return '1' and don't scan '..9'
if s.expect('..', s.pos) {
number := s.text[start_pos..s.pos]
s.pos--
return number
}
// scan fractional part
if s.pos < s.text.len && s.text[s.pos] == `.` {
s.pos++
for s.pos < s.text.len && s.text[s.pos].is_digit() {
s.pos++
}
if !s.inside_string && s.pos < s.text.len && s.text[s.pos] == `f` {
s.error('no `f` is needed for floats')
}
}
// scan exponential part
mut has_exponential_part := false
if s.expect('e+', s.pos) || s.expect('e-', s.pos) {
s.pos += 2
exp_start_pos := s.pos
for s.pos < s.text.len && s.text[s.pos].is_digit() {
s.pos++
}
if exp_start_pos == s.pos {
s.error('exponent has no digits')
}
has_exponential_part = true
}
// error check: 1.23.4, 123.e+3.4
if s.pos < s.text.len && s.text[s.pos] == `.` {
if has_exponential_part {
s.error('exponential part should be integer')
}
else {
s.error('too many decimal points in number')
}
}
number := s.text[start_pos..s.pos]
s.pos--
return number
}
fn (s mut Scanner) ident_number() string {
if s.expect('0x', s.pos) {
return s.ident_hex_number()
}
if s.expect('0.', s.pos) || s.expect('0e', s.pos) {
return s.ident_dec_number()
}
if s.text[s.pos] == `0` {
return s.ident_oct_number()
}
return s.ident_dec_number()
}
fn (s mut Scanner) skip_whitespace() {
// if s.is_vh { println('vh') return }
for s.pos < s.text.len && s.text[s.pos].is_white() {
if is_nl(s.text[s.pos]) && s.is_vh {
return
}
// Count \r\n as one line
if is_nl(s.text[s.pos]) && !s.expect('\r\n', s.pos - 1) {
s.inc_line_number()
}
s.pos++
}
}
fn (s mut Scanner) end_of_file() ScanRes {
s.pos = s.text.len
s.inc_line_number()
return scan_res(.eof, '')
}
pub fn (s mut Scanner) scan() ScanRes {
// if s.line_comment != '' {
// s.fgenln('// LC "$s.line_comment"')
// s.line_comment = ''
// }
if s.started {
s.pos++
}
s.started = true
if s.pos >= s.text.len {
return s.end_of_file()
}
if !s.inside_string {
s.skip_whitespace()
}
// End of $var, start next string
if s.inter_end {
if s.text[s.pos] == s.quote {
s.inter_end = false
return scan_res(.str, '')
}
s.inter_end = false
return scan_res(.str, s.ident_string())
}
s.skip_whitespace()
// end of file
if s.pos >= s.text.len {
return s.end_of_file()
}
// handle each char
c := s.text[s.pos]
mut nextc := `\0`
if s.pos + 1 < s.text.len {
nextc = s.text[s.pos + 1]
}
// name or keyword
if is_name_char(c) {
name := s.ident_name()
// tmp hack to detect . in ${}
// Check if not .eof to prevent panic
next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
if token.is_key(name) {
return scan_res(token.key_to_token(name), '')
}
// 'asdf $b' => "b" is the last name in the string, don't start parsing string
// at the next ', skip it
if s.inside_string {
if next_char == s.quote {
s.inter_end = true
s.inter_start = false
s.inside_string = false
}
}
// end of `$expr`
// allow `'$a.b'` and `'$a.c()'`
if s.inter_start && next_char != `.` && next_char != `(` {
s.inter_end = true
s.inter_start = false
}
if s.pos == 0 && next_char == ` ` {
// If a single letter name at the start of the file, increment
// Otherwise the scanner would be stuck at s.pos = 0
s.pos++
}
return scan_res(.name, name)
}
// `123`, `.123`
else if c.is_digit() || (c == `.` && nextc.is_digit()) {
num := s.ident_number()
return scan_res(.number, num)
}
// Handle `'$fn()'`
if c == `)` && s.inter_start {
s.inter_end = true
s.inter_start = false
next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
if next_char == s.quote {
s.inside_string = false
}
return scan_res(.rpar, '')
}
// all other tokens
match c {
`+` {
if nextc == `+` {
s.pos++
return scan_res(.inc, '')
}
else if nextc == `=` {
s.pos++
return scan_res(.plus_assign, '')
}
return scan_res(.plus, '')
}
`-` {
if nextc == `-` {
s.pos++
return scan_res(.dec, '')
}
else if nextc == `=` {
s.pos++
return scan_res(.minus_assign, '')
}
return scan_res(.minus, '')
}
`*` {
if nextc == `=` {
s.pos++
return scan_res(.mult_assign, '')
}
return scan_res(.mul, '')
}
`^` {
if nextc == `=` {
s.pos++
return scan_res(.xor_assign, '')
}
return scan_res(.xor, '')
}
`%` {
if nextc == `=` {
s.pos++
return scan_res(.mod_assign, '')
}
return scan_res(.mod, '')
}
`?` {
return scan_res(.question, '')
}
single_quote, double_quote {
return scan_res(.str, s.ident_string())
}
`\`` {
// ` // apostrophe balance comment. do not remove
return scan_res(.chartoken, s.ident_char())
}
`(` {
return scan_res(.lpar, '')
}
`)` {
return scan_res(.rpar, '')
}
`[` {
return scan_res(.lsbr, '')
}
`]` {
return scan_res(.rsbr, '')
}
`{` {
// Skip { in `${` in strings
if s.inside_string {
return s.scan()
}
return scan_res(.lcbr, '')
}
`$` {
if s.inside_string {
return scan_res(.str_dollar, '')
}
else {
return scan_res(.dollar, '')
}
}
`}` {
// s = `hello $name !`
// s = `hello ${name} !`
if s.inside_string {
s.pos++
if s.text[s.pos] == s.quote {
s.inside_string = false
return scan_res(.str, '')
}
return scan_res(.str, s.ident_string())
}
else {
return scan_res(.rcbr, '')
}
}
`&` {
if nextc == `=` {
s.pos++
return scan_res(.and_assign, '')
}
if nextc == `&` {
s.pos++
return scan_res(.and, '')
}
return scan_res(.amp, '')
}
`|` {
if nextc == `|` {
s.pos++
return scan_res(.logical_or, '')
}
if nextc == `=` {
s.pos++
return scan_res(.or_assign, '')
}
return scan_res(.pipe, '')
}
`,` {
return scan_res(.comma, '')
}
`@` {
s.pos++
name := s.ident_name()
// @FN => will be substituted with the name of the current V function
// @FILE => will be substituted with the path of the V source file
// @LINE => will be substituted with the V line number where it appears (as a string).
// @COLUMN => will be substituted with the column where it appears (as a string).
// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
// This allows things like this:
// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
// ... which is useful while debugging/tracing
if name == 'FN' {
return scan_res(.str, s.fn_name)
}
if name == 'FILE' {
return scan_res(.str, cescaped_path(os.realpath(s.file_path)))
}
if name == 'LINE' {
return scan_res(.str, (s.line_nr + 1).str())
}
if name == 'COLUMN' {
return scan_res(.str, (s.current_column()).str())
}
if name == 'VHASH' {
return scan_res(.str, vhash())
}
if !token.is_key(name) {
s.error('@ must be used before keywords (e.g. `@type string`)')
}
return scan_res(.name, name)
}
/*
case `\r`:
if nextc == `\n` {
s.pos++
s.last_nl_pos = s.pos
return scan_res(.nl, '')
}
}
case `\n`:
s.last_nl_pos = s.pos
return scan_res(.nl, '')
}
*/
`.` {
if nextc == `.` {
s.pos++
if s.text[s.pos + 1] == `.` {
s.pos++
return scan_res(.ellipsis, '')
}
return scan_res(.dotdot, '')
}
return scan_res(.dot, '')
}
`#` {
start := s.pos + 1
s.ignore_line()
if nextc == `!` {
// treat shebang line (#!) as a comment
s.line_comment = s.text[start + 1..s.pos].trim_space()
// s.fgenln('// shebang line "$s.line_comment"')
return s.scan()
}
hash := s.text[start..s.pos]
return scan_res(.hash, hash.trim_space())
}
`>` {
if nextc == `=` {
s.pos++
return scan_res(.ge, '')
}
else if nextc == `>` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return scan_res(.righ_shift_assign, '')
}
s.pos++
return scan_res(.righ_shift, '')
}
else {
return scan_res(.gt, '')
}
}
0xE2 {
// case `≠`:
if nextc == 0x89 && s.text[s.pos + 2] == 0xA0 {
s.pos += 2
return scan_res(.ne, '')
}
// ⩽
else if nextc == 0x89 && s.text[s.pos + 2] == 0xBD {
s.pos += 2
return scan_res(.le, '')
}
// ⩾
else if nextc == 0xA9 && s.text[s.pos + 2] == 0xBE {
s.pos += 2
return scan_res(.ge, '')
}
}
`<` {
if nextc == `=` {
s.pos++
return scan_res(.le, '')
}
else if nextc == `<` {
if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
s.pos += 2
return scan_res(.left_shift_assign, '')
}
s.pos++
return scan_res(.left_shift, '')
}
else {
return scan_res(.lt, '')
}
}
`=` {
if nextc == `=` {
s.pos++
return scan_res(.eq, '')
}
else if nextc == `>` {
s.pos++
return scan_res(.arrow, '')
}
else {
return scan_res(.assign, '')
}
}
`:` {
if nextc == `=` {
s.pos++
return scan_res(.decl_assign, '')
}
else {
return scan_res(.colon, '')
}
}
`;` {
return scan_res(.semicolon, '')
}
`!` {
if nextc == `=` {
s.pos++
return scan_res(.ne, '')
}
else {
return scan_res(.not, '')
}
}
`~` {
return scan_res(.bit_not, '')
}
`/` {
if nextc == `=` {
s.pos++
return scan_res(.div_assign, '')
}
if nextc == `/` {
start := s.pos + 1
s.ignore_line()
s.line_comment = s.text[start + 1..s.pos]
s.line_comment = s.line_comment.trim_space()
if s.is_fmt {
s.pos-- // fix line_nr, \n was read, and the comment is marked on the next line
s.line_nr--
return scan_res(.line_comment, s.line_comment)
}
// s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
// Skip the comment (return the next token)
return s.scan()
}
// Multiline comments
if nextc == `*` {
start := s.pos
mut nest_count := 1
// Skip comment
for nest_count > 0 {
s.pos++
if s.pos >= s.text.len {
s.line_nr--
s.error('comment not terminated')
}
if s.text[s.pos] == `\n` {
s.inc_line_number()
continue
}
if s.expect('/*', s.pos) {
nest_count++
continue
}
if s.expect('*/', s.pos) {
nest_count--
}
}
s.pos++
end := s.pos + 1
comment := s.text[start..end]
if s.is_fmt {
s.line_comment = comment
return scan_res(.mline_comment, s.line_comment)
}
// Skip if not in fmt mode
return s.scan()
}
return scan_res(.div, '')
}
else {
}
}
$if windows {
if c == `\0` {
return s.end_of_file()
}
}
s.error('invalid character `${c.str()}`')
return s.end_of_file()
}
fn (s &Scanner) current_column() int {
return s.pos - s.last_nl_pos
}
fn (s Scanner) count_symbol_before(p int, sym byte) int {
mut count := 0
for i := p; i >= 0; i-- {
if s.text[i] != sym {
break
}
count++
}
return count
}
fn (s mut Scanner) ident_string() string {
q := s.text[s.pos]
is_quote := q == single_quote || q == double_quote
is_raw := is_quote && s.text[s.pos - 1] == `r`
if is_quote && !s.inside_string {
s.quote = q
}
// if s.file_path.contains('string_test') {
// println('\nident_string() at char=${s.text[s.pos].str()}')
// println('linenr=$s.line_nr quote= $qquote ${qquote.str()}')
// }
mut start := s.pos
s.inside_string = false
slash := `\\`
for {
s.pos++
if s.pos >= s.text.len {
break
}
c := s.text[s.pos]
prevc := s.text[s.pos - 1]
// end of string
if c == s.quote && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) {
// handle '123\\' slash at the end
break
}
if c == `\n` {
s.inc_line_number()
}
// Don't allow \0
if c == `0` && s.pos > 2 && s.text[s.pos - 1] == slash {
if s.pos < s.text.len - 1 && s.text[s.pos + 1].is_digit() {
}
else {
s.error('0 character in a string literal')
}
}
// Don't allow \x00
if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
s.error('0 character in a string literal')
}
// ${var} (ignore in vfmt mode)
if c == `{` && prevc == `$` && !is_raw && !s.is_fmt && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
s.inside_string = true
// so that s.pos points to $ at the next step
s.pos -= 2
break
}
// $var
if is_name_char(c) && prevc == `$` && !s.is_fmt && !is_raw && s.count_symbol_before(s.pos - 2, slash) % 2 == 0 {
s.inside_string = true
s.inter_start = true
s.pos -= 2
break
}
}
mut lit := ''
if s.text[start] == s.quote {
start++
}
mut end := s.pos
if s.inside_string {
end++
}
if start > s.pos {
}
else {
lit = s.text[start..end]
}
return lit
}
fn (s mut Scanner) ident_char() string {
start := s.pos
slash := `\\`
mut len := 0
for {
s.pos++
if s.pos >= s.text.len {
break
}
if s.text[s.pos] != slash {
len++
}
double_slash := s.expect('\\\\', s.pos - 2)
if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
// ` // apostrophe balance comment. do not remove
if double_slash {
len++
}
break
}
}
len--
c := s.text[start + 1..s.pos]
if len != 1 {
u := c.ustring()
if u.len != 1 {
s.error('invalid character literal (more than one character)\n' + 'use quotes for strings, backticks for characters')
}
}
if c == '\\`' {
return '`'
}
// Escapes a `'` character
return if c == "\'" { '\\' + c } else { c }
}
fn (s &Scanner) expect(want string, start_pos int) bool {
end_pos := start_pos + want.len
if start_pos < 0 || start_pos >= s.text.len {
return false
}
if end_pos < 0 || end_pos > s.text.len {
return false
}
for pos in start_pos .. end_pos {
if s.text[pos] != want[pos - start_pos] {
return false
}
}
return true
}
fn (s mut Scanner) debug_tokens() {
s.pos = 0
s.started = false
s.debug = true
fname := s.file_path.all_after(os.path_separator)
println('\n===DEBUG TOKENS $fname===')
for {
res := s.scan()
tok := res.tok
lit := res.lit
print(tok.str())
if lit != '' {
println(' `$lit`')
}
else {
println('')
}
if tok == .eof {
println('============ END OF DEBUG TOKENS ==================')
break
}
}
}
fn (s mut Scanner) ignore_line() {
s.eat_to_end_of_line()
s.inc_line_number()
}
fn (s mut Scanner) eat_to_end_of_line() {
for s.pos < s.text.len && s.text[s.pos] != `\n` {
s.pos++
}
}
fn (s mut Scanner) inc_line_number() {
s.last_nl_pos = s.pos
s.line_nr++
s.line_ends << s.pos
if s.line_nr > s.nr_lines {
s.nr_lines = s.line_nr
}
}
fn (s Scanner) line(n int) string {
mut res := ''
if n >= 0 && n < s.line_ends.len {
nline_start := if n == 0 { 0 } else { s.line_ends[n - 1] }
nline_end := s.line_ends[n]
if nline_start <= nline_end {
res = s.text[nline_start..nline_end]
}
}
return res.trim_right('\r\n').trim_left('\r\n')
}
fn is_name_char(c byte) bool {
return c == `_` || c.is_letter()
}
[inline]
fn is_nl(c byte) bool {
return c == `\r` || c == `\n`
}
fn contains_capital(s string) bool {
for c in s {
if c >= `A` && c <= `Z` {
return true
}
}
return false
}
// HTTPRequest bad
// HttpRequest good
fn good_type_name(s string) bool {
if s.len < 4 {
return true
}
for i in 2 .. s.len {
if s[i].is_capital() && s[i - 1].is_capital() && s[i - 2].is_capital() {
return false
}
}
return true
}
// registration_date good
// registrationdate bad
fn (s &Scanner) validate_var_name(name string) {
if name.len > 15 && !name.contains('_') {
s.error('bad variable name `$name`\n' + 'looks like you have a multi-word name without separating them with `_`' + '\nfor example, use `registration_date` instead of `registrationdate` ')
}
}
pub fn (s &Scanner) error(msg string) {
println('$s.line_nr : $msg')
exit(1)
}
pub fn verror(s string) {
println('V error: $s')
os.flush_stdout()
exit(1)
}
pub fn vhash() string {
mut buf := [50]byte
buf[0] = 0
C.snprintf(charptr(buf), 50, '%s', C.V_COMMIT_HASH)
return tos_clone(buf)
}
pub fn cescaped_path(s string) string {
return s.replace('\\', '\\\\')
}

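The inter_start/inter_end flags implement string interpolation by splitting a literal around each `$`. A sketch of the token sequence for a hypothetical input (derived from the name and str_dollar paths above):

mut s := new_scanner("'age: \$num'")
// successive s.scan() calls yield roughly:
//   .str         'age: '  (literal up to the $)
//   .str_dollar           (the $ itself)
//   .name        'num'    (the interpolated identifier)
//   .str         ''       (empty tail before the closing quote)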

@@ -0,0 +1,30 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module scanner
import (
compiler2.token
)
fn test_scan() {
text := 'println(2 + 3)'
mut scanner := new_scanner(text)
mut tokens := []token.Token
for {
res := scanner.scan()
if res.tok == .eof {
break
}
tokens << res.tok
}
assert tokens.len == 6
assert tokens[0] == .name
assert tokens[1] == .lpar
assert tokens[2] == .number
assert tokens[3] == .plus
assert tokens[4] == .number
assert tokens[5] == .rpar
}


@@ -0,0 +1,305 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module token
/*
struct Token {
tok TokenKind // the token number/enum; for quick comparisons
lit string // literal representation of the token
line_nr int // the line number in the source where the token occurred
//name_idx int // name table index for O(1) lookup
pos int // the position of the token in scanner text
}
*/
pub enum Token {
eof
name // user
number // 123
str // 'foo'
str_inter // 'name=$user.name'
chartoken // `A`
plus
minus
mul
div
mod
xor // ^
pipe // |
inc // ++
dec // --
and // &&
logical_or
not
bit_not
question
comma
semicolon
colon
arrow // =>
amp
hash
dollar
str_dollar
left_shift
righ_shift
// at // @
assign // =
decl_assign // :=
plus_assign // +=
minus_assign // -=
div_assign
mult_assign
xor_assign
mod_assign
or_assign
and_assign
righ_shift_assign
left_shift_assign
// {} () []
lcbr
rcbr
lpar
rpar
lsbr
rsbr
// == != <= < >= >
eq
ne
gt
lt
ge
le
// comments
line_comment
mline_comment
nl
dot
dotdot
ellipsis
// keywords
keyword_beg
key_as
key_asm
key_assert
key_atomic
key_break
key_const
key_continue
key_defer
key_else
key_embed
key_enum
key_false
key_for
key_fn
key_global
key_go
key_goto
key_if
key_import
key_import_const
key_in
key_interface
// key_it
key_match
key_module
key_mut
key_none
key_return
key_select
key_sizeof
key_offsetof
key_struct
key_switch
key_true
key_type
// typeof
key_orelse
key_union
key_pub
key_static
key_unsafe
keyword_end
}
const (
assign_tokens = [Token.assign, .plus_assign, .minus_assign, .mult_assign,
.div_assign, .xor_assign, .mod_assign, .or_assign, .and_assign,
.righ_shift_assign, .left_shift_assign]
nr_tokens = 141
)
// build_keys generates a map with keywords' string values:
// Keywords['return'] == .key_return
fn build_keys() map[string]int {
mut res := map[string]int
for t := int(Token.keyword_beg) + 1; t < int(Token.keyword_end); t++ {
key := token_str[t]
res[key] = t
}
return res
}
// TODO remove once we have `enum Token { name('name') if('if') ... }`
fn build_token_str() []string {
mut s := [''].repeat(nr_tokens)
s[Token.keyword_beg] = ''
s[Token.keyword_end] = ''
s[Token.eof] = 'eof'
s[Token.name] = 'name'
s[Token.number] = 'number'
s[Token.str] = 'STR'
s[Token.chartoken] = 'char'
s[Token.plus] = '+'
s[Token.minus] = '-'
s[Token.mul] = '*'
s[Token.div] = '/'
s[Token.mod] = '%'
s[Token.xor] = '^'
s[Token.bit_not] = '~'
s[Token.pipe] = '|'
s[Token.hash] = '#'
s[Token.amp] = '&'
s[Token.inc] = '++'
s[Token.dec] = '--'
s[Token.and] = '&&'
s[Token.logical_or] = '||'
s[Token.not] = '!'
s[Token.dot] = '.'
s[Token.dotdot] = '..'
s[Token.ellipsis] = '...'
s[Token.comma] = ','
// s[Token.at] = '@'
s[Token.semicolon] = ';'
s[Token.colon] = ':'
s[Token.arrow] = '=>'
s[Token.assign] = '='
s[Token.decl_assign] = ':='
s[Token.plus_assign] = '+='
s[Token.minus_assign] = '-='
s[Token.mult_assign] = '*='
s[Token.div_assign] = '/='
s[Token.xor_assign] = '^='
s[Token.mod_assign] = '%='
s[Token.or_assign] = '|='
s[Token.and_assign] = '&='
s[Token.righ_shift_assign] = '>>='
s[Token.left_shift_assign] = '<<='
s[Token.lcbr] = '{'
s[Token.rcbr] = '}'
s[Token.lpar] = '('
s[Token.rpar] = ')'
s[Token.lsbr] = '['
s[Token.rsbr] = ']'
s[Token.eq] = '=='
s[Token.ne] = '!='
s[Token.gt] = '>'
s[Token.lt] = '<'
s[Token.ge] = '>='
s[Token.le] = '<='
s[Token.question] = '?'
s[Token.left_shift] = '<<'
s[Token.righ_shift] = '>>'
s[Token.line_comment] = '// line comment'
s[Token.mline_comment] = '/* mline comment */'
s[Token.nl] = 'NLL'
s[Token.dollar] = '$'
s[Token.str_dollar] = '$2'
s[Token.key_assert] = 'assert'
s[Token.key_struct] = 'struct'
s[Token.key_if] = 'if'
// s[Token.key_it] = 'it'
s[Token.key_else] = 'else'
s[Token.key_asm] = 'asm'
s[Token.key_return] = 'return'
s[Token.key_module] = 'module'
s[Token.key_sizeof] = 'sizeof'
s[Token.key_go] = 'go'
s[Token.key_goto] = 'goto'
s[Token.key_const] = 'const'
s[Token.key_mut] = 'mut'
s[Token.key_type] = 'type'
s[Token.key_for] = 'for'
s[Token.key_switch] = 'switch'
s[Token.key_fn] = 'fn'
s[Token.key_true] = 'true'
s[Token.key_false] = 'false'
s[Token.key_continue] = 'continue'
s[Token.key_break] = 'break'
s[Token.key_import] = 'import'
s[Token.key_embed] = 'embed'
s[Token.key_unsafe] = 'unsafe'
// Tokens[key_typeof] = 'typeof'
s[Token.key_enum] = 'enum'
s[Token.key_interface] = 'interface'
s[Token.key_pub] = 'pub'
s[Token.key_import_const] = 'import_const'
s[Token.key_in] = 'in'
s[Token.key_atomic] = 'atomic'
s[Token.key_orelse] = 'or'
s[Token.key_global] = '__global'
s[Token.key_union] = 'union'
s[Token.key_static] = 'static'
s[Token.key_as] = 'as'
s[Token.key_defer] = 'defer'
s[Token.key_match] = 'match'
s[Token.key_select] = 'select'
s[Token.key_none] = 'none'
s[Token.key_offsetof] = '__offsetof'
return s
}
const (
token_str = build_token_str()
keywords = build_keys()
)
pub fn key_to_token(key string) Token {
a := Token(keywords[key])
return a
}
pub fn is_key(key string) bool {
return int(key_to_token(key)) > 0
}
pub fn is_decl(t Token) bool {
return t in [.key_enum,
.key_interface, .key_fn, .key_struct, .key_type, .key_const, .key_import_const,
.key_pub, .eof]
}
fn (t Token) is_assign() bool {
return t in assign_tokens
}
fn (t []Token) contains(val Token) bool {
for tt in t {
if tt == val {
return true
}
}
return false
}
pub fn (t Token) str() string {
lit := 't.lit'
if t == .number {
return lit
}
if t == .chartoken {
return '`lit`'
}
if t == .str {
return "'lit'"
}
if t < .plus {
return lit // string, number etc
}
return token_str[int(t)]
}
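A usage sketch for the keyword tables (assumed call sites; the scanner's name path uses exactly this pair):

import compiler2.token

fn demo() {
	assert token.is_key('return') // keywords['return'] == int(Token.key_return) > 0
	assert !token.is_key('banana') // missing keys map to 0 (.eof), so is_key is false
}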