1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

V 0.0.12 open-source release

This commit is contained in:
Alexander Medvednikov
2019-06-22 20:20:28 +02:00
commit d32e538073
43 changed files with 12573 additions and 0 deletions

290
compiler/cgen.v Normal file
View File

@ -0,0 +1,290 @@
module main
// CGen accumulates the generated C source and writes it to a file (see `save`).
struct CGen {
out os.File // output file, created in `new_cgen`, written and closed in `save`
out_path string // path of the output file ('$TmpPath/$out_name_c')
typedefs []string // C typedefs (filled by `gen_typedef` and by `fn_decl` for Option types)
type_aliases []string // filled by `gen_type_alias`
includes []string
types []string // filled by `gen_type`
thread_args []string // arg struct definitions AND wrapper functions, see `register_thread_fn`
thread_fns []string
consts []string
fns []string // function declarations collected by `fn_decl`; json en/decoders are also added here
so_fns []string // names of functions collected by `fn_decl` in live (hot reload) mode
consts_init []string
// tmp_lines []string
// tmp_lines_pos int
lines []string // finished lines; `genln` appends, `save` joins them with '\n'
is_user bool
mut:
run Pass // current pass; `gen`/`genln` emit nothing during RUN_DECLS
nogen bool // when true `gen`, `genln` and the placeholder setters do nothing
tmp_line string // buffer that receives output between `start_tmp` and `end_tmp`
cur_line string // line currently being built by `gen`, flushed by `genln`
prev_line string // last line flushed by `genln`
is_tmp bool // true between `start_tmp` and `end_tmp`
fn_main string // text accumulated by `add_to_main`
stash string
// st_start_pos int
}
// new_cgen creates the C code generator and opens its output file.
// `out_name_c` is the name of the C file; it is created inside `TmpPath`.
// Returns a pointer to the new generator.
fn new_cgen(out_name_c string) *CGen {
	// Build the path once, so that `out_path` and the opened file can never disagree.
	path := '$TmpPath/$out_name_c'
	gen := &CGen {
		out_path: path
		out: os.create_file(path)
	}
	return gen
}
// genln appends `s` to the line being built and finishes that line.
// In tmp mode the text (plus a newline) goes to the tmp buffer instead.
// Nothing is generated when `nogen` is set or during the declarations pass.
fn (g mut CGen) genln(s string) {
	if g.nogen || g.run == RUN_DECLS {
		return
	}
	if !g.is_tmp {
		line := '$g.cur_line $s'
		g.cur_line = line
		if line != '' {
			// Flush the finished line and start a new empty one
			g.lines << line
			g.prev_line = line
			g.cur_line = ''
		}
		return
	}
	g.tmp_line = '$g.tmp_line $s\n'
}
// gen appends `s` (preceded by a space) to the current line without finishing it.
// In tmp mode the text goes to the tmp buffer instead.
// Nothing is generated when `nogen` is set or during the declarations pass.
fn (g mut CGen) gen(s string) {
	if g.nogen || g.run == RUN_DECLS {
		return
	}
	if !g.is_tmp {
		g.cur_line = '$g.cur_line $s'
		return
	}
	g.tmp_line = '$g.tmp_line $s'
}
// save writes all finished lines, joined with newlines, to the output file and closes it.
fn (g mut CGen) save() {
	g.out.appendln(g.lines.join('\n'))
	g.out.close()
}
// start_tmp redirects all following `gen`/`genln` output into an empty tmp buffer
// until `end_tmp` is called.
fn (g mut CGen) start_tmp() {
	// Tmp mode is already active: print the pending buffer and call os.exit
	if g.is_tmp {
		println(g.tmp_line)
		os.exit('start_tmp() already started. cur_line="$g.cur_line"')
	}
	g.is_tmp = true
	g.tmp_line = ''
}
// end_tmp leaves tmp mode and returns everything generated since `start_tmp`.
// The tmp buffer is cleared.
fn (g mut CGen) end_tmp() string {
	generated := g.tmp_line
	g.tmp_line = ''
	g.is_tmp = false
	return generated
}
// add_placeholder returns the current end position of the active buffer (the tmp buffer
// in tmp mode, the current line otherwise). Pass it to `set_placeholder` later to insert
// text at this position.
fn (g mut CGen) add_placeholder() int {
	if !g.is_tmp {
		return g.cur_line.len
	}
	return g.tmp_line.len
}
// set_placeholder inserts `val` at position `pos` of the active buffer (the tmp buffer in
// tmp mode, the current line otherwise). Does nothing when `nogen` is set.
fn (g mut CGen) set_placeholder(pos int, val string) {
	if g.nogen {
		return
	}
	if !g.is_tmp {
		head := g.cur_line.left(pos)
		tail := g.cur_line.right(pos)
		g.cur_line = '${head}${val}${tail}'
		return
	}
	head := g.tmp_line.left(pos)
	tail := g.tmp_line.right(pos)
	g.tmp_line = '${head}${val}${tail}'
}
// /////////////////////
// add_placeholder2 appends an empty line to `lines` and returns its index, so that the
// whole line can be filled in later with `set_placeholder2`. Not allowed in tmp mode.
fn (g mut CGen) add_placeholder2() int {
	if g.is_tmp {
		exit('tmp in addp2')
	}
	// The new line will be stored at the current length
	idx := g.lines.len
	g.lines << ''
	return idx
}
// set_placeholder2 replaces the line at index `pos` (as returned by `add_placeholder2`)
// with `val`. Does nothing when `nogen` is set. Not allowed in tmp mode.
fn (g mut CGen) set_placeholder2(pos int, val string) {
	if !g.nogen {
		if g.is_tmp {
			exit('tmp in setp2')
		}
		g.lines[pos] = val
	}
}
// /////////////////
// fn (g mut CGen) cut_lines_after(pos int) string {
// end := g.lines.len
// lines := g.lines.slice_fast(pos, end)
// body := lines.join('\n')
// g.lines = g.lines.slice_fast(0, pos)
// return body
// }
// fn (g mut CGen) set_prev_line(val string) {
// g.lines.set(g.lines.len - 3, val)
// }
// ////fn (g mut CGen) go_back() {
// ////g.stash = g.prev_line + g.cur_line
// g.lines.set(g.lin
// ////}
// fn (g mut CGen) end_statement() {
// last_lines := g.lines.slice_fast(g.st_start_pos, g.lines.len - 1)
// mut merged := last_lines.join(' ')
// merged += '/* M $last_lines.len */'
// merged = merged.replace('\n', '')
// // zero last N lines instead of deleting them
// for i := g.st_start_pos; i < g.lines.len; i++ {
// g.lines.set(i, '')
// }
// g.lines.set(g.lines.len - 1, merged)
// // g.genln('')
// g.st_start_pos = g.lines.len - 1
// // os.exitkmerged)
// }
// fn (g mut CGen) prepend_type(typ string) {
// g.cur_line = typ.add(g.cur_line)
// g.cur_line='!!!'
// }
// insert_before inserts `val` into `lines` at the index of the current last line.
fn (g mut CGen) insert_before(val string) {
	last := g.lines.len - 1
	g.lines.insert(last, val)
}
// fn (g mut CGen) swap_last_lines() {
// return
// if g.run == RUN_DECLS {
// return
// }
// i := g.lines.len - 1
// j := i - 1
// tmp := g.lines[i]
// // println('lines i = $tmp')
// // println('lines j = ${g.lines[j]}')
// // // os.exit('')
// g.lines.set(i, g.lines[j])
// g.lines.set(j, tmp)
// }
// register_thread_fn stores the argument struct definition followed by the wrapper function
// text in `thread_args`, unless an entry containing `wrapper_name` has already been stored.
fn (g mut CGen) register_thread_fn(wrapper_name, wrapper_text, struct_text string) {
	for registered in g.thread_args {
		if registered.contains(wrapper_name) {
			// Already registered
			return
		}
	}
	// The struct goes first, then the wrapper (both are kept in the same list)
	g.thread_args << struct_text
	g.thread_args << wrapper_text
}
/*
fn (g mut CGen) delete_all_after(pos int) {
if pos > g.cur_line.len - 1 {
return
}
g.cur_line = g.cur_line.substr(0, pos)
}
*/
// prof_counters returns C declarations of one `double <cgen name>_time;` counter for every
// function in the table and for every method of every type, joined with ';\n'.
fn (c mut V) prof_counters() string {
	mut counters := []string
	// Global functions first
	for f in c.table.fns {
		counters << 'double ${c.table.cgen_name(f)}_time;'
	}
	// Then the methods of each type
	for t in c.table.types {
		for f in t.methods {
			counters << 'double ${c.table.cgen_name(f)}_time;'
		}
	}
	return counters.join(';\n')
}
// print_prof_counters returns C statements that print every non-zero `<cgen name>_time`
// counter: one statement per function in the table and per method of every type,
// joined with ';\n'.
fn (p mut Parser) print_prof_counters() string {
	mut stmts := []string
	// Global functions first (printed with their V name)
	for f in p.table.fns {
		counter := '${p.table.cgen_name(f)}_time'
		stmts << 'if ($counter) printf("%%f : $f.name \\n", $counter);'
	}
	// Then the methods of each type (printed with their C name)
	for t in p.table.types {
		for f in t.methods {
			counter := '${p.table.cgen_name(f)}_time'
			stmts << 'if ($counter) printf("%%f : ${p.table.cgen_name(f)} \\n", $counter);'
		}
	}
	return stmts.join(';\n')
}
// gen_type adds `s` to the generator's list of types. Only done in the first run.
fn (p mut Parser) gen_type(s string) {
	if p.first_run() {
		p.cgen.types << s
	}
}
// gen_typedef adds `s` to the generator's list of typedefs. Only done in the first run.
fn (p mut Parser) gen_typedef(s string) {
	if p.first_run() {
		p.cgen.typedefs << s
	}
}
// gen_type_alias adds `s` to the generator's list of type aliases. Only done in the first run.
fn (p mut Parser) gen_type_alias(s string) {
	if p.first_run() {
		p.cgen.type_aliases << s
	}
}
// add_to_main appends `s` to the text collected in `fn_main`.
// A message is printed on every call.
fn (g mut CGen) add_to_main(s string) {
	println('add to main')
	g.fn_main = '${g.fn_main}${s}'
}

848
compiler/fn.v Normal file
View File

@ -0,0 +1,848 @@
module main
const (
// Number of `Var` slots `new_fn` pre-allocates in `Fn.local_vars`
MaxLocalVars = 50
)
// Fn describes one function or method: its signature plus the local variables that are
// registered while it is being parsed.
struct Fn {
// addr int
mut:
pkg string // set by `new_fn`
local_vars []Var // variable slots, pre-allocated by `new_fn`; only the first `var_idx` are live
var_idx int // end of the live part of `local_vars`
args []Var // arguments; for methods the receiver is the first one
is_interface bool // if set, `fn_args` adds an unnamed first argument of type `receiver_typ`
// called_fns []string
// idx int
scope_level int // changed by `open_scope` / `close_scope`
typ string // return type
name string
is_c bool // declared as `fn C.name(...)`
receiver_typ string // receiver type as written, without the `*` that is added for mut/& receivers
is_private bool
is_method bool // has a receiver
returns_error bool // set when `!` follows the argument list
is_decl bool // type myfn fn(int, int)
defer string // C code generated right after the function's statements
}
// find_var looks for a live local variable (one of the first `var_idx` slots) with the
// given name. Returns an empty `Var{}` if there is none.
fn (f &Fn) find_var(name string) Var {
	for i := 0; i < f.var_idx; i++ {
		candidate := f.local_vars[i]
		if candidate.name == name {
			return candidate
		}
	}
	return Var{}
}
// open_scope enters a new, deeper scope level.
fn (f mut Fn) open_scope() {
	f.scope_level = f.scope_level + 1
}
// close_scope leaves the current scope level.
// `var_idx` (the end of the live variables) is moved back over all trailing variables that
// belong to the current scope level, which effectively deletes them.
fn (f mut Fn) close_scope() {
	mut new_idx := f.var_idx
	for new_idx > 0 {
		// Stop at the first variable that belongs to a different scope
		if f.local_vars[new_idx - 1].scope_level != f.scope_level {
			break
		}
		new_idx--
	}
	f.var_idx = new_idx
	f.scope_level--
}
// mark_var_used sets `is_used` on every slot of `local_vars` whose name equals `v.name`.
// All slots are visited, not only the first `var_idx` ones, and the loop does not stop at
// the first match (the `return` below is commented out).
fn (f &Fn) mark_var_used(v Var) {
for i, vv in f.local_vars {
if vv.name == v.name {
// The slot is modified through a pointer to the array element
mut ptr := &f.local_vars[i]
ptr.is_used = true
// / f.local_vars[i].is_used = true
// return
}
}
}
// known_var reports whether a live local variable called `name` exists.
fn (f &Fn) known_var(name string) bool {
	// `find_var` returns an empty Var (with an empty name) when nothing was found
	found := f.find_var(name)
	return found.name != ''
}
// register_var adds `v` as a new live local variable of the function.
// The variable is stored with the function's current scope level.
fn (f mut Fn) register_var(v Var) {
	new_var := {v | scope_level: f.scope_level}
	if f.var_idx >= f.local_vars.len {
		// All slots are taken, expand the array
		f.local_vars << new_var
	}
	else {
		// Reuse a free (pre-allocated or previously closed) slot
		f.local_vars[f.var_idx] = new_var
	}
	// The index has to advance in both cases, otherwise a variable that was just appended
	// is not visible to `find_var` and gets overwritten by the next registration
	f.var_idx++
}
// vlib header file?
// is_sig reports whether the file being parsed is located in `TmpPath` while building in
// DEFAULT_MODE or BUILD mode.
fn (p mut Parser) is_sig() bool {
	if p.build_mode != DEFAULT_MODE && p.build_mode != BUILD {
		return false
	}
	return p.file_path.contains(TmpPath)
}
// new_fn creates a function object that belongs to package `pkg`.
// `local_vars` is pre-allocated with `MaxLocalVars` empty variables.
fn new_fn(pkg string) *Fn {
mut f := &Fn {
pkg: pkg
local_vars: [Var{}
; MaxLocalVars]
}
return f
}
// Function signatures are added to the top of the .c file in the first run.
// fn_decl parses one function or method declaration: optional `pub`, optional receiver,
// the name (or `C.name`, or an operator), optional `<T>`, the arguments, optional `!`
// and an optional return type.
// The function is registered in the table (or as a method of the receiver type).
// In the first run, for signature files, live functions, headers and `main` in tests only
// the C declaration is collected and the body is skipped; otherwise the C function
// is generated together with its statements.
fn (p mut Parser) fn_decl() {
p.fgen('fn ')
is_pub := p.tok == PUB
if is_pub {
p.next()
}
p.returns = false
p.next()
mut f := new_fn(p.pkg)
// Method receiver
mut receiver_typ := ''
if p.tok == LPAR {
f.is_method = true
p.check(LPAR)
receiver_name := p.check_name()
is_mut := p.tok == MUT
is_amp := p.tok == AMP
if is_mut || is_amp {
p.next()
}
receiver_typ = p.get_type()
T := p.table.find_type(receiver_typ)
if T.is_interface {
p.error('invalid receiver type `$receiver_typ` (`$receiver_typ` is an interface)')
}
// Don't allow modifying types from a different module
if !p.first_run() && !p.builtin_pkg && T.pkg != p.pkg {
println('T.pkg=$T.pkg')
println('pkg=$p.pkg')
p.error('cannot define new methods on non-local type `$receiver_typ`')
}
// (a *Foo) instead of (a mut Foo) is a common mistake
if !p.builtin_pkg && receiver_typ.contains('*') {
t := receiver_typ.replace('*', '')
p.error('use `($receiver_name mut $t)` instead of `($receiver_name *$t)`')
}
// `f.receiver_typ` keeps the type as written; the local `receiver_typ` gets a `*` for mut/& receivers
f.receiver_typ = receiver_typ
if is_mut || is_amp {
receiver_typ += '*'
}
p.check(RPAR)
receiver := Var {
name: receiver_name
is_arg: true
typ: receiver_typ
is_mut: is_mut
ref: is_amp
ptr: is_mut
line_nr: p.scanner.line_nr
}
f.args << receiver
f.register_var(receiver)
}
// The name of the function can also be one of the `+`, `-`, `*` tokens
if p.tok == PLUS || p.tok == MINUS || p.tok == MUL {
f.name = p.tok.str()
println('!!! $f.name')
p.next()
}
else {
f.name = p.check_name()
}
// C function header def? (fn C.NSMakeRect(int,int,int,int))
is_c := f.name == 'C' && p.tok == DOT
// Just fn signature? only builtin.v + default build mode
// is_sig := p.builtin_pkg && p.build_mode == DEFAULT_MODE
// is_sig := p.build_mode == DEFAULT_MODE && (p.builtin_pkg || p.file.contains(LANG_TMP))
is_sig := p.is_sig()
// println('\n\nfn decl !!is_sig=$is_sig name=$f.name $p.builtin_pkg')
if is_c {
p.check(DOT)
f.name = p.check_name()
f.is_c = true
}
else if !p.translated && !p.file_path.contains('view.v') {
if contains_capital(f.name) {
p.error('function names cannot contain uppercase letters, use snake_case instead')
}
if f.name.contains('__') {
p.error('function names cannot contain double underscores ("__"), use single underscores instead')
}
}
// simple_name := f.name
// println('!SIMPLE=$simple_name')
// user.register() => User_register()
has_receiver := receiver_typ.len > 0
if receiver_typ != '' {
// f.name = '${receiver_typ}_${f.name}'
}
// full pkg function name
// os.exit ==> os__exit()
if !is_c && !p.builtin_pkg && p.pkg != 'main' && receiver_typ.len == 0 {
f.name = p.prepend_pkg(f.name)
}
if p.first_run() && p.table.known_fn(f.name) && receiver_typ.len == 0 {
existing_fn := p.table.find_fn(f.name)
// This existing function could be defined as C decl before (no body), then we don't need to throw an error
if !existing_fn.is_decl {
p.error('redefinition of `$f.name`')
}
}
// Generic?
mut is_generic := false
if p.tok == LT {
p.next()
gen_type := p.check_name()
if gen_type != 'T' {
p.error('only `T` is allowed as a generic type for now')
}
p.check(GT)
is_generic = true
}
// Args (...)
p.fn_args(mut f)
// Returns an error?
if p.tok == NOT {
p.next()
f.returns_error = true
}
// Returns a type?
mut typ := 'void'
if p.tok == NAME || p.tok == MUL || p.tok == AMP || p.tok == LSBR ||
p.tok == QUESTION {
p.fgen(' ')
// TODO In
// if p.tok in [ NAME, MUL, AMP, LSBR ] {
typ = p.get_type()
}
// Translated C code can have empty functions (just definitions)
is_fn_header := !is_c && !is_sig && (p.translated || p.is_test) &&
(p.tok != LCBR)// || (p.tok == NAME && p.peek() != LCBR))
if is_fn_header {
f.is_decl = true
// println('GOT fn header $f.name')
}
// { required only in normal function declarations
if !is_c && !is_sig && !is_fn_header {
p.fgen(' ')
p.check(LCBR)
}
// Register option ? type
if typ.starts_with('Option_') {
p.cgen.typedefs << 'typedef Option $typ;'
}
// Register function
f.typ = typ
mut str_args := f.str_args(p.table)
// println('FN DECL $f.name typ=$f.typ str_args="$str_args"')
// Special case for main() args
if f.name == 'main' && !has_receiver {
if str_args != '' {
p.error('fn main must have no arguments and no return values')
}
typ = 'int'
str_args = 'int argc, char** argv'
}
// Only in C code generate User_register() instead of register()
// Internally it's still stored as "register" in type User
// mut fn_name_cgen := f.name
// if receiver_typ != '' {
// fn_name_cgen = '${receiver_typ}_$f.name'
// fn_name_cgen = fn_name_cgen.replace(' ', '')
// fn_name_cgen = fn_name_cgen.replace('*', '')
// }
mut fn_name_cgen := p.table.cgen_name(f)
// Start generation of the function body
is_live := p.is_live && f.name != 'main' && f.name != 'reload_so'
skip_main_in_test := f.name == 'main' && p.is_test
if !is_c && !is_live && !is_sig && !is_fn_header && !skip_main_in_test {
if p.obfuscate {
p.genln('; // ${f.name}')
}
p.genln('$typ $fn_name_cgen($str_args) {')
// if f.name == 'WinMain' {
// typ = 'int'
// }
}
if is_fn_header {
p.genln('$typ $fn_name_cgen($str_args);')
p.fgenln('')
}
if is_c {
p.fgenln('\n')
}
p.cur_fn = f
// Register the method
if receiver_typ != '' {
mut receiver_T := p.table.find_type(receiver_typ)
// No such type yet? It could be defined later. Create a new type.
// struct declaration later will modify it instead of creating a new one.
if p.first_run() && receiver_T.name == '' {
// println('fn decl !!!!!!! REG PH $receiver_typ')
ttyp := Type {
name: receiver_typ.replace('*', '')
pkg: p.pkg
is_placeholder: true
}
p.table.register_type2(ttyp)
}
// f.idx = p.table.fn_cnt
receiver_T.add_method(f)
}
else {
// println('register_fn typ=$typ isg=$is_generic')
p.table.register_fn(f)
}
// Declaration only: collect the C prototype and return without generating a body
if is_sig || p.first_run() || is_live || is_fn_header || skip_main_in_test {
// First pass? Skip the body for now [BIG]
if !is_sig && !is_fn_header {
// Skip tokens until the next declaration starts
for {
p.next()
if p.tok.is_decl() {
break
}
}
}
// Live code reloading? Load all fns from .so
if is_live && p.first_run() {
// p.cgen.consts_init.push('$fn_name_cgen = dlsym(lib, "$fn_name_cgen");')
p.cgen.so_fns << fn_name_cgen
fn_name_cgen = '(* $fn_name_cgen )'
}
// Actual fn declaration!
mut fn_decl := '$typ $fn_name_cgen($str_args)'
if p.obfuscate {
fn_decl += '; // ${f.name}'
}
// Add function definition to the top
if !is_c && f.name != 'main' && p.first_run() {
// TODO hack to make Volt compile without -embed_vlib
if f.name == 'darwin__nsstring' && p.build_mode == DEFAULT_MODE {
return
}
p.cgen.fns << fn_decl + ';'
}
p.fgenln('\n')// TODO defer this instead of copy pasting
return
}
if f.name == 'main' || f.name == 'WinMain' {
p.genln('init_consts();')
if p.table.imports.contains('os') {
if f.name == 'main' {
p.genln('os__init_os_args(argc, argv);')
}
else if f.name == 'WinMain' {
p.genln('os__parse_windows_cmd_line(pCmdLine);')
}
}
// We are in live code reload mode, call the .so loader in bg
if p.is_live {
p.genln('
load_so("bounce.so");
pthread_t _thread_so;
pthread_create(&_thread_so , NULL, &reload_so, NULL); ')
}
if p.is_test && !p.scanner.file_path.contains('/volt') {
p.error('tests cannot have function `main`')
}
}
// println('is_c=$is_c name=$f.name')
if is_c || is_sig || is_fn_header {
// println('IS SIG RETURNING tok=${p.strtok()}')
p.fgenln('\n')
return
}
// We are in profile mode? Start counting at the beginning of the function (save current time).
if p.is_prof && f.name != 'main' && f.name != 'time__ticks' {
p.genln('double _PROF_START = time__ticks();//$f.name')
cgen_name := p.table.cgen_name(f)
f.defer = ' ${cgen_name}_time += time__ticks() - _PROF_START;'
}
p.statements_no_curly_end()
// Print counting result after all statements in main
if p.is_prof && f.name == 'main' {
p.genln(p.print_prof_counters())
}
// Counting or not, always need to add defer before the end
p.genln(f.defer)
if typ != 'void' && !p.returns && f.name != 'main' && f.name != 'WinMain' {
p.error('$f.name must return "$typ"')
}
// {} closed correctly? scope_level should be 0
if p.pkg == 'main' {
// println(p.cur_fn.scope_level)
}
if p.cur_fn.scope_level > 2 {
// p.error('unclosed {')
}
// Make sure all vars in this function are used (only in main for now)
// if p.builtin_pkg || p.pkg == 'os' ||p.pkg=='http'{
if p.pkg != 'main' {
p.genln('}')
p.fgenln('\n')
return
}
for var in f.local_vars {
// An empty name marks the first unused slot
if var.name == '' {
break
}
if !var.is_used && !var.is_arg && !p.translated && var.name != '_' {
// Point the error at the line of the variable
p.scanner.line_nr = var.line_nr - 1
p.error('`$var.name` declared and not used')
}
// Very basic automatic memory management at the end of the function.
// This is inserted right before the final `}`, so if the object is being returned,
// the free method will not be called.
if p.is_test && var.typ.contains('array_') {
// p.genln('v_${var.typ}_free($var.name); // !!!! XAXA')
// p.genln('free(${var.name}.data); // !!!! XAXA')
}
}
// println('end of func decl')
// p.print_tok()
p.cur_fn = EmptyFn
p.fgenln('\n')
p.genln('}')
}
// Important function with 5 args.
// user.say_hi() => "User_say_hi(user)"
// method_ph - where to insert "user_say_hi("
// receiver_var - "user" (needed for pthreads)
// receiver_type - "User"
// async_fn_call generates the C code that calls `f` in a new thread.
// The call arguments are parsed and stored in a malloc'ed `thread_arg_<fn>` struct,
// a `<cgen name>_thread_wrapper` function that unpacks the struct and calls `f` is
// registered in the code generator, and a `pthread_create` call (`CreateThread` when
// targeting Windows) that starts the wrapper is generated.
// `method_ph` is not used by this function.
fn (p mut Parser) async_fn_call(f Fn, method_ph int, receiver_var, receiver_type string) {
// println('\nfn_call $f.name is_method=$f.is_method receiver_type=$f.receiver_type')
// p.print_tok()
mut thread_name := ''
// Normal function => just its name, method => TYPE_FNNAME
mut fn_name := f.name
if f.is_method {
receiver_type = receiver_type.replace('*', '')
fn_name = '${receiver_type}_${f.name}'
}
// Generate tmp struct with args
arg_struct_name := 'thread_arg_$fn_name'
tmp_struct := p.get_tmp()
p.genln('$arg_struct_name * $tmp_struct = malloc(sizeof($arg_struct_name));')
mut arg_struct := 'typedef struct $arg_struct_name { '
p.next()
p.check(LPAR)
// str_args contains the args for the wrapper function:
// wrapper(arg_struct * arg) { fn("arg->a, arg->b"); }
mut str_args := ''
for i, arg in f.args {
arg_struct += '$arg.typ $arg.name ;'// Add another field (arg) to the tmp struct definition
str_args += 'arg->$arg.name'
// The receiver is not parsed from the call, it is taken from `receiver_var`
if i == 0 && f.is_method {
p.genln('$tmp_struct -> $arg.name = $receiver_var ;')
if i < f.args.len - 1 {
str_args += ','
}
continue
}
// Set the struct values (args)
p.genln('$tmp_struct -> $arg.name = ')
p.expression()
p.genln(';')
if i < f.args.len - 1 {
p.check(COMMA)
str_args += ','
}
}
arg_struct += '} $arg_struct_name ;'
// Also register the wrapper, so we can use the original function without modifying it
fn_name = p.table.cgen_name(f)
wrapper_name := '${fn_name}_thread_wrapper'
wrapper_text := 'void* $wrapper_name($arg_struct_name * arg) {$fn_name( /*f*/$str_args ); }'
p.cgen.register_thread_fn(wrapper_name, wrapper_text, arg_struct)
// Create thread object
tmp_nr := p.get_tmp_counter()
thread_name = '_thread$tmp_nr'
if p.os != WINDOWS {
p.genln('pthread_t $thread_name;')
}
tmp2 := p.get_tmp()
// The struct is passed to the wrapper only if the function has arguments
mut parg := 'NULL'
if f.args.len > 0 {
parg = ' $tmp_struct'
}
// Call the wrapper
if p.os == WINDOWS {
p.genln(' CreateThread(0,0, $wrapper_name, $parg, 0,0);')
}
else {
p.genln('int $tmp2 = pthread_create(& $thread_name, NULL, $wrapper_name, $parg);')
}
p.check(RPAR)
}
// fn_call generates the C code for a call of the function or method `f`.
// f             - the function being called
// method_ph     - placeholder position in the generated code where the C method name
//                 (plus `&`, `*` and a cast if needed) is inserted for method calls
// receiver_var  - not used by this function
// receiver_type - type of the receiver expression (methods only)
// For a plain function `name(` is generated; for a method the text is inserted at
// `method_ph`, in front of the receiver expression that was already generated.
// Then the arguments are parsed with `fn_call_args` and `)` is generated.
fn (p mut Parser) fn_call(f Fn, method_ph int, receiver_var, receiver_type string) {
	p.calling_c = f.is_c
	is_print := p.is_prod &&// Hide prints only in prod
	!p.is_test &&
	!p.builtin_pkg &&// Allow prints in builtin pkgs
	(f.name == 'println' || (f.is_c && f.name == 'printf'))
	// Mute code generation for hidden prints (unless it is muted already)
	if !p.cgen.nogen {
		p.cgen.nogen = is_print
	}
	cgen_name := p.table.cgen_name(f)
	// Normal function call
	if !f.is_method {
		p.gen(cgen_name)
		p.gen('(')
	}
	// If we have a method placeholder,
	// we need to prepend "method(receiver, ...)"
	else {
		mut method_call := '${cgen_name}('
		receiver := f.args.first()
		if receiver.is_mut && !p.expr_var.is_mut {
			println('$method_call recv=$receiver.name recv_mut=$receiver.is_mut')
			p.error('`$p.expr_var.name` is immutable')
		}
		// if receiver is mutable or a ref (&), generate & for the first arg
		if receiver.ref || (receiver.is_mut && !receiver_type.contains('*')) {
			method_call += '& /* ? */'
		}
		// generate deref (TODO copy pasta later in fn_call_args)
		if !receiver.is_mut && receiver_type.contains('*') {
			method_call += '*'
		}
		// Declared with `:=` like every other variable
		mut cast := ''
		// Method returns (void*) => cast it to int, string, user etc
		// number := *(int*)numbers.first()
		if f.typ == 'void*' {
			// array_int => int
			cast = receiver_type.all_after('_')
			cast = '*($cast*) '
		}
		p.cgen.set_placeholder(method_ph, '$cast $method_call')
	}
	p.next()
	p.fn_call_args(f)
	p.gen(')')
	p.calling_c = false
	// Unmute code generation after a hidden print
	if is_print {
		p.cgen.nogen = false
	}
}
// for declaration
// return an updated Fn object with args[] field set
// fn_args parses the argument list of a function declaration, from `(` to `)`, and adds
// the arguments to `f.args`. Named arguments are also registered as local variables of `f`.
// Two forms are supported: types only (`(int, string)`) and named arguments
// (`(a int, b, c mut string)`), optionally followed by `..` for varargs.
// If `f.is_interface` is set, an unnamed argument of type `f.receiver_typ` is added first.
fn (p mut Parser) fn_args(f mut Fn) {
	p.check(LPAR)
	// TODO defer p.check(RPAR)
	if f.is_interface {
		int_arg := Var {
			typ: f.receiver_typ
		}
		f.args << int_arg
	}
	// Just register fn arg types
	types_only := p.tok == MUL || (p.peek() == COMMA && p.table.known_type(p.lit)) || p.peek() == RPAR// (int, string)
	if types_only {
		for p.tok != RPAR {
			typ := p.get_type()
			v := Var {
				typ: typ
				is_arg: true
				// is_mut: is_mut
				line_nr: p.scanner.line_nr
			}
			// f.register_var(v)
			f.args << v
			if p.tok == COMMA {
				p.next()
			}
		}
	}
	// (a int, b,c string) syntax
	for p.tok != RPAR {
		mut names := [
			p.check_name()
		]
		// a,b,c int syntax
		for p.tok == COMMA {
			p.check(COMMA)
			p.fspace()
			names << p.check_name()
		}
		p.fspace()
		is_mut := p.tok == MUT
		if is_mut {
			p.next()
		}
		typ2 := p.get_type()
		// The type as written has to be known (checked before `*` is added)
		if !p.first_run() && !p.table.known_type(typ2) {
			p.error('fn_args: unknown type $typ2')
		}
		// Mutable args are passed as pointers. The `*` is added exactly once here, so that
		// all names that share the type (`a, b mut Foo`) get `Foo*` and not `Foo*`, `Foo**`, ...
		mut arg_typ := typ2
		if is_mut {
			// && !typ2.starts_with('array_') {
			arg_typ += '*'
		}
		for name in names {
			v := Var {
				name: name
				typ: arg_typ
				is_arg: true
				is_mut: is_mut
				ptr: is_mut
				line_nr: p.scanner.line_nr
			}
			f.register_var(v)
			f.args << v
		}
		if p.tok == COMMA {
			p.next()
		}
		// Varargs are stored as a special argument named `..`
		if p.tok == DOTDOT {
			f.args << Var {
				name: '..'
			}
			p.next()
		}
	}
	p.check(RPAR)
}
// fn_call_args parses the arguments of a call of `f`, from `(` to `)`, and generates the
// C code for them.
// For C functions every expression is generated as is, since the number of arguments is
// not known. For V functions the number of arguments, their types and `mut` are checked,
// `&` / `*` is inserted in front of an argument where needed, and for interface arguments
// the methods of the passed type are appended.
// Always returns `f`.
fn (p mut Parser) fn_call_args(f *Fn) *Fn {
	p.check(LPAR)
	// C func. # of args is not known
	if f.is_c {
		for p.tok != RPAR {
			p.bool_expression()
			if p.tok == COMMA {
				p.gen(', ')
				p.check(COMMA)
			}
		}
		p.check(RPAR)
		return f
	}
	// Receiver - first arg
	for i, arg in f.args {
		// Skip receiver, because it was already generated in the expression
		if i == 0 && f.is_method {
			if f.args.len > 1 {
				p.gen(',')
			}
			continue
		}
		// Reached the final vararg? Quit
		if i == f.args.len - 1 && arg.name == '..' {
			break
		}
		// Position in front of the argument, used to insert `&`, `*` or `xxx_str(` later
		amp_ph := p.cgen.add_placeholder()
		// ) here means not enough args were supplied
		if p.tok == RPAR {
			str_args := f.str_args(p.table)// TODO this is C args
			p.error('not enough arguments in call to `$f.name ($str_args)`')
		}
		// If `arg` is mutable, the caller needs to provide MUT:
		// `arr := [1,2,3]; reverse(mut arr);`
		if arg.is_mut {
			if p.tok != MUT {
				p.error('`$arg.name` is a mutable argument, you need to provide `mut`: `$f.name(...mut a...)`')
			}
			if p.peek() != NAME {
				p.error('`$arg.name` is a mutable argument, you need to provide a variable to modify: `$f.name(... mut a...)`')
			}
			p.check(MUT)
		}
		typ := p.bool_expression()
		// TODO temporary hack to allow println(777)
		if i == 0 && f.name == 'println' && typ != 'string' && typ != 'void' {
			// If we dont check for void, then V will compile "println(procedure())"
			if !p.is_prod {
				T := p.table.find_type(typ)
				if typ == 'u8' {
					p.cgen.set_placeholder(amp_ph, 'u8_str(')
				}
				else if T.parent == 'int' {
					p.cgen.set_placeholder(amp_ph, 'int_str(')
				}
				else if typ.ends_with('*') {
					p.cgen.set_placeholder(amp_ph, 'ptr_str(')
				}
				else {
					// Make sure this type has a `str()` method
					if !T.has_method('str') {
						p.error('`$typ` needs to have method `str() string` to be printable')
					}
					p.cgen.set_placeholder(amp_ph, '${typ}_str(')
				}
				p.gen(')')
			}
			continue
		}
		got := typ
		expected := arg.typ
		if !p.check_types_no_throw(got, expected) {
			mut err := 'Fn "$f.name" wrong arg #${i+1}. '
			err += 'Expected "$arg.typ" ($arg.name) but got "$typ"'
			p.error(err)
		}
		is_interface := p.table.is_interface(arg.typ)
		// Add & or * before arg?
		if !is_interface {
			// Dereference
			if got.contains('*') && !expected.contains('*') {
				p.cgen.set_placeholder(amp_ph, '*')
			}
			// Reference
			// TODO ptr hacks. DOOM hacks, fix please.
			if !got.contains('*') && expected.contains('*') && got != 'voidptr' {
				if ! (expected == 'void*' && got == 'int') &&
				! (expected == 'byte*' && got.contains(']byte')) &&
				! (expected == 'byte*' && got == 'string') {
					p.cgen.set_placeholder(amp_ph, '& /*11 EXP:"$expected" GOT:"$got" */')
				}
			}
		}
		// interface?
		if is_interface {
			if !got.contains('*') {
				p.cgen.set_placeholder(amp_ph, '&')
			}
			// Pass all interface methods
			interface_type := p.table.find_type(arg.typ)
			for method in interface_type.methods {
				p.gen(', ${typ}_${method.name} ')
			}
		}
		// Check for commas
		if i < f.args.len - 1 {
			// Handle 0 args passed to varargs
			is_vararg := i == f.args.len - 2 && f.args[i + 1].name == '..'
			if p.tok != COMMA && !is_vararg {
				p.error('wrong number of arguments for $i,$arg.name fn `$f.name`: expected $f.args.len, but got less')
			}
			if p.tok == COMMA {
				p.fgen(', ')
			}
			if !is_vararg {
				p.next()
				p.gen(',')
			}
		}
	}
	// varargs
	if f.args.len > 0 {
		last_arg := f.args.last()
		if last_arg.name == '..' {
			println('GOT VAR ARGS AFTER')
			for p.tok != RPAR {
				if p.tok == COMMA {
					p.gen(',')
					p.check(COMMA)
				}
				p.bool_expression()
			}
		}
	}
	if p.tok == COMMA {
		p.error('wrong number of arguments for fn `$f.name`: expected $f.args.len, but got more')
	}
	p.check(RPAR)
	// The function is declared to return `*Fn`, so every path has to return a value
	// (before only the C function branch above did)
	return f
}
// contains_capital reports whether `s` contains at least one of the letters `A`-`Z`.
fn contains_capital(s string) bool {
	mut i := 0
	for i < s.len {
		ch := s[i]
		if ch >= `A` && ch <= `Z` {
			return true
		}
		i++
	}
	return false
}
// "fn (int, string) int"
// typ_str returns the type of the function as text: `fn (` + the comma separated argument
// types + `)`, followed by a space and the return type unless it is `void`.
fn (f Fn) typ_str() string {
	mut sb := new_string_builder(50)
	sb.write('fn (')
	last := f.args.len - 1
	for i, arg in f.args {
		sb.write(arg.typ)
		// No comma after the last argument
		if i < last {
			sb.write(',')
		}
	}
	sb.write(')')
	if f.typ != 'void' {
		sb.write(' $f.typ')
	}
	return sb.str()
}
// f.args => "int a, string b"
// str_args returns the arguments of the function as a C parameter list:
// f.args => "int a, string b"
// An interface argument is expanded to a `void*` object followed by one function pointer
// per interface method:
// fn handle(r Runner) { =>
// void handle(void *r, void (*Runner_run)(void*)) {
// The `..` argument becomes `...`.
fn (f &Fn) str_args(table *Table) string {
	mut s := ''
	last := f.args.len - 1
	for i, arg in f.args {
		if table.is_interface(arg.typ) {
			// First the object (same name as the interface argument)
			s += ' void* $arg.name'
			// Now all methods
			interface_type := table.find_type(arg.typ)
			for method in interface_type.methods {
				s += ', $method.typ (*${arg.typ}_${method.name})(void*) '
			}
		}
		else if arg.name == '..' {
			s += '...'
		}
		else {
			s += table.cgen_name_type_pair(arg.name, arg.typ)
		}
		// Separator, except after the last argument
		if i < last {
			s += ', '
		}
	}
	return s
}

158
compiler/jsgen.v Normal file
View File

@ -0,0 +1,158 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
module main
// TODO replace with comptime code generation.
// TODO remove cJSON dependency.
// OLD: User decode_User(string js) {
// now it's
// User decode_User(cJSON* root) {
// User res;
// res.name = decode_string(js_get(root, "name"));
// res.profile = decode_Profile(js_get(root, "profile"));
// return res;
// }
// Codegen json_decode/encode funcs
// gen_json_for_type registers and generates the C json decoder and encoder functions
// for type `typ` (and, recursively, for the types of its fields).
// Nothing is done for `int`, `string` and `bool`, in the first run, or if the decoder
// is already registered. The generated C functions are added to `p.cgen.fns`.
fn (p mut Parser) gen_json_for_type(typ Type) {
mut dec := ''
mut enc := ''
t := typ.name
if t == 'int' || t == 'string' || t == 'bool' {
return
}
if p.first_run() {
return
}
// println('gen_json_for_type( $typ.name )')
// Register decoder fn
mut dec_fn := Fn {
pkg: p.pkg
typ: 'Option_$typ.name'
name: js_dec_name(t)
}
// Already registered? Skip.
if p.table.known_fn(dec_fn.name) {
return
}
// decode_TYPE funcs receive an actual cJSON* object to decode
// cJSON_Parse(str) call is added by the compiler
arg := Var {
typ: 'cJSON*'
}
dec_fn.args << arg
p.table.register_fn(dec_fn)
// Register encoder fn
mut enc_fn := Fn {
pkg: p.pkg
typ: 'cJSON*'
name: js_enc_name(t)
}
// encode_TYPE funcs receive an object to encode
enc_arg := Var {
typ: t
}
enc_fn.args << enc_arg
p.table.register_fn(enc_fn)
// Code gen decoder
// (the result is written through the `res` pointer, the function itself returns an Option)
dec += '
//$t $dec_fn.name(cJSON* root) {
Option $dec_fn.name(cJSON* root, $t* res) {
// $t res;
if (!root) {
const char *error_ptr = cJSON_GetErrorPtr();
if (error_ptr != NULL) {
fprintf(stderr, "Error in decode() for $t error_ptr=: %%s\\n", error_ptr);
// printf("\\nbad js=%%s\\n", js.str);
return b_error(tos2(error_ptr));
}
}
'
// Code gen encoder
enc += '
cJSON* $enc_fn.name($t val) {
cJSON *o = cJSON_CreateObject();
string res = tos2("");
'
// Handle arrays
if t.starts_with('array_') {
dec += p.decode_array(t)
enc += p.encode_array(t)
}
// Range through fields
for field in typ.fields {
// Fields with the `skip` attribute are neither decoded nor encoded
if field.attr == 'skip' {
continue
}
field_type := p.table.find_type(field.typ)
// Now generate decoders for all field types in this struct
// need to do it here so that these functions are generated first
p.gen_json_for_type(field_type)
name := field.name
_typ := field.typ.replace('*', '')
enc_name := js_enc_name(_typ)
dec_name := js_dec_name(_typ)
// Primitive decoders return the value, all other decoders write it through a pointer
if is_js_prim(_typ) {
dec += ' /*prim*/ res->$name = $dec_name(js_get(root, "$field.name"))'
// dec += '.data'
}
else {
dec += ' /*!!*/ $dec_name(js_get(root, "$field.name"), & (res->$name))'
}
dec += ';\n'
enc += ' cJSON_AddItemToObject(o, "$name", $enc_name(val.$name)); \n'
}
// cJSON_delete
// Close both functions and add them to the generated functions
p.cgen.fns << '$dec return opt_ok(res); \n}'
p.cgen.fns << '/*enc start*/ $enc return o;}'
}
// is_js_prim reports whether `typ` is one of the primitive json types:
// `int`, `string` or `bool`.
fn is_js_prim(typ string) bool {
	if typ == 'int' {
		return true
	}
	if typ == 'string' {
		return true
	}
	return typ == 'bool'
}
// decode_array returns the C code that decodes the cJSON array `root` into the array `*res`.
// `typ` is the name of the array type (`array_Profile`); the en(de)coder of the element
// type is generated first.
fn (p mut Parser) decode_array(typ string) string {
	typ = typ.replace('array_', '')
	fn_name := js_dec_name(typ)
	// If we have `[]Profile`, have to register a Profile en(de)coder first
	elem_type := p.table.find_type(typ)
	p.gen_json_for_type(elem_type)
	// Primitive decoders return the value, all other decoders write it through a pointer
	mut s := ' $typ val; $fn_name(jsval, &val); '
	if is_js_prim(typ) {
		s = '$typ val= $fn_name(jsval); '
	}
	return '
*res = new_array(0, 0, sizeof($typ));
const cJSON *jsval = NULL;
cJSON_ArrayForEach(jsval, root)
{
$s
array__push(res, &val);
}
'
}
// js_enc_name returns the name of the C function that encodes type `typ` to json.
fn js_enc_name(typ string) string {
	return 'json__jsencode_$typ'
}
// js_dec_name returns the name of the C function that decodes type `typ` from json.
fn js_dec_name(typ string) string {
	return 'json__jsdecode_$typ'
}
// encode_array returns the C code that encodes the array `val` into a new cJSON array `o`.
// `typ` is the name of the array type (`array_Profile`); every element is encoded with the
// encoder of the element type.
fn (p &Parser) encode_array(typ string) string {
// array_Profile => Profile
typ = typ.replace('array_', '')
fn_name := js_enc_name(typ)
return '
o = cJSON_CreateArray();
for (int i = 0; i < val.len; i++){
cJSON_AddItemToArray(o, $fn_name( (($typ*)val.data)[i] ));
}
'
}

845
compiler/main.v Normal file
View File

@ -0,0 +1,845 @@
module main
import os
import time
const (
// Version of the compiler, printed by `v version` and used in the name of the tmp directory
Version = '0.0.12'
)
// TODO no caps
// BuildMode defines what is built and how vlib is linked.
enum BuildMode {
// `v program.v`
// Build user code only, and add pre-compiled vlib (`cc program.o builtin.o os.o...`)
DEFAULT_MODE
// `v -embed_vlib program.v`
// vlib + user code in one file (slower compilation, but easier when working on vlib and cross-compiling)
EMBED_VLIB
// `v -lib ~/v/os`
// build any module (generate os.o + os.vh)
BUILD // TODO a better name would be smth like `.build_module` I think
}
// vtmp_path returns the directory for temporary files of this compiler version:
// `.vlang<Version>/` in the home directory when compiled for Windows,
// `/var/tmp/vlang<Version>/` otherwise.
fn vtmp_path() string {
$if windows {
return os.home_dir() + '/.vlang$Version/'
}
return '/var/tmp/vlang$Version/'
}
const (
// Names of the supported platforms
SupportedPlatforms = ['windows', 'mac', 'linux']
// Directory for temporary files, see `vtmp_path`
TmpPath = vtmp_path()
)
// TODO V was re-written in V before enums were implemented. Lots of consts need to be replaced with
// enums.
// Values of `V.os` (the OS to build for)
const (
MAC = 0
LINUX = 1
WINDOWS = 2
)
// Pass defines the passes of the compiler over the source files.
enum Pass {
// A very short pass that only looks at imports in the beginning of each file
RUN_IMPORTS
// First pass, only parses and saves declarations (fn signatures, consts, types).
// Skips function bodies.
// We need this because in V things can be used before they are declared.
RUN_DECLS
// Second pass, parses function bodies and generates C or machine code.
RUN_MAIN
}
/*
// TODO rename to:
enum Pass {
imports
decls
main
}
*/
// V holds the state of one compiler invocation. It is built from the command
// line arguments by new_v().
struct V {
mut:
build_mode BuildMode
os int // the OS to build for
nofmt bool // disable vfmt
out_name_c string // name of the temporary C file
files []string // all V files that need to be parsed and compiled
dir string // directory (or file) being compiled (TODO rename to path?)
table *Table // table with types, vars, functions etc
cgen *CGen // C code generator
is_test bool // `v test string_test.v`
is_script bool // single file mode (`v program.v`), `fn main(){}` can be skipped
is_so bool
is_live bool // for hot code reloading
is_prof bool // benchmark every function
translated bool // `v translated doom.v` are we running V code translated from C? allow globals, ++ expressions, etc
obfuscate bool // `v -obf program.v`, renames functions to "f_XXX"
lang_dir string // "~/code/v"
is_verbose bool // print extra information with `v.log()`
is_run bool // `v run program.v`
is_play bool // playground mode
show_c_cmd bool // `v -show_c_cmd` prints the C command to build program.v.c
sanitize bool // use Clang's new "-fsanitize" option
out_name string // "program.exe"
is_prod bool // use "-O2" and skip printlns (TODO I don't think many people want printlns to disappear in prod builds)
is_repl bool
}
// Entry point of the compiler. Handles the commands that do not compile
// anything (`version`, help, `fmt`, the REPL, `doc`) and otherwise builds a V
// object from the arguments and compiles.
fn main() {
	// There's no `flags` module yet, so args have to be parsed manually
	args := os.args
	// Print the version and exit.
	if 'version' in args {
		println2('V $Version')
		return
	}
	// Print the help text and exit. Without the `return` the help argument
	// itself would be treated as the path to compile.
	if '-h' in args || '--help' in args || 'help' in args {
		println(HelpText)
		return
	}
	// u := os.file_last_mod_unix('/var/tmp/alex')
	// t := time.unixn(u)
	// println(t.clean())
	// If there's no tmp path with current version yet, the user must be using a pre-built package
	// Copy the `vlib` directory to the tmp path.
	if !os.file_exists(TmpPath) && os.file_exists('vlib') {
		os.mkdir(TmpPath)
		os.system2('cp -rf vlib $TmpPath/')
		// os.system2('cp -rf json $TmpPath/')
	}
	// Just fmt and exit
	if args.contains('fmt') {
		file := args.last()
		if !os.file_exists(file) {
			os.exit1('"$file" does not exist')
		}
		if !file.ends_with('.v') {
			os.exit1('v fmt can only be used on .v files')
		}
		println2('vfmt is temporarily disabled')
		return
	}
	// V with no args? REPL
	if args.len < 2 {
		run_repl()
		return
	}
	// Construct the V object from command line arguments
	mut c := new_v(args)
	if c.is_verbose {
		println(args)
	}
	// Generate the docs and exit
	if args.contains('doc') {
		// c.gen_doc_html_for_module(args.last())
		os.exit('')
	}
	c.compile()
}
// compile drives the whole build: it collects the files to compile, runs the
// declarations pass and the main pass over `c.files`, emits the C prelude and
// helper functions through `c.cgen`, saves the C file, calls cc() to build it
// and, for `test`/`run`, executes the resulting binary.
fn (c mut V) compile() {
mut cgen := c.cgen
cgen.genln('// Generated by V')
// Add user files to compile
c.add_user_v_files()
if c.is_verbose {
println('all .v files:')
println(c.files)
}
// First pass (declarations)
for file in c.files {
mut p := c.new_parser(file, RUN_DECLS)
p.parse()
}
// Main pass
cgen.run = RUN_MAIN
// Platform define and platform specific includes
if c.os == MAC {
cgen.genln('#define mac (1) ')
// cgen.genln('#include <pthread.h>')
}
if c.os == LINUX {
cgen.genln('#define linux (1) ')
cgen.genln('#include <pthread.h>')
}
if c.os == WINDOWS {
cgen.genln('#define windows (1) ')
// cgen.genln('#include <WinSock2.h>')
cgen.genln('#include <windows.h> ')
}
if c.is_play {
cgen.genln('#define VPLAY (1) ')
}
// Common C prelude: includes, typedefs for the builtin types, helper macros
// and forward declarations
cgen.genln('
#include <stdio.h> // TODO remove all these includes, define all function signatures and types manually
#include <stdlib.h>
#include <signal.h>
#include <stdarg.h> // for va_list
#include <inttypes.h> // int64_t etc
//================================== TYPEDEFS ================================*/
typedef unsigned char byte;
typedef unsigned int uint;
typedef int64_t i64;
typedef int32_t i32;
typedef int16_t i16;
typedef int8_t i8;
typedef uint64_t u64;
typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;
typedef uint32_t rune;
typedef float f32;
typedef double f64;
typedef unsigned char* byteptr;
typedef int* intptr;
typedef void* voidptr;
typedef struct array array;
typedef struct map map;
typedef array array_string;
typedef array array_int;
typedef array array_byte;
typedef array array_uint;
typedef array array_float;
typedef map map_int;
typedef map map_string;
#ifndef bool
typedef int bool;
#define true 1
#define false 0
#endif
//============================== HELPER C MACROS =============================*/
#define _PUSH(arr, val, tmp, tmp_typ) {tmp_typ tmp = (val); array__push(arr, &tmp);}
#define _IN(typ, val, arr) array_##typ##_contains(arr, val)
#define ALLOC_INIT(type, ...) (type *)memdup((type[]){ __VA_ARGS__ }, sizeof(type))
#define UTF8_CHAR_LEN( byte ) (( 0xE5000000 >> (( byte >> 3 ) & 0x1e )) & 3 ) + 1
//================================== GLOBALS =================================*/
//int V_ZERO = 0;
byteptr g_str_buf;
int load_so(byteptr);
void reload_so();
void init_consts();')
imports_json := c.table.imports.contains('json')
// TODO remove global UI hack
if c.os == MAC && ((c.build_mode == EMBED_VLIB && c.table.imports.contains('ui')) ||
(c.build_mode == BUILD && c.dir.contains('/ui'))) {
cgen.genln('id defaultFont = 0; // main.v')
}
// TODO remove ugly .c include once V has its own json parser
// Embed cjson either in embedvlib or in json.o
if imports_json && c.build_mode == EMBED_VLIB ||
(c.build_mode == BUILD && c.out_name.contains('json.o')) {
cgen.genln('#include "json/cJSON/cJSON.c" ')
}
// We need the cjson header for all the json decoding user will do in default mode
if c.build_mode == DEFAULT_MODE {
if imports_json {
cgen.genln('#include "json/cJSON/cJSON.h"')
}
}
if c.build_mode == EMBED_VLIB || c.build_mode == DEFAULT_MODE {
// If we declare these for all modes, then when running `v a.v` we'll get
// `/usr/bin/ld: multiple definition of 'total_m'`
cgen.genln('i64 total_m = 0; // For counting total RAM allocated')
cgen.genln('int g_test_ok = 1; ')
// Same condition as `imports_json` above
if c.table.imports.contains('json') {
cgen.genln('
#define js_get(object, key) cJSON_GetObjectItemCaseSensitive((object), (key))
')
}
}
if os.args.contains('-debug_alloc') {
cgen.genln('#define DEBUG_ALLOC 1')
}
cgen.genln('/*================================== FNS =================================*/')
// Placeholder line. Its index is remembered in `defs_pos` and the line is
// overwritten with the collected definitions once all files are parsed.
cgen.genln('this line will be replaced with definitions')
defs_pos := cgen.lines.len - 1
for file in c.files {
mut p := c.new_parser(file, RUN_MAIN)
p.parse()
// p.g.gen_x64()
// Format all files (don't format automatically generated vlib headers)
if !c.nofmt && !file.contains('/vlib/') {
// new vfmt is not ready yet
}
}
c.log('Done parsing.')
// Write everything
mut d := new_string_builder(10000)// Just to avoid some unnecessary allocations
d.writeln(cgen.includes.join_lines())
d.writeln(cgen.typedefs.join_lines())
d.writeln(cgen.types.join_lines())
d.writeln('\nstring _STR(const char*, ...);\n')
d.writeln('\nstring _STR_TMP(const char*, ...);\n')
d.writeln(cgen.fns.join_lines())
d.writeln(cgen.consts.join_lines())
d.writeln(cgen.thread_args.join_lines())
if c.is_prof {
d.writeln('; // Prof counters:')
d.writeln(c.prof_counters())
}
dd := d.str()
cgen.lines.set(defs_pos, dd)// TODO `def.str()` doesn't compile
// if c.build_mode in [.default, .embed_vlib] {
if c.build_mode == DEFAULT_MODE || c.build_mode == EMBED_VLIB {
// vlib can't have `init_consts()`
cgen.genln('void init_consts() { g_str_buf=malloc(1000); ${cgen.consts_init.join_lines()} }')
// _STR function can't be defined in vlib
// NOTE(review): the generated _STR_TMP formats into g_str_buf, which
// init_consts() allocates with malloc(1000); the computed `len` is not
// checked against that size.
cgen.genln('
string _STR(const char *fmt, ...) {
va_list argptr;
va_start(argptr, fmt);
size_t len = vsnprintf(0, 0, fmt, argptr) + 1;
va_end(argptr);
byte* buf = malloc(len);
va_start(argptr, fmt);
vsprintf(buf, fmt, argptr);
va_end(argptr);
#ifdef DEBUG_ALLOC
puts("_STR:");
puts(buf);
#endif
return tos2(buf);
}
string _STR_TMP(const char *fmt, ...) {
va_list argptr;
va_start(argptr, fmt);
size_t len = vsnprintf(0, 0, fmt, argptr) + 1;
va_end(argptr);
va_start(argptr, fmt);
vsprintf(g_str_buf, fmt, argptr);
va_end(argptr);
#ifdef DEBUG_ALLOC
//puts("_STR_TMP:");
//puts(g_str_buf);
#endif
return tos2(g_str_buf);
}
')
}
// Make sure the main function exists
// Obviously we don't need it in libraries
if c.build_mode != BUILD {
if !c.table.main_exists() && !c.is_test {
// It can be skipped in single file programs
if c.is_script {
println('Generating main()...')
cgen.genln('int main() { $cgen.fn_main; return 0; }')
}
else {
// NOTE(review): this only prints a message, the build continues
println('panic: function `main` is undeclared in the main module')
}
}
// Generate `main` which calls every single test function
else if c.is_test {
cgen.genln('int main() { init_consts();')
for v in c.table.fns {
if v.name.starts_with('test_') {
cgen.genln('$v.name();')
}
}
// The generated main returns 1 when g_test_ok is 0, and 0 otherwise
cgen.genln('return g_test_ok == 0; }')
}
}
// Hot code reloading: generate load_so(), which opens the library and
// looks up every function listed in cgen.so_fns
if c.is_live {
cgen.genln(' int load_so(byteptr path) {
printf("load_so %s\\n", path); dlclose(live_lib); live_lib = dlopen(path, RTLD_LAZY);
if (!live_lib) {puts("open failed"); exit(1); return 0;}
')
for so_fn in cgen.so_fns {
cgen.genln('$so_fn = dlsym(live_lib, "$so_fn"); ')
}
cgen.genln('return 1; }')
}
cgen.save()
c.log('flags=')
if c.is_verbose {
println(c.table.flags)
}
// Build the generated C file
c.cc()
if c.is_test || c.is_run {
if true || c.is_verbose {
println('============running $c.out_name==============================')
}
// Prefix with `./` unless the output name starts with a slash
cmd := if c.out_name.starts_with('/') {
c.out_name
}
else {
'./' + c.out_name
}
ret := os.system2(cmd)
if ret != 0 {
// NOTE(review): the command is executed a second time here, presumably
// to get its output for printing — confirm
s := os.system(cmd)
println2(s)
os.exit1('ret not 0, exiting')
}
}
}
// cc assembles the clang command line for the generated C file and runs it.
// When targeting Linux from a non-Linux host it compiles to an object file
// and, unless a module is being built, links it with ld.lld afterwards.
// Panics when the compiler output contains 'error: '.
fn (c mut V) cc() {
linux_host := os.user_os() == 'linux'
c.log('cc() isprod=$c.is_prod outname=$c.out_name')
mut a := ['-w']// arguments for the C compiler
flags := c.table.flags.join(' ')
/*
mut shared := ''
if c.is_so {
a << '-shared'// -Wl,-z,defs'
c.out_name = c.out_name + '.so'
}
*/
if c.is_prod {
a << '-O2'
}
else {
a << '-g'
}
mut libs := ''// builtin.o os.o http.o etc
if c.build_mode == BUILD {
// Building a module: compile only, don't link
a << '-c'
}
else if c.build_mode == EMBED_VLIB {
//
}
else if c.build_mode == DEFAULT_MODE {
// Add the pre-compiled builtin.o and one object file per import
libs = '$TmpPath/vlib/builtin.o'
if !os.file_exists(libs) {
println2('`builtin.o` not found')
exit('')
}
for imp in c.table.imports {
if imp == 'webview' {
continue
}
libs += ' $TmpPath/vlib/${imp}.o'
}
}
// -I flags
/*
mut args := ''
for flag in c.table.flags {
if !flag.starts_with('-l') {
args += flag
args += ' '
}
}
*/
if c.sanitize {
a << '-fsanitize=leak'
}
// Cross compiling linux
// NOTE(review): hard-coded, machine specific sysroot path
sysroot := '/Users/alex/tmp/lld/linuxroot/'
if c.os == LINUX && !linux_host {
// Build file.o
a << '-c --sysroot=$sysroot -target x86_64-linux-gnu'
// Right now `out_name` can be `file`, not `file.o`
if !c.out_name.ends_with('.o') {
c.out_name = c.out_name + '.o'
}
}
// Cross compiling windows
// sysroot := '/Users/alex/tmp/lld/linuxroot/'
// Output executable name
// else {
a << '-o $c.out_name'
// }
// Min macos version is mandatory I think?
if c.os == MAC {
a << '-mmacosx-version-min=10.7'
}
a << flags
a << libs
// macOS code can include objective C TODO remove once objective C is replaced with C
if c.os == MAC {
a << '-x objective-c'
}
// The C file we are compiling
a << '$TmpPath/$c.out_name_c'
// Without these libs compilation will fail on Linux
if c.os == LINUX && c.build_mode != BUILD {
a << '-lm -ldl -lpthread'
}
// Find clang executable
// Use the clang at this fixed path when it exists, `clang` from PATH otherwise
fast_clang := '/usr/local/Cellar/llvm/8.0.0/bin/clang'
args := a.join(' ')
cmd := if os.file_exists(fast_clang) {
'$fast_clang -I. $args'
}
else {
'clang -I. $args'
}
// Print the C command
if c.show_c_cmd || c.is_verbose {
println('\n==========\n$cmd\n=========\n')
}
// Run
res := os.system(cmd)
// println('C OUTPUT:')
// Failure is detected by looking for 'error: ' in the command output
if res.contains('error: ') {
println2(res)
panic('clang error')
}
// Link it if we are cross compiling and need an executable
if c.os == LINUX && !linux_host && c.build_mode != BUILD {
// NOTE(review): replace() removes every '.o' in the name, not only a
// trailing one — confirm this is acceptable
c.out_name = c.out_name.replace('.o', '')
obj_file := c.out_name + '.o'
println('linux obj_file=$obj_file out_name=$c.out_name')
ress := os.system('/usr/local/Cellar/llvm/8.0.0/bin/ld.lld --sysroot=$sysroot ' +
'-v -o $c.out_name ' +
'-m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 ' +
'/usr/lib/x86_64-linux-gnu/crt1.o ' +
'$sysroot/lib/x86_64-linux-gnu/libm-2.28.a ' +
'/usr/lib/x86_64-linux-gnu/crti.o ' +
obj_file +
' /usr/lib/x86_64-linux-gnu/libc.so ' +
'/usr/lib/x86_64-linux-gnu/crtn.o')
println(ress)
if ress.contains('error:') {
os.exit1('')
}
println('linux cross compilation done. resulting binary: "$c.out_name"')
}
// print_time('after gcc')
}
// v_files_from_dir returns the sorted `.v` and `.vh` files of `dir` that
// belong to the build, each prefixed with `dir/`. Test files and files for
// another OS are left out. Panics if `dir` does not exist.
fn (c &V) v_files_from_dir(dir string) []string {
	// Check the directory before listing it, so that a missing directory is
	// always reported with a clear message
	if !os.file_exists(dir) {
		panic('$dir doesnt exist')
	}
	mut res := []string
	mut files := os.ls(dir)
	if c.is_verbose {
		println('v_files_from_dir ("$dir")')
	}
	files.sort()
	for file in files {
		c.log('F=$file')
		if !file.ends_with('.v') && !file.ends_with('.vh') {
			continue
		}
		if file.ends_with('_test.v') {
			continue
		}
		if file.ends_with('_win.v') && c.os != WINDOWS {
			continue
		}
		if file.ends_with('_lin.v') && c.os != LINUX {
			continue
		}
		if file.ends_with('_mac.v') && c.os != MAC {
			lin_file := file.replace('_mac.v', '_lin.v')
			// If there are both _mac.v and _lin.v, don't use _mac.v.
			// Never use _mac.v on Windows.
			if os.file_exists('$dir/$lin_file') || c.os == WINDOWS {
				continue
			}
			// If there's only _mac.v, then it can be used on Linux too
		}
		res << '$dir/$file'
	}
	return res
}
// Fills `c.files` with everything that has to be compiled: the imports of the
// user files are parsed first, then the imports of the imported modules, then
// the files of all imported modules are appended, and the user files last.
fn (c mut V) add_user_v_files() {
	mut dir := c.dir
	c.log('add_v_files($dir)')
	// User files are collected separately: they have to be added after the
	// libs, and the libs are not known before the imports are parsed
	mut user_files := []string
	// v volt/slack_test.v: compile all .v files to get the environment
	// I need to implement user packages! TODO
	in_volt := dir.contains('/volt') || dir.contains('/c2volt')// TODO
	is_test_with_imports := dir.ends_with('_test.v') && in_volt
	if is_test_with_imports {
		user_files << dir
		// Continue with the directory the test file is in
		slash_pos := dir.last_index('/')
		dir = dir.left(slash_pos) + '/'// TODO WHY IS THIS NEEDED?
	}
	if dir.ends_with('.v') {
		// Just compile one file and get parent dir
		user_files << dir
		dir = dir.all_before('/')
	}
	else {
		// Add files from the dir user is compiling (only .v files)
		dir_files := c.v_files_from_dir(dir)
		for file in dir_files {
			user_files << file
		}
	}
	if user_files.len == 0 {
		exit('No input .v files')
	}
	if c.is_verbose {
		c.log('user_files:')
		println(user_files)
	}
	// Parse user imports
	for file in user_files {
		mut p := c.new_parser(file, RUN_IMPORTS)
		p.parse()
	}
	// Parse lib imports. In default mode the modules are read from the vlib
	// copy in the tmp path, otherwise from the language dir.
	lib_root := if c.build_mode == DEFAULT_MODE {
		'$TmpPath/vlib'
	}
	else {
		c.lang_dir
	}
	// Index based loop: presumably parsing can add more imports while we iterate.
	// TODO `for pkg in c.table.imports` used to crash compiler?
	for i := 0; i < c.table.imports.len; i++ {
		pkg := c.table.imports[i]
		vfiles := c.v_files_from_dir('$lib_root/$pkg')
		// Add all imports referenced by these libs
		for file in vfiles {
			mut p := c.new_parser(file, RUN_IMPORTS)
			p.parse()
		}
	}
	if c.is_verbose {
		c.log('imports:')
		println(c.table.imports)
	}
	// Only now add all combined lib files
	for pkg in c.table.imports {
		// If we are in default mode, we don't parse vlib .v files, but header .vh files in
		// TmpPath/vlib
		// These were generated by vfmt
		module_path := if c.build_mode == DEFAULT_MODE || c.build_mode == BUILD {
			'$TmpPath/vlib/$pkg'
		}
		else {
			'$c.lang_dir/$pkg'
		}
		vfiles := c.v_files_from_dir(module_path)
		for vfile in vfiles {
			c.files << vfile
		}
		// TODO c.files.append_array(vfiles)
	}
	// Add user code last
	for file in user_files {
		c.files << file
	}
	// c.files.append_array(user_files)
}
// get_arg looks for `-<arg> ` in the space separated argument string and
// returns the word that follows it, or `def` when the flag is not present.
fn get_arg(joined_args, arg, def string) string {
	flag := '-$arg '
	flag_pos := joined_args.index(flag)
	if flag_pos == -1 {
		return def
	}
	// The value runs from the end of the flag up to the next space,
	// or to the end of the string when there is no further space
	value_start := flag_pos + flag.len
	next_space := joined_args.index_after(' ', value_start)
	value_end := if next_space == -1 {
		joined_args.len
	}
	else {
		next_space
	}
	return joined_args.substr(value_start, value_end)
}
// log prints `s`, but only in verbose mode.
fn (c &V) log(s string) {
	if c.is_verbose {
		println(s)
	}
}
// new_v builds the V object from the command line arguments: it determines
// the path to compile (the last argument), the build mode, the output names,
// the target OS and the list of builtin files, and reads the boolean options.
fn new_v(args[]string) *V {
mut dir := args.last()
// println('new compiler "$dir"')
if args.len < 2 {
dir = ''
}
joined_args := args.join(' ')
target_os := get_arg(joined_args, 'os', '')
mut out_name := get_arg(joined_args, 'o', 'a.out')
// build mode
mut build_mode := DEFAULT_MODE
if args.contains('-lib') {
build_mode = BUILD
// v -lib ~/v/os => os.o
base := dir.all_after('/')
println('Building module ${base}...')
out_name = '$TmpPath/vlib/${base}.o'
// Cross compiling? Use separate dirs for each os
// NOTE(review): `target_os` is '' when no `-os` was given, so this
// condition presumably also holds for native builds — confirm
if target_os != os.user_os() {
os.mkdir('$TmpPath/vlib/$target_os')
out_name = '$TmpPath/vlib/$target_os/${base}.o'
println('Cross compiling $out_name')
}
}
// TODO embed_vlib is temporarily the default mode. It's much slower.
// Because of that, the check below is inverted: EMBED_VLIB is selected when
// `-embed_vlib` is NOT passed
else if !args.contains('-embed_vlib') {
build_mode = EMBED_VLIB
}
//
is_test := dir.ends_with('_test.v')
is_script := dir.ends_with('.v')
if is_script && !os.file_exists(dir) {
exit('`$dir` does not exist')
}
// No -o provided? foo.v => foo
if out_name == 'a.out' && dir.ends_with('.v') {
out_name = dir.left(dir.len - 2)
}
// if we are in `/foo` and run `v .`, the executable should be `foo`
if dir == '.' && out_name == 'a.out' {
base := os.getwd().all_after('/')
out_name = base.trim_space()
}
mut _os := MAC
// No OS specifed? Use current system
if target_os == '' {
$if linux {
_os = LINUX
}
$if mac {
_os = MAC
}
$if windows {
_os = WINDOWS
}
}
else {
// An unknown `-os` value leaves the MAC default in place
switch target_os {
case 'linux': _os = LINUX
case 'windows': _os = WINDOWS
case 'mac': _os = MAC
}
}
builtins := [
'array.v',
'string.v',
'builtin.v',
'int.v',
'utf8.v',
'map.v',
'smap.v',
'option.v',
'string_builder.v',
]
// Location of all vlib files TODO allow custom location
mut lang_dir = os.home_dir() + '/code/v/'
out_name_c := out_name.all_after('/') + '.c'
mut files := []string
// Add builtin files
if !out_name.contains('builtin.o') {
for builtin in builtins {
mut f := '$lang_dir/builtin/$builtin'
// In default mode we use precompiled vlib.o, point to .vh files with signatures
// ('array.v' + 'h' gives 'array.vh')
if build_mode == DEFAULT_MODE || build_mode == BUILD {
f = '$TmpPath/vlib/builtin/${builtin}h'
}
files << f
}
}
obfuscate := args.contains('-obf')
// NOTE(review): `out_name` is initialized twice in the literal below
return &V {
os: _os
out_name: out_name
files: files
dir: dir
lang_dir: lang_dir
table: new_table(obfuscate)
out_name: out_name
out_name_c: out_name_c
is_test: is_test
is_script: is_script
is_so: args.contains('-shared')
is_play: args.contains('play')
is_prod: args.contains('-prod')
is_verbose: args.contains('-verbose')
obfuscate: obfuscate
is_prof: args.contains('-prof')
is_live: args.contains('-live')
sanitize: args.contains('-sanitize')
nofmt: args.contains('-nofmt')
show_c_cmd: args.contains('-show_c_cmd')
translated: args.contains('translated')
cgen: new_cgen(out_name_c)
build_mode: build_mode
is_run: args.contains('run')
is_repl: args.contains('-repl')
}
}
// run_repl reads lines from the user until an empty line is read. Lines that
// start with `print` are compiled and run together with all lines entered
// before; every other line is only remembered. Returns the remembered lines.
fn run_repl() []string {
	println2('V $Version')
	println2('Use Ctrl-D to exit')
	println2('For now you have to use println() to print values, this will be fixed soon\n')
	file := TmpPath + '/vrepl.v'
	mut lines := []string
	for {
		print('>>> ')
		line := os.get_line().trim_space()
		if line == '' {
			break
		}
		if !line.starts_with('print') {
			lines << line
			continue
		}
		// Save the source only if the user is printing something,
		// but don't add this print call to the `lines` array,
		// so that it doesn't get called during the next print.
		// TODO remove this once files without main compile correctly
		source_code := 'fn main(){' + lines.join('\n') + '\n' + line + '}'
		os.write_file(file, source_code)
		mut v := new_v( ['v', '-repl', file])
		v.compile()
		s := os.system(TmpPath + '/vrepl')
		println2(s)
	}
	return lines
}
// Text printed for `v help`, `v -h` and `v --help`.
// This definitely needs to be better :)
const (
HelpText = '
- To build a V program:
v file.v
- To get current V version:
v version
- To build an optimized executable:
v -prod file.v
- To specify the executable\'s name:
v -o program file.v
'
)
/*
- To disable automatic formatting:
v -nofmt file.v
- To build a program with an embedded vlib (use this if you do not have prebuilt vlib libraries or if you
are working on vlib)
v -embed_vlib file.v
*/

3216
compiler/parser.v Normal file

File diff suppressed because it is too large Load Diff

630
compiler/scanner.v Normal file
View File

@ -0,0 +1,630 @@
module main
// Scanner turns the text of one V file into tokens, see scan().
struct Scanner {
mut:
file_path string
text string // contents of the file
pos int // index of the current character in `text`
line_nr int // zero based, error() prints `line_nr + 1`
inside_string bool // set while a string with `$` interpolation is being scanned
dollar_start bool // for hacky string interpolation TODO simplify
dollar_end bool
debug bool
line_comment string // text of the last `//` comment
started bool
is_fmt bool
// vfmt fields
fmt_out StringBuilder
fmt_indent int
fmt_line_empty bool
}
// Quote characters. Only SINGLE_QUOTE starts a string literal in scan().
const (
SINGLE_QUOTE = `\'`
QUOTE = `"`
)
// new_scanner reads the whole file and returns a scanner for its text.
// Panics if the file does not exist.
fn new_scanner(file_path string) *Scanner {
	if !os.file_exists(file_path) {
		panic('"$file_path" doesnt exist')
	}
	return &Scanner {
		file_path: file_path
		text: os.read_file(file_path)
		fmt_out: new_string_builder(1000)
	}
}
// ScanRes is the result of one scan() call: the token and, where the token
// carries text (names, numbers, strings, comments...), its literal.
// TODO remove once multiple return values are implemented
struct ScanRes {
tok Token
lit string
}
// scan_res bundles a token and its literal into a ScanRes.
fn scan_res(tok Token, lit string) ScanRes {
	return ScanRes {
		tok: tok
		lit: lit
	}
}
// is_white reports whether `c` is whitespace, as decided by `byte.is_white()`.
fn is_white(c byte) bool {
return c.is_white()
}
// is_nl reports whether `c` counts as a line break for the scanner:
// line feed (10) or form feed (12).
// NOTE(review): carriage return (13) is not included — confirm that 12 is intended.
fn is_nl(c byte) bool {
	code := int(c)
	return code == 10 || code == 12
}
// ident_name scans the name that starts at `s.pos` and returns it. A name
// continues while the characters are name characters or digits. On return
// `s.pos` points to the last character of the name.
fn (s mut Scanner) ident_name() string {
	start := s.pos
	for {
		s.pos++
		// A name can be the very last thing in the file: stop at the end of
		// the text instead of indexing past it (same guard as in
		// ident_string() and ident_char())
		if s.pos >= s.text.len {
			break
		}
		c := s.text[s.pos]
		if !is_name_char(c) && !c.is_digit() {
			break
		}
	}
	name := s.text.substr(start, s.pos)
	// The loop stopped one character after the name, step back
	s.pos--
	return name
}
// ident_number scans the number literal that starts at `s.pos` and returns
// its text. Handles decimal integers, floats, exponents (`1e3`, `1e+3`,
// `1e-3`) and `0x` hex literals, and reports a malformed octal constant.
// On return `s.pos` points to the last character of the number.
// NOTE(review): `s.text[s.pos]` and `s.text[s.pos + 1]` are read without
// checking against `s.text.len` — confirm this is safe at the end of a file.
fn (s mut Scanner) ident_number() string {
start := s.pos
is_hex := s.text[s.pos] == `0` && s.text[s.pos + 1] == `x`
// Any other literal with a leading zero is treated as octal
is_oct := !is_hex && s.text[s.pos] == `0`
mut is_float := false
for {
s.pos++
c := s.text[s.pos]
if c == `.` {
is_float = true
}
// Only `x`, `u` and lower case `a`..`f` are accepted inside a hex literal
is_good_hex := is_hex && (c == `x` || c == `u` || (c >= `a` && c <= `f`))
// 1e+3, 1e-3, 1e3
if !is_hex && c == `e` {
next := s.text[s.pos + 1]
if next == `+` || next == `-` || next.is_digit() {
// Skip the sign or the first exponent digit as well
s.pos++
continue
}
}
if !c.is_digit() && c != `.` && !is_good_hex {
break
}
// 1..9
// Two dots in a row are a range, not part of the number
if c == `.` && s.text[s.pos + 1] == `.` {
break
}
if is_oct && c >= `8` && !is_float {
s.error('malformed octal constant')
}
}
number := s.text.substr(start, s.pos)
// The loop stopped one character after the number, step back
s.pos--
return number
}
// skip_whitespace advances `s.pos` to the next non-whitespace character and
// counts the line breaks it passes. In fmt mode it stops on the first line
// break, after counting it, without stepping over it.
fn (s mut Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		c := s.text[s.pos]
		if !is_white(c) {
			break
		}
		if is_nl(c) {
			s.line_nr++
			if s.is_fmt {
				return
			}
		}
		s.pos++
	}
}
// scan returns the next token together with its literal. On return `s.pos`
// points to the last character of the token; the next call steps past it.
// Returns EOF at the end of the text. Comments are skipped unless the scanner
// is in fmt mode. String interpolation is handled with the `inside_string`,
// `dollar_start` and `dollar_end` flags, which carry state between calls.
// NOTE(review): many branches read `s.text[s.pos + 1]` (and `+ 2`) without a
// bounds check although the bounds checked `nextc` is available — confirm this
// is safe at the end of a file.
fn (s mut Scanner) scan() ScanRes {
// if s.file_path == 'd.v' {
// println('\nscan()')
// }
// if s.started {
// Step past the last character of the previous token
if s.pos > 0 {
// || (s.pos == 0 && s.text.len > 0 && s.text[s.pos] == `\n`) {
s.pos++
}
s.started = true
if s.pos >= s.text.len {
return scan_res(EOF, '')
}
// skip whitespace
if !s.inside_string {
s.skip_whitespace()
}
if s.is_fmt && s.text[s.pos] == `\n` {
return scan_res(NL, '')
}
// End of $var, start next string
if !s.is_fmt && s.dollar_end {
// fmt.Println("end of $var, get string", s.pos, string(s.text[s.pos]))
if s.text[s.pos] == SINGLE_QUOTE {
// fmt.Println("ENDDD")
s.dollar_end = false
return scan_res(STRING, '')
}
s.dollar_end = false
return scan_res(STRING, s.ident_string())
}
s.skip_whitespace()
// println('ws skipped')
// end of file
if s.pos >= s.text.len {
// println('scan(): returning EOF (pos >= len)')
return scan_res(EOF, '')
}
// println('!!!!! HANDLE CHAR pos=$s.pos')
// handle each char
c := s.text[s.pos]
// Next character, or `\0` at the end of the text
mut nextc := `\0`
if s.pos + 1 < s.text.len {
nextc = s.text[s.pos + 1]
}
// name or keyword
if is_name_char(c) {
name := s.ident_name()
next_char := s.text[s.pos + 1]// tmp hack to detect . in ${}
// println('!!! got name=$name next_char=$next_char')
if is_key(name) {
// println('IS KEY')
// tok := (key_to_token(name))
// println(tok.str())
return scan_res(key_to_token(name), '')
}
// 'asdf $b' => "b" is the last name in the string, dont start parsing string
// at the next ', skip it
if s.inside_string {
// println('is_letter inside string! nextc=${nextc.str()}')
if s.text[s.pos + 1] == SINGLE_QUOTE {
// println('var is last before QUOTE')
s.pos++
s.dollar_start = false
s.inside_string = false
}
}
if s.dollar_start && next_char != `.` {
// println('INSIDE STRING .dollar var=$name')
s.dollar_end = true
s.dollar_start = false
}
return scan_res(NAME, name)
}
// number, `.123`
// Every number, including floats, is returned as an INT token
else if c.is_digit() || c == `.` && nextc.is_digit() {
num := s.ident_number()
return scan_res(INT, num)
}
// all other tokens
switch c {
case `+`:
if nextc == `+` {
s.pos++
return scan_res(INC, '')
}
else if nextc == `=` {
s.pos++
return scan_res(PLUS_ASSIGN, '')
}
return scan_res(PLUS, '')
case `-`:
if nextc == `-` {
s.pos++
return scan_res(DEC, '')
}
else if nextc == `=` {
s.pos++
return scan_res(MINUS_ASSIGN, '')
}
return scan_res(MINUS, '')
case `*`:
if nextc == `=` {
s.pos++
return scan_res(MULT_ASSIGN, '')
}
return scan_res(MUL, '')
case `^`:
if nextc == `=` {
s.pos++
return scan_res(XOR_ASSIGN, '')
}
return scan_res(XOR, '')
case `%`:
if nextc == `=` {
s.pos++
return scan_res(MOD_ASSIGN, '')
}
return scan_res(MOD, '')
case `?`:
return scan_res(QUESTION, '')
case SINGLE_QUOTE:
return scan_res(STRING, s.ident_string())
// TODO allow double quotes
// case QUOTE:
// return scan_res(STRING, s.ident_string())
case `\``:
return scan_res(CHAR, s.ident_char())
case `(`:
return scan_res(LPAR, '')
case `)`:
return scan_res(RPAR, '')
case `[`:
return scan_res(LSBR, '')
case `]`:
return scan_res(RSBR, '')
case `{`:
// Skip { in ${ in strings
if s.inside_string {
return s.scan()
}
return scan_res(LCBR, '')
case `$`:
return scan_res(DOLLAR, '')
case `}`:
// s = `hello $name kek`
// s = `hello ${name} kek`
// Inside a string `}` ends a `${...}` expression: continue with the rest
// of the string
if s.inside_string {
s.pos++
// TODO UNNEEDED?
if s.text[s.pos] == SINGLE_QUOTE {
s.inside_string = false
return scan_res(STRING, '')
}
return scan_res(STRING, s.ident_string())
}
else {
return scan_res(RCBR, '')
}
case `&`:
if nextc == `=` {
s.pos++
return scan_res(AND_ASSIGN, '')
}
if s.text[s.pos + 1] == `&` {
s.pos++
return scan_res(AND, '')
}
return scan_res(AMP, '')
case `|`:
if s.text[s.pos + 1] == `|` {
s.pos++
return scan_res(OR, '')
}
if nextc == `=` {
s.pos++
return scan_res(OR_ASSIGN, '')
}
return scan_res(PIPE, '')
case `,`:
return scan_res(COMMA, '')
case `\n`:
return scan_res(NL, '')
case `.`:
if s.text[s.pos + 1] == `.` {
s.pos++
return scan_res(DOTDOT, '')
}
return scan_res(DOT, '')
case `#`:
// The rest of the line after `#` is the literal of the HASH token
// NOTE(review): this loop has no end-of-text check; the same applies to
// the `@` and `//` loops below — confirm for files without a trailing newline
start := s.pos + 1
for s.text[s.pos] != `\n` {
s.pos++
}
s.line_nr++
hash := s.text.substr(start, s.pos)
if s.is_fmt {
// fmt needs NL after #
s.pos--
}
return scan_res(HASH, hash.trim_space())
case `@`:
// The rest of the line after `@` is the literal of the AT token
start := s.pos + 1
for s.text[s.pos] != `\n` {
s.pos++
}
s.line_nr++
at := s.text.substr(start, s.pos)
return scan_res(AT, at.trim_space())
case `>`:
if s.text[s.pos + 1] == `=` {
s.pos++
return scan_res(GE, '')
}
else if s.text[s.pos + 1] == `>` {
if s.text[s.pos + 2] == `=` {
s.pos += 2
return scan_res(RIGHT_SHIFT_ASSIGN, '')
}
s.pos++
return scan_res(RIGHT_SHIFT, '')
}
else {
return scan_res(GT, '')
}
case `<`:
if s.text[s.pos + 1] == `=` {
s.pos++
return scan_res(LE, '')
}
else if s.text[s.pos + 1] == `<` {
if s.text[s.pos + 2] == `=` {
s.pos += 2
return scan_res(LEFT_SHIFT_ASSIGN, '')
}
s.pos++
return scan_res(LEFT_SHIFT, '')
}
else {
return scan_res(LT, '')
}
case `=`:
if s.text[s.pos + 1] == `=` {
s.pos++
return scan_res(EQ, '')
}
else {
return scan_res(ASSIGN, '')
}
case `:`:
if s.text[s.pos + 1] == `=` {
s.pos++
return scan_res(DECL_ASSIGN, '')
}
else {
return scan_res(COLON, '')
}
case `;`:
return scan_res(SEMICOLON, '')
case `!`:
if s.text[s.pos + 1] == `=` {
s.pos++
return scan_res(NE, '')
}
else {
return scan_res(NOT, '')
}
case `~`:
return scan_res(BIT_NOT, '')
case `/`:
if nextc == `=` {
s.pos++
return scan_res(DIV_ASSIGN, '')
}
// Line comment
if s.text[s.pos + 1] == `/` {
// debug("!!!!!!GOT LINE COM")
start := s.pos + 1
for s.text[s.pos] != `\n` {
s.pos++
}
s.line_nr++
s.line_comment = s.text.substr(start + 1, s.pos)
s.line_comment = s.line_comment.trim_space()
s.fgenln('// $s.line_comment')
if s.is_fmt {
// fmt needs NL after comment
s.pos--
}
else {
// Skip comment
return s.scan()
}
return scan_res(LINE_COM, s.line_comment)
}
// Multiline comments
if s.text[s.pos + 1] == `*` {
start := s.pos
// Skip comment
for ! (s.text[s.pos] == `*` && s.text[s.pos + 1] == `/`) {
s.pos++
if s.pos >= s.text.len {
s.line_nr--
s.error('comment not terminated')
}
if s.text[s.pos] == `\n` {
s.line_nr++
}
}
// Step onto the closing `/`
s.pos++
end := s.pos + 1
comm := s.text.substr(start, end)
s.fgenln(comm)
if s.is_fmt {
return scan_res(MLINE_COM, comm)
}
// Skip if not in fmt mode
return s.scan()
}
return scan_res(DIV, '')
}
// No case matched: report the character and stop
println2('(char code=$c) pos=$s.pos len=$s.text.len')
s.error('invalid character `${c.str()}`')
return scan_res(EOF, '')
}
// error prints the file name (the part of the path after the last `/`), the
// one based line number and `msg`, then exits via `os.exit1`.
fn (s &Scanner) error(msg string) {
// println('!! SCANNER ERROR: $msg')
file := s.file_path.all_after('/')
println2('panic: $file:${s.line_nr + 1}')
println2(msg)
// os.print_backtrace()
// println(file)
// println(s.file_path)
os.exit1(' ')
}
// println2('array out of bounds $idx len=$a.len')
// This is really bad. It needs a major clean up
// ident_string scans a string literal, or the next piece of an interpolated
// string, starting at `s.pos` and returns its text without the opening quote.
// Scanning stops at the closing quote, at `${` or at `$name`. In the last two
// cases `inside_string` is set and `s.pos` is moved back so that the next
// scan() call sees the `$`. Reports an error for `\0` and `\x00` in a literal.
fn (s mut Scanner) ident_string() string {
// println("\nidentString() at char=", string(s.text[s.pos]),
// "chard=", s.text[s.pos], " pos=", s.pos, "txt=", s.text[s.pos:s.pos+7])
// Extra output for files whose path contains 'test_test'
debug := s.file_path.contains('test_test')
if debug {
println('identStr() $s.file_path line=$s.line_nr pos=$s.pos')
}
mut start := s.pos
s.inside_string = false
slash := `\\`
for {
s.pos++
if s.pos >= s.text.len {
break
}
c := s.text[s.pos]
if debug {
println(c.str())
}
prevc := s.text[s.pos - 1]
// end of string
// A quote ends the string unless it is escaped; `\\` directly before the
// quote is an escaped backslash, so the quote still ends the string
if c == SINGLE_QUOTE && (prevc != slash || (prevc == slash && s.text[s.pos - 2] == slash)) {
// handle '123\\' slash at the end
break
}
if c == `\n` {
s.line_nr++
}
// Don't allow \0
if c == `0` && s.pos > 2 && s.text[s.pos - 1] == `\\` {
s.error('0 character in a string literal')
}
// Don't allow \x00
if c == `0` && s.pos > 5 && s.text[s.pos - 1] == `0` && s.text[s.pos - 2] == `x` &&
s.text[s.pos - 3] == `\\` {
s.error('0 character in a string literal')
}
// ${var}
if !s.is_fmt && c == `{` && prevc == `$` {
s.inside_string = true
// fmt.Println("breaking out of is()")
// so that s.pos points to $ at the next step
s.pos -= 2
// fmt.Println("break pos=", s.pos, "c=", string(s.text[s.pos]), "d=", s.text[s.pos])
break
}
// $var
// if !s.is_fmt && c != `{` && c != ` ` && ! (c >= `0` && c <= `9`) && prevc == `$` {
if !s.is_fmt && (c.is_letter() || c == `_`) && prevc == `$` {
s.inside_string = true
s.dollar_start = true
// println('setting s.dollar=true pos=$s.pos')
s.pos -= 2
break
}
}
mut lit := ''
// Leave the opening quote out of the literal
if s.text[start] == SINGLE_QUOTE {
start++
}
mut end := s.pos
// `s.pos` was moved back by 2 for interpolation; include the character
// right before the `$`
if s.inside_string {
end++
}
// Nothing to return when the piece is empty
if start > s.pos{}
else {
lit = s.text.substr(start, end)
}
// if lit.contains('\n') {
// println('\nstring lit="$lit" pos=$s.pos line=$s.line_nr')
// }
/*
for c in lit {
if s.file_path.contains('range_test') {
println('!')
println(c)
}
}
*/
return lit
}
// ident_char scans a character literal that starts with the backtick at
// `s.pos` and returns the text between the backticks. Reports an error unless
// the literal holds exactly one character, where an escape sequence counts as
// one. On return `s.pos` points to the closing backtick.
fn (s mut Scanner) ident_char() string {
start := s.pos
slash := `\\`
// Counts the scanned characters that are not backslashes, including the
// closing backtick (subtracted again after the loop)
mut len := 0
for {
s.pos++
if s.pos >= s.text.len {
break
}
if s.text[s.pos] != slash {
len++
}
double_slash := s.text[s.pos - 1] == slash && s.text[s.pos - 2] == slash
// A backtick ends the literal unless it is escaped
if s.text[s.pos] == `\`` && (s.text[s.pos - 1] != slash || double_slash) {
// An escaped backslash is one character, but was not counted above
if double_slash {
len++
}
break
}
}
len--
c := s.text.substr(start + 1, s.pos)
if len != 1 {
s.error('invalid character literal (more than one character: $len)')
}
return c
}
// peek returns the next token that is not NL without consuming it.
// NOTE(review): Scanner.peek() restores the scanner position after every
// call, so when the next token is NL this loop looks like it gets the same
// NL again and never ends — confirm.
fn (p mut Parser) peek() Token {
for {
tok := p.scanner.peek()
if tok != NL {
return tok
}
}
}
// peek returns the next token without consuming it: it scans one token and
// then restores the position, the line number and the string interpolation
// flags to what they were before.
fn (s mut Scanner) peek() Token {
	// Snapshot of the state that is rewound afterwards
	saved_pos := s.pos
	saved_line_nr := s.line_nr
	saved_inside_string := s.inside_string
	saved_dollar_start := s.dollar_start
	saved_dollar_end := s.dollar_end
	next := s.scan()
	// Rewind
	s.pos = saved_pos
	s.line_nr = saved_line_nr
	s.inside_string = saved_inside_string
	s.dollar_start = saved_dollar_start
	s.dollar_end = saved_dollar_end
	return next.tok
}
// debug_tokens rescans the file from the start and prints every token, one
// per line, followed by its literal when it has one. Stops after EOF.
fn (s mut Scanner) debug_tokens() {
	s.pos = 0
	s.debug = true
	fname := s.file_path.all_after('/')
	println('\n===DEBUG TOKENS $fname ============')
	for {
		res := s.scan()
		tok := res.tok
		lit := res.lit
		print(tok.str())
		// Finish the line, with the literal if there is one
		if lit == '' {
			println('')
		}
		else {
			println(' `$lit`')
		}
		if tok == EOF {
			println('============ END OF DEBUG TOKENS ==================')
			break
		}
	}
}
// is_name_char reports whether `c` is a letter or an underscore.
fn is_name_char(c byte) bool {
	if c.is_letter() {
		return true
	}
	return c == `_`
}

644
compiler/table.v Normal file
View File

@ -0,0 +1,644 @@
module main
// Table is the compiler's registry of known types, constants and functions,
// together with the registered packages, imports and flags.
struct Table {
mut:
types []Type
consts []Var
fns []Fn
obf_ids map_int // obf_ids['myfunction'] == 23
packages []string // List of all modules registered by the application
imports []string // List of all imports
flags []string // ['-framework Cocoa', '-lglfw3']
// Counter used by cgen_name() to hand out obfuscated function ids
fn_cnt int atomic
// When true, cgen_name() replaces most function names with `f_<id>`
obfuscate bool
}
// AccessMod describes the visibility and mutability of a field (see Type.add_field).
enum AccessMod {
PRIVATE // private immutable
PRIVET_MUT // private mutable (identifier is misspelled; kept because other code may reference it)
PUBLIC // public immutable (readonly)
PUBLIC_MUT // public, but mutable only in this module
PUBLIC_MUT_MUT // public and mutable both inside and outside (not recommended to use, that's why it's so verbose)
}
// TypeCategory is the kind of a Type entry (stored in Type.cat).
enum TypeCategory {
TYPE_STRUCT // presumably an ordinary struct type - confirm
T_CAT_FN // presumably a function type, described by Type.func - confirm
}
// Type is one entry of Table.types: a named type with its fields, methods and
// an optional parent type name that field/method lookups fall back to.
struct Type {
mut:
pkg string
name string
fields []Var
methods []Fn
// Name of the parent type ('' when there is none)
parent string
cat TypeCategory
gen_types []string
func Fn // For cat == T_CAT_FN (type kek fn())
is_c bool // C.FILE
is_interface bool
is_enum bool
// This field is used for types that are not defined yet but are known to exist.
// It allows having things like `fn (f Foo) bar()` before `Foo` is defined.
// This information is needed in the first pass.
is_placeholder bool
}
// str renders the type for debugging: its name followed by one line per field
// (name and type) and one line per method.
fn (t Type) str() string {
	mut out := 'type "$t.name" {'
	if t.fields.len > 0 {
		for f in t.fields {
			out += '\n $f.name $f.typ'
		}
		out += '\n'
	}
	if t.methods.len > 0 {
		for m in t.methods {
			out += '\n ${m.str()}'
		}
		out += '\n'
	}
	out += '}\n'
	return out
}
// Names that get a `v_` prefix in generated code (see var_cgen_name() and
// cgen_name()) so that they do not clash with existing C identifiers.
const (
CReserved = [
'exit',
'unix',
'print',
// 'ok',
'error',
'malloc',
'calloc',
'char',
'free',
'panic',
'register'
]
)
// str formats the function as `name(args) return_type`. The argument list comes
// from str_args(), which is handed a fresh, empty Table.
// (Original note: this is used in generated C code.)
fn (f Fn) str() string {
	empty_table := Table{}
	args := f.str_args(empty_table)
	return '$f.name($args) $f.typ'
}
// fn (types array_Type) print_to_file(f string) {
// }
// Type names treated as numeric by is_number_type() and as floating point by
// is_float_type(). Every entry of FLOAT_TYPES also appears in NUMBER_TYPES.
const (
NUMBER_TYPES = ['number', 'int', 'i8', 'u8', 'i16', 'u16', 'i32', 'u32', 'byte', 'i64', 'u64', 'long', 'double', 'float', 'f32', 'f64']
FLOAT_TYPES = ['double', 'float', 'f32', 'f64']
)
// is_number_type reports whether `typ` is listed in NUMBER_TYPES.
fn is_number_type(typ string) bool {
	return typ in NUMBER_TYPES
}
// is_float_type reports whether `typ` is listed in FLOAT_TYPES.
fn is_float_type(typ string) bool {
	return typ in FLOAT_TYPES
}
// new_table creates a Table and pre-registers the builtin types plus the
// `stdin`/`stderr` constants. The registration order is kept exactly as before,
// because entries are appended to t.types in call order.
fn new_table(obfuscate bool) *Table {
	mut t := &Table {
		obf_ids: map[string]int{}
		obfuscate: obfuscate
	}
	t.register_type('int')
	t.register_type('size_t')
	// Integer-like types whose parent is `int`
	int_children := ['i8', 'u8', 'i16', 'u16', 'i32', 'u32', 'byte']
	for child in int_children {
		t.register_type_with_parent(child, 'int')
	}
	// i64 is registered without a parent, unlike u64
	t.register_type('i64')
	t.register_type_with_parent('u64', 'int')
	// `double` and `float` are marked "TODO remove" in the original
	plain_types := ['long', 'byteptr', 'intptr', 'double', 'float', 'f32', 'f64', 'rune', 'bool', 'void', 'voidptr', 'va_list']
	for plain in plain_types {
		t.register_type(plain)
	}
	t.register_const('stdin', 'int', 'main', false)
	t.register_const('stderr', 'int', 'main', false)
	t.register_type_with_parent('map_string', 'map')
	t.register_type_with_parent('map_int', 'map')
	return t
}
// var_cgen_name returns `name` unchanged unless it is listed in CReserved,
// in which case `v_<name>` is returned instead.
fn (t mut Table) var_cgen_name(name string) string {
	if !CReserved.contains(name) {
		return name
	}
	return 'v_$name'
}
// register_package adds `pkg` to the list of known packages, once.
fn (t mut Table) register_package(pkg string) {
	if !t.packages.contains(pkg) {
		t.packages << pkg
	}
}
// known_pkg reports whether `pkg` has been registered with register_package().
fn (table &Table) known_pkg(pkg string) bool {
	return table.packages.contains(pkg)
}
// register_const appends a constant named `name` of type `typ`, owned by
// package `pkg`, to the table. No duplicate check is performed.
fn (t mut Table) register_const(name, typ string, pkg string, is_imported bool) {
	new_const := Var {
		pkg: pkg
		name: name
		typ: typ
		is_const: true
		is_import_const: is_imported
	}
	t.consts << new_const
}
// register_global records a global variable in the table's const list, tagged
// with the parser's current package. Only for translated code.
fn (p mut Parser) register_global(name, typ string) {
	global := Var {
		pkg: p.pkg
		name: name
		typ: typ
		is_const: true
		is_global: true
	}
	p.table.consts << global
}
// register_fn appends `f` to the table unless a function with the same name is
// already registered (open question from the original: the name should already be unique).
// TODO PERF: linear scan over all functions, this slows down the compiler a lot.
fn (t mut Table) register_fn(f Fn) {
	mut already_known := false
	for existing in t.fns {
		if existing.name == f.name {
			already_known = true
			break
		}
	}
	if !already_known {
		t.fns << f
	}
}
// known_type reports whether a fully defined (non-placeholder) type with this
// name is registered. A single trailing `*` is ignored, so 'byte*' looks up
// 'byte'; names containing a space (function types) are left untouched.
fn (table &Table) known_type(typ string) bool {
	mut base := typ
	if base.ends_with('*') && !base.contains(' ') {
		base = base.left(base.len - 1)
	}
	for candidate in table.types {
		if candidate.name == base && !candidate.is_placeholder {
			return true
		}
	}
	return false
}
// find_fn returns the first registered function called `name`, or an empty
// `Fn{}` when there is none.
// TODO PERF: linear scan, this slows down the compiler a lot.
fn (t &Table) find_fn(name string) Fn {
	for candidate in t.fns {
		if candidate.name == name {
			return candidate
		}
	}
	return Fn{}
}
// known_fn reports whether a function called `name` is registered.
// TODO PERF: linear scan, this slows down the compiler a lot.
fn (t &Table) known_fn(name string) bool {
	mut found := false
	for candidate in t.fns {
		if candidate.name == name {
			found = true
			break
		}
	}
	return found
}
// known_const reports whether a constant called `name` is registered.
// find_const() signals "not found" with an empty Var, hence the name length check.
// TODO use optional
fn (t &Table) known_const(name string) bool {
	found := t.find_const(name)
	return found.name.len > 0
}
// register_type adds a bare type called `typ` (no parent, no package).
// Empty names and names that are already registered are ignored.
fn (t mut Table) register_type(typ string) {
	if typ.len == 0 {
		return
	}
	for existing in t.types {
		if existing.name == typ {
			return
		}
	}
	t.types << Type {
		name: typ
	}
}
// register_type_with_parent registers type `strtyp` with the given parent,
// owned by the parser's current package, via Table.register_type2().
fn (p mut Parser) register_type_with_parent(strtyp, parent string) {
	new_typ := Type {
		pkg: p.pkg
		name: strtyp
		parent: parent
	}
	p.table.register_type2(new_typ)
}
// register_type_with_parent adds type `typ` with parent `parent`; the package is
// left empty. Empty names and names that are already registered are ignored.
fn (t mut Table) register_type_with_parent(typ, parent string) {
	if typ.len == 0 {
		return
	}
	for existing in t.types {
		if existing.name == typ {
			return
		}
	}
	t.types << Type {
		name: typ
		parent: parent
	}
}
// register_type2 adds a fully built Type to the table. Types with an empty name
// and types whose name is already registered are ignored.
fn (t mut Table) register_type2(typ Type) {
	if typ.name.len == 0 {
		return
	}
	mut is_dup := false
	for existing in t.types {
		if existing.name == typ.name {
			is_dup = true
			break
		}
	}
	if !is_dup {
		t.types << typ
	}
}
// add_field appends a field description to the type. No duplicate check is made.
fn (t mut Type) add_field(name, typ string, is_mut bool, attr string, access_mod AccessMod) {
	t.fields << Var {
		name: name
		typ: typ
		is_mut: is_mut
		attr: attr
		access_mod: access_mod
	}
}
// has_field reports whether the type itself declares a field called `name`
// (parent types are not consulted).
fn (t &Type) has_field(name string) bool {
	found := t.find_field(name)
	return found.name != ''
}
// find_field returns the type's own field called `name`, or an empty `Var{}`
// when the type declares no such field.
fn (t &Type) find_field(name string) Var {
	for candidate in t.fields {
		if candidate.name == name {
			return candidate
		}
	}
	return Var{}
}
// type_has_field reports whether `typ` or its direct parent has a field called `name`.
fn (table &Table) type_has_field(typ &Type, name string) bool {
	found := table.find_field(typ, name)
	return found.name != ''
}
// find_field looks `name` up on `typ` itself and, failing that, on its direct
// parent type (one level only). Returns an empty Var when nothing matches.
fn (table &Table) find_field(typ &Type, name string) Var {
	own := typ.find_field(name)
	// Found on the type itself, or there is no parent to fall back to
	if own.name.len > 0 || typ.parent.len == 0 {
		return own
	}
	parent := table.find_type(typ.parent)
	return parent.find_field(name)
}
// add_method appends `f` to the type's method list. No duplicate check is made.
fn (t mut Type) add_method(f Fn) {
	t.methods << f
}
// has_method reports whether the type itself declares a method called `name`
// (parent types are not consulted).
fn (t &Type) has_method(name string) bool {
	found := t.find_method(name)
	return found.name != ''
}
// type_has_method reports whether `typ` or its direct parent has a method called `name`.
fn (table &Table) type_has_method(typ &Type, name string) bool {
	found := table.find_method(typ, name)
	return found.name != ''
}
// find_method looks `name` up on `typ` itself and, failing that, on its direct
// parent type (one level only). Returns an empty Fn when nothing matches.
// TODO use `?Fn`
fn (table &Table) find_method(typ &Type, name string) Fn {
	own := typ.find_method(name)
	// Found on the type itself, or there is no parent to fall back to
	if own.name.len > 0 || typ.parent.len == 0 {
		return own
	}
	parent := table.find_type(typ.parent)
	return parent.find_method(name)
}
// find_method returns the type's own method called `name`, or an empty `Fn{}`
// when the type declares no such method.
fn (t &Type) find_method(name string) Fn {
	for candidate in t.methods {
		if candidate.name == name {
			return candidate
		}
	}
	return Fn{}
}
// add_gen_type records `type_name` in the type's gen_types list, once.
fn (t mut Type) add_gen_type(type_name string) {
	if !t.gen_types.contains(type_name) {
		t.gen_types << type_name
	}
}
// find_type looks the type up by `name` as given and, when that fails, retries
// with the name produced by prepend_pkg().
fn (p &Parser) find_type(name string) *Type {
	found := p.table.find_type(name)
	if found.name.len > 0 {
		return found
	}
	return p.table.find_type(p.prepend_pkg(name))
}
// find_type returns a pointer to the registered type called `name`, ignoring a
// single trailing `*` unless the name contains a space. When nothing matches, a
// pointer to a fresh empty Type is returned (its name is '').
// TODO PERF use map
fn (t &Table) find_type(name string) *Type {
	mut base := name
	if base.ends_with('*') && !base.contains(' ') {
		base = base.left(base.len - 1)
	}
	for i, candidate in t.types {
		if candidate.name == base {
			// Point at the table entry itself, not at the loop copy
			return &t.types[i]
		}
	}
	return &Type{}
}
// _check_types reports whether a value of type `got` may be used where
// `expected` is required. A list of special cases is accepted first; after that
// both names are stripped of `*` and must be equal, unless `expected` ends in
// 'er' and satisfies_interface() accepts the pair. On a mismatch it returns
// false when `throw` is false and calls p.error() otherwise.
// The order of the checks matters: earlier rules shadow later ones.
fn (p mut Parser) _check_types(got, expected string, throw bool) bool {
p.log('check types got="$got" exp="$expected" ')
// Translated code is never type checked
if p.translated {
return true
}
// Allow ints to be used as floats
if got.eq('int') && expected.eq('float') {
return true
}
if got.eq('int') && expected.eq('f64') {
return true
}
if got == 'f64' && expected == 'float' {
return true
}
if got == 'float' && expected == 'f64' {
return true
}
// Allow ints to be used as longs
if got.eq('int') && expected.eq('long') {
return true
}
if got == 'void*' && expected.starts_with('fn ') {
return true
}
if got.starts_with('[') && expected == 'byte*' {
return true
}
// Todo void* allows everything right now
if got.eq('void*') || expected.eq('void*') {
// if !p.builtin_pkg {
if p.is_play {
return false
}
return true
}
// TODO only allow numeric consts to be assigned to bytes, and
// throw an error if they are bigger than 255
if got.eq('int') && expected.eq('byte') {
return true
}
if got.eq('int') && expected.eq('byte*') {
return true
}
// byteptr += int
if got.eq('int') && expected.eq('byteptr') {
return true
}
if got == 'Option' && expected.starts_with('Option_') {
return true
}
// lines := new_array
if got == 'array' && expected.starts_with('array_') {
return true
}
// Expected type "Option_os__File", got "os__File"
if expected.starts_with('Option_') && expected.ends_with(got) {
return true
}
// NsColor* return 0
if !p.is_play {
if expected.ends_with('*') && got == 'int' {
return true
}
// if got == 'T' || got.contains('<T>') {
// return true
// }
// if expected == 'T' || expected.contains('<T>') {
// return true
// }
// Allow pointer arithmetic
// NOTE(review): this branch looks unreachable - any `void*` operand already returned above
if expected.eq('void*') && got.eq('int') {
return true
}
}
// From here on pointers and values of the same base type are treated alike
expected = expected.replace('*', '')
got = got.replace('*', '')
if got != expected {
// Interface check
// Only names ending in 'er' are tried as interfaces
if expected.ends_with('er') {
// NOTE(review): satisfies_interface() calls p.error() itself, whatever `throw` is
if p.satisfies_interface(expected, got, throw) {
return true
}
}
if !throw {
return false
}
else {
p.error('expected type `$expected`, but got `$got`')
}
}
return true
}
// check_types is the throwing variant of _check_types(): a mismatch is
// reported through p.error().
fn (p mut Parser) check_types(got, expected string) bool {
	ok := p._check_types(got, expected, true)
	return ok
}
// check_types_no_throw calls _check_types() with `throw` set to false, so a
// plain mismatch yields false instead of an error.
fn (p mut Parser) check_types_no_throw(got, expected string) bool {
	ok := p._check_types(got, expected, false)
	return ok
}
// satisfies_interface checks that type `_typ` declares every method of the type
// `interface_name`. Only the type's own methods count (Type.has_method), not
// those of its parent. The first missing method is reported through p.error()
// and false is returned.
// NOTE(review): `throw` is currently ignored - the error is always raised.
fn (p mut Parser) satisfies_interface(interface_name, _typ string, throw bool) bool {
	iface := p.table.find_type(interface_name)
	concrete := p.table.find_type(_typ)
	for required in iface.methods {
		if concrete.has_method(required.name) {
			continue
		}
		p.error('Type "$_typ" doesnt satisfy interface "$interface_name" (method "$required.name" is not implemented)')
		return false
	}
	return true
}
// type_default returns the C expression used as the default value for type
// `typ`, or '' when no default is generated (other types rely on mandatory
// initialization).
fn type_default(typ string) string {
	if typ.starts_with('array_') {
		// Strip the 'array_' prefix to get the element type
		elem := typ.right(6)
		return 'new_array(0, 1, sizeof($elem))'
	}
	// Always set pointers to 0 (this also covers 'void*' and 'byte*')
	if typ.ends_with('*') {
		return '0'
	}
	// Names containing '__' get no default
	if typ.contains('__') {
		return ''
	}
	if typ == 'int' || typ == 'bool' {
		return '0'
	}
	if typ == 'string' {
		return 'tos("", 0)'
	}
	return ''
}
// is_interface reports whether a registered type called `name` is flagged as
// an interface.
// TODO PERF: linear scan over all types.
fn (t &Table) is_interface(name string) bool {
	mut found := false
	for candidate in t.types {
		if candidate.is_interface && candidate.name == name {
			found = true
			break
		}
	}
	return found
}
// main_exists reports whether a function named 'main' has been registered.
fn (t &Table) main_exists() bool {
	return t.known_fn('main')
}
// find_const returns the first registered constant called `name`, or an empty
// `Var{}` when there is none.
// TODO use `?Var`
fn (t &Table) find_const(name string) Var {
	for candidate in t.consts {
		if candidate.name == name {
			return candidate
		}
	}
	return Var{}
}
// cgen_name returns the name under which function `f` is emitted.
// Methods become `<receiver type>_<name>` with spaces and `*` removed and
// `+`/`-` spelled out. Builtin functions listed in CReserved get a `v_` prefix.
// With obfuscation enabled, most other functions are renamed to `f_<id>`, where
// the id is allocated on first use and remembered in table.obf_ids.
fn (table mut Table) cgen_name(f &Fn) string {
mut name := f.name
if f.is_method {
name = '${f.receiver_typ}_$f.name'
name = name.replace(' ', '')
name = name.replace('*', '')
name = name.replace('+', 'plus')
name = name.replace('-', 'minus')
}
// Avoid name conflicts (with things like abs(), print() etc).
// Generate v_abs(), v_print()
// TODO duplicate functionality
if f.pkg == 'builtin' && CReserved.contains(f.name) {
return 'v_$name'
}
// Obfuscate but skip certain names
// TODO ugly, fix
if table.obfuscate && f.name != 'main' && f.name != 'WinMain' && f.pkg != 'builtin' && !f.is_c &&
f.pkg != 'darwin' && f.pkg != 'os' && !f.name.contains('window_proc') && f.name != 'gg__vec2' &&
f.name != 'build_token_str' && f.name != 'build_keys' && f.pkg != 'json' &&
!name.ends_with('_str') && !name.contains('contains') {
// An id of 0 is treated as "not registered yet"; ids start at 1 because fn_cnt is incremented first
mut idx := table.obf_ids[name]
// No such function yet, register it
if idx == 0 {
table.fn_cnt++
table.obf_ids[name] = table.fn_cnt
idx = table.fn_cnt
}
old := name
name = 'f_$idx'
println2('$old ==> $name')
}
return name
}
// cgen_name_type_pair renders a declaration of `name` with type `typ`:
// ('s', 'string') => 'string s'
// ('nums', '[20]byte') => 'byte nums [20 ]'
// ('myfn', 'fn (int) string') => 'string (*myfn)( int /*FFF*/ )'
// The argument text of the last form comes from str_args(); 'int' is illustrative.
fn (table &Table) cgen_name_type_pair(name, typ string) string {
	// Fixed size arrays such as [10]int: the size goes after the name
	if typ.len > 0 && typ[0] == `[` {
		elem_typ := typ.all_after(']')
		dims := typ.all_before(']')
		return '$elem_typ $name $dims ]'
	}
	// Function types become function pointer declarations
	if typ.starts_with('fn (') {
		fn_typ := table.find_type(typ)
		if fn_typ.name == '' {
			os.exit1('this should never happen')
		}
		args := fn_typ.func.str_args(table)
		return '$fn_typ.func.typ (*$name)( $args /*FFF*/ )'
	}
	// TODO tm hack, do this for all C struct args
	if typ == 'tm' {
		return 'struct tm $name'
	}
	return '$typ $name'
}

265
compiler/token.v Normal file
View File

@ -0,0 +1,265 @@
module main
// Token enumerates every token kind. The members between keyword_beg and
// keyword_end are the language keywords; build_keys() iterates over exactly
// that range, so new keywords must be added between the two markers.
enum Token {
EOF
NAME
INT
STRING
CHAR
FLOAT
PLUS
MINUS
MUL
DIV
MOD
XOR
PIPE
INC
DEC
AND
OR
NOT
BIT_NOT
QUESTION
COMMA
SEMICOLON
COLON
AMP
HASH
AT
DOLLAR
LEFT_SHIFT
RIGHT_SHIFT
// = := += -=
ASSIGN
DECL_ASSIGN
PLUS_ASSIGN
MINUS_ASSIGN
DIV_ASSIGN
MULT_ASSIGN
XOR_ASSIGN
MOD_ASSIGN
OR_ASSIGN
AND_ASSIGN
RIGHT_SHIFT_ASSIGN
LEFT_SHIFT_ASSIGN
// {} () []
LCBR
RCBR
LPAR
RPAR
LSBR
RSBR
// == != <= < >= >
EQ
NE
GT
LT
GE
LE
// comments
LINE_COM
MLINE_COM
NL
DOT
DOTDOT
// keywords
// keyword_beg and keyword_end are range markers, not real tokens
keyword_beg
PACKAGE
// MODULE
STRUCT
IF
ELSE
RETURN
GO
CONST
IMPORT_CONST
MUT
TIP
ENUM
FOR
SWITCH
MATCH
CASE
FUNC
TRUE
FALSE
CONTINUE
BREAK
EMBED
IMPORT
TYPEOF
DEFAULT
ENDIF
ASSERT
SIZEOF
IN
ATOMIC
INTERFACE
OR_ELSE
GLOBAL
UNION
PUB
GOTO
STATIC
keyword_end
}
// build_keys generates a map from each keyword's spelling to its Token value
// (as an int), e.g. KEYWORDS['return'] == int(RETURN). It covers the enum
// members strictly between keyword_beg and keyword_end and reads the spellings
// from TOKENSTR.
fn build_keys() map_int {
	mut keys := map[string]int{}
	first := int(keyword_beg) + 1
	last := int(keyword_end)
	for i := first; i < last; i++ {
		word := TOKENSTR[i]
		keys[word] = int(i)
	}
	return keys
}
// build_token_str returns a table indexed by Token value that holds each
// token's textual form. Tokens that are not assigned below (MLINE_COM, for
// example) keep the initial empty string.
fn build_token_str() []string {
// NOTE(review): presumably 140 empty strings, which must exceed the number of Token members - confirm
mut s := [''; 140]// TODO define a const
s[keyword_beg] = ''
s[keyword_end] = ''
s[EOF] = 'EOF'
s[NAME] = 'NAME'
s[INT] = 'INT'
s[STRING] = 'STR'
s[CHAR] = 'CHAR'
s[FLOAT] = 'FLOAT'
// Operators and punctuation
s[PLUS] = '+'
s[MINUS] = '-'
s[MUL] = '*'
s[DIV] = '/'
s[MOD] = '%'
s[XOR] = '^'
s[BIT_NOT] = '~'
s[PIPE] = '|'
s[HASH] = '#'
s[AMP] = '&'
s[AT] = '@'
s[INC] = '++'
s[DEC] = '--'
s[AND] = '&&'
s[OR] = '||'
s[NOT] = '!'
s[DOT] = '.'
s[DOTDOT] = '..'
s[COMMA] = ','
s[SEMICOLON] = ';'
s[COLON] = ':'
s[ASSIGN] = '='
s[DECL_ASSIGN] = ':='
s[PLUS_ASSIGN] = '+='
s[MINUS_ASSIGN] = '-='
s[MULT_ASSIGN] = '*='
s[DIV_ASSIGN] = '/='
s[XOR_ASSIGN] = '^='
s[MOD_ASSIGN] = '%='
s[OR_ASSIGN] = '|='
s[AND_ASSIGN] = '&='
s[RIGHT_SHIFT_ASSIGN] = '>>='
s[LEFT_SHIFT_ASSIGN] = '<<='
s[LCBR] = '{'
s[RCBR] = '}'
s[LPAR] = '('
s[RPAR] = ')'
s[LSBR] = '['
s[RSBR] = ']'
s[EQ] = '=='
s[NE] = '!='
s[GT] = '>'
s[LT] = '<'
s[GE] = '>='
s[LE] = '<='
s[QUESTION] = '?'
s[LEFT_SHIFT] = '<<'
s[RIGHT_SHIFT] = '>>'
s[LINE_COM] = '//'
s[NL] = 'NLL'
s[DOLLAR] = '$'
// Keywords: these spellings become the keys of KEYWORDS (see build_keys)
s[ASSERT] = 'assert'
s[STRUCT] = 'struct'
s[IF] = 'if'
s[ELSE] = 'else'
s[RETURN] = 'return'
s[PACKAGE] = 'module'
s[SIZEOF] = 'sizeof'
s[GO] = 'go'
s[GOTO] = 'goto'
s[CONST] = 'const'
s[MUT] = 'mut'
s[TIP] = 'type'
s[FOR] = 'for'
s[SWITCH] = 'switch'
s[MATCH] = 'match'
s[CASE] = 'case'
s[FUNC] = 'fn'
s[TRUE] = 'true'
s[FALSE] = 'false'
s[CONTINUE] = 'continue'
s[BREAK] = 'break'
s[IMPORT] = 'import'
s[EMBED] = 'embed'
s[TYPEOF] = 'typeof'
s[DEFAULT] = 'default'
s[ENDIF] = 'endif'
s[ENUM] = 'enum'
s[INTERFACE] = 'interface'
s[PUB] = 'pub'
s[IMPORT_CONST] = 'import_const'
s[IN] = 'in'
s[ATOMIC] = 'atomic'
s[OR_ELSE] = 'or'
s[GLOBAL] = '__global'
s[UNION] = 'union'
s[STATIC] = 'static'
return s
}
// TOKENSTR maps a Token value (used as index) to its textual form.
// KEYWORDS maps a keyword's spelling to its Token value; build_keys() reads
// TOKENSTR, so TOKENSTR has to be initialized first.
const (
TOKENSTR = build_token_str()
KEYWORDS = build_keys()
)
// key_to_token converts a keyword spelling into its Token by looking it up in
// KEYWORDS and casting the stored int.
fn key_to_token(key string) Token {
	idx := KEYWORDS[key]
	tok := Token(idx)
	return tok
}
// is_key reports whether `key` is a keyword, i.e. whether key_to_token()
// yields a Token whose int value is greater than 0.
fn is_key(key string) bool {
	tok := key_to_token(key)
	return int(tok) > 0
}
// str returns the token's textual form from TOKENSTR.
fn (t Token) str() string {
	idx := int(t)
	return TOKENSTR[idx]
}
// is_decl reports whether the token is one of ENUM, INTERFACE, STRUCT, TIP,
// FUNC, CONST, IMPORT_CONST, AT or EOF.
// TODO return t in [FUNC ,TIP, CONST, IMPORT_CONST ,AT ,EOF]
fn (t Token) is_decl() bool {
	if t == ENUM || t == INTERFACE || t == STRUCT || t == TIP {
		return true
	}
	if t == FUNC || t == CONST || t == IMPORT_CONST {
		return true
	}
	return t == AT || t == EOF
}
// Tokens accepted by Token.is_assign(): plain `=` and the compound assignment
// operators. DECL_ASSIGN (`:=`) is not part of the list.
const (
AssignTokens = [
ASSIGN, PLUS_ASSIGN, MINUS_ASSIGN,
MULT_ASSIGN, DIV_ASSIGN, XOR_ASSIGN, MOD_ASSIGN,
OR_ASSIGN, AND_ASSIGN, RIGHT_SHIFT_ASSIGN,
LEFT_SHIFT_ASSIGN
]
)
// is_assign reports whether the token is listed in AssignTokens.
fn (t Token) is_assign() bool {
	if t in AssignTokens {
		return true
	}
	return false
}
// contains reports whether `val` occurs in the token array.
fn (t[]Token) contains(val Token) bool {
	mut found := false
	for tok in t {
		if tok == val {
			found = true
			break
		}
	}
	return found
}