2019-06-23 05:21:30 +03:00
|
|
|
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
|
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
2019-10-13 16:37:43 +03:00
|
|
|
module compiler
|
2019-06-22 21:20:28 +03:00
|
|
|
|
2019-10-23 13:03:14 +03:00
|
|
|
// Token is one lexical unit: its kind, its literal text and
// where it was found in the source.
struct Token {
	tok      TokenKind // kind of the token (enum value); cheap to compare
	lit      string // literal text of the token
	line_nr  int // line number in the source on which the token occurred
	name_idx int // index into the name table, for O(1) lookup
	pos      int // position of the token in the scanner text
}
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// TokenKind lists every kind of token.
// The declaration order matters: kinds declared before `plus` are compared
// with `<` in Token.str(), and every kind declared between `keyword_beg`
// and `keyword_end` is registered as a keyword by build_keys().
enum TokenKind {
	eof
	// literals and identifiers
	name // user
	number // 123
	str // 'foo'
	str_inter // 'name=$user.name'
	chartoken // `A`
	// operators and punctuation
	plus // +
	minus // -
	mul // *
	div // /
	mod // %
	xor // ^
	pipe // |
	inc // ++
	dec // --
	and // &&
	logical_or // ||
	not // !
	bit_not // ~
	question // ?
	comma // ,
	semicolon // ;
	colon // :
	arrow // =>
	left_arrow // <-
	amp // &
	hash // #
	dollar // $
	str_dollar
	left_shift // <<
	righ_shift // >>
	// at // @
	// assignments
	assign // =
	decl_assign // :=
	plus_assign // +=
	minus_assign // -=
	div_assign // /=
	mult_assign // *=
	xor_assign // ^=
	mod_assign // %=
	or_assign // |=
	and_assign // &=
	righ_shift_assign // >>=
	left_shift_assign // <<=
	// {} () []
	lcbr
	rcbr
	lpar
	rpar
	lsbr
	rsbr
	// == != > < >= <=
	eq
	ne
	gt
	lt
	ge
	le
	// comments
	line_comment
	mline_comment
	nl
	dot // .
	dotdot // ..
	ellipsis // ...
	// keywords: everything between keyword_beg and keyword_end
	keyword_beg
	key_as
	key_asm
	key_assert
	key_atomic
	key_break
	key_const
	key_continue
	key_defer
	key_else
	key_embed
	key_enum
	key_false
	key_for
	key_fn
	key_global
	key_go
	key_goto
	key_if
	key_import
	key_import_const
	key_in
	key_interface
	// key_it
	key_match
	key_module
	key_mut
	key_none
	key_return
	key_select
	key_sizeof
	key_offsetof
	key_struct
	key_switch
	key_true
	key_type
	// typeof
	key_orelse
	key_union
	key_pub
	key_static
	key_unsafe
	keyword_end
}
|
|
|
|
|
|
|
|
// build_keys generates a map from each keyword's text to its token number:
// build_keys()['return'] == int(TokenKind.key_return)
// Only the kinds strictly between keyword_beg and keyword_end are visited.
fn build_keys() map[string]int {
	first := int(TokenKind.keyword_beg) + 1
	last := int(TokenKind.keyword_end)
	mut table := map[string]int
	for idx := first; idx < last; idx++ {
		word := TokenStr[idx]
		table[word] = idx
	}
	return table
}
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// TODO remove once we have `enum TokenKind { name('name') if('if') ... }`
// build_token_str returns a table with NrTokens entries, indexed by
// TokenKind, that holds the textual form of every token kind.
// Entries that are never assigned keep the empty string.
// The keyword entries (between keyword_beg and keyword_end) are what
// build_keys() reads to build the keyword map, so their spelling is the
// keyword's spelling in source code.
fn build_token_str() []string {
	mut s := [''].repeat(NrTokens)
	// the two range markers are not real tokens and have no text
	s[TokenKind.keyword_beg] = ''
	s[TokenKind.keyword_end] = ''
	s[TokenKind.eof] = 'eof'
	s[TokenKind.name] = 'name'
	s[TokenKind.number] = 'number'
	s[TokenKind.str] = 'STR'
	// str_inter previously had no entry, so its .str() was an empty string
	s[TokenKind.str_inter] = 'str_inter'
	s[TokenKind.chartoken] = 'char'
	s[TokenKind.plus] = '+'
	s[TokenKind.minus] = '-'
	s[TokenKind.mul] = '*'
	s[TokenKind.div] = '/'
	s[TokenKind.mod] = '%'
	s[TokenKind.xor] = '^'
	s[TokenKind.bit_not] = '~'
	s[TokenKind.pipe] = '|'
	s[TokenKind.hash] = '#'
	s[TokenKind.amp] = '&'
	s[TokenKind.inc] = '++'
	s[TokenKind.dec] = '--'
	s[TokenKind.and] = '&&'
	s[TokenKind.logical_or] = '||'
	s[TokenKind.not] = '!'
	s[TokenKind.dot] = '.'
	s[TokenKind.dotdot] = '..'
	s[TokenKind.ellipsis] = '...'
	s[TokenKind.comma] = ','
	// s[TokenKind.at] = '@'
	s[TokenKind.semicolon] = ';'
	s[TokenKind.colon] = ':'
	s[TokenKind.arrow] = '=>'
	// left_arrow previously had no entry, so its .str() was an empty string
	s[TokenKind.left_arrow] = '<-'
	s[TokenKind.assign] = '='
	s[TokenKind.decl_assign] = ':='
	s[TokenKind.plus_assign] = '+='
	s[TokenKind.minus_assign] = '-='
	s[TokenKind.mult_assign] = '*='
	s[TokenKind.div_assign] = '/='
	s[TokenKind.xor_assign] = '^='
	s[TokenKind.mod_assign] = '%='
	s[TokenKind.or_assign] = '|='
	s[TokenKind.and_assign] = '&='
	s[TokenKind.righ_shift_assign] = '>>='
	s[TokenKind.left_shift_assign] = '<<='
	s[TokenKind.lcbr] = '{'
	s[TokenKind.rcbr] = '}'
	s[TokenKind.lpar] = '('
	s[TokenKind.rpar] = ')'
	s[TokenKind.lsbr] = '['
	s[TokenKind.rsbr] = ']'
	s[TokenKind.eq] = '=='
	s[TokenKind.ne] = '!='
	s[TokenKind.gt] = '>'
	s[TokenKind.lt] = '<'
	s[TokenKind.ge] = '>='
	s[TokenKind.le] = '<='
	s[TokenKind.question] = '?'
	s[TokenKind.left_shift] = '<<'
	s[TokenKind.righ_shift] = '>>'
	s[TokenKind.line_comment] = '// line comment'
	s[TokenKind.mline_comment] = '/* mline comment */'
	s[TokenKind.nl] = 'NLL'
	s[TokenKind.dollar] = '$'
	s[TokenKind.str_dollar] = '$2'
	// keywords
	s[TokenKind.key_assert] = 'assert'
	s[TokenKind.key_struct] = 'struct'
	s[TokenKind.key_if] = 'if'
	// s[TokenKind.key_it] = 'it'
	s[TokenKind.key_else] = 'else'
	s[TokenKind.key_asm] = 'asm'
	s[TokenKind.key_return] = 'return'
	s[TokenKind.key_module] = 'module'
	s[TokenKind.key_sizeof] = 'sizeof'
	s[TokenKind.key_go] = 'go'
	s[TokenKind.key_goto] = 'goto'
	s[TokenKind.key_const] = 'const'
	s[TokenKind.key_mut] = 'mut'
	s[TokenKind.key_type] = 'type'
	s[TokenKind.key_for] = 'for'
	s[TokenKind.key_switch] = 'switch'
	s[TokenKind.key_fn] = 'fn'
	s[TokenKind.key_true] = 'true'
	s[TokenKind.key_false] = 'false'
	s[TokenKind.key_continue] = 'continue'
	s[TokenKind.key_break] = 'break'
	s[TokenKind.key_import] = 'import'
	s[TokenKind.key_embed] = 'embed'
	s[TokenKind.key_unsafe] = 'unsafe'
	// Tokens[key_typeof] = 'typeof'
	s[TokenKind.key_enum] = 'enum'
	s[TokenKind.key_interface] = 'interface'
	s[TokenKind.key_pub] = 'pub'
	s[TokenKind.key_import_const] = 'import_const'
	s[TokenKind.key_in] = 'in'
	s[TokenKind.key_atomic] = 'atomic'
	s[TokenKind.key_orelse] = 'or'
	s[TokenKind.key_global] = '__global'
	s[TokenKind.key_union] = 'union'
	s[TokenKind.key_static] = 'static'
	s[TokenKind.key_as] = 'as'
	s[TokenKind.key_defer] = 'defer'
	s[TokenKind.key_match] = 'match'
	s[TokenKind.key_select] = 'select'
	s[TokenKind.key_none] = 'none'
	s[TokenKind.key_offsetof] = '__offsetof'
	return s
}
|
|
|
|
|
|
|
|
const (
	// Length of the TokenStr table. build_token_str() indexes the table by
	// TokenKind, so this has to stay larger than the highest TokenKind value.
	NrTokens = 141
	// Textual form of each token kind, indexed by int(TokenKind).
	TokenStr = build_token_str()
	// Keyword text -> token number, built from TokenStr.
	KEYWORDS = build_keys()
)
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// key_to_token looks `key` up in KEYWORDS and returns the stored
// number converted to a TokenKind.
fn key_to_token(key string) TokenKind {
	return TokenKind(KEYWORDS[key])
}
|
|
|
|
|
|
|
|
// is_key reports whether key_to_token(key) yields a token number above zero.
fn is_key(key string) bool {
	kind := key_to_token(key)
	return int(kind) > 0
}
|
|
|
|
|
2019-12-11 03:24:26 +03:00
|
|
|
// str returns the TokenStr entry for this token kind.
pub fn (t TokenKind) str() string {
	idx := int(t)
	return TokenStr[idx]
}
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// is_decl reports whether the kind is one of the declaration keywords
// listed below, or eof.
fn (t TokenKind) is_decl() bool {
	decl_kinds := [TokenKind.key_enum, .key_interface, .key_fn, .key_struct,
	.key_type, .key_const, .key_import_const, .key_pub, .eof]
	return t in decl_kinds
}
|
|
|
|
|
|
|
|
const (
	// Every token kind that is_assign() treats as an assignment operator.
	AssignTokens = [
		TokenKind.assign,
		.plus_assign,
		.minus_assign,
		.mult_assign,
		.div_assign,
		.xor_assign,
		.mod_assign,
		.or_assign,
		.and_assign,
		.righ_shift_assign,
		.left_shift_assign
	]
)
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// is_assign reports whether the kind is listed in AssignTokens.
fn (t TokenKind) is_assign() bool {
	return AssignTokens.contains(t)
}
|
|
|
|
|
2019-10-09 01:05:34 +03:00
|
|
|
// contains reports whether `val` is an element of the array.
fn (t []TokenKind) contains(val TokenKind) bool {
	for i := 0; i < t.len; i++ {
		if t[i] == val {
			return true
		}
	}
	return false
}
|
|
|
|
|
2019-12-11 03:24:26 +03:00
|
|
|
// str returns a printable form of the token:
// - eof is shown as '.EOF'
// - char and string literals are shown with their quotes
// - the remaining kinds declared before `plus` (numbers, names, ...) are
//   shown as their literal text
// - anything else is shown as the text of its kind
pub fn (t Token) str() string {
	kind := t.tok
	if kind == .eof {
		return '.EOF'
	}
	if kind == .chartoken {
		return '`$t.lit`'
	}
	if kind == .str {
		return "'$t.lit'"
	}
	// number, name etc: the literal itself is the best representation
	if kind < .plus {
		return t.lit
	}
	return kind.str()
}
|
2019-10-21 14:21:30 +03:00
|
|
|
|
2020-01-12 03:46:25 +03:00
|
|
|
// detailed_str returns a debugging description of the token: its line
// number, position and numeric kind, followed by the result of str().
pub fn (t Token) detailed_str() string {
	description := 'Token{ .line:${t.line_nr:4d}, .pos:${t.pos:5d}, .tok: ${t.tok:3d} } = $t '
	return description
}
|
|
|
|
|