1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00
v/compiler/scanner.v

929 lines
20 KiB
V
Raw Normal View History

2019-06-23 05:21:30 +03:00
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license
// that can be found in the LICENSE file.
2019-06-22 21:20:28 +03:00
module main
2019-09-21 01:23:53 +03:00
import (
os
strings
)
// Quote characters that start a string literal in scan(); ident_string()
// remembers which one opened the current string in Scanner.quote.
const (
single_quote = `\'`
double_quote = `"`
)
2019-07-12 08:37:54 +03:00
2019-06-22 21:20:28 +03:00
// Scanner holds the complete lexer state for one V source file.
struct Scanner {
mut:
	file_path string // path of the file being scanned
	text string // contents of the file (BOM stripped by new_scanner)
	pos int // index of the current byte in `text`
	line_nr int // zero-based line counter; messages print line_nr + 1
	inside_string bool // set while scanning an interpolated expression inside a string
	inter_start bool // for hacky string interpolation TODO simplify
	inter_end bool
	debug bool // set by debug_tokens()
	line_comment string // text of the most recently scanned line comment / shebang
	started bool // false until the first scan() call
	// vfmt fields
	fmt_out strings.Builder
	fmt_indent int
	fmt_line_empty bool
	prev_tok Token
	fn_name string // needed for @FN
	should_print_line_on_error bool // error() prints the offending line and a ^ marker
	quote byte // which quote is used to denote current string: ' or "
}
2019-09-01 22:51:16 +03:00
// new_scanner reads `file_path` and returns a Scanner positioned at the start
// of its text. A leading UTF-8 byte order mark is skipped. A missing or
// unreadable file is reported through verror().
fn new_scanner(file_path string) &Scanner {
	if !os.file_exists(file_path) {
		verror('"$file_path" doesn\'t exist')
	}
	mut contents := os.read_file(file_path) or {
		verror('scanner: failed to open "$file_path"')
		return 0
	}
	// BOM check: EF BB BF at the very beginning of the file
	if contents.len >= 3 {
		c_text := contents.str
		has_bom := c_text[0] == 0xEF && c_text[1] == 0xBB && c_text[2] == 0xBF
		if has_bom {
			// skip three BOM bytes
			offset_from_begin := 3
			contents = tos(c_text[offset_from_begin], vstrlen(c_text) - offset_from_begin)
		}
	}
	new_scanner_instance := &Scanner {
		file_path: file_path
		text: contents
		fmt_out: strings.new_builder(1000)
		should_print_line_on_error: true
	}
	return new_scanner_instance
}
// ScannerPos is a snapshot of the two Scanner fields that
// get_scanner_pos() saves and goto_scanner_position() restores.
struct ScannerPos {
mut:
	pos int // byte index into the scanned text
	line_nr int // line counter at that index
}
// str formats the position as text, with both numbers formatted via `:5d`.
fn (s ScannerPos) str() string {
	repr := 'ScannerPos{ ${s.pos:5d} , ${s.line_nr:5d} }'
	return repr
}
// get_scanner_pos captures the current byte index and line counter.
fn (s &Scanner) get_scanner_pos() ScannerPos {
	return ScannerPos{
		pos: s.pos
		line_nr: s.line_nr
	}
}
// goto_scanner_position moves the scanner back (or forward) to a position
// previously obtained from get_scanner_pos(). Only `pos` and `line_nr` are
// restored; the string/interpolation flags are left as they are.
fn (s mut Scanner) goto_scanner_position(scp ScannerPos) {
	s.line_nr = scp.line_nr
	s.pos = scp.pos
}
2019-06-22 21:20:28 +03:00
// ScanRes is the (token, literal) pair returned by Scanner.scan().
// TODO remove once multiple return values are implemented
struct ScanRes {
	tok Token // kind of the scanned token
	lit string // literal text; empty for tokens that carry none
}
// scan_res bundles a token and its literal into a ScanRes.
fn scan_res(tok Token, lit string) ScanRes {
	return ScanRes{
		tok: tok
		lit: lit
	}
}
// ident_name reads the identifier that starts at s.pos. The first byte is
// taken as-is; the following bytes are consumed while they are name
// characters or digits. On return s.pos rests on the last byte of the name.
fn (s mut Scanner) ident_name() string {
	begin := s.pos
	s.pos++
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !(is_name_char(ch) || ch.is_digit()) {
			break
		}
		s.pos++
	}
	ident := s.text.substr(begin, s.pos)
	// step back: scan() advances by one before reading the next token
	s.pos--
	return ident
}
2019-07-24 01:06:48 +03:00
// ident_hex_number reads a hexadecimal literal (including its `0x` prefix)
// starting at s.pos and leaves s.pos on the literal's last byte.
fn (s mut Scanner) ident_hex_number() string {
	begin := s.pos
	s.pos += 2 // skip '0x'
	for s.pos < s.text.len && s.text[s.pos].is_hex_digit() {
		s.pos++
	}
	hex := s.text.substr(begin, s.pos)
	s.pos--
	return hex
}
// ident_oct_number reads a run of digits starting at s.pos as an octal
// literal. A digit that is not an octal digit is reported via s.error().
// On return s.pos rests on the last digit.
fn (s mut Scanner) ident_oct_number() string {
	begin := s.pos
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !ch.is_digit() {
			break
		}
		if !ch.is_oct_digit() {
			s.error('malformed octal constant')
		}
		s.pos++
	}
	oct := s.text.substr(begin, s.pos)
	s.pos--
	return oct
}
// ident_dec_number reads a decimal literal starting at s.pos: an integer
// part, an optional fractional part and an optional `e+`/`e-` exponent.
// Malformed literals are reported via s.error(). On return s.pos rests on
// the last byte of the literal.
fn (s mut Scanner) ident_dec_number() string {
	begin := s.pos
	// integer part
	for s.pos < s.text.len && s.text[s.pos].is_digit() {
		s.pos++
	}
	// e.g. 1..9
	// only '1' is returned here, the '..9' is left for later tokens
	if s.expect('..', s.pos) {
		int_part := s.text.substr(begin, s.pos)
		s.pos--
		return int_part
	}
	// fractional part
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		s.pos++
		for s.pos < s.text.len && s.text[s.pos].is_digit() {
			s.pos++
		}
		if !s.inside_string && s.pos < s.text.len && s.text[s.pos] == `f` {
			s.error('no `f` is needed for floats')
		}
	}
	// exponential part
	mut seen_exponent := false
	if s.expect('e+', s.pos) || s.expect('e-', s.pos) {
		s.pos += 2
		exp_digits_start := s.pos
		for s.pos < s.text.len && s.text[s.pos].is_digit() {
			s.pos++
		}
		if s.pos == exp_digits_start {
			s.error('exponent has no digits')
		}
		seen_exponent = true
	}
	// error check: 1.23.4, 123.e+3.4
	if s.pos < s.text.len && s.text[s.pos] == `.` {
		if seen_exponent {
			s.error('exponential part should be integer')
		}
		else {
			s.error('too many decimal points in number')
		}
	}
	literal := s.text.substr(begin, s.pos)
	s.pos--
	return literal
}
2019-07-24 01:06:48 +03:00
// ident_number dispatches on the literal's prefix: `0x` is hexadecimal,
// `0.` and `0e` are decimal, any other leading `0` is octal, and everything
// else is decimal.
fn (s mut Scanner) ident_number() string {
	if s.expect('0x', s.pos) {
		return s.ident_hex_number()
	}
	zero_led_decimal := s.expect('0.', s.pos) || s.expect('0e', s.pos)
	if !zero_led_decimal && s.text[s.pos] == `0` {
		return s.ident_oct_number()
	}
	return s.ident_dec_number()
}
// has_gone_over_line_end looks backwards from s.pos-1: it first skips the
// run of non-whitespace bytes, then reports whether the whitespace run in
// front of it contains a line break.
fn (s Scanner) has_gone_over_line_end() bool {
	mut idx := s.pos - 1
	for idx >= 0 && !s.text[idx].is_white() {
		idx--
	}
	for idx >= 0 {
		ch := s.text[idx]
		if !ch.is_white() {
			break
		}
		if is_nl(ch) {
			return true
		}
		idx--
	}
	return false
}
2019-06-22 21:20:28 +03:00
// skip_whitespace advances s.pos past whitespace and bumps s.line_nr for
// every line break it crosses.
fn (s mut Scanner) skip_whitespace() {
	for s.pos < s.text.len {
		ch := s.text[s.pos]
		if !ch.is_white() {
			break
		}
		// Count \r\n as one line: the \n that directly follows a \r
		// has already been counted
		starts_new_line := is_nl(ch) && !s.expect('\r\n', s.pos - 1)
		if starts_new_line {
			s.line_nr++
		}
		s.pos++
	}
}
2019-07-06 16:33:15 +03:00
// scan advances the scanner and returns the next token together with its
// literal (empty for tokens that carry none). Line comments, shebang lines
// and /* */ comments are handed to fgenln() and skipped by scanning again.
// An unknown character is reported through s.error().
//
// String interpolation is driven by three flags that ident_string() and this
// function toggle: inside_string, inter_start and inter_end. The end of an
// interpolated string is detected by comparing with s.quote, the quote that
// opened the string, so both '...' and "..." strings are handled.
fn (s mut Scanner) scan() ScanRes {
	if s.started {
		s.pos++
	}
	s.started = true
	if s.pos >= s.text.len {
		return scan_res(.eof, '')
	}
	// skip whitespace
	if !s.inside_string {
		s.skip_whitespace()
	}
	// End of $var, start next string
	if s.inter_end {
		s.inter_end = false
		if s.text[s.pos] == s.quote {
			return scan_res(.str, '')
		}
		return scan_res(.str, s.ident_string())
	}
	s.skip_whitespace()
	// end of file
	if s.pos >= s.text.len {
		return scan_res(.eof, '')
	}
	// handle each char
	c := s.text[s.pos]
	mut nextc := `\0`
	if s.pos + 1 < s.text.len {
		nextc = s.text[s.pos + 1]
	}
	// name or keyword
	if is_name_char(c) {
		name := s.ident_name()
		// tmp hack to detect . in ${}
		// Check if not .eof to prevent panic
		next_char := if s.pos + 1 < s.text.len { s.text[s.pos + 1] } else { `\0` }
		if is_key(name) {
			return scan_res(key_to_token(name), '')
		}
		// 'asdf $b' => "b" is the last name in the string, dont start parsing string
		// at the next quote, skip it
		if s.inside_string {
			if next_char == s.quote {
				s.inter_end = true
				s.inter_start = false
				s.inside_string = false
			}
		}
		if s.inter_start && next_char != `.` {
			s.inter_end = true
			s.inter_start = false
		}
		if s.pos == 0 && next_char == ` ` {
			s.pos++
			//If a single letter name at the start of the file, increment
			//Otherwise the scanner would be stuck at s.pos = 0
		}
		return scan_res(.name, name)
	}
	// `123`, `.123`
	else if c.is_digit() || (c == `.` && nextc.is_digit()) {
		num := s.ident_number()
		return scan_res(.number, num)
	}
	// all other tokens
	switch c {
	case `+`:
		if nextc == `+` {
			s.pos++
			return scan_res(.inc, '')
		}
		else if nextc == `=` {
			s.pos++
			return scan_res(.plus_assign, '')
		}
		return scan_res(.plus, '')
	case `-`:
		if nextc == `-` {
			s.pos++
			return scan_res(.dec, '')
		}
		else if nextc == `=` {
			s.pos++
			return scan_res(.minus_assign, '')
		}
		return scan_res(.minus, '')
	case `*`:
		if nextc == `=` {
			s.pos++
			return scan_res(.mult_assign, '')
		}
		return scan_res(.mul, '')
	case `^`:
		if nextc == `=` {
			s.pos++
			return scan_res(.xor_assign, '')
		}
		return scan_res(.xor, '')
	case `%`:
		if nextc == `=` {
			s.pos++
			return scan_res(.mod_assign, '')
		}
		return scan_res(.mod, '')
	case `?`:
		return scan_res(.question, '')
	case single_quote, double_quote:
		return scan_res(.str, s.ident_string())
	case `\``: // ` // apostrophe balance comment. do not remove
		return scan_res(.chartoken, s.ident_char())
	case `(`:
		return scan_res(.lpar, '')
	case `)`:
		return scan_res(.rpar, '')
	case `[`:
		return scan_res(.lsbr, '')
	case `]`:
		return scan_res(.rsbr, '')
	case `{`:
		// Skip { in ${ in strings
		if s.inside_string {
			return s.scan()
		}
		return scan_res(.lcbr, '')
	case `$`:
		return scan_res(.dollar, '')
	case `}`:
		// s = `hello $name !`
		// s = `hello ${name} !`
		if s.inside_string {
			s.pos++
			// TODO UNNEEDED?
			if s.text[s.pos] == s.quote {
				s.inside_string = false
				return scan_res(.str, '')
			}
			return scan_res(.str, s.ident_string())
		}
		else {
			return scan_res(.rcbr, '')
		}
	case `&`:
		if nextc == `=` {
			s.pos++
			return scan_res(.and_assign, '')
		}
		if nextc == `&` {
			s.pos++
			return scan_res(.and, '')
		}
		return scan_res(.amp, '')
	case `|`:
		if nextc == `|` {
			s.pos++
			return scan_res(.logical_or, '')
		}
		if nextc == `=` {
			s.pos++
			return scan_res(.or_assign, '')
		}
		return scan_res(.pipe, '')
	case `,`:
		return scan_res(.comma, '')
	case `@`:
		s.pos++
		name := s.ident_name()
		// @FN => will be substituted with the name of the current V function
		// @FILE => will be substituted with the path of the V source file
		// @LINE => will be substituted with the V line number where it appears (as a string).
		// @COLUMN => will be substituted with the column where it appears (as a string).
		// @VHASH => will be substituted with the shortened commit hash of the V compiler (as a string).
		// This allows things like this:
		// println( 'file: ' + @FILE + ' | line: ' + @LINE + ' | fn: ' + @FN)
		// ... which is useful while debugging/tracing
		if name == 'FN' { return scan_res(.str, s.fn_name) }
		if name == 'FILE' { return scan_res(.str, os.realpath(s.file_path).replace('\\', '\\\\')) } // escape \
		if name == 'LINE' { return scan_res(.str, (s.line_nr+1).str()) }
		if name == 'COLUMN' { return scan_res(.str, (s.current_column()).str()) }
		if name == 'VHASH' { return scan_res(.str, vhash()) }
		if !is_key(name) {
			s.error('@ must be used before keywords (e.g. `@type string`)')
		}
		return scan_res(.name, name)
	case `\r`:
		if nextc == `\n` {
			s.pos++
			return scan_res(.nl, '')
		}
	case `\n`:
		return scan_res(.nl, '')
	case `.`:
		if nextc == `.` {
			s.pos++
			return scan_res(.dotdot, '')
		}
		return scan_res(.dot, '')
	case `#`:
		start := s.pos + 1
		for s.pos < s.text.len && s.text[s.pos] != `\n` {
			s.pos++
		}
		s.line_nr++
		if nextc == `!` {
			// treat shebang line (#!) as a comment
			s.line_comment = s.text.substr(start + 1, s.pos).trim_space()
			s.fgenln('// shebang line "$s.line_comment"')
			return s.scan()
		}
		hash := s.text.substr(start, s.pos)
		return scan_res(.hash, hash.trim_space())
	case `>`:
		if nextc == `=` {
			s.pos++
			return scan_res(.ge, '')
		}
		else if nextc == `>` {
			if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
				s.pos += 2
				return scan_res(.righ_shift_assign, '')
			}
			s.pos++
			return scan_res(.righ_shift, '')
		}
		else {
			return scan_res(.gt, '')
		}
	case 0xE2:
		// Lead byte of the three-byte UTF-8 operators below. They all need
		// two more bytes, so do not look past the end of the text.
		if s.pos + 2 < s.text.len {
			third := s.text[s.pos + 2]
			// ≠ is U+2260 = E2 89 A0
			if nextc == 0x89 && third == 0xA0 {
				s.pos += 2
				return scan_res(.ne, '')
			}
			// ⩽ is U+2A7D = E2 A9 BD
			if nextc == 0xA9 && third == 0xBD {
				s.pos += 2
				return scan_res(.le, '')
			}
			// ⩾ is U+2A7E = E2 A9 BE
			if nextc == 0xA9 && third == 0xBE {
				s.pos += 2
				return scan_res(.ge, '')
			}
		}
	case `<`:
		if nextc == `=` {
			s.pos++
			return scan_res(.le, '')
		}
		else if nextc == `<` {
			if s.pos + 2 < s.text.len && s.text[s.pos + 2] == `=` {
				s.pos += 2
				return scan_res(.left_shift_assign, '')
			}
			s.pos++
			return scan_res(.left_shift, '')
		}
		else {
			return scan_res(.lt, '')
		}
	case `=`:
		if nextc == `=` {
			s.pos++
			return scan_res(.eq, '')
		}
		else if nextc == `>` {
			s.pos++
			return scan_res(.arrow, '')
		}
		else {
			return scan_res(.assign, '')
		}
	case `:`:
		if nextc == `=` {
			s.pos++
			return scan_res(.decl_assign, '')
		}
		else {
			return scan_res(.colon, '')
		}
	case `;`:
		return scan_res(.semicolon, '')
	case `!`:
		if nextc == `=` {
			s.pos++
			return scan_res(.ne, '')
		}
		else {
			return scan_res(.not, '')
		}
	case `~`:
		return scan_res(.bit_not, '')
	case `/`:
		if nextc == `=` {
			s.pos++
			return scan_res(.div_assign, '')
		}
		if nextc == `/` {
			start := s.pos + 1
			for s.pos < s.text.len && s.text[s.pos] != `\n`{
				s.pos++
			}
			s.line_nr++
			s.line_comment = s.text.substr(start + 1, s.pos)
			s.line_comment = s.line_comment.trim_space()
			s.fgenln('// ${s.prev_tok.str()} "$s.line_comment"')
			// Skip the comment (return the next token)
			return s.scan()
		}
		// Multiline comments
		if nextc == `*` {
			start := s.pos
			mut nest_count := 1
			// Skip comment, keeping track of nested /* */ pairs
			for nest_count > 0 {
				s.pos++
				if s.pos >= s.text.len {
					s.line_nr--
					s.error('comment not terminated')
				}
				if s.text[s.pos] == `\n` {
					s.line_nr++
					continue
				}
				if s.expect('/*', s.pos) {
					nest_count++
					continue
				}
				if s.expect('*/', s.pos) {
					nest_count--
				}
			}
			// s.pos is on the `*` of the final `*/`; move onto the `/`
			s.pos++
			end := s.pos + 1
			comm := s.text.substr(start, end)
			s.fgenln(comm)
			// Skip if not in fmt mode
			return s.scan()
		}
		return scan_res(.div, '')
	}
	$if windows {
		if c == `\0` {
			return scan_res(.eof, '')
		}
	}
	// Both quote characters are handled by the switch above, so no
	// quote-specific hint is needed here.
	s.error('invalid character `${c.str()}`')
	return scan_res(.eof, '')
}
// find_current_line_start_position returns the index of the first byte of
// the line that contains s.pos: the byte right after the nearest \n (10) or
// \r (13) found while walking backwards, or 0 when s.pos is on the first
// line of the text. When s.pos is at or past the end of the text, s.pos
// itself is returned.
fn (s &Scanner) find_current_line_start_position() int {
	if s.pos >= s.text.len { return s.pos }
	mut linestart := s.pos
	for linestart > 0 {
		if s.text[linestart] == 10 || s.text[linestart] == 13 {
			return linestart + 1
		}
		linestart--
	}
	// Reached the beginning of the text: the first line starts at index 0,
	// so that its first character and its columns are reported correctly.
	return 0
}
// find_current_line_end_position returns the index of the first \n (10) or
// \r (13) at or after s.pos, or the text length when there is none. When
// s.pos is at or past the end of the text, s.pos itself is returned.
fn (s &Scanner) find_current_line_end_position() int {
	if s.pos >= s.text.len { return s.pos }
	mut lineend := s.pos
	for lineend < s.text.len {
		ch := s.text[lineend]
		if ch == 10 || ch == 13 {
			break
		}
		lineend++
	}
	return lineend
}
2019-09-07 13:44:41 +03:00
// current_column returns the offset of s.pos from the start of its line.
fn (s &Scanner) current_column() int {
	line_start := s.find_current_line_start_position()
	return s.pos - line_start
}
2019-06-22 21:20:28 +03:00
// error prints `msg` prefixed with path:line:column and terminates the
// process with exit code 1. When should_print_line_on_error is set and the
// current line is not empty, the offending line and a `^` marker line are
// printed first.
fn (s &Scanner) error(msg string) {
	linestart := s.find_current_line_start_position()
	lineend := s.find_current_line_end_position()
	column := s.pos - linestart
	if s.should_print_line_on_error && lineend > linestart {
		line := s.text.substr( linestart, lineend )
		// The pointerline should have the same spaces/tabs as the offending
		// line, so that it prints the ^ character exactly on the *same spot*
		// where it is needed. That is the reason we can not just
		// use strings.repeat(` `, column) to form it.
		pointerline := line.clone()
		// the bytes of the clone are overwritten in place
		mut marker := pointerline.str
		for i, ch in line {
			if i == column {
				marker[i] = `^`
			}
			else if ch.is_space() {
				marker[i] = ch
			}
			else {
				marker[i] = ` `
			}
		}
		println(line)
		println(pointerline)
	}
	fullpath := os.realpath( s.file_path )
	// The filepath:line:col: format is the default C compiler
	// error output format. It allows editors and IDE's like
	// emacs to quickly find the errors in the output
	// and jump to their source with a keyboard shortcut.
	// Using only the filename leads to inability of IDE/editors
	// to find the source file, when it is in another folder.
	println('${fullpath}:${s.line_nr + 1}:${column+1}: $msg')
	exit(1)
}
2019-08-15 12:41:23 +03:00
// count_symbol_before counts how many consecutive `sym` bytes end at index
// `p` (inclusive), walking backwards towards the start of the text.
fn (s Scanner) count_symbol_before(p int, sym byte) int {
	mut idx := p
	for idx >= 0 && s.text[idx] == sym {
		idx--
	}
	// every step taken backwards was one matching byte
	return p - idx
}
2019-06-23 11:01:55 +03:00
// println('array out of bounds $idx len=$a.len')
2019-06-22 21:20:28 +03:00
// This is really bad. It needs a major clean up
// ident_string scans a (part of a) string literal starting at s.pos and
// returns its text without the surrounding quotes.
//
// When called on an opening quote outside of an interpolation, that quote is
// remembered in s.quote and only the same character ends the string.
// Scanning stops at the closing quote, at the end of the text, or right
// before `$name` / `${`; in the latter case s.inside_string is set (and
// s.inter_start for `$name`) and s.pos is moved back so that scan() sees the
// `$` next.
//
// A character counts as escaped when it is preceded by an odd number of
// backslashes, so `\\'` ends the string while `\'` and `\\\'` do not.
fn (s mut Scanner) ident_string() string {
	q := s.text[s.pos]
	if (q == single_quote || q == double_quote) && !s.inside_string{
		s.quote = q
	}
	mut start := s.pos
	s.inside_string = false
	slash := `\\`
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		c := s.text[s.pos]
		prevc := s.text[s.pos - 1]
		// c is escaped if an odd number of backslashes directly precedes it
		escaped := prevc == slash && s.count_symbol_before(s.pos - 1, slash) % 2 == 1
		// end of string
		if c == s.quote && !escaped {
			// handle '123\\' slash at the end
			break
		}
		if c == `\n` {
			s.line_nr++
		}
		// Don't allow \0
		if c == `0` && escaped {
			s.error('0 character in a string literal')
		}
		// Don't allow \x00
		if c == `0` && s.pos > 5 && s.expect('\\x0', s.pos - 3) {
			s.error('0 character in a string literal')
		}
		// ${var}
		if c == `{` && prevc == `$` && s.count_symbol_before(s.pos-2, `\\`) % 2 == 0 {
			s.inside_string = true
			// so that s.pos points to $ at the next step
			s.pos -= 2
			break
		}
		// $var
		if (c.is_letter() || c == `_`) && prevc == `$` && s.count_symbol_before(s.pos-2, `\\`) % 2 == 0 {
			s.inside_string = true
			s.inter_start = true
			s.pos -= 2
			break
		}
	}
	mut lit := ''
	// drop the opening quote, if this part starts with one
	if s.text[start] == s.quote {
		start++
	}
	mut end := s.pos
	// s.pos was moved back in front of the `$`: include the byte it rests on
	if s.inside_string {
		end++
	}
	if start <= s.pos {
		lit = s.text.substr(start, end)
	}
	return lit
}
// ident_char scans a backtick-delimited character literal whose opening
// backtick is at s.pos and returns its contents. On return s.pos rests on
// the closing backtick. A literal holding more than one character is
// reported via s.error().
fn (s mut Scanner) ident_char() string {
	start := s.pos
	slash := `\\`
	// number of non-backslash bytes seen, including the closing backtick
	mut len := 0
	for {
		s.pos++
		if s.pos >= s.text.len {
			break
		}
		ch := s.text[s.pos]
		if ch != slash {
			len++
		}
		double_slash := s.expect('\\\\', s.pos - 2)
		is_closing := ch == `\`` && (s.text[s.pos - 1] != slash || double_slash) // ` // apostrophe balance comment. do not remove
		if is_closing {
			if double_slash {
				len++
			}
			break
		}
	}
	// do not count the closing backtick
	len--
	c := s.text.substr(start + 1, s.pos)
	// a single multi-byte character is still one character
	if len != 1 && c.ustring().len != 1 {
		s.error('invalid character literal (more than one character: $len)')
	}
	if c == '\\`' {
		return '`'
	}
	// Escapes a `'` character
	if c == '\'' {
		return '\\' + c
	}
	return c
}
// peek returns the kind of the next token without consuming it: it runs
// scan() and then puts back every scanner field that scan() and the
// ident_* helpers assign (position, line counter, interpolation flags,
// `started`, `quote` and `line_comment`).
// NOTE(review): scan() also passes skipped comments to fgenln(); that
// output is not undone here — confirm whether it matters for vfmt.
fn (s mut Scanner) peek() Token {
	// save scanner state
	pos := s.pos
	line := s.line_nr
	inside_string := s.inside_string
	inter_start := s.inter_start
	inter_end := s.inter_end
	started := s.started
	quote := s.quote
	line_comment := s.line_comment

	res := s.scan()
	tok := res.tok

	// restore scanner state
	s.pos = pos
	s.line_nr = line
	s.inside_string = inside_string
	s.inter_start = inter_start
	s.inter_end = inter_end
	s.started = started
	s.quote = quote
	s.line_comment = line_comment
	return tok
}
// expect reports whether the text contains `want` at index `start_pos`.
// It returns false instead of reading out of bounds when `start_pos` is
// outside the text or `want` would run past its end.
fn (s &Scanner) expect(want string, start_pos int) bool {
	end_pos := start_pos + want.len
	start_ok := start_pos >= 0 && start_pos < s.text.len
	end_ok := end_pos >= 0 && end_pos <= s.text.len
	if !start_ok || !end_ok {
		return false
	}
	for i := 0; i < want.len; i++ {
		if s.text[start_pos + i] != want[i] {
			return false
		}
	}
	return true
}
2019-06-22 21:20:28 +03:00
// debug_tokens resets s.pos to 0, turns on s.debug and prints every token
// (with its literal, if any) until .eof is reached.
fn (s mut Scanner) debug_tokens() {
	s.pos = 0
	s.debug = true
	fname := s.file_path.all_after('/')
	println('\n===DEBUG TOKENS $fname===')
	for {
		res := s.scan()
		lit := res.lit
		print(res.tok.str())
		if lit == '' {
			println('')
		}
		else {
			println(' `$lit`')
		}
		if res.tok == .eof {
			println('============ END OF DEBUG TOKENS ==================')
			break
		}
	}
}
// is_name_char reports whether `c` is a letter or an underscore.
fn is_name_char(c byte) bool {
	return c == `_` || c.is_letter()
}
2019-07-24 01:06:48 +03:00
// is_nl reports whether `c` is a line break byte (\n or \r).
fn is_nl(c byte) bool {
	return c == `\n` || c == `\r`
}
// get_opening_bracket walks backwards from s.pos, balancing `)` against `(`
// outside of '...' strings, and returns the index at which the balance is
// zero. The walk also stops at a newline or at index 0.
fn (s &Scanner) get_opening_bracket() int {
	mut pos := s.pos
	mut depth := 0
	mut in_str := false
	for pos > 0 && s.text[pos] != `\n` {
		ch := s.text[pos]
		if !in_str {
			if ch == `)` {
				depth++
			}
			if ch == `(` {
				depth--
			}
		}
		// pos > 0 here, so looking one byte back is always in bounds
		prev := s.text[pos - 1]
		if ch == `\'` && prev != `\\` && prev != `\`` { // ` // apostrophe balance comment. do not remove
			in_str = !in_str
		}
		if depth == 0 {
			break
		}
		pos--
	}
	return pos
}
2019-06-27 02:55:37 +03:00
// Foo { bar: 3, baz: 'hi' } => '{ bar: 3, baz: "hi" }'
// create_type_string rewrites the scanned text in place: the span between
// the opening bracket found by get_opening_bracket() and s.pos is replaced
// with a string literal that interpolates every field of `T` through
// `name`, e.g. '{ bar: $x.bar, baz: $x.baz }'. s.pos is then moved back in
// front of the inserted literal.
fn (s mut Scanner) create_type_string(T Type, name string) {
	saved_line := s.line_nr
	saved_inside_string := s.inside_string
	start := s.get_opening_bracket() + 1
	end := s.pos
	mut newtext := '\'{ '
	for i, field in T.fields {
		if i != 0 {
			newtext += ', '
		}
		newtext += '$field.name: ' + '$${name}.${field.name}'
	}
	newtext += ' }\''
	head := s.text.substr(0, start)
	tail := s.text.substr(end, s.text.len)
	s.text = head + newtext + tail
	s.pos = start - 2
	s.line_nr = saved_line
	s.inside_string = saved_inside_string
}
2019-07-29 19:21:36 +03:00
2019-08-04 10:36:21 +03:00
// contains_capital reports whether `s` has at least one byte in `A`..`Z`.
fn contains_capital(s string) bool {
	mut i := 0
	for i < s.len {
		if s[i].is_capital() {
			return true
		}
		i++
	}
	return false
}
// HTTPRequest bad
2019-08-17 22:19:37 +03:00
// HttpRequest good
2019-08-04 10:36:21 +03:00
// good_type_name rejects names that contain three capital letters in a row
// (HTTPRequest is bad, HttpRequest is good). Names shorter than four bytes
// are always accepted.
fn good_type_name(s string) bool {
	if s.len < 4 {
		return true
	}
	for i := 2; i < s.len; i++ {
		three_caps := s[i-2].is_capital() && s[i-1].is_capital() && s[i].is_capital()
		if three_caps {
			return false
		}
	}
	return true
}
2019-08-04 10:36:21 +03:00